# In [0]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sbn

# Directory containing the mutation experiment files.
# os.listdir() does not expand "~" by itself, so the home directory is
# resolved explicitly here.
data_dir = os.path.expanduser("~/Datasets")

# Column titles: three identifying columns, then the replicate measurements
# grouped by assay, then condition (WT before Mut), then replicate number.
column_titles = ['Gene', 'WildType.Sequence', 'Mutant.Sequence'] + [
    f'{assay}.{condition}.Rep{replicate}'
    for assay in ('mRNA.Expression', 'Protein.Expression', 'CellViability')
    for condition in ('WT', 'Mut')
    for replicate in (1, 2, 3)
]

def aggrefiles(directory):
    """Combine every tab-separated ``.txt`` file in *directory* into one table.

    The first line of each file is skipped and the columns are named from the
    module-level ``column_titles``. A ``'Gene name'`` column holding the file
    name without its extension is added to every row.

    Args:
        directory: Path of the folder to scan. A leading ``~`` is expanded.

    Returns:
        A single ``pandas.DataFrame`` with the rows of all files, concatenated
        in sorted file-name order and re-indexed from 0.

    Raises:
        ValueError: If the directory contains no ``.txt`` files.
    """
    directory = os.path.expanduser(directory)

    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order of the combined table reproducible between runs.
    txt_files = sorted(
        name for name in os.listdir(directory) if name.endswith('.txt')
    )
    if not txt_files:
        # pd.concat([]) would otherwise fail with the much less helpful
        # "No objects to concatenate".
        raise ValueError(f"No .txt files found in {directory!r}")

    all_data = []
    for filename in txt_files:
        file_path = os.path.join(directory, filename)
        df = pd.read_csv(file_path, sep='\t', skiprows=1, names=column_titles)
        df['Gene name'] = os.path.splitext(filename)[0]
        all_data.append(df)
    return pd.concat(all_data, ignore_index=True)

# Load every experiment file into one table and display it
mutation_data = aggrefiles(data_dir)

print(mutation_data)


def _replicate_mean(measure, condition):
    """Return the row-wise mean of the three replicate columns of *measure* for *condition*."""
    replicate_cols = [f'{measure}.{condition}.Rep{rep}' for rep in (1, 2, 3)]
    return mutation_data[replicate_cols].mean(axis=1)


def _mutation_type(row):
    """Classify a row by comparing wild-type and mutant sequence lengths."""
    wt_len = len(row['WildType.Sequence'])
    mut_len = len(row['Mutant.Sequence'])
    if wt_len == mut_len:
        return 'Substitution'
    if wt_len < mut_len:
        return 'Insertion'
    return 'Deletion'


# Mean WT viability minus mean mutant viability, per row
mutation_data['CellViability.Difference'] = (
    _replicate_mean('CellViability', 'WT')
    - _replicate_mean('CellViability', 'Mut')
)

# Equal lengths -> Substitution, longer mutant -> Insertion, otherwise Deletion
mutation_data['MutationType'] = mutation_data.apply(_mutation_type, axis=1)

# Relative change of the mutant mean against the WT mean (0 means no change)
mutation_data['mRNA.Change'] = (
    _replicate_mean('mRNA.Expression', 'Mut')
    / _replicate_mean('mRNA.Expression', 'WT')
) - 1
mutation_data['Protein.Change'] = (
    _replicate_mean('Protein.Expression', 'Mut')
    / _replicate_mean('Protein.Expression', 'WT')
) - 1

# Per-gene summary table: mean viability difference, mean mRNA and protein
# change, and the mutation type of the first row found for each gene.
analyzed_data = mutation_data.groupby('Gene name').agg({
    'CellViability.Difference': 'mean',
    'MutationType': 'first',
    'mRNA.Change': 'mean',
    'Protein.Change': 'mean'
})

print(analyzed_data)

# Prioritize genes on cell viability: the five genes with the largest mean
# WT-minus-mutant difference. This reuses the summary table instead of
# repeating the identical groupby/agg.
prioritized_data = analyzed_data.nlargest(5, 'CellViability.Difference')

print(prioritized_data)

# Individual cell-viability replicate columns as separate Series
WTrep1, WTrep2, WTrep3 = (
    mutation_data[f'CellViability.WT.Rep{rep}'] for rep in (1, 2, 3)
)

Mutrep1, Mutrep2, Mutrep3 = (
    mutation_data[f'CellViability.Mut.Rep{rep}'] for rep in (1, 2, 3)
)
# Per-gene mean viability difference taken from the summary table
diffe = analyzed_data['CellViability.Difference']

                      
# Cell viability using boxplot
# plt.Figure() only instantiates a Figure that pyplot does not manage, so the
# plot was never drawn on it. plt.subplots() returns the figure and axes that
# are actually displayed.
fig, ax = plt.subplots()

# All six replicates are drawn in a single call so WT and mutant boxes sit
# side by side. Two separate boxplot() calls on the same axes draw both sets
# at the same x positions, so the boxes overlap and cannot be told apart.
viability_columns = [
    'CellViability.WT.Rep1', 'CellViability.WT.Rep2', 'CellViability.WT.Rep3',
    'CellViability.Mut.Rep1', 'CellViability.Mut.Rep2', 'CellViability.Mut.Rep3',
]
sbn.boxplot(data=mutation_data[viability_columns], ax=ax)
ax.set_title('Cell Viability')
ax.set_xlabel('Replicates')
ax.set_ylabel('Cell Viability')
# Rotate the tick labels so the six long column names stay readable.
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
#sbn.scatterplot(data=mutation_data[['CellViability.Mut.Rep1', 'CellViability.Mut.Rep2', 'CellViability.Mut.Rep3']])
plt.show()
#plot_data = { "WTrep1": WTrep1, "WTrep2": WTrep2, "WTrep3": WTrep3, "Mutrep1": Mutrep1, "Mutrep2": Mutrep2, "Mutrep3": Mutrep3 }
# plot_df = pd.DataFrame(plot_data)
# plot_df.head()
# fig, ax = plt.subplot_mosaic([["box", "scatter"],["hist", "hist"]], 
#                            figsize=(7, 7), layout="constrained")
# sbn.boxplot(plot_df, x="WTrep1", y="WTrep2", hue="WTrep3", ax=ax["box"])
# sbn.scatterplot(plot_df,  x="Mutrep1", y="Mutrep2", hue="Mutrep3", ax=ax["scatter"])
# sbn.kdeplot(plot_df, x="WTrep1", hue="WTrep2", ax=ax["hist"])
# plt.show()