Downstream analysis - simulate gene knockout 

In [5]:
import pandas as pd
import shutil
import tempfile
import os


from functions.analysis_utils.stats.stats_proba import compute_mannwhitneyu_test_means

from functions.analysis_utils.genes_intervention.pers_interventions import tailor_bnd_genes_intervention


from functions.analysis_utils.MaBoSS_simulation.maboss_phenotype_patient import compute_phenotype_table


from functions.analysis_utils.MaBoSS_simulation.maboss_phenotype_patient import collect_group_data


In [6]:
# --- Analysis configuration ---------------------------------------------------

# Drug whose resistant / sensitive cohorts are analysed.
drug_interest = 'Refametinib'

# Model input used to select the differentially expressed genes.
# 'TNF' was used in an earlier run; 'Carcinogen' is a candidate to try later.
input_interest = 'SPOP'

# Phenotype used to select the differentially expressed genes.
phenotype_interest = 'Proliferation'

# File name of the significant-genes table for this input / phenotype pair.
gene_diff_expr = (
    f'significant_genes_{input_interest}_ON_{phenotype_interest}.csv'
)

In [7]:
# Load the cohort definitions and select the candidate genes (only the
# significant genes listed in the differential-expression table are considered).


def _read_id_list(path):
    """Return the stripped, non-empty lines of the text file at `path`."""
    with open(path, "r") as handle:
        return [entry.strip() for entry in handle if entry.strip()]


# Folders containing the models of each cohort.
folder_model_resistant = f"analysis/{drug_interest}/models/resistant"
folder_model_sensitive = f"analysis/{drug_interest}/models/sensitive"

# Identifiers of the top resistant / sensitive patients, one per line.
top_resistant_ids_path = f"analysis/{drug_interest}/top_resistant_ids.txt"
top_resistant_ids_list = _read_id_list(top_resistant_ids_path)

top_sensitive_ids_path = f"analysis/{drug_interest}/top_sensitive_ids.txt"
top_sensitive_ids_list = _read_id_list(top_sensitive_ids_path)

# Differential-expression table: name the gene column and add the difference
# between the resistant and the sensitive group means.
results_gene_enrichment = (
    pd.read_csv(f'analysis/{drug_interest}/results/genes_diff_expressed/{gene_diff_expr}')
    .rename(columns={'Unnamed: 0': 'gene_symbol'})
    .assign(
        diff_expression=lambda table: table['Group Resistant Mean'] - table['Group Sensitive Mean']
    )
)

# Largest (resistant - sensitive) difference first.
results_gene_enrichment_sorted = results_gene_enrichment.sort_values(
    by="diff_expression", ascending=False
)

# The 10 genes with the largest difference are the knockout candidates.
genes_to_test = list(results_gene_enrichment_sorted['gene_symbol'].head(10))

# Input and phenotype names passed to the phenotype-table computation.
inputs_list = [
    'EGF', 'FGF', 'TGFB', 'Androgen', 'Hypoxia',
    'Nutrients', 'Carcinogen', 'Acidosis', 'TNF', 'SPOP',
]

phenotypes_list = [
    'Proliferation', 'DNA_Repair', 'Invasion', 'Apoptosis', 'Migration',
]

In [8]:
from tqdm import tqdm

# Simulate the intervention on each candidate gene individually and collect the
# genes for which the phenotype of interest is absent from the "greater"-side
# Mann-Whitney U results file.
genes_to_target = []

# Folder where the stats files of every gene intervention are saved.
folder_result_stats = f'analysis/{drug_interest}/downstream_analysis/results_{input_interest}_{phenotype_interest}/stats'
os.makedirs(folder_result_stats, exist_ok=True)

for gene in tqdm(genes_to_test):
    # Per-gene working folders (models and results, for each cohort).
    temp_models_gene_path_temp = f'analysis/{drug_interest}/downstream_analysis/interv_{gene}/models/resistant'
    temp_models_gene_path_sens_temp = f'analysis/{drug_interest}/downstream_analysis/interv_{gene}/models/sensitive'
    temp_results_gene_path_temp = f'analysis/{drug_interest}/downstream_analysis/interv_{gene}/results/resistant'
    temp_results_gene_path_sens_temp = f'analysis/{drug_interest}/downstream_analysis/interv_{gene}/results/sensitive'

    folders_to_delete = [
        temp_models_gene_path_temp,
        temp_models_gene_path_sens_temp,
        temp_results_gene_path_temp,
        temp_results_gene_path_sens_temp,
    ]

    # The working folders are removed in `finally` so that a failure or an
    # interrupt during the long simulation does not leave stale models/results
    # behind: `exist_ok=True` / `dirs_exist_ok=True` below would otherwise
    # silently reuse them on the next run.
    try:
        for folder in folders_to_delete:
            os.makedirs(folder, exist_ok=True)

        # Copy the original models into the working folders.
        shutil.copytree(folder_model_resistant, temp_models_gene_path_temp, dirs_exist_ok=True)
        shutil.copytree(folder_model_sensitive, temp_models_gene_path_sens_temp, dirs_exist_ok=True)

        temp_models_gene_path_temp_interv = f"{temp_models_gene_path_temp}/pers_models"
        temp_models_gene_path_sens_temp_interv = f"{temp_models_gene_path_sens_temp}/pers_models"

        # Apply the gene intervention to the copied models of each cohort.
        # NOTE(review): presumably edits the model files under `pers_models`
        # in place - confirm in tailor_bnd_genes_intervention.
        tailor_bnd_genes_intervention(gene, top_resistant_ids_list, temp_models_gene_path_temp_interv, drug_interest)
        tailor_bnd_genes_intervention(gene, top_sensitive_ids_list, temp_models_gene_path_sens_temp_interv, drug_interest)

        # Compute the phenotype table of every patient with the modified models.
        for patient in top_resistant_ids_list:
            compute_phenotype_table(temp_results_gene_path_temp, temp_models_gene_path_temp, patient, inputs_list, phenotypes_list, drug_interest)

        for patient in top_sensitive_ids_list:
            compute_phenotype_table(temp_results_gene_path_sens_temp, temp_models_gene_path_sens_temp, patient, inputs_list, phenotypes_list, drug_interest)

        # The return values are not used; the calls are kept because
        # `combined_results.csv` is read from the same folders right after.
        # NOTE(review): presumably collect_group_data writes that file - confirm.
        collect_group_data(temp_results_gene_path_temp)
        collect_group_data(temp_results_gene_path_sens_temp)

        patient_res_values = pd.read_csv(f'{temp_results_gene_path_temp}/combined_results.csv', index_col=0)
        patient_sens_values = pd.read_csv(f'{temp_results_gene_path_sens_temp}/combined_results.csv', index_col=0)

        # NOTE(review): expected to write the per-gene stats files into
        # `folder_result_stats`, including the "greater" file read below - confirm.
        compute_mannwhitneyu_test_means(gene, folder_result_stats, patient_res_values, patient_sens_values, drug_interest)

        data_greater_side = pd.read_csv(f'{folder_result_stats}/{gene}_p_values_df_mannwhitneyu_greater_sign_{drug_interest}.csv')

        # Keep the gene when the phenotype of interest is not listed in the
        # "greater"-side file. The configured `phenotype_interest` is used
        # (currently 'Proliferation') instead of a hard-coded literal so the
        # check follows the configuration cell.
        if phenotype_interest not in data_greater_side['Phenotype'].values:
            genes_to_target.append(gene)
    finally:
        # Delete all the temp folders of this gene.
        for folder in folders_to_delete:
            if os.path.exists(folder):
                shutil.rmtree(folder)

print(genes_to_target)




  0%|          | 0/10 [00:00<?, ?it/s]

patient_id: SIDM01120
🔍 Processing patient SIDM01120, gene: FOXA1
FOXA1 node found. Replacing...
SIDM01120: CNV — nodes modified
patient_id: SIDM00719
🔍 Processing patient SIDM00719, gene: FOXA1
FOXA1 node found. Replacing...
SIDM00719: CNV — nodes modified
patient_id: SIDM00292
🔍 Processing patient SIDM00292, gene: FOXA1
FOXA1 node found. Replacing...
SIDM00292: CNV — nodes modified
patient_id: SIDM00631
🔍 Processing patient SIDM00631, gene: FOXA1
FOXA1 node found. Replacing...
SIDM00631: CNV — nodes modified
patient_id: SIDM00506
🔍 Processing patient SIDM00506, gene: FOXA1
FOXA1 node found. Replacing...
SIDM00506: CNV — nodes modified
patient_id: SIDM00892
🔍 Processing patient SIDM00892, gene: FOXA1
FOXA1 node found. Replacing...
SIDM00892: CNV — nodes modified
patient_id: SIDM00255
🔍 Processing patient SIDM00255, gene: FOXA1
FOXA1 node found. Replacing...
SIDM00255: CNV — nodes modified
patient_id: SIDM00183
🔍 Processing patient SIDM00183, gene: FOXA1
FOXA1 node found. Replacing...


 10%|█         | 1/10 [11:54<1:47:07, 714.21s/it]

patient_id: SIDM01120
🔍 Processing patient SIDM01120, gene: FGFR3
FGFR3 node found. Replacing...
SIDM01120: CNV — nodes modified
patient_id: SIDM00719
🔍 Processing patient SIDM00719, gene: FGFR3
FGFR3 node found. Replacing...
SIDM00719: CNV — nodes modified
patient_id: SIDM00292
🔍 Processing patient SIDM00292, gene: FGFR3
FGFR3 node found. Replacing...
SIDM00292: CNV — nodes modified
patient_id: SIDM00631
🔍 Processing patient SIDM00631, gene: FGFR3
FGFR3 node found. Replacing...
SIDM00631: CNV — nodes modified
patient_id: SIDM00506
🔍 Processing patient SIDM00506, gene: FGFR3
FGFR3 node found. Replacing...
SIDM00506: CNV — nodes modified
patient_id: SIDM00892
🔍 Processing patient SIDM00892, gene: FGFR3
FGFR3 node found. Replacing...
SIDM00892: CNV — nodes modified
patient_id: SIDM00255
🔍 Processing patient SIDM00255, gene: FGFR3
FGFR3 node found. Replacing...
SIDM00255: CNV — nodes modified
patient_id: SIDM00183
🔍 Processing patient SIDM00183, gene: FGFR3
FGFR3 node found. Replacing...


 20%|██        | 2/10 [23:25<1:33:24, 700.54s/it]

patient_id: SIDM01120
🔍 Processing patient SIDM01120, gene: JUN
JUN node found. Replacing...
SIDM01120: CNV — nodes modified
patient_id: SIDM00719
🔍 Processing patient SIDM00719, gene: JUN
JUN node found. Replacing...
SIDM00719: CNV — nodes modified
patient_id: SIDM00292
🔍 Processing patient SIDM00292, gene: JUN
JUN node found. Replacing...
SIDM00292: CNV — nodes modified
patient_id: SIDM00631
🔍 Processing patient SIDM00631, gene: JUN
JUN node found. Replacing...
SIDM00631: CNV — nodes modified
patient_id: SIDM00506
🔍 Processing patient SIDM00506, gene: JUN
JUN node found. Replacing...
SIDM00506: CNV — nodes modified
patient_id: SIDM00892
🔍 Processing patient SIDM00892, gene: JUN
JUN node found. Replacing...
SIDM00892: CNV — nodes modified
patient_id: SIDM00255
🔍 Processing patient SIDM00255, gene: JUN
JUN node found. Replacing...
SIDM00255: CNV — nodes modified
patient_id: SIDM00183
🔍 Processing patient SIDM00183, gene: JUN
JUN node found. Replacing...
SIDM00183: CNV — nodes modified


 30%|███       | 3/10 [34:50<1:20:54, 693.55s/it]

patient_id: SIDM01120
🔍 Processing patient SIDM01120, gene: FRS2
FRS2 node found. Replacing...
SIDM01120: CNV — nodes modified
patient_id: SIDM00719
🔍 Processing patient SIDM00719, gene: FRS2
FRS2 node found. Replacing...
SIDM00719: CNV — nodes modified
patient_id: SIDM00292
🔍 Processing patient SIDM00292, gene: FRS2
FRS2 node found. Replacing...
SIDM00292: CNV — nodes modified
patient_id: SIDM00631
🔍 Processing patient SIDM00631, gene: FRS2
FRS2 node found. Replacing...
SIDM00631: CNV — nodes modified
patient_id: SIDM00506
🔍 Processing patient SIDM00506, gene: FRS2
FRS2 node found. Replacing...
SIDM00506: CNV — nodes modified
patient_id: SIDM00892
🔍 Processing patient SIDM00892, gene: FRS2
FRS2 node found. Replacing...
SIDM00892: CNV — nodes modified
patient_id: SIDM00255
🔍 Processing patient SIDM00255, gene: FRS2
FRS2 node found. Replacing...
SIDM00255: CNV — nodes modified
patient_id: SIDM00183
🔍 Processing patient SIDM00183, gene: FRS2
FRS2 node found. Replacing...
SIDM00183: CNV —

 40%|████      | 4/10 [46:20<1:09:13, 692.31s/it]

patient_id: SIDM01120
🔍 Processing patient SIDM01120, gene: BCL2
BCL2 node found. Replacing...
SIDM01120: CNV — nodes modified
patient_id: SIDM00719
🔍 Processing patient SIDM00719, gene: BCL2
BCL2 node found. Replacing...
SIDM00719: CNV — nodes modified
patient_id: SIDM00292
🔍 Processing patient SIDM00292, gene: BCL2
BCL2 node found. Replacing...
SIDM00292: CNV — nodes modified
patient_id: SIDM00631
🔍 Processing patient SIDM00631, gene: BCL2
BCL2 node found. Replacing...
SIDM00631: CNV — nodes modified
patient_id: SIDM00506
🔍 Processing patient SIDM00506, gene: BCL2
BCL2 node found. Replacing...
SIDM00506: CNV — nodes modified
patient_id: SIDM00892
🔍 Processing patient SIDM00892, gene: BCL2
BCL2 node found. Replacing...
SIDM00892: CNV — nodes modified
patient_id: SIDM00255
🔍 Processing patient SIDM00255, gene: BCL2
BCL2 node found. Replacing...
SIDM00255: CNV — nodes modified
patient_id: SIDM00183
🔍 Processing patient SIDM00183, gene: BCL2
BCL2 node found. Replacing...
SIDM00183: CNV —

 50%|█████     | 5/10 [57:52<57:40, 692.16s/it]  

patient_id: SIDM01120
🔍 Processing patient SIDM01120, gene: E2F1
E2F1 node found. Replacing...
SIDM01120: CNV — nodes modified
patient_id: SIDM00719
🔍 Processing patient SIDM00719, gene: E2F1
E2F1 node found. Replacing...
SIDM00719: CNV — nodes modified
patient_id: SIDM00292
🔍 Processing patient SIDM00292, gene: E2F1
E2F1 node found. Replacing...
SIDM00292: CNV — nodes modified
patient_id: SIDM00631
🔍 Processing patient SIDM00631, gene: E2F1
E2F1 node found. Replacing...
SIDM00631: CNV — nodes modified
patient_id: SIDM00506
🔍 Processing patient SIDM00506, gene: E2F1
E2F1 node found. Replacing...
SIDM00506: CNV — nodes modified
patient_id: SIDM00892
🔍 Processing patient SIDM00892, gene: E2F1
E2F1 node found. Replacing...
SIDM00892: CNV — nodes modified
patient_id: SIDM00255
🔍 Processing patient SIDM00255, gene: E2F1
E2F1 node found. Replacing...
SIDM00255: CNV — nodes modified
patient_id: SIDM00183
🔍 Processing patient SIDM00183, gene: E2F1
E2F1 node found. Replacing...
SIDM00183: CNV —

 60%|██████    | 6/10 [1:09:42<46:32, 698.22s/it]

patient_id: SIDM01120
🔍 Processing patient SIDM01120, gene: APAF1
APAF1 node found. Replacing...
SIDM01120: CNV — nodes modified
patient_id: SIDM00719
🔍 Processing patient SIDM00719, gene: APAF1
APAF1 node found. Replacing...
SIDM00719: CNV — nodes modified
patient_id: SIDM00292
🔍 Processing patient SIDM00292, gene: APAF1
APAF1 node found. Replacing...
SIDM00292: CNV — nodes modified
patient_id: SIDM00631
🔍 Processing patient SIDM00631, gene: APAF1
APAF1 node found. Replacing...
SIDM00631: CNV — nodes modified
patient_id: SIDM00506
🔍 Processing patient SIDM00506, gene: APAF1
APAF1 node found. Replacing...
SIDM00506: CNV — nodes modified
patient_id: SIDM00892
🔍 Processing patient SIDM00892, gene: APAF1
APAF1 node found. Replacing...
SIDM00892: CNV — nodes modified
patient_id: SIDM00255
🔍 Processing patient SIDM00255, gene: APAF1
APAF1 node found. Replacing...
SIDM00255: CNV — nodes modified
patient_id: SIDM00183
🔍 Processing patient SIDM00183, gene: APAF1
APAF1 node found. Replacing...


 70%|███████   | 7/10 [1:21:19<34:53, 697.70s/it]

patient_id: SIDM01120
🔍 Processing patient SIDM01120, gene: EZH2
EZH2 node found. Replacing...
SIDM01120: CNV — nodes modified
patient_id: SIDM00719
🔍 Processing patient SIDM00719, gene: EZH2
EZH2 node found. Replacing...
SIDM00719: CNV — nodes modified
patient_id: SIDM00292
🔍 Processing patient SIDM00292, gene: EZH2
EZH2 node found. Replacing...
SIDM00292: CNV — nodes modified
patient_id: SIDM00631
🔍 Processing patient SIDM00631, gene: EZH2
EZH2 node found. Replacing...
SIDM00631: CNV — nodes modified
patient_id: SIDM00506
🔍 Processing patient SIDM00506, gene: EZH2
EZH2 node found. Replacing...
SIDM00506: CNV — nodes modified
patient_id: SIDM00892
🔍 Processing patient SIDM00892, gene: EZH2
EZH2 node found. Replacing...
SIDM00892: CNV — nodes modified
patient_id: SIDM00255
🔍 Processing patient SIDM00255, gene: EZH2
EZH2 node found. Replacing...
SIDM00255: CNV — nodes modified
patient_id: SIDM00183
🔍 Processing patient SIDM00183, gene: EZH2
EZH2 node found. Replacing...
SIDM00183: CNV —

 80%|████████  | 8/10 [1:32:37<23:03, 691.56s/it]

patient_id: SIDM01120
🔍 Processing patient SIDM01120, gene: BIRC5
BIRC5 node found. Replacing...
SIDM01120: CNV — nodes modified
patient_id: SIDM00719
🔍 Processing patient SIDM00719, gene: BIRC5
BIRC5 node found. Replacing...
SIDM00719: CNV — nodes modified
patient_id: SIDM00292
🔍 Processing patient SIDM00292, gene: BIRC5
BIRC5 node found. Replacing...
SIDM00292: CNV — nodes modified
patient_id: SIDM00631
🔍 Processing patient SIDM00631, gene: BIRC5
BIRC5 node found. Replacing...
SIDM00631: CNV — nodes modified
patient_id: SIDM00506
🔍 Processing patient SIDM00506, gene: BIRC5
BIRC5 node found. Replacing...
SIDM00506: CNV — nodes modified
patient_id: SIDM00892
🔍 Processing patient SIDM00892, gene: BIRC5
BIRC5 node found. Replacing...
SIDM00892: CNV — nodes modified
patient_id: SIDM00255
🔍 Processing patient SIDM00255, gene: BIRC5
BIRC5 node found. Replacing...
SIDM00255: CNV — nodes modified
patient_id: SIDM00183
🔍 Processing patient SIDM00183, gene: BIRC5
BIRC5 node found. Replacing...


 90%|█████████ | 9/10 [1:43:57<11:27, 687.89s/it]

patient_id: SIDM01120
🔍 Processing patient SIDM01120, gene: BAD
BAD node found. Replacing...
SIDM01120: CNV — nodes modified
patient_id: SIDM00719
🔍 Processing patient SIDM00719, gene: BAD
BAD node found. Replacing...
SIDM00719: CNV — nodes modified
patient_id: SIDM00292
🔍 Processing patient SIDM00292, gene: BAD
BAD node found. Replacing...
SIDM00292: CNV — nodes modified
patient_id: SIDM00631
🔍 Processing patient SIDM00631, gene: BAD
BAD node found. Replacing...
SIDM00631: CNV — nodes modified
patient_id: SIDM00506
🔍 Processing patient SIDM00506, gene: BAD
BAD node found. Replacing...
SIDM00506: CNV — nodes modified
patient_id: SIDM00892
🔍 Processing patient SIDM00892, gene: BAD
BAD node found. Replacing...
SIDM00892: CNV — nodes modified
patient_id: SIDM00255
🔍 Processing patient SIDM00255, gene: BAD
BAD node found. Replacing...
SIDM00255: CNV — nodes modified
patient_id: SIDM00183
🔍 Processing patient SIDM00183, gene: BAD
BAD node found. Replacing...
SIDM00183: CNV — nodes modified


100%|██████████| 10/10 [1:55:14<00:00, 691.44s/it]

[]





The TNF condition used earlier does not give any result with no proliferation -> but it also requires about 166 patients in each group, and the difference between resistant and sensitive was not very large, even if significant.



119 min to run every gene

maybe try another gene enrichment condition-phenotype -> try SPOP


Maybe also update the initial state of EGF, FGF and SPOP based on gene expression?
Simulate combinations of genes?
Why did the E2F1 KO lead to no proliferation before? -> try re-running the earlier personalization (pers) pipeline.


Try the pipeline with another drug (one for which resistance is known).

Try combined inputs? Do we see a stronger difference? I.e. all the growth factors ON (TNF, EGF, FGF) and all carcinogens ON.

Try simulating the drug effect? And combinations of drugs?