# Downstream analysis - Simulate Gene Intervention (Knockout)

In [None]:
import pandas as pd
import shutil
import tempfile
import os
from tqdm import tqdm

from functions.analysis_utils.stats.stats_proba import compute_mannwhitneyu_test_means

from functions.analysis_utils.genes_intervention.pers_interventions import tailor_bnd_genes_intervention


from functions.analysis_utils.MaBoSS_simulation.maboss_phenotype_patient import compute_phenotype_table


from functions.analysis_utils.MaBoSS_simulation.maboss_phenotype_patient import collect_group_data


### User data parameters

In [None]:
# Drug of interest and its targets.
drug_interest = 'AZD7762'
drug_targets = ['CHK1', 'CHK2']

# Input condition / phenotype pair used to name the result files and folders.
input_interest = 'ACIDOSIS'
phenotype_interest = 'INVASION'

# File name of the differential-expression table for the pair above.
gene_diff_expr = f'significant_genes_{input_interest}_ON_{phenotype_interest}.csv'

# Join every target instead of indexing [0] and [1], so the folder name is
# also valid for drugs with one target or with more than two targets.
subdir = f"{'_'.join(drug_targets)}_target_block"

# Locations of the models and of the results for this drug.
folder_models = f"analysis/{drug_interest}/{subdir}/models"
folder_results = f"analysis/{drug_interest}/{subdir}/results"

# Model inputs and phenotypes handed to compute_phenotype_table.
inputs_list = [
    'EGF', 'FGF', 'TGFB', 'Androgen', 'Hypoxia',
    'Nutrients', 'Carcinogen', 'Acidosis', 'TNF', 'SPOP',
]
phenotypes_list = ['Proliferation', 'DNA_Repair', 'Invasion', 'Apoptosis', 'Migration']


In [11]:
# Model folders of the two patient groups.
folder_model_resistant = f"{folder_models}/resistant"
folder_model_sensitive = f"{folder_models}/sensitive"

# Patient IDs of the resistant group: one ID per line, blank lines skipped.
top_resistant_ids_path = f"analysis/{drug_interest}/top_resistant_ids.txt"
with open(top_resistant_ids_path, "r") as f:
    top_resistant_ids_list = list(filter(None, map(str.strip, f)))

# Patient IDs of the sensitive group, read the same way.
top_sensitive_ids_path = f"analysis/{drug_interest}/top_sensitive_ids.txt"
with open(top_sensitive_ids_path, "r") as f:
    top_sensitive_ids_list = list(filter(None, map(str.strip, f)))

# Alternative (currently disabled): test the 10 genes with the largest
# resistant-minus-sensitive mean difference among the significant genes.
#
# results_gene_enrichment = pd.read_csv(f'{folder_results}/genes_diff_expressed/{gene_diff_expr}')
# results_gene_enrichment = results_gene_enrichment.rename(columns={'Unnamed: 0': 'gene_symbol'})
# results_gene_enrichment['diff_expression'] = results_gene_enrichment['Group Resistant Mean'] - results_gene_enrichment['Group Sensitive Mean']
# results_gene_enrichment_sorted = results_gene_enrichment.sort_values(by="diff_expression", ascending = False)
# genes_to_test = list(results_gene_enrichment_sorted['gene_symbol'][:10])

# Gene(s) to knock out; a single symbol is accepted and wrapped into a list
# by the simulation cell.
genes_to_test = 'E2F1'


In [None]:


# Simulate the knock-out of each candidate gene individually.
# A gene is kept in `genes_to_target` when 'Proliferation' is absent from the
# 'Phenotype' column of its "greater" Mann-Whitney U result table.
genes_to_target = []

# folder to save all the stats data for each gene intervention
folder_result_stats = f'analysis/{drug_interest}/{subdir}/downstream_analysis/results_{input_interest}_{phenotype_interest}/stats'
os.makedirs(folder_result_stats, exist_ok=True)

# Accept a single gene symbol as well as a list of symbols.
if isinstance(genes_to_test, str):
    genes_to_test = [genes_to_test]

for gene in tqdm(genes_to_test):
    # Per-gene working folders (models and results, resistant and sensitive).
    interv_root = f'analysis/{drug_interest}/{subdir}/downstream_analysis/interv_{gene}'
    temp_models_gene_path_temp = f'{interv_root}/models/resistant'
    temp_models_gene_path_sens_temp = f'{interv_root}/models/sensitive'
    temp_results_gene_path_temp = f'{interv_root}/results/resistant'
    temp_results_gene_path_sens_temp = f'{interv_root}/results/sensitive'

    folders_to_delete = [
        temp_models_gene_path_temp,
        temp_models_gene_path_sens_temp,
        temp_results_gene_path_temp,
        temp_results_gene_path_sens_temp,
    ]

    # try/finally: the working folders are removed even when a step fails,
    # so an interrupted run does not leave copied models behind.
    try:
        for folder in folders_to_delete:
            os.makedirs(folder, exist_ok=True)

        # Copy the original models into the working folders.
        shutil.copytree(folder_model_resistant, temp_models_gene_path_temp, dirs_exist_ok=True)
        shutil.copytree(folder_model_sensitive, temp_models_gene_path_sens_temp, dirs_exist_ok=True)

        # Apply the gene intervention to the copied personalized models.
        temp_models_gene_path_temp_interv = f"{temp_models_gene_path_temp}/pers_models"
        temp_models_gene_path_sens_temp_interv = f"{temp_models_gene_path_sens_temp}/pers_models"

        tailor_bnd_genes_intervention(gene, top_resistant_ids_list, temp_models_gene_path_temp_interv, drug_interest)
        tailor_bnd_genes_intervention(gene, top_sensitive_ids_list, temp_models_gene_path_sens_temp_interv, drug_interest)

        # Compute the phenotype table of every patient in both groups.
        for patient in top_resistant_ids_list:
            compute_phenotype_table(temp_results_gene_path_temp, temp_models_gene_path_temp, patient, inputs_list, phenotypes_list, drug_interest)

        for patient in top_sensitive_ids_list:
            compute_phenotype_table(temp_results_gene_path_sens_temp, temp_models_gene_path_sens_temp, patient, inputs_list, phenotypes_list, drug_interest)

        # NOTE(review): collect_group_data presumably writes the
        # combined_results.csv files read just below - confirm.
        df_res_combined = collect_group_data(temp_results_gene_path_temp)
        df_sens_combined = collect_group_data(temp_results_gene_path_sens_temp)

        patient_res_values = pd.read_csv(f'{temp_results_gene_path_temp}/combined_results.csv', index_col=0)
        patient_sens_values = pd.read_csv(f'{temp_results_gene_path_sens_temp}/combined_results.csv', index_col=0)
        compute_mannwhitneyu_test_means(gene, folder_result_stats, patient_res_values, patient_sens_values, drug_interest)

        # NOTE(review): this file is expected to be produced by
        # compute_mannwhitneyu_test_means - confirm.
        data_greater_side = pd.read_csv(f'{folder_result_stats}/{gene}_p_values_df_mannwhitneyu_greater_sign_{drug_interest}.csv')

        if 'Proliferation' not in data_greater_side['Phenotype'].values:
            genes_to_target.append(gene)
    finally:
        # delete all the temp folders of this gene
        for folder in folders_to_delete:
            if os.path.exists(folder):
                shutil.rmtree(folder)

print(genes_to_target)




  0%|          | 0/1 [00:00<?, ?it/s]

patient_id: SIDM00524
🔍 Processing patient SIDM00524, gene: E2F1
E2F1 node found. Replacing...
SIDM00524: CNV — nodes modified
patient_id: SIDM00506
🔍 Processing patient SIDM00506, gene: E2F1
E2F1 node found. Replacing...
SIDM00506: CNV — nodes modified
patient_id: SIDM01056
🔍 Processing patient SIDM01056, gene: E2F1
E2F1 node found. Replacing...
SIDM01056: CNV — nodes modified
patient_id: SIDM00292
🔍 Processing patient SIDM00292, gene: E2F1
E2F1 node found. Replacing...
SIDM00292: CNV — nodes modified
patient_id: SIDM00719
🔍 Processing patient SIDM00719, gene: E2F1
E2F1 node found. Replacing...
SIDM00719: CNV — nodes modified
patient_id: SIDM00971
🔍 Processing patient SIDM00971, gene: E2F1
E2F1 node found. Replacing...
SIDM00971: CNV — nodes modified
patient_id: SIDM00444
🔍 Processing patient SIDM00444, gene: E2F1
E2F1 node found. Replacing...
SIDM00444: CNV — nodes modified
patient_id: SIDM00767
🔍 Processing patient SIDM00767, gene: E2F1
E2F1 node found. Replacing...
SIDM00767: CNV —

100%|██████████| 1/1 [12:41<00:00, 761.64s/it]

[]





The TNF condition used earlier does not give results with no proliferation -> but it also requires about 166 patients in each group, and the difference between resistant and sensitive was not very large, even if significant.



119 min to run every gene

maybe try another gene enrichment condition-phenotype -> try SPOP


Maybe also update the initial state of EGF, FGF and SPOP based on gene expression?
Simulate combinations of genes?
Why did the E2F1 KO lead to no proliferation before? -> try re-running the earlier personalization pipeline.


try pipeline with other drug (for which know resistance)

Try combinations of inputs? Do we see a stronger difference? I.e. all the growth factors ON (TNF, EGF, FGF) and all carcinogens ON.

Try simulating the drug effect? And combinations of drugs?