# Pipeline to create the personalized Boolean Networks based on gene expression and Copy Number Variation Data 

In [9]:
import pandas as pd
import os
import shutil

from functions.generate_utils.identification_patients.get_patients_sens_res import get_patients

from functions.generate_utils.identification_drug.drug_analysis import identify_drug

# from functions.generate_utils.create_generic_models.update_nodes_names import replace_node_names_in_file


from functions.generate_utils.create_generic_models.update_nodes_names import replace_node_names_in_file


from functions.analysis_utils.MaBoSS_simulation.maboss_phenotype_patient import compute_phenotype_table, collect_group_data

from functions.analysis_utils.stats.stats_proba import compute_mannwhitneyu_test_means
from functions.analysis_utils.results_MaBoSS_visualization.boxplot_phenotype import create_boxplot


from functions.generate_models import generate_models_re, pre_process_re

from functions.analysis import downstream_analysis



### Loading the datasets 

In [10]:
# Loading the data (TCGA, cell model passport)

annotations_models = pd.read_csv('data/model_list_20250407.csv')
mutations_data = pd.read_csv('data/mutations_all_20250318.csv')
drug_data = pd.read_csv('data/drug_sensitivity.csv')

montagud_data = (
    pd.read_csv('data/Montagud_inter_nodes_data.csv', header=1)
    .loc[:, ['Target node', 'Interaction type', 'Source']])
rna_seq_data = pd.read_csv('data/rnaseq_merged/rnaseq_merged_20250117.csv')
cnv_data = pd.read_csv('data/cellmodel_data/cnv_summary_20250207.csv')

proteins_data = pd.read_csv('data/cellmodel_data/proteomics_all_20250211.csv')


models_depmap_annotation = pd.read_csv('data/cellmodel_data/model_list_20250423.csv')
mutations_data_depseq = pd.read_csv('data/depmap_data/OmicsSomaticMutations.csv')


  mutations_data_depseq = pd.read_csv('data/depmap_data/OmicsSomaticMutations.csv')


In [11]:
# identify_drug(drug_data, annotations_models, tissue_remove)

### Users parameters data 

In [12]:
# User's data to choose

# example
type_model = 'proteins_models'

drug_interest = 'Refametinib'
drug_targets = ['MEK1', 'MEK2']



number_patients = 35 # in each group


# tissue_interest = 'HAEMATOPOIETIC AND LYMPHOID'



# Generic Boolean Network 
# Inputs
inputs_list = ['EGF', 'FGF', 'TGFB', 'ANDROGEN', 'HYPOXIA', 'NUTRIENTS', 'CARCINOGEN', 'ACIDOSIS', 'TNF', 'SPOP']

# Phenotypes
phenotype_interest = ["PROLIFERATION","INVASION","DNA_REPAIR","APOPTOSIS"]


# to specify the boolean networks to be genes or proteins based
name_montagud_maps = {
        "CASPASE8": "CASP8",
        "CASPASE3": "CASP3",
        "CASPASE9": "CASP9",
        "CYCLINB": "CCNB1",
        "CYCLIND": "CCND1",
        "DSH": "DVL1",
        "BETA_CATENIN": "CTNNB1",
        "E_CADHERIN": "CDH1",
        "CYCC": "CYCS",
        "MEK1_2": "MEK1",
        "NF_KB": "NFKB",
        "SNAIL": "SNAI1",
        "TNFALPHA": "TNF",
        "TSC1_2": "TSC1",
        "BCL_XL": "BCL2L1",
        "MAP3K1_3": "MAP3K1",
        "CHK1_2": "CHK1",
    }


nodes_add = ['MEK2','TSC2','MAP3K3','CHK2']
nodes_to_add = {'MEK1':'MEK2', 'TSC1':'TSC2', 'MAP3K1':'MAP3K3', 'CHK1':'CHK2'}



nodes_to_remove = ['FUSED_EVENT', 'AR_ERG']



# proteins synonyms 
synonyms_maps = {
        "RAF1": "RAF",
        "BCL2L2": "BCL2",
        "SMAD1": "SMAD",
        "SMAD3": "SMAD",
        "SMAD4": "SMAD",
        "SMAD5": "SMAD",
        "BAK1": "BAK",
        "GSK3A": "GSK3",
        "GSK3B": "GSK3",
        "TGFBR2": "TGFBR",
        "TGFBR3": "TGFBR",
        "NRAS": "RAS",
        "KRAS": "RAS",
        "HRAS": "RAS",
        "AKT2": "AKT",
        "MYCN": "MYC",
        }


In [13]:

subdir = f"{'_'.join(drug_targets)}_target_block"
folder_generic_models = f"analysis/{drug_interest}/{type_model}/{subdir}/models/generic/"
folder_models = f"analysis/{drug_interest}/{type_model}/{subdir}/models"
patients_categ = ['resistant', 'sensitive', 'healthy']

for patient_categ in patients_categ:
    if not os.path.exists(f"analysis/{drug_interest}/{type_model}/{subdir}/results/{patient_categ}"):
        os.makedirs(f"analysis/{drug_interest}/{type_model}/{subdir}/results/{patient_categ}")

    if not os.path.exists(f"analysis/{drug_interest}/{type_model}/{subdir}/models/{patient_categ}"):
        os.makedirs(f"analysis/{drug_interest}/{type_model}/{subdir}/models/{patient_categ}")

dest_dir = f"analysis/{drug_interest}/{type_model}/{subdir}/models/generic"
os.makedirs(dest_dir, exist_ok=True)

# Copy the files
shutil.copy('analysis/generic_models/Montagud2022_Prostate_Cancer.bnd', dest_dir)
shutil.copy('analysis/generic_models/Montagud2022_Prostate_Cancer.cfg', dest_dir) 

'analysis/Refametinib/proteins_models/MEK1_MEK2_target_block/models/generic/Montagud2022_Prostate_Cancer.cfg'

### Pre-processing the data (genes, proteins, cnv)

In [14]:
top_resistant_ids, top_sensitive_ids, top_healthy_ids, montagud_nodes, rna_seq_data_filtered, cnv_data_filtered, table_rna_seq_patients, df_melted_protein, table_proteins_patients = pre_process_re(
montagud_data,
rna_seq_data,
cnv_data,
number_patients,
drug_data,
annotations_models,
drug_interest,
proteins_data,
type_model,
name_montagud_maps,
nodes_add,
synonyms_maps,
tissue_interest=None,
tissue_remove=None,
nodes_to_remove = nodes_to_remove,
)

os.makedirs(f"analysis/{drug_interest}/{type_model}/{subdir}/data_filtered", exist_ok=True)
rna_seq_data_filtered.to_csv(f"analysis/{drug_interest}/{type_model}/{subdir}/data_filtered/rna_seq_data_filtered.csv")
cnv_data_filtered.to_csv(f"analysis/{drug_interest}/{type_model}/{subdir}/data_filtered/cnv_data_filtered.csv")
table_rna_seq_patients.to_csv(f"analysis/{drug_interest}/{type_model}/{subdir}/data_filtered/table_rna_seq_patients.csv")



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annotations_models_filtered.rename(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rna_seq_data.rename(columns={"gene_symbol": "gene_name"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  proteins_data_patient_id_filtered["symbol"] = proteins_data_patient_id_filtered[
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org

preprocessed  proteins


In [15]:
generate_models_re(
            folder_generic_models,
            folder_models,
            top_resistant_ids,
            top_sensitive_ids,
            top_healthy_ids,
            drug_interest,
            drug_targets,
            phenotype_interest,
            rna_seq_data,
            montagud_nodes,
            table_rna_seq_patients,
            cnv_data_filtered,
            name_montagud_maps,
            type_model,
            df_melted_protein,
            table_proteins_patients,
            nodes_to_remove,
            nodes_to_add,
            intervention_gene = None,
        )


change manually the bnd files with the node to remove
change manually the bnd files with the node to remove
All .cfg and .bnd files created for sensitive, resistant and healthy patients.
🔍 Processing patient SIDM00415, gene: MEK1
MEK1 node found. Replacing...
SIDM00415: CNV — nodes modified
🔍 Processing patient SIDM00415, gene: MEK2
MEK2 node found. Replacing...
SIDM00415: CNV — nodes modified
🔍 Processing patient SIDM00886, gene: MEK1
MEK1 node found. Replacing...
SIDM00886: CNV — nodes modified
🔍 Processing patient SIDM00886, gene: MEK2
MEK2 node found. Replacing...
SIDM00886: CNV — nodes modified
🔍 Processing patient SIDM00524, gene: MEK1
MEK1 node found. Replacing...
SIDM00524: CNV — nodes modified
🔍 Processing patient SIDM00524, gene: MEK2
MEK2 node found. Replacing...
SIDM00524: CNV — nodes modified
🔍 Processing patient SIDM00506, gene: MEK1
MEK1 node found. Replacing...
SIDM00506: CNV — nodes modified
🔍 Processing patient SIDM00506, gene: MEK2
MEK2 node found. Replacing...
SIDM0

### Create the personalized PAN cancer models (genes and cnv)

In [16]:
# check no intersection between the two groups
intersection = set(top_resistant_ids) & set(top_sensitive_ids) & set(top_healthy_ids)
print(intersection)

set()


### All pipeline (create personalized networks and compute attractors distribution)

In [9]:
# ALL PIPELINE 


# Pre process data and saved them in the data filtered folder
# AS tissue interest is PAN cancer -> tissue_interest = None (want all tissues)


drugs_dict = {'Refametinib': ['MEK1', 'MEK2'], 'Pictilisib': ['PI3K'], 'AZD7762': ['CHK1', 'CHK2']}
type_models = ['proteins_models', 'genes_models', 'genes_proteins_models']
for drug in drugs_dict:
    for type_model in type_models:
        subdir = f"{'_'.join(drugs_dict[drug])}_target_block"
        folder_generic_models = f"analysis/{drug}/{type_model}/{subdir}/models/generic/"
        folder_models = f"analysis/{drug}/{type_model}/{subdir}/models"
        folder_results = f"analysis/{drug}/{type_model}/{subdir}"


        patients_categ = ['resistant', 'sensitive', 'healthy']
        

        for patient_categ in patients_categ:
            os.makedirs(f"analysis/{drug}/{type_model}/{subdir}/results/{patient_categ}", exist_ok=True)
            os.makedirs(f"analysis/{drug}/{type_model}/{subdir}/models/{patient_categ}", exist_ok=True)

        dest_dir = f"analysis/{drug}/{type_model}/{subdir}/models/generic"
        os.makedirs(dest_dir, exist_ok=True)

        # Copy the files
        shutil.copy('data/montagud_models/Montagud2022_Prostate_Cancer.bnd', dest_dir)
        shutil.copy('data/montagud_models/Montagud2022_Prostate_Cancer.cfg', dest_dir)

        top_resistant_ids, top_sensitive_ids, top_healthy_ids, montagud_nodes, rna_seq_data_filtered, cnv_data_filtered, table_rna_seq_patients, df_melted_protein, table_proteins_patients = pre_process_re(
            montagud_data.copy(),
            rna_seq_data.copy(),
            cnv_data.copy(),
            number_patients,
            drug_data.copy(),
            annotations_models.copy(),
            drug,
            proteins_data.copy(),
            type_model,
            name_montagud_maps,
            nodes_to_add,
            synonyms_maps,
            tissue_interest=None,
            tissue_remove=None,
            node_to_remove=None,
        )

        # os.makedirs(f"analysis/{drug}/data_filtered", exist_ok=True)
        # rna_seq_data_filtered.to_csv(f"analysis/{drug}/data_filtered/rna_seq_data_filtered.csv")
        # cnv_data_filtered.to_csv(f"analysis/{drug}/data_filtered/cnv_data_filtered.csv")
        # table_rna_seq_patients.to_csv(f"analysis/{drug}/data_filtered/table_rna_seq_patients.csv")



# rna_seq_data not filtered as get the may expression value
        # generate_models_re(
        #     folder_generic_models,
        #     folder_models,
        #     top_resistant_ids,
        #     top_sensitive_ids,
        #     top_healthy_ids,
        #     drug,
        #     drugs_dict[drug],
        #     phenotype_interest,
        #     rna_seq_data,
        #     montagud_nodes,
        #     table_rna_seq_patients,
        #     cnv_data_filtered,
        #     name_montagud_maps,
        #     type_model,
        #     df_melted_protein,
        #     table_proteins_patients,
        #     intervention_gene = None,
        # )

        nb_patients_required = downstream_analysis(folder_results,folder_models, drug, top_resistant_ids, top_sensitive_ids, top_healthy_ids, patients_categ, inputs_list, phenotype_interest, annotations_models, list_active_inputs = None)




TypeError: pre_process_re() got an unexpected keyword argument 'node_to_remove'. Did you mean 'nodes_to_remove'?