In [None]:
import maboss
import ginsim
import pandas as pd 
import numpy as np
import mygene
import os
import shutil
import ast
from scipy.stats import kruskal

from create_generic_models.create_generic_patients_cfgs import create_generic_patients_cfg_bnd_validation
from create_generic_models.update_phenotypes_generic_models import generic_models_update_phenotypes


# from create_person_models.tailor_cfgs_patients_gene import personalized_patients_genes_cfgs_validation
from pre_process_data.pre_process_genes import create_table_rna_seq_patients, create_table_proteins_patients

from identification_patients.validation_get_patients_ids import get_patients_valid

from pre_process_data.tcga_preprocess_data import pre_process_tcga_data

from create_person_models.tailor_cfgs_patients_gene import personalized_patients_genes_cfgs, personalized_patients_proteins_cfgs
from create_person_models.tailor_bnd_cnv import tailor_bnd_cnv_validation

from create_person_models.tailor_bnd_tsg_onco_mutations import tailor_bnd_mutat_validation

from MaBoSS_simulation.maboss_phenotype_patient import compute_phenotype_table, compute_phenotype_mean_group_validation, combine_groups_values
from stats.stats_proba import compute_kruskal_test_means_validation

In [2]:
# Import data
phenotype_data = pd.read_csv('data/TCGA_data/prostate/TCGA_PRAD_phenotypes.csv')
genes_data = pd.read_csv('data/TCGA_data/prostate/TCGA_PRAD_genes_illumina.csv', sep='\t')
cnv_data = pd.read_csv('data/TCGA_data/prostate/TCGA_PRAD_cnv_gistic2.csv',sep='\t')

proteins_data = pd.read_csv('data/TCGA_data/prostate/TCGA_PRAD_proteins_RPPA.csv',sep='\t')

# keep all montagud nodes
montagud_data = (
    pd.read_csv('data/Montagud_inter_nodes_data.csv', header=1)
    .loc[:, ['Target node', 'Interaction type', 'Source']])

# Create list of genes of interest (in Montagud data)
montagud_nodes = list(set(montagud_data['Target node'].tolist() + montagud_data['Source'].tolist()))
montagud_nodes = [node for node in montagud_nodes if node != '0/1']
montagud_nodes = [node.upper() for node in montagud_nodes if isinstance(node, str)]
# montagud_nodes.append('KRAS')
# to_remove = ['RAS', 'FUSED_EVENT', 'NKX3_1', 'SPOP', 'AR_ERG']

# montagud_nodes = [node for node in montagud_nodes if node not in to_remove]
# montagud_nodes = list(set(montagud_nodes))

In [3]:
# remove all '_'
montagud_nodes = ['MEK1' if x == 'MEK1_2' else x for x in montagud_nodes]
montagud_nodes = ['TSC1' if x == 'TSC1_2' else x for x in montagud_nodes]
montagud_nodes = ['MAP3K1' if x == 'MAP3K1_3' else x for x in montagud_nodes]
montagud_nodes = ['CHK1' if x == 'CHK1_2' else x for x in montagud_nodes]


montagud_nodes.append('MEK2')
montagud_nodes.append('TSC2')
montagud_nodes.append('MAP3K3')
montagud_nodes.append('CHK2')


to_remove = ['FUSED_EVENT', 'AR_ERG']

montagud_nodes = [node for node in montagud_nodes if node not in to_remove]
print(montagud_nodes)

['PTCH1', 'BAD', 'ETV1', 'CYCLINB', 'GSK3', 'ETS1', 'APOPTOSIS', 'CFLAR', 'EP300', 'TAK1', 'BRCA1', 'EGFR', 'MIGRATION', 'RAS', 'PKC', 'TGFBR', 'RTK', 'E2F1', 'ROS', 'P90RSK', 'BRCA2', 'CHK1', 'P53', 'TWIST1', 'DAXX', 'METASTASIS', 'GLUT1', 'APAF1', 'P38', 'RAGS', 'PDK1', 'PI3K', 'AMP_ATP', 'GLI', 'DSH', 'HYPOXIA', 'EEF2K', 'EGF', 'FADD', 'TCF', 'RHEB', 'COX4I2', 'FGF', 'IKK', 'FOXA1', 'MEK1', 'ZBTB17', 'VHL', 'CASPASE9', 'MDM2', 'MAP3K1', 'SLUG', 'WNT', 'NCOA3', 'EZH2', 'TSC1', 'PROLIFERATION', 'INVASION', 'TNFALPHA', 'BIRC5', 'NF1', 'AXIN1', 'BCL_XL', 'CYTOC', 'PTEN', 'MTORC1', 'P70S6KAB', 'PHDS', 'ATR', 'SMAD', 'AR', 'CASPASE8', 'MYC', 'BMP2', 'MYC_MAX', 'P21', 'NUTRIENTS', 'EEF2', 'TGFB', 'CYCLIND', 'MXI1', 'CDH2', 'GADD45', 'JUN', 'AMPK', 'EMT', 'E_CADHERIN', 'PIP3', 'VEGF', 'DNA_DAMAGE', 'HSPS', 'CASPASE3', 'BETA_CATENIN', 'P15', 'BAK', 'CARCINOGEN', 'SMO', 'NF_KB', 'JNK', 'NKX3_1', 'RB1', 'ERK', 'GSH', 'LACTIC_ACID', 'BAX', 'RAF', 'FGFR3', 'NCOR2', 'ANDROGEN', 'SHH', 'NCOR1', 'F

In [4]:
# print(proteins_data.head())
# print(list(proteins_data['sample']))
proteins_data['sample'] = proteins_data['sample'].str.rsplit('-', n=2).str[0]
proteins_data['sample'] = proteins_data['sample'].str.upper()

# Create regex pattern that matches any name in the list
pattern = '|'.join(montagud_nodes)  # 'NameA|NameB|NameC'

# Filter rows where 'sample' contains any name from the list
proteins_data = proteins_data[proteins_data['sample'].str.contains(pattern, case=False, na=False)]
proteins_data = proteins_data.dropna(how='all', subset=proteins_data.columns[1:])


print(proteins_data)



          sample  TCGA-KK-A59Z-01  TCGA-KC-A4BV-01  TCGA-CH-5772-01  \
8    A-RAF_PS299         0.121288        -0.167044        -0.110590   
13         ADAR1         0.371003        -0.209629        -0.086770   
14           AKT         0.237545        -0.434174        -0.174623   
15     AKT_PS473        -1.609986        -0.246830        -0.105235   
16     AKT_PT308        -1.462328        -0.261684        -0.325260   
..           ...              ...              ...              ...   
219          BAK              NaN              NaN              NaN   
220  C-RAF_PS338              NaN              NaN              NaN   
221         CHK1              NaN              NaN              NaN   
222    CHK2_PT68              NaN              NaN              NaN   
226        PARP1              NaN              NaN              NaN   

     TCGA-G9-6377-01  TCGA-HC-7081-01  TCGA-EJ-7788-01  TCGA-HC-7233-01  \
8           0.066131        -0.223209         0.309472         0.177667 

AKT: how much AKT protein is present.
AKT_PS473: how much of the protein is in the activated or modified form.

how to do with the activated form of the protein ? should i combine it with the full complex?

In [5]:

# diagnoses.tumor_grade, diagnoses.morphology
# stratify by cancer type: diagnoses.primary_diagnosis

phenotype_data_filtered = phenotype_data[['sampleID','gleason_score']]


In [6]:
print(phenotype_data_filtered)
print(phenotype_data_filtered['gleason_score'].value_counts())

            sampleID  gleason_score
0    TCGA-2A-A8VL-01              6
1    TCGA-2A-A8VO-01              6
2    TCGA-2A-A8VT-01              9
3    TCGA-2A-A8VV-01              6
4    TCGA-2A-A8VX-01              8
..               ...            ...
561  TCGA-ZG-A9M4-01              9
562  TCGA-ZG-A9MC-01              9
563  TCGA-ZG-A9N3-01              9
564  TCGA-ZG-A9ND-01              9
565  TCGA-ZG-A9NI-01              9

[566 rows x 2 columns]
gleason_score
7     301
9     144
8      67
6      50
10      4
Name: count, dtype: int64


In [7]:
# create 3 groups: gleason score of 6, gleason score of 7, and of gleason score of > 8

group_0 = [6]
group_1 = [7]
group_2 = [8, 9, 10]

conditions = [
phenotype_data_filtered["gleason_score"].isin(group_0),
phenotype_data_filtered["gleason_score"].isin(group_1),
phenotype_data_filtered["gleason_score"].isin(group_2),

]
choices = ['low_aggressive', 'middle_aggressive', 'high_aggressive']

phenotype_data_filtered.loc[:, "Gleason_group"] = np.select(
conditions, choices, default=""
)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  phenotype_data_filtered.loc[:, "Gleason_group"] = np.select(


In [8]:
print(phenotype_data_filtered)
sampled_df = phenotype_data_filtered.groupby("Gleason_group", group_keys=False).apply(
    lambda x: x.sample(n=min(len(x), 30), random_state=42)
)

            sampleID  gleason_score    Gleason_group
0    TCGA-2A-A8VL-01              6   low_aggressive
1    TCGA-2A-A8VO-01              6   low_aggressive
2    TCGA-2A-A8VT-01              9  high_aggressive
3    TCGA-2A-A8VV-01              6   low_aggressive
4    TCGA-2A-A8VX-01              8  high_aggressive
..               ...            ...              ...
561  TCGA-ZG-A9M4-01              9  high_aggressive
562  TCGA-ZG-A9MC-01              9  high_aggressive
563  TCGA-ZG-A9N3-01              9  high_aggressive
564  TCGA-ZG-A9ND-01              9  high_aggressive
565  TCGA-ZG-A9NI-01              9  high_aggressive

[566 rows x 3 columns]


  sampled_df = phenotype_data_filtered.groupby("Gleason_group", group_keys=False).apply(


In [9]:
print(sampled_df)
patients_id = list(sampled_df['sampleID'])
print(patients_id)

            sampleID  gleason_score      Gleason_group
551  TCGA-ZG-A9L5-01              9    high_aggressive
563  TCGA-ZG-A9N3-01              9    high_aggressive
464  TCGA-V1-A9ZK-01              8    high_aggressive
525  TCGA-YL-A8SF-01              8    high_aggressive
52   TCGA-CH-5792-01              9    high_aggressive
..               ...            ...                ...
382  TCGA-KK-A6E5-01              7  middle_aggressive
392  TCGA-KK-A7AZ-01              7  middle_aggressive
110  TCGA-EJ-7330-01              7  middle_aggressive
166  TCGA-EJ-A8FN-01              7  middle_aggressive
165  TCGA-EJ-A7NN-01              7  middle_aggressive

[90 rows x 3 columns]
['TCGA-ZG-A9L5-01', 'TCGA-ZG-A9N3-01', 'TCGA-V1-A9ZK-01', 'TCGA-YL-A8SF-01', 'TCGA-CH-5792-01', 'TCGA-KK-A8I9-01', 'TCGA-YL-A8HK-01', 'TCGA-HC-A48F-01', 'TCGA-ZG-A9LM-01', 'TCGA-V1-A9ZR-01', 'TCGA-XJ-A9DX-01', 'TCGA-EJ-8472-01', 'TCGA-YL-A9WI-01', 'TCGA-CH-5761-01', 'TCGA-HC-7821-01', 'TCGA-YL-A8SC-01', 'TCGA-YL-A8S

In [10]:
cnv_data_col = list(cnv_data.columns)
common_col = list(set(cnv_data_col) & set(patients_id))
col_keep = ['Gene Symbol'] + common_col
cnv_data_filtered = cnv_data[col_keep]
print(cnv_data_filtered.head())

  Gene Symbol  TCGA-YL-A9WH-01  TCGA-HC-7748-01  TCGA-V1-A9OT-01  \
0       ACAP3               -1                0                0   
1      ACTRT2               -1                0                0   
2        AGRN               -1                0                0   
3     ANKRD65               -1                0                0   
4      ATAD3A               -1                0                0   

   TCGA-EJ-AB20-01  TCGA-HC-7736-01  TCGA-HC-8260-01  TCGA-HC-8258-01  \
0                0                0                0                0   
1                0                0                0                0   
2                0                0                0                0   
3                0                0                0                0   
4                0                0                0                0   

   TCGA-J4-A83N-01  TCGA-CH-5738-01  TCGA-J4-A67Q-01  TCGA-VP-A87D-01  \
0                0                0                0                0   
1     

In [11]:
df_melted_cnv = cnv_data_filtered.melt(
    id_vars=["Gene Symbol"],       # columns to keep fixed
    var_name="samples_id",         # name for the variable column (sample IDs)
    value_name="expression_value"  # name for the values
)


df_melted_cnv['Gene Symbol'] = df_melted_cnv['Gene Symbol'].str.split('|').str[0] 


df_melted_cnv = df_melted_cnv.rename(
    columns={
        "samples_id": "model_id",
        "Gene Symbol": "gene_symbol",
        "expression_value": "rsem_tpm",
    }
)

print(df_melted_cnv.head())


  gene_symbol         model_id  rsem_tpm
0       ACAP3  TCGA-YL-A9WH-01        -1
1      ACTRT2  TCGA-YL-A9WH-01        -1
2        AGRN  TCGA-YL-A9WH-01        -1
3     ANKRD65  TCGA-YL-A9WH-01        -1
4      ATAD3A  TCGA-YL-A9WH-01        -1


In [12]:
group_loss = [-1, -2]
group_normal = [0]
group_gain = [1, 2]

conditions = [
    df_melted_cnv["rsem_tpm"].isin(group_loss),
    df_melted_cnv["rsem_tpm"].isin(group_normal),
    df_melted_cnv["rsem_tpm"].isin(group_gain),
]
choices = ["Loss", "Normal", "Gain"]
df_melted_cnv.loc[:, "effect"] = np.select(conditions, choices, default="")

df_melted_cnv = df_melted_cnv[df_melted_cnv['gene_symbol'].isin(montagud_nodes)]
df_melted_cnv.to_csv('data/TCGA_data/prostate/filtered_data/cnv_samples_table.csv')

In [13]:
# list(df_melted_cnv['gene_symbol'].unique())
# df_melted_cnv[df_melted_cnv['gene_symbol'] == 'BCL2L1']

In [14]:
proteins_data

Unnamed: 0,sample,TCGA-KK-A59Z-01,TCGA-KC-A4BV-01,TCGA-CH-5772-01,TCGA-G9-6377-01,TCGA-HC-7081-01,TCGA-EJ-7788-01,TCGA-HC-7233-01,TCGA-G9-6378-01,TCGA-CH-5761-01,TCGA-G9-6494-01,TCGA-G9-6366-01,TCGA-HC-7210-01,TCGA-FC-A4JI-01,TCGA-EJ-7315-01,TCGA-EJ-5501-01,TCGA-EJ-7314-01,TCGA-G9-6338-01,TCGA-KK-A5A1-01,TCGA-CH-5767-01,TCGA-G9-7521-01,TCGA-HC-7077-01,TCGA-HC-7817-01,TCGA-G9-6371-01,TCGA-CH-5743-01,TCGA-KK-A59Y-01,TCGA-G9-7525-01,TCGA-KC-A4BN-01,TCGA-CH-5750-01,TCGA-EJ-A46D-01,TCGA-EJ-5522-01,TCGA-J9-A52C-01,TCGA-EJ-7321-01,TCGA-CH-5766-01,TCGA-HC-7079-01,TCGA-EJ-7125-01,TCGA-G9-6370-01,TCGA-HC-7213-01,TCGA-J4-8198-01,TCGA-G9-6356-01,TCGA-FC-7708-01,TCGA-H9-7775-01,TCGA-EJ-5506-01,TCGA-EJ-A46G-01,TCGA-HI-7168-01,TCGA-EJ-5526-01,TCGA-HC-7740-01,TCGA-CH-5794-01,TCGA-CH-5788-01,TCGA-EJ-5516-01,TCGA-EJ-5519-01,TCGA-EJ-5530-01,TCGA-EJ-7115-01,TCGA-CH-5753-01,TCGA-KK-A59X-01,TCGA-G9-6351-01,TCGA-HC-7736-01,TCGA-CH-5763-01,TCGA-EJ-A46B-01,TCGA-EJ-5503-01,TCGA-EJ-5525-01,TCGA-G9-6364-01,TCGA-EJ-5518-01,TCGA-CH-5737-01,TCGA-J4-8200-01,TCGA-EJ-7792-01,TCGA-G9-7510-01,TCGA-EJ-7330-01,TCGA-G9-6362-01,TCGA-G9-6384-01,TCGA-CH-5738-01,TCGA-EJ-7317-01,TCGA-G9-6365-01,TCGA-CH-5789-01,TCGA-EJ-5498-01,TCGA-EJ-8474-01,TCGA-EJ-5515-01,TCGA-FC-7961-01,TCGA-G9-7523-01,TCGA-EJ-7785-01,TCGA-EJ-5499-01,TCGA-HC-7748-01,TCGA-EJ-5512-01,TCGA-CH-5771-01,TCGA-EJ-5517-01,TCGA-EJ-7328-01,TCGA-HC-7744-01,TCGA-EJ-A65M-01,TCGA-EJ-5508-01,TCGA-HI-7170-01,TCGA-CH-5740-01,TCGA-EJ-7793-01,TCGA-KC-A4BO-01,TCGA-CH-5752-01,TCGA-EJ-5504-01,TCGA-CH-5739-01,TCGA-EJ-5511-01,TCGA-EJ-5521-01,TCGA-EJ-7791-01,TCGA-HC-7212-01,TCGA-CH-5769-01,TCGA-EJ-A65E-01,TCGA-G9-6343-01,TCGA-G9-6373-01,TCGA-EJ-7789-01,TCGA-HC-7211-01,TCGA-CH-5751-01,TCGA-CH-5754-01,TCGA-G9-6363-01,TCGA-KK-A59V-01,TCGA-CH-5748-01,TCGA-EJ-5496-01,TCGA-G9-7522-01,TCGA-EJ-5524-01,TCGA-EJ-5509-01,TCGA-EJ-5531-01,TCGA-EJ-5514-01,TCGA-G9-6353-01,TCGA-EJ-7331-01,TCGA-CH-5762-01,TCGA-EJ-7797-01,TCGA-EJ-5542-01,TCGA-EJ-5494-01,TCGA-HC-8264-01,TCGA-EJ-5507-01,TCGA-HC-7232-01,TCGA-CH-5790-01,TCGA-J9-A52E-01,TCGA-EJ-A46E-01,TCGA-HI-7171-01,TCGA-CH-5744-01,TCGA-CH-5741-01,TCGA-EJ-5527-01,TCGA-CH-5768-01,TCGA-CH-5792-01,TCGA-EJ-5497-01,TCGA-EJ-5495-01,TCGA-G9-6342-01,TCGA-EJ-8470-01,TCGA-G9-6361-01,TCGA-CH-5791-01,TCGA-G9-6499-01,TCGA-G9-6496-01,TCGA-CH-5764-01,TCGA-EJ-8469-01,TCGA-G9-6367-01,TCGA-FC-A5OB-01,TCGA-EJ-7784-01,TCGA-KC-A4BL-01,TCGA-EJ-7781-01,TCGA-EJ-A46F-01,TCGA-EJ-7783-01,TCGA-EJ-8468-01,TCGA-EJ-7782-01,TCGA-EJ-7786-01,TCGA-HI-7169-01,TCGA-EJ-5532-01,TCGA-EJ-5502-01,TCGA-G9-6329-01,TCGA-KC-A4BR-01,TCGA-CH-5765-01,TCGA-EJ-7794-01,TCGA-EJ-5510-01,TCGA-EJ-5505-01,TCGA-CH-5745-01,TCGA-2A-A8VL-01,TCGA-V1-A8MU-01,TCGA-KK-A8I6-01,TCGA-KK-A8I8-01,TCGA-YL-A8SL-01,TCGA-YL-A8SF-01,TCGA-EJ-AB20-01,TCGA-EJ-A8FS-01,TCGA-G9-A9S4-01,TCGA-EJ-A7NN-01,TCGA-J9-A8CP-01,TCGA-EJ-A7NK-01,TCGA-J4-A83N-01,TCGA-EJ-A6RA-01,TCGA-EJ-A8FO-01,TCGA-KK-A8IF-01,TCGA-ZG-A9LY-01,TCGA-YL-A8SH-01,TCGA-VP-AA1N-01,TCGA-YL-A8SA-01,TCGA-J4-A67Q-01,TCGA-EJ-7325-01,TCGA-V1-A9OX-01,TCGA-KK-A7AP-01,TCGA-2A-A8W3-01,TCGA-G9-6498-01,TCGA-M7-A71Z-01,TCGA-HC-A76X-01,TCGA-KK-A8IC-01,TCGA-KC-A7F5-01,TCGA-VN-A88K-01,TCGA-V1-A8MF-01,TCGA-YL-A8S8-01,TCGA-FC-A66V-01,TCGA-YL-A8SI-01,TCGA-KK-A7B4-01,TCGA-TP-A8TV-01,TCGA-YL-A9WI-01,TCGA-J4-A6M7-01,TCGA-G9-6339-01,TCGA-2A-A8VT-01,TCGA-EJ-A7NJ-01,TCGA-KK-A7AU-01,TCGA-KK-A8IJ-01,TCGA-ZG-A8QX-01,TCGA-2A-A8VO-01,TCGA-KK-A7B3-01,TCGA-ZG-A8QW-01,TCGA-ZG-A8QY-01,TCGA-TP-A8TT-01,TCGA-KK-A8IA-01,TCGA-YL-A8SQ-01,TCGA-EJ-A65D-01,TCGA-YL-A8HM-01,TCGA-TK-A8OK-01,TCGA-VP-A87E-01,TCGA-YL-A9WY-01,TCGA-YL-A8SC-01,TCGA-VN-A88I-01,TCGA-KC-A7F3-01,TCGA-SU-A7E7-01,TCGA-J4-A67O-01,TCGA-ZG-A9LB-01,TCGA-J4-A67K-01,TCGA-EJ-A8FN-01,TCGA-G9-A9S7-01,TCGA-J4-A67M-01,TCGA-ZG-A9LS-01,TCGA-J4-A67N-01,TCGA-KK-A8IB-01,TCGA-M7-A720-01,TCGA-KK-A6E0-01,TCGA-EJ-A6RC-01,TCGA-ZG-A9MC-01,TCGA-YL-A9WL-01,TCGA-ZG-A9L4-01,TCGA-VP-A878-01,TCGA-YL-A8SR-01,TCGA-V1-A9OL-01,TCGA-KC-A7FD-01,TCGA-YL-A9WX-01,TCGA-2A-AAYO-01,TCGA-ZG-A9L9-01,TCGA-KK-A8ID-01,TCGA-EJ-7312-01,TCGA-ZG-A9N3-01,TCGA-M7-A71Y-01,TCGA-YL-A9WJ-01,TCGA-V1-A9ZG-01,TCGA-V1-A9O5-01,TCGA-KK-A6E3-01,TCGA-X4-A8KQ-01,TCGA-KC-A7F6-01,TCGA-ZG-A9L1-01,TCGA-YL-A9WK-01,TCGA-KK-A8I7-01,TCGA-YL-A8HL-01,TCGA-J4-A67R-01,TCGA-VP-A875-01,TCGA-2A-A8VV-01,TCGA-V1-A9Z8-01,TCGA-V1-A9ZR-01,TCGA-V1-A9OY-01,TCGA-YL-A8HK-01,TCGA-EJ-A65B-01,TCGA-FC-A8O0-01,TCGA-ZG-A9LU-01,TCGA-J4-A67T-01,TCGA-V1-A9ZI-01,TCGA-WW-A8ZI-01,TCGA-YL-A8SK-01,TCGA-ZG-A9KY-01,TCGA-EJ-7318-01,TCGA-ZG-A9M4-01,TCGA-J4-A83J-01,TCGA-ZG-A9L6-01,TCGA-G9-6347-01,TCGA-YJ-A8SW-01,TCGA-ZG-A9L0-01,TCGA-KK-A8I4-01,TCGA-ZG-A9LZ-01,TCGA-VP-A87J-01,TCGA-M7-A725-01,TCGA-ZG-A9L5-01,TCGA-ZG-A9LM-01,TCGA-H9-A6BX-01,TCGA-2A-A8W1-01,TCGA-EJ-A8FU-01,TCGA-KK-A6E4-01,TCGA-2A-AAYU-01,TCGA-YL-A8HJ-01,TCGA-YL-A8SP-01,TCGA-EJ-A65F-01,TCGA-V1-A8WL-01,TCGA-XA-A8JR-01,TCGA-EJ-A65J-01,TCGA-2A-A8VX-01,TCGA-VP-A87K-01,TCGA-KK-A7B1-01,TCGA-KK-A6E8-01,TCGA-KK-A7B2-01,TCGA-KK-A8II-01,TCGA-J9-A8CK-01,TCGA-KC-A7FE-01,TCGA-V1-A9Z7-01,TCGA-M7-A723-01,TCGA-V1-A9OQ-01,TCGA-VN-A88L-01,TCGA-J9-A8CM-01,TCGA-V1-A8MJ-01,TCGA-M7-A724-01,TCGA-KK-A7AZ-01,TCGA-VN-A88P-01,TCGA-KK-A8IH-01,TCGA-YL-A8HO-01,TCGA-V1-A9OH-01,TCGA-J4-A83I-01,TCGA-KK-A6E2-01,TCGA-KC-A7FA-01,TCGA-KK-A8I5-01,TCGA-EJ-A7NH-01,TCGA-EJ-AB27-01,TCGA-2A-AAYF-01,TCGA-G9-A9S0-01,TCGA-ZG-A9NI-01,TCGA-EJ-7218-01,TCGA-4L-AA1F-01,TCGA-EJ-A7NM-01,TCGA-YL-A8S9-01,TCGA-KK-A7B0-01,TCGA-V1-A8X3-01,TCGA-V1-A9Z9-01,TCGA-V1-A9OT-01,TCGA-QU-A6IN-01,TCGA-ZG-A8QZ-01,TCGA-M7-A722-01,TCGA-YL-A8SJ-01,TCGA-EJ-A7NF-01,TCGA-QU-A6IP-01,TCGA-EJ-A7NG-01,TCGA-Y6-A9XI-01,TCGA-J9-A8CN-01,TCGA-Y6-A8TL-01,TCGA-FC-A6HD-01,TCGA-H9-A6BY-01,TCGA-G9-6379-01,TCGA-KK-A6E1-01,TCGA-M7-A721-01
8,A-RAF_PS299,0.121288,-0.167044,-0.110590,0.066131,-0.223209,0.309472,0.177667,0.085773,-0.008180,-0.048681,0.434498,-0.161963,0.133044,-0.076316,-0.085805,-0.220813,0.139059,0.146341,0.091404,0.040931,-0.165522,-0.141093,-0.029213,0.007295,-0.161085,-0.071485,0.090077,0.041420,-0.203648,0.198901,0.633044,0.117477,0.448590,0.195897,-0.131286,-0.275811,-0.156456,0.065520,0.329121,0.051788,-0.127191,0.088563,-0.010250,0.064262,-0.074025,-0.051245,-0.011165,0.089192,-0.088314,0.559467,-0.070944,-0.138554,0.366871,0.140011,0.130969,-0.028880,-0.016541,-0.194515,-0.331856,-0.023664,-0.123025,0.263298,0.325669,0.102922,0.124023,0.221755,0.077072,0.201581,-0.267812,-0.217668,0.136260,0.208104,-0.212348,-0.159178,0.147759,0.006210,0.158111,-0.077909,-0.106716,0.035780,-0.021714,-0.055443,-0.074648,0.045575,0.012373,-0.047630,-0.180097,-0.210270,0.053535,-0.044091,-0.212054,0.082904,-0.135914,-0.175372,0.157504,0.061171,0.151824,0.084102,-0.075487,-0.029791,-0.022474,0.149194,0.064988,0.372001,-0.017743,-0.088101,-0.281524,-0.074470,-0.182041,0.084495,-0.100549,0.157955,-0.103972,0.371203,0.430539,0.434059,0.065865,0.224414,0.049231,-0.230385,-0.085361,-0.069315,-0.136402,0.167170,0.108931,-0.145795,0.031645,-0.362369,0.068533,-0.216933,-0.123341,0.150922,-0.025967,-0.186936,-0.007065,-0.092692,-0.013482,-0.141271,0.095438,-0.138662,0.000097,0.208561,-0.190623,0.495171,-0.052817,0.287433,0.084238,0.163665,-0.022284,-0.171228,-0.195069,0.751493,0.403444,0.013302,-0.117150,0.133324,0.053689,-0.247409,0.045810,0.028991,-0.157558,-0.033214,0.317695,0.002061,-0.069991,0.065336,0.090692,-0.078092,-0.037827,-0.006259,-0.095551,0.055520,-0.108718,0.137443,0.228790,0.104042,0.060785,-0.011086,0.092070,0.061492,0.089917,0.040887,-0.031318,0.035489,-0.008728,-0.081835,0.073187,0.210015,0.182412,-0.027251,-0.074557,0.044001,-0.086944,0.081792,-0.114753,0.026357,-0.089078,0.019721,0.029695,0.001750,0.173953,0.000101,-0.033547,-0.001602,0.036252,0.085560,-0.040837,0.063078,0.184532,0.218604,0.154928,-0.148910,0.166994,-0.078517,0.422512,0.082814,0.076408,0.018833,0.022128,0.026795,-0.063584,-0.249066,0.093184,-0.140411,-0.077592,-0.086605,-0.022191,-0.135468,-0.001630,-0.106388,-0.028949,0.312887,0.057758,-0.048519,-0.039697,0.134355,0.104724,-0.014581,0.070727,-0.043465,0.020805,0.198423,0.054739,0.118724,-0.003847,0.061882,-0.191256,-0.083541,-0.085237,0.069528,-0.027797,0.045723,0.037444,-0.028468,-0.084582,0.041762,-0.080598,0.018178,0.114212,0.039938,-0.040325,0.001306,0.021827,0.102825,-0.082821,-0.164476,0.073656,0.021623,-0.000686,0.064693,-0.014902,0.024319,-0.084121,-0.045556,0.005950,-0.139348,-0.063673,-0.073629,0.092045,0.071219,0.059021,-0.063194,-0.102624,0.043862,-0.126533,0.126452,0.179144,-0.084355,-0.096657,0.118067,0.113923,0.107471,-0.014145,0.020810,-0.033623,-0.041168,-0.103391,0.000000,0.070123,-0.032935,0.080405,-0.092856,-0.022384,-0.073278,-0.059351,-0.145726,-0.144302,0.076505,-0.021250,-0.139122,0.001516,-0.015623,-0.170733,-0.045044,0.138687,0.103180,0.010272,0.146373,0.047519,0.083054,0.009612,0.133135,0.026034,0.006097,-0.042955,0.099690,0.102514,-0.009119,0.033520,0.135079,0.188395,-0.010564,0.060979,-0.019698,-0.067963,-0.004027,-0.060676,-0.146258,0.186936,-0.033142,-0.136302,-0.101251,-0.153887,0.117387,0.214882,-0.079233,-0.050524,-0.013875,-0.014515,0.131647,0.050596,0.018850
13,ADAR1,0.371003,-0.209629,-0.086770,0.149373,-0.369561,0.145552,0.114285,0.333469,-0.068571,0.039925,0.477013,-0.061690,0.328808,-0.163169,-0.145333,-0.286966,0.036230,-0.613914,0.074657,-0.053140,-0.041693,-0.161014,0.156815,-0.087798,-0.258606,-0.201415,-0.050248,0.029725,-0.252144,0.175151,-0.042992,0.297558,-0.072065,-0.238472,0.003858,-0.255655,-0.309116,0.017480,-0.050119,0.012003,-0.153893,0.231983,0.068213,-0.016881,-0.084411,0.022739,-0.180454,0.173021,-0.192860,-0.005601,-0.174439,-0.031879,0.032004,-0.015305,0.198236,-0.120339,-0.085081,-0.117806,-0.309219,-0.008178,-0.065075,0.487471,0.393412,0.129616,0.001722,0.311986,0.139725,0.001756,-0.226026,-0.202258,0.197478,0.414088,-0.191246,-0.116514,0.208130,-0.026112,-0.275749,0.104244,-0.044436,0.050002,-0.165655,0.012532,-0.026876,0.110596,-0.004549,0.108302,0.001479,0.001837,-0.010755,0.107616,-0.302473,-0.043073,0.099146,-0.391586,-0.191223,-0.237693,0.073958,-0.045968,-0.187131,-0.188223,0.125606,0.186292,0.031551,0.513219,0.169947,-0.067797,-0.226951,0.045477,-0.125410,0.292922,0.022987,0.118722,-0.118026,0.221642,0.008445,-0.061202,0.200482,0.200146,0.169686,-0.004013,-0.074624,0.217359,0.105178,0.067155,0.042465,0.026282,0.047795,-0.313747,0.042098,-0.113983,-0.032533,0.045605,0.023996,-0.063134,0.182944,-0.074359,0.025758,-0.139777,0.202526,-0.103294,-0.054046,0.219037,-0.205541,-0.415212,0.093080,0.109828,0.246251,-0.000579,0.114723,-0.130206,-0.142362,0.054321,0.048570,0.143540,-0.043286,0.291465,-0.091289,-0.299239,-0.062706,0.158567,-0.199305,-0.234857,0.416321,0.195373,-0.072120,0.010734,0.073220,0.102138,0.149693,-0.023217,-0.034602,-0.093503,0.081995,-0.161130,-0.289360,-0.170842,-0.133075,0.049647,-0.152239,0.050801,0.146233,0.031760,0.047308,-0.029148,0.021187,-0.014535,-0.014220,0.237620,0.194685,0.018694,0.199970,0.004283,0.113877,0.035451,0.147562,0.043889,0.130138,0.059621,-0.048728,0.282641,0.184432,0.227660,-0.040051,-0.028681,0.021538,-0.069119,0.217947,-0.095066,-0.101315,-0.051078,-0.033414,0.005284,0.266827,0.023467,-0.032529,0.096504,-0.087989,0.092035,-0.054510,-0.005206,0.145604,-0.191263,0.092830,0.033146,-0.134817,0.044609,0.163464,0.027792,0.011750,0.046718,-0.003909,0.154104,0.093151,0.273695,-0.005462,0.158157,0.018578,0.050874,0.184201,0.048277,0.003414,0.443912,0.284238,0.067155,0.109429,-0.026616,0.016360,0.253292,0.068586,0.108166,0.143744,0.079211,0.217847,0.262096,-0.031836,0.291553,-0.081010,-0.105824,0.137295,-0.057774,-0.036577,-0.021372,-0.063134,-0.060426,0.012129,-0.217700,0.030960,0.077204,0.017103,0.056170,-0.003646,-0.123234,0.172432,0.073361,0.102825,0.353668,0.000000,0.042740,-0.165087,-0.146500,-0.051942,-0.045160,-0.002184,0.175250,0.134280,-0.127996,0.180620,0.181082,0.163507,0.005345,0.137096,0.018267,0.000477,-0.020033,0.058735,0.134954,0.029666,-0.011454,-0.278370,0.021505,0.012648,0.150028,0.100806,0.096899,0.077794,0.078020,0.064098,0.112326,0.200965,0.055780,0.006082,0.124155,0.026617,0.043963,-0.023700,-0.007574,0.055125,-0.047622,-0.107605,-0.036805,-0.025490,0.064410,0.061764,0.631423,0.095664,-0.030264,-0.053370,0.026882,-0.008032,-0.008096,0.107089,0.145728,0.246357,0.332825,0.008395,0.043554,0.066322,-0.010576,-0.117121,0.087224,-0.096522,0.068405,-0.019076,0.012466,0.132586,0.225078,0.076272,-0.015889,0.069500,-0.045849,0.168301,0.014264
14,AKT,0.237545,-0.434174,-0.174623,-0.274793,0.301125,0.199348,-0.501879,0.203827,-0.127046,0.600264,-0.618887,0.324708,-1.136282,0.172670,-0.230349,0.114698,0.167813,0.755466,0.168472,0.177698,-0.031588,0.238349,0.029446,-0.018654,0.269940,0.618135,-0.040920,0.071063,-0.379939,-0.070512,0.265171,-0.214282,-0.029022,-1.503084,0.600884,0.125363,0.078857,-0.049284,0.032869,-0.271889,0.146029,-0.082547,-0.141212,-0.151174,0.224805,-0.236703,-0.128983,-0.551395,0.045992,0.268168,0.129303,-0.636202,-0.091521,0.411084,-0.074785,-0.228255,0.201022,0.055292,0.073512,-0.050141,-0.115173,-0.349146,0.730579,0.027362,0.221285,0.304165,-0.347015,0.271616,-0.070592,-0.300371,0.335635,0.471970,0.276050,-0.217303,-0.075497,-0.011110,0.608842,0.297678,0.018824,0.074083,0.274007,0.138513,-0.514487,0.438400,-0.199239,-0.173406,0.052616,0.096481,0.135133,-0.187565,0.125929,-0.115705,-0.279115,-0.002506,-0.114208,0.009835,-0.060253,0.361710,0.461304,-0.231464,0.117978,-0.307680,-0.000046,0.238782,0.298634,0.277740,0.009541,0.214821,0.029700,-0.063590,-0.040661,0.118969,0.038927,-0.100015,-0.151736,-0.269498,-0.165541,0.167462,-0.020455,0.248565,0.100245,0.114328,0.187691,-0.270405,-0.471769,-0.170743,-0.415329,-0.220489,-0.117384,0.021296,-0.240952,-0.058045,-0.201962,0.208998,-0.131130,0.389036,-0.074287,-0.131653,0.152813,-0.142710,-0.173335,0.347134,-0.633881,0.083537,-0.070328,-0.329075,0.264613,0.030168,-0.032319,-0.262184,-0.057253,0.088294,0.348629,-0.092413,-0.018654,-0.000269,-0.271634,0.148071,-0.004402,-0.204807,-0.534361,0.488209,-0.062994,0.081706,0.296658,0.202851,-0.022621,0.206925,-0.776567,0.162710,0.199193,-0.072818,0.246870,0.134982,-0.206949,0.330758,-0.878953,-0.302182,0.197228,-0.194511,-0.315337,-0.561804,-0.093046,0.158650,-0.605724,0.288121,0.022504,0.275928,-0.215930,-0.106436,-0.147276,0.206324,-0.454552,0.208852,0.089376,-0.195836,-0.000363,-0.023733,-0.113111,-0.009701,0.004835,0.079866,0.670463,0.197332,0.027762,0.436266,-0.550698,0.193172,0.054064,-0.099523,-0.391218,-0.187237,-0.153954,-0.139458,-0.428058,-0.112631,0.582376,-0.286634,-0.011432,0.001749,0.250827,-1.451748,0.203658,-0.198971,-0.017627,0.234840,-0.348720,-0.177770,0.124541,-0.198772,0.064627,-0.634258,-0.259291,-0.455347,0.073103,-0.147877,-0.228421,-0.153723,-0.352143,0.246418,-0.053179,-0.839109,0.295495,0.431478,-0.486671,0.147018,0.096975,0.327566,-0.123287,-0.414746,0.261843,-0.291446,0.054443,-0.293182,0.366864,0.132500,-0.256085,0.302791,-0.388845,-0.116097,0.139626,-0.017820,0.018482,-0.006163,0.182627,-0.114891,-0.078395,0.099605,-0.073100,-0.258514,0.140386,0.128810,-0.194936,0.372828,-0.669611,0.225510,0.156544,-0.367044,-0.308734,0.098777,0.432211,-0.342743,-0.039232,-0.216146,0.103216,0.602557,0.024544,0.571940,-0.567530,0.076508,-0.236959,-0.073567,0.258856,0.368900,0.185718,-0.079656,0.119250,0.254134,-0.321161,0.239181,0.022931,0.294774,-0.129928,-0.189518,0.101831,0.088656,0.378277,-0.046844,-0.039440,0.196618,-0.253995,-0.578076,-0.072639,0.120612,0.096357,0.674013,0.076093,-0.030548,-0.063933,0.337751,-0.398741,0.180178,-0.302348,0.063473,0.592457,-0.054518,0.401891,0.329106,0.113358,-0.128362,0.013405,-0.033559,-0.347122,-0.259030,0.294320,0.010669,-0.187588,-0.510763,0.301144,0.583645,0.236807,-0.123277,0.522754,0.108368,-0.264820,-0.300510,-0.319330,-0.077099,-0.088846,0.332635,-0.248299,-0.093616
15,AKT_PS473,-1.609986,-0.246830,-0.105235,-0.330287,0.254797,-1.219457,-0.418918,-0.428828,-0.590345,-0.354502,-0.501844,2.316092,-0.511541,-0.079241,1.627635,1.201774,-0.103681,-1.292781,1.574267,-0.955288,0.341239,-1.387371,-0.068245,1.990607,0.829387,-0.621681,-0.015170,1.200509,0.244659,-0.353116,-0.793185,0.096355,0.886235,0.293176,-0.553120,-0.037613,0.696989,-1.549081,-0.627868,-0.941328,-0.300979,0.011330,-0.239743,-0.140093,-0.357788,1.208750,1.278409,-0.277954,1.045220,-0.746782,-0.401546,-0.595099,1.790018,-0.283025,-0.345080,0.044159,0.637508,0.689049,0.120247,-0.678112,-1.365996,-0.527934,1.868640,0.023156,-0.068368,-0.476538,0.338110,-0.969765,-0.397024,0.897432,0.981299,-0.588369,2.140903,0.294251,-0.551132,0.086150,-0.174296,-0.892965,1.000142,-0.025432,1.492394,0.522673,0.270407,-0.075014,-0.470265,-1.428738,-0.047614,-0.248169,-0.456226,0.988294,-0.006980,-0.004547,-0.314030,1.012922,1.451759,0.190127,0.086527,1.443465,-0.076148,0.949840,-0.316602,-0.261724,-0.845860,-0.194359,0.158179,-0.573667,0.528928,-1.557354,-0.472114,0.774464,0.491116,-1.042854,1.444853,0.576084,0.948044,0.153441,-0.191433,-0.176722,0.719086,0.408494,0.382342,0.482750,-1.170889,0.276841,0.087482,0.496556,-1.559365,1.533938,-1.839684,1.527026,0.650188,0.272012,0.787134,0.017894,0.172510,0.979100,0.080624,-0.627868,-0.410091,1.031268,-0.366270,-0.753915,1.262318,-0.808518,0.316129,-1.382213,-0.123001,-0.635052,0.453493,0.026048,0.964966,0.021936,-0.618773,0.251088,-0.047643,0.087951,0.887911,0.245125,-0.223344,-0.136855,0.081244,0.737153,0.148921,1.387866,0.334408,-0.631394,-0.044139,-0.613470,0.583110,-0.393023,0.163047,1.021937,-0.027708,0.497272,-1.181194,0.178288,1.458005,0.376744,-0.019300,-0.680766,1.028616,0.487301,0.745604,1.777984,0.305356,-0.659030,-0.723982,-1.031980,-0.165806,0.005999,-0.703265,0.011906,0.035172,0.106004,-1.305156,-0.535745,1.399304,-0.282043,-0.227825,0.447614,-0.161865,-0.359581,-0.669446,-0.394893,0.832828,0.139860,-0.569252,0.804504,0.326593,0.297541,-0.142578,0.963295,0.568244,-1.085269,0.626205,0.169905,-0.756647,-0.492954,0.146494,0.924873,-0.689276,0.235118,-1.559842,0.542583,1.564662,0.229280,-0.171426,1.041677,0.083243,-1.109081,-0.021914,0.512695,1.724799,-0.329440,0.063223,-0.504252,-0.049346,1.466357,-1.133788,0.458344,0.507164,-0.629664,-0.522502,-0.968767,1.092304,-0.700248,0.539693,-0.271066,-0.306725,0.534626,-0.949717,0.225270,-0.594144,1.219433,-0.426816,-0.799470,-0.905204,-0.080431,-0.747431,0.255161,1.464690,0.915193,0.043378,0.247125,0.063030,0.372687,-1.432257,0.632731,-0.137739,-1.292988,0.081249,0.568900,-0.518421,0.235313,0.428495,-0.001104,0.016509,0.798285,1.163638,-0.537766,-0.134855,0.510633,0.386129,-1.268985,0.464071,1.325377,-0.210693,-0.343256,0.356274,-0.032741,0.044579,0.628768,-0.136919,0.368118,0.841748,-1.586741,0.554887,0.280209,-1.668544,0.713138,-1.090080,-0.407321,-0.858850,-0.758862,-1.137388,1.258216,-1.340786,0.516041,-0.854652,-0.375823,0.342157,-1.268869,-0.990783,-1.239505,-0.457747,-1.278157,-1.017528,1.399892,-0.744124,-0.234984,0.796639,-1.394211,0.903560,-0.497040,-0.180996,-0.468923,-0.431213,0.289286,0.516202,0.411550,-0.138257,0.434016,0.244820,-0.946489,-0.139974,0.896752,-0.073164,1.187201,0.790627,-0.170779,0.573219,0.077475,1.406719,0.619668,-0.376599,-0.036056,0.378726,0.094392,-0.111313,-0.074924,0.257562,0.586213
16,AKT_PT308,-1.462328,-0.261684,-0.325260,-0.244756,0.085217,-0.927288,-0.346230,0.041735,-0.844342,-0.163616,-0.461446,1.922506,-0.546770,-0.083109,1.170650,1.067599,0.732298,-1.044757,1.294215,-1.012096,0.266730,-1.370782,-0.226832,1.873595,0.850080,-0.280980,-0.157378,0.965643,0.155494,-0.539967,-0.904724,-0.082069,0.601576,0.017738,-0.724777,0.018729,0.402501,-0.957971,-0.344929,-0.774497,-0.414125,-0.108190,-0.417768,-0.239357,-0.498657,0.986850,0.905285,-0.473010,0.946192,-1.009509,-0.490686,-0.137775,1.405984,-0.184023,-0.480188,-0.155151,0.246142,0.620609,-0.118320,-0.677859,-0.616106,-0.638482,1.751779,0.113893,-0.059535,-0.687334,0.177870,-0.937255,-0.369957,0.581438,0.706490,-0.723983,1.902730,0.145848,-0.454089,-0.152281,-0.212931,-0.559190,0.657208,-0.218203,1.119872,0.266799,-0.046629,0.160059,0.223830,-1.407049,0.165513,-0.313547,-0.291420,0.689680,0.137947,0.185867,-0.527763,0.932309,1.106396,-0.050002,0.060315,1.302143,-0.033747,0.593321,-0.282310,-0.246032,-0.829498,0.053054,-0.392950,-0.626445,0.246609,-1.276557,-0.340064,0.434795,0.375450,-0.691895,1.183081,0.392127,0.980414,-0.010264,-0.279585,-0.144694,0.282283,0.520163,0.524879,0.170488,-0.975130,-0.122619,0.077743,0.130978,-1.470631,1.043329,-1.297131,1.236702,0.439223,-0.015451,0.457657,-0.242565,-0.213491,0.664549,0.020110,-0.371052,-0.717594,0.594915,-0.399706,-0.676910,0.988552,-0.691973,0.883748,-1.177597,-0.256714,-0.752864,0.201000,0.230863,0.546369,-0.121595,-0.775368,0.153348,0.481398,-0.144677,0.670086,0.525232,-0.222689,-0.470739,0.039574,0.621361,0.114419,0.935784,0.071703,-0.309646,0.141271,-0.518246,0.309305,-0.577014,0.351950,0.475086,-0.260803,0.426744,-0.369089,0.085108,1.264232,0.287386,0.000000,-0.438394,0.460141,0.258147,0.566501,1.694530,0.133957,-0.451381,-0.452907,-0.671863,-0.337861,0.247832,-0.601282,-0.077408,-0.354671,0.488660,-0.730383,-0.512940,0.815153,-0.352606,-0.165538,0.209545,-0.419900,-0.456916,-0.139463,0.079051,0.500859,0.150367,-0.227399,0.341451,-0.104272,0.272195,0.204451,0.482488,-0.038934,-0.486977,0.651219,-0.053278,-0.448797,-0.555741,-0.169613,0.540353,-0.438864,-0.110227,-0.628391,0.212696,1.067688,0.137703,-0.306751,0.614516,-0.056726,-0.333308,0.011267,0.077909,1.459699,-0.553268,-0.024031,-0.353246,0.453021,1.028179,-0.856026,0.257690,0.298317,-0.467560,-0.469537,-0.628836,0.716089,0.075806,0.302634,-0.129945,0.266254,0.124675,-0.441567,-0.042037,-0.521306,0.937298,0.257168,-0.623781,-0.349868,-0.216165,-0.544392,-0.012970,1.042979,0.565673,-0.076118,0.453120,0.000129,0.444518,-0.551418,0.247013,-0.228111,-0.570333,-0.070665,0.300186,-0.333034,1.025860,0.317561,-0.436115,0.016214,0.474113,0.587390,-0.294172,-0.065068,0.325757,0.323340,-0.686999,0.116655,0.694559,-0.065477,-0.428562,0.138325,-0.045700,-0.014176,0.258069,-0.213333,0.125457,0.539494,-1.050179,0.372285,0.336802,-0.323298,0.206579,-0.696805,-0.447341,-0.557916,-0.534383,-0.616227,0.985275,-0.540431,0.531843,-0.714756,-0.255241,0.091347,-0.506175,-0.734477,-0.594810,-0.098939,-0.823287,-0.476968,1.322682,-0.209052,0.145762,0.572526,-0.827625,0.439315,-0.407583,0.319896,-0.390080,-0.227040,0.292416,0.144993,0.294284,0.059195,0.147648,-0.096679,-0.554283,-0.248214,0.664929,0.784755,0.782223,0.431364,-0.147002,0.252503,0.019181,0.931931,0.370148,-0.340168,-0.105309,0.215821,0.769070,-0.114127,0.308305,0.043654,0.391923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,BAK,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-0.081722,-0.077352,0.000000,0.055090,-0.010766,0.019179,0.004395,0.026112,0.066636,-0.123457,-0.023125,-0.033577,0.137648,0.047508,-0.190068,0.215190,0.004371,-0.123663,-0.121540,-0.105493,-0.033741,0.017770,-0.048102,0.253828,0.256535,0.020276,0.223171,-0.057019,0.101944,0.107892,0.101729,0.066033,-0.170246,-0.037042,-0.005133,0.258077,0.177897,0.259538,0.023681,-0.008471,-0.134923,0.036120,0.063201,-0.129120,-0.196800,-0.078058,0.184800,-0.092274,0.311146,0.064989,-0.147888,0.048942,0.287768,0.093082,-0.067568,-0.103465,0.050399,0.052017,0.192869,0.074841,-0.111633,-0.270190,-0.030828,-0.081006,-0.021252,-0.006999,-0.060884,-0.024660,-0.154427,0.244753,-0.005622,-0.067390,0.043353,0.028870,0.026484,-0.064343,-0.092473,0.097203,-0.055184,0.213817,-0.020648,0.049138,0.020497,0.068655,0.083771,0.039848,-0.015038,-0.085433,-0.009469,0.021309,0.022034,0.428765,-0.016662,0.006719,0.041473,-0.037085,-0.038393,-0.000993,0.059511,0.014887,-0.043940,-0.018737,0.151581,0.057250,-0.065368,0.310198,0.056759,-0.143337,0.233255,-0.034116,-0.070528,0.019854,-0.058454,-0.022739,-0.073957,-0.247255,0.027720,-0.230748,-0.195581,0.037813,0.079616,-0.011323,0.021105,0.169199,0.056320,0.047396,0.088230,0.013760,-0.044278,0.021490,-0.057873,0.190995,-0.033799,-0.051417,-0.232710,-0.033992,0.480057,-0.055663,0.054884,-0.042890,0.084757,0.011728,-0.143030,-0.031301,0.207899,-0.045597,0.005206,0.081766,-0.007413,-0.171316,0.225555,0.033824,-0.085022,-0.131063,-0.005385,0.098162,-0.270407,0.098961,0.000956,-0.055086,0.206817,-0.071599,0.075997,0.052962,0.032879,-0.090841,0.036580,0.103229,0.298127,0.070114,-0.029625,-0.082432,0.016257,-0.271403,0.070132,0.017650,0.065872,0.058554,-0.169148,-0.077614,0.014873,0.163955,-0.099801,-0.110576,0.052640,0.155916,-0.067690,-0.060556
220,C-RAF_PS338,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.031208,0.115645,0.123445,-0.098660,0.309778,-0.070352,0.000000,0.174401,0.006371,0.122859,0.132328,0.017969,0.455353,-0.106236,0.026980,-0.061054,-0.122108,0.384261,0.171555,-0.118706,0.249423,0.043825,-0.033938,-0.218546,-0.407627,-0.004696,-0.060293,-0.117432,-0.238604,-0.238314,-0.072073,-0.157545,-0.007769,-0.072374,0.088786,-0.164324,-0.249457,-0.234484,0.039278,0.067810,0.114654,-0.037351,-0.022213,-0.028654,0.064556,0.072084,0.053109,-0.088956,-0.349929,0.040442,-0.186379,-0.075205,-0.104113,-0.284490,-0.022139,0.044981,0.012520,0.450592,0.179459,0.160088,-0.072698,0.201520,-0.209532,0.181257,-0.163239,0.210876,-0.032640,-0.133135,0.220496,-0.427521,0.002043,-0.014762,0.148798,0.036330,-0.015670,-0.060575,0.090787,0.052384,-0.231797,-0.018134,0.095381,-0.007780,-0.008703,-0.292610,-0.016315,0.042866,-0.063077,0.047815,-0.099904,-0.300203,0.110302,-0.090486,0.047564,0.229393,-0.131382,0.006456,-0.093666,0.017807,-0.084770,-0.011292,-0.030055,0.086676,-0.049912,-0.089106,0.013430,0.207084,-0.221742,0.020492,-0.070489,-0.161706,0.264859,-0.198945,-0.068926,-0.058045,0.176031,0.004782,-0.212924,0.120757,-0.015721,-0.034875,-0.268249,-0.076049,-0.059587,-0.173601,-0.063102,0.015623,-0.085264,-0.026645,0.000000,-0.128493,-0.060902,-0.007579,-0.139596,-0.008812,0.174546,-0.145620,-0.133819,-0.097608,-0.041888,-0.066846,-0.064419,-0.094385,0.067222,0.200445,-0.042290,0.040462,-0.143494,0.191471,-0.036512,0.032833,-0.211756,0.025741,-0.025847,-0.071708,0.072497,-0.089635,0.156708,-0.144711,0.338927,0.008051,-0.006225,-0.138803,-0.150932,-0.148640,0.077003,-0.081629,-0.042175,-0.257827,-0.296013,-0.025005,0.081614,-0.103096,-0.009781,0.124050,-0.063085,-0.022902,-0.007002,-0.035770,-0.105735,0.000000,-0.201739,-0.180047,0.391088,0.109516,-0.193735,-0.090231,-0.088868,0.097365
221,CHK1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-0.090725,-0.022125,-0.003510,0.064551,-0.054570,0.325076,-0.146919,-0.032172,0.108011,-0.194214,0.042486,-0.292824,-0.289149,0.155234,-0.119939,0.053890,0.229610,-0.064411,0.033267,-0.204309,-0.305215,-0.213714,0.120297,0.135913,0.500927,0.044660,0.082500,0.100425,0.440922,0.068849,0.174104,0.467758,0.286278,-0.113644,0.060332,0.120634,0.835386,0.526805,-0.215330,-0.191681,-0.014109,-0.129220,0.016534,0.032668,0.089907,-0.214668,-0.123423,0.133159,1.025126,0.111622,0.033806,0.283945,0.053360,0.461339,0.163792,-0.099677,0.192512,0.437992,-0.178833,-0.166447,-0.100329,-0.368070,0.384826,-0.190951,0.205293,-0.340498,-0.027419,0.006918,-0.200974,0.695996,0.045343,0.013276,-0.022980,-0.232431,-0.186435,0.085540,0.031286,0.075056,0.192386,0.212686,0.109284,-0.059612,-0.098385,0.152841,-0.007349,0.764753,-0.073622,0.177642,0.277523,0.042399,0.022179,0.062553,-0.138278,-0.129336,0.274509,-0.102576,-0.192015,-0.176280,0.022873,-0.088156,0.224453,-0.333979,-0.032320,0.136265,-0.078249,0.027775,0.147729,-0.083859,0.280451,0.081627,-0.181634,0.386524,-0.038647,-0.163996,-0.031362,0.109219,0.089960,-0.147345,-0.381759,-0.038197,0.109798,0.082756,-0.078464,0.365202,0.062647,0.004309,0.261289,0.230774,-0.215220,0.126257,0.073713,-0.130381,0.101279,-0.234893,-0.446022,-0.052761,0.134964,-0.091168,0.068121,0.330711,-0.047897,-0.146869,-0.748745,0.021248,0.170360,-0.117933,0.214997,0.158877,0.228898,-0.142935,0.418023,-0.194060,0.127430,-0.130983,0.065674,-0.041376,-1.125064,-0.036959,0.027170,-0.109872,0.137131,0.125579,0.187701,0.145483,0.366079,0.093556,-0.006350,0.075382,0.932068,0.193293,-0.014216,0.110020,-0.026476,-0.214367,-0.208713,0.189693,-0.056946,0.053856,-0.131582,-0.091585,0.479436,0.691712,-0.106173,-0.053550,0.163401,0.004941,0.043236,-0.027021
222,CHK2_PT68,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.040073,0.122422,0.081246,-0.017479,0.292617,0.072505,-0.013814,0.099254,0.093244,-0.027627,-0.165950,0.104232,0.419606,-0.080929,-0.320509,-0.020712,0.032760,0.390622,0.199283,-0.174924,-0.077836,-0.053916,-0.001307,-0.013766,-0.232135,0.038875,0.003609,-0.069112,-0.010198,-0.155856,-0.040862,0.050678,0.065864,-0.076941,0.081056,-0.101000,0.000000,-0.090662,-0.126695,-0.106268,0.091798,-0.029511,0.037767,-0.086106,0.109841,0.142143,0.089959,-0.053947,-0.089158,0.018872,-0.072774,-0.080446,-0.108970,-0.196633,-0.024903,0.039738,-0.006330,-1.255258,0.120111,0.172517,-0.051324,0.116006,-0.086489,0.030417,-0.084937,0.058579,-0.013934,0.009079,-0.021233,-0.168578,-0.023428,-0.022448,0.134720,-0.047766,-0.313377,0.135986,0.012952,0.207420,0.005816,-0.011670,0.083459,-0.087005,0.183893,-0.153801,-0.072054,0.015905,-0.131745,-0.041993,-0.011503,-0.199649,0.019040,0.103998,0.066367,0.140620,0.020572,0.093096,-0.142890,0.013730,-0.010086,0.068238,0.071601,0.180544,-0.042878,-0.082379,0.099544,0.067620,-0.062248,-0.032054,-0.050079,-0.183285,0.210923,-0.337554,0.032317,-0.198092,0.139353,-0.019523,-0.066235,0.051598,-0.297804,-0.013954,-0.024816,-0.120846,-0.205739,-0.047132,0.067736,0.001057,-0.091340,0.131094,-0.052581,-0.020645,-0.073642,0.013602,-0.113350,0.109760,-0.190535,-0.140005,0.006670,-0.130136,0.030942,0.075767,0.019285,-0.052635,0.056317,0.203421,0.080013,0.045848,-0.081015,0.143682,0.055140,0.010190,-0.089142,0.063690,0.046577,-0.073441,-0.184329,-0.035609,0.221594,-0.113297,0.313564,0.055513,-0.007658,-0.001628,-0.147852,-0.070378,0.056059,-0.083916,-0.070186,-0.157726,-0.157030,0.039413,0.006256,-0.067371,0.099751,-0.382980,-0.072130,-0.060010,0.072302,0.065240,-0.141594,0.000622,-0.064188,-0.016613,0.325302,-0.017086,-0.051162,-0.174010,0.002266,0.093679


In [15]:
# do with proteins instead of genes

proteins_data_col = list(proteins_data.columns)
common_col = list(set(proteins_data_col) & set(patients_id))
col_keep = ['sample'] + common_col
proteins_data_filtered = proteins_data[col_keep]


df_melted_protein = proteins_data_filtered.melt(
    id_vars=["sample"],       # columns to keep fixed
    var_name="samples_id",         # name for the variable column (sample IDs)
    value_name="expression_value"  # name for the values
)

df_melted_protein['sample'] = df_melted_protein['sample'].str.split('|').str[0] 

df_melted_protein = df_melted_protein.rename(
    columns={
        "samples_id": "model_id",
        "sample": "protein_symbol",
        "expression_value": "rsem_tpm",
    }
)
df_melted_protein['protein_symbol'] = df_melted_protein['protein_symbol'].str.upper()
# df_melted_protein['protein_symbol'] = df_melted_protein['protein_symbol'].str.replace('_', '', regex=False)
print(df_melted_protein)

# df_melted_protein.to_csv('data/TCGA_data/prostate/filtered_data/proteins_samples_table.csv')

     protein_symbol         model_id  rsem_tpm
0       A-RAF_PS299  TCGA-CH-5743-01  0.007295
1             ADAR1  TCGA-CH-5743-01 -0.087798
2               AKT  TCGA-CH-5743-01 -0.018654
3         AKT_PS473  TCGA-CH-5743-01  1.990607
4         AKT_PT308  TCGA-CH-5743-01  1.873595
...             ...              ...       ...
4083            BAK  TCGA-EJ-A7NN-01 -0.123457
4084    C-RAF_PS338  TCGA-EJ-A7NN-01  0.122859
4085           CHK1  TCGA-EJ-A7NN-01 -0.194214
4086      CHK2_PT68  TCGA-EJ-A7NN-01 -0.027627
4087          PARP1  TCGA-EJ-A7NN-01 -0.207367

[4088 rows x 3 columns]


In [16]:
print(df_melted_protein['protein_symbol'].unique())

['A-RAF_PS299' 'ADAR1' 'AKT' 'AKT_PS473' 'AKT_PT308' 'AMPK_ALPHA'
 'AMPK_PT172' 'AR' 'DIRAS3' 'ATM' 'B-RAF' 'BAD_PS112' 'BAK' 'BAX' 'BRCA2'
 'C-JUN_PS73' 'C-MYC' 'C-RAF' 'C-RAF_PS338' 'CHK1' 'CHK1_PS345' 'CHK2'
 'CHK2_PT68' 'EEF2' 'EEF2K' 'EGFR' 'EGFR_PY1068' 'EGFR_PY1173' 'ERK2'
 'FOXO3A' 'FOXO3A_PS318_S321' 'GSK3-ALPHA-BETA' 'GSK3-ALPHA-BETA_PS21_S9'
 'GSK3_PS9' 'JNK_PT183_PY185' 'JNK2' 'MEK1' 'MEK1_PS217_S221' 'N-RAS'
 'P21' 'P38_MAPK' 'P38_PT180_Y182' 'P53' 'P90RSK' 'P90RSK_PT359_S363'
 'PDK1' 'PDK1_PS241' 'PI3K-P110-ALPHA' 'PI3K-P85' 'PKC-ALPHA'
 'PKC-ALPHA_PS657' 'PKC-DELTA_PS664' 'PKC-PAN_BETAII_PS660' 'PRAS40_PT246'
 'PTEN' 'SMAD1' 'SMAD3' 'SMAD4' 'TIGAR' 'TSC1' 'VEGFR2' 'PARP_CLEAVED'
 'SNAIL' 'A-RAF' 'ARID1A' 'B-RAF_PS445' 'BCL2A1' 'CHK1_PS296' 'PARP1']


In [17]:
# print(df_melted_protein['protein_symbol'].value_counts())

print(df_melted_protein)
test_df = df_melted_protein[df_melted_protein['protein_symbol'] == 'CHK2']
num_rows = test_df.shape[0]


     protein_symbol         model_id  rsem_tpm
0       A-RAF_PS299  TCGA-CH-5743-01  0.007295
1             ADAR1  TCGA-CH-5743-01 -0.087798
2               AKT  TCGA-CH-5743-01 -0.018654
3         AKT_PS473  TCGA-CH-5743-01  1.990607
4         AKT_PT308  TCGA-CH-5743-01  1.873595
...             ...              ...       ...
4083            BAK  TCGA-EJ-A7NN-01 -0.123457
4084    C-RAF_PS338  TCGA-EJ-A7NN-01  0.122859
4085           CHK1  TCGA-EJ-A7NN-01 -0.194214
4086      CHK2_PT68  TCGA-EJ-A7NN-01 -0.027627
4087          PARP1  TCGA-EJ-A7NN-01 -0.207367

[4088 rows x 3 columns]


In [18]:

# print(df_melted_protein['protein_symbol'].value_counts())
# print(df_melted_protein['protein_symbol'].unique())


protein_list = list(df_melted_protein['protein_symbol'])

mods = ['_PS', '_PT', '_PY']

df_melted_protein = df_melted_protein[
    ~df_melted_protein['protein_symbol'].apply(lambda p: any(mod in p for mod in mods))
]
print(df_melted_protein)
# replace each name by the corresponding name in the montagud nodes list  



     protein_symbol         model_id  rsem_tpm
1             ADAR1  TCGA-CH-5743-01 -0.087798
2               AKT  TCGA-CH-5743-01 -0.018654
5        AMPK_ALPHA  TCGA-CH-5743-01 -0.040532
7                AR  TCGA-CH-5743-01  0.563090
8            DIRAS3  TCGA-CH-5743-01 -0.081379
...             ...              ...       ...
4079         ARID1A  TCGA-EJ-A7NN-01 -0.214734
4081         BCL2A1  TCGA-EJ-A7NN-01  0.283037
4083            BAK  TCGA-EJ-A7NN-01 -0.123457
4085           CHK1  TCGA-EJ-A7NN-01 -0.194214
4087          PARP1  TCGA-EJ-A7NN-01 -0.207367

[2576 rows x 3 columns]


In [19]:
def replace_with_base_name(protein_name):
    for base in montagud_nodes:
        if protein_name.startswith(base):
            return base
    return protein_name  # if no match found, keep original

# Assuming your dataframe is df and column to replace is 'protein_symbol'
df_melted_protein['protein_symbol'] = df_melted_protein['protein_symbol'].apply(replace_with_base_name)

In [20]:
df_melted_protein = df_melted_protein[df_melted_protein['protein_symbol'].isin(montagud_nodes)]
df_melted_protein['protein_symbol'] = df_melted_protein['protein_symbol'].str.replace('_', '', regex=False)
df_melted_protein = df_melted_protein[df_melted_protein['rsem_tpm'].notna()]
df_melted_protein.to_csv('data/TCGA_data/prostate/filtered_data/proteins_samples_table.csv')

In [21]:
# pre-process tcga data based on montagud nodes and patients ids
df_melted_cnv= pd.read_csv('data/TCGA_data/prostate/filtered_data/cnv_samples_table.csv')
def_melted_proteins = pd.read_csv('data/TCGA_data/prostate/filtered_data/proteins_samples_table.csv')

In [22]:
def_melted_proteins = def_melted_proteins[['protein_symbol', 'model_id', 'rsem_tpm']]
def_melted_proteins

Unnamed: 0,protein_symbol,model_id,rsem_tpm
0,AKT,TCGA-CH-5743-01,-0.018654
1,AMPK,TCGA-CH-5743-01,-0.040532
2,AR,TCGA-CH-5743-01,0.563090
3,ATM,TCGA-CH-5743-01,-0.070493
4,BAK,TCGA-CH-5743-01,-0.035079
...,...,...,...
1899,SNAIL,TCGA-EJ-A7NN-01,-0.223618
1900,AR,TCGA-EJ-A7NN-01,-0.214734
1901,BCL2,TCGA-EJ-A7NN-01,0.283037
1902,BAK,TCGA-EJ-A7NN-01,-0.123457


In [23]:
 # Create generic models 

folder_generic_models_cfg = 'validation/prostate/generic_models/proteins_models/Montagud2022_Prostate_Cancer.cfg'
folder_generic_models_bnd = 'validation/prostate/generic_models/proteins_models/Montagud2022_Prostate_Cancer.bnd'

folder_pers_models = 'validation/prostate/personalized_models/proteins_models'

tissue = 'Prostate'

create_generic_patients_cfg_bnd_validation(folder_generic_models_cfg, folder_generic_models_bnd, folder_pers_models, patients_id, tissue)


All .cfg and .bnd files created for the validation.


In [24]:
# update phenotypes in generic models 

phenotype_interest = ["Proliferation","Invasion","DNA_Repair","Migration","Apoptosis"]
original_data_dir = "validation/prostate/personalized_models/proteins_models"
results_dir = "validation/prostate/personalized_models/proteins_models"


generic_models_update_phenotypes(phenotype_interest, original_data_dir, results_dir)


Updated PTCH1.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated BAD.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated ETV1.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated GSK3.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated P14ARF.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated ETS1.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated Apoptosis.is_internal=0 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated EEF2K.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated EP300.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated TAK1.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated BRCA1.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated EGFR.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated Migration.is_internal=0 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated EEF2.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated P90RSK.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated RAS.is_internal=1 in TCGA-ZG-A9L5-01_Prostate.cfg
Updated PKC.is_internal=1 in TCGA-ZG-A9L

In [25]:
# personalize the boolean networks with genes 
table_proteins_patients = create_table_proteins_patients(def_melted_proteins)
print(table_proteins_patients.head())

protein_expression_level             High Protein Abundance  \
model_id                                                      
TCGA-2A-A8VL-01                          AR, P21, PKC, VEGF   
TCGA-2A-AAYO-01           BAX, BRCA2, EEF2, ERK, MEK1, PI3K   
TCGA-2A-AAYU-01                                AKT, AR, BAX   
TCGA-CH-5738-01                                        SMAD   
TCGA-CH-5743-01                                    AR, SMAD   

protein_expression_level             Low Protein Abundance  
model_id                                                    
TCGA-2A-A8VL-01                                       PI3K  
TCGA-2A-AAYO-01                                       PTEN  
TCGA-2A-AAYU-01                          EEF2K, PI3K, SMAD  
TCGA-CH-5738-01           AR, BAK, EEF2K, FOXO, MEK1, PDK1  
TCGA-CH-5743-01                            PKC, SMAD, TSC1  


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  proteins_data.rename(columns={"protein_symbol": "protein_name"}, inplace=True)


In [26]:
personalized_patients_proteins_cfgs(df_melted_protein,montagud_nodes,folder_pers_models,folder_pers_models,patients_id,table_proteins_patients,tissue)

Modified and saved: validation/prostate/personalized_models/proteins_models/TCGA-ZG-A9L5-01_Prostate.cfg
Modified and saved: validation/prostate/personalized_models/proteins_models/TCGA-EJ-A7NN-01_Prostate.bnd
Modified and saved: validation/prostate/personalized_models/proteins_models/TCGA-G9-6343-01_Prostate.bnd
Modified and saved: validation/prostate/personalized_models/proteins_models/TCGA-CH-5743-01_Prostate.bnd
Modified and saved: validation/prostate/personalized_models/proteins_models/TCGA-YL-A9WI-01_Prostate.bnd
Modified and saved: validation/prostate/personalized_models/proteins_models/TCGA-YL-A8SF-01_Prostate.bnd
Modified and saved: validation/prostate/personalized_models/proteins_models/TCGA-Y6-A8TL-01_Prostate.bnd
Modified and saved: validation/prostate/personalized_models/proteins_models/TCGA-V1-A8MF-01_Prostate.bnd
Modified and saved: validation/prostate/personalized_models/proteins_models/TCGA-YL-A8S8-01_Prostate.cfg
Modified and saved: validation/prostate/personalized_mo

In [27]:
# personalize with CNV
tailor_bnd_cnv_validation(df_melted_cnv, original_data_dir, tissue)

🔍 Processing patient TCGA-YL-A9WH-01, gene: ZBTB17
Patient TCGA-YL-A9WH-01 is in both gain and loss groups. Please review.
ZBTB17 node found. Replacing...
🔍 Processing patient TCGA-YL-A9WH-01, gene: JUN
Patient TCGA-YL-A9WH-01 is in both gain and loss groups. Please review.
JUN node found. Replacing...
🔍 Processing patient TCGA-YL-A9WH-01, gene: PDK1
Patient TCGA-YL-A9WH-01 is in both gain and loss groups. Please review.
PDK1 node found. Replacing...
🔍 Processing patient TCGA-YL-A9WH-01, gene: CFLAR
Patient TCGA-YL-A9WH-01 is in both gain and loss groups. Please review.
CFLAR node found. Replacing...
🔍 Processing patient TCGA-YL-A9WH-01, gene: IDH1
Patient TCGA-YL-A9WH-01 is in both gain and loss groups. Please review.
IDH1 node found. Replacing...
🔍 Processing patient TCGA-YL-A9WH-01, gene: VHL
Patient TCGA-YL-A9WH-01 is in both gain and loss groups. Please review.
VHL node found. Replacing...
🔍 Processing patient TCGA-YL-A9WH-01, gene: ATR
Patient TCGA-YL-A9WH-01 is in both gain and 

In [28]:
# # add mutations info 

# # Add somatic mutations data 
# # keep only the one we had for the main pipeline

# mutations_data = pd.read_csv('data/TCGA_data/TCGA_mutations_mutect2_GDC-PANCAN.csv')
# mutations_data_filtered = mutations_data[mutations_data['Sample_ID'].isin(patients_id)]
# mutations_data_filtered = mutations_data_filtered[mutations_data_filtered['gene'].isin(montagud_nodes)]

# # check if genes are TSG/ Oncogenes
# onco_tsg_gene = pd.read_csv('data/unknown_origin/oncogenes_tsg.csv')
# onco_tsg_gene = onco_tsg_gene[['Hugo Symbol', 'Is Oncogene', 'Is Tumor Suppressor Gene']]
# onco_tsg_gene_filtered = onco_tsg_gene[onco_tsg_gene['Hugo Symbol'].isin(montagud_nodes)]
# onco_tsg_gene_filtered = onco_tsg_gene_filtered.rename(columns={'Hugo Symbol': 'gene'})
# # oncogenes = onco_tsg_gene_filtered[onco_tsg_gene_filtered['Is Oncogene'] == 'Yes']
# # tsg_genes = onco_tsg_gene_filtered[onco_tsg_gene_filtered['Is Tumor Suppressor Gene'] == 'Yes']


# mutations_annotated = mutations_data_filtered.merge(
#     onco_tsg_gene_filtered[['gene', 'Is Oncogene', 'Is Tumor Suppressor Gene']],
#     on='gene',
#     how='left'
# )
# mutations_annotated = mutations_annotated.rename(columns={'Is Oncogene': 'oncogene', 'Is Tumor Suppressor Gene': 'tsg'})

# mutations_annotated = mutations_annotated[
#     mutations_annotated['oncogene'].notna() | mutations_annotated['tsg'].notna()
# ]

# # loss function mutation assumption -> TSG and 
# lof_effects = ["frameshift_variant", "stop_gained", "start_lost", "splice_region_variant"]
# lof_mutations = mutations_annotated[mutations_annotated['effect'].isin(lof_effects)]
# lof_mutations_tsg = lof_mutations[(lof_mutations['tsg'] == 'Yes') & (lof_mutations['oncogene'] == 'No')]
# lof_mutations_tsg_filtered = lof_mutations_tsg[['Sample_ID', 'gene']]


# mutations_onco = mutations_annotated[(mutations_annotated['tsg'] == 'No') & (mutations_annotated['oncogene'] == 'Yes')]
# print(mutations_onco.head())
# # dna_vaf > 0.5 -> clonal mutation (mutation probably in the early tumor cells)
# # gof_effects = ['p.G12D','p.S249C', 'p.Y373C']
# # gof_mutations = mutations_onco[mutations_onco['Amino_Acid_Change'].isin(gof_effects)]
# # gof_mutations_filtered = gof_mutations[['Sample_ID', 'gene']]




Let's try to add more info to reflect metastasis change, add mutations data

In [29]:
# tailor_bnd_mutat_validation(lof_mutations_tsg_filtered,gof_mutations_filtered,folder_pers_models, tissue)

In [30]:

folder_models = "validation/prostate/personalized_models/proteins_models"
folder_save_results = "validation/prostate/results/proteins_models/phenotype_distribution/phenotype_table"
phenotypes_interest = [
    "Proliferation",
    "Invasion",
    "DNA_Repair",
    "Migration",
    "Apoptosis",
]

inputs_list = [
    "EGF",
    "FGF",
    "TGFB",
    "Androgen",
    "Hypoxia",
    "Nutrients",
    "Carcinogen",
    "Acidosis",
    "TNF",
    "fused_event",
    "SPOP",
]
for patient in patients_id:
    compute_phenotype_table(folder_save_results,folder_models,patient,inputs_list,phenotypes_interest,tissue="Prostate")


In [31]:
phenotype_data_filtered.head()
# print(phenotype_data_filtered['Gleason_group'].unique())

Unnamed: 0,sampleID,gleason_score,Gleason_group
0,TCGA-2A-A8VL-01,6,low_aggressive
1,TCGA-2A-A8VO-01,6,low_aggressive
2,TCGA-2A-A8VT-01,9,high_aggressive
3,TCGA-2A-A8VV-01,6,low_aggressive
4,TCGA-2A-A8VX-01,8,high_aggressive


In [45]:
low_group_ids= list(phenotype_data_filtered[phenotype_data_filtered['Gleason_group'] == 'low_aggressive']['sampleID'])
medium_group_ids= list(phenotype_data_filtered[phenotype_data_filtered['Gleason_group'] == 'middle_aggressive']['sampleID'])
high_group_ids= list(phenotype_data_filtered[phenotype_data_filtered['Gleason_group'] == 'high_aggressive']['sampleID'])

In [None]:
#move each files to directory corresponding

# Map group names to sample ID lists
group_mapping = {
    "low_group": low_group_ids,
    "medium_group": medium_group_ids,
    "high_group": high_group_ids,
}

# Folder where all the files currently are
source_dir = "validation/prostate/results/proteins_models/phenotype_distribution/phenotype_table"

dest_base_dir = "validation/prostate/results/proteins_models/phenotype_group_means"

# Loop over all files in the source directory
for filename in os.listdir(source_dir):
    if not filename.startswith("_TCGA"):
        continue
    sample_id = filename.replace("_", "").replace(".csv", "")
    # Determine the group of this sample
    group_found = False
    for group_name, id_list in group_mapping.items():
        if sample_id in id_list:
            group_folder = os.path.join(dest_base_dir, group_name)
            os.makedirs(group_folder, exist_ok=True)

            src_path = os.path.join(source_dir, filename)
            dst_path = os.path.join(group_folder, filename)
            shutil.move(src_path, dst_path)

            group_found = True
            break

    if not group_found:
        print(f" Sample ID {sample_id} not found in any group list.")


In [46]:
# combine all the values
groups = ["low_group", "medium_group", "high_group"]

folder_groups_means = "validation/prostate/results/proteins_models/phenotype_group_means"
# to do-> change name (only compute mean not validation)
mean_df =compute_phenotype_mean_group_validation(groups, folder_groups_means)

              Proliferation  Invasion  DNA_Repair  Migration  Apoptosis
Acidosis           0.292123  0.035813    0.233113   0.011228   0.072441
Androgen           0.300313  0.060190    0.232747   0.040647   0.068539
Carcinogen         0.333286  0.076200    0.444396   0.012908   0.186672
EGF                0.317034  0.071839    0.235267   0.032093   0.105377
FGF                0.331681  0.060549    0.233520   0.012204   0.136078
Hypoxia            0.253374  0.034519    0.233728   0.018652   0.063748
Nutrients          0.356842  0.038546    0.232668   0.016591   0.059296
SPOP               0.335883  0.099491    0.232835   0.029573   0.046258
TGFB               0.261204  0.266426    0.233843   0.029161   0.220895
TNF                0.266355  0.289102    0.231578   0.066811   0.071465
fused_event        0.292009  0.039256    0.233135   0.015191   0.064952
Overall_Mean       0.303646  0.097448    0.252439   0.025915   0.099611
              Proliferation  Invasion  DNA_Repair  Migration  Ap

In [51]:
# combine values of a directory together

from collections import defaultdict
def collect_group_data(group_folder_path):
    combined_data = defaultdict(lambda: defaultdict(list))

    for file in os.listdir(group_folder_path):
        if file.startswith("_TCGA") and file.endswith(".csv"):
            file_path = os.path.join(group_folder_path, file)
            df = pd.read_csv(file_path, index_col=0)

            for input_name in df.index:
                for phenotype in df.columns:
                    value = df.at[input_name, phenotype]
                    combined_data[input_name][phenotype].append(float(value))

    result_df = pd.DataFrame.from_dict(combined_data, orient='index')
    result_df.to_csv(os.path.join(group_folder_path, "combined_results.csv"))

    return result_df


base_path = "validation/prostate/results/proteins_models/phenotype_group_means"

group_names = ["low_group", "medium_group", "high_group"]
group_dataframes = {}

for group in group_names:
    folder_path = os.path.join(base_path, group)
    group_df = collect_group_data(folder_path)
    group_dataframes[group] = group_df
group_dataframes

{'low_group':                                                  Proliferation  \
 EGF          [0.024, 0.988, 0.006348, 0.271145, 0.014464, 0...   
 FGF          [0.0238579999999999, 0.98, 0.0289299999999999,...   
 TGFB         [0.02, 0.982598, 0.03, 0.0547869999999999, 0.0...   
 Androgen     [0.0238579999999999, 0.98, 0.0289299999999999,...   
 Hypoxia      [0.016596, 0.98, 0.025499, 0.044189, 0.027192,...   
 Nutrients    [0.012, 0.99, 0.018, 0.413065, 0.020482, 0.966...   
 Carcinogen   [0.017558, 0.984, 0.023612, 0.346861, 0.014, 0...   
 Acidosis     [0.004, 0.98, 0.008, 0.1714709999999999, 0.004...   
 TNF          [0.042, 0.992, 0.0236909999999999, 0.086129, 0...   
 fused_event  [0.021046, 0.989613, 0.019942, 0.155647, 0.044...   
 SPOP         [0.026, 0.975878, 0.0160319999999999, 0.34937,...   
 
                                                       Invasion  \
 EGF          [0.024, 0.013049, 0.010454, 0.2258129999999999...   
 FGF          [0.033914, 0.014, 0.021196, 0.157

In [52]:


# Paths to your combined data CSVs
base_path = "validation/prostate/results/proteins_models/phenotype_group_means"
group_files = {
    "low": os.path.join(base_path, "low_group", "combined_results.csv"),
    "medium": os.path.join(base_path, "medium_group", "combined_results.csv"),
    "high": os.path.join(base_path, "high_group", "combined_results.csv"),
}

# Load all groups into dict of DataFrames
group_dfs = {}
for group, path in group_files.items():
    # Because each cell is a list saved as a string, parse it back to list
    df = pd.read_csv(path, index_col=0)
    # Convert strings like '[1.2, 3.4]' back to Python lists using ast.literal_eval
    df = df.applymap(ast.literal_eval)
    group_dfs[group] = df

# Get all inputs and phenotypes from one dataframe (assuming all share the same shape)
inputs = group_dfs["low"].index
phenotypes = group_dfs["low"].columns

# Prepare result storage
kruskal_results = pd.DataFrame(index=inputs, columns=phenotypes)

# Run Kruskal-Wallis test for each (input, phenotype)
for input_name in inputs:
    for phenotype in phenotypes:
        data_low = group_dfs["low"].at[input_name, phenotype]
        data_medium = group_dfs["medium"].at[input_name, phenotype]
        data_high = group_dfs["high"].at[input_name, phenotype]

        # Run the Kruskal-Wallis test only if all groups have data
        if data_low and data_medium and data_high:
            stat, pvalue = kruskal(data_low, data_medium, data_high)
            kruskal_results.at[input_name, phenotype] = pvalue
        else:
            kruskal_results.at[input_name, phenotype] = None

# Optionally, save the p-values table to CSV
kruskal_results.to_csv(os.path.join(base_path, "kruskal_pvalues.csv"))
print(kruskal_results)


            Proliferation  Invasion DNA_Repair Migration Apoptosis
EGF              0.017723  0.240162   0.017666  0.243646  0.812392
FGF              0.000639  0.005279   0.006998  0.013387  0.004204
TGFB             0.001575  0.001585   0.001145   0.09046  0.123084
Androgen         0.000512  0.002059   0.002614  0.010682  0.002969
Hypoxia          0.000819  0.000626   0.000136  0.026098  0.179885
Nutrients        0.000747  0.001884   0.000129  0.345769  0.690985
Carcinogen       0.002631  0.251649   0.000395  0.208729   0.07889
Acidosis         0.000458  0.016182   0.001118  0.079435  0.015019
TNF              0.009474  0.339702   0.004658  0.097586  0.216672
fused_event      0.007635  0.000707   0.000323  0.009554  0.114082
SPOP             0.000945  0.282215   0.000137  0.122191  0.082015


  df = df.applymap(ast.literal_eval)
  df = df.applymap(ast.literal_eval)
  df = df.applymap(ast.literal_eval)


In [53]:
from statsmodels.stats.multitest import multipletests

# Flatten p-values to a 1D array, ignoring None or NaNs
pvals = kruskal_results.values.flatten()
pvals = [p for p in pvals if p is not None]

# Adjust using BH method
_, pvals_adj, _, _ = multipletests(pvals, alpha=0.05, method='fdr_bh')

# Now, you need to put adjusted p-values back into the DataFrame shape
# Create a copy to fill
adjusted_df = kruskal_results.copy()

# Fill adjusted p-values sequentially where there was a non-None p-value
idx = 0
for i in adjusted_df.index:
    for j in adjusted_df.columns:
        if adjusted_df.at[i, j] is not None:
            adjusted_df.at[i, j] = pvals_adj[idx]
            idx += 1
print(adjusted_df)


            Proliferation  Invasion DNA_Repair Migration Apoptosis
EGF              0.027851   0.27348   0.027851   0.27348  0.812392
FGF              0.003733  0.011614   0.014803  0.023751  0.010053
TGFB             0.005127  0.005127   0.004198  0.124383  0.153855
Androgen         0.003733  0.005961    0.00689  0.019583  0.007422
Hypoxia          0.003752  0.003733    0.00252  0.039872   0.21986
Nutrients        0.003733  0.005756    0.00252  0.358817  0.703781
Carcinogen        0.00689  0.276813   0.003733  0.249567  0.114972
Acidosis         0.003733   0.02697   0.004198  0.114972  0.025815
TNF               0.01812  0.358817   0.010675  0.130908  0.253553
fused_event      0.015552  0.003733   0.003733   0.01812  0.149394
SPOP             0.003996  0.304349    0.00252  0.153855  0.115662


In [54]:
# keep only the significant results
significant_df = adjusted_df.copy()
significant_df[significant_df >= 0.05] = np.nan
significant_df

Unnamed: 0,Proliferation,Invasion,DNA_Repair,Migration,Apoptosis
EGF,0.027851,,0.027851,,
FGF,0.003733,0.011614,0.014803,0.023751,0.010053
TGFB,0.005127,0.005127,0.004198,,
Androgen,0.003733,0.005961,0.00689,0.019583,0.007422
Hypoxia,0.003752,0.003733,0.00252,0.039872,
Nutrients,0.003733,0.005756,0.00252,,
Carcinogen,0.00689,,0.003733,,
Acidosis,0.003733,0.02697,0.004198,,0.025815
TNF,0.01812,,0.010675,,
fused_event,0.015552,0.003733,0.003733,0.01812,
