In [1]:
import pandas as pd
from path_calc import Solver
from evaluation import Eval
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

In [2]:
# Compound -> perturbed protein table (tab-separated).
source_df = pd.read_csv(
    'panacea_sources.tsv',
    sep='\t',
)

In [3]:
# Tabular view of the same network file that is loaded as a graph below.
net = pd.read_csv(
    'collectri_network_sscp.sif',
    sep='\t',
)

In [4]:
# Load the network as a directed graph; each edge arrives with one signed 'weight'.
G = nx.read_weighted_edgelist(
    'collectri_network_sscp.sif',
    delimiter='\t',
    create_using=nx.DiGraph,
)
# Split the signed weight into a separate 'sign' (+1 / -1) and a non-negative 'weight'.
for _src, _dst, edge_attrs in G.edges(data=True):
    signed_weight = edge_attrs['weight']
    edge_attrs.update(
        sign=1 if signed_weight >= 0 else -1,
        weight=abs(signed_weight),
    )

In [5]:
# Number of nodes in the loaded network.
len(G)

9065

In [6]:
# Every compound listed in the source table (before any filtering).
drugs = source_df['cmpd'].tolist()

In [7]:
# Keep only the compounds whose target is present as a node in the network.
nodes_network = list(G.nodes)
target_in_network = source_df['target'].isin(nodes_network)
filtered_source_df = source_df[target_in_network]

In [8]:
# Preview the source rows that survived the network-membership filter.
filtered_source_df

Unnamed: 0,cmpd,target,sign
0,AEE788,LIMK1,-1
1,AFATINIB,MAPKAPK2,-1
3,BAFETINIB,MAPK14,-1
4,BOSUTINIB,STK10,-1
5,CABOZANTINIB,ABL2,-1
6,CEDIRANIB,DDR2,-1
7,CRENOLANIB,CDK9,-1
8,CRIZOTINIB,PRKD3,-1
9,DACOMITINIB,RIPK2,-1
10,DASATINIB,LIMK2,-1


In [9]:
# Sanity check: after filtering, every remaining target should be a network node (all True).
filtered_source_df['target'].isin(nodes_network)

0     True
1     True
3     True
4     True
5     True
6     True
7     True
8     True
9     True
10    True
11    True
12    True
13    True
14    True
15    True
16    True
17    True
18    True
19    True
20    True
21    True
22    True
23    True
24    True
25    True
26    True
28    True
29    True
30    True
31    True
Name: target, dtype: bool

In [10]:
# Build one dictionary per treatment: {treatment: {target: sign}}.
# Columns are addressed by name rather than by position so that the mapping
# stays correct if the TSV gains or reorders columns.
source_dict = {}
for treatment, target, sign in zip(
    filtered_source_df['cmpd'],
    filtered_source_df['target'],
    filtered_source_df['sign'],
):
    # float() keeps the stored sign a plain Python float (e.g. -1.0).
    source_dict.setdefault(treatment, {})[target] = float(sign)


In [11]:
# Inspect the {treatment: {target: sign}} mapping built in the previous cell.
source_dict

{'AEE788': {'LIMK1': -1.0},
 'AFATINIB': {'MAPKAPK2': -1.0},
 'BAFETINIB': {'MAPK14': -1.0},
 'BOSUTINIB': {'STK10': -1.0},
 'CABOZANTINIB': {'ABL2': -1.0},
 'CEDIRANIB': {'DDR2': -1.0},
 'CRENOLANIB': {'CDK9': -1.0},
 'CRIZOTINIB': {'PRKD3': -1.0},
 'DACOMITINIB': {'RIPK2': -1.0},
 'DASATINIB': {'LIMK2': -1.0},
 'DOVITINIB': {'MARK4': -1.0},
 'FORETINIB': {'EPHA4': -1.0},
 'GEFITINIB': {'GAK': -1.0},
 'ICOTINIB': {'GAK': -1.0},
 'IMATINIB': {'DDR2': -1.0},
 'KW2449': {'MAP4K2': -1.0},
 'LAPATINIB': {'EGFR': -1.0},
 'LINIFANIB': {'FLT3': -1.0},
 'MGCD265': {'PTK6': -1.0},
 'MK2206': {'AKT3': -1.0},
 'NERATINIB': {'STK26': -1.0},
 'NILOTINIB': {'MAPK14': -1.0},
 'OSIMERTINIB': {'BTK': -1.0},
 'PONATINIB': {'MAP4K1': -1.0},
 'QUIZARTINIB': {'MYH10': -1.0},
 'REGORAFENIB': {'EPHA2': -1.0},
 'SUNITINIB': {'CLK2': -1.0},
 'TIVANTINIB': {'MET': -1.0},
 'VANDETANIB': {'BCR': -1.0},
 'VARLITINIB': {'RIPK2': -1.0}}

In [12]:
# Restrict the drug list to compounds that passed the network filter and
# pick the first one as a working example.
drugs = filtered_source_df['cmpd'].tolist()
drug = drugs[0]

In [13]:
# Load the per-gene, per-cell-line readouts for every treatment and preview them.
targets = pd.read_csv(
    'panacea_targets.tsv',
    sep='\t',
)
targets

Unnamed: 0,gene_symbol,cell_line,treatment,logFC,padj,stat
0,TXNDC17,HSTS,AEE788,1.704067,0.000132,6.973952
1,SLC4A1,ASPC,AEE788,2.671178,0.000409,5.986357
2,RDH11,KRJ1,AEE788,1.756632,0.024179,5.118672
3,MYC,LNCAP,AEE788,2.525878,0.019276,4.843098
4,CWC22,ASPC,AEE788,2.009870,0.013943,4.828274
...,...,...,...,...,...,...
11601,TRAPPC2,ASPC,VARLITINIB,1.611553,0.035870,4.427042
11602,HAUS2,LNCAP,VARLITINIB,1.861795,0.045524,4.426707
11603,KANSL2,HF2597,VARLITINIB,2.053682,0.026614,4.421711
11604,ZNF470,LNCAP,VARLITINIB,1.718736,0.046718,4.356714


In [14]:
# Collapse to one row per (gene_symbol, treatment) by averaging logFC, padj
# and stat over all rows that share the pair.
agg_targets = (
    targets
    .groupby(['gene_symbol', 'treatment'])[['logFC', 'padj', 'stat']]
    .mean()
    .reset_index()
)

In [15]:
# Order the aggregated rows by treatment.
agg_targets = agg_targets.sort_values(by='treatment')

# Count how many genes each treatment has.
agg_targets.groupby('treatment')[['gene_symbol']].count()



Unnamed: 0_level_0,gene_symbol
treatment,Unnamed: 1_level_1
AEE788,9
AFATINIB,24
AZD5363,32
BAFETINIB,249
BOSUTINIB,53
CABOZANTINIB,46
CEDIRANIB,16
CRENOLANIB,30
CRIZOTINIB,20
DACOMITINIB,42


In [16]:
# Per treatment, keep at most MAX_TARGETS_PER_TREATMENT genes, ranked by the
# largest absolute value of the stat column (so strongly negative genes are
# retained as well as strongly positive ones).
MAX_TARGETS_PER_TREATMENT = 100
agg_targets_filtered = (
    agg_targets
    # Rank all rows by |stat|, largest first; mergesort keeps ties in input order.
    .sort_values(by='stat', key=lambda stat: stat.abs(), ascending=False, kind='mergesort')
    # GroupBy.head returns the first n rows of each group with every column intact.
    .groupby('treatment', sort=False)
    .head(MAX_TARGETS_PER_TREATMENT)
    # Restore treatment-major ordering; the stable sort preserves the |stat| ranking.
    .sort_values(by='treatment', kind='mergesort')
    .reset_index(drop=True)
)

In [17]:
# Count the genes kept per treatment after capping.
agg_targets_filtered.groupby('treatment')[['gene_symbol']].count()

Unnamed: 0_level_0,gene_symbol
treatment,Unnamed: 1_level_1
AEE788,9
AFATINIB,24
AZD5363,32
BAFETINIB,100
BOSUTINIB,53
CABOZANTINIB,46
CEDIRANIB,16
CRENOLANIB,30
CRIZOTINIB,20
DACOMITINIB,42


In [18]:
# Build one dictionary per treatment: {treatment: {gene_symbol: sign}}, where
# sign is +1.0 for a positive stat and -1.0 for a negative stat.
# np.sign is used instead of stat / abs(stat) so that stat == 0 yields 0.0
# rather than NaN from a division by zero.
target_signs = agg_targets_filtered.assign(
    sign=np.sign(agg_targets_filtered['stat'].astype(float))
)
# A single groupby pass replaces re-filtering the whole frame per treatment;
# sort=False keeps treatments in order of first appearance.
target_dict = {
    treatment: dict(zip(rows['gene_symbol'], rows['sign'].tolist()))
    for treatment, rows in target_signs.groupby('treatment', sort=False)
}

target_dict

{'AEE788': {'TXNDC17': 1.0,
  'SLC4A1': 1.0,
  'RDH11': 1.0,
  'MYC': 1.0,
  'CWC22': 1.0,
  'LGALS9': 1.0,
  'FMN1': 1.0,
  'COL1A2': 1.0,
  'LINC02683': 1.0},
 'AFATINIB': {'CISH': 1.0,
  'ELOA': 1.0,
  'GPM6A': 1.0,
  'PRR4': 1.0,
  'GSDMB': 1.0,
  'VIRMA': 1.0,
  'IMPA2': 1.0,
  'NCALD': 1.0,
  'SEPTIN7': 1.0,
  'SPAST': 1.0,
  'SNX6': 1.0,
  'P2RY10': 1.0,
  'ZMIZ1': 1.0,
  'TRMT5': 1.0,
  'TP53I11': 1.0,
  'MAN1A1': 1.0,
  'TCERG1': 1.0,
  'UNC93B1': 1.0,
  'SLC6A20': 1.0,
  'AFDN': 1.0,
  'TMEM63A': 1.0,
  'POMT1': 1.0,
  'GAL3ST1': 1.0,
  'AKR7A2': 1.0},
 'AZD5363': {'MUCL1': 1.0,
  'BPIFB2': 1.0,
  'PIP': 1.0,
  'PPP1R36': 1.0,
  'SCGB2A2': 1.0,
  'PLCE1': 1.0,
  'NPRL2': 1.0,
  'MRPL35': 1.0,
  'SLC4A1': 1.0,
  'ZNF783': 1.0,
  'ARRB2': 1.0,
  'RPLP0': 1.0,
  'PIGS': 1.0,
  'LINC00910': 1.0,
  'NCALD': 1.0,
  'PSENEN': 1.0,
  'STPG1': 1.0,
  'LONRF3': 1.0,
  'MCEE': 1.0,
  'CYP27A1': 1.0,
  'GNG12': 1.0,
  'PRKCB': 1.0,
  'CUEDC2': 1.0,
  'CDYL2': 1.0,
  'KCNG1': 1.0,
  'ZC3H

In [19]:
# Spot-check the stored sign for the example drug's target.
# NOTE(review): the key 'LIMK1' is hard-coded, so this only works while `drug`
# is a compound whose target is LIMK1; re-running it after `drug` has been
# rebound to another compound is expected to raise KeyError.
source_dict[drug]['LIMK1']

-1.0

In [20]:
# Run the solver once per drug, using that drug's sources and targets.
# dict.fromkeys de-duplicates the drug list while preserving its order, so a
# compound listed on several rows is not solved more than once.
for drug in dict.fromkeys(drugs):
    # The source and target tables do not cover exactly the same compounds;
    # skip visibly instead of aborting the whole batch with a KeyError.
    if drug not in source_dict or drug not in target_dict:
        print('Skipping {}: no sources or no targets available'.format(drug))
        continue
    print('Solving for {}'.format(drug))
    # A fresh Solver per drug so that no state carries over between runs.
    G_solver = Solver(G, 'PANACEA')
    G_solver.source_dict = source_dict[drug]
    G_solver.target_dict = target_dict[drug]
    # NOTE(review): `cutoff = 4` is presumably a path-length limit — confirm in path_calc.Solver.
    G_solver.network_batchrun(iter = drug, cutoff = 4)




BOSUTINIB
Solving for BOSUTINIB
