# prism viability + klaeger kinome inhibition state (kis)

In [1]:
import pyreadr
def read_rds(file):
    """Read an RDS file and return the object stored in it.

    The result of ``pyreadr.read_r(file)`` is indexed with the key ``None``
    and that entry is returned.
    """
    return pyreadr.read_r(file)[None]

# Load the tidy PRISM viability table and preview its first rows.
viability = read_rds(file='../data/PRISM_klaeger_imputed_tidy.rds')
viability.head(n=5)

Unnamed: 0,depmap_id,drug,klaeger_conc,imputed_viability
0,ACH-000007,Abemaciclib,3e-09,0.974677
1,ACH-000007,Abemaciclib,1e-08,0.94218
2,ACH-000007,Abemaciclib,3e-08,0.884299
3,ACH-000007,Abemaciclib,1e-07,0.767666
4,ACH-000007,Abemaciclib,3e-07,0.554302


In [2]:
# Load the tidy Klaeger table and tag every gene name with an 'inh_' prefix,
# so the per-gene columns created later are recognisable by that prefix.
kis = read_rds(file='../data/klaeger_full_tidy.rds')
kis = kis.assign(gene_name='inh_' + kis['gene_name'])
kis.head(n=5)

Unnamed: 0,drug,gene_name,relative_intensity,concentration_M
0,Abemaciclib,inh_AAK1,1.0,0.0
1,Abemaciclib,inh_AAK1,0.905556,3e-09
2,Abemaciclib,inh_AAK1,0.791054,1e-08
3,Abemaciclib,inh_AAK1,0.738292,3e-08
4,Abemaciclib,inh_AAK1,0.639565,1e-07


In [3]:
# Reshape long -> wide: one row per (drug, concentration_M), one column per gene,
# cells holding relative_intensity. The leftover 'gene_name' label on the column
# axis is cleared so the frame has plain columns.
kis = (
    kis.pivot(
        index=['drug', 'concentration_M'],
        columns='gene_name',
        values='relative_intensity',
    )
    .reset_index()
    .rename_axis(None, axis='columns')
)
kis.head(n=5)

Unnamed: 0,drug,concentration_M,inh_AAK1,inh_ABL1,inh_ABL2,inh_ACAD10,inh_ACAD11,inh_ACADVL,inh_ACOX1,inh_ACOX3,...,inh_ULK3,inh_UNC119,inh_VDAC3,inh_WEE1,inh_YARS,inh_YES1,inh_YTHDF3;YTHDF1,inh_YWHAG,inh_YWHAQ,inh_ZAK
0,AC-480,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,AC-480,3e-09,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,AC-480,1e-08,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,AC-480,3e-08,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,AC-480,1e-07,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


# create kis_viability

In [4]:
# Drugs and concentrations that occur in both the viability and the kis tables.
common_drugs = set(viability['drug']) & set(kis['drug'])
common_conc = set(viability['klaeger_conc']) & set(kis['concentration_M'])

In [5]:
# Keep kis rows whose drug and concentration both occur in the viability data,
# then attach one viability value per (drug, concentration, cell line).
# NOTE(review): the join matches concentrations by exact equality; if these are
# floats, both tables must carry identical values -- confirm upstream.
in_common = kis['drug'].isin(common_drugs) & kis['concentration_M'].isin(common_conc)
kis_viability = (
    kis[in_common]
    .sort_values(['drug', 'concentration_M'])
    .reset_index(drop=True)
    .merge(
        viability,
        left_on=['drug', 'concentration_M'],
        right_on=['drug', 'klaeger_conc'],
        how='inner',
    )
    .drop(columns=['klaeger_conc'])  # duplicate of concentration_M after the join
    .rename(columns={'imputed_viability': 'viability'})
)
kis_viability.head(n=5)

Unnamed: 0,drug,concentration_M,inh_AAK1,inh_ABL1,inh_ABL2,inh_ACAD10,inh_ACAD11,inh_ACADVL,inh_ACOX1,inh_ACOX3,...,inh_VDAC3,inh_WEE1,inh_YARS,inh_YES1,inh_YTHDF3;YTHDF1,inh_YWHAG,inh_YWHAQ,inh_ZAK,depmap_id,viability
0,AC-480,3e-09,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-000007,1.0
1,AC-480,3e-09,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-000008,1.08875
2,AC-480,3e-09,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-000011,1.0
3,AC-480,3e-09,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-000012,1.29819
4,AC-480,3e-09,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-000013,1.458047


# kinome graph

In [6]:
# list of kinases to see which kinome graph to use
# Strip only the leading 'inh_' prefix. Splitting on 'inh_' and taking the last
# piece would return a truncated name for any gene whose own name contains 'inh_'.
kinases = [col[len('inh_'):] for col in kis_viability.columns if col.startswith('inh_')]
kinases

['AAK1',
 'ABL1',
 'ABL2',
 'ACAD10',
 'ACAD11',
 'ACADVL',
 'ACOX1',
 'ACOX3',
 'ACP1',
 'ACSL5',
 'ACTR2',
 'ACTR3',
 'ACVR1',
 'ACVR1B',
 'ACVR2B',
 'ACVRL1',
 'ADCK1',
 'ADCK3',
 'ADCK5',
 'ADD2',
 'ADK',
 'ADRBK1',
 'ADSL',
 'AFF1',
 'AFF4',
 'AIMP1',
 'AK2',
 'AKR1C3',
 'AKT1',
 'AKT2',
 'AKT3',
 'ALK',
 'AP1B1',
 'AP1G1',
 'AP2A1',
 'AP2A2',
 'AP2B1',
 'AP2M1',
 'APRT',
 'ARAF',
 'ATR',
 'AURKA',
 'AURKB',
 'AZI2',
 'BCR',
 'BMP2K',
 'BMPR1A',
 'BMPR1B',
 'BMPR2',
 'BRAF',
 'BRD4',
 'BRD4;BRD3',
 'BTK',
 'BUB1',
 'C2CD5',
 'CAB39',
 'CABLES1',
 'CALM1',
 'CALR',
 'CAMK1G',
 'CAMK2D',
 'CAMK2G',
 'CAMK4',
 'CAMKK2',
 'CAPNS1',
 'CAPZA1',
 'CARS',
 'CBR1',
 'CCAR2',
 'CCDC47',
 'CCNA2',
 'CCNB1',
 'CCNB2',
 'CCNE1',
 'CCNE2',
 'CCNH',
 'CCNI',
 'CCNK',
 'CCNT1',
 'CCNT2',
 'CDC23',
 'CDC42BPA',
 'CDC42BPB',
 'CDC42BPG',
 'CDC7',
 'CDK1',
 'CDK10',
 'CDK12',
 'CDK13',
 'CDK16',
 'CDK17',
 'CDK18',
 'CDK2',
 'CDK3',
 'CDK4',
 'CDK5',
 'CDK6',
 'CDK7',
 'CDK9',
 'CDKL5',
 'CERS2',
 '

In [7]:
import networkx as nx
import pandas as pd

# v1: hippie -- tab-separated edge table with columns source, target, prob
df = pd.read_csv(
    '../data/hippieKinNetEntrez-v1.tab',
    sep='\t',
    header=None,
    names=['source', 'target', 'prob'],
)
G1 = nx.from_pandas_edgelist(df, edge_attr=None)
# sanity check: every endpoint in the table became a node, and nothing else did
assert set(df['source']) | set(df['target']) == set(G1.nodes)
print('nodes:', G1.number_of_nodes(), 'edges:', G1.number_of_edges())

# v2: csv -- the same file is read twice: once as a table (for the sanity
# check) and once directly as a graph
df = pd.read_csv(
    '../data/kin_unweighted.csv',
    sep='\t',
    header=None,
    names=['source', 'target'],
)
G2 = nx.read_edgelist('../data/kin_unweighted.csv')
assert set(df['source']) | set(df['target']) == set(G2.nodes)
print('nodes:', G2.number_of_nodes(), 'edges:', G2.number_of_edges())

# WIP: visualize (disabled; note that `G` is not defined at this point)
# from pyvis.network import Network
#
# g = Network(cdn_resources='in_line')
# g.from_nx(G)
# g.show('ex.html')
# plot G
# nx.draw(G, with_labels=True)

nodes: 324 edges: 1795
nodes: 517 edges: 5066


In [8]:
# For each candidate graph: node count, and (in parentheses) how many of the
# measured kinases appear among its nodes.
print('kinases:', len(kinases))
measured = set(kinases)
for label, graph in (('G1', G1), ('G2', G2)):
    overlap = set(graph.nodes) & measured
    print(f'{label}: {len(graph.nodes)} ({len(overlap)})')

kinases: 520
G1: 324 (144)
G2: 517 (237)


G2 contains more of the measured kinases than G1 (237 vs. 144 of 520), so G2 is used as the kinome graph below.

In [9]:
# Reload the chosen edge list as the working graph G and collect the measured
# kinases that are present among its nodes.
df = pd.read_csv(
    '../data/kin_unweighted.csv',
    sep='\t',
    header=None,
    names=['source', 'target'],
)
G = nx.read_edgelist('../data/kin_unweighted.csv')

matched_kinases = set(G.nodes) & set(kinases)
len(matched_kinases)

237

# for a fair comparison, only include kis for the kinases matched to the graph

In [10]:
# Show the full merged frame (all inh_ columns) before it is restricted to graph-matched kinases.
kis_viability

Unnamed: 0,drug,concentration_M,inh_AAK1,inh_ABL1,inh_ABL2,inh_ACAD10,inh_ACAD11,inh_ACADVL,inh_ACOX1,inh_ACOX3,...,inh_VDAC3,inh_WEE1,inh_YARS,inh_YES1,inh_YTHDF3;YTHDF1,inh_YWHAG,inh_YWHAQ,inh_ZAK,depmap_id,viability
0,AC-480,3.000000e-09,1.000000,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-000007,1.000000
1,AC-480,3.000000e-09,1.000000,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-000008,1.088750
2,AC-480,3.000000e-09,1.000000,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-000011,1.000000
3,AC-480,3.000000e-09,1.000000,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-000012,1.298190
4,AC-480,3.000000e-09,1.000000,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-000013,1.458047
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610843,Y-39983,3.000000e-05,0.019503,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-001239,-0.038812
610844,Y-39983,3.000000e-05,0.019503,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-001306,1.784841
610845,Y-39983,3.000000e-05,0.019503,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-001307,1.000000
610846,Y-39983,3.000000e-05,0.019503,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ACH-001318,1.876079


In [11]:
# Keep only the inh_ columns whose kinase is a node of the graph.
# The kinase name is the column name with the leading 'inh_' removed; slicing
# (rather than split('inh_')[-1]) stays correct even if a gene name itself
# contains 'inh_'.
inh_prefix = 'inh_'
matched_inh_cols = [
    col for col in kis_viability.columns
    if col.startswith(inh_prefix) and col[len(inh_prefix):] in matched_kinases
]
# every matched kinase must map to exactly one inh_ column
assert len(matched_inh_cols) == len(matched_kinases), (
    f'{len(matched_inh_cols)} inh_ columns selected for {len(matched_kinases)} matched kinases'
)
# non-inhibition columns first, then the matched inhibition columns
other_cols = [col for col in kis_viability.columns if not col.startswith(inh_prefix)]
matched_kis_viability = kis_viability[other_cols + matched_inh_cols]
matched_kis_viability

Unnamed: 0,drug,concentration_M,depmap_id,viability,inh_AAK1,inh_ABL1,inh_ABL2,inh_ACVR1,inh_ACVR1B,inh_ACVR2B,...,inh_TGFBR2,inh_TNIK,inh_TNK1,inh_TNK2,inh_TP53RK,inh_TTK,inh_TYK2,inh_ULK1,inh_WEE1,inh_YES1
0,AC-480,3.000000e-09,ACH-000007,1.000000,1.000000,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,AC-480,3.000000e-09,ACH-000008,1.088750,1.000000,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,AC-480,3.000000e-09,ACH-000011,1.000000,1.000000,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,AC-480,3.000000e-09,ACH-000012,1.298190,1.000000,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,AC-480,3.000000e-09,ACH-000013,1.458047,1.000000,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610843,Y-39983,3.000000e-05,ACH-001239,-0.038812,0.019503,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
610844,Y-39983,3.000000e-05,ACH-001306,1.784841,0.019503,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
610845,Y-39983,3.000000e-05,ACH-001307,1.000000,0.019503,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
610846,Y-39983,3.000000e-05,ACH-001318,1.876079,0.019503,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


# save kis_viability and edgelist to results

In [12]:
# save graph as a comma-separated edge list without edge attributes
nx.write_edgelist(G, '../results/kinome.edgelist', delimiter=',', data=False)
# ensure graph is saved correctly: reload the file and compare with G.
# Node labels are read strictly as strings (dtype=str, na_filter=False) so that
# labels which look numeric or like a missing-value marker (e.g. 'NA') are not
# converted by pandas type inference, which would make the comparison fail
# even though the file is correct.
saved_edges = pd.read_csv(
    '../results/kinome.edgelist',
    header=None,
    names=['source', 'target'],
    dtype=str,
    na_filter=False,
)
test_G = nx.from_pandas_edgelist(saved_edges)
assert nx.utils.misc.graphs_equal(G, test_G), 'reloaded edge list does not reproduce G'

# save df
matched_kis_viability.to_parquet('../results/kis_viability.parquet', index=False)
# ensure df is saved correctly: reload and compare with the in-memory frame
test_df = pd.read_parquet('../results/kis_viability.parquet')
assert matched_kis_viability.equals(test_df), 'reloaded parquet differs from matched_kis_viability'