In [108]:
import cmapPy
import cmapPy.pandasGEXpress.parse
import json
import numpy as np
from collections import Counter
np.random.seed(1024)
import pandas as pd
import statsmodels
from statsmodels.stats.multitest import fdrcorrection
import os
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range = (-2,2))

# gene processing

In [109]:
# hgnc genes
hgnc_v2 = pd.read_csv('/share/fsmresfiles/SpatialT/drug-target-results-OVR-0725/Analysis_PPI/HGNC091923.txt', sep='\t')
hgnc_dict = dict(zip(hgnc_v2['Approved symbol'], hgnc_v2['NCBI gene ID']))
hgnc_v2_prev = hgnc_v2[hgnc_v2['Previous symbol'].notnull()]
hgnc_dict_prev = dict(zip(hgnc_v2_prev['Previous symbol'], hgnc_v2_prev['NCBI gene ID']))

In [110]:
gene_info = pd.read_csv('/share/fsmresfiles/SpatialT/cmap2_l1000/geneinfo_beta.txt', sep='\t')

In [111]:
# mapping gene indices to real names 
gene_cmap2 = gene_info[['gene_id','gene_symbol']]
gene_cmap2.columns = ['num','name']
lst = []
for i in gene_cmap2['num'].tolist():
    if pd.isna(i) != True:
        lst.append(int(i))
    else:
        lst.append(i)
gene_cmap2['num'] = lst
gene_dict_cmap2 = gene_cmap2.set_index('num').to_dict()['name']
gene_dict_cmap2_rev = {v:k for k,v in gene_dict_cmap2.items()}

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gene_cmap2['num'] = lst


# cmap2 enrichment setup

In [112]:
# rank file of 6hr perturbations (from z scores)- gene x compound effects
cmap2_cmap1_perturb_rank_dur = pd.read_csv('/share/fsmresfiles/SpatialT/cmap2_l1000/processed/6hr_only/cmap2_6hr_pert_rank.csv', engine='c')

In [113]:
cmap2_cmap1_perturb_rank_dur = cmap2_cmap1_perturb_rank_dur.set_index('Unnamed: 0')

In [114]:
cmap2_cmap1_perturb_rank_dur_arry = np.array(cmap2_cmap1_perturb_rank_dur)

In [115]:
ROWS_dur = [str(gene_dict_cmap2_rev[i]) for i in cmap2_cmap1_perturb_rank_dur.index]
COLS_dur = cmap2_cmap1_perturb_rank_dur.columns.tolist()
ROW2IDX2_val_dur = list(range(0,len(ROWS_dur)))
COL2IDX2_val_dur = list(range(0,len(COLS_dur)))
ROW2IDX_key_dur = ROWS_dur
COL2IDX_key_dur = COLS_dur
ROW2IDX_dur = {k:v for k,v in zip(ROW2IDX_key_dur, ROW2IDX2_val_dur)}
COL2IDX_dur = {k:v for k,v in zip(COL2IDX_key_dur, COL2IDX2_val_dur)}

In [116]:
N_GENES, N_PERTURBATIONS = cmap2_cmap1_perturb_rank_dur_arry.shape
N_REPEATS = 1000
RANDOM_RANK = np.random.rand(N_GENES, N_REPEATS).argsort(axis=0) + 1  # gene x repeat

# cmap2 perturbation setup

In [117]:
# cmap2 perturbation zscore
z_score = np.load('/share/fsmresfiles/SpatialT/cmap2_l1000/processed/6hr_only/cmap2_6hr_pert_z.npy')

In [118]:
z_score_df = pd.DataFrame(z_score)

In [119]:
z_score_df.columns = cmap2_cmap1_perturb_rank_dur.columns
z_score_df.index = cmap2_cmap1_perturb_rank_dur.index

# format DGE results

In [120]:
def get_up_down_deg (df):
    df_up = df[(df['stat']>0.5) & (df['qval']<0.05) ]
    df_down = df[(df['stat']<-0.5) & (df['qval']<0.05) ]
    return df_up, df_down

In [121]:
# transform from genename to number
import math
def hgnc_gene_up_down(up_df, down_df):
    gene_num_up = []
    for i in up_df['gene'].tolist():
        if i in hgnc_dict.keys() and math.isnan(hgnc_dict[i]) == False:
            gene_num_up.append(str(int(hgnc_dict[i])))
        elif i in hgnc_dict_prev.keys() and math.isnan(hgnc_dict_prev[i]) == False:
            gene_num_up.append(str(int(hgnc_dict_prev[i])))
        else:
            print('SKIP GENE',i)

    gene_num_down = []
    for i in down_df['gene'].tolist():
        if i in hgnc_dict.keys() and math.isnan(hgnc_dict[i]) == False:
            gene_num_down.append(str(int(hgnc_dict[i])))
        elif i in hgnc_dict_prev.keys() and math.isnan(hgnc_dict_prev[i]) == False:
            gene_num_down.append(str(int(hgnc_dict_prev[i])))
        else:
            print('SKIP GENE', i) 
    return gene_num_up, gene_num_down

In [122]:
def write_gene_up_down(gene_lst, cell, direction):
    dge_dir = '/share/fsmresfiles/SpatialT/drug-target-cmap2/'+dsid+'/'+sampleid+'/DGE_dec_SVG_gene_num/'
    if not os.path.exists(dge_dir ):
        os.makedirs(dge_dir )
    #print(dge_dir+cell+'_'+direction+'.txt')
    with open(dge_dir+cell+'_'+direction+'.txt', 'w') as f:
        for line in gene_lst:
            f.write(f"{line}\n")

# GSEA

In [123]:
def cmap2_gsea_setup(gene_num_up, gene_num_down):
    DATA = {
        "U": [ROW2IDX_dur[g] for g in gene_num_up if g in ROW2IDX_dur],
        "D": [ROW2IDX_dur[g] for g in gene_num_down if g in ROW2IDX_dur],
    }
    N_UP = len(DATA["U"])
    N_DOWN = len(DATA["D"])
    #print(N_UP, N_DOWN)
    return DATA, N_UP, N_DOWN

In [124]:
# up/down gene set level es
def STEP1(v):
    # deg sorted up/down reg gene perrturb array
    v = sorted(v)
    #print('v:', v, len(v))
    # t = length of up/down reg gene perturb array
    t = len(v)
    
    # (i+1)/t = fraction of elements up to index i in sorted up/down gene list 
    # v[i]/n_genes = fraction of the rank of the current gene at index i out of the total number of genes
    # total upward (a) and downward (b) influence (es) for each up/down reg gene set
    a = max((i + 1) / t - v[i] / N_GENES for i in range(t)) #look into paper percentage versus ngenes (window)
    b = max(v[i] / N_GENES - i / t for i in range(t))
    es = 0
    if a > b: #check what a and b is
        es = a
    elif b > a:
        es = -b
    #print('a:',a,'b:', b)
    return es

# diff of up/down gene set es 
def STEP2(rank_u, rank_d):
    #print(rank_u)
    # calculate up genes and down genes es difference for each perturbation to determine overall es  
    es_u = STEP1(rank_u) if rank_u.size else 0
    es_d = STEP1(rank_d) if rank_d.size else 0
    #print(es_u, es_d)
    if np.sign(es_u) == np.sign(es_d):
        return 0
    return es_u - es_d


In [125]:
def run_gsea(DATA, N_UP, N_DOWN):
    background = np.array([STEP2(RANDOM_RANK[:N_UP, c], RANDOM_RANK[N_UP:N_UP + N_DOWN, c]) for c in range(N_REPEATS)])
    result = []
    # for each perturbation, calculate the es score difference of up genes and down genes
    for col in range(N_PERTURBATIONS):
        es = STEP2(cmap2_cmap1_perturb_rank_dur_arry[DATA["U"], col], cmap2_cmap1_perturb_rank_dur_arry[DATA["D"], col])
        #print(es)
        p = 1.0
        if es > 0:
            p = np.mean(background > es)
        elif es < 0:
            p = np.mean(background < es)
        result.append((es, p))
    return result

# format gsea

In [126]:
# all drugs enrichment score
def format_gsea_res(result, cell):
    res_formatted = pd.DataFrame(result)
    res_formatted.columns = ['es','p']
    res_formatted['es'] = [round(i,3) for i in res_formatted['es'].tolist()]
    res_formatted['p-adj']=statsmodels.stats.multitest.fdrcorrection(res_formatted['p'].tolist(), alpha=0.05, method='indep', is_sorted=False)[1]
    res_formatted = res_formatted.reset_index()
    res_formatted.index = COLS_dur
    #res_formatted = res_formatted.drop('level_0',axis = 1)
    res_formatted['drug'] = [i.split('--')[0] for i in res_formatted.index.tolist()]
    enrich_dir = '/share/fsmresfiles/SpatialT/drug-target-cmap2/'+dsid+'/'+sampleid+'/Enrichment_SVG/'
    if not os.path.exists(enrich_dir ):
        os.makedirs(enrich_dir )
    res_formatted.to_csv(enrich_dir+cell+'.csv')
    return res_formatted

In [127]:
# top and bottom 500 enriched drugs
def format_enrich(ds, cell):
    res = pd.DataFrame()
    res['CMAP_instance'] = [i.split('--')[1] for i in ds.index.tolist()]
    res['Drug'] = ds['drug'].tolist()
    res['Enrichment_score'] = ds['es'].tolist()
    res['P-value'] = ds['p'].tolist()
    res['Adj-p']= ds['p-adj'].tolist()
    res = res.sort_values('Enrichment_score', ascending=False)
    res_inv = res.head(500)
    res_pos = res.tail(500)
    enrich_dir = '/share/fsmresfiles/SpatialT/website/staging/data_v3_release/drug_screen/drug_enrichment_cmap2_SVG/'+dsid+'/'+sampleid+'/'
    if not os.path.exists(enrich_dir ):
        os.makedirs(enrich_dir )
    res_inv.to_csv(enrich_dir+cell+'_INV_es.csv', index=False)
    res_pos.to_csv(enrich_dir+cell+'_POS_es.csv', index=False)
    return res_inv, res_pos

# perturbation

In [128]:
def get_perturb_zs(cell, direction, es_df, dge, cell_type_deg_z, cell_type_deg_rank):
    perturb_dir = '/share/fsmresfiles/SpatialT/drug-target-cmap2/'+dsid+'/'+sampleid+'/Perturbation_SVG/'+cell+'/'+direction+'/'
    if not os.path.exists(perturb_dir):
        os.makedirs(perturb_dir)  
    for j,i in es_df.iterrows():
        drug_idx = i['CMAP_instance']
        drug = i['Drug']
        drug_new_idx = drug + '--' +drug_idx
        #print(j, drug_new_idx)
        zs = cell_type_deg_z[[drug_new_idx]]
        zs.columns = ['z']
        zs = zs.sort_values('z')
        rs = cell_type_deg_rank[[drug_new_idx]]
        rs=rs[rs.index.isin(zs.index)]
        rs.columns = ['rank']
        zs = zs.merge(rs, how = 'inner', left_index=True, right_index= True)
        zs = zs.merge(dge, how = 'left', left_index=True, right_on = 'gene')
        zs.to_csv(perturb_dir+drug_new_idx+'.csv', index =False)

In [129]:
# format pertrbation network to create edge and node file
def format_perturb(perturb_dir, cell):           
    os.chdir(perturb_dir)
    for direc in os.listdir(perturb_dir):
        os.chdir(perturb_dir+direc+'/')
        perturb_sample_dir = '/share/fsmresfiles/SpatialT/website/staging/data_v3_release/drug_screen/drug_perturbation_cmap2_SVG/'+dsid+'/'+sampleid+'/'+cell+'/'+direc+'/'
        if not os.path.exists(perturb_sample_dir):
            os.makedirs(perturb_sample_dir)
        
        for perturb in os.listdir():
            #print(direc, perturb)
            drug = perturb.split('--')[0]
            cmap_instance = perturb.split('--')[1].split('.csv')[0]
            cmap_drug = perturb.split('.csv')[0]
            
            # edges
            perturb_df = pd.read_csv(perturb)
            deg_gene_num = perturb_df.shape[0]
            perturb_edges = pd.DataFrame()
            perturb_edges['Source'] = [drug for i in range(deg_gene_num)]
            perturb_edges['CMAP_instance'] = [cmap_instance for i in range(deg_gene_num)]
            perturb_edges['Target'] = perturb_df['gene'].tolist()
            perturb_edges['log2fc'] = perturb_df['stat'].tolist()
            perturb_edges['exp_z'] = perturb_df['z'].tolist()
            perturb_edges['exp_z_norm'] = [i[0] for i in scaler.fit_transform(np.array(perturb_edges['exp_z'].tolist()).reshape(-1, 1))]
            perturb_edges['abs_log2fc'] = [abs(i) for i in perturb_df['stat'].tolist()]
            perturb_edges['sign_log2fc'] = [np.sign(i) for i in perturb_df['stat'].tolist()]
            perturb_edges = perturb_edges.sort_values('exp_z')
            head_edges = perturb_edges.head(30)
            tail_edges = perturb_edges.tail(30)
            perturb_edges_sub = pd.concat([head_edges, tail_edges])
            perturb_edges_sub.to_csv(perturb_sample_dir+cmap_drug+'_perturb.csv',index=False)
                
            # nodes
            perturb_nodes = pd.DataFrame()
            perturb_nodes['Id'] = perturb_edges_sub['Target'] 
            perturb_nodes['Label'] = perturb_edges_sub['Target'] 
            perturb_nodes['log2fc'] = perturb_edges_sub['log2fc']
            perturb_nodes['sign_log2fc'] = perturb_edges_sub['sign_log2fc'] 
            perturb_nodes['abs_log2fc'] = perturb_edges_sub['abs_log2fc'] 
            drug_row = pd.DataFrame({'Id':[perturb_edges_sub['Source'].unique().tolist()[0]],
                                 'Label':[perturb_edges_sub['Source'].unique().tolist()[0]],
                                 'log2fc':[10],
                                 'sign_log2fc':[0],
                                 'abs_log2fc':[10]})
            perturb_nodes = pd.concat([drug_row, perturb_nodes])
            perturb_nodes.to_csv(perturb_sample_dir+cmap_drug+'_perturb_nodes.csv',index=False)


# running enrichment

In [133]:
def run_enrich_perturb(dsid, sampleid, deg_dir, cell_direc):
    for ct in cell_direc:
        # deg
        cell = ct.split('.csv')[0]
        deg = pd.read_csv(deg_dir + '/' + ct)
        print('deg file read in for:', dsid, sampleid, cell)
        deg_up, deg_down = get_up_down_deg(deg)
        print('deg shape:', deg_up.shape, deg_down.shape)
        if deg_up.shape[0] > 2000 or deg_up.shape[0] <10 or deg_down.shape[0] > 2000 or deg_down.shape[0] <10:
            print('deg too few or too many')
            print('\n')
            continue
        gene_num_up, gene_num_down =  hgnc_gene_up_down(deg_up, deg_down )
        write_gene_up_down(gene_num_up, cell, 'UP')
        write_gene_up_down(gene_num_up, cell, 'DOWN')
        print('degs hgnc-formatted:', dsid, sampleid)    

        # enrichment
        DATA, N_UP,N_DOWN =  cmap2_gsea_setup(gene_num_up, gene_num_down)
        print('cmap matched deg:',N_UP,N_DOWN)
        gsea_res= run_gsea(DATA, N_UP, N_DOWN)
        print('finish gsea, head:', gsea_res[:3])
        gsea_res_form = format_gsea_res(gsea_res, cell)
        enrich_inv, enrich_pos = format_enrich(gsea_res_form, cell)
        print('enrich saved, shape:', enrich_inv.shape, enrich_pos.shape, enrich_inv.head(1), enrich_inv.head(1))

        # perturbation
        dge =pd.concat([deg_up, deg_down]).sort_values('stat')
        cell_type_deg_rank = cmap2_cmap1_perturb_rank_dur[cmap2_cmap1_perturb_rank_dur.index.isin(dge['gene'].tolist())]
        cell_type_deg_z = z_score_df[z_score_df.index.isin(dge['gene'].tolist())]
        print('deg genes matched to cmap:',cell_type_deg_z.shape[0])
        get_perturb_zs(cell, 'INV', enrich_inv, dge, cell_type_deg_z, cell_type_deg_rank)
        get_perturb_zs(cell, 'POS',enrich_pos, dge, cell_type_deg_z, cell_type_deg_rank)
        print('inv and pos perturbs saved:', len(os.listdir('/share/fsmresfiles/SpatialT/drug-target-cmap2/'+dsid+'/'+sampleid+'/Perturbation_SVG/'+cell+'/'+'INV/')),len(os.listdir('/share/fsmresfiles/SpatialT/drug-target-cmap2/'+dsid+'/'+sampleid+'/Perturbation_SVG/'+cell+'/'+'POS/')) )
        perturb_dir = '/share/fsmresfiles/SpatialT/drug-target-cmap2/'+dsid+'/'+sampleid+'/Perturbation_SVG/'+cell+'/'
        format_perturb(perturb_dir, cell)
        print('perturb files formatted:', len(os.listdir('/share/fsmresfiles/SpatialT/website/staging/data_v3_release/drug_screen/drug_perturbation_cmap2_SVG/'+dsid+'/'+sampleid+'/'+cell+'/INV/')), len(os.listdir('/share/fsmresfiles/SpatialT/website/staging/data_v3_release/drug_screen/drug_perturbation_cmap2_SVG/'+dsid+'/'+sampleid+'/'+cell+'/POS/')))
        print('\n')


In [134]:
#z = 0
#for i in range(all_patho_deg.shape[0]):
    #print(z)
    #dsid = all_patho_deg.iloc[0][0]
    #sampleid = all_patho_deg.iloc[0][1]
dsid = 'DS4A'
sampleid = 'DS4A.1'
deg_dir = '/share/fsmresfiles/SpatialT/drug-target/'+dsid+'/'+sampleid+'/DGE_dec_SVG'
cell_direc = [i for i in os.listdir(deg_dir) if 'UP' not in i and 'DOWN' not in i]
run_enrich_perturb(dsid, sampleid, deg_dir, cell_direc)
    #z+=1

deg file read in for: DS4A DS4A.1 B.cell
deg shape: (373, 4) (1325, 4)
degs hgnc-formatted: DS4A DS4A.1
cmap matched deg: 294 1141
finish gsea, head: [(0, 1.0), (0, 1.0), (0, 1.0)]
enrich saved, shape: (500, 5) (500, 5)                                         CMAP_instance           Drug  \
124110  PAC038_U2OS_6H:BRD-K26431593-001-01-3:10.0258  BRD-K26431593   

        Enrichment_score  P-value  Adj-p  
124110              0.37      0.0    0.0                                           CMAP_instance           Drug  \
124110  PAC038_U2OS_6H:BRD-K26431593-001-01-3:10.0258  BRD-K26431593   

        Enrichment_score  P-value  Adj-p  
124110              0.37      0.0    0.0  
deg genes matched to cmap: 1435
inv and pos perturbs saved: 500 500
perturb files formatted: 1000 1000


deg file read in for: DS4A DS4A.1 TMKI67
deg shape: (1401, 4) (766, 4)
degs hgnc-formatted: DS4A DS4A.1
cmap matched deg: 1152 646
finish gsea, head: [(-0.1969237122481995, 0.0), (0, 1.0), (-0.1745978011410172, 0.

deg genes matched to cmap: 426
inv and pos perturbs saved: 500 500
perturb files formatted: 1000 1000


deg file read in for: DS4A DS4A.1 Macrophage
deg shape: (559, 4) (1910, 4)
degs hgnc-formatted: DS4A DS4A.1
cmap matched deg: 462 1595
finish gsea, head: [(0, 1.0), (0, 1.0), (0, 1.0)]
enrich saved, shape: (500, 5) (500, 5)                                         CMAP_instance           Drug  \
117948  PAC039_U2OS_6H:BRD-K24514314-001-01-6:10.0068  BRD-K24514314   

        Enrichment_score  P-value  Adj-p  
117948             0.382      0.0    0.0                                           CMAP_instance           Drug  \
117948  PAC039_U2OS_6H:BRD-K24514314-001-01-6:10.0068  BRD-K24514314   

        Enrichment_score  P-value  Adj-p  
117948             0.382      0.0    0.0  
deg genes matched to cmap: 2057
inv and pos perturbs saved: 500 500
perturb files formatted: 1000 1000




## case study

In [140]:
enrich_inv_mal = pd.read_csv('/share/fsmresfiles/SpatialT/drug-target-cmap2/DS4A/DS4A.1/Enrichment_SVG/Malignant.csv')

In [141]:
case_drugs = ['everolimus','sirolimus','BRD-K68202742','LY-294002','wortmannin','BIBU-1361','perhexiline','NVP-AUY922']

In [142]:
enrich_inv_mal = enrich_inv_mal.sort_values('es', ascending=False)
enrich_inv_mal = enrich_inv_mal.reset_index(drop=True)

In [144]:
def case_drugs_select(enrich_df, ct):
    enrich_df.index = enrich_df['Unnamed: 0']
    enrich_df = enrich_df.drop('Unnamed: 0', axis = 1)
    enrich_df_sel = enrich_df[enrich_df['drug'].isin(case_drugs)]
    enrich_df_sel = enrich_df_sel.sort_values('es', ascending=False)
    enrich_df_sel = enrich_df_sel.drop_duplicates(subset=['drug'])
    enrich_df_sel['cell_type'] = ct

    return enrich_df_sel

In [145]:
enrich_inv_mal_case = case_drugs_select(enrich_inv_mal, 'Mal')

In [146]:
enrich_inv_mal_case

Unnamed: 0_level_0,index,es,p,p-adj,drug,cell_type
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
everolimus--CPC014_HT29_6H:BRD-K13514097-001-01-2:10,19394,0.339,0.0,0.0,everolimus,Mal
wortmannin--ERG015_PC3_6H:BRD-A75409952-001-01-6:10,97377,0.314,0.0,0.0,wortmannin,Mal
BIBU-1361--CPC005_VCAP_6H:BRD-K49294207-300-01-2:10,65349,0.309,0.0,0.0,BIBU-1361,Mal
sirolimus--CPC006_CL34_6H:BRD-A23770159-001-02-3:11.1,49788,0.298,0.0,0.0,sirolimus,Mal
LY-294002--PAC038_U2OS_6H:BRD-K27305650-003-01-4:20,118689,0.298,0.0,0.0,LY-294002,Mal
NVP-AUY922--CPC006_NCIH508_6H:BRD-K41859756-001-01-9:10,51182,0.278,0.0,0.0,NVP-AUY922,Mal
BRD-K68202742--HOG002_MCF7_6H:BRD-K68202742-001-10-8:0.3704,11089,0.274,0.0,0.0,BRD-K68202742,Mal
perhexiline--CPC006_PC3_6H:BRD-A19633847-050-20-6:10,34747,0.261,0.0,0.0,perhexiline,Mal


In [152]:
enrich_dir = '/share/fsmresfiles/SpatialT/drug-target-cmap2/DS4A/DS4A.1/Enrichment_SVG'
def case_drugs_select_nonmal(ct):
    enrich_df = pd.read_csv(enrich_dir+'/'+ct+'.csv')
    enrich_df.index = enrich_df['Unnamed: 0']
    enrich_df = enrich_df.drop('Unnamed: 0', axis = 1)
    enrich_df_sel = enrich_df[enrich_df.index.isin(enrich_inv_mal_case.index.tolist())]
    enrich_df_sel['cell_type'] = ct
    return enrich_df_sel

In [None]:
enrich_inv_tmk_case = case_drugs_select_nonmal('TMKI67')
enrich_inv_cd4_case = case_drugs_select_nonmal('CD4')
enrich_inv_cd8_case = case_drugs_select_nonmal('CD8')
enrich_inv_dc_case = case_drugs_select_nonmal('DC')
enrich_inv_mono_case = case_drugs_select_nonmal('Monocyte')
enrich_inv_nk_case = case_drugs_select_nonmal('NK')
enrich_inv_treg_case = case_drugs_select_nonmal('T.reg')

In [169]:
ds4a1_enrich_sel_drugs = pd.concat([pd.concat([pd.concat([pd.concat([pd.concat([pd.concat([pd.concat([enrich_inv_mal_case,enrich_inv_tmk_case]), enrich_inv_cd4_case]),enrich_inv_cd8_case]), enrich_inv_nk_case]),enrich_inv_dc_case]),enrich_inv_mono_case]),enrich_inv_treg_case])

In [170]:
drug_upd = []
for i in ds4a1_enrich_sel_drugs['drug'].tolist():
    if i == 'BRD-K68202742':
        drug_upd.append('Trichostatin A (HDAC)')
    elif i == 'NVP-AUY922':
        drug_upd.append('Luminespib (HSP90)')
    elif i == 'LY-294002':
        drug_upd.append('LY-294002 (PI3K)')
    elif i == 'wortmannin':
        drug_upd.append('Wortmannin (PI3K)')
    elif i == 'perhexiline':
        drug_upd.append('Perhexiline (CPT)')
    elif i == 'BIBU-1361':
        drug_upd.append('BIBU-1361 (EGFR)')
    elif i == 'sirolimus':
        drug_upd.append('Sirolimus (mTOR)')
    elif i == 'everolimus':
        drug_upd.append('Everolimus (mTOR)')

In [172]:
ds4a1_enrich_sel_drugs['Drug_upd'] = drug_upd

In [173]:
ds4a1_enrich_sel_drugs

Unnamed: 0_level_0,index,es,p,p-adj,drug,cell_type,Drug_upd
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
everolimus--CPC014_HT29_6H:BRD-K13514097-001-01-2:10,19394,0.339,0.0,0.0,everolimus,Mal,Everolimus (mTOR)
wortmannin--ERG015_PC3_6H:BRD-A75409952-001-01-6:10,97377,0.314,0.0,0.0,wortmannin,Mal,Wortmannin (PI3K)
BIBU-1361--CPC005_VCAP_6H:BRD-K49294207-300-01-2:10,65349,0.309,0.0,0.0,BIBU-1361,Mal,BIBU-1361 (EGFR)
sirolimus--CPC006_CL34_6H:BRD-A23770159-001-02-3:11.1,49788,0.298,0.0,0.0,sirolimus,Mal,Sirolimus (mTOR)
LY-294002--PAC038_U2OS_6H:BRD-K27305650-003-01-4:20,118689,0.298,0.0,0.0,LY-294002,Mal,LY-294002 (PI3K)
...,...,...,...,...,...,...,...
sirolimus--CPC006_CL34_6H:BRD-A23770159-001-02-3:11.1,49788,-0.197,0.0,0.0,sirolimus,T.reg,Sirolimus (mTOR)
NVP-AUY922--CPC006_NCIH508_6H:BRD-K41859756-001-01-9:10,51182,-0.190,0.0,0.0,NVP-AUY922,T.reg,Luminespib (HSP90)
BIBU-1361--CPC005_VCAP_6H:BRD-K49294207-300-01-2:10,65349,-0.197,0.0,0.0,BIBU-1361,T.reg,BIBU-1361 (EGFR)
wortmannin--ERG015_PC3_6H:BRD-A75409952-001-01-6:10,97377,0.000,1.0,1.0,wortmannin,T.reg,Wortmannin (PI3K)


In [176]:
ds4a1_enrich_sel_drugs.to_csv('DS4A1_mal_svg_ranked_case_study_drugs_es.csv')