In [1]:
import pandas as pd
import numpy as np
import sys

In [2]:
"""

 1. Instantiate the TAS class

    parameters: 
    
    file_dss, string (csv file path) or pd.DataFrame
    file_dti, string (csv file path) or pd.DataFrame
    dti_cutoff, int, default 10,000
    permutation_round, int, default 10,000
    rnd_seed, int, default 0
    
    DSS file format: the 1st column is the drug id; each remaining column holds the DSS in one cell, with the cell name as the column header.
    DTI file format: the 1st column is the drug id; the 1st row holds the target ids.
    
"""

# Make the TAS module in the working directory importable.
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate './' entry to sys.path each time.
if './' not in sys.path:
    sys.path.append('./')
from TAS import TAS

# Example input tables shipped alongside the notebook (relative paths).
file_dss = './input_files/dss_example.csv'
file_dti = './input_files/dti_example.csv'

# NOTE(review): permutation_round is 1000 here, lower than the default of
# 10,000 listed in the docstring above -- presumably to keep the example
# fast; confirm before reusing these settings for a real analysis.
tas_example = TAS(
    file_dss=file_dss,
    file_dti=file_dti,
    dti_cutoff=10000,
    permutation_round=1000,
    rnd_seed=0,
)

In [3]:
"""

  2. Calculate the TAS scores and p-values, then save all result files

"""

# Run the TAS computation; the five returned objects are inspected one by one
# in the cells that follow.
(
    dss_has_dti,
    tas_result,
    tas_df,
    tas_pv,
    tas_pa,
) = tas_example.cal_tas_pvalue()

# Write the results to disk under the given output directory and file-name tag.
tas_example.save_files(
    out_dir='./results_tas_example/',
    fname='eg',
)

Done permutation, time used in min:  0.3196548382441203


In [4]:
"""

DSS table restricted to the compounds that have known targets according to the dti_cutoff.

"""

# Preview the first five rows of the filtered DSS table.
dss_has_dti.head(n=5)

Unnamed: 0,BT-549,CAL-148,CAL-51,HCC1937,Hs-578T,MDA-MB-231,MDA-MB-436,MDA-MB-468,CAL-120,CAL-85-1,DU-4475,HCC1143,HDQ-P1,MFM-223,BT-20,HCC-1187,HCC-1395,HCC-1806,HCC-38,HCC-70
CHEMBL576982,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8,0.0,0.0,0.0,0.0
CHEMBL3544932,2.6,24.9,28.6,0.0,2.3,0.0,1.6,8.7,1.0,2.2,4.1,0.0,3.3,7.1,2.6,1.5,1.4,0.0,5.8,1.3
CHEMBL1801204,0.0,24.6,16.0,3.4,0.3,0.0,0.0,0.0,0.0,2.8,11.6,2.3,0.7,3.7,0.7,1.7,0.5,0.0,0.0,0.0
CHEMBL3545097,0.0,13.3,7.9,0.0,0.0,0.4,0.0,0.0,0.0,0.4,5.0,0.0,0.0,3.1,0.0,0.0,0.0,0.0,0.1,0.0
CHEMBL521851,0.3,17.7,18.8,1.5,1.4,0.0,0.0,0.0,0.0,2.5,4.1,0.2,3.0,1.7,7.5,0.0,1.6,0.0,0.0,0.0


In [5]:
"""

TAS results table, including the FDR-adjusted p-values (PVAdjust column).

"""

# Preview the first five rows of the TAS results table.
tas_result.head(n=5)

Unnamed: 0,Target,Cell_line,TAS,PVal,PVAdjust
0,Q16288,BT-549,0.064286,0.919,1.0
20,P54764,BT-549,0.966667,0.243,0.987737
40,O43353,BT-549,0.189286,0.958,1.0
60,P48736,BT-549,0.471429,0.55,1.0
80,P06239,BT-549,0.557895,0.572,1.0


In [6]:
"""

TAS scores, target x cell matrix.

"""

# Preview the first five rows of the TAS score matrix.
tas_df.head(n=5)

Unnamed: 0,BT-549,CAL-148,CAL-51,HCC1937,Hs-578T,MDA-MB-231,MDA-MB-436,MDA-MB-468,CAL-120,CAL-85-1,DU-4475,HCC1143,HDQ-P1,MFM-223,BT-20,HCC-1187,HCC-1395,HCC-1806,HCC-38,HCC-70
Q16288,0.064286,5.3,9.785714,0.107143,1.135714,1.521428,0.65,1.571429,0.114286,0.685714,3.785714,0.221429,0.285714,1.171429,1.764286,1.557143,0.7,0.385714,0.978571,0.985714
P54764,0.966667,12.255556,17.311111,0.533333,1.722222,1.122222,1.222222,3.888889,1.033333,4.255556,5.755556,0.977778,2.7,2.788889,2.877778,2.066667,1.788889,1.044445,3.044444,1.455556
O43353,0.189286,4.032143,5.917857,0.153571,0.432143,0.878571,0.282143,0.571429,0.292857,3.214286,3.707143,0.321429,0.635714,0.825,0.975,0.778571,0.489286,0.217857,0.65,0.246429
P48736,0.471429,10.842856,11.821427,0.935714,1.635714,2.785714,0.092857,1.692857,1.8,3.514286,4.771428,0.878571,2.257143,3.692857,4.507143,2.821429,4.342857,1.892857,1.7,1.092857
P06239,0.557895,8.831579,11.892105,0.428947,1.247368,1.268421,0.889474,2.455263,0.731579,3.7,4.405263,1.042105,1.610526,2.152632,2.48158,2.45,1.7,1.310526,2.613158,1.071053


In [7]:
"""

TAS p-values, target x cell matrix.

"""

# Preview the first five rows of the TAS p-value matrix.
tas_pv.head(n=5)

Unnamed: 0,BT-549,CAL-148,CAL-51,HCC1937,Hs-578T,MDA-MB-231,MDA-MB-436,MDA-MB-468,CAL-120,CAL-85-1,DU-4475,HCC1143,HDQ-P1,MFM-223,BT-20,HCC-1187,HCC-1395,HCC-1806,HCC-38,HCC-70
Q16288,0.919,0.809,0.363,0.922,0.545,0.777,0.59,0.529,0.92,0.994,0.649,0.949,0.976,0.864,0.723,0.908,0.94,0.966,0.941,0.558
P54764,0.243,0.093,0.014,0.551,0.323,0.81,0.292,0.069,0.353,0.336,0.289,0.48,0.102,0.424,0.443,0.71,0.579,0.732,0.413,0.356
O43353,0.958,0.984,0.957,0.976,0.982,0.984,0.958,0.994,0.95,0.641,0.776,0.976,0.962,0.994,0.978,1.0,1.0,1.0,1.0,0.99
P48736,0.55,0.111,0.174,0.303,0.334,0.463,0.944,0.479,0.18,0.49,0.46,0.542,0.14,0.246,0.193,0.535,0.116,0.498,0.773,0.533
P06239,0.572,0.22,0.044,0.851,0.586,0.979,0.514,0.144,0.677,0.47,0.574,0.534,0.332,0.739,0.664,0.756,0.843,0.866,0.629,0.633


In [8]:
"""

TAS adjusted p-values (FDR), target x cell matrix.

"""

# Preview the first five rows of the adjusted p-value (FDR) matrix.
tas_pa.head(n=5)

Cell_line,BT-20,BT-549,CAL-120,CAL-148,CAL-51,CAL-85-1,DU-4475,HCC-1187,HCC-1395,HCC-1806,HCC-38,HCC-70,HCC1143,HCC1937,HDQ-P1,Hs-578T,MDA-MB-231,MDA-MB-436,MDA-MB-468,MFM-223
Target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
B2RXH2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
O00141,1.0,1.0,1.0,0.828138,0.729732,1.0,1.0,0.999678,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
O00238,1.0,1.0,1.0,0.83676,0.597131,0.796156,1.0,1.0,1.0,1.0,0.835805,0.964068,1.0,1.0,0.691089,0.944,1.0,1.0,0.621765,1.0
O00311,1.0,1.0,1.0,0.659679,0.319551,0.912665,0.931276,0.999678,1.0,0.918478,0.82062,1.0,1.0,1.0,0.807763,0.995525,1.0,0.999945,0.613134,1.0
O00329,1.0,1.0,1.0,0.786017,0.662736,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.853284,1.0,1.0,1.0,1.0,1.0
