In [109]:
import numpy as np
import pandas as pd
import requests
import json

In [110]:
def signature_extractor(file):
    
    # preprocessing data before 
            #calculating up/down regulated genes
        
    stat = pd.read_table(file, sep=" ")
    stat = stat.apply(pd.to_numeric)
    
    up_genes = []
    down_genes = []
    names = stat.columns
    
    # calculating up/down regulated genes
    
    if names[0] == "logFC" and names[2] == "PValue":
        up_genes = stat[(stat["logFC"] >= np.log(2)) & (stat["PValue"] <= 1e-3)].index
        down_genes = stat[(stat["logFC"] <= np.log(0.5)) & (stat["PValue"] <= 1e-3)].index
        
    if names[1] == "log2FoldChange" and names[5] == "padj":
        up_genes = stat[(stat["log2FoldChange"] >= np.log2(2)) & (stat["padj"] <= 1e-3)].index
        down_genes = stat[(stat["log2FoldChange"] <= np.log2(0.5)) & (stat["padj"] <= 1e-3)].index    
        
    return up_genes, down_genes

In [111]:
stat_edger = pd.read_table("~/Downloads/DE_heart_fb_deseq2_edger.txt", sep=" ")
stat_deseq = pd.read_table("~/Downloads/DE_heart_fb_deseq2_deseq2.txt", sep=" ")

In [112]:
stat_edger

Unnamed: 0,logFC,logCPM,PValue
A1BG,-1.698154,3.142090,8.742768e-153
A1CF,0.789337,-1.235338,1.931845e-11
A2M,4.161486,8.595770,0.000000e+00
A2ML1,-0.843630,0.312166,9.159935e-25
A2MP1,8.379818,0.348657,0.000000e+00
...,...,...,...
BP-2171C21.4,0.905227,-6.219698,1.194889e-01
BP-2171C21.5,4.622840,-4.020418,4.622842e-52
BP-2171C21.6,3.737071,-2.167515,2.873120e-95
BP-2189O9.2,2.743989,2.278862,0.000000e+00


In [113]:
stat_deseq

Unnamed: 0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj
A1BG,104.033051,-1.290317,0.150884,-8.551720,1.212678e-17,9.294413e-17
A1CF,5.198336,0.883494,0.128005,6.902011,5.127130e-12,2.735763e-11
A2M,1073.552347,2.176929,0.221235,9.839893,7.579178e-23,7.591673e-22
A2ML1,10.459817,0.689752,0.131503,5.245165,1.561428e-07,5.384772e-07
A2MP1,17.444379,4.063652,0.152135,26.710848,3.521566e-157,4.206541e-155
...,...,...,...,...,...,...
BP-2171C21.4,1.320363,-0.409913,0.127170,-3.223338,1.267059e-03,1.990488e-03
BP-2171C21.5,1.764268,0.774370,0.134721,5.747972,9.032043e-09,3.586154e-08
BP-2171C21.6,4.207419,1.430520,0.130206,10.986614,4.432452e-28,5.491939e-27
BP-2189O9.2,39.811244,4.247406,0.168005,25.281404,5.116107e-141,4.794718e-139


In [114]:
up_genes_edger, down_genes_edger = signature_extractor("~/Downloads/DE_heart_fb_deseq2_edger.txt")

In [115]:
up_genes_deseq, down_genes_deseq = signature_extractor("~/Downloads/DE_heart_fb_deseq2_deseq2.txt")

In [116]:
pd.DataFrame(up_genes_edger)

Unnamed: 0,0
0,A1CF
1,A2M
2,A2MP1
3,A3GALT2
4,A4GALT
...,...
8575,BP-2171C21.2
8576,BP-2171C21.5
8577,BP-2171C21.6
8578,BP-2189O9.2


In [117]:
url = 'https://maayanlab.cloud/L1000CDS2/query'

def upperGenes(genes):
    # The app uses uppercase gene symbols. So it is crucial to perform upperGenes() step.
    return [gene.upper() for gene in genes]

# gene-set search example
data = {"upGenes":up_genes_edger,
"dnGenes":down_genes_edger}
data['upGenes'] = upperGenes(data['upGenes'])
data['dnGenes'] = upperGenes(data['dnGenes'])
config = {"aggravate":True,"searchMethod":"geneSet","share":True,"combination":True,"db-version":"latest"}
metadata = [{"key":"Tag","value":"gene-set python example"},{"key":"Cell","value":"MCF7"}]
payload = {"data":data,"config":config,"meta":metadata}
headers = {'content-type':'application/json'}
r = requests.post(url,data=json.dumps(payload),headers=headers)
resGeneSet = r.json()

In [118]:
resGeneSet["topMeta"]

[{'pert_desc': 'PP-110',
  'pert_dose_unit': 'um',
  'pert_id': 'BRD-K03618428',
  'cell_id': 'HT29',
  'pert_dose': '22.2',
  'pert_time': '24.0',
  'pert_time_unit': 'h',
  'sig_id': 'CPC006_HT29_24H:BRD-K03618428:22.2',
  'score': 0.0324,
  'overlap': {'up/up': ['ADH1C',
    'ALDH5A1',
    'ALDH6A1',
    'BTG2',
    'C11ORF71',
    'C1QA',
    'CALCOCO1',
    'CFTR',
    'CLCA1',
    'CLIC5',
    'DECR1',
    'EFNA1',
    'ERBB4',
    'F13A1',
    'FCGBP',
    'FOXO4',
    'GLTSCR2',
    'HPGD',
    'KLHL24',
    'KRT20',
    'LEFTY1',
    'MAOA',
    'MUC2',
    'PCK1',
    'PDZD2',
    'RAP1GAP',
    'SEPP1',
    'SESN1',
    'ST6GAL1',
    'ST6GALNAC2',
    'TNNT1',
    'TSC22D3',
    'TST',
    'VSIG4',
    'WT1',
    'ZNF91'],
   'dn/dn': ['ABCE1',
    'ACSL3',
    'ADK',
    'AGPAT5',
    'ALDH1A3',
    'ANP32E',
    'APOBEC3B',
    'ARNTL2',
    'ASCC3',
    'ASF1B',
    'ASPM',
    'ATF5',
    'AURKA',
    'AURKB',
    'BAG2',
    'BARD1',
    'BCLAF1',
    'BIRC5',
    'BRC

In [119]:
try:
    for i in resGeneSet["topMeta"]:
        if "pert_desc" in i:
            print(i["pert_id"])
except KeyError:
    print("No such a Key")

BRD-K03618428
BRD-K56343971
BRD-K03618428
BRD-K34581968
BRD-K34581968
BRD-K41895714
BRD-K41895714
BRD-K57080016
BRD-K98490050
BRD-K12502280
BRD-K81418486
BRD-K81418486
BRD-K68548958
BRD-A19037878
BRD-A12230535
BRD-K57080016
BRD-K13049116
BRD-K53414658
BRD-A36630025
BRD-K12502280
BRD-K81418486
BRD-K12343256
BRD-A45889380
BRD-A13122391
BRD-K64800655
BRD-K56343971
BRD-K88510285
BRD-K69932463
BRD-K57080016
BRD-K03618428
BRD-K21680192
BRD-A19037878
BRD-K49371609
BRD-A19037878
BRD-K33272502
BRD-A45889380
BRD-K50168500
BRD-A19037878
BRD-K19295594
BRD-K07762753
BRD-A19037878
BRD-K81418486
BRD-K34581968
BRD-K49865102
BRD-A35588707
BRD-A35588707
BRD-K81418486
BRD-A58564983
BRD-A18497530
BRD-K05104363


In [120]:
try:
    for i in resGeneSet["topMeta"]:
        if "pert_desc" in i:
            print(i["pubchem_id"])
except KeyError:
    print("No such a Key")

24905203
42611257
24905203
11353973
11353973
10377751
10377751
10127622
3926765
16722836
5311
5311
1285940
6376322
216345
10127622
24785538
9911830
4014291
16722836
5311
11707110
23581813
16220015
46191454
42611257
387447
25262965
10127622
24905203
5458171
6376322
10275789
6376322
11296282
23581813
156414
6376322
227456
6604931
6376322
5311
11353973
9826528
6708778
6708778
5311
16667695
1830
6918454


In [121]:
try:
    for i in resGeneSet["combinations"]:
        print(i)
except KeyError:
    print("No such a Key")

{'X1': 'CPC006_HT29_24H:BRD-K03618428:22.2', 'X2': 'CPC006_HA1E_24H:BRD-K81418486:11.1', 'value': 0.0524}
{'X1': 'CPC006_HT29_24H:BRD-K03618428:22.2', 'X2': 'CPC006_HA1E_24H:BRD-K81418486:10.0', 'value': 0.0516}
{'X1': 'CPC006_HT29_24H:BRD-K03618428:22.2', 'X2': 'CPC006_HA1E_24H:BRD-A19037878:10.0', 'value': 0.0512}
{'X1': 'CPC006_HT29_24H:BRD-K03618428:22.2', 'X2': 'CPC007_HA1E_24H:BRD-A19037878:10.0', 'value': 0.0486}
{'X1': 'CPC006_HT29_24H:BRD-K03618428:22.2', 'X2': 'CPC002_HA1E_24H:BRD-A19037878:10.0', 'value': 0.0485}
{'X1': 'CPC006_HT29_24H:BRD-K03618428:22.2', 'X2': 'CPC006_HA1E_24H:BRD-K88510285:0.04', 'value': 0.0476}
{'X1': 'CPC006_HT29_24H:BRD-K03618428:22.2', 'X2': 'CPC006_A375_24H:BRD-A13122391:0.08', 'value': 0.0475}
{'X1': 'CPC006_A375_24H:BRD-K56343971:10.0', 'X2': 'CPC006_HA1E_24H:BRD-K81418486:11.1', 'value': 0.0472}
{'X1': 'CPC006_HT29_24H:BRD-K34581968:11.1', 'X2': 'CPC006_HA1E_24H:BRD-K81418486:11.1', 'value': 0.0472}
{'X1': 'CPC006_A375_24H:BRD-K56343971:10.0', '