In [2]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
import pubchempy as pcp
import requests

In [6]:
# PharmGKB clinical variant annotations. Rows missing either a chemical or a
# gene are discarded and the index is renumbered from 0.
pharmgkb_df = (
    pd.read_csv('rawData/PharmGKB/clinicalVariants/clinicalVariants.tsv', sep='\t')
    .dropna(subset=['chemicals', 'gene'])
    .reset_index(drop=True)
)
pharmgkb_df.head()

Unnamed: 0,variant,gene,type,level of evidence,chemicals,phenotypes
0,"CYP2C9*1, CYP2C9*2, CYP2C9*3, CYP2C9*13",CYP2C9,Metabolism/PK,1A,meloxicam,
1,"CYP2C9*1, CYP2C9*3, CYP2C9*13",CYP2C9,Metabolism/PK,1A,lornoxicam,
2,"CYP2C9*1, CYP2C9*2, CYP2C9*3",CYP2C9,Metabolism/PK,1A,siponimod,
3,rs17376848,DPYD,Toxicity,1A,capecitabine,Neoplasms
4,rs2297595,DPYD,Toxicity,1A,capecitabine,Neoplasms


In [7]:
# PharmGKB chemical catalogue; the PubChem identifier column is read as text.
chemicals_df = pd.read_csv(
    'rawData/PharmGKB/chemicals/chemicals.tsv',
    sep='\t',
    dtype={'PubChem Compound Identifiers': str},
)
chemicals_df.head()


Unnamed: 0,PharmGKB Accession Id,Name,Generic Names,Trade Names,Brand Mixtures,Type,Cross-references,SMILES,InChI,Dosing Guideline,...,VIP Count,Dosing Guideline Sources,Top Clinical Annotation Level,Top FDA Label Testing Level,Top Any Drug Label Testing Level,Label Has Dosing Info,Has Rx Annotation,RxNorm Identifiers,ATC Identifiers,PubChem Compound Identifiers
0,PA166250381,10-desmethyl alpha-dihydrotetrabenazine,"""(2R,3R,11bR)-9-methoxy-3-(2-methylpropyl)-2,3...",,,Metabolite,PubChem Compound:59272813,,,No,...,0,,,,,,,,,59272813.0
1,PA166250405,10-desmethyl beta-dihydrodeutetrabenazine,"""(2S,3R,11bR)-3-(2-methylpropyl)-9-(trideuteri...",,,Metabolite,PubChem Compound:117693284,,,No,...,0,,,,,,,,,117693284.0
2,PA166250403,10-desmethyl beta-dihydrotetrabenazine,"""(2S,3R,11bR)-9-methoxy-3-(2-methylpropyl)-2,3...",,,Metabolite,PubChem Compound:59272796,,,No,...,0,,,,,,,,,59272796.0
3,PA166178620,10-formyltetrahydrofolate,,,,Biological Intermediate,PubChem Compound:10,,,No,...,0,,,,,,,,,10.0
4,PA166131395,10-hydroxy r-warfarin,,,,Metabolite,HMDB:HMDB13884,,,No,...,0,,,,,,,,,


In [8]:
# PharmGKB variants: keep the three columns used below, then only rows that
# have a gene and at least one synonym containing 'NP'.
variant_df = pd.read_csv('rawData/PharmGKB/variants/variants.tsv', sep='\t')
columns = ['Variant Name', 'Gene Symbols', 'Synonyms']
variant_df = variant_df.loc[:, columns].dropna(subset=['Gene Symbols'])
# na=False: a variant without any synonym is simply "no match" instead of
# making the boolean mask invalid.
variant_df = variant_df[variant_df['Synonyms'].str.contains('NP', na=False)].reset_index(drop=True)

# Three-letter -> one-letter codes for the 20 standard amino acids.
protein_dict = {
    'Gly':'G', 'Ala':'A', 'Val':'V', 'Leu':'L', 'Ile':'I',
    'Pro':'P', 'Phe':'F', 'Tyr':'Y', 'Trp':'W', 'Ser':'S',
    'Thr':'T', 'Cys':'C', 'Met':'M', 'Asn':'N', 'Gln':'Q',
    'Asp':'D', 'Glu':'E', 'Lys':'K', 'Arg':'R', 'His':'H'
}


def _short_protein_change(np_id):
    """Turn e.g. 'NP_000095.2:p.Arg48Gly' into 'R48G'.

    The text after the last 'p.' is read as <3-letter residue><middle><3-letter
    residue>. Returns None when either flanking triplet is not a key of
    `protein_dict`, so the caller can skip that variant.
    """
    change = np_id.split('p.')[-1]
    ref_code, alt_code = change[:3], change[-3:]
    if ref_code not in protein_dict or alt_code not in protein_dict:
        return None
    return protein_dict[ref_code] + change[3:-3] + protein_dict[alt_code]


# One record per variant whose first usable NP synonym converts cleanly.
# Records are collected in a list and the frame is built once:
# DataFrame.append was removed in pandas 2.0 and copied the frame on every row.
variant_records = []
for gene_symbols, rsid, synonyms in zip(
    variant_df['Gene Symbols'], variant_df['Variant Name'], variant_df['Synonyms']
):
    # Synonyms containing '=' are not missense changes and are ignored.
    missense_list = [s for s in synonyms.split(',') if 'NP' in s and '=' not in s]
    if not missense_list:
        continue
    NPid = missense_list[0].strip()
    variant = _short_protein_change(NPid)
    if variant is None:
        continue
    variant_records.append({
        'gene': gene_symbols.split(',')[0],  # first listed gene only
        'rsid': rsid,
        'variant': variant,
        'NPid': NPid,
    })

variant_table = pd.DataFrame(variant_records, columns=['gene', 'rsid', 'variant', 'NPid'])
variant_table

Unnamed: 0,gene,rsid,variant,NPid
0,CYP1B1,rs10012,R48G,NP_000095.2:p.Arg48Gly
1,ABCG2,rs1002012563,T512N,NP_004818.2:p.Thr512Asn
2,GABRP,rs10036156,L7V,NP_055026.1:p.Leu7Val
3,SLC5A7,rs1013940,I89V,NP_068587.1:p.Ile89Val
4,UGT1A10,rs10187694,E139K,NP_061948.1:p.Glu139Lys
...,...,...,...,...
1545,CYP2A6,rs199916117,K194E,NP_000753.3:p.Lys194Glu
1546,CYP3A4,rs201821708,Y319C,NP_059488.2:p.Tyr319Cys
1547,CYP2A6,rs28399440,F118L,NP_000753.3:p.Phe118Leu
1548,CYP2C9,rs370100007,Q214H,NP_000762.2:p.Gln214His


In [9]:
# Disabled export, kept for reference: it wrote one ('dbsnp', <Variant Name>)
# row per PharmGKB variant to middlefile/pharmgkb_query_variant_table.tsv.
# NOTE(review): presumably the coordinates file loaded below was produced from
# that export by an external lookup - confirm provenance.
#
# query_variant_df = pd.DataFrame()
# for i in range(len(variant_df)):
#     variant = variant_df['Variant Name'][i]
#     query_variant_df = query_variant_df.append([{'type':'dbsnp', 'variant': variant}], ignore_index = True)
#
# query_variant_df.to_csv('middlefile/pharmgkb_query_variant_table.tsv', sep = '\t', index=False)

# Genomic coordinates table (tab separated), keyed by the 'dbSNP' column.
gen_coord_df = pd.read_csv(
    'middlefile/pharmgkb_gen_coords_3a9b7334.txt',
    sep='\t',
)
gen_coord_df

Unnamed: 0,Variation ID,dbSNP,Chromosome,Position,REF Allele,ALT Allele (IUPAC),Minor Allele,Minor Allele Global Frequency,Contig,Contig Position,Band
0,rs7414551,rs7414551,1,964594,G,A,A,0.205471,GL000006.2,378606,p36.33
1,rs146898897,rs146898897,1,1582436,C,T,T,0.007388,GL000006.2,996448,p36.33
2,rs13303344,rs13303344,1,2016961,C,W,C,0.455072,GL000006.2,1430973,p36.33
3,rs4481796,rs4481796,1,2017366,T,C,T,0.216454,GL000006.2,1431378,p36.33
4,rs2376805,rs2376805,1,2024923,G,A,G,0.224441,GL000006.2,1438935,p36.33
...,...,...,...,...,...,...,...,...,...,...,...
6819,rs72554665,rs72554665,X,154532269,C,D,A,0.001589,GL000172.2,10056663,q28
6820,rs2230037,rs2230037,X,154532439,A,G,A,0.215894,GL000172.2,10056833,q28
6821,rs5030868,rs5030868,X,154534419,G,A,A,0.000795,GL000172.2,10058813,q28
6822,rs1050829,rs1050829,X,154535277,T,M,C,0.094570,GL000172.2,10059671,q28


In [10]:
def _first_row_lookup(frame, key, fields):
    """Map each non-null `key` value to a tuple of `fields` from the first row holding it.

    This reproduces ``frame[frame[key] == value][field].values[0]`` for every
    value at once, so the frame is scanned a single time instead of once per
    lookup.
    """
    firsts = frame.dropna(subset=[key]).drop_duplicates(subset=key, keep='first')
    return dict(zip(firsts[key], firsts[fields].itertuples(index=False, name=None)))


# Lookup tables, built once.
protein_change_by_rsid = _first_row_lookup(variant_table, 'rsid', ['variant'])
chemical_by_name = _first_row_lookup(
    chemicals_df, 'Name', ['Type', 'SMILES', 'PubChem Compound Identifiers']
)
coords_by_rsid = _first_row_lookup(
    gen_coord_df, 'dbSNP', ['Chromosome', 'Position', 'REF Allele', 'ALT Allele (IUPAC)']
)

pharmgkb_records = []
for rsid, genes, label, chemicals, disease_name in zip(
    pharmgkb_df['variant'],
    pharmgkb_df['gene'],
    pharmgkb_df['type'],
    pharmgkb_df['chemicals'],
    pharmgkb_df['phenotypes'],
):
    # Prefer the short protein change (e.g. 'M166V'); otherwise keep the
    # original variant label (rsID, star alleles, ...).
    (variant,) = protein_change_by_rsid.get(rsid, (rsid,))
    # Chemicals absent from the catalogue get empty annotations.
    chemical_type, chemical_smile, pubchem_id = chemical_by_name.get(
        chemicals, (None, None, None)
    )
    # Coordinates exist only for variants present in the coordinates table.
    chromosome, position, ref_allele, alt_allele = coords_by_rsid.get(
        rsid, (None, None, None, None)
    )

    pharmgkb_records.append({
        'data source': 'pharmgkb',
        'chemical_type': chemical_type,
        'chemical name': chemicals,
        'pubchem id': pubchem_id,
        'smile': chemical_smile,
        'disease name': disease_name,
        'gene symbol': genes.split(',')[0],  # first listed gene only
        'variant': variant,
        'chromosome': chromosome,
        'position': position,
        'ref allele': ref_allele,
        'alt allele': alt_allele,
        'label': label,
    })

# Build the frame once (DataFrame.append was removed in pandas 2.0 and was
# quadratic in a loop). dtype=object keeps integer positions as integers next
# to missing values, so the TSV does not gain a trailing '.0'.
pharmgkb_all_df = pd.DataFrame(
    pharmgkb_records,
    columns=[
        'data source', 'chemical_type', 'chemical name', 'pubchem id', 'smile',
        'disease name', 'gene symbol', 'variant', 'chromosome', 'position',
        'ref allele', 'alt allele', 'label',
    ],
    dtype=object,
)

pharmgkb_all_df.to_csv('middlefile/pharmgkb_all_table.tsv', sep = '\t')
pharmgkb_all_df

Unnamed: 0,data source,chemical_type,chemical name,pubchem id,smile,disease name,gene symbol,variant,chromosome,position,ref allele,alt allele,label
0,pharmgkb,Drug,meloxicam,5281106,CC1=CN=C(S1)N/C(=C\2/C(=O)C3=CC=CC=C3S(=O)(=O)...,,CYP2C9,"CYP2C9*1, CYP2C9*2, CYP2C9*3, CYP2C9*13",,,,,Metabolism/PK
1,pharmgkb,Drug,lornoxicam,5282204,CN1/C(=C(\NC2=CC=CC=N2)/O)/C(=O)C3=C(S1(=O)=O)...,,CYP2C9,"CYP2C9*1, CYP2C9*3, CYP2C9*13",,,,,Metabolism/PK
2,pharmgkb,Drug,siponimod,44599207,,,CYP2C9,"CYP2C9*1, CYP2C9*2, CYP2C9*3",,,,,Metabolism/PK
3,pharmgkb,Prodrug,capecitabine,60953,CCCCCOC(=O)NC1=NC(=O)N(C=C1F)[C@H]2[C@@H]([C@@...,Neoplasms,DPYD,rs17376848,1,97450068,A,G,Toxicity
4,pharmgkb,Prodrug,capecitabine,60953,CCCCCOC(=O)NC1=NC(=O)N(C=C1F)[C@H]2[C@@H]([C@@...,Neoplasms,DPYD,M166V,1,97699535,T,C,Toxicity
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4823,pharmgkb,Drug,sertraline,68617,CN[C@H]1CC[C@H](C2=CC=CC=C12)C3=CC(=C(C=C3)Cl)Cl,"Depressive Disorder, Major,Panic Disorder",SLC6A4,"SLC6A4 HTTLPR long form (L allele), SLC6A4 HTT...",,,,,Efficacy
4824,pharmgkb,Drug,sertraline,68617,CN[C@H]1CC[C@H](C2=CC=CC=C12)C3=CC(=C(C=C3)Cl)Cl,"Anxiety Disorders,Depressive Disorder, Major",SLC6A4,"SLC6A4 HTTLPR long form (L allele), SLC6A4 HTT...",,,,,Toxicity
4825,pharmgkb,Drug,fluoxetine,3386,CNCCC(C1=CC=CC=C1)OC2=CC=C(C=C2)C(F)(F)F,"Depressive Disorder, Major",SLC6A4,rs25531,17,30237328,T,C,Efficacy
4826,pharmgkb,Drug,fluoxetine,3386,CNCCC(C1=CC=CC=C1)OC2=CC=C(C=C2)C(F)(F)F,"Depressive Disorder, Major,Obsessive-Compulsiv...",SLC6A4,"SLC6A4 HTTLPR long form (L allele), SLC6A4 HTT...",,,,,Efficacy


In [11]:
# CIViC nightly exports: evidence items, molecular profiles and variants.
civic_df = pd.read_csv('rawData/CIViC/nightly-ClinicalEvidenceSummaries.tsv', sep='\t')
molecular_df = pd.read_csv('rawData/CIViC/nightly-MolecularProfileSummaries.tsv', sep='\t')
variant_df = pd.read_csv('rawData/CIViC/nightly-VariantSummaries.tsv', sep='\t')

# Store variant ids as generic Python objects.
variant_df = variant_df.astype({'variant_id': object})

# Keep only evidence rows that name a therapy; print the shape before and after.
print(civic_df.shape)
has_therapy = civic_df['therapies'].notna()
civic_df = civic_df[has_therapy].reset_index(drop=True)
print(civic_df.shape)

(4196, 25)
(2562, 25)


In [12]:
# First-match lookups, built once instead of re-filtering both frames for
# every evidence row.
_profiles = molecular_df.dropna(subset=['name']).drop_duplicates(subset='name', keep='first')
variant_ids_by_profile = dict(zip(_profiles['name'], _profiles['variant_ids']))

_variants = variant_df.dropna(subset=['variant_id']).drop_duplicates(subset='variant_id', keep='first')
gene_variant_by_id = dict(
    zip(_variants['variant_id'], zip(_variants['gene'], _variants['variant']))
)

civic_records = []
for therapies, disease_name, label, molecular_profile in zip(
    civic_df['therapies'],
    civic_df['disease'],
    civic_df['significance'],
    civic_df['molecular_profile'],
):
    try:
        variant_id = int(variant_ids_by_profile[molecular_profile])
        gene, variant = gene_variant_by_id[variant_id]
    except (KeyError, ValueError, TypeError):
        # Profile unknown, its variant_ids value is not a single integer, or
        # the variant id is unknown. NOTE(review): the skipped profiles printed
        # so far are all combined "A AND B" profiles, presumably because they
        # list several ids - confirm.
        print(molecular_profile)
        continue

    civic_records.append({
        'data source': 'civic',
        'chemical name': therapies,
        'disease name': disease_name,
        'gene symbol': gene,
        'variant': variant,
        'label': label,
    })

# Build the frame once: DataFrame.append was removed in pandas 2.0 and copied
# the whole frame on every iteration.
civic_all_df = pd.DataFrame(
    civic_records,
    columns=['data source', 'chemical name', 'disease name', 'gene symbol', 'variant', 'label'],
)

civic_all_df.to_csv('middlefile/civic_all_table.tsv', sep = '\t', )

BRAF V600E AND BRAF V600M
BRAF V600E AND BRAF Amplification
ALK EML4::ALK AND ALK L1196M
ABL1 BCR::ABL AND ABL1 E255K
ABL1 BCR::ABL AND ABL1 T315I
ABL1 BCR::ABL AND ABL1 T315I
ALK EML4::ALK AND ALK C1156Y
ALK EML4::ALK AND ALK L1196M
FLT3 ITD AND FLT3 D835Y
ALK EML4::ALK AND ALK Amplification
ALK EML4::ALK AND ALK G1202R
ALK EML4::ALK AND ALK L1196M
ALK EML4::ALK AND ALK S1206Y
ALK EML4::ALK AND ALK T1151dup
ABL1 BCR::ABL AND ABL1 F317L
ABL1 BCR::ABL AND ABL1 F317L
ABL1 BCR::ABL AND ABL1 T315I
ALK EML4::ALK AND ALK L1152R
ALK EML4::ALK AND ALK G1269A
ALK EML4::ALK AND ALK G1269A
ALK EML4::ALK AND ALK Amplification
ALK EML4::ALK AND ALK Amplification
EGFR Amplification AND EGFR EGFRVIII
ALK EML4::ALK AND ALK C1156Y
ALK EML4::ALK AND ALK C1156Y
ALK EML4::ALK AND ALK L1198F AND ALK C1156Y
ALK EML4::ALK AND ALK L1198F AND ALK C1156Y
MET Amplification AND MET Splice Site (c.3028G>A)
ALK EML4::ALK AND ALK V1180L
ALK EML4::ALK AND ALK V1180L
ALK EML4::ALK AND ALK V1180L
ALK EML4::ALK AND ALK 

In [13]:
# dbMCS ships resistance and sensitivity associations as two CSV files;
# load both and stack them under a fresh 0..n-1 index.
dbmcs_df1, dbmcs_df2 = map(
    pd.read_csv,
    ['rawData/dbMCS/Data for resistance.csv', 'rawData/dbMCS/Data for sensitivity.csv'],
)
print(dbmcs_df1.shape, dbmcs_df2.shape)

dbmcs_df = pd.concat([dbmcs_df1, dbmcs_df2], ignore_index=True)
print(dbmcs_df.shape)
dbmcs_df.head()

(1164, 7) (2555, 7)
(3719, 7)


Unnamed: 0,Gene,Entrez_id,cDNA,Drug,Drug_id,Association,Evidence level
0,ABL1,25,c.944C>T,Nilotinib,DB04868,Resistant,1
1,ABL1,25,c.944C>T,Dasatinib,DB01254,Resistant,1
2,ABL1,25,c.944C>T,Bosutinib,DB06616,Resistant,1
3,ABL1,25,c.895G>C,Dasatinib,DB01254,Resistant,1
4,ABL1,25,c.943A>G,Dasatinib,DB01254,Resistant,1


In [14]:
# Map dbMCS onto the shared column names in one vectorised step.
# (The previous row-by-row DataFrame.append was quadratic and no longer exists
# in pandas >= 2.0.)
dbmcs_all_df = pd.DataFrame({
    'data source': 'dbmcs',
    'chemical name': dbmcs_df['Drug'],
    'gene symbol': dbmcs_df['Gene'],
    'variant': dbmcs_df['cDNA'],
    'label': dbmcs_df['Association'],
}).reset_index(drop=True)

dbmcs_all_df.to_csv('middlefile/dbmcs_all_table.tsv', sep = '\t')

In [15]:
# AIMMS human mutation data; show which genes it covers.
aimms_df = pd.read_csv('rawData/AIMMS/AIMMS-human.csv')
pd.unique(aimms_df['gene'])

array(['ABL1', 'EGFR', 'AKR1B1'], dtype=object)

In [16]:
# Map AIMMS onto the shared column names in one vectorised step.
# (The previous row-by-row DataFrame.append was quadratic and no longer exists
# in pandas >= 2.0.)
aimms_all_df = pd.DataFrame({
    'data source': 'aimms',
    'chemical name': aimms_df['drug'],
    'gene symbol': aimms_df['gene'],
    'variant': aimms_df['Mutants'],
    'ddg': aimms_df['ddg'],
}).reset_index(drop=True)

aimms_all_df.to_csv('middlefile/aimms_all_table.tsv', sep = '\t')

In [17]:
# RET mutation / drug measurements (comma separated).
ret_df = pd.read_csv(filepath_or_buffer='rawData/RET/RET.csv')
ret_df.head(n=5)

Unnamed: 0,drug,Mutation,ic50,Unnamed: 3,Unnamed: 4,ddg,gene
0,Cabozantinib,L730I,2.96,12.333333,2.512306,1.496993,RET
1,Cabozantinib,L730V,2.37,9.875,2.290006,1.364533,RET
2,Cabozantinib,L730V/V804M,7.37,30.708333,3.424534,2.040558,RET
3,Cabozantinib,E732K,1.15,4.791667,1.566878,0.933647,RET
4,Cabozantinib,V738A,1.2,5.0,1.609438,0.959007,RET


In [18]:
# Map the RET table onto the shared column names in one vectorised step.
# (The previous row-by-row DataFrame.append was quadratic and no longer exists
# in pandas >= 2.0.)
ret_all_df = pd.DataFrame({
    'data source': 'ret',
    'chemical name': ret_df['drug'],
    'gene symbol': ret_df['gene'],
    'variant': ret_df['Mutation'],
    'ddg': ret_df['ddg'],
}).reset_index(drop=True)

ret_all_df.to_csv('middlefile/ret_all_table.tsv', sep = '\t')

In [19]:
# GDSC drug / cell-line sensitivity table (tab separated).
gdsc_df = pd.read_table('rawData/GDSC/GDSC_drug_cell_line_sensitivity.tsv')
gdsc_df.head(n=5)

Unnamed: 0,TARGET,DRUG,MUTATION,CELL_LINE_WT,CELL_LINE_MT,LN_IC50_WT,LN_IC50_MT,DDG,UNIPROT_ID
0,ABL1,Nilotinib,R608H,5637+A375+AMO-1+ASH-3+BE-13+BT-474+BxPC-3+CAL-...,A253,2.767916,3.384217,0.365149,P00519
1,ABL1,Nilotinib,A1045T,5637+A375+AMO-1+ASH-3+BE-13+BT-474+BxPC-3+CAL-...,C32,2.767916,3.814408,0.62003,P00519
2,ABL1,Nilotinib,V587M,5637+A375+AMO-1+ASH-3+BE-13+BT-474+BxPC-3+CAL-...,CA46,2.767916,3.0144,0.146038,P00519
3,ABL1,Nilotinib,P460L,5637+A375+AMO-1+ASH-3+BE-13+BT-474+BxPC-3+CAL-...,CAL-27,2.767916,3.602492,0.494474,P00519
4,ABL1,Nilotinib,G1079D,5637+A375+AMO-1+ASH-3+BE-13+BT-474+BxPC-3+CAL-...,CAPAN-2,2.767916,5.464567,1.597725,P00519


In [20]:
# Map GDSC onto the shared column names in one vectorised step; GDSC is the
# only source that already carries a UniProt accession.
# (The previous row-by-row DataFrame.append was quadratic and no longer exists
# in pandas >= 2.0.)
gdsc_all_df = pd.DataFrame({
    'data source': 'gdsc',
    'chemical name': gdsc_df['DRUG'],
    'gene symbol': gdsc_df['TARGET'],
    'variant': gdsc_df['MUTATION'],
    'uniprot id': gdsc_df['UNIPROT_ID'],
    'ddg': gdsc_df['DDG'],
}).reset_index(drop=True)

gdsc_all_df.to_csv('middlefile/gdsc_all_table.tsv', sep = '\t')

In [21]:
# DepMap drug / cell-line sensitivity table (tab separated).
depmep_df = pd.read_table('rawData/DepMap/MepMap_drug_cell_line_sensitivity.tsv')
depmep_df.head(n=5)

Unnamed: 0,TARGET,DRUG,MUTATION,CELL_LINE_WT,CELL_LINE_MT,LN_IC50_WT,LN_IC50_MT,DDG
0,ALK,TAE-684,p.A96T,L428+HCT116+RVH421+DKMG+MG63+LC1SQSF+SUIT2+EN+...,C2BBE1,3.608218,1.563514,-1.211456
1,ALK,TAE-684,p.R317T,L428+HCT116+RVH421+DKMG+MG63+LC1SQSF+SUIT2+EN+...,CORL23,3.608218,7.618295,2.375909
2,ALK,TAE-684,p.G882C,L428+HCT116+RVH421+DKMG+MG63+LC1SQSF+SUIT2+EN+...,EFO27,3.608218,8.0,2.602064
3,ALK,TAE-684,p.G689R,L428+HCT116+RVH421+DKMG+MG63+LC1SQSF+SUIT2+EN+...,G361,3.608218,4.944404,0.79167
4,ALK,TAE-684,p.R753Q,L428+HCT116+RVH421+DKMG+MG63+LC1SQSF+SUIT2+EN+...,HCC78,3.608218,8.0,2.602064


In [22]:
# Map DepMap onto the shared column names in one vectorised step.
# (The previous row-by-row DataFrame.append was quadratic and no longer exists
# in pandas >= 2.0.)
depmep_all_df = pd.DataFrame({
    'data source': 'depmep',
    'chemical name': depmep_df['DRUG'],
    'gene symbol': depmep_df['TARGET'],
    'variant': depmep_df['MUTATION'],
    'ddg': depmep_df['DDG'],
}).reset_index(drop=True)

depmep_all_df.to_csv('middlefile/depmep_all_table.tsv', sep = '\t')

In [23]:
# Stack the per-source tables; columns missing from a source stay empty for
# its rows, and the combined frame gets a fresh 0..n-1 index.
all_data_df = pd.concat(
    [
        aimms_all_df,
        civic_all_df,
        dbmcs_all_df,
        depmep_all_df,
        gdsc_all_df,
        pharmgkb_all_df,
        ret_all_df,
    ],
    ignore_index=True,
)
all_data_df.to_csv('middlefile/all_data_table.tsv', sep='\t', index=False)


In [24]:
def _fetch_uniprot_accession(session, gene, timeout=30):
    """Return the primary accession of the first reviewed human UniProtKB hit for `gene`.

    The search expression is the same as before (reviewed + human + the symbol
    as a free-text term); it is now passed through `params` so it is URL-encoded
    correctly, and only one result is requested because only the first is used.
    Returns None when the search has no hits. Raises requests.RequestException
    on network / HTTP errors.

    NOTE(review): the symbol is a free-text term, not a gene-name field, so the
    top hit is not guaranteed to be that gene - consider scoping the query.
    """
    response = session.get(
        'https://rest.uniprot.org/uniprotkb/search',
        params={
            'format': 'json',
            'query': f'(reviewed:true) AND (organism_id:9606) AND {gene}',
            'size': 1,
        },
        timeout=timeout,  # never hang forever on a stalled connection
    )
    response.raise_for_status()
    results = response.json()['results']
    return results[0]['primaryAccession'] if results else None


all_data_df = pd.read_csv('middlefile/all_data_table.tsv', sep = '\t')
# Missing gene symbols cannot be queried, so they are dropped from the work list.
symbol_list = all_data_df['gene symbol'].dropna().unique()

# One session for the whole loop so the HTTPS connection is reused.
with requests.Session() as session:
    for gene in tqdm(symbol_list):
        try:
            accession = _fetch_uniprot_accession(session, gene)
        except (requests.RequestException, KeyError, ValueError, TypeError):
            # Explicit exception types: a bare `except:` would also swallow
            # Ctrl-C and make this long loop impossible to interrupt.
            accession = None

        if accession is None:
            # Same reporting as before: print symbols that could not be resolved.
            print(gene)
            continue

        # NOTE(review): this overwrites any 'uniprot id' already present for
        # the gene's rows - confirm that is intended.
        all_data_df.loc[all_data_df['gene symbol'] == gene, 'uniprot id'] = accession

all_data_df.to_csv('middlefile/uniprot_table.tsv', sep = '\t', index= False)
all_data_df

  0%|          | 0/1323 [00:00<?, ?it/s]

 38%|███▊      | 498/1323 [26:33<44:40,  3.25s/it]  

OR52J2P


 50%|█████     | 664/1323 [34:42<28:22,  2.58s/it]  

PSORS1C3


 54%|█████▎    | 710/1323 [37:26<29:18,  2.87s/it]  

HLA-DPB2


 63%|██████▎   | 837/1323 [44:01<19:28,  2.40s/it]  

HOTAIR


 71%|███████   | 941/1323 [49:02<17:45,  2.79s/it]

LINC00251


 73%|███████▎  | 963/1323 [50:05<13:15,  2.21s/it]

CYCSP5


 73%|███████▎  | 966/1323 [50:09<10:04,  1.69s/it]

MIR27A


 78%|███████▊  | 1036/1323 [53:27<12:48,  2.68s/it]

MEG3


 86%|████████▋ | 1143/1323 [58:22<06:44,  2.25s/it]

MIR582


 94%|█████████▍| 1246/1323 [1:02:54<02:29,  1.94s/it]

CYP2A7P1


100%|██████████| 1323/1323 [1:06:12<00:00,  3.00s/it]


Unnamed: 0,data source,chemical name,gene symbol,variant,ddg,disease name,label,uniprot id,chemical_type,pubchem id,smile,chromosome,position,ref allele,alt allele
0,aimms,dasatinib,ABL1,M244V,0.290000,,,P00519,,,,,,,
1,aimms,dasatinib,ABL1,G250E,0.480000,,,P00519,,,,,,,
2,aimms,dasatinib,ABL1,Q252H,0.860000,,,P00519,,,,,,,
3,aimms,dasatinib,ABL1,Y253F,0.330000,,,P00519,,,,,,,
4,aimms,dasatinib,ABL1,Y253H,0.290000,,,P00519,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19664,ret,Nintedanib,RET,G810A,-0.044158,,,P07949,,,,,,,
19665,ret,Nintedanib,RET,G810S,0.826043,,,P07949,,,,,,,
19666,ret,Nintedanib,RET,V871I,0.149749,,,P07949,,,,,,,
19667,ret,Nintedanib,RET,F998V,0.000000,,,P07949,,,,,,,


In [25]:
# Single-gene probe of the UniProt FASTA search.
gene = 'VEGFB'
url = (
    'https://rest.uniprot.org/uniprotkb/search?format=fasta&query=(reviewed:true)%20AND%20(organism_id:9606)%20AND%20'
    + gene
)
response = requests.get(url)
text = response.text
# Take the first '>sp' record, drop its header line and join the wrapped
# sequence lines into one string.
a = ''.join(text.split('>sp')[1].split('\n')[1:])
a

'MSPLLRRLLLAALLQLAPAQAPVSQPDAPGHQRKVVSWIDVYTRATCQPREVVVPLTVELMGTVAKQLVPSCVTVQRCGGCCPDDGLECVPTGQHQVRMQILMIRYPSSQLGEMSLEEHSQCECRPKKKDSAVKPDRAATPHHRPQPRSVPGWDSAPGAPSPADITHPTPAPGPSAHAAPSTTSALTPGPAAAAADAAASSVAKGGA'

In [26]:
def _fetch_uniprot_sequence(session, gene, timeout=30):
    """Return the sequence of the first reviewed human UniProtKB FASTA hit for `gene`.

    The search expression is the same as before (reviewed + human + the symbol
    as a free-text term); it is now passed through `params` so it is URL-encoded
    correctly, and only one record is requested because only the first is used.
    Returns None when the response holds no '>sp' record. Raises
    requests.RequestException on network / HTTP errors.

    NOTE(review): the symbol is a free-text term, not a gene-name field, so the
    top hit is not guaranteed to be that gene - consider scoping the query.
    """
    response = session.get(
        'https://rest.uniprot.org/uniprotkb/search',
        params={
            'format': 'fasta',
            'query': f'(reviewed:true) AND (organism_id:9606) AND {gene}',
            'size': 1,
        },
        timeout=timeout,  # never hang forever on a stalled connection
    )
    response.raise_for_status()
    records = response.text.split('>sp')
    if len(records) < 2:
        return None
    # First record: drop the header line, join the wrapped sequence lines.
    return ''.join(records[1].split('\n')[1:])


all_data_df = pd.read_csv('middlefile/uniprot_table.tsv', sep = '\t')
# Skip missing symbols (the comma test would fail on NaN) and multi-gene
# entries such as 'A,B'.
symbol_list = [
    item for item in all_data_df['gene symbol'].dropna().unique() if ',' not in item
]

# One session for the whole loop so the HTTPS connection is reused.
with requests.Session() as session:
    for gene in tqdm(symbol_list):
        try:
            fasta_merged = _fetch_uniprot_sequence(session, gene)
        except (requests.RequestException, ValueError, TypeError):
            # Explicit exception types: a bare `except:` would also swallow
            # Ctrl-C and make this long loop impossible to interrupt.
            fasta_merged = None

        if fasta_merged is None:
            # Same reporting as before: print symbols without a sequence.
            print(gene)
            continue

        all_data_df.loc[all_data_df['gene symbol'] == gene, 'fasta'] = fasta_merged

all_data_df.to_csv('middlefile/fasta_table.tsv', sep = '\t', index= False)
all_data_df

 35%|███▌      | 467/1323 [09:49<17:37,  1.24s/it]

SULT1C4


 38%|███▊      | 498/1323 [10:26<17:11,  1.25s/it]

OR52J2P


 50%|█████     | 664/1323 [14:04<14:29,  1.32s/it]

PSORS1C3


 54%|█████▎    | 710/1323 [15:03<12:41,  1.24s/it]

HLA-DPB2


 63%|██████▎   | 837/1323 [17:41<10:10,  1.26s/it]

HOTAIR


 71%|███████   | 941/1323 [19:51<07:49,  1.23s/it]

LINC00251


 73%|███████▎  | 963/1323 [20:18<07:24,  1.23s/it]

CYCSP5


 73%|███████▎  | 966/1323 [20:22<07:17,  1.22s/it]

MIR27A


 78%|███████▊  | 1036/1323 [21:48<05:46,  1.21s/it]

MEG3


 86%|████████▋ | 1143/1323 [23:59<03:40,  1.23s/it]

MIR582


 94%|█████████▍| 1246/1323 [26:05<01:33,  1.22s/it]

CYP2A7P1


100%|██████████| 1323/1323 [27:40<00:00,  1.25s/it]


In [3]:
# Read the identifier / structure columns as text so that writing string
# values into them below never mixes dtypes.
all_data_df = pd.read_csv(
    'middlefile/fasta_table.tsv',
    sep = '\t',
    dtype={'pubchem id': str, 'smile': str},
)
# NOTE(review): ids that went through an earlier numeric round-trip may look
# like '5281106.0'; strip that suffix so kept ids match the str(cid) format
# written below - confirm this matches the file contents.
all_data_df['pubchem id'] = all_data_df['pubchem id'].str.replace(r'\.0$', '', regex=True)
if 'pubchem fingerprint' not in all_data_df.columns:
    all_data_df['pubchem fingerprint'] = None

# Skip missing names (the comma test would fail on NaN) and entries that
# contain a comma.
drug_list = [
    item for item in all_data_df['chemical name'].dropna().unique() if ',' not in item
]

failed_drugs = []
for drug in tqdm(drug_list):
    try:
        # Look the compound up by name and take the first hit.
        compound = pcp.get_compounds(drug, 'name')[0]
        pubchem_id = compound.cid                  # PubChem CID
        fingerprint = compound.cactvs_fingerprint  # PubChem (CACTVS) fingerprint
        smiles = compound.isomeric_smiles          # SMILES string
    except Exception:
        # `Exception` rather than a bare `except:` so Ctrl-C still stops the
        # loop. A failed lookup leaves the row untouched: previously it wiped
        # an existing id / SMILES and stored the literal string 'None' as id.
        failed_drugs.append(drug)
        continue

    rows = all_data_df['chemical name'] == drug
    all_data_df.loc[rows, 'pubchem id'] = str(pubchem_id)
    all_data_df.loc[rows, 'pubchem fingerprint'] = fingerprint
    all_data_df.loc[rows, 'smile'] = smiles

print(f'{len(failed_drugs)} of {len(drug_list)} names could not be resolved on PubChem')

all_data_df.to_csv('middlefile/drug_table.tsv', sep = '\t', index= False)
all_data_df

  1%|          | 11/1417 [00:18<37:37,  1.61s/it]

Peginterferon Alfa-2b


  2%|▏         | 22/1417 [00:36<36:47,  1.58s/it]

JAK2 Inhibitor AZD1480


  2%|▏         | 24/1417 [00:39<35:25,  1.53s/it]

Panitumumab


  2%|▏         | 27/1417 [00:43<34:59,  1.51s/it]

Cetuximab


  2%|▏         | 31/1417 [00:50<34:57,  1.51s/it]

Anti-CD33


  3%|▎         | 36/1417 [00:58<35:52,  1.56s/it]

Akt Inhibitor MK2206


  3%|▎         | 41/1417 [01:06<36:24,  1.59s/it]

Hormone Therapy


  4%|▎         | 52/1417 [01:24<34:44,  1.53s/it]

PI3K-alpha Inhibitor MEN1611


  5%|▍         | 64/1417 [01:46<42:29,  1.88s/it]

Trastuzumab


  5%|▍         | 69/1417 [02:03<58:11,  2.59s/it]  

Onartuzumab


  5%|▍         | 70/1417 [02:05<51:50,  2.31s/it]

Epidermal Growth Factor Receptor Tyrosine Kinase Inhibitor


  5%|▌         | 74/1417 [02:12<39:25,  1.76s/it]

PI3Kbeta Inhibitor AZD8186


  5%|▌         | 77/1417 [02:21<1:10:09,  3.14s/it]

FACT Complex-targeting Curaxin CBL0137


  6%|▌         | 84/1417 [02:52<1:06:44,  3.00s/it]

Seribantumab


  6%|▌         | 85/1417 [02:54<55:28,  2.50s/it]  

Nimotuzumab


  6%|▋         | 92/1417 [03:05<36:18,  1.64s/it]

NOTCH1 Antibody (PF-06293622)


  7%|▋         | 93/1417 [03:07<34:15,  1.55s/it]

Patritumab


  7%|▋         | 96/1417 [03:11<33:36,  1.53s/it]

RG7356


  7%|▋         | 97/1417 [03:13<32:23,  1.47s/it]

Conatumumab


  7%|▋         | 99/1417 [03:16<32:08,  1.46s/it]

Chemotherapy


  7%|▋         | 101/1417 [03:19<31:42,  1.45s/it]

Anti-ErbB3 Monoclonal Antibody AV-203


  8%|▊         | 109/1417 [03:32<33:50,  1.55s/it]

Bevacizumab


  8%|▊         | 110/1417 [03:33<32:18,  1.48s/it]

ACLY SiRNA


  8%|▊         | 111/1417 [03:34<31:22,  1.44s/it]

Anti-EGFR Monoclonal Antibody


  8%|▊         | 112/1417 [03:36<30:34,  1.41s/it]

Taxane Compound


  8%|▊         | 113/1417 [03:37<29:52,  1.37s/it]

IGF1R Monoclonal Antibody


  9%|▊         | 123/1417 [03:53<32:17,  1.50s/it]

Rindopepimut


  9%|▉         | 126/1417 [03:58<33:11,  1.54s/it]

MEK Inhibitor RO4987655


  9%|▉         | 129/1417 [04:03<34:02,  1.59s/it]

Futuximab/Modotuximab Mixture


 10%|▉         | 135/1417 [04:13<34:53,  1.63s/it]

Adjuvant Chemotherapy


 10%|▉         | 136/1417 [04:15<33:19,  1.56s/it]

PD1 Inhibitor


 10%|▉         | 139/1417 [04:19<31:37,  1.48s/it]

Atezolizumab


 10%|█         | 148/1417 [04:34<33:39,  1.59s/it]

Nivolumab


 12%|█▏        | 164/1417 [05:03<40:02,  1.92s/it]

Alemtuzumab


 12%|█▏        | 165/1417 [05:04<36:23,  1.74s/it]

Peginterferon Alfa-2a


 12%|█▏        | 168/1417 [05:09<33:08,  1.59s/it]

Ipilimumab


 12%|█▏        | 169/1417 [05:10<31:33,  1.52s/it]

Porcupine Inhibitor WNT974


 12%|█▏        | 170/1417 [05:12<30:18,  1.46s/it]

Platinum Compound


 12%|█▏        | 172/1417 [05:15<30:15,  1.46s/it]

MTOR Kinase Inhibitor AZD8055


 12%|█▏        | 174/1417 [05:18<30:03,  1.45s/it]

Pembrolizumab


 13%|█▎        | 179/1417 [05:26<31:43,  1.54s/it]

MTOR Kinase Inhibitor PP242


 13%|█▎        | 181/1417 [05:29<31:11,  1.51s/it]

Mitogen-Activated Protein Kinase Kinase Inhibitor


 13%|█▎        | 182/1417 [05:30<29:54,  1.45s/it]

MET Tyrosine Kinase Inhibitor SGX523


 13%|█▎        | 184/1417 [05:33<30:15,  1.47s/it]

Aromatase Inhibitor


 13%|█▎        | 185/1417 [05:34<29:32,  1.44s/it]

CDK Inhibitor SNS-032


 14%|█▎        | 193/1417 [05:47<32:03,  1.57s/it]

Therapeutic Tumor Infiltrating Lymphocytes


 14%|█▍        | 195/1417 [05:51<32:02,  1.57s/it]

Pan-AKT Kinase Inhibitor GSK690693


 14%|█▍        | 196/1417 [05:52<30:30,  1.50s/it]

Pyrimidine Antagonist


 14%|█▍        | 197/1417 [05:53<29:36,  1.46s/it]

FGFR Inhibitor AZD4547


 14%|█▍        | 204/1417 [06:04<31:02,  1.54s/it]

EAP Protocol


 15%|█▍        | 207/1417 [06:09<30:49,  1.53s/it]

Letetresgene Autoleucel


 15%|█▍        | 209/1417 [06:12<30:08,  1.50s/it]

MDM2 Inhibitor AMGMDS3


 15%|█▍        | 212/1417 [06:17<30:05,  1.50s/it]

Radiation Therapy


 15%|█▌        | 214/1417 [06:20<29:40,  1.48s/it]

Radioactive Iodine


 15%|█▌        | 217/1417 [06:24<29:54,  1.50s/it]

Anti-VEGF Monoclonal Antibody


 15%|█▌        | 219/1417 [06:27<29:49,  1.49s/it]

Immune Checkpoint Inhibitor


 16%|█▌        | 222/1417 [06:32<30:16,  1.52s/it]

Rilotumumab


 16%|█▌        | 227/1417 [06:40<32:20,  1.63s/it]

BET Inhibitor


 17%|█▋        | 238/1417 [06:59<31:00,  1.58s/it]

Anti-PD1 Monoclonal Antibody


 17%|█▋        | 239/1417 [07:00<29:27,  1.50s/it]

Anti-PD-L1 Monoclonal Antibody


 17%|█▋        | 240/1417 [07:01<28:27,  1.45s/it]

Akt/ERK Inhibitor ONC201


 17%|█▋        | 243/1417 [07:06<29:02,  1.48s/it]

Trastuzumab Deruxtecan


 17%|█▋        | 245/1417 [07:09<29:15,  1.50s/it]

PI3Ka/Di


 18%|█▊        | 248/1417 [07:14<29:36,  1.52s/it]

R3Mab


 18%|█▊        | 249/1417 [07:15<28:36,  1.47s/it]

FLT3/ABL/Aurora Kinase Inhibitor KW-2449


 18%|█▊        | 255/1417 [07:25<30:16,  1.56s/it]

R-CHOP Regimen


 18%|█▊        | 258/1417 [07:30<30:03,  1.56s/it]

Imatinib Mesylate + Dasatinib


 18%|█▊        | 259/1417 [07:31<28:42,  1.49s/it]

Imatinib Mesylate + Nilotinib


 18%|█▊        | 260/1417 [07:32<27:42,  1.44s/it]

Dasatinib + Nilotinib


 18%|█▊        | 261/1417 [07:33<26:52,  1.40s/it]

BRAF inhibitors


 19%|█▉        | 271/1417 [07:50<29:06,  1.52s/it]

Vemurafenib + Cetuximab + Irinotecan


 19%|█▉        | 272/1417 [07:52<28:01,  1.47s/it]

Irinotecan + Cetuximab


 19%|█▉        | 273/1417 [07:54<30:10,  1.58s/it]

EGFR TKIs


 19%|█▉        | 274/1417 [07:55<28:31,  1.50s/it]

Dabrafenib + Trametinib


 20%|█▉        | 278/1417 [08:02<29:49,  1.57s/it]

Dabrafenib + Trametinib + SCH772984


 20%|██        | 285/1417 [08:13<29:55,  1.59s/it]

OBX1-012


 20%|██        | 287/1417 [08:16<28:53,  1.53s/it]

unspecified PD-L1 antibody


 20%|██        | 288/1417 [08:18<28:34,  1.52s/it]

unspecified CTLA4 antibody + unspecified PD-1 antibody


 20%|██        | 289/1417 [08:19<27:34,  1.47s/it]

unspecified PD-1 antibody


 21%|██        | 291/1417 [08:22<27:50,  1.48s/it]

EKI-285


 21%|██        | 292/1417 [08:23<27:24,  1.46s/it]

Pertuzumab


 21%|██        | 297/1417 [08:32<29:14,  1.57s/it]

Lapatinib + Trastuzumab


 21%|██        | 298/1417 [08:33<28:01,  1.50s/it]

Aromatase inhibitors


 21%|██        | 299/1417 [08:34<26:59,  1.45s/it]

anti-estrogens


 22%|██▏       | 306/1417 [08:46<29:17,  1.58s/it]

FGFR inhibitors


 22%|██▏       | 310/1417 [08:53<28:34,  1.55s/it]

All-trans Retinoic Acid + Anthracycline


 22%|██▏       | 313/1417 [08:57<27:48,  1.51s/it]

Cisplatin + Fluorouracil


 22%|██▏       | 314/1417 [08:59<26:52,  1.46s/it]

EGFR inhibitors


 22%|██▏       | 315/1417 [09:00<26:12,  1.43s/it]

AGI-5198 + Talazoparib


 22%|██▏       | 316/1417 [09:01<25:41,  1.40s/it]

AGI-5198 + Metformin


 22%|██▏       | 317/1417 [09:03<25:12,  1.38s/it]

AGI-5198 + Radiotherapy


 22%|██▏       | 318/1417 [09:04<24:57,  1.36s/it]

AGI-5198 + Olaparib


 23%|██▎       | 326/1417 [09:17<29:07,  1.60s/it]

Gemcitabine + Trametinib


 23%|██▎       | 328/1417 [09:21<28:29,  1.57s/it]

Gemcitabine + Erlotinib


 23%|██▎       | 329/1417 [09:22<27:04,  1.49s/it]

Cetuximab + FOLFOX4


 23%|██▎       | 330/1417 [09:28<53:22,  2.95s/it]

MEK inhibitors


 24%|██▎       | 333/1417 [09:33<36:08,  2.00s/it]

BRAF + MEK inhibitors


 24%|██▎       | 334/1417 [09:34<33:01,  1.83s/it]

MET inhibitors


 24%|██▍       | 342/1417 [09:47<27:47,  1.55s/it]

Pan-TRK inhibitors


 24%|██▍       | 344/1417 [09:50<26:58,  1.51s/it]

Cetuximab + Fluorouracil


 24%|██▍       | 345/1417 [09:51<26:03,  1.46s/it]

Bevacizumab + Temsirolimus


 24%|██▍       | 346/1417 [09:53<25:24,  1.42s/it]

Bortezomib + Temsirolimus + Topotecan


 24%|██▍       | 347/1417 [09:54<24:47,  1.39s/it]

PI3K pathway inhibitors


 25%|██▍       | 349/1417 [09:57<25:47,  1.45s/it]

Letrozole + Ribociclib


 25%|██▌       | 359/1417 [10:15<28:02,  1.59s/it]

Daunorubicin + Cytarabine


 25%|██▌       | 360/1417 [10:16<26:37,  1.51s/it]

allosteric AKT inhibitors


 25%|██▌       | 361/1417 [10:17<25:53,  1.47s/it]

non-allosteric AKT inhibitors


 26%|██▌       | 362/1417 [10:19<25:10,  1.43s/it]

AKT inhibitors


 26%|██▌       | 363/1417 [10:20<24:40,  1.40s/it]

ARQ 751


 26%|██▌       | 366/1417 [10:25<26:54,  1.54s/it]

Alectinib + Doxorubicin


 26%|██▌       | 367/1417 [10:26<25:44,  1.47s/it]

AZD3463 + Doxorubicin


 26%|██▌       | 368/1417 [10:28<24:58,  1.43s/it]

Crizotinib + Cyclophosphamide + Topotecan


 26%|██▋       | 375/1417 [10:40<27:44,  1.60s/it]

Temozolomide + Veliparib


 27%|██▋       | 376/1417 [10:41<26:20,  1.52s/it]

Panitumumab + Dabrafenib + Trametinib


 27%|██▋       | 377/1417 [10:43<27:56,  1.61s/it]

Vemurafenib + Cobimetinib


 27%|██▋       | 378/1417 [10:44<26:36,  1.54s/it]

Sorafenib + Panitumumab


 27%|██▋       | 379/1417 [10:46<25:29,  1.47s/it]

PLX4720 + Pictilisib


 27%|██▋       | 380/1417 [10:47<24:37,  1.42s/it]

PLX4720 + Nutlin-3


 27%|██▋       | 381/1417 [10:48<24:11,  1.40s/it]

Vemurafenib + Capecitabine + Bevacizumab


 27%|██▋       | 382/1417 [10:50<23:50,  1.38s/it]

Selumetinib + Dactolisib


 27%|██▋       | 383/1417 [10:51<23:22,  1.36s/it]

Vemurafenib + Gefitinib + Cetuximab


 27%|██▋       | 384/1417 [10:52<23:07,  1.34s/it]

Trametinib + Dabrafenib


 27%|██▋       | 385/1417 [10:54<23:05,  1.34s/it]

Dactolisib + GDC-0879


 27%|██▋       | 386/1417 [10:55<22:55,  1.33s/it]

Dabrafenib + Trametinib + Vemurafenib


 27%|██▋       | 388/1417 [10:58<23:58,  1.40s/it]

Vemurafenib + Panitumumab


 27%|██▋       | 389/1417 [10:59<23:25,  1.37s/it]

Vemurafenib + Irinotecan + Cetuximab


 28%|██▊       | 390/1417 [11:01<23:12,  1.36s/it]

Dabrafenib + Trametinib DMSO


 28%|██▊       | 391/1417 [11:02<23:08,  1.35s/it]

Vemurafenib + Panitumumab + Irinotecan


 28%|██▊       | 392/1417 [11:03<23:07,  1.35s/it]

Vemurafenib + Pertuzumab


 28%|██▊       | 393/1417 [11:05<23:06,  1.35s/it]

Encorafenib + Cetuximab


 28%|██▊       | 394/1417 [11:06<23:12,  1.36s/it]

Alpelisib + Cetuximab + Encorafenib


 28%|██▊       | 395/1417 [11:07<22:55,  1.35s/it]

Binimetinib + Encorafenib


 28%|██▊       | 396/1417 [11:09<22:51,  1.34s/it]

Cetuximab + Irinotecan + Vemurafenib


 28%|██▊       | 397/1417 [11:10<22:42,  1.34s/it]

Mitogen-Activated Protein Kinase Kinase inhibitor + BRAF inhibitor


 28%|██▊       | 398/1417 [11:11<22:43,  1.34s/it]

ERK inhibitors


 28%|██▊       | 403/1417 [11:19<25:37,  1.52s/it]

Buparlisib + Vemurafenib


 29%|██▊       | 404/1417 [11:21<24:49,  1.47s/it]

MK2206 + Vemurafenib


 29%|██▊       | 405/1417 [11:22<24:19,  1.44s/it]

Cobimetinib + Vemurafenib


 29%|██▊       | 406/1417 [11:23<23:43,  1.41s/it]

Atezolizumab + Vemurafenib


 29%|██▊       | 407/1417 [11:25<23:23,  1.39s/it]

Selumetinib + Vemurafenib


 29%|██▉       | 408/1417 [11:26<23:01,  1.37s/it]

Trametinib + Vemurafenib


 29%|██▉       | 409/1417 [11:27<22:36,  1.35s/it]

Cetuximab + Vemurafenib


 29%|██▉       | 410/1417 [11:29<22:27,  1.34s/it]

MK2206 + Trametinib


 29%|██▉       | 411/1417 [11:30<22:21,  1.33s/it]

Atezolizumab + Cobimetinib + Vemurafenib


 29%|██▉       | 412/1417 [11:32<23:21,  1.39s/it]

Buparlisib + Trametinib


 29%|██▉       | 413/1417 [11:33<22:58,  1.37s/it]

Buparlisib + Selumetinib


 29%|██▉       | 415/1417 [11:36<23:59,  1.44s/it]

Encorafenib + Trametinib


 30%|██▉       | 420/1417 [11:44<26:24,  1.59s/it]

Panitumumab + Vemurafenib


 30%|██▉       | 422/1417 [11:48<26:55,  1.62s/it]

Everolimus + PLX4720


 30%|██▉       | 425/1417 [11:52<25:49,  1.56s/it]

Erlotinib + Vemurafenib


 30%|███       | 426/1417 [11:54<24:44,  1.50s/it]

Cetuximab + PLX4720


 30%|███       | 428/1417 [11:57<25:33,  1.55s/it]

Dabrafenib + Panitumumab


 30%|███       | 430/1417 [12:00<24:39,  1.50s/it]

Dasatinib + Trametinib


 30%|███       | 431/1417 [12:01<23:54,  1.45s/it]

TW-37 + Vemurafenib


 31%|███       | 433/1417 [12:04<24:01,  1.47s/it]

PAC-1 + Vemurafenib


 31%|███       | 434/1417 [12:06<23:19,  1.42s/it]

Cetuximab + Selumetinib


 31%|███       | 436/1417 [12:09<23:41,  1.45s/it]

Imatinib + PLX4720


 31%|███       | 437/1417 [12:10<23:59,  1.47s/it]

Gefitinib + PLX4720


 31%|███       | 438/1417 [12:11<23:18,  1.43s/it]

Dabrafenib + SCH772984


 31%|███       | 439/1417 [12:13<22:53,  1.40s/it]

Cetuximab + Dabrafenib + SCH772984


 31%|███       | 440/1417 [12:14<22:54,  1.41s/it]

Afatinib + BI-882370


 31%|███       | 441/1417 [12:16<22:40,  1.39s/it]

Navitoclax + PLX4720


 31%|███       | 442/1417 [12:17<22:35,  1.39s/it]

Ganetespib + TAK-733


 31%|███▏      | 443/1417 [12:18<22:23,  1.38s/it]

SBI-755199


 31%|███▏      | 444/1417 [12:20<22:12,  1.37s/it]

Navitoclax + Vemurafenib


 31%|███▏      | 445/1417 [12:21<21:52,  1.35s/it]

Everolimus + Selumetinib


 31%|███▏      | 446/1417 [12:22<21:37,  1.34s/it]

Pimasertib + Regorafenib


 32%|███▏      | 447/1417 [12:24<21:28,  1.33s/it]

BI-882370 + Cetuximab


 32%|███▏      | 449/1417 [12:27<22:21,  1.39s/it]

Trametinib + TW-37


 32%|███▏      | 450/1417 [12:28<22:07,  1.37s/it]

Cediranib + PLX4720


 32%|███▏      | 451/1417 [12:29<21:51,  1.36s/it]

Pimasertib + Sorafenib


 32%|███▏      | 453/1417 [12:32<22:48,  1.42s/it]

Dasatinib + SCH772984


 32%|███▏      | 454/1417 [12:34<22:28,  1.40s/it]

Alpelisib + PLX4720


 32%|███▏      | 455/1417 [12:35<22:09,  1.38s/it]

SBI-0640726


 32%|███▏      | 456/1417 [12:36<21:49,  1.36s/it]

Palbociclib + PD-0325901


 32%|███▏      | 457/1417 [12:38<21:42,  1.36s/it]

Everolimus + Pimasertib


 32%|███▏      | 459/1417 [12:41<22:32,  1.41s/it]

PAC-1 + Trametinib + Vemurafenib


 32%|███▏      | 460/1417 [12:42<22:15,  1.40s/it]

PLX4720 + Selumetinib


 33%|███▎      | 461/1417 [12:43<21:54,  1.38s/it]

DETD-35


 33%|███▎      | 462/1417 [12:45<22:09,  1.39s/it]

Lapatinib + Panobinostat


 33%|███▎      | 463/1417 [12:46<22:26,  1.41s/it]

DETD-35 + Vemurafenib


 33%|███▎      | 464/1417 [12:48<22:25,  1.41s/it]

PLX4720 + Tivozanib


 33%|███▎      | 465/1417 [12:49<22:02,  1.39s/it]

Navitoclax + Trametinib


 33%|███▎      | 466/1417 [12:50<21:48,  1.38s/it]

PF-00477736 + PF3644022


 33%|███▎      | 467/1417 [12:52<21:28,  1.36s/it]

Dasatinib + Selumetinib


 33%|███▎      | 469/1417 [12:55<22:18,  1.41s/it]

Cetuximab + SCH772984


 33%|███▎      | 471/1417 [12:58<23:02,  1.46s/it]

Cetuximab + Dabrafenib


 33%|███▎      | 472/1417 [12:59<22:21,  1.42s/it]

Lapatinib + Vemurafenib


 33%|███▎      | 473/1417 [13:00<21:56,  1.39s/it]

CLM3


 34%|███▎      | 476/1417 [13:05<23:01,  1.47s/it]

BGB-283 + Cetuximab


 34%|███▎      | 477/1417 [13:06<22:29,  1.44s/it]

Encorafenib + Ribociclib


 34%|███▎      | 478/1417 [13:08<22:27,  1.43s/it]

Cetuximab + Sorafenib


 34%|███▍      | 479/1417 [13:09<21:54,  1.40s/it]

Cediranib + PLX4720 + Selumetinib


 34%|███▍      | 480/1417 [13:10<21:44,  1.39s/it]

Erlotinib + PLX4720


 34%|███▍      | 482/1417 [13:14<23:23,  1.50s/it]

BI-882370 + Trametinib


 34%|███▍      | 483/1417 [13:15<23:16,  1.49s/it]

Dabrafenib + Omipalisib


 34%|███▍      | 484/1417 [13:17<22:27,  1.44s/it]

LSN3074753


 34%|███▍      | 485/1417 [13:18<22:13,  1.43s/it]

Binimetinib + Cetuximab + Encorafenib


 34%|███▍      | 487/1417 [13:21<22:28,  1.45s/it]

DT01 + Fluorouracil + Oxaliplatin


 34%|███▍      | 488/1417 [13:22<21:55,  1.42s/it]

PLX4720 + Vorinostat


 35%|███▍      | 489/1417 [13:24<22:19,  1.44s/it]

Omipalisib + Trametinib


 35%|███▍      | 491/1417 [13:27<22:24,  1.45s/it]

Cetuximab + Dabrafenib + Trametinib


 35%|███▍      | 492/1417 [13:28<21:46,  1.41s/it]

Gefitinib + Vemurafenib


 35%|███▍      | 493/1417 [13:29<21:21,  1.39s/it]

Dabrafenib + Panitumumab + Trametinib


 35%|███▍      | 494/1417 [13:31<21:07,  1.37s/it]

Doxorubicin + PLX4720


 35%|███▍      | 495/1417 [13:32<21:03,  1.37s/it]

BI2536 + PLX4720


 35%|███▌      | 496/1417 [13:33<20:50,  1.36s/it]

Cetuximab + Encorafenib


 35%|███▌      | 497/1417 [13:35<20:44,  1.35s/it]

Irinotecan + Panitumumab


 35%|███▌      | 498/1417 [13:36<20:32,  1.34s/it]

Encorafenib + Binimetinib


 35%|███▌      | 499/1417 [13:37<20:26,  1.34s/it]

Encorafenib + Binimetinib + Cetuximab


 35%|███▌      | 500/1417 [13:39<20:29,  1.34s/it]

JAK inhibitors


 35%|███▌      | 501/1417 [13:40<20:28,  1.34s/it]

Ibrutinib + Ruxolitinib


 35%|███▌      | 502/1417 [13:41<20:23,  1.34s/it]

Everolimus + Letrozole


 36%|███▌      | 510/1417 [13:55<23:46,  1.57s/it]

Dasatinib + Erlotinib


 36%|███▌      | 511/1417 [13:56<22:36,  1.50s/it]

Nilotinib + Saracatinib


 36%|███▋      | 515/1417 [14:02<23:03,  1.53s/it]

Afatinib + Cetuximab


 36%|███▋      | 516/1417 [14:04<22:08,  1.47s/it]

Afatinib + Nimotuzumab


 36%|███▋      | 517/1417 [14:05<21:35,  1.44s/it]

Cetuximab + Sirolimus


 37%|███▋      | 518/1417 [14:06<21:07,  1.41s/it]

Pemetrexed + Erlotinib


 37%|███▋      | 519/1417 [14:08<20:42,  1.38s/it]

Futuximab


 37%|███▋      | 520/1417 [14:09<21:22,  1.43s/it]

Sirolimus + Cetuximab


 37%|███▋      | 525/1417 [14:19<28:22,  1.91s/it]

MM-151


 37%|███▋      | 527/1417 [14:22<25:19,  1.71s/it]

Bevacizumab + Erlotinib


 37%|███▋      | 529/1417 [14:25<23:33,  1.59s/it]

Buparlisib + Osimertinib


 37%|███▋      | 530/1417 [14:27<22:22,  1.51s/it]

Osimertinib + Selumetinib


 37%|███▋      | 531/1417 [14:28<21:30,  1.46s/it]

Gefitinib + Pemetrexed


 38%|███▊      | 533/1417 [14:31<21:33,  1.46s/it]

Cabozantinib + Erlotinib


 38%|███▊      | 535/1417 [14:34<21:18,  1.45s/it]

hEGFRvIII-CD3 bi-scFv


 38%|███▊      | 537/1417 [14:37<20:23,  1.39s/it]

Erlotinib + Ganetespib


 38%|███▊      | 538/1417 [14:38<20:05,  1.37s/it]

Cisplatin + ER2


 38%|███▊      | 539/1417 [14:39<19:52,  1.36s/it]

Trametinib + WZ4002


 38%|███▊      | 542/1417 [14:44<21:21,  1.47s/it]

BEZ235 + Selumetinib


 38%|███▊      | 543/1417 [14:45<20:50,  1.43s/it]

Amlexanox + Selumetinib


 38%|███▊      | 544/1417 [14:47<20:28,  1.41s/it]

Necitumumab


 38%|███▊      | 545/1417 [14:48<20:10,  1.39s/it]

Bevacizumab + Gefitinib


 39%|███▊      | 546/1417 [14:49<19:54,  1.37s/it]

Bevacizumab + Osimertinib


 39%|███▊      | 549/1417 [14:54<21:03,  1.46s/it]

Afatinib + Sirolimus


 39%|███▉      | 550/1417 [14:55<20:33,  1.42s/it]

Trastuzumab + Neratinib + Lapatinib


 39%|███▉      | 551/1417 [14:57<20:09,  1.40s/it]

ERBB2 TKIs


 39%|███▉      | 552/1417 [14:58<19:42,  1.37s/it]

Trastuzumab-based chemotherapy


 39%|███▉      | 553/1417 [14:59<19:30,  1.36s/it]

Compound 23814 + Tivozanib


 39%|███▉      | 557/1417 [15:06<21:57,  1.53s/it]

Trastuzumab + XL147


 39%|███▉      | 558/1417 [15:07<21:05,  1.47s/it]

Cetuximab + Lapatinib


 39%|███▉      | 559/1417 [15:08<20:24,  1.43s/it]

Neratinib + Trastuzumab


 40%|███▉      | 560/1417 [15:10<19:55,  1.40s/it]

JQ1 + Osimertinib


 40%|███▉      | 561/1417 [15:11<19:39,  1.38s/it]

anti-HER3 mAbs


 40%|███▉      | 562/1417 [15:12<19:25,  1.36s/it]

PI3K pathway inhibitors + MEK inhibitors


 40%|███▉      | 563/1417 [15:14<19:15,  1.35s/it]

Paclitaxel + Carboplatin


 40%|███▉      | 564/1417 [15:15<19:05,  1.34s/it]

Paclitaxel + Cisplatin


 40%|███▉      | 565/1417 [15:16<19:12,  1.35s/it]

EZH2 inhibitors


 40%|███▉      | 566/1417 [15:18<19:07,  1.35s/it]

ACY-957 + GSK126


 40%|████      | 568/1417 [15:21<19:59,  1.41s/it]

ACY-957 + DZNeP


 40%|████      | 571/1417 [15:25<20:51,  1.48s/it]

ACY-957 + Doxorubicin


 40%|████      | 572/1417 [15:27<20:15,  1.44s/it]

EED226 + EI1


 41%|████      | 577/1417 [15:35<21:46,  1.56s/it]

GSK3052230


 41%|████      | 582/1417 [15:43<21:41,  1.56s/it]

SU11248 + Cytarabine


 41%|████      | 583/1417 [15:45<20:46,  1.49s/it]

SU11248 + Daunorubicin


 41%|████▏     | 586/1417 [15:50<21:53,  1.58s/it]

Cytarabine + Daunorubicin + Midostaurin


 41%|████▏     | 587/1417 [15:51<20:47,  1.50s/it]

Arsenic trioxide + Cytarabine + Methotrexate + Tretinoin


 41%|████▏     | 588/1417 [15:52<20:04,  1.45s/it]

Crenolanib + Trametinib


 42%|████▏     | 590/1417 [15:55<20:36,  1.50s/it]

Azacitidine + Sorafenib


 42%|████▏     | 592/1417 [15:58<20:30,  1.49s/it]

AEB071 + Binimetinib


 42%|████▏     | 594/1417 [16:01<20:38,  1.50s/it]

AEB071 + PD-0325901


 42%|████▏     | 599/1417 [16:09<21:07,  1.55s/it]

Binimetinib + Everolimus


 43%|████▎     | 604/1417 [16:18<22:23,  1.65s/it]

AZD8055 + Binimetinib


 43%|████▎     | 605/1417 [16:19<21:08,  1.56s/it]

Sirolimus + Trametinib


 43%|████▎     | 607/1417 [16:22<20:21,  1.51s/it]

Oxaliplatin + Fluorouracil


 43%|████▎     | 608/1417 [16:24<19:37,  1.46s/it]

IDH1 inhibitors


 43%|████▎     | 609/1417 [16:25<19:04,  1.42s/it]

PARP inhibitors


 43%|████▎     | 612/1417 [16:30<20:03,  1.49s/it]

Temozolomide + Vandetanib


 43%|████▎     | 613/1417 [16:31<19:24,  1.45s/it]

Cisplatin + Talazoparib


 43%|████▎     | 616/1417 [16:35<18:55,  1.42s/it]

IDH2 inhibitors


 44%|████▎     | 618/1417 [16:39<19:51,  1.49s/it]

Peginterferon alfa-2a


 44%|████▍     | 621/1417 [16:43<20:06,  1.52s/it]

AZD1208 + Ruxolitinib


 44%|████▍     | 623/1417 [16:46<19:44,  1.49s/it]

Ruxolitinib + ZSTK474


 44%|████▍     | 625/1417 [16:49<19:34,  1.48s/it]

Ruxolitinib + SGI-1776


 44%|████▍     | 627/1417 [16:52<19:37,  1.49s/it]

BEZ235 + Ruxolitinib


 44%|████▍     | 629/1417 [16:55<19:34,  1.49s/it]

Pictilisib + Ruxolitinib


 45%|████▍     | 634/1417 [17:03<20:04,  1.54s/it]

VEGFR inhibitors


 45%|████▍     | 635/1417 [17:05<19:09,  1.47s/it]

unspecified VEGFR2 antibody


 45%|████▌     | 638/1417 [17:09<19:32,  1.50s/it]

Bevacizumab + Sorafenib


 45%|████▌     | 640/1417 [17:12<19:14,  1.49s/it]

Cytarabine + Dasatinib


 45%|████▌     | 641/1417 [17:14<18:42,  1.45s/it]

BPR1J373


 45%|████▌     | 643/1417 [17:17<19:01,  1.47s/it]

Infigratinib + Imatinib


 45%|████▌     | 644/1417 [17:18<18:31,  1.44s/it]

G007-LK + Imatinib


 46%|████▌     | 645/1417 [17:20<18:07,  1.41s/it]

Dactolisib + Selumetinib


 46%|████▌     | 647/1417 [17:23<18:23,  1.43s/it]

Docetaxel + Selumetinib


 46%|████▌     | 648/1417 [17:24<17:57,  1.40s/it]

EGFR inhibitor + ARS-853


 46%|████▌     | 649/1417 [17:26<18:56,  1.48s/it]

Irinotecan + Selumetinib


 46%|████▌     | 650/1417 [17:27<18:47,  1.47s/it]

Panitumumab + Trametinib


 46%|████▌     | 651/1417 [17:28<18:06,  1.42s/it]

Cetuximab + Trametinib


 46%|████▋     | 658/1417 [17:40<21:42,  1.72s/it]

Ruxolitinib + TGX-221


 47%|████▋     | 660/1417 [17:44<20:08,  1.60s/it]

LY-294002 + Capivasertib


 47%|████▋     | 661/1417 [17:45<19:17,  1.53s/it]

Pazopanib + Everolimus


 47%|████▋     | 662/1417 [17:46<18:27,  1.47s/it]

IMG-2005-5


 47%|████▋     | 663/1417 [17:48<17:56,  1.43s/it]

IRAK-1 or IRAK-4 inhibitors


 47%|████▋     | 666/1417 [17:52<18:04,  1.44s/it]

Tretinoin + NSC348884


 47%|████▋     | 667/1417 [17:53<17:42,  1.42s/it]

Anti-CD33 + Anti-CD123


 47%|████▋     | 668/1417 [17:55<17:24,  1.39s/it]

Induction Therapy


 47%|████▋     | 669/1417 [17:56<17:06,  1.37s/it]

Everolimus + Binimetinib


 47%|████▋     | 671/1417 [17:59<17:39,  1.42s/it]

Alpelisib + Binimetinib


 47%|████▋     | 672/1417 [18:00<17:16,  1.39s/it]

Compound 3144


 48%|████▊     | 674/1417 [18:03<17:42,  1.43s/it]

Cetuximab + Dactolisib


 48%|████▊     | 675/1417 [18:05<17:26,  1.41s/it]

AZD5363 + Trastuzumab


 48%|████▊     | 676/1417 [18:06<17:06,  1.39s/it]

AZD5363 + Lapatinib


 48%|████▊     | 677/1417 [18:07<17:05,  1.39s/it]

Everolimus + Fulvestrant


 48%|████▊     | 679/1417 [18:10<17:27,  1.42s/it]

Fulvestrant + Alpelisib


 48%|████▊     | 680/1417 [18:12<17:03,  1.39s/it]

Cetuximab + Floxuridine + Fluorouracil + Irinotecan


 48%|████▊     | 682/1417 [18:15<17:31,  1.43s/it]

Alpelisib + Fulvestrant


 48%|████▊     | 683/1417 [18:16<17:06,  1.40s/it]

Gemcitabine + LY2780301


 48%|████▊     | 684/1417 [18:17<16:48,  1.38s/it]

Cetuximab + Irinotecan


 48%|████▊     | 685/1417 [18:19<16:45,  1.37s/it]

PD-0325901 + Pictilisib


 48%|████▊     | 686/1417 [18:20<16:35,  1.36s/it]

MK2206 + Temsirolimus


 49%|████▊     | 688/1417 [18:23<17:28,  1.44s/it]

YM-024


 49%|████▊     | 690/1417 [18:26<17:39,  1.46s/it]

DHM25


 49%|████▉     | 692/1417 [18:29<17:41,  1.46s/it]

SHR-A1307


 49%|████▉     | 694/1417 [18:32<17:51,  1.48s/it]

Radiotherapy + Taselisib


 49%|████▉     | 695/1417 [18:34<17:24,  1.45s/it]

Cisplatin + Pictilisib


 49%|████▉     | 696/1417 [18:35<17:03,  1.42s/it]

Buparlisib + Paclitaxel


 49%|████▉     | 698/1417 [18:38<17:30,  1.46s/it]

Alpelisib + Letrozole


 49%|████▉     | 699/1417 [18:39<16:53,  1.41s/it]

Pictilisib + Sorafenib


 49%|████▉     | 700/1417 [18:41<16:35,  1.39s/it]

Gemcitabine + Pictilisib


 49%|████▉     | 701/1417 [18:42<16:18,  1.37s/it]

Carboplatin + Paclitaxel + Temsirolimus


 50%|████▉     | 702/1417 [18:43<16:07,  1.35s/it]

PW12


 50%|████▉     | 706/1417 [18:50<17:52,  1.51s/it]

Dactolisib + WEHI-539


 50%|████▉     | 708/1417 [18:53<17:37,  1.49s/it]

Copanlisib + Paclitaxel


 50%|█████     | 709/1417 [18:54<16:58,  1.44s/it]

Bevacizumab + FOLFIRI Regimen


 50%|█████     | 710/1417 [18:55<16:32,  1.40s/it]

MK-2206 + PD184352


 50%|█████     | 714/1417 [19:02<17:44,  1.51s/it]

Cixutumumab


 50%|█████     | 715/1417 [19:03<17:06,  1.46s/it]

Teprotumumab


 51%|█████     | 716/1417 [19:04<16:42,  1.43s/it]

AZD7762 + Irinotecan


 51%|█████     | 717/1417 [19:06<16:16,  1.40s/it]

Irinotecan + CHK1 or CHK2 inhibitors


 51%|█████     | 720/1417 [19:14<27:37,  2.38s/it]

HG-6-63-01


 51%|█████     | 721/1417 [19:15<24:01,  2.07s/it]

XMD15-44


 51%|█████     | 722/1417 [19:16<21:21,  1.84s/it]

Spliceosome inhibitors


 51%|█████     | 725/1417 [19:21<18:51,  1.63s/it]

Arsenic trioxide + Posaconazole


 51%|█████▏    | 728/1417 [19:25<17:33,  1.53s/it]

Everolimus + Radiotherapy


 51%|█████▏    | 729/1417 [19:27<16:50,  1.47s/it]

Everolimus + Exemestane


 52%|█████▏    | 730/1417 [19:28<16:17,  1.42s/it]

Mitomycin + Cisplatin + Etoposide


 52%|█████▏    | 731/1417 [19:29<15:57,  1.40s/it]

HSP90 inhibitors


 52%|█████▏    | 734/1417 [19:34<16:12,  1.42s/it]

APR-246 + Cisplatin


 52%|█████▏    | 735/1417 [19:35<16:05,  1.42s/it]

Cisplatin + Irinotecan


 52%|█████▏    | 737/1417 [19:38<17:08,  1.51s/it]

Cyclophosphamide + Doxorubicin + Fluorouracil


 52%|█████▏    | 738/1417 [19:40<16:37,  1.47s/it]

Doxorubicin + Ifosfamide


 52%|█████▏    | 742/1417 [19:48<18:48,  1.67s/it]

Alvespimycin + Vorinostat


 53%|█████▎    | 744/1417 [19:51<17:38,  1.57s/it]

MK-1775 + Carboplatin


 53%|█████▎    | 745/1417 [19:52<16:46,  1.50s/it]

Ibrutinib + Rituximab


 53%|█████▎    | 746/1417 [19:53<16:12,  1.45s/it]

Tanespimycin + Vorinostat


 53%|█████▎    | 747/1417 [19:55<15:56,  1.43s/it]

Gemcitabine + PLATINUM


 54%|█████▍    | 770/1417 [20:34<17:27,  1.62s/it]

Venotoclax


 54%|█████▍    | 772/1417 [20:37<17:13,  1.60s/it]

QL-XII-61


 55%|█████▌    | 783/1417 [20:55<16:19,  1.55s/it]

XMD15-27


 55%|█████▌    | 785/1417 [20:58<15:51,  1.51s/it]

BDP-00009066


 56%|█████▌    | 787/1417 [21:01<15:42,  1.50s/it]

THZ-2-102-1


 56%|█████▌    | 788/1417 [21:03<15:05,  1.44s/it]

CDK9_5038


 56%|█████▌    | 789/1417 [21:04<14:42,  1.41s/it]

CDK9_5576


 56%|█████▌    | 791/1417 [21:07<15:05,  1.45s/it]

THZ-2-49


 56%|█████▋    | 799/1417 [21:20<15:58,  1.55s/it]

Dyrk1b_0191


 57%|█████▋    | 809/1417 [21:37<15:58,  1.58s/it]

EphB4_9721


 58%|█████▊    | 815/1417 [21:47<15:55,  1.59s/it]

IGF1R_3801


 58%|█████▊    | 816/1417 [21:48<15:07,  1.51s/it]

IGFR_3801


 58%|█████▊    | 818/1417 [21:51<14:47,  1.48s/it]

FGFR_0939


 58%|█████▊    | 820/1417 [21:54<14:42,  1.48s/it]

AZD7969


 58%|█████▊    | 821/1417 [21:55<14:14,  1.43s/it]

KIN001-042


 59%|█████▊    | 832/1417 [22:15<16:13,  1.66s/it]

KIN001-260


 59%|█████▉    | 833/1417 [22:16<15:14,  1.57s/it]

IRAK4_4710


 59%|█████▉    | 835/1417 [22:19<14:44,  1.52s/it]

JAK1_3715


 59%|█████▉    | 836/1417 [22:21<14:11,  1.47s/it]

JAK1_8709


 59%|█████▉    | 837/1417 [22:22<13:47,  1.43s/it]

JAK_8517


 59%|█████▉    | 838/1417 [22:23<13:30,  1.40s/it]

JAK3_7406


 60%|██████    | 851/1417 [22:46<17:30,  1.86s/it]

ERK_2440


 60%|██████    | 852/1417 [22:47<15:58,  1.70s/it]

ERK_6604


 61%|██████    | 859/1417 [23:00<15:36,  1.68s/it]

MCT1_6447


 61%|██████    | 860/1417 [23:02<14:34,  1.57s/it]

Nutlin-3a (-)


 61%|██████    | 867/1417 [23:13<13:49,  1.51s/it]

JW-7-52-1


 61%|██████▏   | 868/1417 [23:14<13:19,  1.46s/it]

QL-VIII-58


 62%|██████▏   | 878/1417 [23:31<14:19,  1.59s/it]

PAK_5339


 64%|██████▍   | 907/1417 [24:20<13:21,  1.57s/it]

MCT4_1422


 65%|██████▍   | 914/1417 [24:31<13:17,  1.59s/it]

TAF1_5496


 65%|██████▍   | 915/1417 [24:33<12:39,  1.51s/it]

KIN001-236


 65%|██████▌   | 926/1417 [24:51<12:23,  1.51s/it]

MPS-1-IN-1


 65%|██████▌   | 927/1417 [24:52<12:41,  1.55s/it]

TTK_3146


 65%|██████▌   | 928/1417 [24:54<12:08,  1.49s/it]

ULK1_4989


 67%|██████▋   | 948/1417 [25:55<13:05,  1.67s/it]

rasburicase


 67%|██████▋   | 954/1417 [26:05<12:09,  1.58s/it]

ivacaftor / lumacaftor


 68%|██████▊   | 957/1417 [26:10<11:58,  1.56s/it]

hormonal contraceptives for systemic use


 70%|███████   | 997/1417 [27:16<12:09,  1.74s/it]

aminoglycoside antibacterials


 72%|███████▏  | 1024/1417 [28:01<10:00,  1.53s/it]

hmg coa reductase inhibitors


 73%|███████▎  | 1033/1417 [28:16<09:48,  1.53s/it]

antiepileptics


 73%|███████▎  | 1035/1417 [28:19<09:34,  1.50s/it]

etanercept


 73%|███████▎  | 1036/1417 [28:20<09:10,  1.45s/it]

Platinum compounds


 73%|███████▎  | 1041/1417 [28:27<08:38,  1.38s/it]

rituximab


 74%|███████▎  | 1043/1417 [28:30<08:58,  1.44s/it]

Tumor necrosis factor alpha (TNF-alpha) inhibitors


 74%|███████▎  | 1045/1417 [28:33<09:06,  1.47s/it]

diuretics


 74%|███████▍  | 1053/1417 [28:46<09:29,  1.56s/it]

trastuzumab


 74%|███████▍  | 1055/1417 [28:49<09:11,  1.52s/it]

topoisomerase I inhibitors


 75%|███████▍  | 1060/1417 [28:59<10:05,  1.70s/it]

cetuximab


 75%|███████▌  | 1067/1417 [29:11<09:17,  1.59s/it]

Beta Blocking Agents


 75%|███████▌  | 1068/1417 [29:13<08:46,  1.51s/it]

anthracyclines and related substances


 76%|███████▌  | 1072/1417 [29:19<08:55,  1.55s/it]

Measles vaccines


 76%|███████▌  | 1073/1417 [29:20<08:33,  1.49s/it]

antipsychotics


 76%|███████▋  | 1082/1417 [29:36<08:28,  1.52s/it]

Antihypertensives And Diuretics In Combination


 77%|███████▋  | 1091/1417 [29:50<07:54,  1.45s/it]

antidepressants


 77%|███████▋  | 1096/1417 [29:58<08:15,  1.54s/it]

pitrakinra


 78%|███████▊  | 1100/1417 [30:04<07:48,  1.48s/it]

nitroprusside


 78%|███████▊  | 1110/1417 [30:20<07:47,  1.52s/it]

Alkylating Agents


 79%|███████▉  | 1123/1417 [30:41<07:22,  1.50s/it]

corticosteroids


 80%|███████▉  | 1131/1417 [30:54<07:26,  1.56s/it]

Enzymes


 81%|████████  | 1142/1417 [31:12<07:09,  1.56s/it]

Bisphosphonates


 81%|████████  | 1143/1417 [31:14<06:49,  1.49s/it]

antineoplastic agents


 81%|████████  | 1151/1417 [31:26<06:52,  1.55s/it]

Drugs used in alcohol dependence


 81%|████████▏ | 1154/1417 [31:31<06:15,  1.43s/it]

eculizumab


 82%|████████▏ | 1164/1417 [31:46<06:38,  1.57s/it]

radiotherapy


 83%|████████▎ | 1177/1417 [32:09<06:19,  1.58s/it]

bevacizumab


 83%|████████▎ | 1182/1417 [32:17<05:50,  1.49s/it]

ustekinumab


 84%|████████▎ | 1184/1417 [32:20<05:53,  1.52s/it]

egfr inhibitors


 85%|████████▍ | 1200/1417 [32:48<05:48,  1.61s/it]

Selective serotonin reuptake inhibitors


 85%|████████▍ | 1201/1417 [32:49<05:29,  1.52s/it]

thiazolidinediones


 85%|████████▍ | 1204/1417 [32:54<05:27,  1.54s/it]

Influenza vaccines


 85%|████████▌ | 1208/1417 [33:00<05:25,  1.56s/it]

antithymocyte globulin


 85%|████████▌ | 1209/1417 [33:02<05:10,  1.49s/it]

Pertussis vaccines


 86%|████████▌ | 1214/1417 [33:11<05:29,  1.62s/it]

adalimumab


 86%|████████▌ | 1221/1417 [33:24<05:43,  1.75s/it]

calcium channel blockers


 86%|████████▋ | 1224/1417 [33:29<05:11,  1.61s/it]

Opium alkaloids and derivatives


 87%|████████▋ | 1234/1417 [33:46<04:55,  1.61s/it]

infliximab


 87%|████████▋ | 1235/1417 [33:47<04:37,  1.53s/it]

Antihypertensives


 87%|████████▋ | 1236/1417 [33:49<04:25,  1.47s/it]

Drugs For Treatment Of Tuberculosis


 87%|████████▋ | 1237/1417 [33:50<04:17,  1.43s/it]

Hepatitis vaccines


 88%|████████▊ | 1240/1417 [33:55<04:23,  1.49s/it]

pegloticase


 88%|████████▊ | 1241/1417 [33:56<04:13,  1.44s/it]

peginterferon alfa-2b


 88%|████████▊ | 1242/1417 [33:57<04:05,  1.40s/it]

selective beta-2-adrenoreceptor agonists


 88%|████████▊ | 1246/1417 [34:04<04:29,  1.58s/it]

catecholamines


 89%|████████▉ | 1263/1417 [34:33<03:58,  1.55s/it]

ranibizumab


 89%|████████▉ | 1266/1417 [34:38<03:53,  1.55s/it]

Dipeptidyl peptidase 4 (DPP-4) inhibitors


 90%|████████▉ | 1269/1417 [34:43<04:00,  1.62s/it]

Photodynamic therapy


 90%|████████▉ | 1270/1417 [34:44<03:44,  1.53s/it]

interferons


 90%|█████████ | 1278/1417 [34:57<03:39,  1.58s/it]

Dihydropyridine derivatives


 90%|█████████ | 1279/1417 [34:59<03:28,  1.51s/it]

Analgesics


 90%|█████████ | 1282/1417 [35:03<03:25,  1.52s/it]

tocilizumab


 91%|█████████ | 1288/1417 [35:13<03:22,  1.57s/it]

Antibiotics


 91%|█████████ | 1292/1417 [35:20<03:17,  1.58s/it]

interferon beta-1a


 91%|█████████▏| 1296/1417 [35:26<03:13,  1.60s/it]

gemtuzumab ozogamicin


 92%|█████████▏| 1301/1417 [35:34<03:02,  1.58s/it]

Antithyroid Preparations


 92%|█████████▏| 1306/1417 [35:43<02:59,  1.62s/it]

Drugs Used In Diabetes


 93%|█████████▎| 1323/1417 [36:12<02:30,  1.60s/it]

highly active antiretroviral therapy (haart)


 94%|█████████▍| 1333/1417 [36:48<07:21,  5.26s/it]

glucocorticoids


 94%|█████████▍| 1338/1417 [37:04<04:06,  3.12s/it]

botulinum toxin type a


 96%|█████████▋| 1364/1417 [37:47<01:24,  1.60s/it]

peginterferon alfa-2a


 97%|█████████▋| 1374/1417 [38:03<01:08,  1.60s/it]

direct acting antivirals


100%|██████████| 1417/1417 [39:16<00:00,  1.66s/it]


In [9]:
# Annotate every row of the drug table with the ICD-11 entity URI that the
# WHO ICD API returns as the top search hit for the row's disease name, then
# persist the annotated table.
import os  # cell-local import: only needed to read the API credentials

all_data_df = pd.read_csv('middlefile/drug_table.tsv', sep='\t')
disease_list = all_data_df['disease name'].unique()

# --- WHO ICD API configuration ----------------------------------------------
# Credentials are read from the environment rather than hardcoded. The client
# secret that was previously embedded in this notebook must be considered
# leaked and should be rotated in the WHO ICD API portal.
token_endpoint = 'https://icdaccessmanagement.who.int/connect/token'
search_endpoint = 'https://id.who.int/icd/entity/search'
client_id = os.environ['ICD_CLIENT_ID']          # KeyError if not configured
client_secret = os.environ['ICD_CLIENT_SECRET']  # KeyError if not configured
scope = 'icdapi_access'
grant_type = 'client_credentials'
request_timeout = 30  # seconds; keeps one stalled request from hanging the cell

# --- get the OAUTH2 token ---------------------------------------------------
payload = {
    'client_id': client_id,
    'client_secret': client_secret,
    'scope': scope,
    'grant_type': grant_type,
}

# TLS verification is left at the requests default (enabled): this request
# carries the client secret and the following ones carry the bearer token.
token_response = requests.post(token_endpoint, data=payload, timeout=request_timeout)
token_response.raise_for_status()  # fail loudly here instead of a KeyError below
token = token_response.json()['access_token']

# HTTP header fields sent with every search request
headers = {
    'Authorization': 'Bearer ' + token,
    'Accept': 'json',
    'Accept-Language': 'en',
    'API-Version': 'v2',
}

# --- look up each unique disease name once ----------------------------------
icd11_by_disease = {}     # disease name -> id of the first search hit
unmatched_diseases = []   # names with no hit or a failed request, for inspection

with requests.Session() as session:  # reuse one connection for all lookups
    for disease in tqdm(disease_list, desc='ICD-11 lookup'):
        if pd.isna(disease):
            # Rows without a disease name cannot be searched; they keep a
            # missing icd11 value.
            continue
        try:
            # `params` URL-encodes the query, so names containing '&', '#',
            # '+', '%' or ',' reach the API intact.
            r = session.get(
                search_endpoint,
                params={'q': disease},
                headers=headers,
                timeout=request_timeout,
            )
            r.raise_for_status()
            icd11_by_disease[disease] = r.json()['destinationEntities'][0]['id']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
            # Best-effort lookup: a network/HTTP error, a non-JSON body, or an
            # empty/missing result list leaves this disease unmapped.
            unmatched_diseases.append(disease)

# Single vectorised assignment; names absent from the mapping become NaN.
all_data_df['icd11'] = all_data_df['disease name'].map(icd11_by_disease)

all_data_df.to_csv('middlefile/icd11_table.tsv', sep='\t', index=False)
all_data_df['icd11'].value_counts()


  0%|          | 0/844 [00:00<?, ?it/s]

nan


  2%|▏         | 16/844 [00:21<19:22,  1.40s/it]

T-cell Lymphoblastic Leukemia/lymphoma


  2%|▏         | 17/844 [00:22<19:28,  1.41s/it]

Estrogen-receptor Positive Breast Cancer


  2%|▏         | 19/844 [00:25<19:24,  1.41s/it]

Malignant Anus Melanoma


  3%|▎         | 22/844 [00:29<19:25,  1.42s/it]

Invasive Bladder Transitional Cell Carcinoma


  4%|▍         | 33/844 [00:45<19:08,  1.42s/it]

Her2-receptor Positive Breast Cancer


  7%|▋         | 61/844 [01:25<18:32,  1.42s/it]

Uterine Corpus Endometrial Carcinoma


 10%|▉         | 82/844 [01:54<17:52,  1.41s/it]

Colon Mucinous Adenocarcinoma


 10%|█         | 85/844 [01:59<17:54,  1.42s/it]

Colorectal Adenocarcinoma


 12%|█▏        | 104/844 [02:26<17:31,  1.42s/it]

B-cell Adult Acute Lymphocytic Leukemia


 14%|█▎        | 114/844 [02:40<17:15,  1.42s/it]

Triple-receptor Negative Breast Cancer


 14%|█▎        | 115/844 [02:41<17:21,  1.43s/it]

Scrotum Paget's Disease


 14%|█▍        | 118/844 [02:45<17:09,  1.42s/it]

Ovarian Serous Cystadenocarcinoma


 15%|█▍        | 123/844 [02:52<17:04,  1.42s/it]

Her2-receptor Negative Breast Cancer


 15%|█▍        | 125/844 [02:55<17:08,  1.43s/it]

Mammary Analogue Secretory Carcinoma


 16%|█▌        | 137/844 [03:12<16:52,  1.43s/it]

Childhood Pilocytic Astrocytoma


 17%|█▋        | 141/844 [03:18<16:36,  1.42s/it]

Ureter Small Cell Carcinoma


 18%|█▊        | 150/844 [03:31<16:20,  1.41s/it]

Epithelioid Inflammatory Myofibroblastic Sarcoma


 19%|█▉        | 159/844 [03:44<16:14,  1.42s/it]

Childhood B-cell Acute Lymphoblastic Leukemia


 19%|█▉        | 161/844 [03:46<16:04,  1.41s/it]

Childhood Low-grade Glioma


 20%|█▉        | 165/844 [03:52<16:00,  1.41s/it]

Histiocytic And Dendritic Cell Cancer


 20%|█▉        | 166/844 [03:53<16:00,  1.42s/it]

Glioblastoma Proneural Subtype


 20%|█▉        | 168/844 [03:56<16:02,  1.42s/it]

Childhood Acute Lymphocytic Leukemia


 21%|██        | 175/844 [04:06<15:50,  1.42s/it]

Childhood Ependymoma


 21%|██        | 176/844 [04:08<15:45,  1.42s/it]

Diffuse Large B-cell Lymphoma Activated B-cell Type


 21%|██        | 179/844 [04:12<15:41,  1.42s/it]

Pancreatic Acinar Cell Adenocarcinoma


 22%|██▏       | 182/844 [04:16<15:40,  1.42s/it]

Diffuse Large B-cell Lymphoma Germinal Center B-cell Type


 22%|██▏       | 184/844 [04:19<15:39,  1.42s/it]

over-anticoagulation


 22%|██▏       | 185/844 [04:20<15:37,  1.42s/it]

HIV Infections,Hyperbilirubinemia


 22%|██▏       | 186/844 [04:22<15:36,  1.42s/it]

dose reduction


 23%|██▎       | 190/844 [04:28<15:29,  1.42s/it]

Depressive Disorder, Major,Mental Disorders


 23%|██▎       | 192/844 [04:30<15:18,  1.41s/it]

Hemolysis,Methemoglobinemia


 24%|██▎       | 199/844 [04:40<15:16,  1.42s/it]

Inflammatory Bowel Diseases,Myelosuppression


 24%|██▍       | 201/844 [04:43<15:11,  1.42s/it]

Drug Hypersensitivity,HIV Infections


 24%|██▍       | 202/844 [04:45<15:11,  1.42s/it]

Drug Hypersensitivity,drug reaction with eosinophilia and systemic symptoms,Epidermal Necrolysis, Toxic,severe cutaneous adverse reactions,Stevens-Johnson Syndrome


 25%|██▍       | 207/844 [04:52<14:59,  1.41s/it]

Depressive Disorder,Mental Disorders


 25%|██▌       | 215/844 [05:03<14:51,  1.42s/it]

cardiotoxicity,Respiratory Insufficiency


 26%|██▌       | 219/844 [05:09<14:38,  1.41s/it]

Arrhythmias, Cardiac,Tachycardia


 26%|██▌       | 221/844 [05:11<14:38,  1.41s/it]

Esophagitis,Gastroesophageal Reflux,Helicobacter Infections,Peptic Ulcer


 26%|██▋       | 222/844 [05:13<14:44,  1.42s/it]

Gastroesophageal Reflux,Helicobacter Infections


 26%|██▋       | 223/844 [05:14<14:44,  1.42s/it]

Helicobacter Infections,Ulcer


 27%|██▋       | 224/844 [05:16<14:44,  1.43s/it]

Leukopenia,Neutropenia


 27%|██▋       | 225/844 [05:17<14:40,  1.42s/it]

Precursor Cell Lymphoblastic Leukemia-Lymphoma


 27%|██▋       | 227/844 [05:20<14:30,  1.41s/it]

Psychotic Disorders,schizoaffective disorder,Schizophrenia


 27%|██▋       | 228/844 [05:21<14:27,  1.41s/it]

heart transplantation,hemopoietic stem cell transplant,Kidney Transplantation,liver transplantation,lung transplantation


 27%|██▋       | 230/844 [05:24<14:26,  1.41s/it]

Acute coronary syndrome,Cardiovascular Diseases,Ischemic Attack, Transient,Stroke


 27%|██▋       | 231/844 [05:26<14:24,  1.41s/it]

statin-related myopathy


 28%|██▊       | 234/844 [05:30<14:19,  1.41s/it]

Drug Hypersensitivity,drug reaction with eosinophilia and systemic symptoms,Epidermal Necrolysis, Toxic,Maculopapular Exanthema,Stevens-Johnson Syndrome


 28%|██▊       | 235/844 [05:31<14:16,  1.41s/it]

heart transplantation,hematopoietic stem cell transplantation,Kidney Transplantation,lung transplantation


 28%|██▊       | 236/844 [05:33<14:17,  1.41s/it]

drug reaction with eosinophilia and systemic symptoms,Epidermal Necrolysis, Toxic,severe cutaneous adverse reactions,Stevens-Johnson Syndrome


 28%|██▊       | 237/844 [05:34<14:16,  1.41s/it]

drug reaction with eosinophilia and systemic symptoms,Epidermal Necrolysis, Toxic,Maculopapular Exanthema,severe cutaneous adverse reactions,Stevens-Johnson Syndrome


 28%|██▊       | 238/844 [05:36<14:13,  1.41s/it]

Epidermal Necrolysis, Toxic,Epilepsy,severe cutaneous adverse reactions,Stevens-Johnson Syndrome


 28%|██▊       | 239/844 [05:37<14:14,  1.41s/it]

Depressive Disorder,Obsessive-Compulsive Disorder


 28%|██▊       | 240/844 [05:38<14:12,  1.41s/it]

agitation,Alcohol-Related Disorders,cardiotoxicity,Depression,Depressive Disorder,Depressive Disorder, Major,Drug Toxicity,dysphoria,Edema,Nausea,Obsessive-Compulsive Disorder,Tachycardia,Vomiting


 29%|██▊       | 241/844 [05:40<14:09,  1.41s/it]

Leukopenia,Myelosuppression


 29%|██▉       | 244/844 [05:44<14:14,  1.42s/it]

drug-induced liver injury,Hepatitis, Toxic,Toxic liver disease,Tuberculosis


 29%|██▉       | 246/844 [05:47<14:08,  1.42s/it]

Hepatitis C,HIV Infections


 29%|██▉       | 247/844 [05:48<14:04,  1.41s/it]

time to therapeutic inr


 29%|██▉       | 248/844 [05:50<14:04,  1.42s/it]

time in therapeutic range


 30%|██▉       | 250/844 [05:53<14:06,  1.43s/it]

Drug Hypersensitivity,drug reaction with eosinophilia and systemic symptoms,Leprosy,Maculopapular Exanthema,severe cutaneous adverse reactions,Stevens-Johnson Syndrome


 30%|██▉       | 251/844 [05:54<14:02,  1.42s/it]

Hypercholesterolemia,Myocardial Infarction


 30%|██▉       | 252/844 [05:55<14:01,  1.42s/it]

Kidney Transplantation,liver transplantation,Proteinuria


 30%|███       | 254/844 [05:58<14:03,  1.43s/it]

Atrial Fibrillation,heart valve replacement


 30%|███       | 257/844 [06:03<13:55,  1.42s/it]

Hemorrhage,over-anticoagulation,time above therapeutic range


 31%|███       | 258/844 [06:04<13:51,  1.42s/it]

Diabetes Mellitus, Type 2,Heart Failure,Pulmonary Disease, Chronic Obstructive


 31%|███       | 259/844 [06:05<13:46,  1.41s/it]

Heroin Dependence,Opioid-Related Disorders


 31%|███       | 261/844 [06:08<13:40,  1.41s/it]

Drug Toxicity,hematotoxicity,Leukopenia,Lymphoma,mucositis,Neoplasms,Neutropenia,Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma,primary central nervous system lymphoma,Thrombocytopenia,Toxic liver disease


 31%|███       | 262/844 [06:10<13:38,  1.41s/it]

Arthritis, Juvenile Rheumatoid,Arthritis, Psoriatic,Arthritis, Rheumatoid,Drug Toxicity


 31%|███▏      | 264/844 [06:12<13:36,  1.41s/it]

Epidermal Necrolysis, Toxic,Maculopapular Exanthema,severe cutaneous adverse reactions,Stevens-Johnson Syndrome


 32%|███▏      | 266/844 [06:15<13:28,  1.40s/it]

Kidney Transplantation,laparoscopic sleeve gastrectomy,liver transplantation,lung transplantation


 32%|███▏      | 267/844 [06:17<13:31,  1.41s/it]

Kidney Transplantation,liver transplantation


 32%|███▏      | 268/844 [06:18<13:32,  1.41s/it]

Epidermal Necrolysis, Toxic,severe cutaneous adverse reactions,Stevens-Johnson Syndrome


 32%|███▏      | 269/844 [06:19<13:30,  1.41s/it]

Arthritis, Psoriatic,Arthritis, Rheumatoid,Crohn Disease,Inflammation,Psoriasis,Spondylitis, Ankylosing


 32%|███▏      | 270/844 [06:21<13:32,  1.41s/it]

Adenocarcinoma,Carcinoma, Non-Small-Cell Lung,Drug Resistance,Lung Neoplasms


 32%|███▏      | 271/844 [06:22<13:30,  1.41s/it]

drug reaction with eosinophilia and systemic symptoms,Epidermal Necrolysis, Toxic,Exanthema,Hypersensitivity,Stevens-Johnson Syndrome


 32%|███▏      | 273/844 [06:25<13:23,  1.41s/it]

Coronary Disease,Hyperlipidemias


 32%|███▏      | 274/844 [06:26<13:24,  1.41s/it]

Anemia,Hepatitis C, Chronic


 33%|███▎      | 277/844 [06:31<13:20,  1.41s/it]

Carcinoma, Non-Small-Cell Lung,Drug Resistance


 33%|███▎      | 278/844 [06:32<13:20,  1.41s/it]

Arthritis, Rheumatoid,Neuromyelitis Optica


 33%|███▎      | 279/844 [06:34<13:18,  1.41s/it]

Coronary Disease,Myocardial Infarction


 33%|███▎      | 280/844 [06:35<13:15,  1.41s/it]

Drug Hypersensitivity,Stevens-Johnson Syndrome


 33%|███▎      | 282/844 [06:38<13:14,  1.41s/it]

Hypertension,Myocardial Infarction


 34%|███▍      | 286/844 [06:43<13:06,  1.41s/it]

Cough,Hypertension


 34%|███▍      | 287/844 [06:45<13:03,  1.41s/it]

HIV Infections,Hypertriglyceridemia


 34%|███▍      | 288/844 [06:46<12:59,  1.40s/it]

Breast Neoplasms,Drug Toxicity


 34%|███▍      | 291/844 [06:50<13:02,  1.41s/it]

Schizophrenia,Weight gain


 35%|███▍      | 295/844 [06:56<13:06,  1.43s/it]

Breast Neoplasms,Neutropenia


 35%|███▌      | 296/844 [06:58<13:02,  1.43s/it]

Carcinoma, Non-Small-Cell Lung,Neoplasms


 35%|███▌      | 299/844 [07:02<12:53,  1.42s/it]

Cardiovascular Diseases,Coronary Disease,Stroke


 36%|███▌      | 301/844 [07:05<12:47,  1.41s/it]

Acquired Long QT Syndrome (aLQTS)


 36%|███▌      | 302/844 [07:06<12:47,  1.42s/it]

Psychotic Disorders,Substance-Related Disorders


 36%|███▋      | 306/844 [07:12<12:48,  1.43s/it]

Bipolar Disorder,Schizophrenia


 37%|███▋      | 310/844 [07:17<12:30,  1.41s/it]

HIV Infections,Hyperlipidemias


 37%|███▋      | 313/844 [07:22<12:30,  1.41s/it]

Drug Toxicity,Psoriasis


 37%|███▋      | 316/844 [07:26<12:28,  1.42s/it]

Arthritis, Rheumatoid,Drug Toxicity


 38%|███▊      | 318/844 [07:29<12:23,  1.41s/it]

Arrhythmias, Cardiac,Drug Toxicity,Lymphoma, Non-Hodgkin


 38%|███▊      | 319/844 [07:30<12:27,  1.42s/it]

Congenital Abnormalities,Craniofacial Abnormalities


 38%|███▊      | 320/844 [07:32<12:20,  1.41s/it]

Cystitis,Transplantation


 38%|███▊      | 323/844 [07:36<12:15,  1.41s/it]

Coronary Artery Disease,Hypertension


 38%|███▊      | 324/844 [07:37<12:10,  1.41s/it]

Hypertension,Kidney Diseases,Nephrosclerosis


 39%|███▊      | 327/844 [07:41<12:13,  1.42s/it]

Hepatic Veno-Occlusive Disease,Transplantation


 40%|███▉      | 334/844 [07:51<11:56,  1.41s/it]

Neoplasms,Osteonecrosis


 40%|████      | 340/844 [08:00<11:48,  1.41s/it]

Diabetes Mellitus,Edema,Hyperlipidemias


 40%|████      | 341/844 [08:01<11:45,  1.40s/it]

Depression,Depressive Disorder,Depressive Disorder, Major,Hypotension


 41%|████      | 345/844 [08:07<11:40,  1.40s/it]

Diabetes Mellitus,Hypertension


 41%|████      | 346/844 [08:08<11:37,  1.40s/it]

Anxiety Disorders,Depressive Disorder, Major


 41%|████      | 348/844 [08:11<11:38,  1.41s/it]

Hyperlipoproteinemia Type II


 41%|████▏     | 349/844 [08:12<11:37,  1.41s/it]

Hypercholesterolemia,Myalgia unspecified


 41%|████▏     | 350/844 [08:14<11:40,  1.42s/it]

Coronary Artery Disease,Myalgia unspecified


 42%|████▏     | 356/844 [08:22<11:34,  1.42s/it]

Coronary Artery Disease,Hypercholesterolemia


 42%|████▏     | 357/844 [08:24<11:35,  1.43s/it]

Hyperlipidemias


 42%|████▏     | 358/844 [08:25<11:34,  1.43s/it]

Coronary Artery Disease,Myocardial Infarction


 43%|████▎     | 360/844 [08:28<11:30,  1.43s/it]

Diabetes Mellitus,Hypertension,Vascular Diseases


 44%|████▍     | 370/844 [08:42<11:10,  1.41s/it]

Uterine Cervical Neoplasms


 44%|████▍     | 371/844 [08:44<11:07,  1.41s/it]

Deafness,Ototoxicity,Testicular Neoplasms


 44%|████▍     | 375/844 [08:49<10:55,  1.40s/it]

Colorectal Neoplasms,Drug Toxicity


 45%|████▍     | 378/844 [08:53<10:58,  1.41s/it]

Helicobacter Infections


 45%|████▌     | 381/844 [08:58<10:53,  1.41s/it]

Angina Pectoris,Heart Failure


 45%|████▌     | 382/844 [08:59<10:56,  1.42s/it]

Leukopenia,Neoplasms


 46%|████▌     | 385/844 [09:03<10:49,  1.41s/it]

Anemia, Hemolytic,Hemolysis


 46%|████▌     | 386/844 [09:05<10:49,  1.42s/it]

Neoplasms,Neutropenia


 46%|████▌     | 387/844 [09:06<10:48,  1.42s/it]

Headache Disorders,Migraine with Aura,Migraine without Aura


 46%|████▌     | 389/844 [09:09<10:40,  1.41s/it]

Hemolysis,Lead Poisoning, Nervous System, Childhood


 46%|████▋     | 392/844 [09:13<10:38,  1.41s/it]

Drug Toxicity,Neurotoxicity Syndromes


 47%|████▋     | 393/844 [09:15<10:40,  1.42s/it]

Neurotoxicity Syndromes


 47%|████▋     | 394/844 [09:16<10:38,  1.42s/it]

severe cutaneous adverse reactions


 47%|████▋     | 397/844 [09:20<10:36,  1.42s/it]

Opioid-Related Disorders


 47%|████▋     | 398/844 [09:22<10:31,  1.42s/it]

Substance Withdrawal Syndrome


 47%|████▋     | 399/844 [09:23<10:30,  1.42s/it]

Drug Toxicity,Inflammatory Bowel Diseases,Pancreatitis


 47%|████▋     | 400/844 [09:25<10:25,  1.41s/it]

Maculopapular Exanthema


 48%|████▊     | 401/844 [09:26<10:25,  1.41s/it]

drug reaction with eosinophilia and systemic symptoms


 48%|████▊     | 409/844 [09:37<10:12,  1.41s/it]

Carcinoma, Non-Small-Cell Lung,Colorectal Neoplasms,Neoplasms,Pancreatic Neoplasms


 49%|████▊     | 411/844 [09:40<10:10,  1.41s/it]

anaphylactoid reaction


 49%|████▉     | 414/844 [09:44<10:05,  1.41s/it]

Carcinoma, Non-Small-Cell Lung,Mesothelioma


 50%|████▉     | 419/844 [09:52<10:05,  1.42s/it]

Gastrointestinal Stromal Tumors,Leukemia, Myelogenous, Chronic, BCR-ABL Positive


 50%|████▉     | 420/844 [09:53<10:02,  1.42s/it]

Hepatitis C, Chronic,HIV Infections


 51%|█████     | 427/844 [10:03<09:48,  1.41s/it]

hemopoietic stem cell transplant


 51%|█████     | 428/844 [10:04<09:46,  1.41s/it]

Colitis, Ulcerative,Inflammatory Bowel Diseases


 51%|█████     | 429/844 [10:06<09:47,  1.41s/it]

Crohn Disease,Inflammatory Bowel Diseases


 51%|█████▏    | 433/844 [10:11<09:38,  1.41s/it]

Cough,Essential hypertension


 52%|█████▏    | 441/844 [10:23<09:27,  1.41s/it]

Anemia, Hemolytic,Hemolysis,Protein Deficiency


 52%|█████▏    | 442/844 [10:24<09:26,  1.41s/it]

schizoaffective disorder,Schizophrenia


 53%|█████▎    | 446/844 [10:30<09:25,  1.42s/it]

Neoplasms,Ovarian Neoplasms,Stomach Neoplasms


 53%|█████▎    | 447/844 [10:31<09:21,  1.41s/it]

Obesity,Polycystic Ovary Syndrome


 53%|█████▎    | 448/844 [10:32<09:17,  1.41s/it]

Autism Spectrum Disorder,Psychotic Disorders,Schizophrenia


 53%|█████▎    | 449/844 [10:34<09:16,  1.41s/it]

Autism Spectrum Disorder,Schizophrenia


 53%|█████▎    | 451/844 [10:37<09:11,  1.40s/it]

Depression,suicide


 54%|█████▎    | 452/844 [10:38<09:08,  1.40s/it]

Infertility, Female,Ovarian hyperstimulation syndrome


 54%|█████▎    | 453/844 [10:39<09:10,  1.41s/it]

Colorectal Neoplasms,hand-foot syndrome


 54%|█████▍    | 455/844 [10:42<09:05,  1.40s/it]

Coronary Artery Disease,Coronary Disease,Myocardial Infarction


 54%|█████▍    | 456/844 [10:44<09:06,  1.41s/it]

Breast Neoplasms,Menopause


 54%|█████▍    | 457/844 [10:45<09:02,  1.40s/it]

Carcinoma, Renal Cell,hand-foot syndrome


 54%|█████▍    | 458/844 [10:46<08:59,  1.40s/it]

Carcinoma, Renal Cell,Hypertension


 55%|█████▍    | 460/844 [10:49<09:01,  1.41s/it]

Depressive Disorder,Narcolepsy


 55%|█████▌    | 467/844 [10:59<08:56,  1.42s/it]

drug reaction with eosinophilia and systemic symptoms,Epidermal Necrolysis, Toxic,Stevens-Johnson Syndrome


 55%|█████▌    | 468/844 [11:01<08:52,  1.42s/it]

Autism Spectrum Disorder,Mood Disorders


 56%|█████▌    | 470/844 [11:03<08:50,  1.42s/it]

Coronary Artery Disease,Diabetes Mellitus, Type 2


 56%|█████▌    | 471/844 [11:05<08:48,  1.42s/it]

HIV Infections,Pancreatic Neoplasms


 56%|█████▌    | 472/844 [11:06<08:49,  1.42s/it]

Kidney Transplantation,Myasthenia Gravis


 56%|█████▌    | 473/844 [11:08<08:47,  1.42s/it]

hypertensive nephrosclerosis


 56%|█████▋    | 476/844 [11:12<08:43,  1.42s/it]

Pancreatitis,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 57%|█████▋    | 478/844 [11:15<08:37,  1.41s/it]

Alzheimer Disease,Lewy Body Disease


 57%|█████▋    | 479/844 [11:16<08:34,  1.41s/it]

Anemia,Nasopharyngeal Neoplasms


 57%|█████▋    | 481/844 [11:19<08:34,  1.42s/it]

Nasopharyngeal Neoplasms,Neutropenia


 57%|█████▋    | 482/844 [11:21<08:33,  1.42s/it]

Gastroesophageal Reflux,Transplantation


 57%|█████▋    | 485/844 [11:25<08:29,  1.42s/it]

Pain,Tobacco Use Disorder


 58%|█████▊    | 486/844 [11:26<08:25,  1.41s/it]

HIV Infections,Tuberculosis


 58%|█████▊    | 488/844 [11:29<08:20,  1.41s/it]

Coronary Disease,Osteoporosis


 58%|█████▊    | 490/844 [11:32<08:16,  1.40s/it]

HIV Infections,nephrolithiasis


 58%|█████▊    | 491/844 [11:33<08:16,  1.41s/it]

Anemia,Nasopharyngeal Neoplasms,Neutropenia


 58%|█████▊    | 492/844 [11:35<08:18,  1.42s/it]

Osteonecrosis,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 59%|█████▊    | 495/844 [11:39<08:11,  1.41s/it]

Anemia,Ovarian Neoplasms


 59%|█████▉    | 499/844 [11:45<08:10,  1.42s/it]

Carcinoma, Hepatocellular,hand-foot syndrome


 59%|█████▉    | 501/844 [11:47<08:07,  1.42s/it]

hand-foot syndrome,Hypertension


 59%|█████▉    | 502/844 [11:49<08:01,  1.41s/it]

Menopause,Schizophrenia


 60%|█████▉    | 503/844 [11:50<07:59,  1.41s/it]

Arthritis, Rheumatoid,Psoriasis


 60%|█████▉    | 504/844 [11:52<08:00,  1.41s/it]

Bipolar Disorder,Depression,Substance-Related Disorders


 60%|█████▉    | 506/844 [11:54<07:56,  1.41s/it]

Irritable Bowel Syndrome,Leukopenia


 60%|██████    | 509/844 [11:59<07:51,  1.41s/it]

Drug Hypersensitivity,Epidermal Necrolysis, Toxic,erythema exudativum multiforme,Maculopapular Exanthema,Stevens-Johnson Syndrome


 60%|██████    | 510/844 [12:00<07:48,  1.40s/it]

Epidermal Necrolysis, Toxic,Maculopapular Exanthema,Stevens-Johnson Syndrome


 61%|██████    | 511/844 [12:01<07:47,  1.40s/it]

Colorectal Neoplasms,Neutropenia


 61%|██████    | 512/844 [12:03<07:48,  1.41s/it]

Hepatitis C, Chronic,Recurrence


 61%|██████    | 514/844 [12:06<07:43,  1.40s/it]

Colitis, Ulcerative,Kidney Transplantation


 61%|██████    | 515/844 [12:07<07:43,  1.41s/it]

Hyperglycemia,Hypertension


 61%|██████    | 516/844 [12:08<07:42,  1.41s/it]

Kidney Transplantation,lung transplantation


 61%|██████▏   | 518/844 [12:11<07:37,  1.40s/it]

Arthralgia,Breast Neoplasms


 61%|██████▏   | 519/844 [12:13<07:35,  1.40s/it]

Asthenia,Neoplasms


 62%|██████▏   | 521/844 [12:16<07:37,  1.42s/it]

Diarrhea,Neoplasms


 62%|██████▏   | 522/844 [12:17<07:35,  1.41s/it]

Hyperbilirubinemia,Neoplasms


 62%|██████▏   | 524/844 [12:20<07:33,  1.42s/it]

Breast Neoplasms,Leukopenia,Neutropenia


 62%|██████▏   | 525/844 [12:21<07:31,  1.42s/it]

Drug Toxicity,Neoplasms,Ototoxicity


 62%|██████▏   | 526/844 [12:23<07:29,  1.41s/it]

Neoplasms,nephrotoxicity


 62%|██████▏   | 527/844 [12:24<07:28,  1.41s/it]

major adverse cardiac events (mace)


 63%|██████▎   | 528/844 [12:25<07:26,  1.41s/it]

Epidermal Necrolysis, Toxic,HIV Infections,Stevens-Johnson Syndrome


 63%|██████▎   | 530/844 [12:28<07:22,  1.41s/it]

Breast Neoplasms,Lymphopenia


 63%|██████▎   | 531/844 [12:30<07:23,  1.42s/it]

Breast Neoplasms,mucositis


 63%|██████▎   | 533/844 [12:32<07:22,  1.42s/it]

Breast Neoplasms,Hyperglycemia,Leukopenia


 64%|██████▎   | 536/844 [12:37<07:17,  1.42s/it]

Stroke,Venous Thrombosis


 64%|██████▎   | 537/844 [12:38<07:16,  1.42s/it]

Coronary Artery Disease,Diabetes Mellitus,Hypercholesterolemia


 64%|██████▎   | 538/844 [12:40<07:13,  1.42s/it]

Toxic liver disease,Tuberculosis


 64%|██████▍   | 539/844 [12:41<07:10,  1.41s/it]

drug-induced liver injury,Toxic liver disease


 64%|██████▍   | 540/844 [12:42<07:08,  1.41s/it]

Acute coronary syndrome,Myocardial Infarction


 64%|██████▍   | 542/844 [12:45<07:05,  1.41s/it]

Bipolar Disorder,Depressive Disorder,Psychotic Disorders,schizoaffective disorder


 64%|██████▍   | 543/844 [12:47<07:06,  1.42s/it]

tonsillectomy


 64%|██████▍   | 544/844 [12:48<07:07,  1.42s/it]

adverse events,gastrointestinal toxicity,Myelosuppression,Urinary Bladder Neoplasms


 65%|██████▍   | 545/844 [12:50<07:07,  1.43s/it]

gastrointestinal toxicity,Myelosuppression,Urinary Bladder Neoplasms


 65%|██████▍   | 546/844 [12:51<07:03,  1.42s/it]

hematopoietic stem cell transplantation,Kidney Transplantation,Urinary Bladder Neoplasms


 65%|██████▍   | 547/844 [12:52<07:01,  1.42s/it]

Hypernatremia,Hypertension


 65%|██████▍   | 548/844 [12:54<06:59,  1.42s/it]

Carcinoma, Non-Small-Cell Lung,gastrointestinal toxicity,Hematologic Diseases,Leukopenia


 65%|██████▌   | 549/844 [12:55<06:58,  1.42s/it]

Diabetes Mellitus, Type 2,Polycystic Ovary Syndrome


 65%|██████▌   | 550/844 [12:57<06:58,  1.42s/it]

Asthenia,Nausea,Neoplasms,Vomiting


 65%|██████▌   | 551/844 [12:58<06:55,  1.42s/it]

Inflammatory Bowel Diseases,Psoriasis


 65%|██████▌   | 552/844 [12:59<06:53,  1.41s/it]

Anemia,Dermatitis,Leukopenia,mucositis,Myelosuppression,Neutropenia,Thrombocytopenia


 66%|██████▌   | 553/844 [13:01<06:51,  1.41s/it]

adverse events,Premature Birth


 66%|██████▌   | 554/844 [13:02<06:50,  1.42s/it]

drug-induced liver injury,Tuberculosis


 66%|██████▌   | 555/844 [13:04<06:51,  1.42s/it]

Carcinoma, Renal Cell,Neutropenia


 66%|██████▌   | 556/844 [13:05<06:46,  1.41s/it]

Acquired Immunodeficiency Syndrome,HIV Infections,nephrolithiasis


 66%|██████▌   | 557/844 [13:06<06:44,  1.41s/it]

Breast Neoplasms,Colorectal Neoplasms


 66%|██████▌   | 559/844 [13:09<06:41,  1.41s/it]

Alcoholism,Death


 66%|██████▋   | 561/844 [13:12<06:39,  1.41s/it]

Hemorrhage,Myocardial Infarction


 67%|██████▋   | 562/844 [13:14<06:37,  1.41s/it]

Maculopapular Exanthema,severe cutaneous adverse reactions,Stevens-Johnson Syndrome


 67%|██████▋   | 563/844 [13:15<06:35,  1.41s/it]

drug reaction with eosinophilia and systemic symptoms,Stevens-Johnson Syndrome


 67%|██████▋   | 564/844 [13:16<06:32,  1.40s/it]

drug reaction with eosinophilia and systemic symptoms,severe cutaneous adverse reactions,Stevens-Johnson Syndrome


 67%|██████▋   | 565/844 [13:18<06:30,  1.40s/it]

Nausea,Neoplasms,Vomiting


 67%|██████▋   | 566/844 [13:19<06:27,  1.39s/it]

Multiple Myeloma,progression-free survival


 67%|██████▋   | 567/844 [13:21<06:29,  1.41s/it]

Cocaine-Related Disorders,Heroin Dependence


 67%|██████▋   | 568/844 [13:22<06:29,  1.41s/it]

Coronary Disease,Hemorrhage


 67%|██████▋   | 569/844 [13:23<06:27,  1.41s/it]

Hemorrhage,venous thromboembolism


 68%|██████▊   | 573/844 [13:29<06:22,  1.41s/it]

Leukopenia,Neutropenia,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 68%|██████▊   | 574/844 [13:30<06:23,  1.42s/it]

Alzheimer Disease,cognitive dysfunction


 68%|██████▊   | 575/844 [13:32<06:20,  1.41s/it]

overall survival,Thrombocytopenia


 68%|██████▊   | 576/844 [13:33<06:17,  1.41s/it]

Drug Toxicity,overall survival


 68%|██████▊   | 577/844 [13:35<06:15,  1.41s/it]

Anemia,Leukopenia,Nausea,Neutropenia,Thrombocytopenia,Vomiting


 68%|██████▊   | 578/844 [13:36<06:14,  1.41s/it]

Cardiovascular Diseases,Rhabdomyolysis


 69%|██████▊   | 580/844 [13:39<06:12,  1.41s/it]

adverse events,Epilepsy


 69%|██████▉   | 581/844 [13:40<06:13,  1.42s/it]

drug-induced liver injury,Multiple Sclerosis


 69%|██████▉   | 582/844 [13:42<06:09,  1.41s/it]

Pain, Postoperative,Respiratory Insufficiency


 69%|██████▉   | 584/844 [13:45<06:06,  1.41s/it]

HIV Infections,Peripheral Nervous System Diseases


 69%|██████▉   | 585/844 [13:46<06:06,  1.42s/it]

Arthritis, Rheumatoid,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 69%|██████▉   | 586/844 [13:47<06:04,  1.41s/it]

Colorectal Neoplasms,Head and Neck Neoplasms


 70%|██████▉   | 588/844 [13:50<06:02,  1.41s/it]

Colonic Neoplasms,Colorectal Neoplasms,Neoplasms,Rectal Neoplasms,Uterine Cervical Neoplasms


 70%|███████   | 591/844 [14:00<12:26,  2.95s/it]

Alcoholism,Substance-Related Disorders


 70%|███████   | 593/844 [14:02<09:01,  2.16s/it]

Neutropenia,Pancreatic Neoplasms


 70%|███████   | 594/844 [14:04<08:02,  1.93s/it]

Testicular Neoplasms,Vomiting


 70%|███████   | 595/844 [14:05<07:22,  1.78s/it]

Alopecia,Testicular Neoplasms


 71%|███████   | 596/844 [14:07<06:56,  1.68s/it]

Infection,Nausea,Testicular Neoplasms


 71%|███████   | 597/844 [14:08<06:36,  1.60s/it]

Alopecia,Pain,Testicular Neoplasms


 71%|███████   | 598/844 [14:09<06:21,  1.55s/it]

Anemia,Testicular Neoplasms


 71%|███████   | 599/844 [14:11<06:09,  1.51s/it]

Leukopenia,Testicular Neoplasms


 71%|███████   | 600/844 [14:12<06:01,  1.48s/it]

febrile neutropenia,Testicular Neoplasms


 71%|███████   | 601/844 [14:14<05:54,  1.46s/it]

Drug interaction with drug,Drug Toxicity


 72%|███████▏  | 604/844 [14:19<06:13,  1.56s/it]

short qt syndrome 1


 72%|███████▏  | 605/844 [14:20<06:02,  1.52s/it]

pneumonitis,progression-free survival


 72%|███████▏  | 607/844 [14:23<05:47,  1.47s/it]

Heart Arrest,Overdose,Respiratory Insufficiency


 72%|███████▏  | 608/844 [14:25<05:43,  1.46s/it]

cessation,Tobacco Use Disorder


 72%|███████▏  | 611/844 [14:29<05:31,  1.42s/it]

Maculopapular Exanthema,Tuberculosis


 73%|███████▎  | 612/844 [14:30<05:30,  1.42s/it]

Opioid-Related Disorders,Sexual Dysfunctions, Psychological


 73%|███████▎  | 613/844 [14:32<05:26,  1.41s/it]

adverse events,Opioid-Related Disorders


 73%|███████▎  | 614/844 [14:33<05:25,  1.41s/it]

Arthritis, Psoriatic,Psoriasis


 73%|███████▎  | 615/844 [14:34<05:25,  1.42s/it]

Alcoholism,Attention Deficit Disorder with Hyperactivity


 73%|███████▎  | 616/844 [14:36<05:24,  1.42s/it]

Drug Toxicity,Gastrointestinal Stromal Tumors


 73%|███████▎  | 620/844 [14:41<05:18,  1.42s/it]

Pancytopenia,Thrombocytopenia


 74%|███████▎  | 621/844 [14:43<05:17,  1.42s/it]

Depressive Disorder, Major,suicidal ideation


 74%|███████▎  | 622/844 [14:44<05:14,  1.42s/it]

Drug Toxicity,Tuberculosis


 74%|███████▍  | 624/844 [14:47<05:11,  1.41s/it]

Mesothelioma,Thrombocytopenia


 74%|███████▍  | 625/844 [14:49<05:10,  1.42s/it]

Alopecia,Mesothelioma


 74%|███████▍  | 626/844 [14:50<05:08,  1.42s/it]

Anemia,Mesothelioma


 74%|███████▍  | 627/844 [14:51<05:07,  1.42s/it]

Leukopenia,Mesothelioma


 74%|███████▍  | 628/844 [14:53<05:05,  1.42s/it]

hematopoietic stem cell transplantation,Neurotoxicity Syndromes


 75%|███████▍  | 630/844 [14:56<05:06,  1.43s/it]

Diabetes Mellitus, Type 2,Weight gain


 75%|███████▍  | 631/844 [14:57<05:03,  1.43s/it]

Carcinoma, Non-Small-Cell Lung,pneumonitis


 75%|███████▍  | 632/844 [14:59<05:01,  1.42s/it]

adverse events,Carcinoma, Non-Small-Cell Lung


 75%|███████▌  | 633/844 [15:00<05:00,  1.42s/it]

Constriction, Pathologic,Ischemic Attack, Transient,Stroke


 75%|███████▌  | 634/844 [15:01<04:58,  1.42s/it]

HIV Infections,Hyperlipidemias,Hypertriglyceridemia


 75%|███████▌  | 635/844 [15:03<04:56,  1.42s/it]

lung transplantation,overall survival


 75%|███████▌  | 636/844 [15:04<04:55,  1.42s/it]

chronic lung allograft dysfunction,lung transplantation


 76%|███████▌  | 640/844 [15:10<04:48,  1.42s/it]

adverse events,Arthritis, Rheumatoid


 76%|███████▌  | 641/844 [15:11<04:46,  1.41s/it]

heart valve replacement,Hemorrhage


 76%|███████▋  | 644/844 [15:16<04:44,  1.42s/it]

Epilepsy,Psychotic Disorders


 77%|███████▋  | 647/844 [15:20<04:40,  1.42s/it]

Hepatitis C,Liver Neoplasms


 77%|███████▋  | 648/844 [15:21<04:39,  1.43s/it]

Carcinoma, Non-Small-Cell Lung,Thrombocytopenia


 77%|███████▋  | 649/844 [15:23<04:36,  1.42s/it]

HIV Infections,nephrotoxicity


 77%|███████▋  | 651/844 [15:25<04:31,  1.41s/it]

sedation,Urticaria


 77%|███████▋  | 652/844 [15:27<04:31,  1.41s/it]

Neoplasms,Ototoxicity,Testicular Neoplasms


 77%|███████▋  | 653/844 [15:28<04:28,  1.40s/it]

Alcoholism,Bipolar Disorder


 77%|███████▋  | 654/844 [15:30<04:29,  1.42s/it]

Carcinoma, Non-Small-Cell Lung,Toxic liver disease


 78%|███████▊  | 655/844 [15:31<04:27,  1.42s/it]

Carcinoma, Non-Small-Cell Lung,Exanthema


 78%|███████▊  | 656/844 [15:33<04:28,  1.43s/it]

Carcinoma, Non-Small-Cell Lung,Diarrhea


 78%|███████▊  | 658/844 [15:35<04:25,  1.43s/it]

Sexual Dysfunctions, Psychological


 78%|███████▊  | 659/844 [15:37<04:24,  1.43s/it]

Hyperalgesia


 79%|███████▊  | 663/844 [15:43<04:17,  1.42s/it]

Death,Opioid-Related Disorders


 79%|███████▉  | 665/844 [15:45<04:14,  1.42s/it]

Colorectal Neoplasms,Esophageal Neoplasms,Osteosarcoma,Ovarian Neoplasms,Pancreatic Neoplasms


 79%|███████▉  | 666/844 [15:47<04:12,  1.42s/it]

overall survival


 79%|███████▉  | 667/844 [15:48<04:12,  1.43s/it]

hand-foot syndrome,Neoplasms


 79%|███████▉  | 668/844 [15:50<04:11,  1.43s/it]

Neoplasms,Osteosarcoma,Ototoxicity,Testicular Neoplasms


 79%|███████▉  | 669/844 [15:51<04:09,  1.42s/it]

Neutropenia,Urinary Bladder Neoplasms


 79%|███████▉  | 670/844 [15:53<04:07,  1.42s/it]

Drug Toxicity,Urinary Bladder Neoplasms


 80%|███████▉  | 671/844 [15:54<04:07,  1.43s/it]

severe cutaneous adverse reactions,Stevens-Johnson Syndrome


 80%|███████▉  | 672/844 [15:55<04:05,  1.43s/it]

Carcinoma, Hepatocellular,Hyperbilirubinemia


 80%|███████▉  | 673/844 [15:57<04:11,  1.47s/it]

Hyperprolactinemia,Schizophrenia


 80%|███████▉  | 674/844 [15:58<04:06,  1.45s/it]

Arteriosclerosis,Coronary Disease,Essential hypertension,glomerular disease,Glomerulonephritis, IGA,Kidney Diseases


 80%|███████▉  | 675/844 [16:00<04:03,  1.44s/it]

Carcinoma, Non-Small-Cell Lung,Mesothelioma,Pancreatic Neoplasms


 80%|████████  | 676/844 [16:01<04:02,  1.45s/it]

Carcinoma, Non-Small-Cell Lung,Colorectal Neoplasms,Gastrointestinal Neoplasms,Ovarian Neoplasms


 80%|████████  | 677/844 [16:03<03:59,  1.43s/it]

Leukopenia,Myelosuppression,Neutropenia,Thrombocytopenia


 81%|████████  | 682/844 [16:10<03:50,  1.42s/it]

Heart Failure,Neoplasms


 81%|████████  | 683/844 [16:11<03:48,  1.42s/it]

Cardiomyopathies,Neoplasms


 81%|████████  | 685/844 [16:14<03:46,  1.42s/it]

Mental Disorders,Schizophrenia


 81%|████████▏ | 686/844 [16:15<03:44,  1.42s/it]

Metabolic Syndrome,Schizophrenia


 82%|████████▏ | 688/844 [16:18<03:41,  1.42s/it]

Brain Neoplasms,Deafness,Ototoxicity


 82%|████████▏ | 692/844 [16:24<03:35,  1.42s/it]

Nausea,Pain, Postoperative,Vomiting


 82%|████████▏ | 693/844 [16:25<03:33,  1.41s/it]

Constipation,dry mouth,Respiratory Insufficiency


 82%|████████▏ | 694/844 [16:27<03:32,  1.42s/it]

Exanthema,Opioid-Related Disorders


 82%|████████▏ | 695/844 [16:28<03:30,  1.42s/it]

adverse events,Nausea,Vomiting


 82%|████████▏ | 696/844 [16:30<03:29,  1.41s/it]

Opioid-Related Disorders,Sleep Disorders


 83%|████████▎ | 697/844 [16:31<03:28,  1.42s/it]

overall survival,progression-free survival


 83%|████████▎ | 699/844 [16:34<03:25,  1.42s/it]

Medulloblastoma,Neoplasms,Ototoxicity,Testicular Neoplasms


 83%|████████▎ | 701/844 [16:37<03:23,  1.42s/it]

Postoperative Nausea and Vomiting,Vomiting


 83%|████████▎ | 703/844 [16:40<03:20,  1.42s/it]

HIV Infections,Toxic liver disease


 84%|████████▎ | 705/844 [16:42<03:18,  1.43s/it]

Carcinoma, Hepatocellular,Carcinoma, Renal Cell


 84%|████████▎ | 706/844 [16:44<03:17,  1.43s/it]

Carcinoma, Hepatocellular,Liver Neoplasms


 84%|████████▍ | 707/844 [16:45<03:15,  1.43s/it]

Breast Neoplasms,Ovarian Neoplasms,Peripheral Nervous System Diseases


 84%|████████▍ | 708/844 [16:47<03:15,  1.43s/it]

sustained virological response (svr)


 84%|████████▍ | 712/844 [16:52<03:07,  1.42s/it]

Opioid-Related Disorders,Pruritus


 84%|████████▍ | 713/844 [16:54<03:05,  1.42s/it]

Heroin Dependence,Memory Disorders


 85%|████████▍ | 715/844 [16:57<03:02,  1.42s/it]

Acute coronary syndrome,Coronary Artery Disease,Hemorrhage,Myocardial Infarction


 85%|████████▍ | 716/844 [16:58<03:01,  1.42s/it]

Drug Toxicity,Thalassemia


 85%|████████▍ | 717/844 [16:59<03:00,  1.42s/it]

adverse events,neuropathic pain


 85%|████████▌ | 718/844 [17:01<02:59,  1.42s/it]

Agranulocytosis,Graves Disease


 85%|████████▌ | 721/844 [17:05<02:54,  1.42s/it]

Hypertriglyceridemia,schizoaffective disorder,Schizophrenia,Weight gain


 86%|████████▌ | 722/844 [17:07<02:52,  1.41s/it]

Hypertriglyceridemia,Weight gain


 86%|████████▌ | 723/844 [17:08<02:52,  1.42s/it]

Fractures, Bone,Pain,Pain, Postoperative


 86%|████████▌ | 724/844 [17:09<02:50,  1.42s/it]

Colitis, Ulcerative,Crohn Disease,Irritable Bowel Syndrome,Leukopenia,Neutropenia


 86%|████████▌ | 725/844 [17:11<02:49,  1.42s/it]

Breast Neoplasms,heart transplantation,Kidney Neoplasms,Kidney Transplantation,lung transplantation,Neuroendocrine Tumors


 86%|████████▌ | 726/844 [17:12<02:46,  1.41s/it]

Breast Neoplasms,Kidney Neoplasms,Neuroendocrine Tumors


 86%|████████▌ | 727/844 [17:14<02:45,  1.41s/it]

Carcinoma, Squamous Cell,overall survival


 86%|████████▋ | 728/844 [17:15<02:43,  1.41s/it]

Carcinoma, Squamous Cell,progression-free survival


 86%|████████▋ | 729/844 [17:16<02:42,  1.41s/it]

Diabetes Mellitus, Type 2,Hypoglycemia


 87%|████████▋ | 731/844 [17:19<02:40,  1.42s/it]

postanesthesia apnea


 87%|████████▋ | 732/844 [17:21<02:38,  1.42s/it]

adverse events,Bipolar Disorder,Depression,Depressive Disorder, Major


 87%|████████▋ | 733/844 [17:22<02:37,  1.42s/it]

Bipolar Disorder,Depression,Depressive Disorder, Major


 87%|████████▋ | 734/844 [17:24<02:36,  1.42s/it]

Pancreatic Neoplasms,Thrombocytopenia


 87%|████████▋ | 735/844 [17:25<02:36,  1.43s/it]

Nausea,Pancreatic Neoplasms


 87%|████████▋ | 736/844 [17:26<02:34,  1.43s/it]

Adenocarcinoma,Carcinoma, Non-Small-Cell Lung,Drug Toxicity,Exanthema,Toxic liver disease


 88%|████████▊ | 739/844 [17:31<02:29,  1.43s/it]

Leukopenia,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 88%|████████▊ | 740/844 [17:32<02:28,  1.43s/it]

Leukopenia,Osteosarcoma


 88%|████████▊ | 741/844 [17:34<02:27,  1.43s/it]

mucositis,Osteosarcoma


 88%|████████▊ | 742/844 [17:35<02:25,  1.42s/it]

Anemia,mucositis,Osteosarcoma


 88%|████████▊ | 743/844 [17:36<02:23,  1.42s/it]

cardiotoxicity,Osteosarcoma


 88%|████████▊ | 744/844 [17:38<02:22,  1.42s/it]

nephrotoxicity,Osteosarcoma


 88%|████████▊ | 745/844 [17:39<02:21,  1.43s/it]

Hypertension,Hypertrophy, Left Ventricular


 88%|████████▊ | 746/844 [17:41<02:19,  1.43s/it]

Mesothelioma,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 89%|████████▊ | 747/844 [17:42<02:18,  1.43s/it]

Neoplasms,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 89%|████████▊ | 748/844 [17:44<02:16,  1.42s/it]

drug-induced liver injury,Leukopenia,Precursor Cell Lymphoblastic Leukemia-Lymphoma,Thrombocytopenia


 89%|████████▊ | 749/844 [17:45<02:14,  1.42s/it]

gastrointestinal toxicity,mucositis,Neutropenia,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 89%|████████▉ | 750/844 [17:46<02:13,  1.42s/it]

Esophageal Neoplasms,Ovarian Neoplasms,Stomach Neoplasms


 89%|████████▉ | 751/844 [17:48<02:12,  1.42s/it]

Lymphoma, Non-Hodgkin,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 89%|████████▉ | 752/844 [17:49<02:10,  1.42s/it]

Acute coronary syndrome,Hyperlipoproteinemia Type II


 89%|████████▉ | 754/844 [17:52<02:06,  1.41s/it]

gastrointestinal toxicity,Hallucinations,Parkinson Disease


 89%|████████▉ | 755/844 [17:53<02:05,  1.41s/it]

Burkitt Lymphoma,Lymphoma, T-Cell,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 90%|████████▉ | 756/844 [17:55<02:04,  1.41s/it]

Carcinoma, Non-Small-Cell Lung,Drug Toxicity,Leukemia, B-Cell, Acute


 90%|████████▉ | 757/844 [17:56<02:03,  1.42s/it]

Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 90%|████████▉ | 758/844 [17:58<02:02,  1.43s/it]

Lymphoma,mucositis,Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 90%|████████▉ | 759/844 [17:59<02:00,  1.42s/it]

Burkitt Lymphoma,Drug Toxicity,Lymphoma, T-Cell,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 90%|█████████ | 760/844 [18:01<01:58,  1.42s/it]

Leukemia,Lymphoma,Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 90%|█████████ | 761/844 [18:02<01:57,  1.41s/it]

Lymphoma,Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 90%|█████████ | 762/844 [18:03<01:55,  1.41s/it]

Anemia,Leukopenia,Lymphoma, Non-Hodgkin,mucositis,Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma,Thrombocytopenia,Toxic liver disease


 90%|█████████ | 763/844 [18:05<01:54,  1.41s/it]

Drug Toxicity,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 91%|█████████ | 764/844 [18:06<01:52,  1.41s/it]

Drug Toxicity,Lymphoma,Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 91%|█████████ | 765/844 [18:08<01:52,  1.42s/it]

gastrointestinal toxicity,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 91%|█████████ | 767/844 [18:10<01:49,  1.42s/it]

Breast Neoplasms,Peripheral Nervous System Diseases


 91%|█████████ | 769/844 [18:13<01:46,  1.42s/it]

Constipation,Delirium,Lung Neoplasms,Nausea,Pain,Postoperative Nausea and Vomiting,Pruritus,Respiratory Insufficiency,somnolence,Urinary Retention


 91%|█████████ | 770/844 [18:15<01:44,  1.42s/it]

adverse events,Constipation,Delirium,Dizziness,Nausea,Pain, Postoperative,Postoperative Nausea and Vomiting,Pruritus,Respiratory Insufficiency,somnolence,Urinary Retention,Vomiting


 91%|█████████▏| 771/844 [18:16<01:42,  1.41s/it]

Burkitt Lymphoma,Leukemia,Lymphoma,Lymphoma, T-Cell,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 91%|█████████▏| 772/844 [18:17<01:41,  1.41s/it]

Graft vs Host Disease,Leukemia,Leukemia, Myelogenous, Chronic, BCR-ABL Positive


 92%|█████████▏| 774/844 [18:20<01:39,  1.42s/it]

adverse events,Hypersensitivity


 92%|█████████▏| 775/844 [18:22<01:37,  1.42s/it]

nephrotoxicity


 92%|█████████▏| 776/844 [18:23<01:36,  1.42s/it]

Confusion,Drug Toxicity,Headache,Muscle Rigidity,Schizophrenia,sedation,Seizures,Tachycardia


 92%|█████████▏| 777/844 [18:25<01:34,  1.41s/it]

adverse events,Alcoholism,Anxiety Disorders


 92%|█████████▏| 778/844 [18:26<01:33,  1.42s/it]

Neoplasms,Neurotoxicity Syndromes


 92%|█████████▏| 779/844 [18:27<01:32,  1.42s/it]

Acute coronary syndrome,major adverse cardiac events (mace)


 92%|█████████▏| 780/844 [18:29<01:30,  1.42s/it]

Coronary Disease,Hypercholesterolemia,Myocardial Infarction


 93%|█████████▎| 781/844 [18:30<01:28,  1.41s/it]

Neutropenia,Precursor Cell Lymphoblastic Leukemia-Lymphoma,Thrombocytopenia


 93%|█████████▎| 782/844 [18:32<01:27,  1.42s/it]

Lupus Erythematosus, Systemic,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 93%|█████████▎| 783/844 [18:33<01:26,  1.41s/it]

Shortened QT interval


 93%|█████████▎| 784/844 [18:34<01:25,  1.42s/it]

drug-induced liver injury,HIV Infections,Tuberculosis


 93%|█████████▎| 785/844 [18:36<01:24,  1.42s/it]

adverse events,Hypersensitivity,severe cutaneous adverse reactions


 93%|█████████▎| 786/844 [18:37<01:22,  1.43s/it]

Leukopenia,Neutropenia,Ovarian Neoplasms


 93%|█████████▎| 787/844 [18:39<01:20,  1.42s/it]

Dyspepsia,Pain, Postoperative


 93%|█████████▎| 789/844 [18:42<01:17,  1.42s/it]

Cardiomyopathy, Dilated,Death


 94%|█████████▍| 794/844 [18:49<01:10,  1.41s/it]

Depressive Disorder, Major,Nausea,Vomiting


 94%|█████████▍| 795/844 [18:50<01:09,  1.41s/it]

Depressive Disorder, Major,Sexual Dysfunctions, Psychological


 94%|█████████▍| 796/844 [18:52<01:08,  1.42s/it]

ability to concentrate,Depressive Disorder, Major,Diarrhea,Dizziness,Tremor


 94%|█████████▍| 797/844 [18:53<01:07,  1.43s/it]

Depressive Disorder, Major,Mood Disorders,Panic Disorder


 95%|█████████▍| 798/844 [18:54<01:05,  1.42s/it]

Peripheral Nervous System Diseases


 95%|█████████▍| 799/844 [18:56<01:03,  1.42s/it]

Gastroesophageal Reflux,Helicobacter Infections,Peptic Ulcer


 95%|█████████▍| 800/844 [18:57<01:02,  1.42s/it]

cardiotoxicity,Neoplasms


 95%|█████████▌| 802/844 [19:00<00:59,  1.41s/it]

Multiple Myeloma,Osteonecrosis


 95%|█████████▌| 803/844 [19:01<00:57,  1.41s/it]

Angina Pectoris,Coronary Artery Disease,Myocardial Infarction,Myocardial Ischemia,Thrombosis


 95%|█████████▌| 804/844 [19:03<00:56,  1.40s/it]

Deafness,Neoplasms,Ototoxicity


 95%|█████████▌| 805/844 [19:04<00:54,  1.40s/it]

HIV Infections,Kidney Diseases


 95%|█████████▌| 806/844 [19:06<00:53,  1.40s/it]

Hypercholesterolemia,Hyperlipoproteinemia Type II


 96%|█████████▌| 807/844 [19:07<00:51,  1.40s/it]

Postoperative Nausea and Vomiting


 96%|█████████▌| 809/844 [19:10<00:49,  1.41s/it]

Acute coronary syndrome,Angina Pectoris


 96%|█████████▌| 811/844 [19:13<00:46,  1.41s/it]

drug reaction with eosinophilia and systemic symptoms,HIV Infections,Stevens-Johnson Syndrome,Toxic liver disease


 96%|█████████▌| 812/844 [19:14<00:44,  1.40s/it]

platelet reactivity


 96%|█████████▋| 814/844 [19:17<00:42,  1.42s/it]

Diabetes Mellitus, Type 1,Heart Failure


 97%|█████████▋| 815/844 [19:18<00:40,  1.41s/it]

Bipolar Disorder,Depression,Psychotic Disorders,Schizophrenia,Substance-Related Disorders


 97%|█████████▋| 816/844 [19:20<00:39,  1.41s/it]

hematopoietic stem cell transplantation,Kidney Transplantation,transplant rejection


 97%|█████████▋| 817/844 [19:21<00:38,  1.41s/it]

Carcinoma, Non-Small-Cell Lung,Ovarian Neoplasms


 97%|█████████▋| 818/844 [19:23<00:36,  1.41s/it]

Acute coronary syndrome,Coronary Artery Disease


 97%|█████████▋| 819/844 [19:24<00:35,  1.41s/it]

Arthritis, Rheumatoid,Crohn Disease,Psoriasis,Spondylitis, Ankylosing


 97%|█████████▋| 820/844 [19:25<00:33,  1.41s/it]

Myelosuppression,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 97%|█████████▋| 822/844 [19:28<00:31,  1.42s/it]

Fanconi Syndrome,HIV Infections


 98%|█████████▊| 823/844 [19:30<00:29,  1.41s/it]

adverse events,Pain, Postoperative


 98%|█████████▊| 824/844 [19:31<00:28,  1.41s/it]

Alcoholism,hypersexuality state,Tobacco Use Disorder


 98%|█████████▊| 825/844 [19:32<00:26,  1.40s/it]

Neoplasms,Pain,Pain, Postoperative


 98%|█████████▊| 826/844 [19:34<00:25,  1.41s/it]

Breast Neoplasms,Kidney Neoplasms,Kidney Transplantation,Neuroendocrine Tumors


 98%|█████████▊| 827/844 [19:35<00:23,  1.40s/it]

cns depression,Drug Toxicity


 98%|█████████▊| 829/844 [19:38<00:21,  1.41s/it]

Arthritis, Rheumatoid,Drug Toxicity,hematopoietic stem cell transplantation,Psoriasis,Toxic liver disease


 98%|█████████▊| 830/844 [19:39<00:19,  1.41s/it]

Burkitt Lymphoma,Lymphoma, T-Cell,Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 98%|█████████▊| 831/844 [19:41<00:18,  1.42s/it]

Leukemia, Lymphoid,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 99%|█████████▊| 832/844 [19:42<00:17,  1.42s/it]

Leukemia, B-Cell, Acute,Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 99%|█████████▊| 833/844 [19:44<00:15,  1.41s/it]

Burkitt Lymphoma,Leukemia,Lymphoma, T-Cell,Neoplasms,Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 99%|█████████▉| 834/844 [19:45<00:14,  1.41s/it]

Brain Diseases,Drug Toxicity,Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 99%|█████████▉| 835/844 [19:46<00:12,  1.41s/it]

Burkitt Lymphoma,Drug Toxicity,Lymphoma, T-Cell,Osteosarcoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 99%|█████████▉| 836/844 [19:48<00:11,  1.41s/it]

Burkitt Lymphoma,Lymphoma, Non-Hodgkin,Lymphoma, T-Cell,Precursor Cell Lymphoblastic Leukemia-Lymphoma


 99%|█████████▉| 837/844 [19:49<00:09,  1.40s/it]

adverse events,Constipation,Delirium,Nausea,Pruritus,somnolence,Urinary Retention


 99%|█████████▉| 838/844 [19:51<00:08,  1.41s/it]

Depressive Disorder, Major,Obsessive-Compulsive Disorder


 99%|█████████▉| 839/844 [19:52<00:07,  1.41s/it]

Drug Toxicity,Neoplasms,Neutropenia,Peripheral Nervous System Diseases


100%|█████████▉| 840/844 [19:54<00:05,  1.41s/it]

Drug Toxicity,Neutropenia,Peripheral Nervous System Diseases,Toxic liver disease


100%|█████████▉| 841/844 [19:55<00:04,  1.40s/it]

Cardiomyopathy, Dilated,Heart Failure


100%|█████████▉| 842/844 [19:56<00:02,  1.40s/it]

Drug Hypersensitivity,drug reaction with eosinophilia and systemic symptoms,Epidermal Necrolysis, Toxic,Maculopapular Exanthema,severe cutaneous adverse reactions,Stevens-Johnson Syndrome


100%|██████████| 844/844 [19:59<00:00,  1.42s/it]

Depressive Disorder, Major,Panic Disorder





http://id.who.int/icd/entity/1969743250    401
http://id.who.int/icd/entity/774170412     327
http://id.who.int/icd/entity/596808334     195
http://id.who.int/icd/entity/1669279433    176
http://id.who.int/icd/entity/488336723     168
                                          ... 
http://id.who.int/icd/entity/146744831       1
http://id.who.int/icd/entity/190029283       1
http://id.who.int/icd/entity/154709545       1
http://id.who.int/icd/entity/944754984       1
http://id.who.int/icd/entity/1890374210      1
Name: icd11, Length: 357, dtype: int64

In [39]:
# Annotate every row of the ICD-11 table with the PDB entries that reference
# its UniProt accession, then persist the result as the PDB table.
all_data_df = pd.read_csv('middlefile/icd11_table.tsv', sep = '\t')
# Missing accessions cannot be queried, so they are excluded from the lookup;
# their rows simply keep an empty 'pdb list'.
uniprot_id_list = all_data_df['uniprot id'].dropna().unique()
search_url = "https://search.rcsb.org/rcsbsearch/v2/query"


def _fetch_pdb_ids(uniprot_id):
    """Return the PDB entry identifiers linked to one UniProt accession.

    Posts a two-term AND query (accession == `uniprot_id`, database name ==
    "UniProt") to `search_url` and returns the `identifier` of every item in
    the response's `result_set`, in the order the service returned them.

    Returns an empty list when the service answers HTTP 204 (no hits, no body).

    Raises:
        requests.RequestException: transport failure, timeout or HTTP error status.
        ValueError: the response body is not valid JSON.
        KeyError: the JSON payload has no 'result_set' / 'identifier' key.
    """
    search_request = {
        "query": {
            "type": "group",
            "logical_operator": "and",
            "nodes": [
                {
                    "type": "terminal",
                    "service": "text",
                    "parameters": {
                        "operator": "exact_match",
                        "value": uniprot_id,
                        "attribute": "rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers.database_accession"
                    }
                },
                {
                    "type": "terminal",
                    "service": "text",
                    "parameters": {
                        "operator": "exact_match",
                        "value": "UniProt",
                        "attribute": "rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers.database_name"
                    }
                }
            ]
        },
        "request_options": {
            # NOTE(review): only the first 1000 hits are requested; an accession
            # with more entries than that would be truncated — confirm none exist.
            "paginate": {
                "start": 0,
                "rows": 1000
            }
        },
        "return_type": "entry"
    }
    response = requests.post(search_url, json=search_request, timeout=60)
    # An empty result set comes back as 204 with no body; calling .json() on it
    # would raise, so treat it as "no structures" explicitly.
    if response.status_code == 204:
        return []
    response.raise_for_status()
    return [pdb_item['identifier'] for pdb_item in response.json()['result_set']]


# Results are collected per accession and mapped onto the frame afterwards, so
# a failed lookup can never inherit the PDB list of the previous accession.
pdb_str_by_uniprot = {}
failed_uniprot_ids = []
for uniprot_id in tqdm(uniprot_id_list):
    try:
        pdb_str_by_uniprot[uniprot_id] = ','.join(_fetch_pdb_ids(uniprot_id))
    except (requests.RequestException, ValueError, KeyError):
        # Best effort: keep going, but remember which accessions need a retry.
        failed_uniprot_ids.append(uniprot_id)

# Accessions without an entry in the mapping (failed or missing) become NaN.
all_data_df['pdb list'] = all_data_df['uniprot id'].map(pdb_str_by_uniprot)

if failed_uniprot_ids:
    print(f'PDB lookup failed for {len(failed_uniprot_ids)} UniProt ids: {failed_uniprot_ids}')

all_data_df.to_csv('middlefile/pdb_table.tsv', sep = '\t', index= False)
all_data_df


100%|██████████| 1296/1296 [20:26<00:00,  1.06it/s]


Unnamed: 0,data source,chemical name,gene symbol,variant,ddg,disease name,label,uniprot id,chemical_type,pubchem id,smile,chromosome,position,ref allele,alt allele,fasta,pubchem fingerprint,icd11,pdb list
0,aimms,dasatinib,ABL1,M244V,0.290000,,,P00519,,3062316,CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(...,,,,,MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAA...,1110000001111011101100000000000001000100000000...,,"1AB2,1AWO,1BBZ,1JU5,1OPL,1ZZP,2ABL,2E2B,2F4J,2..."
1,aimms,dasatinib,ABL1,G250E,0.480000,,,P00519,,3062316,CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(...,,,,,MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAA...,1110000001111011101100000000000001000100000000...,,"1AB2,1AWO,1BBZ,1JU5,1OPL,1ZZP,2ABL,2E2B,2F4J,2..."
2,aimms,dasatinib,ABL1,Q252H,0.860000,,,P00519,,3062316,CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(...,,,,,MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAA...,1110000001111011101100000000000001000100000000...,,"1AB2,1AWO,1BBZ,1JU5,1OPL,1ZZP,2ABL,2E2B,2F4J,2..."
3,aimms,dasatinib,ABL1,Y253F,0.330000,,,P00519,,3062316,CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(...,,,,,MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAA...,1110000001111011101100000000000001000100000000...,,"1AB2,1AWO,1BBZ,1JU5,1OPL,1ZZP,2ABL,2E2B,2F4J,2..."
4,aimms,dasatinib,ABL1,Y253H,0.290000,,,P00519,,3062316,CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(...,,,,,MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAA...,1110000001111011101100000000000001000100000000...,,"1AB2,1AWO,1BBZ,1JU5,1OPL,1ZZP,2ABL,2E2B,2F4J,2..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19664,ret,Nintedanib,RET,G810A,-0.044158,,,P07949,,135423438,CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)N=C(C3=CC=CC...,,,,,MAKATSGAAGLRLLLLLLLPLLGKVALGLYFSRDAYWEKLYVDQAA...,1111000001111011101110000000000000000000000000...,,"2IVS,2IVT,2IVU,2IVV,2X2K,2X2L,2X2M,2X2U,4CKI,4..."
19665,ret,Nintedanib,RET,G810S,0.826043,,,P07949,,135423438,CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)N=C(C3=CC=CC...,,,,,MAKATSGAAGLRLLLLLLLPLLGKVALGLYFSRDAYWEKLYVDQAA...,1111000001111011101110000000000000000000000000...,,"2IVS,2IVT,2IVU,2IVV,2X2K,2X2L,2X2M,2X2U,4CKI,4..."
19666,ret,Nintedanib,RET,V871I,0.149749,,,P07949,,135423438,CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)N=C(C3=CC=CC...,,,,,MAKATSGAAGLRLLLLLLLPLLGKVALGLYFSRDAYWEKLYVDQAA...,1111000001111011101110000000000000000000000000...,,"2IVS,2IVT,2IVU,2IVV,2X2K,2X2L,2X2M,2X2U,4CKI,4..."
19667,ret,Nintedanib,RET,F998V,0.000000,,,P07949,,135423438,CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)N=C(C3=CC=CC...,,,,,MAKATSGAAGLRLLLLLLLPLLGKVALGLYFSRDAYWEKLYVDQAA...,1111000001111011101110000000000000000000000000...,,"2IVS,2IVT,2IVU,2IVV,2X2K,2X2L,2X2M,2X2U,4CKI,4..."


In [5]:
# Reload the PDB-annotated table, append a 1-based sequential `id` column
# (row order of the file), and save it as the final table.
pdb_table = pd.read_csv('middlefile/pdb_table.tsv', sep='\t')
row_total = len(pdb_table)
all_data_df = pdb_table.assign(id=range(1, row_total + 1))
all_data_df.to_csv('middlefile/final_table.tsv', sep='\t', index=False)

In [4]:
# Debug probe: run the UniProt -> PDB lookup for the first few accessions only
# and print the raw service response for inspection.
all_data_df = pd.read_csv('middlefile/icd11_table.tsv', sep = '\t')
uniprot_id_list = all_data_df['uniprot id'].unique()
search_url = "https://search.rcsb.org/rcsbsearch/v2/query"

# Never index past the end when the table holds fewer than 5 accessions.
for uniprot_idx in tqdm(range(min(5, len(uniprot_id_list)))):
    uniprot_id = uniprot_id_list[uniprot_idx]
    search_request = {
        "query": {
            "type": "group",
            "logical_operator": "and",
            "nodes": [
                {
                    "type": "terminal",
                    "service": "text",
                    "parameters": {
                        "operator": "exact_match",
                        "value": uniprot_id,
                        "attribute": "rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers.database_accession"
                    }
                },
                {
                    "type": "terminal",
                    "service": "text",
                    "parameters": {
                        "operator": "exact_match",
                        "value": "UniProt",
                        "attribute": "rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers.database_name"
                    }
                }
            ]
        },
        "request_options": {
            "paginate": {
                "start": 0,
                "rows": 1000
            }
        },
        "return_type": "entry"
    }

    # Reset on every iteration so a failed or empty lookup can never reuse the
    # PDB list collected for the previous accession.
    pdb_str = ''
    try:
        response = requests.post(search_url, json=search_request)
        # A search without hits is answered with 204 and no body; skip .json().
        if response.status_code != 204:
            payload = response.json()
            print(payload)
            pdb_str = ','.join(pdb_item['identifier'] for pdb_item in payload['result_set'])
    except (requests.RequestException, ValueError, KeyError) as err:
        # Best effort: report the failure and leave this accession empty.
        print(f'{uniprot_id}: PDB lookup failed ({err})')

    all_data_df.loc[all_data_df['uniprot id'] == uniprot_id, 'pdb list'] = pdb_str

# NOTE(review): this writes the partially annotated frame (first accessions
# only) to 'middlefile/pdb_table.tsv', the same path the full lookup cell
# writes and the final-table cell reads — confirm the overwrite is intended.
all_data_df.to_csv('middlefile/pdb_table.tsv', sep = '\t', index= False)
all_data_df


 20%|██        | 1/5 [00:01<00:04,  1.06s/it]

{'query_id': 'f4b8358d-0445-4b08-932b-d9090531624e', 'result_type': 'entry', 'total_count': 81, 'result_set': [{'identifier': '1AB2', 'score': 1.0}, {'identifier': '1AWO', 'score': 1.0}, {'identifier': '1BBZ', 'score': 1.0}, {'identifier': '1JU5', 'score': 1.0}, {'identifier': '1OPL', 'score': 1.0}, {'identifier': '1ZZP', 'score': 1.0}, {'identifier': '2ABL', 'score': 1.0}, {'identifier': '2E2B', 'score': 1.0}, {'identifier': '2F4J', 'score': 1.0}, {'identifier': '2FO0', 'score': 1.0}, {'identifier': '2G1T', 'score': 1.0}, {'identifier': '2G2F', 'score': 1.0}, {'identifier': '2G2H', 'score': 1.0}, {'identifier': '2G2I', 'score': 1.0}, {'identifier': '2GQG', 'score': 1.0}, {'identifier': '2HIW', 'score': 1.0}, {'identifier': '2HYY', 'score': 1.0}, {'identifier': '2HZ0', 'score': 1.0}, {'identifier': '2HZ4', 'score': 1.0}, {'identifier': '2HZI', 'score': 1.0}, {'identifier': '2O88', 'score': 1.0}, {'identifier': '2V7A', 'score': 1.0}, {'identifier': '3CS9', 'score': 1.0}, {'identifier': 

 40%|████      | 2/5 [00:02<00:03,  1.06s/it]

{'query_id': '26142bb3-6597-4296-8190-2447bd133ff5', 'result_type': 'entry', 'total_count': 291, 'result_set': [{'identifier': '1IVO', 'score': 1.0}, {'identifier': '1M14', 'score': 1.0}, {'identifier': '1M17', 'score': 1.0}, {'identifier': '1MOX', 'score': 1.0}, {'identifier': '1NQL', 'score': 1.0}, {'identifier': '1XKK', 'score': 1.0}, {'identifier': '1YY9', 'score': 1.0}, {'identifier': '1Z9I', 'score': 1.0}, {'identifier': '2EB2', 'score': 1.0}, {'identifier': '2EB3', 'score': 1.0}, {'identifier': '2GS2', 'score': 1.0}, {'identifier': '2GS6', 'score': 1.0}, {'identifier': '2GS7', 'score': 1.0}, {'identifier': '2ITN', 'score': 1.0}, {'identifier': '2ITO', 'score': 1.0}, {'identifier': '2ITP', 'score': 1.0}, {'identifier': '2ITQ', 'score': 1.0}, {'identifier': '2ITT', 'score': 1.0}, {'identifier': '2ITU', 'score': 1.0}, {'identifier': '2ITV', 'score': 1.0}, {'identifier': '2ITW', 'score': 1.0}, {'identifier': '2ITX', 'score': 1.0}, {'identifier': '2ITY', 'score': 1.0}, {'identifier':

 60%|██████    | 3/5 [00:03<00:02,  1.05s/it]

{'query_id': '3f5fd879-18b9-4836-b3f5-596255c6c315', 'result_type': 'entry', 'total_count': 166, 'result_set': [{'identifier': '1ABN', 'score': 1.0}, {'identifier': '1ADS', 'score': 1.0}, {'identifier': '1AZ1', 'score': 1.0}, {'identifier': '1AZ2', 'score': 1.0}, {'identifier': '1EF3', 'score': 1.0}, {'identifier': '1EL3', 'score': 1.0}, {'identifier': '1IEI', 'score': 1.0}, {'identifier': '1MAR', 'score': 1.0}, {'identifier': '1PWL', 'score': 1.0}, {'identifier': '1PWM', 'score': 1.0}, {'identifier': '1T40', 'score': 1.0}, {'identifier': '1T41', 'score': 1.0}, {'identifier': '1US0', 'score': 1.0}, {'identifier': '1X96', 'score': 1.0}, {'identifier': '1X97', 'score': 1.0}, {'identifier': '1X98', 'score': 1.0}, {'identifier': '1XGD', 'score': 1.0}, {'identifier': '1Z3N', 'score': 1.0}, {'identifier': '1Z89', 'score': 1.0}, {'identifier': '1Z8A', 'score': 1.0}, {'identifier': '2ACQ', 'score': 1.0}, {'identifier': '2ACR', 'score': 1.0}, {'identifier': '2ACS', 'score': 1.0}, {'identifier':

 80%|████████  | 4/5 [00:04<00:01,  1.05s/it]

{'query_id': '6a94955d-3799-4f72-b713-8e3e06915360', 'result_type': 'entry', 'total_count': 19, 'result_set': [{'identifier': '2QRV', 'score': 1.0}, {'identifier': '3A1A', 'score': 1.0}, {'identifier': '3A1B', 'score': 1.0}, {'identifier': '3LLR', 'score': 1.0}, {'identifier': '3SVM', 'score': 1.0}, {'identifier': '4QBQ', 'score': 1.0}, {'identifier': '4QBR', 'score': 1.0}, {'identifier': '4QBS', 'score': 1.0}, {'identifier': '4U7P', 'score': 1.0}, {'identifier': '4U7T', 'score': 1.0}, {'identifier': '5YX2', 'score': 1.0}, {'identifier': '6BRR', 'score': 1.0}, {'identifier': '6F57', 'score': 1.0}, {'identifier': '6PA7', 'score': 1.0}, {'identifier': '6W89', 'score': 1.0}, {'identifier': '6W8B', 'score': 1.0}, {'identifier': '6W8D', 'score': 1.0}, {'identifier': '6W8J', 'score': 1.0}, {'identifier': '8BA5', 'score': 1.0}]}


100%|██████████| 5/5 [00:05<00:00,  1.05s/it]

{'query_id': 'dc981724-9284-4979-ad33-6dd5aca8deff', 'result_type': 'entry', 'total_count': 69, 'result_set': [{'identifier': '1S9J', 'score': 1.0}, {'identifier': '2P55', 'score': 1.0}, {'identifier': '3DV3', 'score': 1.0}, {'identifier': '3DY7', 'score': 1.0}, {'identifier': '3E8N', 'score': 1.0}, {'identifier': '3EQB', 'score': 1.0}, {'identifier': '3EQC', 'score': 1.0}, {'identifier': '3EQD', 'score': 1.0}, {'identifier': '3EQF', 'score': 1.0}, {'identifier': '3EQG', 'score': 1.0}, {'identifier': '3EQH', 'score': 1.0}, {'identifier': '3EQI', 'score': 1.0}, {'identifier': '3MBL', 'score': 1.0}, {'identifier': '3ORN', 'score': 1.0}, {'identifier': '3OS3', 'score': 1.0}, {'identifier': '3PP1', 'score': 1.0}, {'identifier': '3SLS', 'score': 1.0}, {'identifier': '3V01', 'score': 1.0}, {'identifier': '3V04', 'score': 1.0}, {'identifier': '3VVH', 'score': 1.0}, {'identifier': '3W8Q', 'score': 1.0}, {'identifier': '3WIG', 'score': 1.0}, {'identifier': '3ZLS', 'score': 1.0}, {'identifier': 




Unnamed: 0,data source,chemical name,gene symbol,variant,ddg,disease name,label,uniprot id,chemical_type,pubchem id,smile,chromosome,position,ref allele,alt allele,fasta,pubchem fingerprint,icd11,pdb list
0,aimms,dasatinib,ABL1,M244V,0.290000,,,P00519,,3062316,CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(...,,,,,MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAA...,1110000001111011101100000000000001000100000000...,,"1AB2,1AWO,1BBZ,1JU5,1OPL,1ZZP,2ABL,2E2B,2F4J,2..."
1,aimms,dasatinib,ABL1,G250E,0.480000,,,P00519,,3062316,CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(...,,,,,MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAA...,1110000001111011101100000000000001000100000000...,,"1AB2,1AWO,1BBZ,1JU5,1OPL,1ZZP,2ABL,2E2B,2F4J,2..."
2,aimms,dasatinib,ABL1,Q252H,0.860000,,,P00519,,3062316,CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(...,,,,,MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAA...,1110000001111011101100000000000001000100000000...,,"1AB2,1AWO,1BBZ,1JU5,1OPL,1ZZP,2ABL,2E2B,2F4J,2..."
3,aimms,dasatinib,ABL1,Y253F,0.330000,,,P00519,,3062316,CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(...,,,,,MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAA...,1110000001111011101100000000000001000100000000...,,"1AB2,1AWO,1BBZ,1JU5,1OPL,1ZZP,2ABL,2E2B,2F4J,2..."
4,aimms,dasatinib,ABL1,Y253H,0.290000,,,P00519,,3062316,CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(...,,,,,MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAA...,1110000001111011101100000000000001000100000000...,,"1AB2,1AWO,1BBZ,1JU5,1OPL,1ZZP,2ABL,2E2B,2F4J,2..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19664,ret,Nintedanib,RET,G810A,-0.044158,,,P07949,,135423438,CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)N=C(C3=CC=CC...,,,,,MAKATSGAAGLRLLLLLLLPLLGKVALGLYFSRDAYWEKLYVDQAA...,1111000001111011101110000000000000000000000000...,,
19665,ret,Nintedanib,RET,G810S,0.826043,,,P07949,,135423438,CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)N=C(C3=CC=CC...,,,,,MAKATSGAAGLRLLLLLLLPLLGKVALGLYFSRDAYWEKLYVDQAA...,1111000001111011101110000000000000000000000000...,,
19666,ret,Nintedanib,RET,V871I,0.149749,,,P07949,,135423438,CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)N=C(C3=CC=CC...,,,,,MAKATSGAAGLRLLLLLLLPLLGKVALGLYFSRDAYWEKLYVDQAA...,1111000001111011101110000000000000000000000000...,,
19667,ret,Nintedanib,RET,F998V,0.000000,,,P07949,,135423438,CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)N=C(C3=CC=CC...,,,,,MAKATSGAAGLRLLLLLLLPLLGKVALGLYFSRDAYWEKLYVDQAA...,1111000001111011101110000000000000000000000000...,,


In [12]:
# One-off probe of the RCSB sequence search service: submit a single protein
# sequence and print the raw JSON answer (polymer entities with their scores).
search_url = "https://search.rcsb.org/rcsbsearch/v2/query"

# Protein sequence submitted as the query value.
query_sequence = "MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNSLEKHSWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAELVHHHSTVADGLITTLHYPAPKRNKPTVYGVSPNYDKWEMERTDITMKHKLGGGQYGEVYEGVWKKYSLTVAVKTLKEDTMEVEEFLKEAAVMKEIKHPNLVQLLGVCTREPPFYIITEFMTYGNLLDYLRECNRQEVNAVVLLYMATQISSAMEYLEKKNFIHRDLAARNCLVGENHLVKVADFGLSRLMTGDTYTAHAGAKFPIKWTAPESLAYNKFSIKSDVWAFGVLLWEIATYGMSPYPGIDLSQVYELLEKDYRMERPEGCPEKVYELMRACWQWNPSDRPSFAEIHQAFETMFQESSISDEVEKELGKQGVRGAVSTLLQAPELPTKTRTSRRAAEHRDTTDVPEMPHSKGQGESDPLDHEPAVSPLLPRKERGPPEGGLNEDERLLPKDKKTNLFSALIKKKKKTAPTPPKRSSSFREMDGQPERRGAGEEEGRDISNGALAFTPLDTADPAKSPKPSNGAGVPNGALRESGGSGFRSPHLWKKSSTLTSSRLATGEEEGGGSSSKRFLRSCSASCVPHGAKDTEWRSVTLPRDLQSTGRQFDSSTFGGHKSEKPALPRKRAGENRSDQVTRGTVTPPPRLVKKNEEAADEVFKDIMESSPGSSPPNLTPKPLRRQVTVAPASGLPHKEEAGKGSALGTPAAAEPVTPTSKAGSGAPGGTSKGPAEESRVRRHKHSSESPGRDKGKLSRLKPAPPPPPAASAGKAGGKPSQSPSQEAAGEAVLGAKTKATSLVDAVNSDAAKPSQPGEGLKKPVLPATPKPQSAKPSGTPISPAPVPSTLPSASSALAGDQPSSTAFIPLISTRVSLRKTRQPPERIASGAITKGVVLDSTEALCLAISRNSEQMASHSAVLEAGKNLYTFCVSYVDSIQQMRNKFAFREAINKLENNLRELQICPATAGSGPAATQDFSKLLSSVKEISDIVQR"

# Terminal node for the "sequence" service with the cutoffs sent to the server.
sequence_query = {
    "type": "terminal",
    "service": "sequence",
    "parameters": {
        "evalue_cutoff": 1,
        "identity_cutoff": 0.9,
        "sequence_type": "protein",
        "value": query_sequence,
    },
}

search_request = {
    "query": sequence_query,
    "request_options": {"scoring_strategy": "sequence"},
    "return_type": "polymer_entity",
}

response = requests.post(search_url, json=search_request)
print(response.json())

{'query_id': 'e4bfe2df-0dba-44b1-8d9c-4aac2366e493', 'result_type': 'polymer_entity', 'total_count': 104, 'result_set': [{'identifier': '5MO4_1', 'score': 1.0}, {'identifier': '1OPL_1', 'score': 0.9966960352422908}, {'identifier': '1OPK_1', 'score': 0.9955947136563876}, {'identifier': '2FO0_1', 'score': 0.974669603524229}, {'identifier': '8SSN_1', 'score': 0.9085903083700441}, {'identifier': '4XEY_1', 'score': 0.789647577092511}, {'identifier': '2E2B_1', 'score': 0.5374449339207048}, {'identifier': '6XR6_1', 'score': 0.5374449339207048}, {'identifier': '6XR7_1', 'score': 0.5374449339207048}, {'identifier': '4WA9_1', 'score': 0.5330396475770925}]}


In [6]:
import pandas as pd

# Load the final table and show how many rows each data source contributes.
data_df = pd.read_csv('middlefile/final_table.tsv', sep='\t')
rows_per_source = data_df['data source'].value_counts()
rows_per_source.reset_index()

Unnamed: 0,data source,count
0,gdsc,8442
1,pharmgkb,4828
2,dbmcs,3719
3,civic,2381
4,depmep,193
5,ret,56
6,aimms,50


In [7]:
# Distinct disease names per data source, then the distinct count over the
# whole table.
diseases_per_source = data_df.groupby('data source')['disease name'].nunique()
print(diseases_per_source.reset_index())
total_diseases = data_df['disease name'].nunique()
print(total_diseases)

  data source  disease name
0       aimms             0
1       civic           181
2       dbmcs             0
3      depmep             0
4        gdsc             0
5    pharmgkb           666
6         ret             0
843


In [10]:
# Distinct gene symbols per data source, then the distinct count over the
# whole table.
genes_per_source = data_df.groupby('data source')['gene symbol'].nunique()
print(genes_per_source)
total_genes = data_df['gene symbol'].nunique()
print(total_genes)

data source
aimms          3
civic        256
dbmcs        112
depmep         5
gdsc         124
pharmgkb    1054
ret            1
Name: gene symbol, dtype: int64
1323


In [11]:
# Distinct chemical names per data source, then the distinct count over the
# whole table.
chemicals_per_source = data_df.groupby('data source')['chemical name'].nunique()
print(chemicals_per_source)
total_chemicals = data_df['chemical name'].nunique()
print(total_chemicals)

data source
aimms         4
civic       623
dbmcs       628
depmep        5
gdsc        234
pharmgkb    842
ret           4
Name: chemical name, dtype: int64
2142


In [12]:
def count_mutation_types(group):
    """Count single-point vs. all other variants in a frame.

    A row counts as a single-point mutation when its 'variant' string matches
    one of three shapes: `<letter><digits><letter>` (e.g. V600E),
    `c.<digits><letter>><letter>` (e.g. c.35G>A) or
    `p.<letter><digits><letter>` (e.g. p.G12D). Every other row — including
    rows whose variant is missing — is counted in the second bucket.

    Args:
        group: DataFrame with a string-valued 'variant' column (a whole table
            or one groupby group).

    Returns:
        pd.Series with two integer entries keyed '单点突变数量'
        (single-point count) and '多点突变数量' (remaining rows).
    """
    # Raw string so that `\.` reaches the regex engine without relying on an
    # invalid string escape sequence.
    # NOTE(review): `[0-9]*` in the first alternative also accepts two bare
    # letters with no position (e.g. "AB") — confirm that is intended.
    single_mutation_pattern = r'^[A-Z][0-9]*[A-Z]$|^c\.[0-9]+[A-Z]>[A-Z]$|^p\.[A-Z][0-9]+[A-Z]$'

    # na=False makes missing variants an explicit non-match and keeps the mask
    # a plain boolean column.
    is_single_point = group['variant'].str.match(single_mutation_pattern, na=False)
    single_point_mutations = int(is_single_point.sum())
    multi_point_mutations = len(group) - single_point_mutations

    # Package both counts in a Series so groupby().apply() yields one column each.
    result = pd.Series({
        '单点突变数量': single_point_mutations,
        '多点突变数量': multi_point_mutations
    })

    return result

# Tally the mutation categories separately for every data source ...
rows_by_source = data_df.groupby('data source')
mutation_counts = rows_by_source.apply(count_mutation_types)

# ... and once more over the whole table, then print both summaries.
overall_mutation_counts = count_mutation_types(data_df)
print(mutation_counts)
print(overall_mutation_counts)

             单点突变数量  多点突变数量
data source                
aimms            47       3
civic           820    1561
dbmcs          2986     733
depmep          173      20
gdsc           7114    1328
pharmgkb       1524    3304
ret              52       4
单点突变数量    12716
多点突变数量     6953
dtype: int64
