In [2]:
import numpy as np
import pandas as pd
from Abstract import Abstract
import re
import requests
read_prefix = '/data/MIMIC3/%s.csv'
write_prefix = '/data/liu/mimic3/%s.csv'
aa = Abstract(read_prefix=read_prefix,write_prefix=write_prefix)

## Diagnoses (ICD9_CODE)

In [5]:
diag_df = aa.read_data('DIAGNOSES_ICD')

In [6]:
diag_df.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,SEQ_NUM,ICD9_CODE
0,1297,109,172335,1.0,40301
1,1298,109,172335,2.0,486
2,1299,109,172335,3.0,58281
3,1300,109,172335,4.0,5855
4,1301,109,172335,5.0,4254


In [14]:
diag_df[diag_df['ICD9_CODE']=='2355']

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,SEQ_NUM,ICD9_CODE
14051,14316,1249,103686,1.0,2355
164851,163444,14615,163684,7.0,2355
210343,210996,18969,167233,1.0,2355
386467,386075,41520,158306,9.0,2355
437718,440171,53342,171013,19.0,2355
599445,624769,94307,108480,9.0,2355
648508,650209,99817,195557,17.0,2355


In [10]:
diag_code_df = aa.read_data('D_ICD_DIAGNOSES')
diag_code_df.head()

Unnamed: 0,ROW_ID,ICD9_CODE,SHORT_TITLE,LONG_TITLE
0,174,1166,TB pneumonia-oth test,"Tuberculous pneumonia [any form], tubercle bac..."
1,175,1170,TB pneumothorax-unspec,"Tuberculous pneumothorax, unspecified"
2,176,1171,TB pneumothorax-no exam,"Tuberculous pneumothorax, bacteriological or h..."
3,177,1172,TB pneumothorx-exam unkn,"Tuberculous pneumothorax, bacteriological or h..."
4,178,1173,TB pneumothorax-micro dx,"Tuberculous pneumothorax, tubercle bacilli fou..."


In [15]:
diag_code_df[diag_code_df['ICD9_CODE']=='2355']

Unnamed: 0,ROW_ID,ICD9_CODE,SHORT_TITLE,LONG_TITLE
1714,1717,2355,Unc behav neo GI NEC,Neoplasm of uncertain behavior of other and un...


## Prescriptions

In [4]:
usecols=["SUBJECT_ID", "STARTDATE", "ENDDATE","DRUG","NDC","DRUG_NAME_GENERIC","ROUTE"]
pres_df = pd.read_csv(read_prefix%'PRESCRIPTIONS',usecols=usecols,dtype={'NDC': str})
pres_df.head()

In [12]:
pres_df[pres_df['DRUG']!=pres_df['DRUG_NAME_GENERIC']]

Unnamed: 0,SUBJECT_ID,STARTDATE,ENDDATE,DRUG,DRUG_NAME_GENERIC,NDC,ROUTE
2,6,2175-06-11 00:00:00,2175-06-12 00:00:00,Heparin Sodium,,00338055002,IV
3,6,2175-06-11 00:00:00,2175-06-12 00:00:00,D5W,,0,IV
6,6,2175-06-12 00:00:00,2175-06-12 00:00:00,Heparin Sodium,,00338055002,IV
7,6,2175-06-12 00:00:00,2175-06-12 00:00:00,D5W,,0,IV
8,6,2175-06-12 00:00:00,2175-06-13 00:00:00,Heparin Sodium,,00338055002,IV
10,6,2175-06-12 00:00:00,2175-06-13 00:00:00,D5W,,0,IV
16,6,2175-06-13 00:00:00,2175-06-14 00:00:00,D5W,,0,IV
17,6,2175-06-13 00:00:00,2175-06-14 00:00:00,Heparin Sodium,,00338055002,IV
23,13,2167-01-08 00:00:00,2167-01-09 00:00:00,D5W,,0,IV
24,13,2167-01-08 00:00:00,2167-01-09 00:00:00,Heparin Sodium,,00074779362,IV


### Remove duplicate drugs (Including drugs sometimes have NDC but not in other time)

In [15]:
mimic_drug_df = pres_df[['DRUG','NDC']].drop_duplicates()
mimic_drug_df.count()

DRUG    9211
NDC     8065
dtype: int64

In [41]:
mimic_drug_counts = mimic_drug_df.groupby('DRUG',as_index=False).count().rename(columns={'NDC':'NDC_count'})
mimic_drug_counts.head()

Unnamed: 0,DRUG,NDC_count
0,,0
1,Cabergoline ( Dostinex),0
2,Symbicort,0
3,Zaditor Ophthalmic,0
4,*IND* Pexelizumab/Placebo,1


In [42]:
mimic_drug_df = aa.left_join(mimic_drug_df, mimic_drug_counts,'DRUG')
mimic_drug_df.head()

Unnamed: 0,DRUG,NDC,NDC_count
0,Tacrolimus,469061711,9
1,Warfarin,56017275,7
2,Heparin Sodium,338055002,9
3,D5W,0,12
4,Furosemide,54829725,17


In [58]:
mimic_drug_counts['NDC_L'] = mimic_drug_df.groupby('DRUG',as_index=False)['NDC'].apply(list)
## Write mimic3 drugs with NDC code lists into files

### mapping drugs(ndc, names) to CIDs

In [5]:
mimic_drug_counts=pd.read_csv(write_prefix%'mimic_drugs')
mimic_drug_counts.head()

Unnamed: 0,DRUG,NDC_count,NDC_L
0,Cabergoline ( Dostinex),0,[nan]
1,Symbicort,0,[nan]
2,Zaditor Ophthalmic,0,[nan]
3,*IND* Pexelizumab/Placebo,1,['0']
4,*NF,1,['63824000840']


In [7]:
# mimic_drug_counts['DRUG_len'] = mimic_drug_counts['DRUG'].apply(len)

In [1]:
# mimic_drug_counts

In [44]:
class drug_mapping():
    
    def list_notna(self, L):
        return list(filter(None, L))
    
    def drug_keywords(self, drug):
        drug = drug.strip()
        ## remove percentage value(interger and float) and **[w]**
        ill_regex = '[(-.0-9)(\d)]+%|\*+.*?\*+|\*+[a-zA-Z]+'
        drug = re.sub(ill_regex, "", drug)
        
        ## Extract terms within brackets
        bracket_re = '\(.*?\)'
        bracket_terms = re.findall(bracket_re, drug)
        bracket_terms = [re.sub('[\(\)]',"",i).strip() for i in bracket_terms]
        bracket_terms = [('_').join(re.sub(' +', "",i).split(' ')) for i in bracket_terms]
        drug = re.sub(bracket_re, "", drug).strip()
        
        ## split by " " and "/"
        drug_terms = re.split(' +|\/', drug)
        
        ## reverse drug whole name
        if(len(drug_terms)>1):
            drug = ('_').join(drug_terms)
            reverse_drug = ('_').join(drug_terms[1:]+[drug_terms[0]])
            if(len(drug_terms)>2):
                back_reverse_drug = ('_').join([drug_terms[-1]]+drug_terms[:-1])
                reverse_drug = [reverse_drug, back_reverse_drug]
            else: reverse_drug = [reverse_drug]
            return self.list_notna([drug] + bracket_terms + reverse_drug), \
                        self.list_notna(drug_terms)
        else: 
            return self.list_notna([drug] + bracket_terms), []
    
    def search_CID_bydrugs(self, drugs):
        pubchem_api_1 = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/%s/cids/JSON'
        if(drugs):
            all_results = []
            for drug in drugs:
                response = requests.get(pubchem_api_1%drug)
                if(response.status_code!=404):
                    try:
                        results = response.json()['IdentifierList']['CID']
                    except:
                        continue
                    all_results = all_results + results
            all_results = list(set(all_results))
            return all_results[:5]
                
    
    def search_CID_byterms(self, drug_terms):
        pubchem_api_2 = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/%s/cids/JSON?name_type=word'

        ## Only remain 2 results with the least results
        min2_results = {}
        num_results = 0
        if(drug_terms):
            for term in drug_terms:
                response = requests.get(pubchem_api_2%term)
                if(response.status_code!=404):
                    min2_result = {}
                    try:
                        results = response.json()['IdentifierList']['CID']   
                    except:
                        continue
                    if(num_results<2):             
                        min2_result['CID'] = results
                        min2_result['len'] = len(results)
                        min2_results[term] = min2_result
                        num_results += 1
                    else: 
                        min2_terms = min2_results.keys()
                        min2_len = {min2_results[key]['len']:key for key in min2_terms}
                        new_len = len(results)
                        max_len = max(min2_len.keys())
                        if(new_len < max_len):
                            min2_results.pop(min2_len[max_len])
                            min2_result['CID'] = results
                            min2_result['len'] = new_len
                            min2_results[term] = min2_result
        else: return []                   
        results_len= len(min2_results)
        if(results_len==0): 
            return []
        elif(results_len==1): 
            return min2_results[list(min2_results.keys())[0]]['CID'][:5]
        else: 
            keys = list(min2_results.keys())
            re1 = min2_results[keys[0]]['CID'][:3]
            re2 = min2_results[keys[1]]['CID'][:3]
            finals = list(set(re1).intersection(set(re2)))
            if(len(finals)==0):
                finals = re1 + re2
            return finals[:5]
    
    def search_ingredient_byndc(self, ndc_code):
        fda_ndc_api = 'https://api.fda.gov/drug/ndc.json?search=product_ndc:"%s"'
        response = requests.get(fda_ndc_api%ndc_code)
        try:
            ingredient = response.json()['results'][0]['active_ingredients'][0]['name']
            return ingredient
        except:
            return ''
        
    
    def get_ingredients_byndc(self, drug_ndc):
        comma_re = '\'\d+\''
        ndc_L = re.findall(comma_re, drug_ndc)
        ndc_L = [s.replace('\'','') for s in ndc_L]
        
        ndc_len = len(drug_ndc)
        if(not ndc_L): return []
        elif(ndc_len==1 and ndc_L[0]=='0'):
            return []
        else:
            ndc_codes = []
            for ndc in ndc_L:
                ## Match three kinds of ndc format
                if(ndc[0]=='0'):
                    ## 442
                    ndc_codes = ndc_codes + [('-').join([ndc[1:5], ndc[5:9]])]
                if(ndc[5]=='0'):
                    ## 532
                    ndc_codes = ndc_codes + [('-').join([ndc[0:5],ndc[6:9]])]
                if(ndc[9]=='0'):
                    ## 541
                    ndc_codes = ndc_codes + [('-').join([ndc[0:5],ndc[5:9]])]
            ingredients = [self.search_ingredient_byndc(ndc_code) for ndc_code in ndc_codes]
            ingredients = self.list_notna(ingredients)
            return ingredients
#             return ndc_codes
                    
            
    
    def get_CID_by_name(self, drug):
        drugs, terms = self.drug_keywords(drug)
        first_re = self.search_CID_bydrugs(drugs)
        if(first_re):
            return first_re
        else:
            if(not terms): terms=drugs
            second_re = self.search_CID_byterms(terms)
            return second_re
    
    def list_flattern(self, l):
        flatten_L = [item for sublist in l for item in sublist]
        return flatten_L
    
    def get_CID(self, drug_name, drug_ndc):
        ## first search ingredients and then search ICD
        ingredient_L = self.search_ingredient_byndc(drug_ndc)
        cid_L = self.list_flattern([self.get_CID_by_name(ingred) for ingred in ingredient_L])
        
        if(not cid_L):
            cid_L = self.get_CID_by_name(drug_name)
        return cid_L


In [45]:
dm = drug_mapping()
# ndc = "['00002831501']"
# ndc_L = dm.get_ingredients_byndc(ndc)
# ndc_L
# mimic_drug_counts_p1 = mimic_drug_counts[:1500]

In [46]:
mimic_drug_counts[:10].apply(lambda row: dm.get_CID(row['DRUG'], row['NDC_L']), axis=1)

0                                              [54746]
1                                    [56841116, 40000]
2              [5282408, 3827, 71749703, 298, 7018721]
3                    [5073, 5284627, 444795, 54671203]
4                                                   []
5                                              [68844]
6                                                   []
7                                       [60172, 60871]
8                                                   []
9    [135401907, 135534565, 135407110, 23675735, 23...
dtype: object

In [47]:
mimic_drug_counts[:10]

Unnamed: 0,DRUG,NDC_count,NDC_L
0,Cabergoline ( Dostinex),0,[nan]
1,Symbicort,0,[nan]
2,Zaditor Ophthalmic,0,[nan]
3,*IND* Pexelizumab/Placebo,1,['0']
4,*NF,1,['63824000840']
5,*NF BRINZOLAMIDE (AZOPT),0,[nan]
6,*NF* Abatacept,1,['00003218710']
7,*NF* Adefovir (HepSera),0,[nan]
8,*NF* Alemtuzumab,1,['50419035510']
9,*NF* Allopurinol Sodium,1,['59730560101']


In [67]:
('-').join([ndc[1:5], ndc[5:9],ndc[9:]])

'0002-8215-01'

In [59]:
drug_ndc = "[nan]"
comma_re = '\'\d+\''
ndc_L = re.findall(comma_re, drug_ndc)
ndc_L = [s.replace('\'','') for s in ndc_L]

In [60]:
ndc_L

[]

In [27]:
dm.drug_keywords('~ <IND> Defibrotide')

(['~_<IND>_Defibrotide', '<IND>_Defibrotide_~', 'Defibrotide_~_<IND>'],
 ['~', '<IND>', 'Defibrotide'])

In [None]:
mimic_drug_counts_p1['CID']=mimic_drug_counts_p1['DRUG'].apply(dm.get_ICD_by_name)
aa.write2file('mimic_drugs_CID_p1')

In [150]:
p = '*NF BRINZOLAMIDE (AZOPT) AB'
ill_regex = '[(-.0-9)(\d)]+%|\*.*?\*|\*+[a-zA-Z]+'
t = re.sub(ill_regex, "", p)
print(t)

 BRINZOLAMIDE (AZOPT) AB


In [171]:
## apply on drug_counts
mimic_drug_counts['DRUG'][:10].apply(dm.get_ICD_by_name)

['Cabergoline', 'Dostinex']
[54746]
['Symbicort']
[56841116, 40000]
['Zaditor_Ophthalmic', 'Ophthalmic_Zaditor']
[5282408, 3827, 71749703, 298, 7018721]
['Pexelizumab_Placebo', 'Placebo_Pexelizumab']
[5073, 5284627, 444795, 54671203]
[]
[]
['BRINZOLAMIDE', 'AZOPT']
[68844]
['Abatacept']
[]
['Adefovir', 'HepSera']
[60172, 60871]
['Alemtuzumab']
[]
['Allopurinol_Sodium', 'Sodium_Allopurinol']
[23675735, 23665884, 16684434, 135401907, 135534565]


0                                              [54746]
1                                    [56841116, 40000]
2              [5282408, 3827, 71749703, 298, 7018721]
3                    [5073, 5284627, 444795, 54671203]
4                                                   []
5                                              [68844]
6                                                   []
7                                       [60172, 60871]
8                                                   []
9    [23675735, 23665884, 16684434, 135401907, 1355...
Name: DRUG, dtype: object

In [76]:
mimic_drug_counts[:10]

Unnamed: 0,DRUG,NDC_count,NDC_L
0,Cabergoline ( Dostinex),0,[nan]
1,Symbicort,0,[nan]
2,Zaditor Ophthalmic,0,[nan]
3,*IND* Pexelizumab/Placebo,1,['0']
4,*NF,1,['63824000840']
5,*NF BRINZOLAMIDE (AZOPT),0,[nan]
6,*NF* Abatacept,1,['00003218710']
7,*NF* Adefovir (HepSera),0,[nan]
8,*NF* Alemtuzumab,1,['50419035510']
9,*NF* Allopurinol Sodium,1,['59730560101']


In [None]:
out

In [25]:
a[a['DRUG_NAME_GENERIC'].notna()]

Unnamed: 0,SUBJECT_ID,STARTDATE,ENDDATE,DRUG,DRUG_NAME_GENERIC,NDC,ROUTE
40,13,2167-01-09 00:00:00,2167-01-12 00:00:00,Acetaminophen,Acetaminophen (Rectal),00713016550,PR
43,13,2167-01-09 00:00:00,2167-01-12 00:00:00,Metoclopramide,Metoclopramide HCl,00031670972,IV
60,6,2175-06-14 00:00:00,2175-06-15 00:00:00,Metoclopramide,Metoclopramide HCl,00904107061,PO
112,4,2191-03-16 00:00:00,2191-03-16 00:00:00,Insulin,Insulin Glargine,00088222033,SC
116,4,2191-03-16 00:00:00,2191-03-16 00:00:00,Lantus,Insulin Glargine,00088222033,SC
123,4,2191-03-16 00:00:00,2191-03-19 00:00:00,Insulin,Insulin Glargine,00088222033,SC
131,4,2191-03-16 00:00:00,2191-03-23 00:00:00,Heparin,Heparin Sodium,00641040025,SC
134,4,2191-03-16 00:00:00,2191-03-23 00:00:00,Ranitidine,Ranitidine HCl,00781188313,PO
141,4,2191-03-17 00:00:00,2191-03-20 00:00:00,Lantus,Insulin Glargine,00088222033,SC
147,4,2191-03-18 00:00:00,2191-03-23 00:00:00,Didanosine Chewable Tab,Didanosine,00087665201,PO


array([11,  1,  3])

array(['0'], dtype=object)

array([nan], dtype=object)

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,STARTDATE,ENDDATE,DRUG_TYPE,DRUG,DRUG_NAME_POE,DRUG_NAME_GENERIC,FORMULARY_DRUG_CD,GSN,NDC,PROD_STRENGTH,DOSE_VAL_RX,DOSE_UNIT_RX,FORM_VAL_DISP,FORM_UNIT_DISP,ROUTE,NDC_length
683,2981776,11,194540,,2178-05-10 00:00:00,2178-05-10 00:00:00,MAIN,Melatonin,Melatonin,Melatonin,,024665 041568 044488 043811 026076,,1 mg,11,mg,11,TAB,ORAL,3
686,2981777,11,194540,,2178-05-10 00:00:00,2178-05-11 00:00:00,MAIN,Melatonin,Melatonin,Melatonin,,024665 041568 044488 043811 026076,,1 mg,1,mg,1,TAB,ORAL,3
10011,1040299,124,172461,,2160-07-09 00:00:00,2160-07-10 00:00:00,MAIN,Famotidine,,,FAMO20PM,021732,,20mg Premix Bag,20,mg,1,BAG,IV,3
10012,1040300,124,172461,,2160-07-09 00:00:00,2160-07-10 00:00:00,MAIN,Cefazolin,,,CEFA1F,009061,,1g Froz.Bag,1,gm,1,BAG,IV,3
10015,1040297,124,172461,,2160-07-09 00:00:00,2160-07-10 00:00:00,MAIN,Heparin Sodium,,,HEPAPREMIX,006522,,"25,000 unit Premix Bag",25000,UNIT,1,BAG,IV,3


In [31]:
del pres_df_1, pres_df_3

## SIDE EFFECTS

In [5]:
drug_indication_df = pd.read_csv('/data/liu/ade/SIDER_4/indications.tsv', sep='\t', header=0)
drug_indication_df.head()
# del drug_indication_df

Unnamed: 0,drugbank_id,drugbank_name,pubchem_id,stitch_id_flat,umls_cui_from_label,method,concept_name,meddra_type,umls_cui_from_meddra,meddra_name
0,DB00014,Goserelin,47725,CID100047725,C0002871,text_mention,Anemia,PT,C0002871,Anaemia
1,DB00014,Goserelin,47725,CID100047725,C0006142,NLP_indication,Malignant neoplasm of breast,PT,C0006142,Breast cancer
2,DB00014,Goserelin,47725,CID100047725,C0006826,NLP_precondition,Malignant Neoplasms,PT,C0006826,Neoplasm malignant
3,DB00014,Goserelin,47725,CID100047725,C0014175,NLP_indication,Endometriosis,PT,C0014175,Endometriosis
4,DB00014,Goserelin,47725,CID100047725,C0025323,text_mention,Menorrhagia,PT,C0025323,Menorrhagia


In [22]:
drug_indication_df[drug_indication_df['drugbank_name']=='Tacrolimus'].head()

Unnamed: 0,drugbank_id,drugbank_name,pubchem_id,stitch_id_flat,umls_cui_from_label,method,concept_name,meddra_type,umls_cui_from_meddra,meddra_name
8247,DB00864,Tacrolimus,5372,CID100005372,C0003862,NLP_indication,Arthralgia,PT,C0948594,Musculoskeletal discomfort
8248,DB00864,Tacrolimus,5372,CID100005372,C0003862,NLP_indication,Arthralgia,PT,C0003862,Arthralgia
8249,DB00864,Tacrolimus,5372,CID100005372,C0003864,text_mention,Arthritis,PT,C0003864,Arthritis
8250,DB00864,Tacrolimus,5372,CID100005372,C0003873,NLP_indication,Rheumatoid Arthritis,PT,C0003873,Rheumatoid arthritis
8251,DB00864,Tacrolimus,5372,CID100005372,C0003873,NLP_indication,Rheumatoid Arthritis,PT,C0003864,Arthritis


In [19]:
side_effect_df = pd.read_csv('/data/liu/ade/SIDER_4/side-effects.tsv', sep='\t', header=0)
side_effect_df.head()

Unnamed: 0,drugbank_id,drugbank_name,umls_cui_from_meddra,side_effect_name
0,DB07768,"(10ALPHA,13ALPHA,14BETA,17ALPHA)-17-HYDROXYAND...",C0000729,Abdominal cramps
1,DB07768,"(10ALPHA,13ALPHA,14BETA,17ALPHA)-17-HYDROXYAND...",C0000737,Abdominal pain
2,DB07768,"(10ALPHA,13ALPHA,14BETA,17ALPHA)-17-HYDROXYAND...",C0232492,Abdominal pain upper
3,DB07768,"(10ALPHA,13ALPHA,14BETA,17ALPHA)-17-HYDROXYAND...",C0740651,Abdominal symptom
4,DB07768,"(10ALPHA,13ALPHA,14BETA,17ALPHA)-17-HYDROXYAND...",C0877331,Abnormal clotting factor


In [23]:
side_effect_df[side_effect_df['drugbank_id']=='DB00864'].head()

Unnamed: 0,drugbank_id,drugbank_name,umls_cui_from_meddra,side_effect_name
133938,DB00864,Tacrolimus,C1291077,Abdominal bloating
133939,DB00864,Tacrolimus,C0000731,Abdominal distension
133940,DB00864,Tacrolimus,C0000737,Abdominal pain
133941,DB00864,Tacrolimus,C0234458,Abnormal dreams
133942,DB00864,Tacrolimus,C3665386,Abnormal vision


In [24]:
len(side_effect_df['drugbank_name'].unique())

1223

In [32]:
len(pres_df['DRUG'].unique())

4206

## Match drugs in DrugBank and MIMIC3 by names

In [53]:
# drugbank_df = pd.read_csv('/data/liu/ade/SIDER_4/spider_drugbank.txt', sep='\t')
## Solve text with different number of columns 
def read_from_txt(filepath,cols,split='\t\t'):
    f=open(filepath,"r")
    lines=f.readlines()
    result=[]
    for x in lines:
        result.append(x.split(split)[:cols])
    f.close()
    return result

# drugbank_list = read_from_txt('/data/liu/ade/SIDER_4/spider_drugbank.txt',2)
drugbank_list = pd.read_csv('/data/liu/ade/SIDER_4/drugbank.tsv', header=0, sep='\t')
drugbank_list[:5]

pres_drugs = pres_df[['DRUG','NDC']].drop_duplicates()
# pres_drugs = pres_drugs[pres_drugs['NDC'].notna()]
aa.write2file(pres_drugs,'mimic_drugs')

bank_drugs = drugbank_list[['drugbank_id','name']].drop_duplicates()
aa.write2file(pres_drugs,'bank_drugs')

# sider_drugs = side_effect_df[['drugbank_id','drugbank_name']].drop_duplicates()
aa.write2file(pres_drugs,'sider_drugs')

In [17]:
pres_drugs.head()

Unnamed: 0,DRUG,NDC
0,Tacrolimus,469061711
1,Warfarin,56017275
2,Heparin Sodium,338055002
3,D5W,0
4,Furosemide,54829725


In [4]:
pres_drugs = pd.read_csv(write_prefix%'mimic_drugs', header=0, sep=',',dtype={'NDC': str})
pres_drugs.head()

Unnamed: 0,DRUG,NDC
0,Tacrolimus,469061711
1,Warfarin,56017275
2,Heparin Sodium,338055002
3,D5W,0
4,Furosemide,54829725


In [15]:
import requests
def get_unii(drug_name):
    fda_api = 'https://api.fda.gov/drug/label.json?search=openfda.generic_name:%s&limit=1' % drug_name
    r = requests.get(fda_api)
    try:
        return set(r.json()['results'][0]['openfda']['unii'])
    except:
        print("Cannot find the drug %s in FDA dataset." % drug_name)
        return np.nan

In [16]:
## Search unii codes for drugs in MIMIC3
pres_drugs['UNII'] = pres_drugs['DRUG'].apply(get_unii)

Cannot find the drug D5W in FDA dataset.
Cannot find the drug Zolpidem Tartrate in FDA dataset.
Cannot find the drug Midazolam HCl in FDA dataset.
Cannot find the drug Nitroglycerin SL in FDA dataset.
Cannot find the drug Vancomycin HCl in FDA dataset.
Cannot find the drug Iso-Osmotic Dextrose in FDA dataset.
Cannot find the drug SW in FDA dataset.
Cannot find the drug D5W in FDA dataset.
Cannot find the drug Valganciclovir HCl in FDA dataset.
Cannot find the drug Send 500mg Vial in FDA dataset.
Cannot find the drug NEO*IV*Gentamicin in FDA dataset.
Cannot find the drug NEO*IV*Ampicillin Sodium in FDA dataset.
Cannot find the drug Syringe (Neonatal) *D5W* in FDA dataset.
Cannot find the drug Labetalol HCl in FDA dataset.
Cannot find the drug NS in FDA dataset.
Cannot find the drug Soln. in FDA dataset.
Cannot find the drug Hydralazine HCl in FDA dataset.
Cannot find the drug *NF* Nicardipine HCl IV in FDA dataset.
Cannot find the drug NEO*IV*Gentamicin in FDA dataset.
Cannot find the d

Cannot find the drug Lidocaine 1%/Epinephrine 1:100000 in FDA dataset.
Cannot find the drug Bupivacaine 0.25% in FDA dataset.
Cannot find the drug Midazolam HCl in FDA dataset.
Cannot find the drug Lidocaine 1% in FDA dataset.
Cannot find the drug Thiamine HCl in FDA dataset.
Cannot find the drug Sodium Chloride 3% (Hypertonic) in FDA dataset.
Cannot find the drug Nephrocaps in FDA dataset.
Cannot find the drug Carmustine in FDA dataset.
Cannot find the drug Prevident Mint Gel 1.1% in FDA dataset.
Cannot find the drug Ciprofloxacin HCl in FDA dataset.
Cannot find the drug Hydrocortisone Na Succ. in FDA dataset.
Cannot find the drug NS Epidural Bag (0.9% NaCl) in FDA dataset.
Cannot find the drug Bupivacaine 0.5% in FDA dataset.
Cannot find the drug Magnesium Citrate in FDA dataset.
Cannot find the drug Atacand in FDA dataset.
Cannot find the drug Procainamide HCl in FDA dataset.
Cannot find the drug Aluminum Hydroxide Suspension in FDA dataset.
Cannot find the drug Miconazole Powder 2%

Cannot find the drug Albumin 5% in FDA dataset.
Cannot find the drug Piperacillin-Tazobactam Na in FDA dataset.
Cannot find the drug ALBU in FDA dataset.
Cannot find the drug Acetylcysteine 20% in FDA dataset.
Cannot find the drug D5W in FDA dataset.
Cannot find the drug Albumin 25% (12.5gm) in FDA dataset.
Cannot find the drug Midazolam HCl in FDA dataset.
Cannot find the drug 0.9% Sodium Chloride in FDA dataset.
Cannot find the drug Albumin 5% (25g / 500mL) in FDA dataset.
Cannot find the drug 5% Dextrose in FDA dataset.
Cannot find the drug Labetalol HCl in FDA dataset.
Cannot find the drug Miconazole Nitrate Vag Cream 2% in FDA dataset.
Cannot find the drug NEO*IM*Palivizumab in FDA dataset.
Cannot find the drug NEO*SC*Polio Vaccine (Inactive) in FDA dataset.
Cannot find the drug Fat Emulsion 20% in FDA dataset.
Cannot find the drug Terazosin HCl in FDA dataset.
Cannot find the drug Vancomycin 25mg/mL Ophth Soln in FDA dataset.
Cannot find the drug PrednisoLONE Acetate 1% Ophth. Su

Cannot find the drug Ciprofloxacin HCl in FDA dataset.
Cannot find the drug zinc oxide in FDA dataset.
Cannot find the drug Potassium Chl 20 mEq / 1000 mL D5LR in FDA dataset.
Cannot find the drug Multivitamins in FDA dataset.
Cannot find the drug Piperacillin-Tazobactam Na in FDA dataset.
Cannot find the drug Acetami in FDA dataset.
Cannot find the drug Paroxetine HCl in FDA dataset.
Cannot find the drug Lidocaine Jelly 2% in FDA dataset.
Cannot find the drug Propranolol HCl in FDA dataset.
Cannot find the drug Readi-Cat 2 (Barium Sulfate 2% Suspension) in FDA dataset.
Cannot find the drug Heparin (CRRT Machine Priming) in FDA dataset.
Cannot find the drug Hydrocortisone Na Succ. in FDA dataset.
Cannot find the drug Acetami in FDA dataset.
Cannot find the drug Sulfameth/Trimethoprim SS in FDA dataset.
Cannot find the drug Pulmicort in FDA dataset.
Cannot find the drug Heparin Flush PICC (100 units/ml) in FDA dataset.
Cannot find the drug Albumin 25% (12.5gm) in FDA dataset.
Cannot fin

Cannot find the drug Loperamide Hcl in FDA dataset.
Cannot find the drug Phenazopyridine HCl in FDA dataset.
Cannot find the drug Acetaminophen w/Codeine Elixir in FDA dataset.
Cannot find the drug Avapro in FDA dataset.
Cannot find the drug Hydrocortisone Cream 1% in FDA dataset.
Cannot find the drug pilocarpine HCl in FDA dataset.
Cannot find the drug TRISODIUM CITRATE 4% REPLACEMENT FLUID (Dialysis) in FDA dataset.
Cannot find the drug Ropinirole HCl in FDA dataset.
Cannot find the drug Hydrocerin in FDA dataset.
Cannot find the drug Leucovorin Calcium in FDA dataset.
Cannot find the drug Tobramycin Inhalation Soln in FDA dataset.
Cannot find the drug NEO*IV*Clindamycin in FDA dataset.
Cannot find the drug *NF* Mycophenolate Sodium in FDA dataset.
Cannot find the drug Myfortic in FDA dataset.
Cannot find the drug Muromonab-CD3 in FDA dataset.
Cannot find the drug Multivitamins in FDA dataset.
Cannot find the drug Critic-Aid Skin Paste in FDA dataset.
Cannot find the drug Rosiglitazo

Cannot find the drug Alteplase 1mg/1mL ( Clearance ie. Temp TLCL ) in FDA dataset.
Cannot find the drug NEO*IV*Phenobarbital in FDA dataset.
Cannot find the drug Propranolol HCl in FDA dataset.
Cannot find the drug Collagenase Ointment in FDA dataset.
Cannot find the drug D5 1/4NS in FDA dataset.
Cannot find the drug Hydrocortisone Acetate Ointment 1% in FDA dataset.
Cannot find the drug Triamcinolone Acetonide 0.1% Cream in FDA dataset.
Cannot find the drug Timolol Maleate 0.5% in FDA dataset.
Cannot find the drug Pulmicort in FDA dataset.
Cannot find the drug GlipiZIDE XL in FDA dataset.
Cannot find the drug Pioglitazone HCl in FDA dataset.
Cannot find the drug Clindamycin HCl in FDA dataset.
Cannot find the drug Aspir in FDA dataset.
Cannot find the drug Aspiri in FDA dataset.
Cannot find the drug *NF* Tacrolimus Suspension in FDA dataset.
Cannot find the drug Remifentanil HCl in FDA dataset.
Cannot find the drug Benicar in FDA dataset.
Cannot find the drug Acebutolol HCl in FDA dat

Cannot find the drug multiv in FDA dataset.
Cannot find the drug Dexmedetomidine Hcl in FDA dataset.
Cannot find the drug Metoprolol Tartrate in FDA dataset.
Cannot find the drug Pletal in FDA dataset.
Cannot find the drug D20W in FDA dataset.
Cannot find the drug Clonidine Patch 0.1 mg/24 hr in FDA dataset.
Cannot find the drug Lipid Emulsion 20% in FDA dataset.
Cannot find the drug Lidocaine 1% PF in FDA dataset.
Cannot find the drug *NF* Arginine HCl in FDA dataset.
Cannot find the drug *NF* Beclomethasone Dipropionate Inhalation in FDA dataset.
Cannot find the drug Soln. in FDA dataset.
Cannot find the drug guaif in FDA dataset.
Cannot find the drug Famotidine (PO) in FDA dataset.
Cannot find the drug NEO*PO*Hydrocortisone Sod Succinate in FDA dataset.
Cannot find the drug Glipizide XL in FDA dataset.
Cannot find the drug Dextran 40 10% in FDA dataset.
Cannot find the drug Prempro in FDA dataset.
Cannot find the drug Pulmozyme in FDA dataset.
Cannot find the drug Sonata in FDA data

Cannot find the drug BACTR in FDA dataset.
Cannot find the drug Levobunolol Hcl 0.5% in FDA dataset.
Cannot find the drug Megestr in FDA dataset.
Cannot find the drug Chlordiazepoxide HCl in FDA dataset.
Cannot find the drug Femara in FDA dataset.
Cannot find the drug *NF* Capecitabine in FDA dataset.
Cannot find the drug Imipramine HCl in FDA dataset.
Cannot find the drug Mupirocin Ointment 2% in FDA dataset.
Cannot find the drug *NF* Basiliximab in FDA dataset.
Cannot find the drug Methylprednisolone Na Succ. in FDA dataset.
Cannot find the drug Unasyn in FDA dataset.
Cannot find the drug Propranolol LA in FDA dataset.
Cannot find the drug Propranolol HCl in FDA dataset.
Cannot find the drug Sulfameth/Trimethoprim in FDA dataset.
Cannot find the drug Collagenase Ointment in FDA dataset.
Cannot find the drug Glucovance (5mg-500mg) in FDA dataset.
Cannot find the drug Piperacillin-Tazobactam Na in FDA dataset.
Cannot find the drug Razadyne in FDA dataset.
Cannot find the drug SYMLIN in

Cannot find the drug Propafenone HCl in FDA dataset.
Cannot find the drug Sulfame in FDA dataset.
Cannot find the drug Sulfame in FDA dataset.
Cannot find the drug potass in FDA dataset.
Cannot find the drug Prevacid in FDA dataset.
Cannot find the drug Gammagard S/D in FDA dataset.
Cannot find the drug prevacid in FDA dataset.
Cannot find the drug Acetic Acid 0.25% Irrig. Soln in FDA dataset.
Cannot find the drug *nf in FDA dataset.
Cannot find the drug levo in FDA dataset.
Cannot find the drug Povidone Iodine 1/4 Strength in FDA dataset.
Cannot find the drug Lescol XL in FDA dataset.
Cannot find the drug Protopic in FDA dataset.
Cannot find the drug bact in FDA dataset.
Cannot find the drug cilo in FDA dataset.
Cannot find the drug *NF* Micafungin in FDA dataset.
Cannot find the drug Triamcinolone Acetonide 0.025% Cream in FDA dataset.
Cannot find the drug Lumigan in FDA dataset.
Cannot find the drug Zelnorm in FDA dataset.
Cannot find the drug Polysaccharide Iron Complex in FDA data

Cannot find the drug Ritonavir/Lopinavir in FDA dataset.
Cannot find the drug Donnatol in FDA dataset.
Cannot find the drug Blue-Green Algae (Spirulina) in FDA dataset.
Cannot find the drug NEO*PO*Midazolam in FDA dataset.
Cannot find the drug NEO*PO*Lorazepam Oral Concentrate in FDA dataset.
Cannot find the drug Ritonavir/Lopinavir in FDA dataset.
Cannot find the drug TB Syringe in FDA dataset.
Cannot find the drug Kaletra in FDA dataset.
Cannot find the drug Nystatin-Triamcinolone Ointment in FDA dataset.
Cannot find the drug Budesonide (Pulmicort) in FDA dataset.
Cannot find the drug DiphenhydrAMINE HCl in FDA dataset.
Cannot find the drug Pulmicort in FDA dataset.
Cannot find the drug Nasonex in FDA dataset.
Cannot find the drug Lidocaine 1% in FDA dataset.
Cannot find the drug Bacitracin/Polymyxin B Sulfate Opht. Oint in FDA dataset.
Cannot find the drug Atropine Sulfate Ophth 1% in FDA dataset.
Cannot find the drug Glipizide XL in FDA dataset.
Cannot find the drug Tropicamide 1 %

Cannot find the drug Eplerenone (INSPRA) in FDA dataset.
Cannot find the drug Travatan Z in FDA dataset.
Cannot find the drug diltiaz in FDA dataset.
Cannot find the drug Tizanidine HCl in FDA dataset.
Cannot find the drug Unithroid in FDA dataset.
Cannot find the drug Klonopin in FDA dataset.
Cannot find the drug Sulfacetamide 10% Ophth Soln. in FDA dataset.
Cannot find the drug Albumin 5% (12.5g / 250mL) in FDA dataset.
Cannot find the drug Sinemet in FDA dataset.
Cannot find the drug Betamethasone Dipro 0.05% Cream in FDA dataset.
Cannot find the drug Actonel in FDA dataset.
Cannot find the drug Insulin Lispro 75/25 in FDA dataset.
Cannot find the drug NEO*PO*Zidovudine Syrup in FDA dataset.
Cannot find the drug Insulin Human 70/30 in FDA dataset.
Cannot find the drug Propranolol LA in FDA dataset.
Cannot find the drug Dermagran Ointment in FDA dataset.
Cannot find the drug Gentamic in FDA dataset.
Cannot find the drug Ketamine HCl in FDA dataset.
Cannot find the drug Betaseron in F

Cannot find the drug Meperidine PF in FDA dataset.
Cannot find the drug GU in FDA dataset.
Cannot find the drug Alteplase 1mg/Flush Volume ( Dialysis/Pheresis Catheters ) in FDA dataset.
Cannot find the drug Amantadine Syrup in FDA dataset.
Cannot find the drug Arimidex in FDA dataset.
Cannot find the drug Revatio in FDA dataset.
Cannot find the drug Lotemax*NF* in FDA dataset.
Cannot find the drug Hydroxyzine HCl in FDA dataset.
Cannot find the drug Mult5l in FDA dataset.
Cannot find the drug Bupivacaine 0.1% in FDA dataset.
Cannot find the drug Blue CADD Cassette in FDA dataset.
Cannot find the drug DILT in FDA dataset.
Cannot find the drug FiberCon in FDA dataset.
Cannot find the drug Leucovorin Calcium in FDA dataset.
Cannot find the drug NEO*IV*PHENObarbital in FDA dataset.
Cannot find the drug Atropine Sulfate 1% in FDA dataset.
Cannot find the drug Terlipressin Study Drug (*IND*) in FDA dataset.
Cannot find the drug Fluocinolone 0.01% Solution in FDA dataset.
Cannot find the dru

Cannot find the drug Lipram 4500 in FDA dataset.
Cannot find the drug Yodoxin in FDA dataset.
Cannot find the drug Lotrel in FDA dataset.
Cannot find the drug GlipiZIDE XL in FDA dataset.
Cannot find the drug Betaxolol HCl  0.25% in FDA dataset.
Cannot find the drug Acetylcysteine 20% in FDA dataset.
Cannot find the drug Potassium Chloride (Powder) in FDA dataset.
Cannot find the drug Verapamil HCl in FDA dataset.
Cannot find the drug Sodium Citrate in FDA dataset.
Cannot find the drug Molindone HCl in FDA dataset.
Cannot find the drug charc in FDA dataset.
Cannot find the drug *NF in FDA dataset.
Cannot find the drug Lysodren in FDA dataset.
Cannot find the drug Cyproheptadine HCl in FDA dataset.
Cannot find the drug Emend in FDA dataset.
Cannot find the drug Gua in FDA dataset.
Cannot find the drug Genta in FDA dataset.
Cannot find the drug *NF* Rasburicase in FDA dataset.
Cannot find the drug gent in FDA dataset.
Cannot find the drug Procarbazine HCl in FDA dataset.
Cannot find the 

Cannot find the drug D5W 0.1% Albumin in FDA dataset.
Cannot find the drug Unifiber in FDA dataset.
Cannot find the drug Trifluoperazine HCl in FDA dataset.
Cannot find the drug NEO*PO*Hydralazine in FDA dataset.
Cannot find the drug Carrington Dermal Wound in FDA dataset.
Cannot find the drug Keppra in FDA dataset.
Cannot find the drug *NF* Glycopyrrolate in FDA dataset.
Cannot find the drug Fentora in FDA dataset.
Cannot find the drug Rhinocort in FDA dataset.
Cannot find the drug Altace in FDA dataset.
Cannot find the drug NEO*IV*Epoetin Alfa in FDA dataset.
Cannot find the drug Sulfameth/Trimethoprim Suspension in FDA dataset.
Cannot find the drug Mitomycin in FDA dataset.
Cannot find the drug Methadone HCl in FDA dataset.
Cannot find the drug Atorvastatin in FDA dataset.
Cannot find the drug Fluorometholone 0.1% Ophth Susp. in FDA dataset.
Cannot find the drug Actiq in FDA dataset.
Cannot find the drug Pancrelipase 4500 in FDA dataset.
Cannot find the drug Mirapex in FDA dataset.


Cannot find the drug *NF* Valacyclovir in FDA dataset.
Cannot find the drug Stanozolol (Bulk) in FDA dataset.
Cannot find the drug albuter in FDA dataset.
Cannot find the drug Bupivacain in FDA dataset.
Cannot find the drug clo in FDA dataset.
Cannot find the drug clomi in FDA dataset.
Cannot find the drug PARCOPA in FDA dataset.
Cannot find the drug *NF* Fenofibrate in FDA dataset.
Cannot find the drug *NF* Cefixime Suspension in FDA dataset.
Cannot find the drug Tiagabine HCl in FDA dataset.
Cannot find the drug maalox in FDA dataset.
Cannot find the drug Doxil in FDA dataset.
Cannot find the drug *NF* Amoxapine in FDA dataset.
Cannot find the drug Procainamide HCl in FDA dataset.
Cannot find the drug Neo-Synephrine in FDA dataset.
Cannot find the drug Neo-Synephrine in FDA dataset.
Cannot find the drug Desmopressi in FDA dataset.
Cannot find the drug *NF* Cefotetan in FDA dataset.
Cannot find the drug NS in FDA dataset.
Cannot find the drug Soln in FDA dataset.
Cannot find the drug 

KeyboardInterrupt: 

In [10]:
#0.9% Sodium Chloride
bank_drugs.head()

Unnamed: 0,drugbank_id,name
0,DB00001,Lepirudin
1,DB00002,Cetuximab
2,DB00003,Dornase alfa
3,DB00004,Denileukin diftitox
4,DB00005,Etanercept


### DRUG x name, transform all letters of drug names to lower case

In [48]:
# pres_drugs['DRUG_lower'] = pres_drugs['DRUG'].str.lower()
# bank_drugs['DRUG_lower'] = bank_drugs['name'].str.lower()
sider_drugs['DRUG_lower'] = sider_drugs['drugbank_name'].str.lower()

In [34]:
map_BP_drugs = aa.left_join(pres_drugs,bank_drugs,'DRUG_lower')
map_BP_drugs.head()

Unnamed: 0,DRUG,NDC,DRUG_lower,drugbank_id,name
0,Tacrolimus,469061711,tacrolimus,DB00864,Tacrolimus
1,Warfarin,56017275,warfarin,DB00682,Warfarin
2,Heparin Sodium,338055002,heparin sodium,,
3,D5W,0,d5w,,
4,Furosemide,54829725,furosemide,DB00695,Furosemide


In [37]:
a=map_BP_drugs[map_BP_drugs['drugbank_id'].notna()].head()

Unnamed: 0,DRUG,NDC,DRUG_lower,drugbank_id,name
2,Heparin Sodium,338055002,heparin sodium,,
3,D5W,0,d5w,,
10,Neutra-Phos,17314931102,neutra-phos,,
11,Heparin Sodium,74779362,heparin sodium,,
13,Docusate Sodium,51079001920,docusate sodium,,


In [54]:
51079001920
# len(side_effect_df['drugbank_name'].unique())
# len(pres_drugs['DRUG'].unique())
# b=sider_drugs[sider_drugs['drugbank_name']=='Mupirocin']
"2999F27MAD",
          "O18YUO0I83",
          "R60L0SM5BC"
        NMH84OZK2B

In [4]:
# import json
import requests

response = requests.get('https://api.fda.gov/drug/label.json?search=openfda.generic_name:Ondansetron&limit=1')
# json_data = json.loads(response.text)

In [15]:
response.json()['results'][0]['openfda']['unii']

['NMH84OZK2B']

In [23]:
# response2 = requests.get('https://api.fda.gov/drug/label.json?search=openfda.product_ndc:"0173-0569"&limit=1')
set(response2.json()['results'][0]['openfda']['unii'])

AttributeError: 'set' object has no attribute 'sort'

## draft

In [3]:
drugbank_list[drugbank_list['drugbank_id']=='DB00904'].head()

Unnamed: 0,drugbank_id,name,type,groups,atc_codes,categories,inchikey,inchi,description
893,DB00904,Ondansetron,small molecule,approved,A04AA01,Antipsychotic Agents,InChIKey=FELGMEQIXOGIFQ-UHFFFAOYSA-N,InChI=1S/C18H19N3O/c1-12-19-9-10-21(12)11-13-7...,A competitive serotonin type 3 receptor antago...


In [105]:
drugbank_list = list(zip(*drugbank_list))

In [110]:
drugbank_map = dict(zip(drugbank_list[0],drugbank_list[1]))
drugbank_map['Dulaglutide']

'DB09045'

In [112]:
del drugbank_list
pres_df.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,STARTDATE,ENDDATE,DRUG_TYPE,DRUG,DRUG_NAME_POE,DRUG_NAME_GENERIC,FORMULARY_DRUG_CD,GSN,NDC,PROD_STRENGTH,DOSE_VAL_RX,DOSE_UNIT_RX,FORM_VAL_DISP,FORM_UNIT_DISP,ROUTE,NDC_length
0,2214776,6,107064,,2175-06-11 00:00:00,2175-06-12 00:00:00,MAIN,Tacrolimus,Tacrolimus,Tacrolimus,TACR1,21796,469061711,1mg Capsule,2,mg,2,CAP,PO,11
1,2214775,6,107064,,2175-06-11 00:00:00,2175-06-12 00:00:00,MAIN,Warfarin,Warfarin,Warfarin,WARF5,6562,56017275,5mg Tablet,5,mg,1,TAB,PO,11
2,2215524,6,107064,,2175-06-11 00:00:00,2175-06-12 00:00:00,MAIN,Heparin Sodium,,,HEPAPREMIX,6522,338055002,"25,000 unit Premix Bag",25000,UNIT,1,BAG,IV,11
4,2214773,6,107064,,2175-06-11 00:00:00,2175-06-12 00:00:00,MAIN,Furosemide,Furosemide,Furosemide,FURO20,8208,54829725,20mg Tablet,20,mg,1,TAB,PO,11
5,2214774,6,107064,,2175-06-11 00:00:00,2175-06-15 00:00:00,MAIN,Warfarin,Warfarin,Warfarin,WARF0,14198,56016975,Check with MD for Dose,1,dose,1,dose,PO,11


In [161]:
test = pd.DataFrame({'DRUG':pres_df['DRUG'].unique()})

In [129]:
def drug2id(drug): 
    try:
        return drugbank_map[drug]
    except KeyError as error:
        return np.nan

In [138]:
test['drugbank_id'] = pres_df['DRUG'].apply(lambda x: drug2id(x.title()))

In [162]:
test.head()

Unnamed: 0,DRUG
0,Tacrolimus
1,Warfarin
2,Heparin Sodium
3,D5W
4,Furosemide


In [133]:
test[test['drugbank_id'].isna()].count()

DRUG           2780
drugbank_iD       0
dtype: int64

In [159]:
side_effect_df[side_effect_df['drugbank_id']=='DB00904'].head()

Unnamed: 0,drugbank_id,drugbank_name,umls_cui_from_meddra,side_effect_name
103667,DB00904,Ondansetron,C0000737,Abdominal pain
103668,DB00904,Ondansetron,C0152198,Accommodation disorder
103669,DB00904,Ondansetron,C0948089,Acute coronary syndrome
103670,DB00904,Ondansetron,C0085631,Agitation
103671,DB00904,Ondansetron,C0392156,Akathisia


In [142]:
bank_drug_test = side_effect_df[['drugbank_id','drugbank_name']].drop_duplicates()
len(bank_drug_test)

1223

In [143]:
aa.left_join(bank_drug_test,test,'drugbank_id')

Unnamed: 0,drugbank_id,drugbank_name,DRUG
0,DB07768,"(10ALPHA,13ALPHA,14BETA,17ALPHA)-17-HYDROXYAND...",
1,DB07886,"(11alpha,14beta)-11,17,21-trihydroxypregn-4-en...",
2,DB07702,"(16ALPHA,17ALPHA)-ESTRA-1,3,5(10)-TRIENE-3,16,...",
3,DB07465,"(1S,3S,5S)-2-{(2S)-2-amino-2-[(1R,3S,5R,7S)-3-...",
4,DB08567,"(1S,4S)-4-(3,4-dichlorophenyl)-N-methyl-1,2,3,...",
5,DB07129,"(2R)-1-(2,6-dimethylphenoxy)propan-2-amine",
6,DB08298,(2S)-2-(6-methoxynaphthalen-2-yl)propanoic acid,
7,DB02932,(2r)-N-[4-Cyano-3-(Trifluoromethyl)Phenyl]-3-[...,
8,DB02648,(3-Carboxy-2-(R)-Hydroxy-Propyl)-Trimethyl-Amm...,
9,DB08472,(3R)-N-methyl-3-phenyl-3-[4-(trifluoromethyl)p...,


In [148]:
pres_df[pres_df['DRUG']=='Zofran ODT']

Unnamed: 0,SUBJECT_ID,STARTDATE,ENDDATE,DRUG,NDC,ROUTE
1678741,23479,2151-12-03 00:00:00,2151-12-03 00:00:00,Zofran ODT,173056900,ORAL


In [151]:
drug_indication_df[drug_indication_df['drugbank_id']=='DB00904'].head()

Unnamed: 0,drugbank_id,drugbank_name,pubchem_id,stitch_id_flat,umls_cui_from_label,method,concept_name,meddra_type,umls_cui_from_meddra,meddra_name
8466,DB00904,Ondansetron,4595,CID100004595,C0006826,NLP_indication,Malignant Neoplasms,PT,C0006826,Neoplasm malignant
8467,DB00904,Ondansetron,4595,CID100004595,C0027497,NLP_indication,Nausea,PT,C0027497,Nausea
8468,DB00904,Ondansetron,4595,CID100004595,C0042963,NLP_indication,Vomiting,PT,C0042963,Vomiting
8469,DB00904,Ondansetron,4595,CID100004595,C0520904,NLP_indication,Postoperative Nausea,PT,C1868752,Procedural nausea


In [152]:
test

Unnamed: 0,DRUG,drugbank_id
0,Tacrolimus,DB00864
1,Warfarin,DB00682
2,Heparin Sodium,
3,D5W,
4,Furosemide,DB00695
5,Mycophenolate Mofetil,DB00682
6,Neutra-Phos,
7,Nitroglycerin,
8,Docusate Sodium,
9,Insulin,DB00682


In [155]:
drugbank_map['Zofran ODT']

KeyError: 'Zofran ODT'

In [154]:
test[test['DRUG']=='Zofran ODT']

Unnamed: 0,DRUG,drugbank_id
3245,Zofran ODT,DB00495


In [157]:
bank_drug_test[bank_drug_test['drugbank_id']=='DB00495']

Unnamed: 0,drugbank_id,drugbank_name
150563,DB00495,Zidovudine


In [158]:
drug2id('Zofran ODT')

nan

In [160]:
'Zofran ODT'.title()

'Zofran Odt'