In [1]:
import pandas as pd
from google.cloud import bigquery
import google.auth

from fuzzywuzzy import fuzz
from fuzzywuzzy import process

In [2]:
def get_client(c,p):
    return bigquery.Client(p, c)

def get_gcreds(scopes = None):
    if scopes is None:
        scopes = ["https://www.googleapis.com/auth/bigquery"]
    return google.auth.default(
        scopes )

def get_authed_client():
    cred = get_gcreds()
    print(*cred)
    return get_client(*cred)

def main():
    mimic_drugs = """
        SELECT itemid, label, abbreviation, category, unitname
        FROM `physionet-data.mimiciv_icu.d_items`
        WHERE linksto='inputevents' """

    mimic_job = get_authed_client().query(mimic_drugs)
    df = mimic_job.to_dataframe()
    df.label = df.label.fillna("")
    df.label = df.label.apply(str.lower)
    return df

icu_drugs = main()

<google.oauth2.credentials.Credentials object at 0x000001FF900B2950> mimic-iv-desktop


In [3]:
def read_drugs():
    df = pd.read_csv("resources/NDC_product_table.csv", encoding='ISO-8859-1')
    df.NONPROPRIETARYNAME = df.NONPROPRIETARYNAME.fillna("")
    df.NONPROPRIETARYNAME = df.NONPROPRIETARYNAME.apply(str.lower)
    df.PROPRIETARYNAME = df.PROPRIETARYNAME.fillna("")
    df.PROPRIETARYNAME = df.PROPRIETARYNAME.apply(str.lower)
    return df
norm_drugs = read_drugs()
norm_drugs

Unnamed: 0,PRODUCTID,PRODUCTNDC,PRODUCTTYPENAME,PROPRIETARYNAME,PROPRIETARYNAMESUFFIX,NONPROPRIETARYNAME,DOSAGEFORMNAME,ROUTENAME,STARTMARKETINGDATE,ENDMARKETINGDATE,MARKETINGCATEGORYNAME,APPLICATIONNUMBER,LABELERNAME,SUBSTANCENAME,ACTIVE_NUMERATOR_STRENGTH,ACTIVE_INGRED_UNIT,PHARM_CLASSES,DEASCHEDULE,NDC_EXCLUDE_FLAG,LISTING_RECORD_CERTIFIED_THROUGH
0,0002-0800_662164fd-5ea0-4a08-bfd1-6b08bdd73342,0002-0800,HUMAN OTC DRUG,sterile diluent,,diluent,"INJECTION, SOLUTION",SUBCUTANEOUS,19870710,,BLA,BLA018781,Eli Lilly and Company,WATER,1,mL/mL,,,N,20221231.0
1,0002-1200_480fceef-6596-4478-97de-677c155506b3,0002-1200,HUMAN PRESCRIPTION DRUG,amyvid,,florbetapir f 18,"INJECTION, SOLUTION",INTRAVENOUS,20120601,,NDA,NDA202008,Eli Lilly and Company,FLORBETAPIR F-18,51,mCi/mL,"Radioactive Diagnostic Agent [EPC],Positron Em...",,N,20221231.0
2,0002-1210_151a431b-f07b-4959-b6fa-c41ff80364c8,0002-1210,HUMAN PRESCRIPTION DRUG,tauvid,,flortaucipir f-18,"INJECTION, SOLUTION",INTRAVENOUS,20200528,,NDA,NDA212123,Eli Lilly and Company,FLORTAUCIPIR F-18,51,mCi/mL,,,N,20221231.0
3,0002-1433_d4ca0797-5a21-4962-bed1-2c4c9b52d78b,0002-1433,HUMAN PRESCRIPTION DRUG,trulicity,,dulaglutide,"INJECTION, SOLUTION",SUBCUTANEOUS,20140918,,BLA,BLA125469,Eli Lilly and Company,DULAGLUTIDE,0.75,mg/.5mL,"GLP-1 Receptor Agonist [EPC],Glucagon-Like Pep...",,N,20221231.0
4,0002-1434_d4ca0797-5a21-4962-bed1-2c4c9b52d78b,0002-1434,HUMAN PRESCRIPTION DRUG,trulicity,,dulaglutide,"INJECTION, SOLUTION",SUBCUTANEOUS,20140918,,BLA,BLA125469,Eli Lilly and Company,DULAGLUTIDE,1.5,mg/.5mL,"GLP-1 Receptor Agonist [EPC],Glucagon-Like Pep...",,N,20221231.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105042,99207-466_a594be33-33ce-4304-a6fe-65a4eeda3f19,99207-466,HUMAN PRESCRIPTION DRUG,solodyn,,minocycline hydrochloride,"TABLET, FILM COATED, EXTENDED RELEASE",ORAL,20100927,,NDA,NDA050808,Bausch Health US LLC,MINOCYCLINE HYDROCHLORIDE,80,mg/1,"Tetracycline-class Drug [EPC],Tetracyclines [CS]",,N,20221231.0
105043,99207-467_a594be33-33ce-4304-a6fe-65a4eeda3f19,99207-467,HUMAN PRESCRIPTION DRUG,solodyn,,minocycline hydrochloride,"TABLET, FILM COATED, EXTENDED RELEASE",ORAL,20100927,,NDA,NDA050808,Bausch Health US LLC,MINOCYCLINE HYDROCHLORIDE,105,mg/1,"Tetracycline-class Drug [EPC],Tetracyclines [CS]",,N,20221231.0
105044,99207-525_a075b415-7d7c-405a-9645-bdd4c4f92990,99207-525,HUMAN PRESCRIPTION DRUG,vanos,,fluocinonide,CREAM,TOPICAL,20060313,,NDA,NDA021758,"Bausch Health US, LLC",FLUOCINONIDE,1,mg/g,"Corticosteroid [EPC],Corticosteroid Hormone Re...",,N,20221231.0
105045,99207-850_e52ec481-1114-4f09-9be0-311e5eb52fa1,99207-850,HUMAN PRESCRIPTION DRUG,luzu,,luliconazole,CREAM,TOPICAL,20131114,,NDA,NDA204153,"Bausch Health US, LLC",LULICONAZOLE,10,mg/g,"Azole Antifungal [EPC],Azoles [CS],Cytochrome ...",,N,20221231.0


In [27]:
class Synonym:
    def __init__(self, id, common_name, synonym):
        self.id = id
        self.common_name = common_name
        self.synonym = synonym

    def as_tuple(self):
        return self.id, self.common_name, self.synonym

    def __repr__(self):
        return f'\'{self.common_name}\' -- Synonym:{self.id}'

def read_drug_bank():
    df = pd.read_csv("resources/drugbank/drugbank_vocab.csv")

    synonym_dict = {}
    for index, row in df.iterrows():
        pipe_separated_synonyms = str(row["Synonyms"])
        for synonym in pipe_separated_synonyms.split("|"):
            synonym_dict[synonym] = Synonym(row["DrugBank ID"], row["Common name"], synonym)

    return synonym_dict


read_drug_bank()


{'[Leu1, Thr2]-63-desulfohirudin ': 'Lepirudin' -- Synonym:DB00001,
 ' Desulfatohirudin ': 'Lepirudin' -- Synonym:DB00001,
 ' Hirudin variant-1 ': 'Lepirudin' -- Synonym:DB00001,
 ' Lepirudin ': 'Lepirudin' -- Synonym:DB00001,
 ' Lepirudin recombinant ': 'Lepirudin' -- Synonym:DB00001,
 ' R-hirudin': 'Lepirudin' -- Synonym:DB00001,
 'Cetuximab ': 'Cetuximab' -- Synonym:DB00002,
 ' Cétuximab ': 'Cetuximab' -- Synonym:DB00002,
 ' Cetuximabum': 'Cetuximab' -- Synonym:DB00002,
 'Deoxyribonuclease (human clone 18-1 protein moiety) ': 'Dornase alfa' -- Synonym:DB00003,
 ' Dornasa alfa ': 'Dornase alfa' -- Synonym:DB00003,
 ' Dornase alfa ': 'Dornase alfa' -- Synonym:DB00003,
 ' Dornase alfa, recombinant ': 'Dornase alfa' -- Synonym:DB00003,
 ' Dornase alpha ': 'Dornase alfa' -- Synonym:DB00003,
 ' Recombinant deoxyribonuclease (DNAse)': 'Dornase alfa' -- Synonym:DB00003,
 'Denileukin ': 'Denileukin diftitox' -- Synonym:DB00004,
 ' Denileukin diftitox ': 'Denileukin diftitox' -- Synonym:DB000

In [23]:
def read_hosp_drugs():
    sql_hosp_drugs_query = """
    SELECT """
    get_authed_client()

In [5]:
def match():
    df = icu_drugs.merge(
        norm_drugs.drop_duplicates(subset="NONPROPRIETARYNAME"), left_on= ['label'], right_on=['NONPROPRIETARYNAME'], how='left', indicator=True)[['itemid', 'label', 'category', 'PRODUCTID', 'NONPROPRIETARYNAME']]\
        .merge(norm_drugs.drop_duplicates(subset="PROPRIETARYNAME"), left_on=['label'], right_on='PROPRIETARYNAME', how='left',indicator=True)
    print(df.count())
    return df


matched = match()
matched

itemid                              474
label                               474
category                            474
PRODUCTID_x                          71
NONPROPRIETARYNAME_x                 71
PRODUCTID_y                          77
PRODUCTNDC                           77
PRODUCTTYPENAME                      77
PROPRIETARYNAME                      77
PROPRIETARYNAMESUFFIX                 2
NONPROPRIETARYNAME_y                 77
DOSAGEFORMNAME                       77
ROUTENAME                            77
STARTMARKETINGDATE                   77
ENDMARKETINGDATE                      4
MARKETINGCATEGORYNAME                77
APPLICATIONNUMBER                    74
LABELERNAME                          77
SUBSTANCENAME                        76
ACTIVE_NUMERATOR_STRENGTH            76
ACTIVE_INGRED_UNIT                   76
PHARM_CLASSES                        66
DEASCHEDULE                           3
NDC_EXCLUDE_FLAG                     77
LISTING_RECORD_CERTIFIED_THROUGH     73


Unnamed: 0,itemid,label,category,PRODUCTID_x,NONPROPRIETARYNAME_x,PRODUCTID_y,PRODUCTNDC,PRODUCTTYPENAME,PROPRIETARYNAME,PROPRIETARYNAMESUFFIX,...,APPLICATIONNUMBER,LABELERNAME,SUBSTANCENAME,ACTIVE_NUMERATOR_STRENGTH,ACTIVE_INGRED_UNIT,PHARM_CLASSES,DEASCHEDULE,NDC_EXCLUDE_FLAG,LISTING_RECORD_CERTIFIED_THROUGH,_merge
0,222139,potassium acetate,Nutrition - Supplements,0409-3294_500bb334-6d33-4bb1-b081-f505423f2872,potassium acetate,0409-3294_500bb334-6d33-4bb1-b081-f505423f2872,0409-3294,HUMAN PRESCRIPTION DRUG,potassium acetate,,...,NDA018896,"Hospira, Inc.",POTASSIUM ACETATE,196.3,mg/mL,"Potassium Compounds [CS],Potassium Salt [EPC],...",,N,20221231.0,both
1,222190,ranitidine,Nutrition - Parenteral,0904-6715_0b8b6ed2-7d1d-4d6d-86f9-0f86fb06b738,ranitidine,0904-6921_2a917fc5-06a6-d0e3-f8d4-39013116a446,0904-6921,HUMAN OTC DRUG,ranitidine,,...,ANDA075294,MAJOR PHARMACEUTICALS,RANITIDINE HYDROCHLORIDE,75,mg/1,,,N,20221231.0,both
2,225801,lipids 20%,Nutrition - Parenteral,,,,,,,,...,,,,,,,,,,left_only
3,225916,tpn w/ lipids,Nutrition - Parenteral,,,,,,,,...,,,,,,,,,,left_only
4,225917,tpn without lipids,Nutrition - Parenteral,,,,,,,,...,,,,,,,,,,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469,221209,peptisorb,Fluids - Other (Not In Use),,,,,,,,...,,,,,,,,,,left_only
470,221210,suplena,Fluids - Other (Not In Use),,,,,,,,...,,,,,,,,,,left_only
471,221211,"sodium bicarbonate 1,4%",Fluids - Other (Not In Use),,,,,,,,...,,,,,,,,,,left_only
472,221212,"saline 0,18%",Fluids - Other (Not In Use),,,,,,,,...,,,,,,,,,,left_only


In [6]:

def fuzzy_match(str1, str2):
    return fuzz.token_set_ratio(str1, str2)

def match_dataframe(df1, key1, df2, key2, threshold=90):
    matches = []
    # matches = pd.DataFrame(columns=['index', 'label', 'norm_label', 'match0', 'match1'])
    for i, row in df1.iterrows():
        match = process.extractOne(row[key1], df2[key2], scorer=fuzzy_match)

        if match[1] >= threshold:
            print(match[1])
            matches.append([i, row['itemid'],row[key1], match[0], match[1], match[2]])
        print(i)
        # if i > 100:
        #     break
    return pd.DataFrame(matches, columns=['index', 'itemid', 'label', 'norm_label', 'score', 'norm_index'])



def fuzzy_merge():
    medications = icu_drugs.loc[(~matched['category'].isin(['Medications']))]
    return match_dataframe(medications, "label", norm_drugs, "PROPRIETARYNAME")

fuzzy_matched = fuzzy_merge()
fuzzy_matched

100
0
100
1
2
3
4
5
6
7
100
8
9
100
10
11
100
12
100
13
100
14
15
16
17
18
100
19
20
21
100
22
23
100
24
25
100
26
27
100
28
100
29
100
30
100
31
100
32
100
33
100
34
100
35
100
36
100
37
100
38
100
39
100
40
100
41
100
42
100
43
100
44
45
100
46
100
47
100
48
100
49
100
50
100
51
91
52
100
53
100
54
100
55
100
56
100
57
100
58
100
59
100
60
100
61
100
62
100
63
100
64
100
65
100
66
100
67
100
68
100
69
100
70
100
71
100
72
100
73
100
74
93
75
100
76
77
100
78
100
79
100
80
100
81
82
100
214
100
215
100
216
217
218
100
219
220
221
222
223
100
224
225
226
227
228
229
230
231
100
232
100
233
234
235
236
237
238
239
240
100
241
100
242
100
243
244
245
100
246
100
247
100
248
100
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
100
323
100
324
100

Unnamed: 0,index,itemid,label,norm_label,score,norm_index
0,0,222139,potassium acetate,potassium acetate,100,9428
1,1,222190,ranitidine,ranitidine hydrochloride,100,11999
2,8,225921,hepatamine,hepatamine,100,5882
3,10,225923,folate,x folate,100,53248
4,12,225925,potassium phosphate,"dibasic sodium phosphate, monobasic potassium ...",100,67730
...,...,...,...,...,...,...
134,462,221201,fibrini energy,energy,100,30653
135,468,221208,normosol,normosol-r,100,9594
136,471,221211,"sodium bicarbonate 1,4%",sodium bicarbonate,100,9051
137,472,221212,"saline 0,18%",saline,100,26481


In [8]:
fuzzy_matched

Unnamed: 0,index,itemid,label,norm_label,score,norm_index
0,0,222139,potassium acetate,potassium acetate,100,9428
1,1,222190,ranitidine,ranitidine hydrochloride,100,11999
2,8,225921,hepatamine,hepatamine,100,5882
3,10,225923,folate,x folate,100,53248
4,12,225925,potassium phosphate,"dibasic sodium phosphate, monobasic potassium ...",100,67730
...,...,...,...,...,...,...
134,462,221201,fibrini energy,energy,100,30653
135,468,221208,normosol,normosol-r,100,9594
136,471,221211,"sodium bicarbonate 1,4%",sodium bicarbonate,100,9051
137,472,221212,"saline 0,18%",saline,100,26481


In [9]:
m1 = norm_drugs['PROPRIETARYNAME'].str.startswith("glyc")
norm_drugs.loc[m1]


Unnamed: 0,PRODUCTID,PRODUCTNDC,PRODUCTTYPENAME,PROPRIETARYNAME,PROPRIETARYNAMESUFFIX,NONPROPRIETARYNAME,DOSAGEFORMNAME,ROUTENAME,STARTMARKETINGDATE,ENDMARKETINGDATE,MARKETINGCATEGORYNAME,APPLICATIONNUMBER,LABELERNAME,SUBSTANCENAME,ACTIVE_NUMERATOR_STRENGTH,ACTIVE_INGRED_UNIT,PHARM_CLASSES,DEASCHEDULE,NDC_EXCLUDE_FLAG,LISTING_RECORD_CERTIFIED_THROUGH
3136,0143-9584_941776c0-fd84-4e62-99c9-55a2a847783d,0143-9584,HUMAN PRESCRIPTION DRUG,glycopyrrolate,,glycopyrrolate,INJECTION,INTRAMUSCULAR; INTRAVENOUS,20150601,,ANDA,ANDA090963,Hikma Pharmaceuticals USA Inc.,GLYCOPYRROLATE,0.2,mg/mL,"Anticholinergic [EPC],Cholinergic Antagonists ...",,N,20221231.0
3137,0143-9585_941776c0-fd84-4e62-99c9-55a2a847783d,0143-9585,HUMAN PRESCRIPTION DRUG,glycopyrrolate,,glycopyrrolate,INJECTION,INTRAMUSCULAR; INTRAVENOUS,20150601,,ANDA,ANDA090963,Hikma Pharmaceuticals USA Inc.,GLYCOPYRROLATE,0.2,mg/mL,"Anticholinergic [EPC],Cholinergic Antagonists ...",,N,20221231.0
3138,0143-9586_941776c0-fd84-4e62-99c9-55a2a847783d,0143-9586,HUMAN PRESCRIPTION DRUG,glycopyrrolate,,glycopyrrolate,INJECTION,INTRAMUSCULAR; INTRAVENOUS,20150601,,ANDA,ANDA090963,Hikma Pharmaceuticals USA Inc.,GLYCOPYRROLATE,0.2,mg/mL,"Anticholinergic [EPC],Cholinergic Antagonists ...",,N,20221231.0
3139,0143-9587_941776c0-fd84-4e62-99c9-55a2a847783d,0143-9587,HUMAN PRESCRIPTION DRUG,glycopyrrolate,,glycopyrrolate,INJECTION,INTRAMUSCULAR; INTRAVENOUS,20150601,,ANDA,ANDA090963,Hikma Pharmaceuticals USA Inc.,GLYCOPYRROLATE,0.2,mg/mL,"Anticholinergic [EPC],Cholinergic Antagonists ...",,N,20221231.0
3173,0143-9679_96744b85-c7c2-47d5-93fb-4fb55adb54ba,0143-9679,HUMAN PRESCRIPTION DRUG,glycopyrrolate,,glycopyrrolate,INJECTION,INTRAMUSCULAR; INTRAVENOUS,20110921,,ANDA,ANDA090963,Hikma Pharmaceuticals USA Inc.,GLYCOPYRROLATE,0.2,mg/mL,"Anticholinergic [EPC],Cholinergic Antagonists ...",,N,20221231.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98961,76045-203_d2efb821-f0f9-494d-bed9-71852d2f6fa7,76045-203,HUMAN PRESCRIPTION DRUG,glycopyrrolate,,glycopyrrolate,"INJECTION, SOLUTION",INTRAMUSCULAR; INTRAVENOUS,20180725,,ANDA,ANDA209024,"Fresenius Kabi USA, LLC",GLYCOPYRROLATE,0.2,mg/mL,"Anticholinergic [EPC],Cholinergic Antagonists ...",,N,20221231.0
98962,76045-206_d2efb821-f0f9-494d-bed9-71852d2f6fa7,76045-206,HUMAN PRESCRIPTION DRUG,glycopyrrolate,,glycopyrrolate,"INJECTION, SOLUTION",INTRAMUSCULAR; INTRAVENOUS,20201030,,ANDA,ANDA209024,"Fresenius Kabi USA, LLC",GLYCOPYRROLATE,0.2,mg/mL,"Anticholinergic [EPC],Cholinergic Antagonists ...",,N,20221231.0
98963,76045-208_d2efb821-f0f9-494d-bed9-71852d2f6fa7,76045-208,HUMAN PRESCRIPTION DRUG,glycopyrrolate,,glycopyrrolate,"INJECTION, SOLUTION",INTRAMUSCULAR; INTRAVENOUS,20201030,,ANDA,ANDA209024,"Fresenius Kabi USA, LLC",GLYCOPYRROLATE,0.2,mg/mL,"Anticholinergic [EPC],Cholinergic Antagonists ...",,N,20221231.0
102164,79739-7097_392a969d-1e14-4ad8-835c-a47be4983067,79739-7097,HUMAN PRESCRIPTION DRUG,glycopyrrolate,,glycopyrrolate,TABLET,ORAL,20120312,,ANDA,ANDA091522,"LGM Pharma Solutions, LLC",GLYCOPYRROLATE,1.5,mg/1,"Anticholinergic [EPC],Cholinergic Antagonists ...",,N,20221231.0


In [10]:
m1 = ~matched['PRODUCTID_x'].isnull()
m2 = ~matched['PRODUCTID_y'].isnull()
has_product_id = m1 | m2
matched.loc[~has_product_id].groupby('category').count()


Unnamed: 0_level_0,itemid,label,PRODUCTID_x,NONPROPRIETARYNAME_x,PRODUCTID_y,PRODUCTNDC,PRODUCTTYPENAME,PROPRIETARYNAME,PROPRIETARYNAMESUFFIX,NONPROPRIETARYNAME_y,...,APPLICATIONNUMBER,LABELERNAME,SUBSTANCENAME,ACTIVE_NUMERATOR_STRENGTH,ACTIVE_INGRED_UNIT,PHARM_CLASSES,DEASCHEDULE,NDC_EXCLUDE_FLAG,LISTING_RECORD_CERTIFIED_THROUGH,_merge
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Antibiotics,17,17,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,17
Blood Products/Colloids,25,25,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,25
Fluids - Other (Not In Use),83,83,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,83
Fluids/Intake,36,36,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,36
Medications,96,96,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,96
Nutrition - Enteral,112,112,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,112
Nutrition - Parenteral,8,8,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,8
Nutrition - Supplements,15,15,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,15


In [11]:
matched.loc[(~matched['category'].isin(['Medications'])) & has_product_id]

Unnamed: 0,itemid,label,category,PRODUCTID_x,NONPROPRIETARYNAME_x,PRODUCTID_y,PRODUCTNDC,PRODUCTTYPENAME,PROPRIETARYNAME,PROPRIETARYNAMESUFFIX,...,APPLICATIONNUMBER,LABELERNAME,SUBSTANCENAME,ACTIVE_NUMERATOR_STRENGTH,ACTIVE_INGRED_UNIT,PHARM_CLASSES,DEASCHEDULE,NDC_EXCLUDE_FLAG,LISTING_RECORD_CERTIFIED_THROUGH,_merge
0,222139,potassium acetate,Nutrition - Supplements,0409-3294_500bb334-6d33-4bb1-b081-f505423f2872,potassium acetate,0409-3294_500bb334-6d33-4bb1-b081-f505423f2872,0409-3294,HUMAN PRESCRIPTION DRUG,potassium acetate,,...,NDA018896,"Hospira, Inc.",POTASSIUM ACETATE,196.3,mg/mL,"Potassium Compounds [CS],Potassium Salt [EPC],...",,N,20221231.0,both
1,222190,ranitidine,Nutrition - Parenteral,0904-6715_0b8b6ed2-7d1d-4d6d-86f9-0f86fb06b738,ranitidine,0904-6921_2a917fc5-06a6-d0e3-f8d4-39013116a446,0904-6921,HUMAN OTC DRUG,ranitidine,,...,ANDA075294,MAJOR PHARMACEUTICALS,RANITIDINE HYDROCHLORIDE,75,mg/1,,,N,20221231.0,both
8,225921,hepatamine,Nutrition - Supplements,,,0264-9371_c1f4d7ca-8c22-4562-a49b-9d201be140fb,0264-9371,HUMAN PRESCRIPTION DRUG,hepatamine,,...,NDA018676,B. Braun Medical Inc.,ISOLEUCINE; LEUCINE; LYSINE ACETATE; METHIONIN...,.9; 1.1; .61; .1; .1; .45; .066; .84; .77; .6;...,g/100mL; g/100mL; g/100mL; g/100mL; g/100mL; g...,"Amino Acid [EPC],Amino Acids [CS]",,N,20221231.0,both
11,225924,hydrochloric acid,Nutrition - Supplements,0220-3466_8643994f-4e14-4572-e053-2991aa0a5b78,hydrochloric acid,,,,,,...,,,,,,,,,,left_only
13,225926,sodium acetate,Nutrition - Supplements,0409-3299_416a7d2a-271d-4b08-a491-1e8a13650b86,sodium acetate,0409-3299_416a7d2a-271d-4b08-a491-1e8a13650b86,0409-3299,HUMAN PRESCRIPTION DRUG,sodium acetate,,...,NDA018893,"Hospira, Inc.",SODIUM ACETATE ANHYDROUS,164,mg/mL,,,N,20221231.0,both
28,225798,vancomycin,Antibiotics,68001-338_c91fb4b4-7b6c-c700-e053-2995a90a6bc2,vancomycin,25021-157_eb393d1e-aff3-4a65-aae8-28418a1aec6c,25021-157,HUMAN PRESCRIPTION DRUG,vancomycin,,...,ANDA200837,Sagent Pharmaceuticals,VANCOMYCIN HYDROCHLORIDE,5,g/100mL,"Glycopeptide Antibacterial [EPC],Glycopeptides...",,N,20221231.0,both
29,225837,acyclovir,Antibiotics,0093-3630_8c3d75a1-fe85-49ca-9bb4-af670f910bf8,acyclovir,0093-3630_8c3d75a1-fe85-49ca-9bb4-af670f910bf8,0093-3630,HUMAN PRESCRIPTION DRUG,acyclovir,,...,NDA021478,"Teva Pharmaceuticals, Inc.",ACYCLOVIR,50,mg/g,"DNA Polymerase Inhibitors [MoA],Herpes Simplex...",,N,20221231.0,both
30,225838,ambisome,Antibiotics,,,0469-3051_8f57e867-909b-4a7c-bd4e-1e9427f5005b,0469-3051,HUMAN PRESCRIPTION DRUG,ambisome,,...,NDA050740,"Astellas Pharma US, Inc.",AMPHOTERICIN B,50,mg/12.5mL,"Lipid-based Polyene Antifungal [EPC],Polyene A...",,N,20221231.0,both
31,225840,amikacin,Antibiotics,71558-590_8b43768c-f7ba-4f33-8509-65a526b0422e,amikacin,,,,,,...,,,,,,,,,,left_only
32,225842,ampicillin,Antibiotics,0781-2144_e8f470a8-ec75-4c1e-84d8-766372f61cf3,ampicillin,0409-3718_1e43e05b-8ec3-4ce4-bdfc-f47516ef2efe,0409-3718,HUMAN PRESCRIPTION DRUG,ampicillin,,...,ANDA061395,"Hospira, Inc",AMPICILLIN SODIUM,500,mg/1,"Penicillin-class Antibacterial [EPC],Penicilli...",,N,20221231.0,both


In [12]:
icu_drugs

Unnamed: 0,itemid,label,abbreviation,category,unitname
0,222139,potassium acetate,Potassium ACEtate,Nutrition - Supplements,mEq
1,222190,ranitidine,Ranitidine,Nutrition - Parenteral,mg
2,225801,lipids 20%,Lipids 20%,Nutrition - Parenteral,mL
3,225916,tpn w/ lipids,TPN w/ Lipids,Nutrition - Parenteral,mL
4,225917,tpn without lipids,TPN without Lipids,Nutrition - Parenteral,mL
...,...,...,...,...,...
469,221209,peptisorb,Peptisorb,Fluids - Other (Not In Use),mL
470,221210,suplena,Suplena,Fluids - Other (Not In Use),mL
471,221211,"sodium bicarbonate 1,4%","NaBic 1,4%",Fluids - Other (Not In Use),mL
472,221212,"saline 0,18%","Saline 0,18%",Fluids - Other (Not In Use),mL


In [13]:
def read_ddinter():
    df = pd.read_csv("resources/ddinter_downloads_code_B.csv")
    df.Drug_B = df.Drug_B.fillna("")
    df.Drug_B = df.Drug_B.apply(str.lower)
    df.Drug_B = df.Drug_B.fillna("")
    df.Drug_B = df.Drug_B.apply(str.lower)

    df.Drug_A = df.Drug_A.fillna("")
    df.Drug_A = df.Drug_A.apply(str.lower)
    df.Drug_A = df.Drug_A.fillna("")
    df.Drug_A = df.Drug_A.apply(str.lower)
    return df

ddinter = read_ddinter()
cleaned= ddinter
cleaned.loc[(cleaned['Drug_B'].isin( fuzzy_matched['norm_label']) | cleaned['Drug_A'].isin( fuzzy_matched['norm_label'])) & cleaned.Level.isin( ['Major'])]


Unnamed: 0,DDInterID_A,Drug_A,DDInterID_B,Drug_B,Level
7,DDInter1683,sodium bicarbonate,DDInter582,dolutegravir,Major
541,DDInter1073,linezolid,DDInter652,epinephrine,Major
1165,DDInter66,amikacin,DDInter1123,mannitol,Major
1175,DDInter68,amiloride,DDInter1495,potassium acetate,Major
1226,DDInter177,benazepril,DDInter1495,potassium acetate,Major
...,...,...,...,...,...
9775,DDInter656,eplerenone,DDInter1495,potassium acetate,Major
9868,DDInter658,eprosartan,DDInter1495,potassium acetate,Major
10075,DDInter743,fluconazole,DDInter542,dicoumarol,Major
10192,DDInter781,fosinopril,DDInter1495,potassium acetate,Major


In [14]:
def query_for_drug_interactions():
    multimap = cleaned.groupby('Drug_A')['Drug_B'].apply(list).to_dict()
    clauses= []
    for key in multimap.keys():
        first_ids = fuzzy_matched.loc[(fuzzy_matched['norm_label'] == key)]['itemid']

        second_ids = fuzzy_matched.loc[(fuzzy_matched['norm_label'].isin(multimap[key]))]['itemid']
        if first_ids.any() & second_ids.any():
            sql = f"(first_ie.itemid IN ({','.join([str(item) for item in first_ids if item])}) AND second_ie.itemid IN ({','.join([str(item) for item in second_ids if item])}))"
            clauses.append(sql)
    print(len(clauses))
    clause = " OR ".join(clauses)
    print (clause)

    icu = "physionet-data.mimiciv_icu"

    sql = f"""SELECT first_ie.subject_id, first_ie.stay_id, first_ie.itemid, second_ie.itemid, count(*)
        FROM `{icu}.inputevents` as first_ie
        INNER JOIN `{icu}.inputevents` as second_ie ON first_ie.stay_id = second_ie.stay_id
        WHERE {clause} AND first_ie.amount > 0
            AND second_ie.amount > 0
            AND first_ie.starttime < second_ie.starttime
            AND DATETIME_DIFF(second_ie.starttime, first_ie.starttime, MINUTE) < 300
        GROUP BY first_ie.subject_id, first_ie.stay_id, first_ie.itemid, second_ie.itemid

    """
    print("\n\n",sql)
    mimic_job = get_authed_client().query(sql)
    return mimic_job.to_dataframe()

drug_interactions = query_for_drug_interactions()
drug_interactions

18
(first_ie.itemid IN (225837) AND second_ie.itemid IN (227533,221211)) OR (first_ie.itemid IN (226403) AND second_ie.itemid IN (227533,220988,220989,220990,220991,220992,221211)) OR (first_ie.itemid IN (225844) AND second_ie.itemid IN (227533,221211)) OR (first_ie.itemid IN (225845) AND second_ie.itemid IN (227533,220988,220989,220990,220991,220992,221211)) OR (first_ie.itemid IN (225850) AND second_ie.itemid IN (227533,221211)) OR (first_ie.itemid IN (225855) AND second_ie.itemid IN (227533,221211)) OR (first_ie.itemid IN (225859) AND second_ie.itemid IN (227533,220988,220989,220990,220991,220992,221211)) OR (first_ie.itemid IN (225865) AND second_ie.itemid IN (225926,227533,220988,220989,220990,220991,220992,221211)) OR (first_ie.itemid IN (225866) AND second_ie.itemid IN (227533,220988,220989,220990,220991,220992,221211)) OR (first_ie.itemid IN (225869) AND second_ie.itemid IN (227533,220988,220989,220990,220991,220992,221211)) OR (first_ie.itemid IN (225873) AND second_ie.itemid 

Unnamed: 0,subject_id,stay_id,itemid,itemid_1,f0_
0,15198026,38346904,225837,227533,24
1,13739613,36696488,225837,227533,24
2,15872027,34294044,225837,227533,6
3,11818034,30463812,225837,227533,5
4,16610592,30632193,225837,227533,16
...,...,...,...,...,...
2652,11917664,37501394,225866,227533,1
2653,10014610,39959884,225869,227533,1
2654,10471192,31857512,225869,227533,1
2655,13919405,38390594,225879,227533,1
