## Pulling Medication Itemsets - Baseline Patients

Credit: Dr. Krisnamurthy & Lab 9

In [1]:
import os
import sqlalchemy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
creds = pd.read_csv("sample_mimic_login_creds.csv")
myUserName = str(creds.iloc[0]['Username']).strip()
myPassword = str(creds.iloc[0]['password']).strip()

server_url = "mimic-db.renci.unc.edu"
database = "mimic"

# Create Connection String
conn_str = f"{myUserName}:{myPassword}@{server_url}/{database}"

# Create Engine
engine = sqlalchemy.create_engine('postgresql://' + conn_str)


We aren't trying to restrict by number of patients here because of the realities of our sample size.

In [18]:
query = """
-- pulling a list of medication to check against 
with medlist as (
select c.concept_name, count(distinct de.person_id) as ptct 
from omop.drug_exposure de 
JOIN omop.concept c ON de.drug_concept_id = c.concept_id
where concept_name <> 'No matching concept' 
group by c.concept_name
having count(distinct de.person_id) > 500
)

-- way harder with medications :( slower by a ton (not surprising though)
-- 1min 30s runtime for query :(
select distinct de.person_id, ml.concept_name 
from omop.drug_exposure de JOIN omop.concept c ON de.drug_concept_id = c.concept_id 
JOIN medlist ml ON c.concept_name = ml.concept_name
"""

In [19]:
%%time
# need to run the text through sqlalchemy to clean it up
medication_data = pd.read_sql_query(sqlalchemy.text(query), engine)
print(f"The returned date frame with no mimumum patient threshold has {len(medication_data)} rows")
medication_data.head(20)

The returned date frame with no mimumum patient threshold has 1685268 rows
CPU times: total: 3.77 s
Wall time: 1min 36s


Unnamed: 0,person_id,concept_name
0,392775850,1000 ML Glucose 50 MG/ML / Potassium Chloride ...
1,392775850,1000 ML Glucose 50 MG/ML / Sodium Chloride 4.5...
2,392775850,1000 ML Glucose 50 MG/ML / Sodium Chloride 9 M...
3,392775850,1000 ML Sodium Chloride 9 MG/ML Injection
4,392775850,100 ML Metronidazole 5 MG/ML Injection
5,392775850,100 ML Potassium Chloride 0.1 MEQ/ML Injection
6,392775850,100 ML Propofol 10 MG/ML Injection [Diprivan]
7,392775850,100 ML Sodium Chloride 9 MG/ML Injection
8,392775850,10 ML Diltiazem Hydrochloride 5 MG/ML Injection
9,392775850,10 ML Furosemide 10 MG/ML Injection


In [21]:
print(f"The data frame has {len(medication_data['person_id'].unique())} patients")
print(f"The data frame includes {len(medication_data['concept_name'].unique())} different medications")

The data frame has 41524 patients
The data frame includes 528 different medications


In [22]:
new_d = medication_data.groupby(['person_id'])
type(new_d)
print(new_d)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000025B89B0F1C0>


In [23]:
new_d = medication_data.groupby(['person_id'])

med_list = []
for person, med in new_d:
#     print(f"Person is {person}")
#     print(comorb['concept_name'])
    med_list.append(list(set(med['concept_name'])))
print(len(med_list))

41524


In [25]:
med_list[::5000] # start, stop, step -> no need for a step here I'd say

[['50 ML Glucose 50 MG/ML Injection',
  'Calcium Chloride 0.0014 MEQ/ML / Potassium Chloride 0.004 MEQ/ML / Sodium Chloride 0.103 MEQ/ML / Sodium Lactate 0.028 MEQ/ML Injectable Solution',
  'Acetaminophen 325 MG Oral Tablet',
  'Captopril 12.5 MG Oral Tablet',
  '1 ML Morphine Sulfate 2 MG/ML Prefilled Syringe',
  'Diltiazem Hydrochloride 60 MG Oral Tablet',
  'Vitamin K 1 5 MG Oral Tablet',
  'Piperacillin 2000 MG / tazobactam 250 MG Injection [Zosyn]',
  'Methylprednisolone 62.5 MG/ML Injectable Solution [Solu-Medrol]',
  'Furosemide 10 MG/ML Injection',
  'Lorazepam 0.5 MG Oral Tablet',
  'Ipratropium Bromide 0.2 MG/ML Inhalation Solution',
  'atorvastatin 20 MG Oral Tablet [Lipitor]',
  '200 ACTUAT Albuterol 0.09 MG/ACTUAT / Ipratropium Bromide 0.018 MG/ACTUAT Metered Dose Inhaler [Combivent]',
  'Transfusion of fresh frozen plasma',
  'Vitamin K 1 10 MG/ML Injectable Solution',
  'Piperacillin 4000 MG / tazobactam 500 MG Injection [Zosyn]',
  'Metoprolol Tartrate 50 MG Oral Table

In [26]:
# trying to do without making it sparse
from mlxtend.preprocessing import TransactionEncoder

te = TransactionEncoder()
med_ary = te.fit(med_list).transform(med_list, sparse=True)
df = pd.DataFrame.sparse.from_spmatrix(med_ary, columns=te.columns_)
df.head(25)

Unnamed: 0,0.3 ML Enoxaparin sodium 100 MG/ML Prefilled Syringe [Lovenox],0.4 ML Enoxaparin sodium 100 MG/ML Prefilled Syringe [Lovenox],"0.5 ML Influenza Virus Vaccine, Inactivated A-California-07-2009 X-181 (H1N1) strain 0.03 MG/ML / Influenza Virus Vaccine, Inactivated A-Victoria-210-2009 X-187 (H3N2) (A-Perth-16-2009) strain 0.03 MG/ML / Influenza Virus Vaccine, Inactivated B-Brisbane-6",0.5 ML pneumococcal capsular polysaccharide type 1 vaccine 0.05 MG/ML / pneumococcal capsular polysaccharide type 10A vaccine 0.05 MG/ML / pneumococcal capsular polysaccharide type 11A vaccine 0.05 MG/ML / pneumococcal capsular polysaccharide type 12F vac,0.6 ML Enoxaparin sodium 100 MG/ML Prefilled Syringe [Lovenox],0.625 ML dolasetron mesylate 20 MG/ML Injection [Anzemet],0.8 ML Enoxaparin sodium 100 MG/ML Prefilled Syringe [Lovenox],1 ML Atropine Sulfate 1 MG/ML Injection,1 ML Desmopressin Acetate 0.004 MG/ML Injection [DDAVP],1 ML Dexamethasone phosphate 4 MG/ML Injection,...,"sennosides, USP 1.76 MG/ML Oral Solution [Senokot]","sennosides, USP 8.6 MG Oral Tablet","sennosides, USP 8.6 MG Oral Tablet [Senokot]",sevelamer hydrochloride 800 MG Oral Tablet [RenaGel],sodium phosphate,tiotropium 0.018 MG Inhalation Powder [Spiriva],tramadol hydrochloride 50 MG Oral Tablet,valsartan 160 MG Oral Tablet [Diovan],valsartan 80 MG Oral Tablet [Diovan],{10 (Prednisone 10 MG Oral Tablet) } Pack
0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0


In [30]:
%%time
from mlxtend.frequent_patterns import apriori
support = 0.15 # choosing this b/c .1 led to negative dimensions (a sign support was too high b/c for the library)
frequent_itemsets = apriori(df, min_support=support, use_colnames=True)
print(f"Using a support of {support}, found {len(frequent_itemsets)} frequent itemsets")
frequent_itemsets.head(25)

Using a support of 0.15, found 4635 frequent itemsets
CPU times: total: 24.6 s
Wall time: 24.6 s


Unnamed: 0,support,itemsets
0,0.187217,(1 ML Hydralazine Hydrochloride 20 MG/ML Injec...
1,0.280055,(1 ML Lorazepam 2 MG/ML Cartridge)
2,0.356204,(1 ML Morphine Sulfate 2 MG/ML Prefilled Syringe)
3,0.23885,(1 ML Phenylephrine Hydrochloride 10 MG/ML Inj...
4,0.326221,(10 ML Calcium Gluconate 100 MG/ML Injection)
5,0.172093,(100 ML Glucose 50 MG/ML Injection)
6,0.172406,(100 ML Metronidazole 5 MG/ML Injection)
7,0.325619,(100 ML Propofol 10 MG/ML Injection [Diprivan])
8,0.506117,(100 ML Sodium Chloride 9 MG/ML Injection)
9,0.191311,(1000 ML Glucose 50 MG/ML / Sodium Chloride 4....


In [31]:
pd.options.display.max_rows = 50

In [32]:
frequent_itemsets['count'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets.head(25)

Unnamed: 0,support,itemsets,count
0,0.187217,(1 ML Hydralazine Hydrochloride 20 MG/ML Injec...,1
1,0.280055,(1 ML Lorazepam 2 MG/ML Cartridge),1
2,0.356204,(1 ML Morphine Sulfate 2 MG/ML Prefilled Syringe),1
3,0.23885,(1 ML Phenylephrine Hydrochloride 10 MG/ML Inj...,1
4,0.326221,(10 ML Calcium Gluconate 100 MG/ML Injection),1
5,0.172093,(100 ML Glucose 50 MG/ML Injection),1
6,0.172406,(100 ML Metronidazole 5 MG/ML Injection),1
7,0.325619,(100 ML Propofol 10 MG/ML Injection [Diprivan]),1
8,0.506117,(100 ML Sodium Chloride 9 MG/ML Injection),1
9,0.191311,(1000 ML Glucose 50 MG/ML / Sodium Chloride 4....,1


In [33]:
fi = frequent_itemsets.sort_values(by = ['support'], ascending = False)
print(f"Highest support is {fi.iloc[0]['support']} with a count of {fi.iloc[0]['count']}")
print(f"Lowest support is {fi.iloc[-1]['support']} with a count of {fi.iloc[-1]['count']}")
fi.head(25)

Highest support is 0.709541469993257 with a count of 1
Lowest support is 0.15000963298333495 with a count of 3


Unnamed: 0,support,itemsets,count
63,0.709541,(Sodium Chloride Prefilled Syringe),1
32,0.644543,(Acetaminophen 325 MG Oral Tablet),1
67,0.592693,"(heparin sodium, porcine 5000 UNT/ML Injectabl...",1
500,0.541614,"(Acetaminophen 325 MG Oral Tablet, Sodium Chlo...",2
8,0.506117,(100 ML Sodium Chloride 9 MG/ML Injection),1
661,0.505419,"(Sodium Chloride Prefilled Syringe, heparin so...",2
18,0.484587,(250 ML Glucose 50 MG/ML Injection),1
503,0.442058,"(Acetaminophen 325 MG Oral Tablet, heparin sod...",2
12,0.430305,(1000 ML Sodium Chloride 9 MG/ML Injection),1
207,0.426067,"(Sodium Chloride Prefilled Syringe, 100 ML Sod...",2


In [34]:
fige2 = fi[fi['count'] >= 2]
fige2.head(25)

Unnamed: 0,support,itemsets,count
500,0.541614,"(Acetaminophen 325 MG Oral Tablet, Sodium Chlo...",2
661,0.505419,"(Sodium Chloride Prefilled Syringe, heparin so...",2
503,0.442058,"(Acetaminophen 325 MG Oral Tablet, heparin sod...",2
207,0.426067,"(Sodium Chloride Prefilled Syringe, 100 ML Sod...",2
186,0.408414,"(Acetaminophen 325 MG Oral Tablet, 100 ML Sodi...",2
604,0.399937,"(Fluid intake intravascular Measured, Sodium C...",2
242,0.399721,"(Sodium Chloride Prefilled Syringe, 1000 ML So...",2
323,0.393917,"(Sodium Chloride Prefilled Syringe, 250 ML Glu...",2
303,0.392111,"(Acetaminophen 325 MG Oral Tablet, 250 ML Gluc...",2
1972,0.383826,"(Acetaminophen 325 MG Oral Tablet, Sodium Chlo...",3


In [35]:
fige2

Unnamed: 0,support,itemsets,count
500,0.541614,"(Acetaminophen 325 MG Oral Tablet, Sodium Chlo...",2
661,0.505419,"(Sodium Chloride Prefilled Syringe, heparin so...",2
503,0.442058,"(Acetaminophen 325 MG Oral Tablet, heparin sod...",2
207,0.426067,"(Sodium Chloride Prefilled Syringe, 100 ML Sod...",2
186,0.408414,"(Acetaminophen 325 MG Oral Tablet, 100 ML Sodi...",2
...,...,...,...
2535,0.150010,(100 ML Propofol 10 MG/ML Injection [Diprivan]...,4
2388,0.150010,(100 ML Propofol 10 MG/ML Injection [Diprivan]...,4
2617,0.150010,"(50 ML Magnesium Sulfate 40 MG/ML Injection, 1...",4
4494,0.150010,"(Propofol, Calcium Chloride 0.0014 MEQ/ML / Po...",6
