In [None]:
pip install mlxtend

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [2]:
md = pd.read_csv('medical_market_basket.csv')

In [3]:
print(md.shape)

(15002, 20)


In [4]:
print(md.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15002 entries, 0 to 15001
Data columns (total 20 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Presc01  7501 non-null   object
 1   Presc02  5747 non-null   object
 2   Presc03  4389 non-null   object
 3   Presc04  3345 non-null   object
 4   Presc05  2529 non-null   object
 5   Presc06  1864 non-null   object
 6   Presc07  1369 non-null   object
 7   Presc08  981 non-null    object
 8   Presc09  654 non-null    object
 9   Presc10  395 non-null    object
 10  Presc11  256 non-null    object
 11  Presc12  154 non-null    object
 12  Presc13  87 non-null     object
 13  Presc14  47 non-null     object
 14  Presc15  25 non-null     object
 15  Presc16  8 non-null      object
 16  Presc17  4 non-null      object
 17  Presc18  4 non-null      object
 18  Presc19  3 non-null      object
 19  Presc20  1 non-null      object
dtypes: object(20)
memory usage: 2.3+ MB
None


In [5]:
md.head(5).T

Unnamed: 0,0,1,2,3,4
Presc01,,amlodipine,,citalopram,
Presc02,,albuterol aerosol,,benicar,
Presc03,,allopurinol,,amphetamine salt combo xr,
Presc04,,pantoprazole,,,
Presc05,,lorazepam,,,
Presc06,,omeprazole,,,
Presc07,,mometasone,,,
Presc08,,fluconozole,,,
Presc09,,gabapentin,,,
Presc10,,pravastatin,,,


In [6]:
mdlist = md.apply(lambda row: row.dropna().tolist(), axis=1).tolist()

In [7]:
encoder = TransactionEncoder()
temp_array = encoder.fit(mdlist).transform(mdlist)
mdencode = pd.DataFrame(temp_array, columns=encoder.columns_)
mdencode

Unnamed: 0,Duloxetine,Premarin,Yaz,abilify,acetaminophen,actonel,albuterol HFA,albuterol aerosol,alendronate,allopurinol,...,trazodone HCI,triamcinolone Ace topical,triamterene,trimethoprim DS,valaciclovir,valsartan,venlafaxine XR,verapamil SR,viagra,zolpidem
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,True,False,False,True,True,False,True,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14997,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
14998,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
14999,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
15000,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [8]:
mdencode.to_csv('medical_2123_data.csv', index=False)

In [9]:
fIS = apriori(mdencode, min_support = 0.005, use_colnames = True)
fIS = fIS.sort_values(by='support', ascending=False)
fIS

Unnamed: 0,support,itemsets
2,0.119184,(abilify)
11,0.089855,(amphetamine salt combo xr)
17,0.087055,(carvedilol)
39,0.085455,(glyburide)
27,0.081922,(diazepam)
...,...,...
85,0.005133,"(abilify, clonidine HCI)"
243,0.005066,"(amphetamine salt combo xr, lisinopril, abilify)"
251,0.005066,"(abilify, carvedilol, glyburide)"
212,0.005066,"(diazepam, metformin)"


In [10]:
rules = association_rules(fIS, metric = 'lift', min_threshold = 1.0)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(abilify),(carvedilol),0.119184,0.087055,0.029863,0.250559,2.878170,1.0,0.019487,1.218168,0.740855,0.169312,0.179095,0.296796
1,(carvedilol),(abilify),0.087055,0.119184,0.029863,0.343032,2.878170,1.0,0.019487,1.340729,0.714782,0.169312,0.254137,0.296796
2,(diazepam),(abilify),0.081922,0.119184,0.026330,0.321400,2.696664,1.0,0.016566,1.297989,0.685314,0.150648,0.229577,0.271158
3,(abilify),(diazepam),0.119184,0.081922,0.026330,0.220917,2.696664,1.0,0.016566,1.178408,0.714305,0.150648,0.151398,0.271158
4,(amphetamine salt combo xr),(abilify),0.089855,0.119184,0.025463,0.283383,2.377689,1.0,0.014754,1.229130,0.636628,0.138707,0.186417,0.248515
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,(glyburide),"(abilify, carvedilol)",0.085455,0.029863,0.005066,0.059282,1.985165,1.0,0.002514,1.031274,0.542635,0.045949,0.030325,0.114463
428,(diazepam),(metformin),0.081922,0.025263,0.005066,0.061839,2.447776,1.0,0.002996,1.038986,0.644244,0.049608,0.037524,0.131183
429,(metformin),(diazepam),0.025263,0.081922,0.005066,0.200528,2.447776,1.0,0.002996,1.148354,0.606796,0.049608,0.129189,0.131183
430,(cialis),(metoprolol),0.038262,0.047660,0.005066,0.132404,2.778080,1.0,0.003242,1.097677,0.665502,0.062655,0.088985,0.119349


In [11]:
rules.sort_values(['lift','support','confidence'], ascending=False).head(6)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
170,(methylprednisone),(lisinopril),0.02473,0.049127,0.007999,0.32345,6.583988,1.0,0.006784,1.405474,0.869622,0.121457,0.288496,0.243136
171,(lisinopril),(methylprednisone),0.049127,0.02473,0.007999,0.162822,6.583988,1.0,0.006784,1.16495,0.891934,0.121457,0.141594,0.243136
138,"(abilify, carvedilol)",(lisinopril),0.029863,0.049127,0.008532,0.285714,5.815856,1.0,0.007065,1.331223,0.853545,0.121097,0.248811,0.229696
139,(lisinopril),"(abilify, carvedilol)",0.049127,0.029863,0.008532,0.173677,5.815856,1.0,0.007065,1.174041,0.870838,0.121097,0.148241,0.229696
406,"(abilify, carvedilol)",(glipizide),0.029863,0.032929,0.005133,0.171875,5.219572,1.0,0.004149,1.167784,0.833298,0.089017,0.143677,0.163873
411,(glipizide),"(abilify, carvedilol)",0.032929,0.029863,0.005133,0.15587,5.219572,1.0,0.004149,1.149275,0.83594,0.089017,0.129887,0.163873


In [12]:
antecedents = rules[rules['antecedents'] == {'glipizide'}]
consequents = rules[rules['consequents'] == {'glipizide'}]
glipizide_fIS = pd.concat([antecedents, consequents]).sort_values(by=['lift'], ascending= False)
glipizide_fIS

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
406,"(abilify, carvedilol)",(glipizide),0.029863,0.032929,0.005133,0.171875,5.219572,1.0,0.004149,1.167784,0.833298,0.089017,0.143677,0.163873
411,(glipizide),"(abilify, carvedilol)",0.032929,0.029863,0.005133,0.15587,5.219572,1.0,0.004149,1.149275,0.83594,0.089017,0.129887,0.163873
223,(glipizide),(lisinopril),0.032929,0.049127,0.007066,0.214575,4.367778,1.0,0.005448,1.210648,0.797305,0.094222,0.173996,0.179201
222,(lisinopril),(glipizide),0.049127,0.032929,0.007066,0.143826,4.367778,1.0,0.005448,1.129527,0.810887,0.094222,0.114673,0.179201
72,(glipizide),(carvedilol),0.032929,0.087055,0.011465,0.348178,3.999516,1.0,0.008599,1.400605,0.775506,0.105651,0.286023,0.239939
73,(carvedilol),(glipizide),0.087055,0.032929,0.011465,0.1317,3.999516,1.0,0.008599,1.113752,0.821484,0.105651,0.102134,0.239939
133,(glipizide),(atorvastatin),0.032929,0.064791,0.008532,0.259109,3.999134,1.0,0.006399,1.262276,0.775482,0.095665,0.20778,0.195398
132,(atorvastatin),(glipizide),0.064791,0.032929,0.008532,0.131687,3.999134,1.0,0.006399,1.113736,0.801902,0.095665,0.102121,0.195398
330,(metoprolol),(glipizide),0.04766,0.032929,0.005666,0.118881,3.610232,1.0,0.004097,1.097549,0.759193,0.075623,0.088879,0.145473
331,(glipizide),(metoprolol),0.032929,0.04766,0.005666,0.172065,3.610232,1.0,0.004097,1.150259,0.747628,0.075623,0.13063,0.145473
