In [1]:
#import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.base import clone
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Imputer

In [4]:
# read 2016 medicaide drugs
medicaid_drugs = pd.read_csv('State_Drug_Utilization_Data_2016.csv')
print(medicaid_drugs.shape)

(4644856, 20)


In [6]:
# identify the list of columns
medicaid_drugs.columns

Index(['Utilization Type', 'State', 'Labeler Code', 'Product Code',
       'Package Size', 'Year', 'Quarter', 'Product Name', 'Suppression Used',
       'Units Reimbursed', 'Number of Prescriptions',
       'Total Amount Reimbursed', 'Medicaid Amount Reimbursed',
       'Non Medicaid Amount Reimbursed', 'Quarter Begin', 'Quarter Begin Date',
       'Latitude', 'Longitude', 'Location', 'NDC'],
      dtype='object')

In [7]:
#view part of the dataset
medicaid_drugs.head()

Unnamed: 0,Utilization Type,State,Labeler Code,Product Code,Package Size,Year,Quarter,Product Name,Suppression Used,Units Reimbursed,Number of Prescriptions,Total Amount Reimbursed,Medicaid Amount Reimbursed,Non Medicaid Amount Reimbursed,Quarter Begin,Quarter Begin Date,Latitude,Longitude,Location,NDC
0,MCOU,XX,50458,925,50,2016,3,LEVAQUIN 5,False,96.0,12.0,2990.59,2990.59,0.0,7/1,07/01/2016,,,,50458092550
1,FFSU,AL,603,6345,2,2016,4,VALSARTAN/,True,,,,,,10/1,10/01/2016,32.799,-86.8073,"(32.799, -86.8073)",603634502
2,FFSU,AL,31722,731,30,2016,4,IRBESARTAN,True,,,,,,10/1,10/01/2016,32.799,-86.8073,"(32.799, -86.8073)",31722073130
3,FFSU,AR,61958,1003,1,2016,3,RANEXA,False,16296.0,278.0,93093.15,92573.86,519.29,7/1,07/01/2016,34.9513,-92.3809,"(34.9513, -92.3809)",61958100301
4,FFSU,AR,185,129,1,2016,4,BUMETANIDE,False,2821.0,58.0,2032.87,2011.37,21.5,10/1,10/01/2016,34.9513,-92.3809,"(34.9513, -92.3809)",185012901


In [11]:
#view the product name column
medicaid_drugs['Product Name'].head(15)

0     LEVAQUIN 5
1     VALSARTAN/
2     IRBESARTAN
3         RANEXA
4     BUMETANIDE
5      ADENOSINE
6     LISINOPRIL
7     HALOPERIDO
8     PROMETHAZI
9     OLANZAPINE
10    DIGOXIN TA
11    CETIRIZINE
12     QUINAPRIL
13    PRAZOSIN 5
14    Valacyclov
Name: Product Name, dtype: object

In [20]:
medicaid_drugs['Product Name'].nunique()

14299

In [48]:
#filter for only common opioid drugs
VICOD_drugs = medicaid_drugs.loc[medicaid_drugs['Product Name'].str.contains('VICOD',na=False)]
HYDRO_drugs = medicaid_drugs.loc[medicaid_drugs['Product Name'].str.contains('HYDROM',na=False)]
FENT_drugs = medicaid_drugs.loc[medicaid_drugs['Product Name'].str.contains('FENT',na=False)]
MORPH_drugs = medicaid_drugs.loc[medicaid_drugs['Product Name'].str.contains('MORPH',na=False)]
COD_drugs = medicaid_drugs.loc[medicaid_drugs['Product Name'].str.contains('COD',na=False)]
common_opioid = pd.concat([VICOD_drugs, HYDRO_drugs, FENT_drugs, MORPH_drugs,COD_drugs])

In [49]:
common_opioid.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 118759 entries, 69868 to 4644851
Data columns (total 20 columns):
Utilization Type                  118759 non-null object
State                             118759 non-null object
Labeler Code                      118759 non-null int64
Product Code                      118759 non-null int64
Package Size                      118759 non-null int64
Year                              118759 non-null int64
Quarter                           118759 non-null int64
Product Name                      118759 non-null object
Suppression Used                  118759 non-null bool
Units Reimbursed                  61934 non-null object
Number of Prescriptions           61934 non-null object
Total Amount Reimbursed           61934 non-null object
Medicaid Amount Reimbursed        61934 non-null object
Non Medicaid Amount Reimbursed    61934 non-null object
Quarter Begin                     118759 non-null object
Quarter Begin Date                118759 

In [50]:
# replace missing values
common_opioid.replace(np.nan, 0)

Unnamed: 0,Utilization Type,State,Labeler Code,Product Code,Package Size,Year,Quarter,Product Name,Suppression Used,Units Reimbursed,Number of Prescriptions,Total Amount Reimbursed,Medicaid Amount Reimbursed,Non Medicaid Amount Reimbursed,Quarter Begin,Quarter Begin Date,Latitude,Longitude,Location,NDC
69868,MCOU,XX,74,3041,13,2016,4,VICODIN 5-,False,18042,627,22672.1,22447.05,225.05,10/1,10/01/2016,0.0000,0.0000,0,74304113
86430,MCOU,XX,74,3041,53,2016,4,VICODIN 5-,False,52557,1884,62504.17,60440.24,2063.93,10/1,10/01/2016,0.0000,0.0000,0,74304153
128205,MCOU,TX,74,3054,53,2016,4,VICODIN HP,False,8312,74,14096.13,14096.13,0,10/1,10/01/2016,33.8191,-80.9066,"(33.8191, -80.9066)",74305453
129435,MCOU,XX,74,3054,53,2016,4,VICODIN TA,False,22896,256,42037.61,41075.55,962.06,10/1,10/01/2016,0.0000,0.0000,0,74305453
150393,MCOU,TX,74,3041,53,2016,4,VICODIN 5-,False,6633,232,7240.72,7240.72,0,10/1,10/01/2016,33.8191,-80.9066,"(33.8191, -80.9066)",74304153
173226,MCOU,NJ,74,3041,13,2016,4,VICODIN TA,False,1695,66,2587.42,2515.97,71.45,10/1,10/01/2016,41.1289,-98.2883,"(41.1289, -98.2883)",74304113
176172,MCOU,XX,74,3043,13,2016,4,VICODIN TA,False,16870,326,23289.94,22780.51,509.43,10/1,10/01/2016,0.0000,0.0000,0,74304313
179458,MCOU,XX,74,3041,53,2016,1,VICODIN 5-,False,95636,3415,133530.11,132420.7,1109.41,1/1,01/01/2016,0.0000,0.0000,0,74304153
182990,FFSU,NY,74,3043,53,2016,3,VICODIN ES,True,0,0,0,0,0,7/1,07/01/2016,34.8375,-106.2371,"(34.8375, -106.2371)",74304353
187301,FFSU,XX,74,3041,53,2016,3,VICODIN 5-,False,44692,1953,72038.66,71611.82,426.84,7/1,07/01/2016,0.0000,0.0000,0,74304153


In [51]:
#drop repeated rows
common_opioid.drop_duplicates()

Unnamed: 0,Utilization Type,State,Labeler Code,Product Code,Package Size,Year,Quarter,Product Name,Suppression Used,Units Reimbursed,Number of Prescriptions,Total Amount Reimbursed,Medicaid Amount Reimbursed,Non Medicaid Amount Reimbursed,Quarter Begin,Quarter Begin Date,Latitude,Longitude,Location,NDC
69868,MCOU,XX,74,3041,13,2016,4,VICODIN 5-,False,18042,627,22672.1,22447.05,225.05,10/1,10/01/2016,,,,74304113
86430,MCOU,XX,74,3041,53,2016,4,VICODIN 5-,False,52557,1884,62504.17,60440.24,2063.93,10/1,10/01/2016,,,,74304153
128205,MCOU,TX,74,3054,53,2016,4,VICODIN HP,False,8312,74,14096.13,14096.13,0,10/1,10/01/2016,33.8191,-80.9066,"(33.8191, -80.9066)",74305453
129435,MCOU,XX,74,3054,53,2016,4,VICODIN TA,False,22896,256,42037.61,41075.55,962.06,10/1,10/01/2016,,,,74305453
150393,MCOU,TX,74,3041,53,2016,4,VICODIN 5-,False,6633,232,7240.72,7240.72,0,10/1,10/01/2016,33.8191,-80.9066,"(33.8191, -80.9066)",74304153
173226,MCOU,NJ,74,3041,13,2016,4,VICODIN TA,False,1695,66,2587.42,2515.97,71.45,10/1,10/01/2016,41.1289,-98.2883,"(41.1289, -98.2883)",74304113
176172,MCOU,XX,74,3043,13,2016,4,VICODIN TA,False,16870,326,23289.94,22780.51,509.43,10/1,10/01/2016,,,,74304313
179458,MCOU,XX,74,3041,53,2016,1,VICODIN 5-,False,95636,3415,133530.11,132420.7,1109.41,1/1,01/01/2016,,,,74304153
182990,FFSU,NY,74,3043,53,2016,3,VICODIN ES,True,,,,,,7/1,07/01/2016,34.8375,-106.2371,"(34.8375, -106.2371)",74304353
187301,FFSU,XX,74,3041,53,2016,3,VICODIN 5-,False,44692,1953,72038.66,71611.82,426.84,7/1,07/01/2016,,,,74304153


In [53]:
common_opioid.shape

(118759, 20)

In [79]:
common_opioid.dtypes

Utilization Type                   object
State                              object
Labeler Code                        int64
Product Code                        int64
Package Size                        int64
Year                                int64
Quarter                             int64
Product Name                       object
Suppression Used                     bool
Units Reimbursed                   object
Number of Prescriptions            object
Total Amount Reimbursed            object
Medicaid Amount Reimbursed         object
Non Medicaid Amount Reimbursed     object
Quarter Begin                      object
Quarter Begin Date                 object
Latitude                          float64
Longitude                         float64
Location                           object
NDC                                 int64
dtype: object

In [None]:
common_opioid['Number of Prescriptions'].astype(int)

In [None]:
common_opioid.groupby(['State'])['Number of Prescriptions'].sum()

In [None]:
#analyzing by state

sm_common_opioid = common_opioid.groupby('State')['Number of Prescriptions'].sum()
sm_common_opioid = pd.DataFrame(data=sm_common_opioid).reset_index()

sm_common_opioid.head()

In [86]:
#need to join table with the opioid drugs table
ndc = pd.read_excel('HEDIS-2019-NDC-MLD-Directory-Complete-Workbook-FINAL-11-1-2018-2.xlsx',
                         'Medications List to NDC Codes')

In [87]:
# creating a df with only opioids and we capture all opioids by seeng where MED (Morphine Equivalent Dose) is not nan
opioiddf_medicaid = ndc[ndc['MED Conversion Factor'].notnull()].copy()
opioiddf_medicaid['Drug Name'] = opioiddf_medicaid['Drug Name'].str.upper()
opioiddf_medicaid.head()

Unnamed: 0,Medication List,NDC Code,Brand Name,Generic Product Name,Route,Description,Drug ID,Drug Name,Package Size,Unit,Dose,Form,MED Conversion Factor,Unnamed: 13,Unnamed: 14
63818,UOD Opioid Medications,54309036,Butorphanol Tartrate,butorphanol 10 mg/mL nasal spray,nasal,UOD Opioid Medications,d00838,BUTORPHANOL,,mg/ml,1.0,,7.0,,
63819,UOD Opioid Medications,378963943,Butorphanol Tartrate,butorphanol 10 mg/mL nasal spray,nasal,UOD Opioid Medications,d00838,BUTORPHANOL,,mg/ml,10.0,,7.0,,
63820,UOD Opioid Medications,54569598800,Butorphanol Tartrate,butorphanol 10 mg/mL nasal spray,nasal,UOD Opioid Medications,d00838,BUTORPHANOL,,mg/ml,10.0,,7.0,,
63821,UOD Opioid Medications,60505081301,Butorphanol Tartrate,butorphanol 10 mg/mL nasal spray,nasal,UOD Opioid Medications,d00838,BUTORPHANOL,,mg/ml,10.0,,7.0,,
63822,UOD Opioid Medications,591264101,APAP/Butalbital/Caffeine/Codeine,acetaminophen/butalbital/caffeine/codeine 300 ...,oral,UOD Opioid Medications,d03425,CODEINE,,mg,30.0,,0.15,,


In [88]:
# make the opioid drug column into a list
opioidlist_medicaid = opioiddf_medicaid['Drug Name'].unique().tolist()

In [89]:
#join the list of strings
opioidlist_medicaid = '|'.join(opioidlist_medicaid)

In [92]:
opioidlist_medicaid = medicaid_drugs[medicaid_drugs['Product Name'].str.contains(opioidlist_medicaid, na=False)]