In [18]:
#Import necessary packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [17]:
# get medicare drug utilization data from the 2016 set 
medicare2016 = pd.read_csv('Medicare_Provider_Utilization_and_Payment_Data__2016_Part_D_Prescriber.csv')
medicare2016.head()

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_provider_first_name,nppes_provider_city,nppes_provider_state,specialty_description,description_flag,drug_name,generic_name,bene_count,...,total_30_day_fill_count,total_day_supply,total_drug_cost,bene_count_ge65,bene_count_ge65_suppress_flag,total_claim_count_ge65,ge65_suppress_flag,total_30_day_fill_count_ge65,total_day_supply_ge65,total_drug_cost_ge65
0,1003000126,ENKESHAFI,ARDALAN,CUMBERLAND,MD,Internal Medicine,S,ATORVASTATIN CALCIUM,ATORVASTATIN CALCIUM,,...,15,450,139.32,,*,13.0,,15.0,450.0,139.32
1,1003000126,ENKESHAFI,ARDALAN,CUMBERLAND,MD,Internal Medicine,S,CIPROFLOXACIN HCL,CIPROFLOXACIN HCL,,...,11,96,80.99,,*,,*,,,
2,1003000126,ENKESHAFI,ARDALAN,CUMBERLAND,MD,Internal Medicine,S,DOXYCYCLINE HYCLATE,DOXYCYCLINE HYCLATE,20.0,...,20,199,586.12,,#,,#,,,
3,1003000126,ENKESHAFI,ARDALAN,CUMBERLAND,MD,Internal Medicine,S,ELIQUIS,APIXABAN,,...,17,510,6065.02,,*,17.0,,17.0,510.0,6065.02
4,1003000126,ENKESHAFI,ARDALAN,CUMBERLAND,MD,Internal Medicine,S,FUROSEMIDE,FUROSEMIDE,12.0,...,17,405,45.76,,#,,#,,,


In [19]:
medicare2016.columns

Index(['npi', 'nppes_provider_last_org_name', 'nppes_provider_first_name',
       'nppes_provider_city', 'nppes_provider_state', 'specialty_description',
       'description_flag', 'drug_name', 'generic_name', 'bene_count',
       'total_claim_count', 'total_30_day_fill_count', 'total_day_supply',
       'total_drug_cost', 'bene_count_ge65', 'bene_count_ge65_suppress_flag',
       'total_claim_count_ge65', 'ge65_suppress_flag',
       'total_30_day_fill_count_ge65', 'total_day_supply_ge65',
       'total_drug_cost_ge65'],
      dtype='object')

In [20]:
# read in drug NDC code information to use later to filter medicare dataset
ndc = pd.read_excel('HEDIS-2019-NDC-MLD-Directory-Complete-Workbook-FINAL-11-1-2018-2.xlsx',
                         'Medications List to NDC Codes')
ndc.head()

Unnamed: 0,Medication List,NDC Code,Brand Name,Generic Product Name,Route,Description,Drug ID,Drug Name,Package Size,Unit,Dose,Form,MED Conversion Factor,Unnamed: 13,Unnamed: 14
0,5-ARI Medications,54039513,Dutasteride,dutasteride 0.5 mg oral capsule,oral,5-ARI Medications,d04788,,,,,,,,
1,5-ARI Medications,54039522,Dutasteride,dutasteride 0.5 mg oral capsule,oral,5-ARI Medications,d04788,,,,,,,,
2,5-ARI Medications,93565556,Dutasteride,dutasteride 0.5 mg oral capsule,oral,5-ARI Medications,d04788,,,,,,,,
3,5-ARI Medications,93565598,Dutasteride,dutasteride 0.5 mg oral capsule,oral,5-ARI Medications,d04788,,,,,,,,
4,5-ARI Medications,115143808,Dutasteride,dutasteride 0.5 mg oral capsule,oral,5-ARI Medications,d04788,,,,,,,,


In [21]:
# creating a df with only opioids and we capture all opioids by seeng where MED (Morphine Equivalent Dose) is not nan
opioiddf = ndc[ndc['MED Conversion Factor'].notnull()].copy()
opioiddf['Drug Name'] = opioiddf['Drug Name'].str.upper()
opioiddf.head()

Unnamed: 0,Medication List,NDC Code,Brand Name,Generic Product Name,Route,Description,Drug ID,Drug Name,Package Size,Unit,Dose,Form,MED Conversion Factor,Unnamed: 13,Unnamed: 14
63818,UOD Opioid Medications,54309036,Butorphanol Tartrate,butorphanol 10 mg/mL nasal spray,nasal,UOD Opioid Medications,d00838,BUTORPHANOL,,mg/ml,1.0,,7.0,,
63819,UOD Opioid Medications,378963943,Butorphanol Tartrate,butorphanol 10 mg/mL nasal spray,nasal,UOD Opioid Medications,d00838,BUTORPHANOL,,mg/ml,10.0,,7.0,,
63820,UOD Opioid Medications,54569598800,Butorphanol Tartrate,butorphanol 10 mg/mL nasal spray,nasal,UOD Opioid Medications,d00838,BUTORPHANOL,,mg/ml,10.0,,7.0,,
63821,UOD Opioid Medications,60505081301,Butorphanol Tartrate,butorphanol 10 mg/mL nasal spray,nasal,UOD Opioid Medications,d00838,BUTORPHANOL,,mg/ml,10.0,,7.0,,
63822,UOD Opioid Medications,591264101,APAP/Butalbital/Caffeine/Codeine,acetaminophen/butalbital/caffeine/codeine 300 ...,oral,UOD Opioid Medications,d03425,CODEINE,,mg,30.0,,0.15,,


In [23]:
#Check the opioids in the df
opioiddf['Drug Name'].unique()

array(['BUTORPHANOL', 'CODEINE', 'DIHYDROCODEINE', 'FENTANYL',
       'HYDROCODONE', 'HYDROMORPHONE', 'LEVORPHANOL', 'MEPERIDINE',
       'METHADONE', 'MORPHINE', 'MORPHINE EQUIVALENT', 'OPIUM',
       'OXYCODONE', 'OXYMORPHONE', 'PENTAZOCINE', 'TAPENTADOL',
       'TRAMADOL'], dtype=object)

In [28]:
# make the opioid drug column into a list
opioidlist = opioiddf['Drug Name'].unique().tolist()
# join together list of opioids to create a pattern for comparison
opioidlist = '|'.join(opioidlist)
# use str.contains to match the two patterns from the two datasets
opioidlist_ = medicare2016[(medicare2016['drug_name'].str.contains(opioidlist)) | (medicare2016['generic_name'].str.contains(opioidlist))]

In [29]:
#List the opioids
opioidlist

'BUTORPHANOL|CODEINE|DIHYDROCODEINE|FENTANYL|HYDROCODONE|HYDROMORPHONE|LEVORPHANOL|MEPERIDINE|METHADONE|MORPHINE|MORPHINE EQUIVALENT|OPIUM|OXYCODONE|OXYMORPHONE|PENTAZOCINE|TAPENTADOL|TRAMADOL'

In [32]:
#Function to combine into one dataframe
def opioid_df(df):
    df = df[df['generic_name'].str.contains(opioidlist)].copy()
    return df

In [33]:
#Create a singular opioid datafraome for medicare
opioid2016 = opioid_df(medicare2016)

In [34]:
## All of the drugs in the combined dataset
opioid2016.drug_name.unique()

array(['ACETAMINOPHEN-CODEINE', 'FENTANYL', 'HYDROCODONE-ACETAMINOPHEN',
       'MORPHINE SULFATE ER', 'OXYCODONE-ACETAMINOPHEN', 'OXYCODONE HCL',
       'OXYCONTIN', 'TRAMADOL HCL', 'TRAMADOL HCL-ACETAMINOPHEN',
       'ATROVENT HFA', 'COMBIVENT RESPIMAT', 'OXYCODONE HCL ER',
       'SPIRIVA', 'HYDROMORPHONE HCL', 'METHADONE HCL',
       'MORPHINE SULFATE', 'NUCYNTA ER', 'OPANA ER', 'ENDOCET',
       'HYDROMORPHONE ER', 'HYSINGLA ER', 'SPIRIVA RESPIMAT',
       'TRAMADOL HCL ER', 'IPRATROPIUM BROMIDE', 'OXYMORPHONE HCL ER',
       'MEPERIDINE HCL', 'HYDROCODONE-IBUPROFEN', 'IPRATROPIUM-ALBUTEROL',
       'BUTALBITAL COMPOUND-CODEINE', 'STIOLTO RESPIMAT', 'NORCO',
       'NUCYNTA', 'OXYMORPHONE HCL', 'ZOHYDRO ER', 'APOKYN',
       'BUTORPHANOL TARTRATE', 'BUTALB-CAFF-ACETAMINOPH-CODEIN',
       'VICODIN', 'CODEINE SULFATE', 'KADIAN', 'EXALGO', 'EMBEDA',
       'PENTAZOCINE-NALOXONE HCL', 'ULTRAM', 'OXYCODONE HCL-ASPIRIN',
       'HYDROMET', 'DURAGESIC', 'DEMEROL', 'SUBSYS',
       'ASC

In [35]:
# There were drugs in the combined dataset that weren't explicitly opioids, so we remove them
otherdrugs = ['COMBIVENT RESPIMAT', 'SPIRIVA RESPIMAT', 'ATROVENT', 'STIOLTO RESPIMAT', 'IPRATROPIUM BROMIDE', 'SPIRIVA', 'IPRATROPIUM-ALBUTEROL']

opioid2016 = opioid2016[~opioid2016.drug_name.isin(otherdrugs)]

In [36]:
#Checking to see if the irrelevant medications were removed
opioid2016.drug_name.unique()

array(['ACETAMINOPHEN-CODEINE', 'FENTANYL', 'HYDROCODONE-ACETAMINOPHEN',
       'MORPHINE SULFATE ER', 'OXYCODONE-ACETAMINOPHEN', 'OXYCODONE HCL',
       'OXYCONTIN', 'TRAMADOL HCL', 'TRAMADOL HCL-ACETAMINOPHEN',
       'ATROVENT HFA', 'OXYCODONE HCL ER', 'HYDROMORPHONE HCL',
       'METHADONE HCL', 'MORPHINE SULFATE', 'NUCYNTA ER', 'OPANA ER',
       'ENDOCET', 'HYDROMORPHONE ER', 'HYSINGLA ER', 'TRAMADOL HCL ER',
       'OXYMORPHONE HCL ER', 'MEPERIDINE HCL', 'HYDROCODONE-IBUPROFEN',
       'BUTALBITAL COMPOUND-CODEINE', 'NORCO', 'NUCYNTA',
       'OXYMORPHONE HCL', 'ZOHYDRO ER', 'APOKYN', 'BUTORPHANOL TARTRATE',
       'BUTALB-CAFF-ACETAMINOPH-CODEIN', 'VICODIN', 'CODEINE SULFATE',
       'KADIAN', 'EXALGO', 'EMBEDA', 'PENTAZOCINE-NALOXONE HCL', 'ULTRAM',
       'OXYCODONE HCL-ASPIRIN', 'HYDROMET', 'DURAGESIC', 'DEMEROL',
       'SUBSYS', 'ASCOMP WITH CODEINE', 'PERCOCET', 'VICODIN ES',
       'ROXICODONE', 'VICODIN HP', 'FIORICET WITH CODEINE',
       'FENTANYL CITRATE', 'DILAUDID'

In [37]:
#Setting the year to 2016
opioid2016['year'] = 2016

In [40]:
#Review dataframe
opioid2016.sample(5)

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_provider_first_name,nppes_provider_city,nppes_provider_state,specialty_description,description_flag,drug_name,generic_name,bene_count,...,total_day_supply,total_drug_cost,bene_count_ge65,bene_count_ge65_suppress_flag,total_claim_count_ge65,ge65_suppress_flag,total_30_day_fill_count_ge65,total_day_supply_ge65,total_drug_cost_ge65,year
10951429,1437315470,JACKSON,GALE,CLARKSVILLE,TN,Family Practice,S,HYDROCODONE-ACETAMINOPHEN,HYDROCODONE/ACETAMINOPHEN,51.0,...,5271,4598.18,28.0,,91.0,,91.0,2509.0,2338.98,2016
17571280,1700809266,OSTERHOLM,RICHARD,OMAHA,NE,Internal Medicine,S,OXYCONTIN,OXYCODONE HCL,,...,326,2116.33,,*,11.0,,11.0,326.0,2116.33,2016
4156990,1164583761,ARMSTRONG,DAMON,BLACKFOOT,ID,Dentist,T,HYDROCODONE-ACETAMINOPHEN,HYDROCODONE/ACETAMINOPHEN,12.0,...,60,121.16,,#,,#,,,,2016
18183824,1720218076,MANNER,MAKAELA,FORT MYERS,FL,Dentist,T,HYDROCODONE-ACETAMINOPHEN,HYDROCODONE/ACETAMINOPHEN,,...,48,142.96,,*,12.0,,12.0,48.0,142.96,2016
2894595,1114935061,JUSINO-MCDOUGALL,ISMAEL,GUANICA,PR,General Practice,S,OXYCODONE-ACETAMINOPHEN,OXYCODONE HCL/ACETAMINOPHEN,,...,1156,589.24,,*,18.0,,18.0,503.0,336.35,2016


In [41]:
# saving cleaned dataframe to csv
opioid2016.to_csv('medicare_opioids_2016.csv')