# Is there any association between a particular type of opioid and the number of overdose deaths?

### Import libraries and data from the prescribers database

In [1]:
# import statements
import os

import pandas as pd
import plotly.express as px
from sqlalchemy import create_engine

In [2]:
# establish path to prescribers database
# The URL can be overridden with the PRESCRIBERS_DB_URL environment variable so that real
# credentials never have to be written into the notebook; the fallback points at the
# local development database.
# The scheme must be "postgresql://": SQLAlchemy 1.4+ no longer accepts the "postgres://" alias.
connection_string = os.environ.get(
    "PRESCRIBERS_DB_URL",
    "postgresql://postgres:postgres@localhost:5432/prescribers",
)

In [3]:
# define SQL query engine
# create_engine builds the SQLAlchemy engine from the connection URL defined above;
# the queries in the following cells are all run through this single `engine` object.
engine = create_engine(connection_string)

In [4]:
# get opioid drug data from the prescribers database
#
# Query outline:
#   * zip_to_county ranks, for every zip code, the TN counties it maps to by tot_ratio
#     (highest first); the main query keeps only rnk = 1.
#   * The main query joins prescribers (by zip), their prescriptions, the drug table and
#     county population, then sums total_claim_count per
#     county / population / generic drug / drug_group / opioid flags.
#   * drug_group buckets opioids into HYDROCODONE / METHADONE / OXYCODONE, all other
#     opioids into 'NOT A TOP 3 OPIOID', and non-opioids into 'NOT AN OPIOID'.
#   * tot_scripts_per_county is a window sum of the grouped totals over each county.
#
# NOTE(review): RANK() assigns the same rank to ties, so a zip whose top two counties
# share the same tot_ratio would be kept for both counties - confirm this cannot happen
# in zip_fips, or switch to ROW_NUMBER().
# NOTE(review): '%%' is presumably the driver-escaped form of the SQL '%' wildcard - verify
# against the SQLAlchemy / DB driver versions in use.
#
# This cell only defines the query text. It is executed exactly once, by pd.read_sql in
# the next cell, so the expensive query is not run twice and no unread result set is
# left holding a database connection open.
drug_query = '''
WITH zip_to_county AS (
	SELECT
		zf.fipscounty
		, fc.county
		, fc.state
		, zip
		, tot_ratio
		, RANK() OVER(PARTITION BY zip ORDER BY tot_ratio DESC) AS rnk
	FROM zip_fips AS zf
	JOIN fips_county AS fc
		ON fc.fipscounty = zf.fipscounty
	WHERE fc.state = 'TN'
)

SELECT zc.fipscounty
	, zc.county
	, zc.state
	, p3.population
	, d.generic_name
	, CASE WHEN d.opioid_drug_flag = 'Y' AND d.generic_name LIKE 'HYDROCODONE%%' THEN 'HYDROCODONE (ALL)'
        WHEN d.opioid_drug_flag = 'Y' AND d.generic_name LIKE 'METHADONE%%' THEN 'METHADONE (ALL)'
        WHEN d.opioid_drug_flag = 'Y' AND d.generic_name LIKE 'OXYCODONE%%' THEN 'OXYCODONE (ALL)'
        WHEN d.opioid_drug_flag = 'N' THEN 'NOT AN OPIOID'
        ELSE 'NOT A TOP 3 OPIOID'
        END AS drug_group
    , d.opioid_drug_flag
	, d.long_acting_opioid_drug_flag
	, SUM(p2.total_claim_count) AS tot_scripts
    , SUM(SUM(p2.total_claim_count)) OVER(PARTITION BY zc.county) AS tot_scripts_per_county
	, ROUND(SUM(p2.total_claim_count) / p3.population * 10000, 6) AS scripts_per_10k
	
FROM zip_to_county AS zc

JOIN prescriber AS p1
	ON p1.nppes_provider_zip5 = zc.zip

JOIN prescription AS p2
	ON p2.npi = p1.npi

JOIN drug AS d
	ON d.drug_name = p2.drug_name

JOIN population AS p3
	ON zc.fipscounty = p3.fipscounty

WHERE
	zc.rnk = 1
	--AND d.opioid_drug_flag = 'Y'

GROUP BY 1,2,3,4,5,6,7,8
ORDER BY 4 DESC
;
'''

In [5]:
# read in the query results as a pandas dataframe
opioids = pd.read_sql(drug_query, con = engine)

# take a look at the opioids dataframe
# (a bare expression is only rendered when it is the last line of a cell,
# so the preview has to go through display() to actually show up)
display(opioids.head())

# make sure the datatypes are correct in the opioids dataframe
opioids.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41131 entries, 0 to 41130
Data columns (total 11 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   fipscounty                    41131 non-null  object 
 1   county                        41131 non-null  object 
 2   state                         41131 non-null  object 
 3   population                    41131 non-null  float64
 4   generic_name                  41131 non-null  object 
 5   drug_group                    41131 non-null  object 
 6   opioid_drug_flag              41131 non-null  object 
 7   long_acting_opioid_drug_flag  41131 non-null  object 
 8   tot_scripts                   41131 non-null  float64
 9   tot_scripts_per_county        41131 non-null  float64
 10  scripts_per_10k               41131 non-null  float64
dtypes: float64(4), object(7)
memory usage: 3.5+ MB


In [6]:
# get OD data from the prescribers database
# One row per TN county for 2017: the overdose-death count, the rate per 10,000
# residents, and an urban/rural label (a county with a matching cbsa row is 'urban',
# any other county is 'rural').
od_query = """
SELECT
	fc.fipscounty
	, CASE WHEN cbsa.fipscounty IS NOT NULL THEN 'urban' ELSE 'rural' END AS county_type
	, od.overdose_deaths AS num_ods_2017
	, ROUND((od.overdose_deaths / p3.population * 10000), 6) AS od_rate_per_10K_2017

FROM overdose_deaths AS od

JOIN fips_county AS fc
	ON fc.fipscounty = od.fipscounty

JOIN population AS p3
	ON p3.fipscounty = od.fipscounty

LEFT JOIN cbsa
    ON cbsa.fipscounty = fc.fipscounty

WHERE od.year = 2017
AND fc.state = 'TN'
;
"""

In [7]:
# read in the query results as a pandas dataframe
ods = pd.read_sql(od_query, con = engine)

# take a look at the overdoses dataframe
# (a bare expression is only rendered when it is the last line of a cell,
# so the preview has to go through display() to actually show up)
display(ods.head())

# make sure the datatypes are correct in the overdoses dataframe
ods.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fipscounty            95 non-null     object 
 1   county_type           95 non-null     object 
 2   num_ods_2017          95 non-null     float64
 3   od_rate_per_10k_2017  95 non-null     float64
dtypes: float64(2), object(2)
memory usage: 3.1+ KB


In [8]:
# merge the overdoses and opioid prescription dataframes
# I opted to do this in python because...why not?
# validate='many_to_one' makes pandas raise a MergeError if `ods` ever contains more than
# one row per fipscounty, which would otherwise silently duplicate prescription rows.
oao = opioids.merge(ods, how = 'inner', on = 'fipscounty', validate = 'many_to_one')

# take a look at the merged dataframe
# (display() is needed because only the last expression of a cell is rendered)
display(oao.head())

# check the datatypes
oao.info()

# check to make sure no counties went missing in the join
oao['county'].nunique()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41131 entries, 0 to 41130
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   fipscounty                    41131 non-null  object 
 1   county                        41131 non-null  object 
 2   state                         41131 non-null  object 
 3   population                    41131 non-null  float64
 4   generic_name                  41131 non-null  object 
 5   drug_group                    41131 non-null  object 
 6   opioid_drug_flag              41131 non-null  object 
 7   long_acting_opioid_drug_flag  41131 non-null  object 
 8   tot_scripts                   41131 non-null  float64
 9   tot_scripts_per_county        41131 non-null  float64
 10  scripts_per_10k               41131 non-null  float64
 11  county_type                   41131 non-null  object 
 12  num_ods_2017                  41131 non-null  float64
 13  o

95

### Take a look at some correlations related to opioid drugs

In [9]:
# generate a correlations matrix for all opioid rows
# Restrict to numeric columns first: on pandas >= 2.0, DataFrame.corr() raises when the
# frame still holds string columns (county, drug_group, flags, ...). select_dtypes works
# on both old and new pandas versions.
oao[oao['opioid_drug_flag'] == 'Y'].select_dtypes(include = 'number').corr()

Unnamed: 0,population,tot_scripts,tot_scripts_per_county,scripts_per_10k,num_ods_2017,od_rate_per_10k_2017
population,1.0,0.280214,0.947251,-0.08019,0.910905,0.236303
tot_scripts,0.280214,1.0,0.304586,0.4807,0.28349,0.090853
tot_scripts_per_county,0.947251,0.304586,1.0,-0.054533,0.930639,0.283545
scripts_per_10k,-0.08019,0.4807,-0.054533,1.0,-0.069313,-0.036091
num_ods_2017,0.910905,0.28349,0.930639,-0.069313,1.0,0.426827
od_rate_per_10k_2017,0.236303,0.090853,0.283545,-0.036091,0.426827,1.0


In [10]:
# take a look at the correlations matrix for long-acting opioids
# Restrict to numeric columns first: on pandas >= 2.0, DataFrame.corr() raises when the
# frame still holds string columns. select_dtypes works on both old and new pandas versions.
oao[(oao['long_acting_opioid_drug_flag'] == 'Y') & (oao['opioid_drug_flag'] == 'Y')].select_dtypes(include = 'number').corr()

Unnamed: 0,population,tot_scripts,tot_scripts_per_county,scripts_per_10k,num_ods_2017,od_rate_per_10k_2017
population,1.0,0.381237,0.943982,-0.074062,0.910619,0.234687
tot_scripts,0.381237,1.0,0.420128,0.447211,0.421151,0.154211
tot_scripts_per_county,0.943982,0.420128,1.0,-0.04184,0.927083,0.279785
scripts_per_10k,-0.074062,0.447211,-0.04184,1.0,-0.050676,0.005068
num_ods_2017,0.910619,0.421151,0.927083,-0.050676,1.0,0.426706
od_rate_per_10k_2017,0.234687,0.154211,0.279785,0.005068,0.426706,1.0


In [11]:
# take a look at the correlations matrix for short-acting opioids
# Restrict to numeric columns first: on pandas >= 2.0, DataFrame.corr() raises when the
# frame still holds string columns. select_dtypes works on both old and new pandas versions.
oao[(oao['long_acting_opioid_drug_flag'] == 'N') & (oao['opioid_drug_flag'] == 'Y')].select_dtypes(include = 'number').corr()

Unnamed: 0,population,tot_scripts,tot_scripts_per_county,scripts_per_10k,num_ods_2017,od_rate_per_10k_2017
population,1.0,0.333807,0.949644,-0.0998,0.911147,0.237514
tot_scripts,0.333807,1.0,0.362734,0.458649,0.331167,0.103495
tot_scripts_per_county,0.949644,0.362734,1.0,-0.065761,0.933288,0.286347
scripts_per_10k,-0.0998,0.458649,-0.065761,1.0,-0.089006,-0.051158
num_ods_2017,0.911147,0.331167,0.933288,-0.089006,1.0,0.426929
od_rate_per_10k_2017,0.237514,0.103495,0.286347,-0.051158,0.426929,1.0


In [12]:
# count the rows in the merged dataframe for every combination of
# opioid flag, drug group and long-acting flag
oao.groupby(
    by = [
        'opioid_drug_flag',
        'drug_group',
        'long_acting_opioid_drug_flag',
    ]
).size()

opioid_drug_flag  drug_group          long_acting_opioid_drug_flag
N                 NOT AN OPIOID       N                               39731
Y                 HYDROCODONE (ALL)   N                                 126
                                      Y                                  20
                  METHADONE (ALL)     Y                                  57
                  NOT A TOP 3 OPIOID  N                                 486
                                      Y                                 423
                  OXYCODONE (ALL)     N                                 191
                                      Y                                  97
dtype: int64

In [13]:
# take a look at the correlations matrix for every row that is not flagged as long-acting
# NOTE(review): this filter does not check opioid_drug_flag, so non-opioid drugs are
# included as well - confirm that is intended.
# Restrict to numeric columns first: on pandas >= 2.0, DataFrame.corr() raises when the
# frame still holds string columns. select_dtypes works on both old and new pandas versions.
oao[oao['long_acting_opioid_drug_flag'] == 'N'].select_dtypes(include = 'number').corr()

Unnamed: 0,population,tot_scripts,tot_scripts_per_county,scripts_per_10k,num_ods_2017,od_rate_per_10k_2017
population,1.0,0.240359,0.950676,-0.094118,0.912538,0.241668
tot_scripts,0.240359,1.0,0.255174,0.455681,0.233977,0.072785
tot_scripts_per_county,0.950676,0.255174,1.0,-0.076646,0.933332,0.288978
scripts_per_10k,-0.094118,0.455681,-0.076646,1.0,-0.084914,-0.04548
num_ods_2017,0.912538,0.233977,0.933332,-0.084914,1.0,0.430168
od_rate_per_10k_2017,0.241668,0.072785,0.288978,-0.04548,0.430168,1.0


### Create some visualizations

In [25]:
# create a boxplot to see how frequently different opioids are typically prescribed
# A title and readable axis labels are set so the figure can be understood on its own.
fig = px.box(oao[oao['opioid_drug_flag'] == 'Y'],
                 x = 'generic_name',
                 y = 'scripts_per_10k',
                 title = 'Opioid prescriptions per 10,000 residents by generic drug name',
                 labels = {
                     'generic_name': 'Generic drug name',
                     'scripts_per_10k': 'Prescriptions per 10,000 residents'
                 }
            )
fig.show()

In [26]:
# total prescriptions per county and drug group; the county-level figures
# (overdose deaths, population, county-wide prescription total) ride along as group keys
oao_grouped = (
    oao
    .groupby(['county', 'drug_group', 'num_ods_2017', 'population', 'tot_scripts_per_county'])
    .agg({'tot_scripts': 'sum'})
    .reset_index()
)

In [27]:
# preview the first five rows of the per-county / per-drug-group totals
oao_grouped.head(n = 5)

Unnamed: 0,county,drug_group,num_ods_2017,population,tot_scripts_per_county,tot_scripts
0,ANDERSON,HYDROCODONE (ALL),34.0,75538.0,532145.0,14227.0
1,ANDERSON,NOT A TOP 3 OPIOID,34.0,75538.0,532145.0,7556.0
2,ANDERSON,NOT AN OPIOID,34.0,75538.0,532145.0,497263.0
3,ANDERSON,OXYCODONE (ALL),34.0,75538.0,532145.0,13099.0
4,BEDFORD,HYDROCODONE (ALL),3.0,46854.0,157650.0,4703.0


In [17]:
# add overdose deaths and prescriptions expressed per 10,000 residents
oao_grouped = oao_grouped.assign(
    ods_per_10k = lambda frame: frame['num_ods_2017'] / frame['population'] * 10000,
    scripts_per_10k = lambda frame: frame['tot_scripts'] / frame['population'] * 10000,
)

In [18]:
# preview the first twenty rows, now including the per-10k columns
oao_grouped.head(n = 20)

Unnamed: 0,county,drug_group,num_ods_2017,population,tot_scripts,ods_per_10k,scripts_per_10k
0,ANDERSON,HYDROCODONE (ALL),34.0,75538.0,14227.0,4.501046,1883.422913
1,ANDERSON,NOT A TOP 3 OPIOID,34.0,75538.0,7556.0,4.501046,1000.291244
2,ANDERSON,NOT AN OPIOID,34.0,75538.0,497263.0,4.501046,65829.51627
3,ANDERSON,OXYCODONE (ALL),34.0,75538.0,13099.0,4.501046,1734.094098
4,BEDFORD,HYDROCODONE (ALL),3.0,46854.0,4703.0,0.640287,1003.75635
5,BEDFORD,NOT A TOP 3 OPIOID,3.0,46854.0,2053.0,0.640287,438.169633
6,BEDFORD,NOT AN OPIOID,3.0,46854.0,149361.0,0.640287,31877.961327
7,BEDFORD,OXYCODONE (ALL),3.0,46854.0,1533.0,0.640287,327.18658
8,BENTON,HYDROCODONE (ALL),5.0,16154.0,4804.0,3.095209,2973.876439
9,BENTON,METHADONE (ALL),5.0,16154.0,59.0,3.095209,36.523462


In [19]:
# total prescriptions per county, split only by opioid vs. non-opioid
oao_regrouped = (
    oao
    .groupby(['county', 'opioid_drug_flag', 'num_ods_2017', 'population'])
    .agg({'tot_scripts': 'sum'})
    .reset_index()
)

In [20]:
# preview the first five rows of the opioid / non-opioid totals per county
oao_regrouped.head(n = 5)

Unnamed: 0,county,opioid_drug_flag,num_ods_2017,population,tot_scripts
0,ANDERSON,N,34.0,75538.0,497263.0
1,ANDERSON,Y,34.0,75538.0,34882.0
2,BEDFORD,N,3.0,46854.0,149361.0
3,BEDFORD,Y,3.0,46854.0,8289.0
4,BENTON,N,5.0,16154.0,55684.0


In [21]:
# add overdose deaths and prescriptions expressed per 10,000 residents
oao_regrouped = oao_regrouped.assign(
    ods_per_10k = lambda frame: frame['num_ods_2017'] / frame['population'] * 10000,
    scripts_per_10k = lambda frame: frame['tot_scripts'] / frame['population'] * 10000,
)

In [22]:
# preview the first five rows, now including the per-10k columns
oao_regrouped.head(n = 5)

Unnamed: 0,county,opioid_drug_flag,num_ods_2017,population,tot_scripts,ods_per_10k,scripts_per_10k,pct_tot_scripts_per_county
0,ANDERSON,N,34.0,75538.0,497263.0,4.501046,65829.51627,
1,ANDERSON,Y,34.0,75538.0,34882.0,4.501046,4617.808255,
2,BEDFORD,N,3.0,46854.0,149361.0,0.640287,31877.961327,
3,BEDFORD,Y,3.0,46854.0,8289.0,0.640287,1769.112562,
4,BENTON,N,5.0,16154.0,55684.0,3.095209,34470.719326,


In [23]:
# create a boxplot to see how frequently long-acting vs. short-acting opioids are typically prescribed
# A title and readable axis labels are set so the figure can be understood on its own.
fig = px.box(oao[oao['opioid_drug_flag'] == 'Y'],
                 x = 'long_acting_opioid_drug_flag',
                 y = 'scripts_per_10k',
                 title = 'Opioid prescriptions per 10,000 residents: long-acting vs. short-acting',
                 labels = {
                     'long_acting_opioid_drug_flag': 'Long-acting opioid (Y/N)',
                     'scripts_per_10k': 'Prescriptions per 10,000 residents'
                 }
            )
fig.show()

In [24]:
# total number of prescriptions for each value of the long-acting flag
# NOTE(review): no opioid_drug_flag filter is applied here, so the 'N' total also counts
# non-opioid drugs - confirm that is intended.
oao['tot_scripts'].groupby(oao['long_acting_opioid_drug_flag']).sum()

long_acting_opioid_drug_flag
N    37987645.0
Y      296044.0
Name: tot_scripts, dtype: float64