# Q4: Is there an association between rates of opioid prescriptions and overdose deaths by county?

### Import libraries and get data from the `prescribers` database

In [1]:
# import statements
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from sqlalchemy import create_engine

In [2]:
# establish path to prescribers database
# - SQLAlchemy 1.4+ only recognizes the "postgresql://" scheme; the older
#   "postgres://" alias fails when the engine is created.
# - Set the PRESCRIBERS_DB_URL environment variable to point at a different
#   server or to supply real credentials without committing them to the notebook.
#   The fallback below is the local development database.
connection_string = os.environ.get(
    "PRESCRIBERS_DB_URL",
    "postgresql://postgres:postgres@localhost:5432/prescribers",
)

In [3]:
# define SQL query engine
# (built from connection_string; passed as `con` to the pd.read_sql calls below)
engine = create_engine(connection_string)

In [4]:
# get OD data from the prescribers database
# One row per TN county for 2017: the raw overdose death count plus the rate per
# 10,000 residents. A county is labelled 'urban' when it has a match in the cbsa
# table and 'rural' otherwise.
# The rate alias is written in lowercase because Postgres folds unquoted
# identifiers to lowercase anyway; this is the column name pandas receives.
od_query = '''
SELECT
    fc.fipscounty
    , CASE WHEN cbsa.fipscounty IS NOT NULL THEN 'urban' ELSE 'rural' END AS county_type
    , od.overdose_deaths AS num_ods_2017
    , ROUND((od.overdose_deaths * 1.0 / p3.population * 10000), 6) AS od_rate_per_10k_2017

FROM overdose_deaths AS od

JOIN fips_county AS fc
    ON fc.fipscounty = od.fipscounty

JOIN population AS p3
    ON p3.fipscounty = od.fipscounty

LEFT JOIN cbsa
    ON cbsa.fipscounty = fc.fipscounty

WHERE od.year = 2017
AND fc.state = 'TN';
'''

In [5]:
# read in the query results as a pandas dataframe
ods = pd.read_sql(od_query, con = engine)

# take a look at the overdoses dataframe
# (a notebook only renders a cell's last expression, so a bare `ods.head()` here
# would be discarded; `display` is provided by the Jupyter/IPython kernel)
display(ods.head())

# make sure the datatypes are correct in the overdoses dataframe
ods.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fipscounty            95 non-null     object 
 1   county_type           95 non-null     object 
 2   num_ods_2017          95 non-null     float64
 3   od_rate_per_10k_2017  95 non-null     float64
dtypes: float64(2), object(2)
memory usage: 3.1+ KB


In [6]:
# get opioid prescription data from the prescribers database
#
# zip_to_county maps each zip code to exactly one TN county: the one with the
# highest tot_ratio for that zip. ROW_NUMBER() with a deterministic tie-break is
# used rather than RANK(), because RANK() gives rnk = 1 to every tied county and
# the zip's prescriptions would then be summed once per tied county.
#
# The main query sums opioid claims over the prescribers located in each county's
# zips and converts the total into a rate per 10,000 residents. The `* 1.0`
# mirrors the overdose query so the rate is never computed with integer division.
opioid_query = '''
WITH zip_to_county AS (
    SELECT
        zf.fipscounty
        , fc.county
        , fc.state
        , zip
        , tot_ratio
        , ROW_NUMBER() OVER(PARTITION BY zip ORDER BY tot_ratio DESC, zf.fipscounty) AS rnk
    FROM zip_fips AS zf
    JOIN fips_county AS fc
        ON fc.fipscounty = zf.fipscounty
    WHERE fc.state = 'TN'
)

SELECT zc.fipscounty
    , zc.county
    , zc.state
    , p3.population
    , SUM(p2.total_claim_count) AS tot_opioid_scripts
    , ROUND(SUM(p2.total_claim_count) * 1.0 / p3.population * 10000, 6) AS scripts_per_10k

FROM zip_to_county AS zc

JOIN prescriber AS p1
    ON p1.nppes_provider_zip5 = zc.zip

JOIN prescription AS p2
    ON p2.npi = p1.npi

JOIN drug AS d
    ON d.drug_name = p2.drug_name

JOIN population AS p3
    ON zc.fipscounty = p3.fipscounty

WHERE
    zc.rnk = 1
    AND d.opioid_drug_flag = 'Y'

GROUP BY 1,2,3,4
ORDER BY 4 DESC;
'''

In [7]:
# read in the query results as a pandas dataframe
opioids = pd.read_sql(opioid_query, con = engine)

# take a look at the opioid prescription dataframe
# (a notebook only renders a cell's last expression, so a bare `opioids.head()`
# here would be discarded; `display` is provided by the Jupyter/IPython kernel)
display(opioids.head())

# make sure the datatypes are correct in the opioid prescription dataframe
opioids.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   fipscounty          95 non-null     object 
 1   county              95 non-null     object 
 2   state               95 non-null     object 
 3   population          95 non-null     float64
 4   tot_opioid_scripts  95 non-null     float64
 5   scripts_per_10k     95 non-null     float64
dtypes: float64(3), object(3)
memory usage: 4.6+ KB


In [8]:
# merge the overdoses and opioid prescription dataframes on the county FIPS code
# (done in pandas rather than SQL so each query stays small and can be inspected on its own)
oao = ods.merge(opioids, how = 'inner', on = 'fipscounty')

# Check to make sure no counties went missing in the join:
# an inner join drops unmatched counties silently, so compare the row counts
# explicitly instead of relying on reading the info() output by eye
assert len(oao) == len(ods) == len(opioids), (
    f'row counts differ after merge: ods={len(ods)}, opioids={len(opioids)}, merged={len(oao)}'
)
oao.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 95 entries, 0 to 94
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fipscounty            95 non-null     object 
 1   county_type           95 non-null     object 
 2   num_ods_2017          95 non-null     float64
 3   od_rate_per_10k_2017  95 non-null     float64
 4   county                95 non-null     object 
 5   state                 95 non-null     object 
 6   population            95 non-null     float64
 7   tot_opioid_scripts    95 non-null     float64
 8   scripts_per_10k       95 non-null     float64
dtypes: float64(5), object(4)
memory usage: 7.4+ KB


In [9]:
# preview the first rows of the merged overdose / opioid prescription dataframe
oao.head()

Unnamed: 0,fipscounty,county_type,num_ods_2017,od_rate_per_10k_2017,county,state,population,tot_opioid_scripts,scripts_per_10k
0,47001,urban,34.0,4.501046,ANDERSON,TN,75538.0,34882.0,4617.808255
1,47009,urban,29.0,2.28104,BLOUNT,TN,127135.0,27352.0,2151.413851
2,47011,urban,14.0,1.350491,BRADLEY,TN,103666.0,48244.0,4653.791986
3,47013,urban,5.0,1.255524,CAMPBELL,TN,39824.0,18687.0,4692.396545
4,47015,urban,3.0,2.167787,CANNON,TN,13839.0,2758.0,1992.918563


In [10]:
# summary statistics for the numeric columns of the merged dataframe
oao.describe()

Unnamed: 0,num_ods_2017,od_rate_per_10k_2017,population,tot_opioid_scripts,scripts_per_10k
count,95.0,95.0,95.0,95.0,95.0
mean,13.336842,1.530907,69446.115789,27091.631579,3688.619654
std,32.093152,1.112389,131329.843549,49820.475106,2071.034099
min,0.0,0.0,5071.0,220.0,289.931471
25%,2.0,0.758731,17838.0,4828.5,2180.986717
50%,4.0,1.350491,31566.0,11230.0,3329.781268
75%,12.0,1.994736,56451.0,23750.0,4651.084483
max,196.0,6.043361,937847.0,284885.0,11899.403162


### Take a look at the correlation between the overdose and opioid prescription data

In [11]:
# create a correlation matrix between all of the numerical values in the dataset
# The numeric columns are selected explicitly: oao also holds text columns
# (fipscounty, county_type, county, state), and pandas 2.x raises on those
# instead of silently skipping them.
oao.select_dtypes(include = 'number').corr()

Unnamed: 0,num_ods_2017,od_rate_per_10k_2017,population,tot_opioid_scripts,scripts_per_10k
num_ods_2017,1.0,0.359151,0.914094,0.924365,0.070105
od_rate_per_10k_2017,0.359151,1.0,0.18715,0.224856,-0.005052
population,0.914094,0.18715,1.0,0.912015,0.054829
tot_opioid_scripts,0.924365,0.224856,0.912015,1.0,0.272752
scripts_per_10k,0.070105,-0.005052,0.054829,0.272752,1.0


*Interestingly, there is a low, positive correlation between county population and overdose death rates (r ≈ 0.19), but essentially no correlation between county population and opioid prescription rates (r ≈ 0.05). The correlation between overdose death rates and opioid prescription rates themselves is also essentially zero (r ≈ -0.005). It might be worth separating urban and rural counties to see whether the trends differ.*

In [12]:
# Check correlation matrix just for rural counties in TN
# Numeric columns are selected explicitly because pandas 2.x raises on the
# text columns instead of silently skipping them.
oao[oao['county_type'] == 'rural'].select_dtypes(include = 'number').corr()

Unnamed: 0,num_ods_2017,od_rate_per_10k_2017,population,tot_opioid_scripts,scripts_per_10k
num_ods_2017,1.0,0.518578,0.805248,0.580384,0.092447
od_rate_per_10k_2017,0.518578,1.0,0.069637,0.027568,-0.002662
population,0.805248,0.069637,1.0,0.791004,0.207897
tot_opioid_scripts,0.580384,0.027568,0.791004,1.0,0.659542
scripts_per_10k,0.092447,-0.002662,0.207897,0.659542,1.0


*In rural counties, there is a practically non-existent correlation between opioid prescriptions per capita and overdose deaths per capita.*

In [13]:
# Check correlation matrix just for urban counties in TN
# Numeric columns are selected explicitly because pandas 2.x raises on the
# text columns instead of silently skipping them.
oao[oao['county_type'] == 'urban'].select_dtypes(include = 'number').corr()

Unnamed: 0,num_ods_2017,od_rate_per_10k_2017,population,tot_opioid_scripts,scripts_per_10k
num_ods_2017,1.0,0.349484,0.903695,0.922086,0.159036
od_rate_per_10k_2017,0.349484,1.0,0.128399,0.191886,0.038058
population,0.903695,0.128399,1.0,0.903014,0.114033
tot_opioid_scripts,0.922086,0.191886,0.903014,1.0,0.398009
scripts_per_10k,0.159036,0.038058,0.114033,0.398009,1.0


*In urban counties, there is a very low, positive correlation (r ≈ 0.04) between opioid prescriptions per capita and overdose deaths per capita.*

### Make some scatterplots

In [18]:
# Bubble chart with one marker per county: overdose death rate on the x-axis,
# opioid prescription rate on the y-axis, marker size driven by county
# population and marker color showing the urban / rural split.
fig = px.scatter(
    oao,
    title = '2017 TN Opioid Prescriptions vs. Overdose Deaths (per 10k residents)',
    x = 'od_rate_per_10k_2017',
    y = 'scripts_per_10k',
    color = 'county_type',
    size = 'population',
    hover_name = 'county',
    hover_data = ['population', 'tot_opioid_scripts', 'num_ods_2017'],
    labels = {
        'population': 'County Population',
        'county_type': 'County Type',
        'od_rate_per_10k_2017': 'Overdose Deaths (per 10k)',
        'scripts_per_10k': 'Opioid Prescriptions (per 10k)',
        'num_ods_2017': 'Total number of opioid overdose deaths',
    },
    width = 800,
    height = 600,
)

# markers only, with a minimum marker size so the least populous counties stay visible
fig = fig.update_traces(marker = {'sizemin': 3}, mode = 'markers')
fig.show()

In [15]:
# Side-by-side scatter plots (one facet per county type) of overdose death rate
# against opioid prescription rate, each with an OLS trendline.
fig = px.scatter(
    oao,
    title = '2017 TN Opioid Prescriptions vs. Overdose Deaths (per 10k residents)',
    x = 'od_rate_per_10k_2017',
    y = 'scripts_per_10k',
    color = 'county_type',
    facet_col = 'county_type',
    trendline = 'ols',
    hover_name = 'county',
    hover_data = ['population', 'tot_opioid_scripts', 'num_ods_2017'],
    labels = {
        'population': 'County Population',
        'county_type': 'County Type',
        'od_rate_per_10k_2017': 'Overdose Deaths (per 10k)',
        'scripts_per_10k': 'Opioid Prescriptions (per 10k)',
        'num_ods_2017': 'Total number of opioid overdose deaths',
    },
    width = 1200,
    height = 600,
)

fig.show()

In [16]:
# Side-by-side scatter plots (one facet per county type) of overdose death rate
# against opioid prescription rate, each with an OLS trendline; the axis titles
# here use the "Rate" wording.
fig = px.scatter(
    oao,
    title = '2017 TN Opioid Prescriptions vs. Overdose Deaths (per 10k residents)',
    x = 'od_rate_per_10k_2017',
    y = 'scripts_per_10k',
    color = 'county_type',
    facet_col = 'county_type',
    trendline = 'ols',
    hover_name = 'county',
    hover_data = ['population', 'tot_opioid_scripts', 'num_ods_2017'],
    labels = {
        'population': 'County Population',
        'county_type': 'County Type',
        'od_rate_per_10k_2017': 'Overdose Death Rate (per 10k)',
        'scripts_per_10k': 'Opioid Prescription Rate (per 10k)',
        'num_ods_2017': 'Total number of opioid overdose deaths',
    },
    width = 1200,
    height = 600,
)

fig.show()

*Conclusion: the correlation between opioid prescription rates and overdose death rates is rather weak regardless of TN county type.*

In [23]:
# Scatter plot of opioid prescription rate (y) against county population (x),
# colored by county type.
fig = px.scatter(
    oao,
    title = '2017 TN Opioid Prescriptions (per 10k residents) by Total County Population',
    x = 'population',
    y = 'scripts_per_10k',
    color = 'county_type',
    hover_name = 'county',
    hover_data = ['population', 'tot_opioid_scripts', 'num_ods_2017'],
    labels = {
        'population': 'County Population',
        'county_type': 'County Type',
        'od_rate_per_10k_2017': 'Overdose Deaths (per 10k)',
        'scripts_per_10k': 'Opioid Prescriptions (per 10k)',
        'num_ods_2017': 'Total number of opioid overdose deaths',
    },
    width = 800,
    height = 600,
)
fig.show()

*There is hardly any correlation between county population size and opioid prescription rates.*