# Q4: Is there an association between rates of opioid prescriptions and overdose deaths by county?

### Import libraries and get data from the `prescribers` database

In [29]:
# import statements
from sqlalchemy import create_engine
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

In [2]:
# establish path to prescribers database
connection_string = "postgres://postgres:postgres@localhost:5432/prescribers"

In [3]:
# define SQL query engine
engine = create_engine(connection_string)

In [11]:
# get OD data from the prescribers database
od_query = '''
SELECT
	fc.fipscounty
	, CASE WHEN cbsa.fipscounty IS NOT NULL THEN 'urban' ELSE 'rural' END AS county_type
	, od.overdose_deaths AS num_ods_2017
	, ROUND((od.overdose_deaths / p3.population * 10000), 6) AS od_rate_per_10K_2017

FROM overdose_deaths AS od

JOIN fips_county AS fc
	ON fc.fipscounty = od.fipscounty

JOIN population AS p3
	ON p3.fipscounty = od.fipscounty

LEFT JOIN cbsa
	ON cbsa.fipscounty = fc.fipscounty
	
WHERE od.year = 2017
AND fc.state = 'TN';
'''

In [12]:
# read in the query results as a pandas dataframe
ods = pd.read_sql(od_query, con = engine)

# take a look at the overdoses dataframe
ods.head()

# make sure the datatypes are correct in the overdoses dataframe
ods.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fipscounty            95 non-null     object 
 1   county_type           95 non-null     object 
 2   num_ods_2017          95 non-null     float64
 3   od_rate_per_10k_2017  95 non-null     float64
dtypes: float64(2), object(2)
memory usage: 3.1+ KB


In [13]:
# get opioid prescription data from the prescribers database
opioid_query = '''
WITH zip_to_county AS (
	SELECT
		zf.fipscounty
		, fc.county
		, fc.state
		, zip
		, tot_ratio
		, RANK() OVER(PARTITION BY zip ORDER BY tot_ratio DESC) AS rnk
	FROM zip_fips AS zf
	JOIN fips_county AS fc
		ON fc.fipscounty = zf.fipscounty
	WHERE fc.state = 'TN'
)

SELECT zc.fipscounty
	, zc.county
	, zc.state
	, p3.population
	--, d.generic_name
	--, d.long_acting_opioid_drug_flag
	--, COUNT(DISTINCT zc.zip) AS num_zips
	, SUM(p2.total_claim_count) AS tot_opioid_scripts
	, ROUND(SUM(p2.total_claim_count) / SUM(p3.population) * 10000, 6) AS scripts_per_10k
	
FROM zip_to_county AS zc

JOIN prescriber AS p1
	ON p1.nppes_provider_zip5 = zc.zip

JOIN prescription AS p2
	ON p2.npi = p1.npi

JOIN drug AS d
	ON d.drug_name = p2.drug_name

JOIN population AS p3
	ON zc.fipscounty = p3.fipscounty

WHERE
	zc.rnk = 1
	AND d.opioid_drug_flag = 'Y'

GROUP BY 1,2,3,4
ORDER BY 4 DESC;
'''

In [14]:
# read in the query results as a pandas dataframe
opioids = pd.read_sql(opioid_query, con = engine)

# take a look at the opiod prescription dataframe
opioids.head()

# make sure the datatypes are correct in the opioid prescription dataframe
opioids.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   fipscounty          95 non-null     object 
 1   county              95 non-null     object 
 2   state               95 non-null     object 
 3   population          95 non-null     float64
 4   tot_opioid_scripts  95 non-null     float64
 5   scripts_per_10k     95 non-null     float64
dtypes: float64(3), object(3)
memory usage: 4.6+ KB


In [15]:
# merge the overdoses and opioid prescription dataframes
# I opted to do this in python because...why not?
oao = ods.merge(opioids, how = 'inner', on = 'fipscounty')

# Check to make sure no counties went missing in the join
oao.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 95 entries, 0 to 94
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fipscounty            95 non-null     object 
 1   county_type           95 non-null     object 
 2   num_ods_2017          95 non-null     float64
 3   od_rate_per_10k_2017  95 non-null     float64
 4   county                95 non-null     object 
 5   state                 95 non-null     object 
 6   population            95 non-null     float64
 7   tot_opioid_scripts    95 non-null     float64
 8   scripts_per_10k       95 non-null     float64
dtypes: float64(5), object(4)
memory usage: 7.4+ KB


### Take a look at the correlation between the overdose and opioid prescription data

In [18]:
# create a correlation matrix between all of the numerical values in the dataset
oao.corr()

Unnamed: 0,num_ods_2017,od_rate_per_10k_2017,population,tot_opioid_scripts,scripts_per_10k
num_ods_2017,1.0,0.359151,0.914094,0.924365,-0.327258
od_rate_per_10k_2017,0.359151,1.0,0.18715,0.224856,-0.115678
population,0.914094,0.18715,1.0,0.912015,-0.374794
tot_opioid_scripts,0.924365,0.224856,0.912015,1.0,-0.38055
scripts_per_10k,-0.327258,-0.115678,-0.374794,-0.38055,1.0


*Interestingly, there seems to be a low, positive correlation between county population and overdose death rates but a low-to-moderate, negative correlation between county population and opioid prescription rates. This amounts to a very low, negative correlation between overdose death rates and opioid prescription rates. Might be worth separating out by urban / rural counties to see if the trends are different.*

In [47]:
# Check correlation matrix just for rural counties in TN
oao[oao['county_type'] == 'rural'].corr()

Unnamed: 0,num_ods_2017,od_rate_per_10k_2017,population,tot_opioid_scripts,scripts_per_10k
num_ods_2017,1.0,0.518578,0.805248,0.580384,-0.419317
od_rate_per_10k_2017,0.518578,1.0,0.069637,0.027568,0.044795
population,0.805248,0.069637,1.0,0.791004,-0.667687
tot_opioid_scripts,0.580384,0.027568,0.791004,1.0,-0.464194
scripts_per_10k,-0.419317,0.044795,-0.667687,-0.464194,1.0


*In rural counties, there is a practically non-existent correlation between opioid prescriptions per capita and overdose deaths per capita.*

In [49]:
# Check correlation matrix just for urban counties in TN
oao[oao['county_type'] == 'urban'].corr()

Unnamed: 0,num_ods_2017,od_rate_per_10k_2017,population,tot_opioid_scripts,scripts_per_10k
num_ods_2017,1.0,0.349484,0.903695,0.922086,-0.447697
od_rate_per_10k_2017,0.349484,1.0,0.128399,0.191886,-0.12385
population,0.903695,0.128399,1.0,0.903014,-0.49949
tot_opioid_scripts,0.922086,0.191886,0.903014,1.0,-0.520382
scripts_per_10k,-0.447697,-0.12385,-0.49949,-0.520382,1.0


*In urban counties, there is a very low, negative correlation between opioid prescriptions per capita and overdose deaths per capita.*

### Make some scatterplots

In [50]:
# create a bubble plot of ODs per 10K vs opioid prescriptions per 10K
fig = px.scatter(oao,
                 x = 'od_rate_per_10k_2017',
                 y = 'scripts_per_10k',
                 size = 'population',
                 color = 'county_type',
                 labels = {
                     'population': 'County Population',
                     'county_type': 'County Type',
                     'od_rate_per_10k_2017': 'Overdose Death Rate (per 10k)',
                     'scripts_per_10k': 'Opioid Prescription Rate (per 10k)',
                     'num_ods_2017': 'Total number of opioid overdose deaths'
                 },
                 hover_name = 'county',
                 hover_data = ['population',
                               'tot_opioid_scripts',
                               'num_ods_2017'],
                title = '2017 TN Opioid Prescriptions vs. Overdose Deaths (per 10k residents)')

fig = fig.update_traces(mode='markers', marker=dict(sizemin = 3))
fig.show()

In [51]:
# create a facet grid with two bubble plots of ODs per 10K vs opioid prescriptions per 10K, separated by urban / rural
fig = px.scatter(oao,
                 x = 'od_rate_per_10k_2017',
                 y = 'scripts_per_10k',
                 facet_col = 'county_type',
                 color = 'county_type',
                 trendline = 'ols',
                 labels = {
                     'population': 'County Population',
                     'county_type': 'County Type',
                     'od_rate_per_10k_2017': 'Overdose Death Rate (per 10k)',
                     'scripts_per_10k': 'Opioid Prescription Rate (per 10k)',
                     'num_ods_2017': 'Total number of opioid overdose deaths'
                 },
                 hover_name = 'county',
                 hover_data = ['population',
                               'tot_opioid_scripts',
                               'num_ods_2017'],
                title = '2017 TN Opioid Prescriptions vs. Overdose Deaths (per 10k residents)')

fig.show()

*Conclusion: the correlation between opioid prescription rates and overdose death rates is rather weak regardless of TN county type.*

In [59]:
fig = px.scatter(oao,
                 x = 'scripts_per_10k',
                 y = 'population',
                 color = 'county_type',
                 labels = {
                     'population': 'County Population',
                     'county_type': 'County Type',
                     'od_rate_per_10k_2017': 'Overdose Death Rate (per 10k)',
                     'scripts_per_10k': 'Opioid Prescription Rate (per 10k)',
                     'num_ods_2017': 'Total number of opioid overdose deaths'
                 },
                 hover_name = 'county',
                 hover_data = ['population',
                               'tot_opioid_scripts',
                               'num_ods_2017'],
                title = '2017 TN Opioid Prescription (per 10k residents) by Total County Population')
fig.show()

*The larger the population of the county, however, the more opioid prescriptions there are per capita.*