# Is there any association between a particular type of opioid and the number of overdose deaths?

In [20]:
from sqlalchemy import create_engine
import pandas as pd
import plotly.express as px

In [2]:
# SQLAlchemy URL for the local "prescribers" PostgreSQL database.
# The dialect has to be spelled "postgresql": the legacy "postgres" alias was
# removed in SQLAlchemy 1.4, where create_engine() rejects it.
connection_string = "postgresql://postgres:postgres@localhost:5432/prescribers"

In [3]:
# Build the SQLAlchemy engine from the connection URL; it is the `con`
# handed to pd.read_sql when the queries are executed.
engine = create_engine(connection_string)

In [11]:
# Opioid prescription totals per Tennessee county and generic drug name.
#
# The CTE assigns every TN zip code to exactly one county: the one with the
# highest tot_ratio for that zip. ROW_NUMBER() (rather than RANK()) is used so
# that a tie on tot_ratio cannot give two counties rnk = 1 for the same zip,
# which would count that zip's prescriptions once per tied county; the
# fipscounty tie-breaker keeps the choice deterministic.
#
# The main query sums total_claim_count for drugs flagged as opioids and
# scales the sum by county population to a rate per 10,000 residents.
#
# The query is only defined here; it is executed once, by pd.read_sql.
# Executing it separately through engine.execute() ran the same statement a
# second time and left a result object that was never read.
drug_query = '''
WITH zip_to_county AS (
	SELECT
		zf.fipscounty
		, fc.county
		, fc.state
		, zip
		, tot_ratio
		, ROW_NUMBER() OVER(PARTITION BY zip ORDER BY tot_ratio DESC, zf.fipscounty) AS rnk
	FROM zip_fips AS zf
	JOIN fips_county AS fc
		ON fc.fipscounty = zf.fipscounty
	WHERE fc.state = 'TN'
)

SELECT zc.fipscounty
	, zc.county
	, zc.state
	, p3.population
	, d.generic_name
	, d.long_acting_opioid_drug_flag
	, SUM(p2.total_claim_count) AS tot_scripts
	, ROUND(SUM(p2.total_claim_count) / p3.population * 10000, 6) AS scripts_per_10k
	
FROM zip_to_county AS zc

JOIN prescriber AS p1
	ON p1.nppes_provider_zip5 = zc.zip

JOIN prescription AS p2
	ON p2.npi = p1.npi

JOIN drug AS d
	ON d.drug_name = p2.drug_name

JOIN population AS p3
	ON zc.fipscounty = p3.fipscounty

WHERE
	zc.rnk = 1
	AND d.opioid_drug_flag = 'Y'

GROUP BY 1,2,3,4,5,6
ORDER BY 4 DESC
;
'''

In [12]:
# Execute the opioid prescription query and load its rows into a DataFrame.
opioids = pd.read_sql(sql=drug_query, con=engine)

# Preview the first five rows.
opioids.head(n=5)

Unnamed: 0,fipscounty,county,state,population,generic_name,long_acting_opioid_drug_flag,tot_scripts,scripts_per_10k
0,47157,SHELBY,TN,937847.0,ACETAMINOPHEN WITH CODEINE,N,6864.0,73.18891
1,47157,SHELBY,TN,937847.0,ACETAMINOPHEN/CAFF/DIHYDROCOD,N,12.0,0.127953
2,47157,SHELBY,TN,937847.0,BUPRENORPHINE,Y,339.0,3.614662
3,47157,SHELBY,TN,937847.0,BUTORPHANOL TARTRATE,N,131.0,1.396816
4,47157,SHELBY,TN,937847.0,CODEINE SULFATE,N,12.0,0.127953


In [13]:
# 2017 overdose deaths for Tennessee counties: the raw count and the count
# scaled by county population to a rate per 10,000 residents.
od_query = """
SELECT
	fc.fipscounty
	, od.overdose_deaths AS num_ods_2017
	, ROUND((od.overdose_deaths / p3.population * 10000), 6) AS od_rate_per_10K_2017

FROM overdose_deaths AS od

JOIN fips_county AS fc
	ON fc.fipscounty = od.fipscounty

JOIN population AS p3
	ON p3.fipscounty = od.fipscounty

WHERE od.year = 2017
AND fc.state = 'TN'
;
"""

In [14]:
# Load the 2017 county overdose figures into a DataFrame.
ods = pd.read_sql(sql=od_query, con=engine)

# Preview the first five rows.
ods.head(n=5)

Unnamed: 0,fipscounty,num_ods_2017,od_rate_per_10k_2017
0,47017,2.0,0.710808
1,47023,1.0,0.584898
2,47039,0.0,0.0
3,47037,184.0,2.712576
4,47087,2.0,1.72816


In [15]:
# Attach each county's 2017 overdose figures to its per-drug prescription
# rows. The outer join keeps counties present in only one of the two frames.
oao = pd.merge(opioids, ods, on="fipscounty", how="outer")

# Preview the first five rows.
oao.head(n=5)

Unnamed: 0,fipscounty,county,state,population,generic_name,long_acting_opioid_drug_flag,tot_scripts,scripts_per_10k,num_ods_2017,od_rate_per_10k_2017
0,47157,SHELBY,TN,937847.0,ACETAMINOPHEN WITH CODEINE,N,6864.0,73.18891,159.0,1.695372
1,47157,SHELBY,TN,937847.0,ACETAMINOPHEN/CAFF/DIHYDROCOD,N,12.0,0.127953,159.0,1.695372
2,47157,SHELBY,TN,937847.0,BUPRENORPHINE,Y,339.0,3.614662,159.0,1.695372
3,47157,SHELBY,TN,937847.0,BUTORPHANOL TARTRATE,N,131.0,1.396816,159.0,1.695372
4,47157,SHELBY,TN,937847.0,CODEINE SULFATE,N,12.0,0.127953,159.0,1.695372


In [28]:
# Box plot of the prescription rate (scripts per 10k residents) for each
# opioid generic name, to compare how the rate is distributed per drug.
fig = px.box(data_frame=oao, x="generic_name", y="scripts_per_10k")
fig.show()

In [26]:
# Pairwise correlations between the numeric columns of the merged frame.
# The numeric columns are selected explicitly: DataFrame.corr() on pandas 2.0
# and later no longer drops string columns by default and raises on them.
# NOTE(review): county-level values (population, overdose figures) repeat on
# every drug row of a county, so this pools all drugs together rather than
# measuring the association per opioid -- confirm that is what is wanted.
oao.select_dtypes(include="number").corr()

Unnamed: 0,population,tot_scripts,scripts_per_10k,num_ods_2017,od_rate_per_10k_2017
population,1.0,0.280214,-0.08019,0.910905,0.236303
tot_scripts,0.280214,1.0,0.4807,0.28349,0.090853
scripts_per_10k,-0.08019,0.4807,1.0,-0.069313,-0.036091
num_ods_2017,0.910905,0.28349,-0.069313,1.0,0.426827
od_rate_per_10k_2017,0.236303,0.090853,-0.036091,0.426827,1.0
