In [2]:
from sqlalchemy import create_engine
import pandas as pd

In [3]:
connection_string = "postgres://postgres:postgres@localhost:5432/prescribers"

In [4]:
engine = create_engine(connection_string)

### 4 (MATT) Is there an association between rates of opioid prescriptions and overdose deaths by county?

In [7]:
#Total claims per Generic Name
#!!! Need to combine rows with shared opioids
query = '''
SELECT SUM(p.total_claim_count)
    , d.generic_name
FROM prescription AS p 
INNER JOIN drug AS d
USING(drug_name)
WHERE d.opioid_drug_flag = 'Y'
GROUP BY generic_name
ORDER BY generic_name, sum;
'''

claims_per_generic = pd.read_sql(query, con = engine)
claims_per_generic

Unnamed: 0,sum,generic_name
0,34694.0,ACETAMINOPHEN WITH CODEINE
1,34.0,ACETAMINOPHEN/CAFF/DIHYDROCOD
2,3315.0,BUPRENORPHINE
3,62.0,BUPRENORPHINE HCL
4,289.0,BUTALBIT/ACETAMIN/CAFF/CODEINE
5,1145.0,BUTORPHANOL TARTRATE
6,117.0,CODEINE SULFATE
7,369.0,CODEINE/BUTALBITAL/ASA/CAFFEIN
8,61557.0,FENTANYL
9,967.0,HYDROCODONE BITARTRATE


In [23]:
#Find Counties by Zip Code
#!!! Need to window to find max tot_ratio per zip code
query = '''
SELECT zip, fipscounty, tot_ratio
    , RANK() OVER(PARTITION BY zip ORDER BY tot_ratio DESC) AS rank
FROM zip_fips
ORDER BY zip
LIMIT 20;
'''

claims_per_generic = pd.read_sql(query, con = engine)
claims_per_generic

Unnamed: 0,zip,fipscounty,tot_ratio,rank
0,501,36103,1.0,1
1,601,72001,0.8375,1
2,601,72113,0.1625,2
3,602,72003,0.999919,1
4,602,72005,8.1e-05,2
5,603,72005,0.99729,1
6,603,72099,0.002454,2
7,603,72071,0.000256,3
8,604,72005,1.0,1
9,605,72005,1.0,1


In [8]:
#Overdose deaths per county per year
query = '''
SELECT f.county
    , year
    , p.population
    , overdose_deaths
    , ROUND(overdose_deaths / population * 1000.0, 3) AS deaths_per_thousand
FROM overdose_deaths AS o
JOIN fips_county AS f
USING(fipscounty)
JOIN population AS p
USING(fipscounty)
ORDER BY deaths_per_thousand DESC;
'''

claims_per_generic = pd.read_sql(query, con = engine)
claims_per_generic

Unnamed: 0,county,year,population,overdose_deaths,deaths_per_thousand
0,TROUSDALE,2018.0,8773.0,7.0,0.798
1,CLAY,2016.0,7684.0,6.0,0.781
2,CHEATHAM,2017.0,39713.0,24.0,0.604
3,CHEATHAM,2018.0,39713.0,21.0,0.529
4,VAN BUREN,2016.0,5675.0,3.0,0.529
...,...,...,...,...,...
375,WAYNE,2017.0,16713.0,0.0,0.000
376,HAYWOOD,2015.0,17944.0,0.0,0.000
377,HAYWOOD,2017.0,17944.0,0.0,0.000
378,WHITE,2017.0,26394.0,0.0,0.000


In [10]:
#First attempt at combining
query = '''
WITH claims AS (
    SELECT p.npi
        , p.total_claim_count
        , d.generic_name
    FROM prescription AS p 
    INNER JOIN drug AS d
    USING(drug_name)
    WHERE d.opioid_drug_flag = 'Y';
)






SELECT f.county
    , year
    , p.population
    , overdose_deaths
    , ROUND(overdose_deaths / population * 1000.0, 3) AS deaths_per_thousand
FROM overdose_deaths AS o
JOIN fips_county AS f
USING(fipscounty)
JOIN population AS p
USING(fipscounty)
ORDER BY deaths_per_thousand DESC;
'''

claims_per_generic = pd.read_sql(query, con = engine)
claims_per_generic

Unnamed: 0,npi,total_claim_count,generic_name
0,1.659798e+09,525.0,OXYCODONE HCL/ACETAMINOPHEN
1,1.003955e+09,79.0,HYDROCODONE/ACETAMINOPHEN
2,1.487942e+09,12.0,HYDROCODONE/ACETAMINOPHEN
3,1.295730e+09,26.0,TRAMADOL HCL
4,1.316100e+09,12.0,HYDROCODONE/ACETAMINOPHEN
...,...,...,...
31927,1.376778e+09,241.0,HYDROCODONE/ACETAMINOPHEN
31928,1.558546e+09,22.0,OXYCODONE HCL/ACETAMINOPHEN
31929,1.942324e+09,13.0,OXYCODONE HCL/ACETAMINOPHEN
31930,1.700931e+09,12.0,ACETAMINOPHEN WITH CODEINE
