In [3]:
import os

import pandas as pd
from sqlalchemy import create_engine

In [4]:
# Database URL for the local `prescribers` database.
# - Uses the "postgresql://" scheme: SQLAlchemy 1.4+ no longer accepts the
#   legacy "postgres://" alias and fails to load a dialect for it.
# - Can be overridden through the PRESCRIBERS_DB_URL environment variable so
#   that real credentials never have to be saved inside the notebook; the
#   default below points at the same local database as before.
connection_string = os.environ.get(
    "PRESCRIBERS_DB_URL",
    "postgresql://postgres:postgres@localhost:5432/prescribers",
)

In [5]:
# SQLAlchemy engine built from connection_string; it is what the pd.read_sql
# calls in the cells below pass as `con`.
engine = create_engine(connection_string)

### 4 (MATT) Is there an association between rates of opioid prescriptions and overdose deaths by county?

Methadone
Oxycodone
Hydrocodone

In [45]:
# Total claims per generic name, restricted to drugs flagged as opioids.
# The query joins prescription to drug on drug_name, keeps rows where
# opioid_drug_flag = 'Y', and sums total_claim_count per generic_name. The SUM
# has no alias, so it comes back as the column `sum` (also used in ORDER BY).
# The CASE expression tags each generic_name with a coarse `opioid` label via
# LIKE patterns; names that match none of the patterns are labelled 'other'.
# NOTE(review): '%%' is presumably a doubled '%' so that a literal LIKE wildcard
# survives the DB driver's %-style parameter formatting - confirm for the driver in use.
# TODO: rows are still one per generic_name; they have not yet been combined
# into one row per `opioid` label.
query = '''
SELECT SUM(p.total_claim_count)
    , d.generic_name
    , CASE WHEN d.generic_name LIKE '%%CODEINE%%' THEN 'codeine'
        WHEN d.generic_name LIKE '%%FENTANYL%%' THEN 'fentanyl'
        WHEN d.generic_name LIKE '%%HYDROCODONE%%' THEN 'hydrocodone'
        WHEN d.generic_name LIKE '%%MORPHINE%%' THEN 'morphine'
        WHEN d.generic_name LIKE '%%OXYCODONE%%' THEN 'oxycodone'
        WHEN d.generic_name LIKE '%%OXYMORPHONE%%' THEN 'oxymorphone'
        WHEN d.generic_name LIKE '%%TRAMADOL%%' THEN 'tramadol'
        ELSE 'other' END 
        AS opioid
FROM prescription AS p 
INNER JOIN drug AS d
USING(drug_name)
WHERE d.opioid_drug_flag = 'Y'
GROUP BY generic_name
ORDER BY generic_name, sum;
'''

# Run the query and preview the first rows.
claims_per_opioid = pd.read_sql(query, con = engine)
claims_per_opioid.head()

Unnamed: 0,sum,generic_name,opioid
0,34694.0,ACETAMINOPHEN WITH CODEINE,codeine
1,34.0,ACETAMINOPHEN/CAFF/DIHYDROCOD,other
2,3315.0,BUPRENORPHINE,other
3,62.0,BUPRENORPHINE HCL,other
4,289.0,BUTALBIT/ACETAMIN/CAFF/CODEINE,codeine


In [54]:
# Find counties by zip code.
# zip_fips can list several counties for one zip, each with a tot_ratio. The
# window function ranks the counties inside every zip by tot_ratio, largest
# first, so the rank = 1 row is the county with the highest tot_ratio for that zip.
# NOTE: RANK() gives tied rows the same rank, so a zip whose top counties share
# the same tot_ratio would have more than one rank = 1 row.
query = '''
SELECT zip, fipscounty, tot_ratio
    , RANK() OVER(PARTITION BY zip ORDER BY tot_ratio DESC) AS rank
FROM zip_fips
ORDER BY zip;
'''

# Run the query and preview the first rows.
zip_counties = pd.read_sql(query, con = engine)
zip_counties.head()

Unnamed: 0,zip,fipscounty,tot_ratio,rank
0,501,36103,1.0,1
1,601,72001,0.8375,1
2,601,72113,0.1625,2
3,602,72003,0.999919,1
4,602,72005,8.1e-05,2


In [41]:
# Overdose deaths per county, restricted to year = 2017 by the WHERE clause.
# overdose_deaths is joined to fips_county (for the county name) and to
# population on fipscounty. deaths_per_thousand is
# overdose_deaths / population * 1000, rounded to 3 decimals, and the result is
# sorted with the highest rate first.
query = '''
SELECT f.fipscounty
    , f.county
    , p.population
    , overdose_deaths
    , ROUND(overdose_deaths / population * 1000.0, 3) AS deaths_per_thousand
FROM overdose_deaths AS o
JOIN fips_county AS f
USING(fipscounty)
JOIN population AS p
USING(fipscounty)
WHERE year = 2017
ORDER BY deaths_per_thousand DESC;
'''

# Run the query and preview the counties with the highest rates.
od_deaths = pd.read_sql(query, con = engine)
od_deaths.head()

Unnamed: 0,fipscounty,county,population,overdose_deaths,deaths_per_thousand
0,47021,CHEATHAM,39713.0,24.0,0.604
1,47067,HANCOCK,6605.0,3.0,0.454
2,47001,ANDERSON,75538.0,34.0,0.45
3,47093,KNOX,452286.0,196.0,0.433
4,47171,UNICOI,17830.0,7.0,0.393


In [58]:
# First attempt at combining: map every zip code to a single county and attach
# that county's 2017 overdose figures.
#   zip_counties - numbers the counties listed for each zip by descending
#                  tot_ratio. ROW_NUMBER() with a fipscounty tie-breaker is used
#                  instead of RANK() so that exactly one county per zip gets
#                  number 1, even when two counties tie on tot_ratio (RANK()
#                  would return both and duplicate the zip).
#   od_deaths    - 2017 overdose deaths, population and deaths per thousand
#                  residents for each county.
# `zip` is only selected for inspection; ultimately it is needed just as a join
# key and can be dropped from the SELECT.
# The final ORDER BY makes the row order deterministic.
query = '''
WITH zip_counties AS (
    SELECT zip
        , fipscounty
        , tot_ratio
        , ROW_NUMBER() OVER(PARTITION BY zip ORDER BY tot_ratio DESC, fipscounty) AS rank
    FROM zip_fips
),
od_deaths AS (
    SELECT f.fipscounty
        , f.county
        , p.population
        , overdose_deaths
        , ROUND(overdose_deaths / population * 1000.0, 3) AS deaths_per_thousand
    FROM overdose_deaths AS o
    JOIN fips_county AS f
    USING(fipscounty)
    JOIN population AS p
    USING(fipscounty)
    WHERE year = 2017
)
SELECT z.zip
    , o.county
    , o.population
    , o.overdose_deaths
    , o.deaths_per_thousand
FROM zip_counties AS z
JOIN od_deaths AS o
USING(fipscounty)
WHERE z.rank = 1
ORDER BY z.zip
'''

# NOTE: the result holds overdose figures per zip, not claims; the variable name
# is kept unchanged so other cells that reference it keep working.
claims_per_generic = pd.read_sql(query, con = engine)
claims_per_generic

Unnamed: 0,zip,county,population,overdose_deaths,deaths_per_thousand
0,37010,MONTGOMERY,192120.0,22.0,0.115
1,37011,DAVIDSON,678322.0,184.0,0.271
2,37012,DE KALB,19380.0,7.0,0.361
3,37013,DAVIDSON,678322.0,184.0,0.271
4,37014,WILLIAMSON,212161.0,24.0,0.113
...,...,...,...,...,...
755,38585,VAN BUREN,5675.0,0.0,0.000
756,38587,WHITE,26394.0,0.0,0.000
757,38588,JACKSON,11573.0,2.0,0.173
758,38589,FENTRESS,17940.0,1.0,0.056


In [62]:
# Opioid claims per prescription row, with the prescriber's zip code.
# prescription is joined to drug on drug_name and to prescriber on npi, and only
# drugs with opioid_drug_flag = 'Y' are kept. Nothing is aggregated here: every
# row carries npi, the prescriber's nppes_provider_zip5 (as `zip`),
# total_claim_count, generic_name and the coarse `opioid` label produced by the
# CASE expression ('other' when no LIKE pattern matches).
# TODO: rows that share the same `opioid` label still need to be combined.
query = '''
SELECT p1.npi
    , p2.nppes_provider_zip5 AS zip
    , p1.total_claim_count
    , d.generic_name
    , CASE WHEN d.generic_name LIKE '%%CODEINE%%' THEN 'codeine'
        WHEN d.generic_name LIKE '%%FENTANYL%%' THEN 'fentanyl'
        WHEN d.generic_name LIKE '%%HYDROCODONE%%' THEN 'hydrocodone'
        WHEN d.generic_name LIKE '%%MORPHINE%%' THEN 'morphine'
        WHEN d.generic_name LIKE '%%OXYCODONE%%' THEN 'oxycodone'
        WHEN d.generic_name LIKE '%%OXYMORPHONE%%' THEN 'oxymorphone'
        WHEN d.generic_name LIKE '%%TRAMADOL%%' THEN 'tramadol'
        ELSE 'other' END 
        AS opioid
FROM prescription AS p1
INNER JOIN drug AS d
USING(drug_name)
INNER JOIN  prescriber AS p2
USING (npi)
WHERE d.opioid_drug_flag = 'Y'
'''

# Run the query and preview the first rows.
claims_per_opioid = pd.read_sql(query, con = engine)
claims_per_opioid.head()

Unnamed: 0,npi,zip,total_claim_count,generic_name,opioid
0,1659798000.0,37311,525.0,OXYCODONE HCL/ACETAMINOPHEN,oxycodone
1,1003955000.0,37421,79.0,HYDROCODONE/ACETAMINOPHEN,hydrocodone
2,1073577000.0,37660,20.0,OXYCODONE HCL/ACETAMINOPHEN,oxycodone
3,1124013000.0,37067,17.0,TRAMADOL HCL,tramadol
4,1821086000.0,37404,199.0,OXYCODONE HCL,oxycodone


### 4 Is there an association between rates of opioid prescriptions and overdose deaths by county?
### 5 Is there any association between a particular type of opioid and number of overdose deaths?

In [86]:
# Questions 4 and 5: 2017 overdose figures and opioid claims per county, split
# by opioid label.
#   zip_counties - numbers the counties listed for each zip by descending
#                  tot_ratio. ROW_NUMBER() with a fipscounty tie-breaker is used
#                  instead of RANK() so that every zip maps to exactly one
#                  county; with RANK() a tie on tot_ratio would attach the zip
#                  to two counties and count its claims twice.
#   od_deaths    - one row per zip with the county it maps to and that county's
#                  2017 population, overdose deaths and deaths per thousand.
#   npis         - opioid-flagged prescription rows with the prescriber's zip
#                  and a coarse `opioid` label ('other' when no pattern matches).
#   npi_opioids  - claims summed per npi, zip and opioid label.
# The final SELECT joins both sides on zip and sums claims per county and opioid
# label. Grouping columns are spelled out by name, and the ORDER BY makes the
# rows shown by .head(40) deterministic.
query = '''
WITH zip_counties AS (
    SELECT zip
        , fipscounty
        , tot_ratio
        , ROW_NUMBER() OVER(PARTITION BY zip ORDER BY tot_ratio DESC, fipscounty) AS rank
    FROM zip_fips
),
od_deaths AS (
    SELECT z.zip
        , f.fipscounty
        , f.county
        , p.population
        , overdose_deaths
        , ROUND(overdose_deaths / population * 1000.0, 3) AS deaths_per_thousand
    FROM overdose_deaths AS o
    JOIN fips_county AS f USING(fipscounty)
    JOIN population AS p USING(fipscounty)
    JOIN zip_counties AS z USING(fipscounty)
    WHERE year = 2017
    AND rank = 1
),
npis AS (
    SELECT p1.npi
        , p2.nppes_provider_zip5 AS zip
        , p1.total_claim_count
        , d.generic_name
        , CASE WHEN d.generic_name LIKE '%%CODEINE%%' THEN 'codeine'
            WHEN d.generic_name LIKE '%%FENTANYL%%' THEN 'fentanyl'
            WHEN d.generic_name LIKE '%%HYDROCODONE%%' THEN 'hydrocodone'
            WHEN d.generic_name LIKE '%%MORPHINE%%' THEN 'morphine'
            WHEN d.generic_name LIKE '%%OXYCODONE%%' THEN 'oxycodone'
            WHEN d.generic_name LIKE '%%OXYMORPHONE%%' THEN 'oxymorphone'
            WHEN d.generic_name LIKE '%%TRAMADOL%%' THEN 'tramadol'
            ELSE 'other' END
            AS opioid
    FROM prescription AS p1
    INNER JOIN drug AS d
    USING(drug_name)
    INNER JOIN prescriber AS p2
    USING (npi)
    WHERE d.opioid_drug_flag = 'Y'
),
npi_opioids AS (
    SELECT npi, zip, SUM(total_claim_count) AS claims, opioid
    FROM npis
    GROUP BY npi, zip, opioid
)
SELECT od.county
    , od.population
    , od.overdose_deaths
    , od.deaths_per_thousand
    , SUM(n.claims) AS claims_per_county
    , n.opioid
FROM od_deaths AS od
JOIN npi_opioids AS n
USING (zip)
GROUP BY od.county, od.population, od.overdose_deaths, od.deaths_per_thousand, n.opioid
ORDER BY od.county, n.opioid
'''

# Run the query and preview the first 40 rows (several counties, all labels).
claims_per_opioid = pd.read_sql(query, con = engine)
claims_per_opioid.head(40)

Unnamed: 0,county,population,overdose_deaths,deaths_per_thousand,claims_per_county,opioid
0,ANDERSON,75538.0,34.0,0.45,694.0,codeine
1,ANDERSON,75538.0,34.0,0.45,498.0,fentanyl
2,ANDERSON,75538.0,34.0,0.45,14227.0,hydrocodone
3,ANDERSON,75538.0,34.0,0.45,1971.0,morphine
4,ANDERSON,75538.0,34.0,0.45,49.0,other
5,ANDERSON,75538.0,34.0,0.45,13099.0,oxycodone
6,ANDERSON,75538.0,34.0,0.45,716.0,oxymorphone
7,ANDERSON,75538.0,34.0,0.45,3628.0,tramadol
8,BEDFORD,46854.0,3.0,0.064,48.0,codeine
9,BEDFORD,46854.0,3.0,0.064,247.0,fentanyl


In [68]:
# Opioid claims per prescriber, zip code and opioid label.
# The opioids_per_npi CTE joins prescription to drug (on drug_name) and to
# prescriber (on npi), keeps drugs with opioid_drug_flag = 'Y', and tags every
# row with a coarse `opioid` label ('other' when no LIKE pattern matches).
# The outer query then sums total_claim_count per npi, zip and opioid label.
query = '''
WITH opioids_per_npi AS (
    SELECT p1.npi
        , p2.nppes_provider_zip5 AS zip
        , p1.total_claim_count
        , d.generic_name
        , CASE WHEN d.generic_name LIKE '%%CODEINE%%' THEN 'codeine'
            WHEN d.generic_name LIKE '%%FENTANYL%%' THEN 'fentanyl'
            WHEN d.generic_name LIKE '%%HYDROCODONE%%' THEN 'hydrocodone'
            WHEN d.generic_name LIKE '%%MORPHINE%%' THEN 'morphine'
            WHEN d.generic_name LIKE '%%OXYCODONE%%' THEN 'oxycodone'
            WHEN d.generic_name LIKE '%%OXYMORPHONE%%' THEN 'oxymorphone'
            WHEN d.generic_name LIKE '%%TRAMADOL%%' THEN 'tramadol'
            ELSE 'other' END 
            AS opioid
    FROM prescription AS p1
    INNER JOIN drug AS d
    USING(drug_name)
    INNER JOIN  prescriber AS p2
    USING (npi)
    WHERE d.opioid_drug_flag = 'Y'
)
SELECT npi, zip, SUM(total_claim_count) AS claims, opioid
FROM opioids_per_npi
GROUP BY npi, zip, opioid
'''

# Run the query and display the result (this overwrites claims_per_opioid).
claims_per_opioid = pd.read_sql(query, con = engine)
claims_per_opioid

Unnamed: 0,npi,zip,claims,opioid
0,1.003013e+09,37043,121.0,hydrocodone
1,1.003013e+09,37043,15.0,morphine
2,1.003013e+09,37043,29.0,other
3,1.003013e+09,37043,100.0,oxycodone
4,1.003013e+09,37043,50.0,tramadol
...,...,...,...,...
26943,1.992994e+09,37604,74.0,oxycodone
26944,1.992996e+09,37403,25.0,hydrocodone
26945,1.992996e+09,37403,44.0,oxycodone
26946,1.993000e+09,38119,42.0,hydrocodone


In [81]:
# 2017 overdose figures per zip code (each zip mapped to a single county).
#   zip_counties - numbers the counties listed for each zip by descending
#                  tot_ratio. ROW_NUMBER() with a fipscounty tie-breaker is used
#                  instead of RANK() so that exactly one county per zip gets
#                  number 1, even when two counties tie on tot_ratio (RANK()
#                  would return both and duplicate the zip).
#   od_deaths    - for every zip, the county it maps to plus that county's 2017
#                  population, overdose deaths and deaths per thousand.
# The final ORDER BY makes the row order deterministic.
query = '''
WITH zip_counties AS (
    SELECT zip
        , fipscounty
        , tot_ratio
        , ROW_NUMBER() OVER(PARTITION BY zip ORDER BY tot_ratio DESC, fipscounty) AS rank
    FROM zip_fips
),
od_deaths AS (
    SELECT z.zip
        , f.fipscounty
        , f.county
        , p.population
        , overdose_deaths
        , ROUND(overdose_deaths / population * 1000.0, 3) AS deaths_per_thousand
    FROM overdose_deaths AS o
    JOIN fips_county AS f USING(fipscounty)
    JOIN population AS p USING(fipscounty)
    JOIN zip_counties AS z USING(fipscounty)
    WHERE year = 2017
    AND rank = 1
)
SELECT zip
    , county
    , population
    , overdose_deaths
    , deaths_per_thousand
FROM od_deaths
ORDER BY zip
'''

# NOTE: the result holds overdose figures per zip, not claims; the variable name
# is kept unchanged so other cells that reference it keep working.
claims_per_opioid = pd.read_sql(query, con = engine)
claims_per_opioid

Unnamed: 0,zip,county,population,overdose_deaths,deaths_per_thousand
0,37010,MONTGOMERY,192120.0,22.0,0.115
1,37011,DAVIDSON,678322.0,184.0,0.271
2,37012,DE KALB,19380.0,7.0,0.361
3,37013,DAVIDSON,678322.0,184.0,0.271
4,37014,WILLIAMSON,212161.0,24.0,0.113
...,...,...,...,...,...
755,38585,VAN BUREN,5675.0,0.0,0.000
756,38587,WHITE,26394.0,0.0,0.000
757,38588,JACKSON,11573.0,2.0,0.173
758,38589,FENTRESS,17940.0,1.0,0.056
