In [117]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import scipy.stats as stats
import seaborn as sns
import statsmodels.api as sm
from sqlalchemy import create_engine
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Raise the pandas display limit so frames of up to 999 rows print in full
# instead of being truncated.
pd.options.display.max_rows = 999

In [118]:
# Connection URL for the local `prescribers` database.
# The dialect name must be "postgresql": the old "postgres://" alias is no
# longer accepted by SQLAlchemy 1.4+.
# Set PRESCRIBERS_DB_URL to point at another server / credentials without
# editing (or committing secrets into) the notebook; when it is unset the
# local development default below is used.
connection_string = os.environ.get(
    "PRESCRIBERS_DB_URL",
    "postgresql://postgres:postgres@localhost:5432/prescribers",
)
engine = create_engine(connection_string)

In [119]:
# SQL selecting every column of `opioid_scrips`, restricted to rows with a
# positive county population (county_pop is later used as a denominator when
# computing claims per 10k residents).
query = '''
SELECT *
FROM opioid_scrips
where county_pop>0;
'''

# Execute the query directly on the engine to obtain a raw result cursor.
# NOTE(review): `Engine.execute` is legacy API (deprecated in SQLAlchemy 1.4,
# removed in 2.0) — confirm the installed SQLAlchemy version before upgrading.
result = engine.execute(query)

In [120]:
# Peek at a single raw row to check column order and the DB-side types
# (NUMERIC columns come back as Decimal), then close the cursor: a partially
# consumed result keeps its pooled connection checked out until it is closed.
# Re-run the query cell first if this cell needs to be executed again.
first_row = result.fetchone()
result.close()
first_row

(1, 'OXYCODONE HCL/ACETAMINOPHEN', 'Y', 'N', Decimal('525'), Decimal('27665.43'), Decimal('1659797769'), 'HILL', 'PIPER', 'FNP-C', 'CLEVELAND', 'TN', '37311', 'Nurse Practitioner', 'BRADLEY', Decimal('103666'))

In [121]:
# Load the full query result into a DataFrame and preview the first rows.
opioid_scrips = pd.read_sql(sql=query, con=engine)
opioid_scrips.head(n=5)

Unnamed: 0,id,generic_name,opioid,long_acting,total_claim_count,total_drug_cost,npi,provider_lname,provider_fname,nppes_credentials,provider_city,provider_state,provider_zip5,specialty_desc,provider_county,county_pop
0,1,OXYCODONE HCL/ACETAMINOPHEN,Y,N,525.0,27665.43,1659798000.0,HILL,PIPER,FNP-C,CLEVELAND,TN,37311,Nurse Practitioner,BRADLEY,103666.0
1,2,HYDROCODONE/ACETAMINOPHEN,Y,N,79.0,626.75,1003955000.0,JACKS,BEVERLY,"CNM, APRN",CHATTANOOGA,TN,37421,Certified Nurse Midwife,HAMILTON,354589.0
2,3,HYDROCODONE/ACETAMINOPHEN,Y,N,12.0,154.58,1487942000.0,NGO,PAUL,D.O,NASHVILLE,TN,37208,Family Practice,DAVIDSON,678322.0
3,4,TRAMADOL HCL,Y,N,26.0,112.66,1295730000.0,DRAKE,ALAN,M.D.,SPARTA,TN,38583,Family Practice,WHITE,26394.0
4,5,HYDROCODONE/ACETAMINOPHEN,Y,N,12.0,53.25,1316100000.0,FRANCIS,KENDRA,DMD,KINGSPORT,TN,37660,Dentist,SULLIVAN,156519.0


In [122]:
# Drop the surrogate `id` column; it carries no analytical meaning.
# errors='ignore' keeps the cell idempotent: re-running it after the column
# is already gone is a no-op instead of a KeyError.
opioid_scrips = opioid_scrips.drop(columns='id', errors='ignore')

In [123]:
# Build a "LAST, FIRST" display name for each provider.
# NOTE(review): a missing last or first name yields a missing provider_name,
# and groupby drops missing keys by default — confirm both columns are
# always populated.
opioid_scrips['provider_name'] = opioid_scrips['provider_lname'].str.cat(
    opioid_scrips['provider_fname'], sep=', '
)

In [124]:
# Preview the first ten rows, now including the derived provider_name column.
opioid_scrips.iloc[:10]

Unnamed: 0,generic_name,opioid,long_acting,total_claim_count,total_drug_cost,npi,provider_lname,provider_fname,nppes_credentials,provider_city,provider_state,provider_zip5,specialty_desc,provider_county,county_pop,provider_name
0,OXYCODONE HCL/ACETAMINOPHEN,Y,N,525.0,27665.43,1659798000.0,HILL,PIPER,FNP-C,CLEVELAND,TN,37311,Nurse Practitioner,BRADLEY,103666.0,"HILL, PIPER"
1,HYDROCODONE/ACETAMINOPHEN,Y,N,79.0,626.75,1003955000.0,JACKS,BEVERLY,"CNM, APRN",CHATTANOOGA,TN,37421,Certified Nurse Midwife,HAMILTON,354589.0,"JACKS, BEVERLY"
2,HYDROCODONE/ACETAMINOPHEN,Y,N,12.0,154.58,1487942000.0,NGO,PAUL,D.O,NASHVILLE,TN,37208,Family Practice,DAVIDSON,678322.0,"NGO, PAUL"
3,TRAMADOL HCL,Y,N,26.0,112.66,1295730000.0,DRAKE,ALAN,M.D.,SPARTA,TN,38583,Family Practice,WHITE,26394.0,"DRAKE, ALAN"
4,HYDROCODONE/ACETAMINOPHEN,Y,N,12.0,53.25,1316100000.0,FRANCIS,KENDRA,DMD,KINGSPORT,TN,37660,Dentist,SULLIVAN,156519.0,"FRANCIS, KENDRA"
5,FENTANYL,Y,Y,13.0,2416.62,1952795000.0,MARDIS,LESLIE,,MARYVILLE,TN,37804,Nurse Practitioner,BLOUNT,127135.0,"MARDIS, LESLIE"
6,HYDROCODONE/ACETAMINOPHEN,Y,N,19.0,185.38,1851339000.0,WILES,DAVID,MD,JOHNSON CITY,TN,37604,Neurosurgery,WASHINGTON,126437.0,"WILES, DAVID"
7,MORPHINE SULFATE,Y,Y,33.0,2810.3,1487743000.0,SHONE,LOUIS,ANP,LYNNVILLE,TN,38472,Nurse Practitioner,GILES,29024.0,"SHONE, LOUIS"
8,OXYCODONE HCL,Y,N,13.0,367.54,1780872000.0,MORGAN,VICKIE,M.D.,BRISTOL,TN,37620,Medical Oncology,SULLIVAN,156519.0,"MORGAN, VICKIE"
9,OXYCODONE HCL/ACETAMINOPHEN,Y,N,20.0,159.69,1073577000.0,PASTRICK,GREGORY,"M.D., P.C.",KINGSPORT,TN,37660,Plastic Surgery,SULLIVAN,156519.0,"PASTRICK, GREGORY"


In [126]:
# Total claim count per drug (generic name), split by the long-acting flag
# and ordered from most to fewest claims.
# The aggregation is named with the string 'sum' rather than the Python
# builtin: passing the builtin to .agg is deprecated in recent pandas.
(
    opioid_scrips
    .groupby(['generic_name', 'long_acting'])
    .agg({'total_claim_count': 'sum'})
    .sort_values('total_claim_count', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_claim_count
generic_name,long_acting,Unnamed: 2_level_1
HYDROCODONE/ACETAMINOPHEN,N,1122353.0
OXYCODONE HCL/ACETAMINOPHEN,N,455553.0
TRAMADOL HCL,N,335310.0
OXYCODONE HCL,N,293018.0
MORPHINE SULFATE,Y,137006.0
FENTANYL,Y,61489.0
OXYCODONE HCL,Y,37212.0
ACETAMINOPHEN WITH CODEINE,N,34649.0
OXYMORPHONE HCL,Y,32622.0
MORPHINE SULFATE,N,15920.0


In [127]:
# Total claim count per prescriber specialty, largest first.
# 'sum' is passed as a string because handing the Python builtin to .agg is
# deprecated in recent pandas.
(
    opioid_scrips
    .groupby('specialty_desc')
    .agg({'total_claim_count': 'sum'})
    .sort_values('total_claim_count', ascending=False)
)


Unnamed: 0_level_0,total_claim_count
specialty_desc,Unnamed: 1_level_1
Nurse Practitioner,899155.0
Family Practice,467063.0
Internal Medicine,402031.0
Physician Assistant,204084.0
Orthopedic Surgery,78564.0
Anesthesiology,53587.0
Emergency Medicine,53539.0
Physical Medicine and Rehabilitation,46686.0
Pain Management,42101.0
Dentist,39945.0


In [128]:
# Total claim count per specialty, split by the long-acting flag, largest
# first.  'sum' is passed as a string because handing the Python builtin to
# .agg is deprecated in recent pandas.
(
    opioid_scrips
    .groupby(['specialty_desc', 'long_acting'])
    .agg({'total_claim_count': 'sum'})
    .sort_values('total_claim_count', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_claim_count
specialty_desc,long_acting,Unnamed: 2_level_1
Nurse Practitioner,N,751224.0
Family Practice,N,438080.0
Internal Medicine,N,375789.0
Physician Assistant,N,175448.0
Nurse Practitioner,Y,147931.0
Orthopedic Surgery,N,77983.0
Emergency Medicine,N,51289.0
Anesthesiology,N,40048.0
Dentist,N,39945.0
Physical Medicine and Rehabilitation,N,37163.0


In [129]:
# Total claim count per provider (name + specialty), largest first.
# 'sum' is passed as a string because handing the Python builtin to .agg is
# deprecated in recent pandas.
# NOTE(review): providers are keyed by name here, so two different people who
# share a name and specialty would be merged — consider grouping on `npi`.
(
    opioid_scrips
    .groupby(['provider_name', 'specialty_desc'])
    .agg({'total_claim_count': 'sum'})
    .sort_values('total_claim_count', ascending=False)
)


Unnamed: 0_level_0,Unnamed: 1_level_0,total_claim_count
provider_name,specialty_desc,Unnamed: 2_level_1
"COFFEY, DAVID",Family Practice,9275.0
"KINDRICK, JUSTIN",Nurse Practitioner,8405.0
"CATHERS, SHARON",Nurse Practitioner,7274.0
"PAINTER, MICHELLE",Nurse Practitioner,5709.0
"CLARK, RICHARD",Internal Medicine,5607.0
...,...,...
"ST. CHARLES, CHRISTOPHER",Otolaryngology,11.0
"THOMAS, BLAIR",Dentist,11.0
"HITT, LISA",Nurse Practitioner,11.0
"JONES, DAVID",Dentist,11.0


In [130]:
# Per-provider claim totals, carrying the provider's county, city and county
# population along as group keys so they survive the aggregation.  Sorted
# from most to fewest claims.
# 'sum' is passed as a string because handing the Python builtin to .agg is
# deprecated in recent pandas.
# NOTE(review): providers are keyed by name rather than `npi`; distinct
# providers sharing name, specialty and location would be merged — confirm.
claims_per10k_a = (
    opioid_scrips
    .groupby(['provider_name', 'specialty_desc', 'provider_county',
              'provider_city', 'county_pop'])
    .agg({'total_claim_count': 'sum'})
    .sort_values('total_claim_count', ascending=False)
)

In [131]:
# Drop any rows with missing totals and turn the group keys back into ordinary
# columns.  dropna() returns a new frame, so its result has to be assigned —
# calling it on its own line leaves the original frame untouched.
claims_per10k_a = claims_per10k_a.dropna().reset_index()
claims_per10k_a.head()

Unnamed: 0,provider_name,specialty_desc,provider_county,provider_city,county_pop,total_claim_count
0,"COFFEY, DAVID",Family Practice,SCOTT,ONEIDA,21949.0,9275.0
1,"KINDRICK, JUSTIN",Nurse Practitioner,CUMBERLAND,CROSSVILLE,58178.0,8405.0
2,"CATHERS, SHARON",Nurse Practitioner,KNOX,KNOXVILLE,452286.0,7274.0
3,"PAINTER, MICHELLE",Nurse Practitioner,SULLIVAN,BRISTOL,156519.0,5709.0
4,"CLARK, RICHARD",Internal Medicine,FENTRESS,JAMESTOWN,17940.0,5607.0


In [132]:
# Normalise each provider's claim total by county population, expressed as
# claims per 10,000 residents.
claims_per10k_a['claims_per10k'] = (
    claims_per10k_a['total_claim_count']
    .div(claims_per10k_a['county_pop'])
    .mul(10000)
)

In [133]:
# Preview the ten highest-volume providers with the new per-10k rate.
claims_per10k_a.iloc[:10]

Unnamed: 0,provider_name,specialty_desc,provider_county,provider_city,county_pop,total_claim_count,claims_per10k
0,"COFFEY, DAVID",Family Practice,SCOTT,ONEIDA,21949.0,9275.0,4225.705044
1,"KINDRICK, JUSTIN",Nurse Practitioner,CUMBERLAND,CROSSVILLE,58178.0,8405.0,1444.704184
2,"CATHERS, SHARON",Nurse Practitioner,KNOX,KNOXVILLE,452286.0,7274.0,160.827441
3,"PAINTER, MICHELLE",Nurse Practitioner,SULLIVAN,BRISTOL,156519.0,5709.0,364.74805
4,"CLARK, RICHARD",Internal Medicine,FENTRESS,JAMESTOWN,17940.0,5607.0,3125.41806
5,"LADSON, JAMES",Anesthesiology,RUTHERFORD,MURFREESBORO,298456.0,5423.0,181.701825
6,"WILLETT, DWIGHT",Family Practice,ROANE,KINGSTON,52926.0,5221.0,986.471677
7,"TAYLOR, ALICIA",Physician Assistant,CAMPBELL,LA FOLLETTE,39824.0,5088.0,1277.621535
8,"GREEN, JENNIFER",Nurse Practitioner,KNOX,KNOXVILLE,452286.0,4979.0,110.085212
9,"BOWSER, AMY",Nurse Practitioner,SUMNER,GALLATIN,175730.0,4979.0,283.332385


In [134]:
# Bucket rows into three equal-sized groups by county population.
# The numeric prefixes on the labels keep them in low-to-high order when
# sorted as text.
# NOTE(review): the terciles are computed over provider rows, not over
# distinct counties, so populous counties with many providers weigh more —
# confirm this is intended.
claims_per10k_a['pop_ctgry'] = pd.qcut(
    x=claims_per10k_a['county_pop'],
    q=3,
    labels=['1Low', '2Med', '3High'],
)

In [135]:
# Bucket providers into three equal-sized groups by total claim volume.
# The numeric prefixes on the labels keep them in low-to-high order when
# sorted as text.
claims_per10k_a['vol_ctgry'] = pd.qcut(
    x=claims_per10k_a['total_claim_count'],
    q=3,
    labels=['1Low', '2Med', '3High'],
)

In [136]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the per-provider table.
claims_per10k_a.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,county_pop,total_claim_count,claims_per10k
count,11012.0,11012.0,11012.0
mean,357074.047857,233.712859,31.820935
std,314344.510027,488.49403,120.338567
min,5071.0,11.0,0.11729
25%,75565.0,27.0,0.884864
50%,298456.0,70.0,3.36469
75%,678322.0,208.0,15.38461
max,937847.0,9275.0,4225.705044


In [137]:
# Sanity check: count rows whose county population is zero.  Every count
# should be 0, otherwise the per-10k rate would divide by zero.
claims_per10k_a.loc[claims_per10k_a['county_pop'].eq(0)].count()

provider_name        0
specialty_desc       0
provider_county      0
provider_city        0
county_pop           0
total_claim_count    0
claims_per10k        0
pop_ctgry            0
vol_ctgry            0
dtype: int64

In [141]:
# Natural log of the per-10k rate, displayed only (the result is not stored).
claims_per10k_a['claims_per10k'].pipe(np.log)

0        8.348941
1        7.275660
2        5.080332
3        5.899207
4        8.047323
           ...   
11007   -0.352697
11008   -0.352697
11009    0.116667
11010   -2.143106
11011   -1.413834
Name: claims_per10k, Length: 11012, dtype: float64

In [140]:
# Scatter of total claims against claims per 10k residents, restricted to the
# top tercile of providers by claim volume and coloured by county population.
# A title and readable axis / colour-bar labels are set so the figure can be
# understood on its own when the notebook is skimmed.
fig = px.scatter(
    claims_per10k_a[claims_per10k_a['vol_ctgry'] == '3High'],
    x='total_claim_count',
    y='claims_per10k',
    color='county_pop',
    color_continuous_scale=px.colors.sequential.Viridis,
    title='Top-tercile prescribers by volume: total claims vs. claims per 10k residents',
    labels={
        'total_claim_count': 'Total claim count',
        'claims_per10k': 'Claims per 10,000 county residents',
        'county_pop': 'County population',
    },
)
# Semi-transparent markers with a thin black outline keep overlapping points
# distinguishable.
fig.update_traces(marker=dict(size=8,
                              opacity=.5,
                              line=dict(color='Black', width=1)))
fig.show()

In [69]:
# Total claim count per county, in alphabetical county order.
# 'sum' is passed as a string because handing the Python builtin to .agg is
# deprecated in recent pandas; the county is the index after the groupby, so
# the ordering is expressed as an index sort.
(
    opioid_scrips
    .groupby(['provider_county'])
    .agg({'total_claim_count': 'sum'})
    .sort_index()
)

Unnamed: 0_level_0,total_claim_count
provider_county,Unnamed: 1_level_1
ANDERSON,34882.0
BEDFORD,8289.0
BENTON,10717.0
BLEDSOE,3306.0
BLOUNT,27352.0
BRADLEY,48244.0
CAMPBELL,18687.0
CANNON,2758.0
CARROLL,19512.0
CARTER,11230.0


In [33]:
# Total claim count per county / specialty / long-acting flag.  Counties are
# listed alphabetically and, within each county, groups run from most to
# fewest claims.
# 'sum' is passed as a string because handing the Python builtin to .agg is
# deprecated in recent pandas; `ascending` is given as a list, the documented
# form for per-key sort directions.
(
    opioid_scrips
    .groupby(['provider_county', 'specialty_desc', 'long_acting'])
    .agg({'total_claim_count': 'sum'})
    .sort_values(['provider_county', 'total_claim_count'],
                 ascending=[True, False])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total_claim_count
provider_county,specialty_desc,long_acting,Unnamed: 3_level_1
ANDERSON,Nurse Practitioner,N,11751.0
ANDERSON,Family Practice,N,8217.0
ANDERSON,Physician Assistant,N,3718.0
ANDERSON,Orthopedic Surgery,N,2666.0
ANDERSON,Nurse Practitioner,Y,2195.0
...,...,...,...
WILSON,Infectious Disease,N,37.0
WILSON,Podiatry,N,30.0
WILSON,Gastroenterology,N,25.0
WILSON,Student in an Organized Health Care Education/Training Program,N,19.0


In [34]:
# TODO: plotly express treemap

Take the top quartile of drugs and group them together by county. Who are the top prescribers of that volume in each county?