In [39]:
import pandas as pd
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect

# Turn off pandas' chained-assignment warning for this notebook (pandas' default is 'warn').
pd.options.mode.chained_assignment = None

# Relative paths of the three CSV inputs: drug data, unemployment rate, stress by state.
csvDD, csvUER, csvST = (
    "csv data/Clean Drug Data.csv",
    "csv data/Clean Unemployment Rate.csv",
    "csv data/Stress by State.csv",
)

In [40]:
# Read the three CSV inputs in a fixed order: drug data, unemployment rate, stress.
druguse_data, unemployment_data, stress_data = (
    pd.read_csv(csv_path) for csv_path in (csvDD, csvUER, csvST)
)

In [41]:
# Wrap each loaded table in a DataFrame under the *_df names used by the later cells.
# (pd.read_csv already returns DataFrames, so this only re-wraps the same data.)
druguse_df, unemployment_df, stress_df = map(
    pd.DataFrame, (druguse_data, unemployment_data, stress_data)
)


In [54]:
# Keep only the columns this analysis needs from the long-format drug table.
druguse_cleaned = druguse_df.filter(['state', 'year', 'indicator', 'data_value', 'state_name'])

# Split by indicator and give each count a descriptive name. rename() without
# inplace returns a new frame, so no slice of druguse_cleaned is mutated.
druguse_ods = (
    druguse_cleaned[druguse_cleaned['indicator'] == 'Number of Drug Overdose Deaths']
    .rename(columns={'data_value': 'OD_Deaths'})
)
druguse_totaldeaths = (
    druguse_cleaned[druguse_cleaned['indicator'] == 'Number of Deaths']
    .rename(columns={'data_value': 'TOTAL_Deaths'})
)

# Pair overdose deaths with total deaths per (state, year). Non-key columns
# present on both sides (indicator, state_name) come back suffixed _x / _y.
drug_death_merge = druguse_ods.merge(druguse_totaldeaths, how='inner', on=['state', 'year'])

# 'state' (the abbreviation) must be kept here so it can become 'abbr' below.
# A bare 'indicator' label no longer exists after the merge, so it is not listed.
drug_death_filtered = drug_death_merge.filter(
    ['state', 'year', 'OD_Deaths', 'TOTAL_Deaths', 'state_name_x']
)

# Build drug_death_final explicitly: it was previously assigned into without ever
# being created, which raises NameError on a fresh kernel. The abbreviation column
# becomes 'abbr' and the full name becomes 'state', because the later merge with
# unemployment_df joins on the full state name.
drug_death_final = (
    drug_death_filtered
    .rename(columns={'state': 'abbr', 'state_name_x': 'state'})
    .assign(OD_perctage=lambda d: d.OD_Deaths / d.TOTAL_Deaths * 100)
)

drug_death_final.head()


Unnamed: 0,abbr,year,OD_Deaths,TOTAL_Deaths,state,OD_perctage
0,AK,2015,121.0,4193.0,Alaska,2.885762
1,AK,2016,129.0,4355.0,Alaska,2.962113
2,AK,2017,141.0,4279.0,Alaska,3.295162
3,AK,2018,105.0,4340.0,Alaska,2.419355
4,AL,2015,720.0,50870.0,Alabama,1.415373


In [58]:
# Attach the overdose statistics to every unemployment row that has a matching
# (state, year) pair; both frames carry the full state name in 'state'.
# The inner join drops rows without a match on either side.
# (A bare `unemployment_df.head()` used to precede this; it was never displayed
# because only the last expression of a cell is rendered, so it was removed.)
combined_merge = unemployment_df.merge(drug_death_final, how='inner', on=['state', 'year'])

combined_merge

Unnamed: 0,state,year,month,unemployment_rate,abbr,OD_Deaths,TOTAL_Deaths,OD_perctage
0,Alabama,2015,12,6.0,AL,720.0,50870.0,1.415373
1,Alaska,2015,12,6.7,AK,121.0,4193.0,2.885762
2,Arizona,2015,12,5.7,AZ,1267.0,54853.0,2.309810
3,Arkansas,2015,12,4.3,AR,381.0,30986.0,1.229588
4,California,2015,12,5.7,CA,4762.0,260172.0,1.830328
...,...,...,...,...,...,...,...,...
199,Virginia,2018,12,3.0,VA,1434.0,68469.0,2.094378
200,Washington,2018,12,4.5,WA,1163.0,56928.0,2.042931
201,West Virginia,2018,12,4.9,WV,883.0,22940.0,3.849172
202,Wisconsin,2018,12,3.0,WI,1087.0,53556.0,2.029651


In [64]:
def _indicator_counts(frame, indicator, value_name):
    """Select one indicator's rows and rename its value column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with at least 'indicator' and 'data_value' columns.
    indicator : str
        Exact value of the 'indicator' column to keep.
    value_name : str
        New name for the 'data_value' column in the result.

    Returns
    -------
    pandas.DataFrame
        A new frame; `frame` itself is not modified.
    """
    return frame[frame['indicator'] == indicator].rename(columns={'data_value': value_name})


# One frame per drug class, each with its count column named for that drug.
cocaine_data = _indicator_counts(druguse_cleaned, 'Cocaine (T40.5)', 'coke_od')
heroin_data = _indicator_counts(druguse_cleaned, 'Heroin (T40.1)', 'her_od')
opiodids_data = _indicator_counts(druguse_cleaned, 'Opioids (T40.0-T40.4,T40.6)', 'opi_od')

opiodids_data

Unnamed: 0,state,year,indicator,opi_od,state_name
3,AK,2015,"Opioids (T40.0-T40.4,T40.6)",0.0,Alaska
19,AK,2016,"Opioids (T40.0-T40.4,T40.6)",96.0,Alaska
32,AK,2017,"Opioids (T40.0-T40.4,T40.6)",100.0,Alaska
43,AK,2018,"Opioids (T40.0-T40.4,T40.6)",65.0,Alaska
75,AZ,2015,"Opioids (T40.0-T40.4,T40.6)",0.0,Arizona
...,...,...,...,...,...
1840,WY,2018,"Opioids (T40.0-T40.4,T40.6)",41.0,Wyoming
1857,YC,2015,"Opioids (T40.0-T40.4,T40.6)",726.0,New York City
1869,YC,2016,"Opioids (T40.0-T40.4,T40.6)",1136.0,New York City
1881,YC,2017,"Opioids (T40.0-T40.4,T40.6)",1201.0,New York City


In [79]:
# Line up the three per-drug counts for each (state, year): cocaine + heroin first,
# then opioids joined onto that result.
join_keys = ['state', 'year']
merge1 = cocaine_data.merge(heroin_data, how='inner', left_on=join_keys, right_on=join_keys)
merge2 = merge1.merge(opiodids_data, how='inner', left_on=join_keys, right_on=join_keys)

# Keep the keys, the three counts and the state name, then derive the combined
# total and each drug's percentage share of it. Rows whose total is 0 yield NaN shares.
merge_clean = merge2.filter(
    ['state', 'year', 'coke_od', 'her_od', 'opi_od', 'state_name']
).assign(
    OD_sum=lambda d: d.coke_od + d.her_od + d.opi_od,
    coke_per=lambda d: d.coke_od / d.OD_sum * 100,
    her_per=lambda d: d.her_od / d.OD_sum * 100,
    opi_per=lambda d: d.opi_od / d.OD_sum * 100,
)

merge_clean

Unnamed: 0,state,year,coke_od,her_od,opi_od,state_name,OD_sum,coke_per,her_per,opi_per
0,AK,2015,0.0,0.0,0.0,Alaska,0.0,,,
1,AK,2016,15.0,49.0,96.0,Alaska,160.0,9.375000,30.625000,60.000000
2,AK,2017,18.0,36.0,100.0,Alaska,154.0,11.688312,23.376623,64.935065
3,AK,2018,10.0,28.0,65.0,Alaska,103.0,9.708738,27.184466,63.106796
4,AZ,2015,0.0,0.0,0.0,Arizona,0.0,,,
...,...,...,...,...,...,...,...,...,...,...
135,WY,2018,0.0,0.0,41.0,Wyoming,41.0,0.000000,0.000000,100.000000
136,YC,2015,290.0,408.0,726.0,New York City,1424.0,20.365169,28.651685,50.983146
137,YC,2016,507.0,574.0,1136.0,New York City,2217.0,22.868742,25.890843,51.240415
138,YC,2017,630.0,643.0,1201.0,New York City,2474.0,25.464834,25.990299,48.544867
