In [1]:
import pandas as pd
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect
import json

pd.options.mode.chained_assignment = None  # default='warn'

#Pull in CSV paths
csvDD = "csv data/Clean Drug Data.csv"
csvUER = "csv data/Clean Unemployment Rate.csv"
csvST = "csv data/Stress by State.csv"

In [2]:
druguse_data = pd.read_csv(csvDD)
unemployment_data = pd.read_csv(csvUER)
stress_data = pd.read_csv(csvST)

In [3]:
#Set DataFrames
druguse_df = pd.DataFrame(druguse_data)
unemployment_df = pd.DataFrame(unemployment_data)
stress_df = pd.DataFrame(stress_data)


In [4]:
druguse_cleaned = druguse_df.filter(['state', 'year', 'indicator', 'data_value', 'state_name'])
druguse_ods = druguse_cleaned[druguse_cleaned['indicator'] == 'Number of Drug Overdose Deaths']
druguse_totaldeaths = druguse_cleaned[druguse_cleaned['indicator'] == 'Number of Deaths']

druguse_ods.rename(columns = {'data_value': 'OD_Deaths'}, inplace = True)
druguse_totaldeaths.rename(columns = {'data_value': 'TOTAL_Deaths'}, inplace = True)

drug_death_merge = druguse_ods.merge(druguse_totaldeaths, how ='inner', left_on=['state', 'year'], right_on=['state', 'year'])

drug_death_filtered = drug_death_merge.filter(['year', 'indicator', 'OD_Deaths', 'TOTAL_Deaths', 'state_name_x'])

drug_death_filtered['OD_perctage'] = drug_death_filtered.OD_Deaths / drug_death_filtered.TOTAL_Deaths * 100

drug_death_filtered.rename(columns = {'state_name_x': 'state' }, inplace = True)

drug_death_final = drug_death_filtered.set_index('state')


drug_death_final.head()

Unnamed: 0_level_0,year,OD_Deaths,TOTAL_Deaths,OD_perctage
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alaska,2015,121.0,4193.0,2.885762
Alaska,2016,129.0,4355.0,2.962113
Alaska,2017,141.0,4279.0,3.295162
Alaska,2018,105.0,4340.0,2.419355
Alabama,2015,720.0,50870.0,1.415373


In [5]:
unemployment_df.head()

combined_merge = unemployment_df.merge(drug_death_filtered, how ='inner', left_on=['state', 'year'], right_on=['state', 'year'])

combinded_merge_final = combined_merge.set_index('state')
combinded_merge_final

Unnamed: 0_level_0,year,month,unemployment_rate,OD_Deaths,TOTAL_Deaths,OD_perctage
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alabama,2015,12,6.0,720.0,50870.0,1.415373
Alaska,2015,12,6.7,121.0,4193.0,2.885762
Arizona,2015,12,5.7,1267.0,54853.0,2.309810
Arkansas,2015,12,4.3,381.0,30986.0,1.229588
California,2015,12,5.7,4762.0,260172.0,1.830328
...,...,...,...,...,...,...
Virginia,2018,12,3.0,1434.0,68469.0,2.094378
Washington,2018,12,4.5,1163.0,56928.0,2.042931
West Virginia,2018,12,4.9,883.0,22940.0,3.849172
Wisconsin,2018,12,3.0,1087.0,53556.0,2.029651


In [6]:
cocaine_data = druguse_cleaned[druguse_cleaned['indicator'] == 'Cocaine (T40.5)']
heroin_data = druguse_cleaned[druguse_cleaned['indicator'] == 'Heroin (T40.1)']
opiodids_data = druguse_cleaned[druguse_cleaned['indicator'] == 'Opioids (T40.0-T40.4,T40.6)']

cocaine_data.rename(columns = {'data_value': 'coke_od'}, inplace = True)
heroin_data.rename(columns = {'data_value': 'her_od'}, inplace = True)
opiodids_data.rename(columns = {'data_value': 'opi_od'}, inplace = True)



heroin_data


Unnamed: 0,state,year,indicator,her_od,state_name
9,AK,2015,Heroin (T40.1),0.0,Alaska
15,AK,2016,Heroin (T40.1),49.0,Alaska
25,AK,2017,Heroin (T40.1),36.0,Alaska
38,AK,2018,Heroin (T40.1),28.0,Alaska
81,AZ,2015,Heroin (T40.1),0.0,Arizona
...,...,...,...,...,...
1846,WY,2018,Heroin (T40.1),0.0,Wyoming
1855,YC,2015,Heroin (T40.1),408.0,New York City
1866,YC,2016,Heroin (T40.1),574.0,New York City
1878,YC,2017,Heroin (T40.1),643.0,New York City


In [7]:
merge1 = cocaine_data.merge(heroin_data, how ='inner', left_on=['state_name', 'year'], right_on=['state_name', 'year'])
merge2 = merge1.merge(opiodids_data, how ='inner', left_on=['state_name', 'year'], right_on=['state_name', 'year'])
merge_clean = merge2.filter(['state_name', 'year', 'coke_od', 'her_od', 'opi_od'])
merge_clean.rename(columns = {'state_name': 'state'}, inplace = True)
merge_clean['OD_sum'] = merge_clean.coke_od + merge_clean.her_od + merge_clean.opi_od
merge_clean['coke_per'] = merge_clean.coke_od / merge_clean.OD_sum * 100
merge_clean['her_per'] = merge_clean.her_od / merge_clean.OD_sum * 100
merge_clean['opi_per'] = merge_clean.opi_od / merge_clean.OD_sum * 100

merge_clean_final = merge_clean.set_index('state')
merge_clean_drnan = merge_clean_final.dropna()
merge_clean_drnan.head()

Unnamed: 0_level_0,year,coke_od,her_od,opi_od,OD_sum,coke_per,her_per,opi_per
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alaska,2016,15.0,49.0,96.0,160.0,9.375,30.625,60.0
Alaska,2017,18.0,36.0,100.0,154.0,11.688312,23.376623,64.935065
Alaska,2018,10.0,28.0,65.0,103.0,9.708738,27.184466,63.106796
Arizona,2017,136.0,341.0,936.0,1413.0,9.624912,24.13305,66.242038
Arizona,2018,175.0,367.0,1132.0,1674.0,10.454002,21.923536,67.622461


In [10]:
engine = create_engine("sqlite:///OD_Data_byDrug.db", echo=True)
sqlite_connect = engine.connect()
Base = automap_base()
Base.prepare(engine, reflect = True)
Base.classes.keys()

sqlite_table = "OD Data by Drug Type"
merge_clean_drnan.to_sql(sqlite_table, sqlite_connect, if_exists='fail')

2020-10-05 11:08:52,719 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2020-10-05 11:08:52,726 INFO sqlalchemy.engine.base.Engine ()
2020-10-05 11:08:52,730 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2020-10-05 11:08:52,730 INFO sqlalchemy.engine.base.Engine ()
2020-10-05 11:08:52,735 INFO sqlalchemy.engine.base.Engine SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
2020-10-05 11:08:52,737 INFO sqlalchemy.engine.base.Engine ()
2020-10-05 11:08:52,747 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("OD Data by Drug Type")
2020-10-05 11:08:52,750 INFO sqlalchemy.engine.base.Engine ()
2020-10-05 11:08:52,752 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info("OD Data by Drug Type")
2020-10-05 11:08:52,753 INFO sqlalchemy.engine.base.Engine ()
2020-10-05 11:08:52,757 INFO sqlalchemy.engine.base.Engine 
CREATE TABLE "OD Data by Drug Type" (
	state TEXT, 
	year

In [12]:
engine = create_engine("sqlite:///OD_DeathratevsUnemployment.db", echo=True)
sqlite_connect = engine.connect()
Base = automap_base()
Base.prepare(engine, reflect = True)
Base.classes.keys()

sqlite_table = "OD Data Rate versus Unemployment Rate"
combinded_merge_final.to_sql(sqlite_table, sqlite_connect, if_exists='fail')

2020-10-04 14:23:50,911 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2020-10-04 14:23:50,912 INFO sqlalchemy.engine.base.Engine ()
2020-10-04 14:23:50,913 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2020-10-04 14:23:50,914 INFO sqlalchemy.engine.base.Engine ()
2020-10-04 14:23:50,916 INFO sqlalchemy.engine.base.Engine SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
2020-10-04 14:23:50,917 INFO sqlalchemy.engine.base.Engine ()
2020-10-04 14:23:50,918 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("OD Data Rate versus Unemployment Rate")
2020-10-04 14:23:50,919 INFO sqlalchemy.engine.base.Engine ()
2020-10-04 14:23:50,921 INFO sqlalchemy.engine.base.Engine SELECT sql FROM  (SELECT * FROM sqlite_master UNION ALL   SELECT * FROM sqlite_temp_master) WHERE name = 'OD Data Rate versus Unemployment Rate' AND type = 'table'
2020-10-04 14:23:50,921 INFO sqlalchemy.engin

ValueError: Table 'OD Data Rate versus Unemployment Rate' already exists.

In [8]:
drug_db_df = pd.read_sql_table('OD Data by Drug Type', 'sqlite:///OD_Data_byDrug.db')  
drugvunemployment_db_df = pd.read_sql_table('OD Data Rate versus Unemployment Rate', 'sqlite:///OD_DeathratevsUnemployment.db')

In [12]:
drug_dict = drug_db_df.to_dict('records')
drug_json = json.dumps(drug_dict, indent = 2)
drug_db_df.reset_index().to_json("test.json")



In [29]:
unemployment_dict = drugvunemployment_db_df.to_dict('records')
unemployment_json = json.dumps(unemployment_dict, indent = 2)
##print(unemployment_json)