In [1]:
# Import preliminaries
import pandas as pd
from sqlalchemy import create_engine, inspect
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import func

### Store Annual Health Data CSV into DataFrame

In [2]:
# Read the annual health report CSV into a DataFrame and preview the first rows.
health_csv = "annual_report.csv"
health_df = pd.read_csv(filepath_or_buffer=health_csv)
health_df.head(n=5)

Unnamed: 0,Edition,Report Type,Measure Name,State Name,Rank,Value,Score,Lower CI,Upper CI,Source,Source Year
0,2019,2019 Annual,Adverse Childhood Experiences,Alaska,34.0,24.1,0.96,27.7,20.5,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
1,2019,2019 Annual,Adverse Childhood Experiences,Alabama,46.0,26.3,1.56,30.1,22.5,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
2,2019,2019 Annual,Adverse Childhood Experiences,United States,,20.5,,21.2,19.8,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
3,2019,2019 Annual,Adverse Childhood Experiences,Arkansas,47.0,27.1,1.76,30.8,23.3,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
4,2019,2019 Annual,Adverse Childhood Experiences,Arizona,48.0,27.3,1.82,31.1,23.4,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017


### Clean up the columns

In [3]:
# Keep only the columns used further down; .copy() detaches the result from
# the originally loaded frame.
health_df = health_df.loc[
    :,
    ['State Name', 'Rank', 'Measure Name', 'Score', 'Source', 'Source Year'],
].copy()
health_df.head()

Unnamed: 0,State Name,Rank,Measure Name,Score,Source,Source Year
0,Alaska,34.0,Adverse Childhood Experiences,0.96,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
1,Alabama,46.0,Adverse Childhood Experiences,1.56,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
2,United States,,Adverse Childhood Experiences,,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
3,Arkansas,47.0,Adverse Childhood Experiences,1.76,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
4,Arizona,48.0,Adverse Childhood Experiences,1.82,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017


### Isolate each health measure of interest (mental illness, insufficient sleep, air pollution)

In [4]:
# Rows for the 'Mental illness' measure, ordered alphabetically by state,
# with snake_case names for the renamed columns and a fresh 0..n-1 index.
health_mental_df = (
    health_df[health_df['Measure Name'] == 'Mental illness']
    .sort_values(by='State Name')
    .rename(columns={"State Name": "state", 'Measure Name': 'measure_name', 'Source Year': 'source_year'})
    .reset_index(drop=True)
)
health_mental_df.head()

Unnamed: 0,state,Rank,measure_name,Score,Source,source_year
0,Alabama,14.0,Mental illness,0.05,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
1,Alaska,45.0,Mental illness,1.94,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
2,Arizona,28.0,Mental illness,0.45,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
3,Arkansas,41.0,Mental illness,1.59,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017
4,California,3.0,Mental illness,-1.0,"U.S. HHS, HRSA, Maternal and Child Health Bure...",2016-2017


In [5]:
# Rows for the 'Insufficient Sleep' measure, ordered alphabetically by state,
# with snake_case names for the renamed columns and a fresh 0..n-1 index.
health_sleep_df = (
    health_df[health_df['Measure Name'] == 'Insufficient Sleep']
    .sort_values(by='State Name')
    .rename(columns={"State Name": "state", 'Measure Name': 'measure_name', 'Source Year': 'source_year'})
    .reset_index(drop=True)
)
health_sleep_df.head()

Unnamed: 0,state,Rank,measure_name,Score,Source,source_year
0,Alabama,43.0,Insufficient Sleep,1.21,"CDC, Behavioral Risk Factor Surveillance System",2018
1,Alaska,20.0,Insufficient Sleep,-0.3,"CDC, Behavioral Risk Factor Surveillance System",2018
2,Arizona,22.0,Insufficient Sleep,-0.15,"CDC, Behavioral Risk Factor Surveillance System",2018
3,Arkansas,29.0,Insufficient Sleep,0.47,"CDC, Behavioral Risk Factor Surveillance System",2018
4,California,25.0,Insufficient Sleep,-0.03,"CDC, Behavioral Risk Factor Surveillance System",2018


In [6]:
# Rows for the 'Air Pollution' measure, ordered alphabetically by state,
# with snake_case names for the renamed columns and a fresh 0..n-1 index.
health_airpollution_df = (
    health_df[health_df['Measure Name'] == 'Air Pollution']
    .sort_values(by='State Name')
    .rename(columns={"State Name": "state", 'Measure Name': 'measure_name', 'Source Year': 'source_year'})
    .reset_index(drop=True)
)
health_airpollution_df.head()

Unnamed: 0,state,Rank,measure_name,Score,Source,source_year
0,Alabama,36.0,Air Pollution,-0.21,U.S. Environmental Protection Agency; U.S. Cen...,2016-2018
1,Alaska,10.0,Air Pollution,-1.4,U.S. Environmental Protection Agency; U.S. Cen...,2016-2018
2,Arizona,49.0,Air Pollution,0.91,U.S. Environmental Protection Agency; U.S. Cen...,2016-2018
3,Arkansas,19.0,Air Pollution,-0.91,U.S. Environmental Protection Agency; U.S. Cen...,2016-2018
4,California,50.0,Air Pollution,2.0,U.S. Environmental Protection Agency; U.S. Cen...,2016-2018


In [7]:
# Prefix every column of each per-measure frame with a short measure tag
# (e.g. "state" -> "sleep_state"). Keys are the notebook variable names of the
# frames; values are the tags.
df_col = {'health_mental_df': 'mentalill', 'health_sleep_df': 'sleep', 'health_airpollution_df': 'pollution'}
for key, value in df_col.items():
    # Frames are looked up by variable name, so the cells that create them
    # must have been run first (otherwise this raises KeyError).
    df = globals()[key]
    prefix = '{}_'.format(value)
    # The rename below mutates the frame in place, so running this cell twice
    # would otherwise yield names such as "sleep_sleep_state". Skip frames
    # whose columns all carry the prefix already.
    if all(str(col_name).startswith(prefix) for col_name in df.columns):
        continue
    df.columns = [prefix + col_name for col_name in df.columns]
    

In [8]:
# Sanity check: number of non-null entries per column (count() skips NaN).
# Only a cell's last expression is rendered, so this is the single result shown.
health_airpollution_df.count()

pollution_state           52
pollution_Rank            50
pollution_measure_name    52
pollution_Score           50
pollution_Source          52
pollution_source_year     52
dtype: int64

In [9]:
# Sanity check: number of non-null entries per column (count() skips NaN).
# Only a cell's last expression is rendered, so this is the single result shown.
health_mental_df.count()

mentalill_state           52
mentalill_Rank            50
mentalill_measure_name    52
mentalill_Score           50
mentalill_Source          52
mentalill_source_year     52
dtype: int64

In [10]:
# Sanity check: number of non-null entries per column (count() skips NaN).
# Only a cell's last expression is rendered, so this is the single result shown.
health_sleep_df.count()

sleep_state           52
sleep_Rank            50
sleep_measure_name    52
sleep_Score           50
sleep_Source          52
sleep_source_year     52
dtype: int64

### Store State Happiness Ranking Data

In [11]:
# Load the state happiness rankings, rename the "State" column to "state",
# and order the rows alphabetically by state with a fresh 0..n-1 index.
happiness_csv = "happiness.csv"
happiness_df = (
    pd.read_csv(happiness_csv)
    .rename(columns={"State": "state"})
    .sort_values(by='state')
    .reset_index(drop=True)
)
happiness_df.head()

Unnamed: 0,overall,state,totalScore,emotAndPhysRank,workEnvironRank,communityAndEnvironRank,Pop
0,45,Alabama,39.35,46,39,43,4898246
1,47,Alaska,38.21,33,49,50,735720
2,21,Arizona,52.92,27,12,34,7275070
3,49,Arkansas,36.61,50,29,23,3026412
4,5,California,63.14,4,24,12,39747267
