In [1]:
import pandas as pd
from sqlalchemy import create_engine, inspect

### Breastfed (Store CSV into DataFrame)

In [14]:
# Load the 2015 breastfeeding CSV into a raw DataFrame.
csv_file = "Resources/breastfeeding2015.csv"
bf_raw_data_df = pd.read_csv(csv_file)

# Skip the first data row and keep only the first two columns
# (Location and Ever Breastfed, per the displayed output).
# NOTE(review): the skipped row is presumably a national total -- confirm against the CSV.
# .copy() gives an independent frame, so the edits below never write into a
# slice of bf_raw_data_df (which is what triggers SettingWithCopyWarning).
bf_df = bf_raw_data_df.iloc[1:, 0:2].copy()

# Drop rows where either kept column is missing.
bf_df = bf_df.dropna(how='any')

# Tag every row with the survey year.
bf_df['TimeFrame'] = 2015

# Format each value with the percent format spec (value x 100, one decimal, '%' suffix).
bf_df['Ever Breastfed'] = bf_df['Ever Breastfed'].map("{:,.1%}".format)

# Renumber rows from 0 after the row skip / dropna.
bf_df = bf_df.reset_index(drop=True)
bf_df

Unnamed: 0,Location,Ever Breastfed,TimeFrame
0,Alabama,68.1%,2015
1,Alaska,93.1%,2015
2,Arizona,82.7%,2015
3,Arkansas,73.8%,2015
4,California,87.2%,2015
5,Colorado,90.9%,2015
6,Connecticut,86.3%,2015
7,Delaware,77.4%,2015
8,District of Columbia,83.0%,2015
9,Florida,82.6%,2015


### Parent Education (Store XLSX into DataFrame)
I chose not to use this dataset after all because it breaks parental education down by immigrant families and US-born families; I wanted education and family nativity as separate datasets.

In [None]:
# Disabled cell: everything below is intentionally commented out because this
# dataset was not used in the final analysis.
# xlsx_file = "Resources/Children whose parents all have less than a high school degree by family nativity.xlsx"
# parent_edu_raw_data_df = pd.read_excel(xlsx_file)

# parent_edu_2015 = parent_edu_raw_data_df.loc[(parent_edu_raw_data_df["TimeFrame"]=='2015')&(parent_edu_raw_data_df["LocationType"]=='State')&(parent_edu_raw_data_df["DataFormat"]=='Number')]
# parent_edu_2015 = parent_edu_2015[['Location', 'Data']]
# parent_edu_2015
# parent_edu_raw_data_df

# df = parent_edu_2015.groupby(['Location']).sum()
# df

### Families on Public Assistance (Store XLSX into DataFrame)

In [17]:
# Load the public-assistance workbook into a raw DataFrame.
public_assistance_xlsx_file = "Resources/Children in families that receive public assistance.xlsx"
public_assistance_raw_data_df = pd.read_excel(public_assistance_xlsx_file)

# Keep state-level percentage rows for 2015 and only the three stored columns.
# Rows and columns are selected in a single .loc call and then copied, so the
# column assignment below works on an independent frame rather than a slice
# of the raw data (avoids SettingWithCopyWarning).
public_assistance_df = public_assistance_raw_data_df.loc[
    (public_assistance_raw_data_df["LocationType"] == 'State')
    & (public_assistance_raw_data_df["DataFormat"] == 'Percent')
    & (public_assistance_raw_data_df["TimeFrame"] == 2015),
    ['Location', 'TimeFrame', 'Data']
].copy()

# Format each value with the percent format spec (value x 100, one decimal, '%' suffix).
public_assistance_df['Data'] = public_assistance_df['Data'].map("{:,.1%}".format)

# Renumber rows from 0 after filtering.
public_assistance_df = public_assistance_df.reset_index(drop=True)
public_assistance_df

Unnamed: 0,Location,TimeFrame,Data
0,Alabama,2015,34.0%
1,Alaska,2015,25.0%
2,Arizona,2015,31.0%
3,Arkansas,2015,31.0%
4,California,2015,27.0%
5,Colorado,2015,19.0%
6,Connecticut,2015,21.0%
7,Delaware,2015,30.0%
8,Florida,2015,33.0%
9,Georgia,2015,31.0%


### Children in Immigrant Families (Store XLSX into DataFrame)

In [25]:
# Load the children-in-immigrant-families workbook into a raw DataFrame.
immigrant_families_xlsx_file = "Resources/Children in immigrant families.xlsx"
immigrant_families_raw_data_df = pd.read_excel(immigrant_families_xlsx_file)

# Keep state-level percentage rows for 2015 and only the three stored columns.
# NOTE(review): TimeFrame is compared to the string '2015' here, whereas the
# other cells compare to the integer 2015 -- presumably this workbook's
# TimeFrame column is read as text; confirm before changing.
# .copy() yields an independent frame so the column assignments below do not
# write into a slice of the raw data (avoids SettingWithCopyWarning).
immigrant_families_df = immigrant_families_raw_data_df.loc[
    (immigrant_families_raw_data_df["LocationType"] == 'State')
    & (immigrant_families_raw_data_df["DataFormat"] == 'Percent')
    & (immigrant_families_raw_data_df["TimeFrame"] == '2015'),
    ['Location', 'TimeFrame', 'Data']
].copy()

# Convert Data to a numeric dtype first; the percent format spec needs numbers.
immigrant_families_df['Data'] = pd.to_numeric(immigrant_families_df['Data'])

# Format each value with the percent format spec (value x 100, one decimal, '%' suffix).
immigrant_families_df['Data'] = immigrant_families_df['Data'].map("{:,.1%}".format)

# Renumber rows from 0 after filtering.
immigrant_families_df = immigrant_families_df.reset_index(drop=True)
immigrant_families_df

Unnamed: 0,Location,TimeFrame,Data
0,Alabama,2015,8.0%
1,Alaska,2015,14.0%
2,Arizona,2015,27.0%
3,Arkansas,2015,11.0%
4,California,2015,48.0%
5,Colorado,2015,23.0%
6,Connecticut,2015,26.0%
7,Delaware,2015,19.0%
8,Florida,2015,32.0%
9,Georgia,2015,21.0%


### Head of household with less than high school education (Store XLSX into DataFrame)

In [28]:
# Load the household-head educational-attainment workbook into a raw DataFrame.
head_edu_xlsx_file = "Resources/Children by household head’s educational attainment.xlsx"
head_edu_raw_df = pd.read_excel(head_edu_xlsx_file)

# Keep state-level percentage rows for 2015 where the household head is not a
# high school graduate, and only the three stored columns.
# .copy() yields an independent frame so the column assignment below does not
# write into a slice of the raw data (avoids SettingWithCopyWarning).
head_edu_df = head_edu_raw_df.loc[
    (head_edu_raw_df["LocationType"] == 'State')
    & (head_edu_raw_df["DataFormat"] == 'Percent')
    & (head_edu_raw_df["TimeFrame"] == 2015)
    & (head_edu_raw_df["Education Level"] == "Not a high school graduate"),
    ['Location', 'TimeFrame', 'Data']
].copy()

# Format each value with the percent format spec (value x 100, one decimal, '%' suffix).
head_edu_df['Data'] = head_edu_df['Data'].map("{:,.1%}".format)

# Renumber rows from 0 after filtering.
head_edu_df = head_edu_df.reset_index(drop=True)
head_edu_df

Unnamed: 0,Location,TimeFrame,Data
0,Alabama,2015,14.0%
1,Alaska,2015,8.0%
2,Arizona,2015,18.0%
3,Arkansas,2015,14.0%
4,California,2015,22.0%
5,Colorado,2015,12.0%
6,Connecticut,2015,8.0%
7,Delaware,2015,11.0%
8,Florida,2015,12.0%
9,Georgia,2015,14.0%


### Health Insurance (Store XLSX into DataFrame)

In [27]:
# Load the uninsured-children workbook into a raw DataFrame.
health_insurance_xlsx_file = "Resources/Children 17 and below without health insurance.xlsx"
health_insurance_raw_df = pd.read_excel(health_insurance_xlsx_file)

# Keep state-level percentage rows for 2015 and only the three stored columns.
# .copy() yields an independent frame so the column assignments below do not
# write into a slice of the raw data (avoids SettingWithCopyWarning).
health_insurance_df = health_insurance_raw_df.loc[
    (health_insurance_raw_df["LocationType"] == 'State')
    & (health_insurance_raw_df["DataFormat"] == 'Percent')
    & (health_insurance_raw_df["TimeFrame"] == 2015),
    ['Location', 'TimeFrame', 'Data']
].copy()

# Convert Data to a numeric dtype first; the percent format spec needs numbers.
health_insurance_df['Data'] = pd.to_numeric(health_insurance_df['Data'])

# Format each value with the percent format spec (value x 100, one decimal, '%' suffix).
health_insurance_df['Data'] = health_insurance_df['Data'].map("{:,.1%}".format)

# Renumber rows from 0 after filtering.
health_insurance_df = health_insurance_df.reset_index(drop=True)
health_insurance_df

Unnamed: 0,Location,TimeFrame,Data
0,Alabama,2015,3.0%
1,Alaska,2015,11.0%
2,Arizona,2015,8.0%
3,Arkansas,2015,5.0%
4,California,2015,3.0%
5,Colorado,2015,4.0%
6,Connecticut,2015,3.0%
7,Delaware,2015,3.0%
8,Florida,2015,7.0%
9,Georgia,2015,7.0%


## Make SQLite database

In [39]:
# Create the SQLAlchemy engine for the SQLite file parental_impact_db.sqlite
# (the three-slash URL form is a path relative to the working directory).
engine = create_engine('sqlite:///parental_impact_db.sqlite')

In [40]:
# List the tables currently in the database (empty before anything is written).
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API works on both old and new versions.
inspect(engine).get_table_names()

[]

In [41]:
# Write the breastfeeding table. if_exists='replace' keeps this cell idempotent:
# with 'append', every re-run of the notebook would duplicate all rows.
bf_df.to_sql(name='breastfed', con=engine, if_exists='replace', index=True)

In [42]:
# Write the public-assistance table. if_exists='replace' keeps this cell idempotent:
# with 'append', every re-run of the notebook would duplicate all rows.
public_assistance_df.to_sql(name='public_assistance', con=engine, if_exists='replace', index=True)

In [43]:
# Write the immigrant-families table. if_exists='replace' keeps this cell idempotent:
# with 'append', every re-run of the notebook would duplicate all rows.
immigrant_families_df.to_sql(name='immigrant_families', con=engine, if_exists='replace', index=True)

In [44]:
# Write the household-head education table. if_exists='replace' keeps this cell
# idempotent: with 'append', every re-run of the notebook would duplicate all rows.
head_edu_df.to_sql(name='household_head_less_than_HS_education', con=engine, if_exists='replace', index=True)

In [45]:
# Write the uninsured-minors table. if_exists='replace' keeps this cell idempotent:
# with 'append', every re-run of the notebook would duplicate all rows.
health_insurance_df.to_sql(name='minors_without_health_insurance', con=engine, if_exists='replace', index=True)

In [46]:
# Confirm that all five tables now exist in the database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API works on both old and new versions.
inspect(engine).get_table_names()

['breastfed',
 'household_head_less_than_HS_education',
 'immigrant_families',
 'minors_without_health_insurance',
 'public_assistance']