## Extract data from CSV files

In [None]:
# Dependencies
import pandas as pd

In [None]:
# Read the nutrition / physical activity / obesity CSV export into a dataframe
nutr_df = pd.read_csv(
    filepath_or_buffer="Resources/Nutrition__Physical_Activity__and_Obesity_-_Women__Infant__and_Child.csv"
)

# Peek at the first rows to confirm the file parsed as expected
nutr_df.head()

## Transform data

In [None]:
# Show every column label of the raw dataframe as a plain Python list
nutr_df.columns.tolist()

In [None]:
# Keep only the columns used by the later filtering and loading steps
nutr_df2 = nutr_df.loc[
    :,
    [
        "YearEnd",
        "LocationAbbr",
        "LocationDesc",
        "Question",
        "Data_Value",
        "StratificationID1",
    ],
]
nutr_df2.head()

In [None]:
# Keep only the rows whose YearEnd is 2010, 2012 or 2014
Years = [2010, 2012, 2014]

nutr_df3 = nutr_df2.loc[nutr_df2["YearEnd"].isin(Years)]
nutr_df3.head()

In [None]:
# Keep the un-stratified rows (StratificationID1 == "OVERALL") that report
# the obesity percentage for WIC children aged 2 to 4 years
is_overall = nutr_df3["StratificationID1"] == "OVERALL"
is_obesity_question = (
    nutr_df3["Question"]
    == "Percent of WIC children aged 2 to 4 years who have obesity"
)

nutr_df4 = nutr_df3[is_overall & is_obesity_question]
nutr_df4.head()

In [None]:
# Reduce the dataset to year, state name and value, then give those
# columns the names used for the database table
nutr_df4 = nutr_df4[["YearEnd", "LocationDesc", "Data_Value"]].rename(
    columns={
        "YearEnd": "Year",
        "LocationDesc": "US_State",
        "Data_Value": "Obese_Children_%",
    }
)

# Preview the cleaned dataframe
nutr_df4.head()

In [None]:
# Build a one-column table of the distinct US_State values in nutr_df4
# (only the state name is kept here; no abbreviation column is selected)
location = nutr_df4.loc[:, ["US_State"]].drop_duplicates()

# Display the full list of locations
location

## Load data into database

In [None]:
# Dependencies
from sqlalchemy import create_engine
from config import password

In [None]:
# Create a connection to the database
from urllib.parse import quote_plus

# The password comes from a separate config file. It is URL-escaped before
# being placed in the connection URL so that characters such as "@", ":",
# "/" or "%" are not misread as URL delimiters.
conn = "root:{0}@localhost:3306/diabetes_db".format(quote_plus(str(password)))
engine = create_engine(f"mysql://{conn}")

In [None]:
# Confirm presence of tables
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of table names.
from sqlalchemy import inspect as sa_inspect

sa_inspect(engine).get_table_names()

In [None]:
# Write both dataframes to the database through the engine. An existing table
# with the same name is replaced, and the dataframe index is not written.
location.to_sql("location", engine, if_exists="replace", index=False)
nutr_df4.to_sql("nutrition", engine, if_exists="replace", index=False)

In [None]:
# Read the table contents (for location)
# read_sql_query requires a connection; pass the engine created above.
pd.read_sql_query("select * from location", con=engine).head()

In [None]:
# Read the table contents (for nutrition)
# read_sql_query requires a connection; pass the engine created above.
pd.read_sql_query("select * from nutrition", con=engine).head()