In [None]:

# Import Dependencies
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np

import time
from api_key import api_key

In [None]:
# File names of the three yearly diabetes CSV extracts (relative paths)
Diab2011_data, Diab2016_data, Diab2021_data = (
    "2011Diab.csv",
    "2016Diab.csv",
    "2021Diab.csv",
)

In [None]:
# Load the 2011 diabetes CSV into a DataFrame and preview its first rows
Diab2011_df = pd.read_csv(filepath_or_buffer=Diab2011_data)
Diab2011_df.head(n=5)

In [None]:
# Load the 2016 diabetes CSV into a DataFrame and preview its first rows
Diab2016_df = pd.read_csv(filepath_or_buffer=Diab2016_data)
Diab2016_df.head(n=5)

In [None]:
# Load the 2021 diabetes CSV into a DataFrame and preview its first rows
Diab2021_df = pd.read_csv(filepath_or_buffer=Diab2021_data)
Diab2021_df.head(n=5)

In [None]:
# Keep only the FIPS, State, County, % Diabetic, and Population columns of the 2011 data.
# .copy() makes the subset an independent frame: a later cell adds a Year column
# to it, and writing to a frame obtained by selecting from another frame can
# trigger pandas' SettingWithCopyWarning.
Diab2011_filtered_df = Diab2011_df[["FIPS", "State", "County", "% Diabetic", "Population"]].copy()
Diab2011_filtered_df.head()

In [None]:
# Keep only the FIPS, State, County, % Diabetic, and Population columns of the 2016 data.
# .copy() makes the subset an independent frame: a later cell adds a Year column
# to it, and writing to a frame obtained by selecting from another frame can
# trigger pandas' SettingWithCopyWarning.
Diab2016_filtered_df = Diab2016_df[["FIPS", "State", "County", "% Diabetic", "Population"]].copy()
Diab2016_filtered_df.head()

In [None]:
# Reduce the 2021 data to the same five fields as the other years. The 2021
# file labels the prevalence column "% Adults with Diabetes", so it is renamed
# to "% Diabetic" for consistency with the 2011 and 2016 frames.
Diab2021_filtered_df = (
    Diab2021_df[["FIPS", "State", "County", "% Adults with Diabetes", "Population"]]
    .rename(columns={"% Adults with Diabetes": "% Diabetic"})
)
Diab2021_filtered_df.head(n=5)

In [None]:
# Add a Year column to each dataframe.
# assign() returns a new frame carrying an integer Year column. This replaces
# writing through .loc with a list key for a column that does not exist yet,
# which can emit SettingWithCopyWarning on frames derived by column selection
# and relies on enlargement behavior that depends on the pandas version.
Diab2011_filtered_df = Diab2011_filtered_df.assign(Year=2011)
Diab2016_filtered_df = Diab2016_filtered_df.assign(Year=2016)
Diab2021_filtered_df = Diab2021_filtered_df.assign(Year=2021)

In [None]:
# Confirm the Year column is present on the 2011 frame
Diab2011_filtered_df.head(n=5)

In [None]:
# Confirm the Year column is present on the 2016 frame
Diab2016_filtered_df.head(n=5)

In [None]:
# Confirm the Year column is present on the 2021 frame
Diab2021_filtered_df.head(n=5)

In [None]:
# Union the yearly frames by stacking them row-wise with concat.
# https://datatofish.com/union-pandas-dataframes/
#
# First step: 2011 followed by 2016. Each frame's original row labels are kept
# (ignore_index is left at its default).
Diab2011_2016_df = pd.concat(
    objs=[Diab2011_filtered_df, Diab2016_filtered_df],
    axis=0,
)
Diab2011_2016_df

In [None]:
# Second step: append the 2021 rows to the combined 2011/2016 frame
Diab2011_2016_2021_df = pd.concat(
    objs=[Diab2011_2016_df, Diab2021_filtered_df],
    axis=0,
)
Diab2011_2016_2021_df

In [None]:
# Clean up Diab2011_2016_2021_df by removing every row with a missing value in any column.
# dropna() returns a new frame and leaves the original untouched, so the result
# must be assigned back; without the assignment the cleanup is discarded.
Diab2011_2016_2021_df = Diab2011_2016_2021_df.dropna(how='any')
Diab2011_2016_2021_df

In [None]:
# Largest "% Diabetic" value in the combined frame
Diab2011_2016_2021_df.loc[:, "% Diabetic"].max()

In [None]:
# Largest "Population" value in the combined frame
Diab2011_2016_2021_df.loc[:, "Population"].max()

In [None]:
# Derive an absolute head count per row: "% Diabetic" is a percentage, so it is
# scaled by 1/100 before multiplying by Population; the product is rounded to
# zero decimal places.
Diab2011_2016_2021_df["Diabetic Population"] = (
    1 / 100
    * Diab2011_2016_2021_df["% Diabetic"]
    * Diab2011_2016_2021_df["Population"]
).round(0)

In [None]:
# Display the combined frame, which now includes the "Diabetic Population" column
Diab2011_2016_2021_df

In [None]:
# Show the dtype of each column in the combined diabetes frame
Diab2011_2016_2021_df.dtypes


In [None]:
# Load the fast food restaurant data that was compiled in the fast food notebook
# and preview its first rows
fast_food_11_16_21_data = "fast_food_11_16_21.csv"
fast_food_11_16_21_df = pd.read_csv(filepath_or_buffer=fast_food_11_16_21_data)
fast_food_11_16_21_df.head(n=5)

In [None]:
# Show the dtype of each column in the fast food frame (its FIPS and Year
# columns are converted to strings below to build the merge key)
fast_food_11_16_21_df.dtypes

In [None]:
# Prepare a single string key so the frames can be joined on county and year
# together: the text form of FIPS (first five characters) followed by the text
# form of Year.
# NOTE(review): the five-character cut presumably trims a trailing ".0" when
# FIPS is stored as float — confirm FIPS (and Year) have the same dtype in both
# frames, otherwise the keys will not line up.
Diab2011_2016_2021_df["FIPS str"] = Diab2011_2016_2021_df["FIPS"].astype("str")
Diab2011_2016_2021_df["merge_key"] = (
    Diab2011_2016_2021_df["FIPS str"].str.slice(stop=5)
    + Diab2011_2016_2021_df["Year"].astype("str")
)
Diab2011_2016_2021_df

In [None]:
# Build the same string key on the fast food frame: the text form of FIPS
# (first five characters) followed by the text form of Year.
# NOTE(review): this must produce keys in exactly the same format as the
# diabetes frame's merge_key — confirm FIPS and Year have matching dtypes there.
fast_food_11_16_21_df["FIPS str"] = fast_food_11_16_21_df["FIPS"].astype("str")
fast_food_11_16_21_df["merge_key"] = (
    fast_food_11_16_21_df["FIPS str"].str.slice(stop=5)
    + fast_food_11_16_21_df["Year"].astype("str")
)
fast_food_11_16_21_df

In [None]:
# Attach the fast food restaurant data to the diabetes rows.
# A left join keeps every diabetes row; rows with no matching merge_key get NaN
# in the fast food columns. Column names present in both frames come back
# suffixed with _x (diabetes side) and _y (fast food side).
merge_diab_fastfood_df = Diab2011_2016_2021_df.merge(
    fast_food_11_16_21_df,
    on="merge_key",
    how="left",
)
merge_diab_fastfood_df

In [None]:
# Narrow the merged frame to the analysis columns, taking FIPS and Year from
# the diabetes side of the join (the _x suffixed versions)
diab_fastfood_df = merge_diab_fastfood_df.loc[
    :,
    [
        "FIPS_x",
        "State",
        "County",
        "% Diabetic",
        "Population",
        "Year_x",
        "Diabetic Population",
        "Restaurant Count",
    ],
]
diab_fastfood_df

In [None]:
# Strip the merge suffixes so the key columns read FIPS and Year again
diab_fastfood_rename_df = diab_fastfood_df.rename(
    {"FIPS_x": "FIPS", "Year_x": "Year"},
    axis="columns",
)
diab_fastfood_rename_df

In [None]:
# Counties with 0 restaurants were not brought into the 2021 yelp data, so after
# the left join they carry a NaN restaurant count. Treat a missing count as 0.
# (The fill is applied to every row with a missing count, whatever its year.)
diab_fastfood_rename_df["Restaurant Count"] = (
    diab_fastfood_rename_df["Restaurant Count"].fillna(value=0)
)
diab_fastfood_rename_df.head(n=5)

In [None]:
# Drop every row that still has a blank in any column (for example FIPS 17000,
# which is the entire state and not a county).
#
# THIS IS THE DATAFRAME TO PLOT AND ANALYZE WITH
diab_fastfood_clean_df = diab_fastfood_rename_df.dropna(axis=0, how="any")
diab_fastfood_clean_df.head(n=5)

In [None]:
# Write the cleaned frame to CSV; with to_csv's default settings the row index
# is written as well, as the first column
diab_fastfood_clean_df.to_csv(path_or_buf="diab_fastfood_clean_df.csv")