# Combine and Create Datasets

In [1]:
# Get dependencies
import pandas as pd

In [2]:
# Read CSV of county population data
county_pops = pd.read_csv("data/ca-co-pop2010-2019.csv")
county_pops.head()

Unnamed: 0.1,Unnamed: 0,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,California,37253956,37254519,37319502,37638369,37948800,38260787,38596972,38918045,39167117,39358497,39461588,39512223
1,"Alameda County, California",1510271,1510258,1512986,1530915,1553764,1579593,1607792,1634538,1650950,1660196,1666756,1671329
2,"Alpine County, California",1175,1175,1161,1093,1110,1128,1080,1077,1047,1111,1089,1129
3,"Amador County, California",38091,38091,37886,37543,37104,36620,36726,37031,37429,38529,39405,39752
4,"Butte County, California",220000,220005,219949,219975,220869,221641,223516,224631,226231,228696,230339,219186


In [3]:
# Rename county column
county_pops = county_pops.rename(columns={"Unnamed: 0": "County"})

# Get county names and population estimates for 2011 and 2016
county_2011_2016 = pd.DataFrame(county_pops[["County", "2011", "2016"]])
county_2011_2016.head()

Unnamed: 0,County,2011,2016
0,California,37638369,39167117
1,"Alameda County, California",1530915,1650950
2,"Alpine County, California",1093,1047
3,"Amador County, California",37543,37429
4,"Butte County, California",219975,226231


In [4]:
# Remove "County, California" from the County column

for index, row in county_2011_2016.iterrows():
    county = row["County"]
    county_split = county.split(" County")
    #new_county = county_split[0]
    #print("County:" + new_county)
    county_2011_2016.loc[index, "County"] = str(county_split[0])
        
county_2011_2016

Unnamed: 0,County,2011,2016
0,California,37638369,39167117
1,Alameda,1530915,1650950
2,Alpine,1093,1047
3,Amador,37543,37429
4,Butte,219975,226231
5,Calaveras,45160,45322
6,Colusa,21323,21496
7,Contra Costa,1065440,1137268
8,Del Norte,28443,27382
9,El Dorado,180936,185976


In [5]:
# Save dataset to CSV
county_2011_2016.to_csv("data/county_2011_2016.csv", index=False)

## Fast Food Data

In [6]:
# Read deprecated county data
county_data = pd.read_csv("data/county_2011_2016.csv")
county_data.head()

Unnamed: 0,County,2011,2016
0,California,37638369,39167117
1,Alameda,1530915,1650950
2,Alpine,1093,1047
3,Amador,37543,37429
4,Butte,219975,226231


In [7]:
# Grab county name (except "California" row), remove spaces, and create list of county names
counties = []

for index, row in county_data.iterrows():
    county = row["County"]
    # Remove spaces and add to list
    if county != "California":
        counties.append(county.replace(" ",""))
counties

['Alameda',
 'Alpine',
 'Amador',
 'Butte',
 'Calaveras',
 'Colusa',
 'ContraCosta',
 'DelNorte',
 'ElDorado',
 'Fresno',
 'Glenn',
 'Humboldt',
 'Imperial',
 'Inyo',
 'Kern',
 'Kings',
 'Lake',
 'Lassen',
 'LosAngeles',
 'Madera',
 'Marin',
 'Mariposa',
 'Mendocino',
 'Merced',
 'Modoc',
 'Mono',
 'Monterey',
 'Napa',
 'Nevada',
 'Orange',
 'Placer',
 'Plumas',
 'Riverside',
 'Sacramento',
 'SanBenito',
 'SanBernardino',
 'SanDiego',
 'SanFrancisco',
 'SanJoaquin',
 'SanLuisObispo',
 'SanMateo',
 'SantaBarbara',
 'SantaClara',
 'SantaCruz',
 'Shasta',
 'Sierra',
 'Siskiyou',
 'Solano',
 'Sonoma',
 'Stanislaus',
 'Sutter',
 'Tehama',
 'Trinity',
 'Tulare',
 'Tuolumne',
 'Ventura',
 'Yolo',
 'Yuba']

In [8]:
# The fast food data is not structured how we want, so we need to make sure it is
# Open the first CSV to use the first column to become the headers of our data frame
Alameda_fast_food = pd.read_csv(f"data/FastFood/Alameda.csv")
columns = Alameda_fast_food["State"].tolist()
columns

['County',
 'Fast-food restaurants, 2011',
 'Fast-food restaurants, 2016',
 'Fast-food restaurants (% change), 2011-16',
 'Fast-food restaurants/1,000 pop, 2011',
 'Fast-food restaurants/1,000 pop, 2016',
 'Fast-food restaurants/1,000 pop (% change), 2011-16',
 'Full-service restaurants, 2011',
 'Full-service restaurants, 2016',
 'Full-service restaurants (% change), 2011-16',
 'Full-service restaurants/1,000 pop, 2011',
 'Full-service restaurants/1,000 pop, 2016',
 'Full-service restaurants/1,000 pop (% change), 2011-16',
 'Expenditures per capita, fast food, 2007*',
 'Expenditures per capita, fast food, 2012*',
 'Expenditures per capita, restaurants, 2007*',
 'Expenditures per capita, restaurants, 2012*']

In [9]:
# Create data frame for fast food data using columns from above
#fast_food_data = pd.DataFrame(columns=columns)
#fast_food_data

In [10]:
# Create fast_food_data dataframe
fast_food_data = pd.DataFrame({"State": columns})

# Use county names to create a loop to open all the fast food CSV files and convert into one dataset

for county in counties:
    # Read county fast food file
    data_file = f"data/FastFood/{county}.csv"
    county_fast_food_data = pd.read_csv(data_file)
    
    # Merge fast_food_data dataframe with fast_food_data
    fast_food_data = fast_food_data.merge(county_fast_food_data, how="outer", on="State")

# Drop index
fast_food_data = fast_food_data.set_index("State")

# Transpose columns and rows
fast_food_data = fast_food_data.T

# Print data frame
fast_food_data


State,County,"Fast-food restaurants, 2011","Fast-food restaurants, 2016","Fast-food restaurants (% change), 2011-16","Fast-food restaurants/1,000 pop, 2011","Fast-food restaurants/1,000 pop, 2016","Fast-food restaurants/1,000 pop (% change), 2011-16","Full-service restaurants, 2011","Full-service restaurants, 2016","Full-service restaurants (% change), 2011-16","Full-service restaurants/1,000 pop, 2011","Full-service restaurants/1,000 pop, 2016","Full-service restaurants/1,000 pop (% change), 2011-16","Expenditures per capita, fast food, 2007*","Expenditures per capita, fast food, 2012*","Expenditures per capita, restaurants, 2007*","Expenditures per capita, restaurants, 2012*"
CA_x,Alameda,1090,1293,18.623853,0.711388,0.782102,9.940291,1401,1560,11.349036,0.914363,0.943604,3.198009,761.447125,642.487457,755.587505,750.318646
CA_y,Alpine,2,0,-100.0,1.829826,0.0,-100.0,3,1,-66.666667,2.744739,0.946074,-65.531378,761.447125,642.487457,755.587505,750.318646
CA_x,Amador,12,21,75.0,0.319668,0.560403,75.308222,39,44,12.820513,1.03892,1.174179,13.01922,761.447125,642.487457,755.587505,750.318646
CA_y,Butte,147,158,7.482993,0.668173,0.697495,4.388394,125,132,5.6,0.568174,0.582717,2.559615,761.447125,642.487457,755.587505,750.318646
CA_x,Calaveras,26,30,15.384615,0.575692,0.662032,14.997581,39,41,5.128205,0.863539,0.904778,4.775574,761.447125,642.487457,755.587505,750.318646
CA_y,Colusa,16,8,-50.0,0.748853,0.3696,-50.644491,10,11,10.0,0.468033,0.508201,8.582121,761.447125,642.487457,755.587505,750.318646
CA_x,Contra Costa,599,701,17.028381,0.561818,0.615644,9.580732,686,757,10.349854,0.643417,0.664825,3.327225,761.447125,642.487457,755.587505,750.318646
CA_y,Del Norte,15,10,-33.333333,0.527296,0.364299,-30.911961,16,24,50.0,0.562449,0.874317,55.448087,761.447125,642.487457,755.587505,750.318646
CA_x,El Dorado,97,110,13.402062,0.535986,0.590534,10.177258,165,202,22.424242,0.911728,1.084436,18.942875,761.447125,642.487457,755.587505,750.318646
CA_y,Fresno,569,634,11.42355,0.605087,0.647247,6.967445,490,519,5.918367,0.521077,0.529844,1.682429,761.447125,642.487457,755.587505,750.318646


In [11]:
# Save fast food merged data to CSV
fast_food_data.to_csv("data/fast_food_combined_data.csv", index=False)

In [12]:
new_fast_food_data = fast_food_data.iloc[:,0:7]
new_fast_food_data[["Fast-food restaurants/1,000 pop, 2011", "Fast-food restaurants (% change), 2011-16", "Fast-food restaurants/1,000 pop, 2016", "Fast-food restaurants/1,000 pop (% change), 2011-16"]] = new_fast_food_data[["Fast-food restaurants/1,000 pop, 2011", "Fast-food restaurants (% change), 2011-16", "Fast-food restaurants/1,000 pop, 2016", "Fast-food restaurants/1,000 pop (% change), 2011-16"]].astype(float)
new_fast_food_data[["Fast-food restaurants, 2011", "Fast-food restaurants, 2016"]] = new_fast_food_data[["Fast-food restaurants, 2011", "Fast-food restaurants, 2016"]].astype(int)
new_fast_food_data["Fast-food restaurants/1,000 pop, 2011"] = [10 * val for val in new_fast_food_data["Fast-food restaurants/1,000 pop, 2011"]]
new_fast_food_data["Fast-food restaurants/1,000 pop, 2016"] = [10 * val for val in new_fast_food_data["Fast-food restaurants/1,000 pop, 2016"]]
new_fast_food_data = new_fast_food_data.rename(columns={"Fast-food restaurants/1,000 pop, 2011": "Fast-food restaurants/10,000 pop, 2011"})
new_fast_food_data = new_fast_food_data.rename(columns={"Fast-food restaurants/1,000 pop, 2016": "Fast-food restaurants/10,000 pop, 2016"})
new_fast_food_data.to_csv("data/new_fast_food_combined_data.csv", index=False)
new_fast_food_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


State,County,"Fast-food restaurants, 2011","Fast-food restaurants, 2016","Fast-food restaurants (% change), 2011-16","Fast-food restaurants/10,000 pop, 2011","Fast-food restaurants/10,000 pop, 2016","Fast-food restaurants/1,000 pop (% change), 2011-16"
CA_x,Alameda,1090,1293,18.623853,7.11388,7.82102,9.940291
CA_y,Alpine,2,0,-100.0,18.29826,0.0,-100.0
CA_x,Amador,12,21,75.0,3.19668,5.60403,75.308222
CA_y,Butte,147,158,7.482993,6.68173,6.97495,4.388394
CA_x,Calaveras,26,30,15.384615,5.75692,6.62032,14.997581
CA_y,Colusa,16,8,-50.0,7.48853,3.696,-50.644491
CA_x,Contra Costa,599,701,17.028381,5.61818,6.15644,9.580732
CA_y,Del Norte,15,10,-33.333333,5.27296,3.64299,-30.911961
CA_x,El Dorado,97,110,13.402062,5.35986,5.90534,10.177258
CA_y,Fresno,569,634,11.42355,6.05087,6.47247,6.967445


## Combine Census Data

In [13]:
# Read Census data for 2011 and 2016
census_2011 = pd.read_csv("data/census_data_2011.csv")
census_2016 = pd.read_csv("data/census_data_2016.csv")


In [14]:
# Merge Census data
census_data = census_2011.merge(census_2016, how="outer", on="County")
census_data.head()

Unnamed: 0,County,Household Income_x,Population_x,Median Age_x,Per Capita Income_x,Poverty Count_x,Employed Persons Over 16_x,Unemployed Persons Over 16_x,Household Income_y,Population_y,Median Age_y,Per Capita Income_y,Poverty Count_y,Employed Persons Over 16_y,Unemployed Persons Over 16_y
0,Alameda,70821.0,1494876.0,36.4,34937.0,173156.0,792734.0,401593.0,79831.0,1605217.0,37.2,39042.0,189590.0,864007.0,434239.0
1,Alpine,59018.0,1167.0,40.4,29576.0,170.0,581.0,351.0,62375.0,1184.0,42.8,26783.0,221.0,468.0,499.0
2,Amador,56180.0,38244.0,48.0,28030.0,3372.0,15650.0,17192.0,57032.0,36963.0,50.3,27496.0,3684.0,14540.0,17341.0
3,Butte,42971.0,219309.0,37.2,23431.0,42283.0,101633.0,77585.0,44366.0,223877.0,36.9,25077.0,46456.0,102069.0,81862.0
4,Calaveras,55256.0,45794.0,49.0,28667.0,3751.0,20270.0,17987.0,53502.0,44787.0,51.2,30577.0,5641.0,18329.0,19512.0


In [15]:
# Fix column names
census_data = census_data.rename(columns={"Household Income_x": "Household Income 2011",
                                          "Population_x": "Population 2011",
                                          "Median Age_x": "Median Age 2011",
                                          "Per Capita Income_x": "Per Capita Income 2011",
                                          "Poverty Count_x": "Poverty Count 2011",
                                          "Employed Persons Over 16_x": "Employed Persons Over 16 2011",
                                          "Unemployed Persons Over 16_x": "Unemployed Persons Over 16 2011",
                                          "Household Income_y": "Household Income 2016",
                                          "Population_y": "Population 2016",
                                          "Median Age_y": "Median Age 2016",
                                          "Per Capita Income_y": "Per Capita Income 2016",
                                          "Poverty Count_y": "Poverty Count 2016",
                                          "Employed Persons Over 16_y": "Employed Persons Over 16 2016",
                                          "Unemployed Persons Over 16_y": "Unemployed Persons Over 16 2016"})
census_data.head()

Unnamed: 0,County,Household Income 2011,Population 2011,Median Age 2011,Per Capita Income 2011,Poverty Count 2011,Employed Persons Over 16 2011,Unemployed Persons Over 16 2011,Household Income 2016,Population 2016,Median Age 2016,Per Capita Income 2016,Poverty Count 2016,Employed Persons Over 16 2016,Unemployed Persons Over 16 2016
0,Alameda,70821.0,1494876.0,36.4,34937.0,173156.0,792734.0,401593.0,79831.0,1605217.0,37.2,39042.0,189590.0,864007.0,434239.0
1,Alpine,59018.0,1167.0,40.4,29576.0,170.0,581.0,351.0,62375.0,1184.0,42.8,26783.0,221.0,468.0,499.0
2,Amador,56180.0,38244.0,48.0,28030.0,3372.0,15650.0,17192.0,57032.0,36963.0,50.3,27496.0,3684.0,14540.0,17341.0
3,Butte,42971.0,219309.0,37.2,23431.0,42283.0,101633.0,77585.0,44366.0,223877.0,36.9,25077.0,46456.0,102069.0,81862.0
4,Calaveras,55256.0,45794.0,49.0,28667.0,3751.0,20270.0,17987.0,53502.0,44787.0,51.2,30577.0,5641.0,18329.0,19512.0


In [16]:
# Read health and fast food
health_ff = pd.read_csv("data/big_df.csv")
health_ff.head()

Unnamed: 0,County,"Fast-food restaurants, 2011","Fast-food restaurants, 2016","Fast-food restaurants (% change), 2011-16","Fast-food restaurants/10,000 pop, 2011","Fast-food restaurants/10,000 pop, 2016","Fast-food restaurants/1,000 pop (% change), 2011-16",Obesity 2011 %,Obesity 2016 %,New Diabetes per 1k : 2011,New Diabetes per 1k : 2016,Diagnosed Diabetes 2011 %,Diagnosed Diabetes 2016 %
0,Alameda,1090,1293,18.623853,7.11388,7.82102,9.940291,20.2,19.2,7.3,5.6,7.7,6.7
1,Alpine,2,0,-100.0,18.29826,0.0,-100.0,22.6,20.9,8.4,4.1,8.0,4.6
2,Amador,12,21,75.0,3.19668,5.60403,75.308222,24.8,29.8,8.3,10.1,7.8,10.4
3,Butte,147,158,7.482993,6.68173,6.97495,4.388394,24.2,27.2,9.0,8.0,8.6,8.7
4,Calaveras,26,30,15.384615,5.75692,6.62032,14.997581,23.9,30.4,7.6,7.9,7.4,8.3


In [17]:
# Merge Census data with health and fast food data
census_health_ff = census_data.merge(health_ff, how="outer", on="County")
census_health_ff.head()

Unnamed: 0,County,Household Income 2011,Population 2011,Median Age 2011,Per Capita Income 2011,Poverty Count 2011,Employed Persons Over 16 2011,Unemployed Persons Over 16 2011,Household Income 2016,Population 2016,...,"Fast-food restaurants (% change), 2011-16","Fast-food restaurants/10,000 pop, 2011","Fast-food restaurants/10,000 pop, 2016","Fast-food restaurants/1,000 pop (% change), 2011-16",Obesity 2011 %,Obesity 2016 %,New Diabetes per 1k : 2011,New Diabetes per 1k : 2016,Diagnosed Diabetes 2011 %,Diagnosed Diabetes 2016 %
0,Alameda,70821.0,1494876.0,36.4,34937.0,173156.0,792734.0,401593.0,79831.0,1605217.0,...,18.623853,7.11388,7.82102,9.940291,20.2,19.2,7.3,5.6,7.7,6.7
1,Alpine,59018.0,1167.0,40.4,29576.0,170.0,581.0,351.0,62375.0,1184.0,...,-100.0,18.29826,0.0,-100.0,22.6,20.9,8.4,4.1,8.0,4.6
2,Amador,56180.0,38244.0,48.0,28030.0,3372.0,15650.0,17192.0,57032.0,36963.0,...,75.0,3.19668,5.60403,75.308222,24.8,29.8,8.3,10.1,7.8,10.4
3,Butte,42971.0,219309.0,37.2,23431.0,42283.0,101633.0,77585.0,44366.0,223877.0,...,7.482993,6.68173,6.97495,4.388394,24.2,27.2,9.0,8.0,8.6,8.7
4,Calaveras,55256.0,45794.0,49.0,28667.0,3751.0,20270.0,17987.0,53502.0,44787.0,...,15.384615,5.75692,6.62032,14.997581,23.9,30.4,7.6,7.9,7.4,8.3


In [18]:
# Export data to CSV
census_health_ff.to_csv("data/census_health_ff_2011_2016.csv", index=False)

In [2]:
# Read main data and education data to merge the data sets
census_health_ff = pd.read_csv("data/census_health_ff_2011_2016.csv")
education_2012 = pd.read_csv("data/census_education_data_2012.csv")
education_2016 = pd.read_csv("data/census_education_data_2016.csv")

# Merge education data first
education_data = education_2012.merge(education_2016, how="outer", on="County")
education_data.head()

Unnamed: 0,County,Population_x,No Education_x,High School Education_x,GED education_x,Associate Education_x,Bachelor Education_x,Masters Education_x,Professional Education_x,Doctorate Education_x,...,No Education_y,High School Education_y,GED education_y,Associate Education_y,Bachelor Education_y,Masters Education_y,Professional Education_y,Doctorate Education_y,College_y,College Rate_y
0,Alameda,1515136.0,21320.0,183121.0,16511.0,69629.0,249246.0,116816.0,27561.0,27987.0,...,26836.0,182207.0,18164.0,74447.0,282754.0,141689.0,32639.0,31512.0,563041.0,35.075694
1,Alpine,1197.0,0.0,153.0,58.0,49.0,142.0,76.0,14.0,22.0,...,5.0,189.0,68.0,46.0,140.0,67.0,8.0,20.0,281.0,23.733108
2,Amador,37764.0,198.0,7117.0,938.0,2802.0,4042.0,825.0,417.0,138.0,...,279.0,6562.0,1131.0,2554.0,4212.0,1192.0,540.0,240.0,8738.0,23.639856
3,Butte,220101.0,2273.0,27686.0,4560.0,12955.0,22359.0,7246.0,2598.0,1585.0,...,2749.0,27537.0,5238.0,14030.0,25225.0,8002.0,2915.0,1611.0,51783.0,23.130112
4,Calaveras,45507.0,87.0,8778.0,1186.0,3234.0,4988.0,1373.0,513.0,142.0,...,310.0,8057.0,1158.0,3692.0,4645.0,1550.0,424.0,226.0,10537.0,23.526916


In [3]:
# Drop population columns, rename for years
education_data = education_data.rename(columns={"No Education_x": "No Education 2012",
                                            "Population_x": "Population 2012",
                                          "High School Education_x": "High School Education 2012",
                                          "GED education_x": "GED education 2012",
                                          "Associate Education_x": "Associate Education 2012",
                                          "Bachelor Education_x": "Bachelor Education 2012",
                                          "Masters Education_x": "Masters Education 2012",
                                          "Professional Education_x": "Professional Education 2012",
                                          "Doctorate Education_x": "Doctorate Education 2012",
                                          "College_x": "College 2012",
                                          "College Rate_x": "College Rate 2012",
                                          "No Education_y": "No Education 2016",
                                          "High School Education_y": "High School Education 2016",
                                          "GED education_y": "GED education 2016",
                                          "Associate Education_y": "Associate Education 2016",
                                          "Bachelor Education_y": "Bachelor Education 2016",
                                          "Masters Education_y": "Masters Education 2016",
                                          "Professional Education_y": "Professional Education 2016",
                                          "College_y": "College 2016",
                                          "College Rate_y": "College Rate 2016",
                                               "Doctorate Education_y": "Doctorate Education 2016"})
education_data.head()

Unnamed: 0,County,Population 2012,No Education 2012,High School Education 2012,GED education 2012,Associate Education 2012,Bachelor Education 2012,Masters Education 2012,Professional Education 2012,Doctorate Education 2012,...,No Education 2016,High School Education 2016,GED education 2016,Associate Education 2016,Bachelor Education 2016,Masters Education 2016,Professional Education 2016,Doctorate Education 2016,College 2016,College Rate 2016
0,Alameda,1515136.0,21320.0,183121.0,16511.0,69629.0,249246.0,116816.0,27561.0,27987.0,...,26836.0,182207.0,18164.0,74447.0,282754.0,141689.0,32639.0,31512.0,563041.0,35.075694
1,Alpine,1197.0,0.0,153.0,58.0,49.0,142.0,76.0,14.0,22.0,...,5.0,189.0,68.0,46.0,140.0,67.0,8.0,20.0,281.0,23.733108
2,Amador,37764.0,198.0,7117.0,938.0,2802.0,4042.0,825.0,417.0,138.0,...,279.0,6562.0,1131.0,2554.0,4212.0,1192.0,540.0,240.0,8738.0,23.639856
3,Butte,220101.0,2273.0,27686.0,4560.0,12955.0,22359.0,7246.0,2598.0,1585.0,...,2749.0,27537.0,5238.0,14030.0,25225.0,8002.0,2915.0,1611.0,51783.0,23.130112
4,Calaveras,45507.0,87.0,8778.0,1186.0,3234.0,4988.0,1373.0,513.0,142.0,...,310.0,8057.0,1158.0,3692.0,4645.0,1550.0,424.0,226.0,10537.0,23.526916


In [4]:
education_data = education_data.drop(columns=["Population_y"])
education_data.count()

County                         58
Population 2012                58
No Education 2012              58
High School Education 2012     58
GED education 2012             58
Associate Education 2012       58
Bachelor Education 2012        58
Masters Education 2012         58
Professional Education 2012    58
Doctorate Education 2012       58
College 2012                   58
College Rate 2012              58
No Education 2016              58
High School Education 2016     58
GED education 2016             58
Associate Education 2016       58
Bachelor Education 2016        58
Masters Education 2016         58
Professional Education 2016    58
Doctorate Education 2016       58
College 2016                   58
College Rate 2016              58
dtype: int64

In [5]:
# Merge education data with larger dataset
census_health_ff_education = census_health_ff.merge(education_data, how="outer", on="County")
census_health_ff_education.count()

County                                                 58
Household Income 2011                                  58
Population 2011                                        58
Median Age 2011                                        58
Per Capita Income 2011                                 58
Poverty Count 2011                                     58
Employed Persons Over 16 2011                          58
Unemployed Persons Over 16 2011                        58
Household Income 2016                                  58
Population 2016                                        58
Median Age 2016                                        58
Per Capita Income 2016                                 58
Poverty Count 2016                                     58
Employed Persons Over 16 2016                          58
Unemployed Persons Over 16 2016                        58
Fast-food restaurants, 2011                            58
Fast-food restaurants, 2016                            58
Fast-food rest

In [6]:
# Export to CSV
census_health_ff_education.to_csv("data/census_health_ff_education.csv", index=False)
