# County Education Levels for Data Storytelling

In [1]:
import pandas as pd

## Read and View
Source: [USDA, Economic Research Service](https://www.ers.usda.gov/data-products/county-level-data-sets/download-data/)

In [2]:
raw = pd.read_csv('data/Education.csv')
print(raw.shape)
raw.head()

(3283, 47)


Unnamed: 0,FIPS Code,State,Area name,2003 Rural-urban Continuum Code,2003 Urban Influence Code,2013 Rural-urban Continuum Code,2013 Urban Influence Code,"Less than a high school diploma, 1970","High school diploma only, 1970","Some college (1-3 years), 1970",...,"Percent of adults completing some college or associate's degree, 2000","Percent of adults with a bachelor's degree or higher, 2000","Less than a high school diploma, 2015-19","High school diploma only, 2015-19","Some college or associate's degree, 2015-19","Bachelor's degree or higher, 2015-19","Percent of adults with less than a high school diploma, 2015-19","Percent of adults with a high school diploma only, 2015-19","Percent of adults completing some college or associate's degree, 2015-19","Percent of adults with a bachelor's degree or higher, 2015-19"
0,0,US,United States,,,,,52373312,34158051,11650730,...,27.4,24.4,26472261,59472748,63756905,70920162,12.0,27.0,28.9,32.1
1,1000,AL,Alabama,,,,,1062306,468269,136287,...,25.9,19.0,458922,1022839,993344,845772,13.8,30.8,29.9,25.5
2,1001,AL,Autauga County,2.0,2.0,2.0,2.0,6611,3757,933,...,26.9,18.0,4291,12551,10596,9929,11.5,33.6,28.4,26.6
3,1003,AL,Baldwin County,4.0,5.0,3.0,2.0,18726,8426,2334,...,29.3,23.1,13893,41797,47274,48148,9.2,27.7,31.3,31.9
4,1005,AL,Barbour County,6.0,6.0,6.0,6.0,8120,2242,581,...,21.3,10.9,4812,6396,4676,2080,26.8,35.6,26.0,11.6


In [3]:
raw['FIPS Code'].dtype

dtype('int64')

In [4]:
int("1,234".replace(",", ""))

1234

In [5]:
raw.columns.tolist()

['FIPS Code',
 'State',
 'Area name',
 '2003 Rural-urban Continuum Code',
 '2003 Urban Influence Code',
 '2013 Rural-urban Continuum Code',
 '2013 Urban Influence Code',
 'Less than a high school diploma, 1970',
 'High school diploma only, 1970',
 'Some college (1-3 years), 1970',
 'Four years of college or higher, 1970',
 'Percent of adults with less than a high school diploma, 1970',
 'Percent of adults with a high school diploma only, 1970',
 'Percent of adults completing some college (1-3 years), 1970',
 'Percent of adults completing four years of college or higher, 1970',
 'Less than a high school diploma, 1980',
 'High school diploma only, 1980',
 'Some college (1-3 years), 1980',
 'Four years of college or higher, 1980',
 'Percent of adults with less than a high school diploma, 1980',
 'Percent of adults with a high school diploma only, 1980',
 'Percent of adults completing some college (1-3 years), 1980',
 'Percent of adults completing four years of college or higher, 1980',
 '

## Exploration

In [6]:
raw[['State', 'Percent of adults completing four years of college or higher, 1970',"Percent of adults with a bachelor's degree or higher, 2015-19"]].groupby("State").mean()

Unnamed: 0_level_0,"Percent of adults completing four years of college or higher, 1970","Percent of adults with a bachelor's degree or higher, 2015-19"
State,Unnamed: 1_level_1,Unnamed: 2_level_1
AK,10.9,23.26
AL,5.894118,18.161765
AR,5.034211,16.602632
AZ,9.913333,20.9125
CA,10.750847,27.40339
CO,10.79375,32.06
CT,12.877778,37.477778
DC,17.8,58.5
DE,11.15,30.05
FL,7.904412,22.523529


In [7]:
raw[['State', 'Percent of adults with less than a high school diploma, 1970','Percent of adults with less than a high school diploma, 2015-19']].groupby("State").mean()

Unnamed: 0_level_0,"Percent of adults with less than a high school diploma, 1970","Percent of adults with less than a high school diploma, 2015-19"
State,Unnamed: 1_level_1,Unnamed: 2_level_1
AK,49.026923,9.523333
AL,65.158824,17.180882
AR,65.972368,15.527632
AZ,49.293333,15.35625
CA,41.288136,15.335593
CO,43.346875,8.835385
CT,45.055556,8.522222
DC,44.8,9.1
DE,48.125,10.725
FL,55.629412,14.352941


## Subset for Initial Dataviz
* Columns: "FIPS Code", "State", "Area name", 'Percent of adults with less than a high school diploma, 1970'. "Percent of adults with less than a high school diploma, 2015-19"
* Rename: "fips", "state", "county", "perc_no_hs_1970", "perc_no_hs_2019"

In [9]:
subset = raw[[
    "FIPS Code", 
    "State", 
    "Area name", 
    'Percent of adults with less than a high school diploma, 1970',
    "Percent of adults with less than a high school diploma, 2015-19"
    ]].rename(columns = {
    "FIPS Code" : "fips", 
    "State" : "state", 
    "Area name" : "county",
    'Percent of adults with less than a high school diploma, 1970' : "perc_no_hs_1970",
    "Percent of adults with less than a high school diploma, 2015-19" : "perc_no_hs_2019"
})
subset.head()

Unnamed: 0,fips,state,county,perc_no_hs_1970,perc_no_hs_2019
0,0,US,United States,47.7,12.0
1,1000,AL,Alabama,58.7,13.8
2,1001,AL,Autauga County,54.8,11.5
3,1003,AL,Baldwin County,59.4,9.2
4,1005,AL,Barbour County,68.8,26.8


In [10]:
subset.to_csv('data/perc_no_hs_2019.csv', index=False)