In [1]:
#import dependencies
import json
from pathlib import Path

import pandas as pd
import requests
from census import Census
from us import states

from config import api_key

In [2]:
# Open a Census API client with year=2010; `api_key` is imported from the local config module.
# Note: the name `c` is rebound to other years further down the notebook.
c = Census(api_key, year = 2010)

In [3]:
# List the tables available through the client's acs5 endpoint.
# Each entry in the result carries a 'name', a 'description' and a 'variables' URL.
c.acs5.tables()

[{'name': 'B17015',
  'description': 'POVERTY STATUS IN THE PAST 12 MONTHS OF FAMILIES BY FAMILY TYPE BY SOCIAL SECURITY INCOME BY SUPPLEMENTAL SECURITY INCOME (SSI) AND CASH PUBLIC ASSISTANCE INCOME',
  'variables': 'https://api.census.gov/data/2010/acs/acs5/groups/B17015.json'},
 {'name': 'B17016',
  'description': 'POVERTY STATUS IN THE PAST 12 MONTHS OF FAMILIES BY FAMILY TYPE BY WORK EXPERIENCE OF HOUSEHOLDER AND SPOUSE',
  'variables': 'https://api.census.gov/data/2010/acs/acs5/groups/B17016.json'},
 {'name': 'B17017',
  'description': 'POVERTY STATUS IN THE PAST 12 MONTHS BY HOUSEHOLD TYPE BY AGE OF HOUSEHOLDER',
  'variables': 'https://api.census.gov/data/2010/acs/acs5/groups/B17017.json'},
 {'name': 'B17018',
  'description': 'POVERTY STATUS IN THE PAST 12 MONTHS OF FAMILIES BY HOUSEHOLD TYPE BY EDUCATIONAL ATTAINMENT OF HOUSEHOLDER',
  'variables': 'https://api.census.gov/data/2010/acs/acs5/groups/B17018.json'},
 {'name': 'B17011',
  'description': 'AGGREGATE INCOME DEFICIT (

In [4]:
# Smoke test: request the variables of interest for a single state
# (state:17, which the API labels "Illinois") before pulling county-level data.
c.acs5.get(
    (
        "NAME",
        "B19013_001E",
        "B01003_001E",
        "B01002_001E",
        "B19301_001E",
        "B17001_002E",
    ),
    {"for": "state:17"},
)

[{'NAME': 'Illinois',
  'B19013_001E': 55735.0,
  'B01003_001E': 12745359.0,
  'B01002_001E': 36.2,
  'B19301_001E': 28782.0,
  'B17001_002E': 1572048.0,
  'state': '17'}]

In [5]:
# County-level pull for 2010: every county ("*") within state FIPS "17".
census_2010 = c.acs5.state_county(
    (
        "NAME",
        "B19013_001E",
        "B01003_001E",
        "B01002_001E",
        "B19301_001E",
        "B17001_002E",
    ),
    "17",
    "*",
)

In [6]:
# Build the 2010 frame from the API records and replace the raw variable
# codes / geography keys with readable column names in one chained step.
census_2010df = pd.DataFrame(census_2010).rename(
    columns={
        "B19013_001E": "Household Income",
        "B01003_001E": "Population",
        "B01002_001E": "Median Age",
        "B19301_001E": "Per Capita Income",
        "B17001_002E": "Poverty Count",
        "state": "state_id",
        "county": "county_id",
    }
)
census_2010df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id
0,"Adams County, Illinois",43824.0,67030.0,40.4,24308.0,7954.0,17,1
1,"Alexander County, Illinois",28833.0,8449.0,41.8,15858.0,1600.0,17,3
2,"Bond County, Illinois",51946.0,17904.0,39.3,24341.0,1870.0,17,5
3,"Boone County, Illinois",61210.0,53567.0,35.8,26105.0,5508.0,17,7
4,"Brown County, Illinois",38696.0,6897.0,37.6,17133.0,555.0,17,9


In [7]:
# Tag every 2010 row with its survey year (stored as a string) so the
# yearly frames stay distinguishable once they are stacked together.
census_year = "2010"
census_2010df = census_2010df.assign(census_year=census_year)
census_2010df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year
0,"Adams County, Illinois",43824.0,67030.0,40.4,24308.0,7954.0,17,1,2010
1,"Alexander County, Illinois",28833.0,8449.0,41.8,15858.0,1600.0,17,3,2010
2,"Bond County, Illinois",51946.0,17904.0,39.3,24341.0,1870.0,17,5,2010
3,"Boone County, Illinois",61210.0,53567.0,35.8,26105.0,5508.0,17,7,2010
4,"Brown County, Illinois",38696.0,6897.0,37.6,17133.0,555.0,17,9,2010


In [8]:
# Split NAME ("<county> County, <state>") into separate county and state columns.
# Splitting on ", " (comma + space) keeps a stray leading blank out of the
# state value; n=1 guarantees exactly two output columns even if a name were
# to contain an additional comma.
census_2010df[["county", "state"]] = census_2010df["NAME"].str.split(", ", n=1, expand=True)
census_2010df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,county,state
0,"Adams County, Illinois",43824.0,67030.0,40.4,24308.0,7954.0,17,1,2010,Adams County,Illinois
1,"Alexander County, Illinois",28833.0,8449.0,41.8,15858.0,1600.0,17,3,2010,Alexander County,Illinois
2,"Bond County, Illinois",51946.0,17904.0,39.3,24341.0,1870.0,17,5,2010,Bond County,Illinois
3,"Boone County, Illinois",61210.0,53567.0,35.8,26105.0,5508.0,17,7,2010,Boone County,Illinois
4,"Brown County, Illinois",38696.0,6897.0,37.6,17133.0,555.0,17,9,2010,Brown County,Illinois


In [9]:
# Reduce "<name> County" to the bare county name.
# Only the trailing " County" designation is removed; keeping just the first
# whitespace-separated word would truncate multi-word names
# (e.g. "Rock Island County" -> "Rock", "St. Clair County" -> "St.").
census_2010df['county2'] = census_2010df['county'].str.replace(r"\s+County$", "", regex=True)
census_2010df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,county,state,county2
0,"Adams County, Illinois",43824.0,67030.0,40.4,24308.0,7954.0,17,1,2010,Adams County,Illinois,Adams
1,"Alexander County, Illinois",28833.0,8449.0,41.8,15858.0,1600.0,17,3,2010,Alexander County,Illinois,Alexander
2,"Bond County, Illinois",51946.0,17904.0,39.3,24341.0,1870.0,17,5,2010,Bond County,Illinois,Bond
3,"Boone County, Illinois",61210.0,53567.0,35.8,26105.0,5508.0,17,7,2010,Boone County,Illinois,Boone
4,"Brown County, Illinois",38696.0,6897.0,37.6,17133.0,555.0,17,9,2010,Brown County,Illinois,Brown


In [10]:
# The long "<name> County" column is superseded by county2 -- remove it.
census_2010df = census_2010df.drop(columns="county")
census_2010df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,state,county2
0,"Adams County, Illinois",43824.0,67030.0,40.4,24308.0,7954.0,17,1,2010,Illinois,Adams
1,"Alexander County, Illinois",28833.0,8449.0,41.8,15858.0,1600.0,17,3,2010,Illinois,Alexander
2,"Bond County, Illinois",51946.0,17904.0,39.3,24341.0,1870.0,17,5,2010,Illinois,Bond
3,"Boone County, Illinois",61210.0,53567.0,35.8,26105.0,5508.0,17,7,2010,Illinois,Boone
4,"Brown County, Illinois",38696.0,6897.0,37.6,17133.0,555.0,17,9,2010,Illinois,Brown


In [11]:
# NAME has already been split into its parts, so the combined column can go.
census_2010df = census_2010df.drop(columns=["NAME"])
census_2010df.head()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,state,county2
0,43824.0,67030.0,40.4,24308.0,7954.0,17,1,2010,Illinois,Adams
1,28833.0,8449.0,41.8,15858.0,1600.0,17,3,2010,Illinois,Alexander
2,51946.0,17904.0,39.3,24341.0,1870.0,17,5,2010,Illinois,Bond
3,61210.0,53567.0,35.8,26105.0,5508.0,17,7,2010,Illinois,Boone
4,38696.0,6897.0,37.6,17133.0,555.0,17,9,2010,Illinois,Brown


In [12]:
# Give the cleaned county2 column its final name, "county".
census_2010df = census_2010df.rename({"county2": "county"}, axis="columns")
census_2010df.head()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,state,county
0,43824.0,67030.0,40.4,24308.0,7954.0,17,1,2010,Illinois,Adams
1,28833.0,8449.0,41.8,15858.0,1600.0,17,3,2010,Illinois,Alexander
2,51946.0,17904.0,39.3,24341.0,1870.0,17,5,2010,Illinois,Bond
3,61210.0,53567.0,35.8,26105.0,5508.0,17,7,2010,Illinois,Boone
4,38696.0,6897.0,37.6,17133.0,555.0,17,9,2010,Illinois,Brown


In [13]:
# Rebind `c` to a Census API client with year=2014 (replaces the 2010 client).
c = Census(api_key, year = 2014)

In [14]:
# County-level pull for 2014: every county ("*") within state FIPS "17".
census_2014 = c.acs5.state_county(
    (
        "NAME",
        "B19013_001E",
        "B01003_001E",
        "B01002_001E",
        "B19301_001E",
        "B17001_002E",
    ),
    "17",
    "*",
)

In [15]:
# Build the 2014 frame from the API records and replace the raw variable
# codes / geography keys with readable column names in one chained step.
census_2014df = pd.DataFrame(census_2014).rename(
    columns={
        "B19013_001E": "Household Income",
        "B01003_001E": "Population",
        "B01002_001E": "Median Age",
        "B19301_001E": "Per Capita Income",
        "B17001_002E": "Poverty Count",
        "state": "state_id",
        "county": "county_id",
    }
)
census_2014df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id
0,"Madison County, Illinois",53912.0,267937.0,39.2,28093.0,36365.0,17,119
1,"Mercer County, Illinois",51259.0,16204.0,44.3,26739.0,1736.0,17,131
2,"Sangamon County, Illinois",55565.0,198808.0,39.7,30594.0,29045.0,17,167
3,"Randolph County, Illinois",48901.0,33091.0,41.6,22771.0,3656.0,17,157
4,"Ogle County, Illinois",55894.0,52782.0,41.1,27337.0,5734.0,17,141


In [16]:
# Tag every 2014 row with its survey year (stored as a string) so the
# yearly frames stay distinguishable once they are stacked together.
census_year = "2014"
census_2014df = census_2014df.assign(census_year=census_year)
census_2014df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year
0,"Madison County, Illinois",53912.0,267937.0,39.2,28093.0,36365.0,17,119,2014
1,"Mercer County, Illinois",51259.0,16204.0,44.3,26739.0,1736.0,17,131,2014
2,"Sangamon County, Illinois",55565.0,198808.0,39.7,30594.0,29045.0,17,167,2014
3,"Randolph County, Illinois",48901.0,33091.0,41.6,22771.0,3656.0,17,157,2014
4,"Ogle County, Illinois",55894.0,52782.0,41.1,27337.0,5734.0,17,141,2014


In [17]:
# Split NAME ("<county> County, <state>") into separate county and state columns.
# Splitting on ", " (comma + space) keeps a stray leading blank out of the
# state value; n=1 guarantees exactly two output columns even if a name were
# to contain an additional comma.
census_2014df[["county", "state"]] = census_2014df["NAME"].str.split(", ", n=1, expand=True)
census_2014df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,county,state
0,"Madison County, Illinois",53912.0,267937.0,39.2,28093.0,36365.0,17,119,2014,Madison County,Illinois
1,"Mercer County, Illinois",51259.0,16204.0,44.3,26739.0,1736.0,17,131,2014,Mercer County,Illinois
2,"Sangamon County, Illinois",55565.0,198808.0,39.7,30594.0,29045.0,17,167,2014,Sangamon County,Illinois
3,"Randolph County, Illinois",48901.0,33091.0,41.6,22771.0,3656.0,17,157,2014,Randolph County,Illinois
4,"Ogle County, Illinois",55894.0,52782.0,41.1,27337.0,5734.0,17,141,2014,Ogle County,Illinois


In [18]:
# Reduce "<name> County" to the bare county name.
# Only the trailing " County" designation is removed; keeping just the first
# whitespace-separated word would truncate multi-word names
# (e.g. "Rock Island County" -> "Rock", "St. Clair County" -> "St.").
census_2014df['county2'] = census_2014df['county'].str.replace(r"\s+County$", "", regex=True)
census_2014df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,county,state,county2
0,"Madison County, Illinois",53912.0,267937.0,39.2,28093.0,36365.0,17,119,2014,Madison County,Illinois,Madison
1,"Mercer County, Illinois",51259.0,16204.0,44.3,26739.0,1736.0,17,131,2014,Mercer County,Illinois,Mercer
2,"Sangamon County, Illinois",55565.0,198808.0,39.7,30594.0,29045.0,17,167,2014,Sangamon County,Illinois,Sangamon
3,"Randolph County, Illinois",48901.0,33091.0,41.6,22771.0,3656.0,17,157,2014,Randolph County,Illinois,Randolph
4,"Ogle County, Illinois",55894.0,52782.0,41.1,27337.0,5734.0,17,141,2014,Ogle County,Illinois,Ogle


In [19]:
# The long "<name> County" column is superseded by county2 -- remove it.
census_2014df = census_2014df.drop(columns="county")
census_2014df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,state,county2
0,"Madison County, Illinois",53912.0,267937.0,39.2,28093.0,36365.0,17,119,2014,Illinois,Madison
1,"Mercer County, Illinois",51259.0,16204.0,44.3,26739.0,1736.0,17,131,2014,Illinois,Mercer
2,"Sangamon County, Illinois",55565.0,198808.0,39.7,30594.0,29045.0,17,167,2014,Illinois,Sangamon
3,"Randolph County, Illinois",48901.0,33091.0,41.6,22771.0,3656.0,17,157,2014,Illinois,Randolph
4,"Ogle County, Illinois",55894.0,52782.0,41.1,27337.0,5734.0,17,141,2014,Illinois,Ogle


In [20]:
# NAME has already been split into its parts, so the combined column can go.
census_2014df = census_2014df.drop(columns=["NAME"])
census_2014df.head()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,state,county2
0,53912.0,267937.0,39.2,28093.0,36365.0,17,119,2014,Illinois,Madison
1,51259.0,16204.0,44.3,26739.0,1736.0,17,131,2014,Illinois,Mercer
2,55565.0,198808.0,39.7,30594.0,29045.0,17,167,2014,Illinois,Sangamon
3,48901.0,33091.0,41.6,22771.0,3656.0,17,157,2014,Illinois,Randolph
4,55894.0,52782.0,41.1,27337.0,5734.0,17,141,2014,Illinois,Ogle


In [21]:
# Give the cleaned county2 column its final name, "county".
census_2014df = census_2014df.rename({"county2": "county"}, axis="columns")
census_2014df.head()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,state,county
0,53912.0,267937.0,39.2,28093.0,36365.0,17,119,2014,Illinois,Madison
1,51259.0,16204.0,44.3,26739.0,1736.0,17,131,2014,Illinois,Mercer
2,55565.0,198808.0,39.7,30594.0,29045.0,17,167,2014,Illinois,Sangamon
3,48901.0,33091.0,41.6,22771.0,3656.0,17,157,2014,Illinois,Randolph
4,55894.0,52782.0,41.1,27337.0,5734.0,17,141,2014,Illinois,Ogle


In [22]:
# Rebind `c` to a Census API client with year=2018 (replaces the 2014 client).
c = Census(api_key, year = 2018)

In [23]:
# County-level pull for 2018: every county ("*") within state FIPS "17".
census_2018 = c.acs5.state_county(
    (
        "NAME",
        "B19013_001E",
        "B01003_001E",
        "B01002_001E",
        "B19301_001E",
        "B17001_002E",
    ),
    "17",
    "*",
)

In [24]:
# Build the 2018 frame from the API records and replace the raw variable
# codes / geography keys with readable column names in one chained step.
census_2018df = pd.DataFrame(census_2018).rename(
    columns={
        "B19013_001E": "Household Income",
        "B01003_001E": "Population",
        "B01002_001E": "Median Age",
        "B19301_001E": "Per Capita Income",
        "B17001_002E": "Poverty Count",
        "state": "state_id",
        "county": "county_id",
    }
)
census_2018df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id
0,"Hancock County, Illinois",51549.0,18112.0,46.0,26880.0,2132.0,17,67
1,"Madison County, Illinois",57890.0,265670.0,40.1,30802.0,35625.0,17,119
2,"Grundy County, Illinois",74140.0,50509.0,37.6,33308.0,4389.0,17,63
3,"Kankakee County, Illinois",57981.0,111061.0,37.6,27164.0,15794.0,17,91
4,"Mercer County, Illinois",58011.0,15693.0,44.8,28995.0,1762.0,17,131


In [25]:
# Tag every 2018 row with its survey year (stored as a string) so the
# yearly frames stay distinguishable once they are stacked together.
census_year = "2018"
census_2018df = census_2018df.assign(census_year=census_year)
census_2018df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year
0,"Hancock County, Illinois",51549.0,18112.0,46.0,26880.0,2132.0,17,67,2018
1,"Madison County, Illinois",57890.0,265670.0,40.1,30802.0,35625.0,17,119,2018
2,"Grundy County, Illinois",74140.0,50509.0,37.6,33308.0,4389.0,17,63,2018
3,"Kankakee County, Illinois",57981.0,111061.0,37.6,27164.0,15794.0,17,91,2018
4,"Mercer County, Illinois",58011.0,15693.0,44.8,28995.0,1762.0,17,131,2018


In [26]:
# Split NAME ("<county> County, <state>") into separate county and state columns.
# Splitting on ", " (comma + space) keeps a stray leading blank out of the
# state value; n=1 guarantees exactly two output columns even if a name were
# to contain an additional comma.
census_2018df[["county", "state"]] = census_2018df["NAME"].str.split(", ", n=1, expand=True)
census_2018df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,county,state
0,"Hancock County, Illinois",51549.0,18112.0,46.0,26880.0,2132.0,17,67,2018,Hancock County,Illinois
1,"Madison County, Illinois",57890.0,265670.0,40.1,30802.0,35625.0,17,119,2018,Madison County,Illinois
2,"Grundy County, Illinois",74140.0,50509.0,37.6,33308.0,4389.0,17,63,2018,Grundy County,Illinois
3,"Kankakee County, Illinois",57981.0,111061.0,37.6,27164.0,15794.0,17,91,2018,Kankakee County,Illinois
4,"Mercer County, Illinois",58011.0,15693.0,44.8,28995.0,1762.0,17,131,2018,Mercer County,Illinois


In [27]:
# Reduce "<name> County" to the bare county name.
# Only the trailing " County" designation is removed; keeping just the first
# whitespace-separated word would truncate multi-word names
# (e.g. "Rock Island County" -> "Rock", "St. Clair County" -> "St.").
census_2018df['county2'] = census_2018df['county'].str.replace(r"\s+County$", "", regex=True)
census_2018df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,county,state,county2
0,"Hancock County, Illinois",51549.0,18112.0,46.0,26880.0,2132.0,17,67,2018,Hancock County,Illinois,Hancock
1,"Madison County, Illinois",57890.0,265670.0,40.1,30802.0,35625.0,17,119,2018,Madison County,Illinois,Madison
2,"Grundy County, Illinois",74140.0,50509.0,37.6,33308.0,4389.0,17,63,2018,Grundy County,Illinois,Grundy
3,"Kankakee County, Illinois",57981.0,111061.0,37.6,27164.0,15794.0,17,91,2018,Kankakee County,Illinois,Kankakee
4,"Mercer County, Illinois",58011.0,15693.0,44.8,28995.0,1762.0,17,131,2018,Mercer County,Illinois,Mercer


In [28]:
# The long "<name> County" column is superseded by county2 -- remove it.
census_2018df = census_2018df.drop(columns="county")
census_2018df.head()

Unnamed: 0,NAME,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,state,county2
0,"Hancock County, Illinois",51549.0,18112.0,46.0,26880.0,2132.0,17,67,2018,Illinois,Hancock
1,"Madison County, Illinois",57890.0,265670.0,40.1,30802.0,35625.0,17,119,2018,Illinois,Madison
2,"Grundy County, Illinois",74140.0,50509.0,37.6,33308.0,4389.0,17,63,2018,Illinois,Grundy
3,"Kankakee County, Illinois",57981.0,111061.0,37.6,27164.0,15794.0,17,91,2018,Illinois,Kankakee
4,"Mercer County, Illinois",58011.0,15693.0,44.8,28995.0,1762.0,17,131,2018,Illinois,Mercer


In [29]:
# NAME has already been split into its parts, so the combined column can go.
census_2018df = census_2018df.drop(columns=["NAME"])
census_2018df.head()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,state,county2
0,51549.0,18112.0,46.0,26880.0,2132.0,17,67,2018,Illinois,Hancock
1,57890.0,265670.0,40.1,30802.0,35625.0,17,119,2018,Illinois,Madison
2,74140.0,50509.0,37.6,33308.0,4389.0,17,63,2018,Illinois,Grundy
3,57981.0,111061.0,37.6,27164.0,15794.0,17,91,2018,Illinois,Kankakee
4,58011.0,15693.0,44.8,28995.0,1762.0,17,131,2018,Illinois,Mercer


In [30]:
# Give the cleaned county2 column its final name, "county".
census_2018df = census_2018df.rename({"county2": "county"}, axis="columns")
census_2018df.head()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,state,county
0,51549.0,18112.0,46.0,26880.0,2132.0,17,67,2018,Illinois,Hancock
1,57890.0,265670.0,40.1,30802.0,35625.0,17,119,2018,Illinois,Madison
2,74140.0,50509.0,37.6,33308.0,4389.0,17,63,2018,Illinois,Grundy
3,57981.0,111061.0,37.6,27164.0,15794.0,17,91,2018,Illinois,Kankakee
4,58011.0,15693.0,44.8,28995.0,1762.0,17,131,2018,Illinois,Mercer


In [31]:
# Summary statistics for the numeric 2010 columns; the `count` row shows how
# many rows the frame holds, which is compared against the combined frame later.
census_2010df.describe()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count
count,102.0,102.0,102.0,102.0,102.0
mean,47184.95098,124954.5,40.059804,23552.980392,15412.235294
std,10286.935272,524836.8,3.65598,3839.921289,77179.066606
min,27578.0,4402.0,28.4,15858.0,484.0
25%,40917.25,14810.25,39.025,21150.25,1584.25
50%,45141.0,27619.5,40.85,23197.5,2795.5
75%,50806.75,53575.5,42.3,25051.0,7365.5
max,79897.0,5172848.0,46.6,38120.0,778340.0


In [32]:
# Summary statistics for the numeric 2014 columns; the `count` row shows how
# many rows the frame holds, which is compared against the combined frame later.
census_2014df.describe()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count
count,102.0,102.0,102.0,102.0,102.0
mean,49765.519608,126164.2,41.046078,25164.137255,17749.705882
std,10025.998361,530701.6,3.846657,3936.257704,88033.52162
min,25495.0,4226.0,29.2,14052.0,521.0
25%,43312.5,14532.5,39.625,22666.0,1772.5
50%,48284.5,27074.0,41.65,24808.5,3252.0
75%,53085.75,53917.5,43.275,26899.75,7505.5
max,83844.0,5227827.0,49.1,38931.0,886261.0


In [33]:
# Summary statistics for the numeric 2018 columns; the `count` row shows how
# many rows the frame holds, which is compared against the combined frame later.
census_2018df.describe()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count
count,102.0,102.0,102.0,102.0,102.0
mean,54839.617647,125701.0,41.740196,28297.509804,16035.323529
std,11021.734681,530581.9,3.80282,4420.2139,77314.395264
min,33799.0,4009.0,29.9,19065.0,482.0
25%,48699.25,14057.5,40.1,25584.5,1744.75
50%,52671.0,26200.0,42.3,27577.0,3051.0
75%,57961.0,53138.5,43.6,29872.0,7295.25
max,91764.0,5223719.0,53.7,44287.0,778020.0


In [34]:
# Start the combined frame from the 2010 data.
# NOTE: plain assignment binds a second name to the same DataFrame object;
# no copy is made here.
census_all = census_2010df

In [35]:
# Baseline summary of the combined frame before any other year is added;
# the `count` row gives the starting number of rows.
census_all.describe()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count
count,102.0,102.0,102.0,102.0,102.0
mean,47184.95098,124954.5,40.059804,23552.980392,15412.235294
std,10286.935272,524836.8,3.65598,3839.921289,77179.066606
min,27578.0,4402.0,28.4,15858.0,484.0
25%,40917.25,14810.25,39.025,21150.25,1584.25
50%,45141.0,27619.5,40.85,23197.5,2795.5
75%,50806.75,53575.5,42.3,25051.0,7365.5
max,79897.0,5172848.0,46.6,38120.0,778340.0


In [36]:
# Stack the 2014 rows under the existing data, then view the description to
# confirm (via the `count` row) that all rows were added.
# pd.concat replaces DataFrame.append, which was deprecated and later removed
# from pandas; ignore_index=True renumbers the combined rows 0..n-1 as before.
# NOTE: re-running this cell adds the 2014 rows again -- run it once per session.
census_all = pd.concat([census_all, census_2014df], ignore_index=True)
census_all.describe()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count
count,204.0,204.0,204.0,204.0,204.0
mean,48475.235294,125559.3,40.552941,24358.558824,16580.970588
std,10214.482084,526476.2,3.775777,3961.973367,82588.542919
min,25495.0,4226.0,28.4,14052.0,484.0
25%,41765.25,14679.5,39.2,21879.5,1723.5
50%,46688.0,27221.5,41.4,24012.0,3088.5
75%,52267.5,53742.5,42.725,26390.75,7528.5
max,83844.0,5227827.0,49.1,38931.0,886261.0


In [37]:
# Stack the 2018 rows under the existing data, then view the description to
# confirm (via the `count` row) that all rows were added.
# pd.concat replaces DataFrame.append, which was deprecated and later removed
# from pandas; ignore_index=True renumbers the combined rows 0..n-1 as before.
# NOTE: re-running this cell adds the 2018 rows again -- run it once per session.
census_all = pd.concat([census_all, census_2018df], ignore_index=True)
census_all.describe()

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count
count,306.0,306.0,306.0,306.0,306.0
mean,50596.696078,125606.5,40.948693,25671.542484,16399.088235
std,10895.004513,526977.8,3.819932,4514.075779,80742.160345
min,25495.0,4009.0,28.4,14052.0,482.0
25%,43340.5,14454.25,39.325,22613.75,1736.75
50%,49084.5,26670.0,41.6,25067.0,3057.5
75%,54640.25,53642.75,43.1,28068.0,7511.25
max,91764.0,5227827.0,53.7,44287.0,886261.0


In [38]:
# Display the combined frame holding the 2010, 2014 and 2018 rows.
census_all

Unnamed: 0,Household Income,Population,Median Age,Per Capita Income,Poverty Count,state_id,county_id,census_year,state,county
0,43824.0,67030.0,40.4,24308.0,7954.0,17,001,2010,Illinois,Adams
1,28833.0,8449.0,41.8,15858.0,1600.0,17,003,2010,Illinois,Alexander
2,51946.0,17904.0,39.3,24341.0,1870.0,17,005,2010,Illinois,Bond
3,61210.0,53567.0,35.8,26105.0,5508.0,17,007,2010,Illinois,Boone
4,38696.0,6897.0,37.6,17133.0,555.0,17,009,2010,Illinois,Brown
...,...,...,...,...,...,...,...,...,...,...
301,56747.0,22069.0,42.7,28924.0,1944.0,17,083,2018,Illinois,Jersey
302,44432.0,21724.0,41.5,22260.0,3551.0,17,051,2018,Illinois,Fayette
303,57899.0,29207.0,40.4,27187.0,2143.0,17,107,2018,Illinois,Logan
304,41167.0,24231.0,42.0,23690.0,5260.0,17,165,2018,Illinois,Saline


In [39]:
# Distinct county ids and county names (in order of first appearance) that
# feed the county primary-key table.
county_namex = census_all.county.unique()
county_idx = census_all.county_id.unique()

# Number of distinct county ids found.
print(len(county_idx))

102


In [40]:
# Create the county primary-key table (one row per county_id).
# The id/name pairs are taken from the same rows of census_all so they cannot
# drift out of alignment; de-duplicating on county_id keeps the first name
# seen for each id and guarantees the key column is unique. Building the table
# from two independently computed unique() arrays would raise on a length
# mismatch or silently mis-pair ids and names if the mapping were not 1:1.
county_pk_df = (
    census_all[["county_id", "county"]]
    .drop_duplicates(subset="county_id")
    .rename(columns={"county": "county_name"})
    .reset_index(drop=True)
)
county_pk_df

Unnamed: 0,county_id,county_name
0,001,Adams
1,003,Alexander
2,005,Bond
3,007,Boone
4,009,Brown
...,...,...
97,195,Whiteside
98,197,Will
99,199,Williamson
100,201,Winnebago


In [41]:
# Write the county key table and the combined census data to CSV.
# to_csv does not create missing folders, so make sure the output directory
# exists first; exist_ok=True makes this safe to re-run.
output_dir = Path("census_data")
output_dir.mkdir(parents=True, exist_ok=True)
county_pk_df.to_csv(output_dir / "county_id.csv")
census_all.to_csv(output_dir / "census.csv")
