In [1]:
# Dependencies
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from tqdm import tqdm_notebook
# Census API Key
from config import api_key
# Client object for the Census API wrapper, built from the key in config.py.
# year=2017 is passed at construction — presumably the default data vintage
# used by later queries; confirm against the wrapper's documentation.
c = Census(api_key, year=2017)

In [2]:
# Query the ACS 5-year endpoint for every county ('county:*'), using the
# client `c` created above with year=2017.
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels

# Name plus income, population, age and poverty variables.
summary_fields = ("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                  "B19301_001E", "B17001_002E")

# Educational attainment (B15003): detail lines 002-025, then the 001 total.
education_fields = tuple("B15003_{:03d}E".format(line) for line in range(2, 26))
education_fields += ("B15003_001E",)

# Marital status (B12001): only the lines requested by the original query.
marital_lines = (1, 3, 4, 7, 9, 10, 11, 12, 13, 16, 18, 19)
marital_fields = tuple("B12001_{:03d}E".format(line) for line in marital_lines)

census_data = c.acs5.get(summary_fields + education_fields + marital_fields,
                         {'for': 'county:*'})


# Map raw Census variable codes to readable column labels.
general_labels = {
    'NAME': 'Name',
    'county': 'County',
    'B01003_001E': 'Population',
    'B01002_001E': 'Median Age',
    'B19013_001E': 'Household Income',
    'B19301_001E': 'Per Capita Income',
    'B17001_002E': 'Poverty Count',
}

# B15003 labels listed in line order: position 1 -> B15003_001E, ..., 25 -> B15003_025E.
education_levels = [
    'Total educational attainment over 25',
    'No School',
    'Nursery school',
    'Kindergarten',
    '1st grade',
    '2nd grade',
    '3rd grade',
    '4th grade',
    '5th grade',
    '6th grade',
    '7th grade',
    '8th grade',
    '9th grade',
    '10th grade',
    '11th grade',
    '12th grade No Diploma',
    'HS Diploma',
    'GED',
    'Some College less than 1 year',
    'Some College More than 1 year',
    "Associate's Degree Count",
    "Bachelor's Degree Count",
    "Master's Degree Count",
    'Professional School Degree Count',
    'Doctorate Degree Count',
]
education_labels = {
    'B15003_{:03d}E'.format(line): label
    for line, label in enumerate(education_levels, start=1)
}

marital_labels = {
    'B12001_001E': 'Over 15 Marital Status',
    'B12001_003E': 'Male Never Married',
    'B12001_004E': 'Male Married',
    'B12001_007E': 'Male Spouse Seperated',
    'B12001_009E': 'Male Widowed',
    'B12001_010E': 'Male Divorced',
    'B12001_012E': 'Female Never Married',
    'B12001_013E': 'Female Married',
    'B12001_016E': 'Female Spouse Seperated',
    'B12001_018E': 'Female Widowed',
    'B12001_019E': 'Female Divorced',
}

column_labels = dict(general_labels)
column_labels.update(education_labels)
column_labels.update(marital_labels)

# Build the DataFrame from the API records and rename the columns in one pass.
# Codes without an entry in the mapping keep their original name.
census_df = pd.DataFrame(census_data).rename(columns=column_labels)

def _int_total(frame, columns):
    """Return the row-wise sum of ``columns`` from ``frame``, each cast to int.

    Replaces the long chains of ``frame[col].astype(int) + ...``; the cast is
    the same one the original code applied to every column individually.
    """
    return frame[list(columns)].astype(int).sum(axis=1)


# Share of the total population counted as below the poverty level, in percent.
census_df["Poverty Rate"] = (100 * census_df["Poverty Count"].astype(int)
                             / census_df["Population"].astype(int))

# --- Educational attainment, collapsed into four groups -------------------
census_df["Total College Degrees"] = _int_total(census_df, [
    "Associate's Degree Count", "Bachelor's Degree Count",
    "Master's Degree Count", 'Professional School Degree Count',
    'Doctorate Degree Count',
])
census_df['Total No HS Diploma'] = _int_total(census_df, [
    'No School', 'Nursery school', 'Kindergarten',
    '1st grade', '2nd grade', '3rd grade', '4th grade', '5th grade',
    '6th grade', '7th grade', '8th grade', '9th grade', '10th grade',
    '11th grade', '12th grade No Diploma',
])
census_df['Total HS Diploma'] = _int_total(census_df, ['HS Diploma', 'GED'])
census_df['Total Some College'] = _int_total(census_df, [
    'Some College less than 1 year', 'Some College More than 1 year',
])

# Sum of the four groups; compared with the published total further down.
census_df['Education Check'] = _int_total(census_df, [
    'Total College Degrees', 'Total No HS Diploma',
    'Total HS Diploma', 'Total Some College',
])

# --- Marital status, male and female counts combined ----------------------
census_df["Never Married"] = _int_total(census_df, ['Male Never Married', 'Female Never Married'])
census_df["Married"] = _int_total(census_df, ['Male Married', 'Female Married'])
census_df["Widowed"] = _int_total(census_df, ['Male Widowed', 'Female Widowed'])
census_df["Divorced"] = _int_total(census_df, ['Male Divorced', 'Female Divorced'])

# Sum of the four marital groups; compared with the published total below.
census_df['Married Check'] = _int_total(census_df, [
    'Married', 'Widowed', 'Divorced', "Never Married",
])

# The check columns used to be computed and then dropped without ever being
# looked at.  Compare each against its published total and warn (without
# stopping the notebook) if any row disagrees.
for check_col, total_col in (
        ('Education Check', 'Total educational attainment over 25'),
        ('Married Check', 'Over 15 Marital Status')):
    mismatched_rows = int((census_df[check_col]
                           != census_df[total_col].astype(int)).sum())
    if mismatched_rows:
        warnings.warn("%d row(s) where '%s' differs from '%s'"
                      % (mismatched_rows, check_col, total_col))
# Keep only the reporting columns, in presentation order; every other column
# (raw detail counts and intermediate sums) is dropped here.
final_columns = [
    'Name', 'Population', 'Median Age', 'Household Income',
    'Per Capita Income', 'Poverty Count', 'Poverty Rate',
    'Total No HS Diploma', 'Total HS Diploma', 'Total Some College',
    'Total College Degrees', 'Total educational attainment over 25',
    'Married', 'Widowed', 'Divorced', 'Over 15 Marital Status',
]
census_df = census_df[final_columns]

# Row count of the final table.
print(len(census_df))

3220


In [3]:
# Overwrite negative entries with 0 in the columns below, in place.
# NOTE(review): negatives are presumably missing-data placeholders from the
# API rather than real measurements — confirm before relying on the zeros.
for column in ('Household Income', 'Median Age', 'Per Capita Income'):
    is_negative = census_df[column] < 0
    census_df.loc[is_negative, column] = 0

In [4]:
# Order rows alphabetically by county name and renumber the index from 0,
# then preview the first rows.
census_df = (
    census_df
    .sort_values(by='Name', ascending=True)
    .reset_index(drop=True)
)
census_df.head()

Unnamed: 0,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Total No HS Diploma,Total HS Diploma,Total Some College,Total College Degrees,Total educational attainment over 25,Married,Widowed,Divorced,Over 15 Marital Status
0,"Abbeville County, South Carolina",24788.0,43.7,35254.0,19234.0,5474.0,22.083266,3687,5864,3264,4295,17110.0,9887,2010,2202,20495.0
1,"Acadia Parish, Louisiana",62607.0,35.8,40492.0,21591.0,13196.0,21.077515,8984,16381,7384,7444,40193.0,24605,3280,5796,48650.0
2,"Accomack County, Virginia",32840.0,46.0,42260.0,24266.0,6416.0,19.53715,4199,9193,4073,6093,23558.0,15458,2285,2341,27087.0
3,"Ada County, Idaho",435117.0,36.4,60151.0,31642.0,50474.0,11.600098,15350,64701,73533,135790,289374.0,184610,14724,41950,345953.0
4,"Adair County, Iowa",7192.0,45.7,49477.0,28861.0,661.0,9.190768,309,2323,1096,1439,5167.0,3598,477,599,5917.0


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
census_df.describe()

Unnamed: 0,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Total No HS Diploma,Total HS Diploma,Total Some College,Total College Degrees,Total educational attainment over 25,Married,Widowed,Divorced,Over 15 Marital Status
count,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0
mean,100768.1,41.123882,48994.96677,25657.03323,14656.39,16.174182,8708.924,18556.83,14047.18,26594.66,67907.59,40892.36,4774.322671,8884.66118,81637.88
std,324499.6,5.325031,13877.178398,6667.520452,51162.98,8.078167,37386.09,50023.59,43549.33,92234.12,218080.7,123987.2,13796.25809,25607.396501,262216.3
min,74.0,21.6,11680.0,5943.0,10.0,2.424187,4.0,20.0,15.0,8.0,64.0,34.0,7.0,7.0,67.0
25%,11213.5,38.0,40622.0,21568.0,1664.5,11.055315,1048.0,2797.25,1613.0,1884.5,7743.75,4963.0,681.75,1083.25,9079.75
50%,25847.5,41.1,47636.5,25139.0,4240.5,14.808937,2646.0,6370.5,3648.5,4707.0,17864.5,11085.5,1531.5,2548.5,21318.0
75%,66608.25,44.2,55476.0,28997.0,10791.75,19.108649,6052.0,14943.5,9783.5,13967.0,44831.25,28154.25,3577.0,6458.0,54563.5
max,10105720.0,66.4,129588.0,69529.0,1688505.0,65.146881,1485760.0,1408905.0,1311907.0,2595279.0,6801851.0,3707246.0,409379.0,681281.0,8227851.0


In [7]:
# Save the county table as UTF-8 CSV under a relative path; index=False leaves
# the row index out of the file.
census_df.to_csv('Project1_County_Data.csv',encoding="utf-8", index=False)