In [1]:
# Dependencies
from matplotlib import pyplot as plt
from scipy.stats import linregress
import numpy as np
from sklearn import datasets 
import pandas as pd
import time
import requests
from census import Census
# Raise pandas' display limit so up to 30,000 rows are rendered when a
# DataFrame is shown.
# NOTE(review): rendering frames this large bloats the notebook output;
# consider inspecting data with .head() / .sample() instead.
pd.set_option('display.max_rows', 30000)
# Census API Key
# The key is imported from a local `config` module rather than written inline.
from config import census_key
# Census client constructed with year=2015; the data cells query it through
# `c.acs5.get(...)`.
c = Census(census_key, year=2015)
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas warnings such as
# SettingWithCopyWarning; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

### Retrieve demographics data from census.gov

In [2]:
# Education
# Fetch the total population plus five education-level counts for every Census
# place, tidy the place/state labels, and export the table for the analysis step.
education_census_data = c.acs5.get(
    ("NAME", "B01001_001E", "B06009_002E", "B06009_003E", "B06009_004E",
     "B06009_005E", "B06009_006E"),
    {'for': 'place:*'},
)
education_census_df = pd.DataFrame(education_census_data)

# Split NAME on its LAST comma only. A plain split(",") produces three or more
# pieces for any place whose own name contains a comma, and the two-column
# assignment below would then fail.
education_census_df[['City', 'State']] = education_census_df["NAME"].str.rsplit(",", n=1, expand=True)

education_clean_census_df = (
    education_census_df[['State', 'City', 'B01001_001E', 'B06009_002E', 'B06009_003E',
                         'B06009_004E', 'B06009_005E', 'B06009_006E']]
    .rename(columns={"B01001_001E": "Population",
                     "B06009_002E": "None",
                     "B06009_003E": "High School",
                     "B06009_004E": "Associates",
                     "B06009_005E": "Bachelors",
                     "B06009_006E": "Graduate"})
    .assign(
        # Remove one trailing place-type word. str.rstrip("CDPcity...") treats
        # its argument as a SET of characters, so it also ate real name
        # endings (e.g. "Carson City" -> "Carson"). An anchored regex removes
        # only a whole trailing word. The word list is the union of the
        # suffixes used across this notebook's demographic cells so that City
        # values are comparable between the exported CSVs.
        City=lambda frame: (
            frame["City"]
            .str.strip()
            .str.replace(r"\s+(?:CDP|city|County|town|Town|village)$", "", regex=True)
            .str.strip()
        ),
        # Vectorised strip replaces the per-row chained assignment
        # (df["State"][i] = ...), which is slow and may write to a temporary copy.
        State=lambda frame: frame["State"].str.strip(),
    )
    .sort_values("State")
)

education_clean_census_df.to_csv("../Analysis/education.csv", index=False)

In [3]:
# Race
# Fetch the total population plus eight race/ethnicity counts for every Census
# place, tidy the place/state labels, and export the table for the analysis step.
race_census_data = c.acs5.get(
    ("NAME", "B01001_001E", "B01001B_001E", "B01001C_001E", "B01001D_001E",
     "B01001E_001E", "B01001I_001E", "B01001H_001E", "B01001F_001E", "B01001G_001E"),
    {'for': 'place:*'},
)
race_census_df = pd.DataFrame(race_census_data)

# Split NAME on its LAST comma only. A plain split(",") produces three or more
# pieces for any place whose own name contains a comma, and the two-column
# assignment below would then fail.
race_census_df[['City', 'State']] = race_census_df["NAME"].str.rsplit(",", n=1, expand=True)

race_clean_census_df = (
    race_census_df[['State', 'City', "B01001_001E", "B01001B_001E", "B01001C_001E",
                    "B01001D_001E", "B01001E_001E", "B01001I_001E", "B01001H_001E",
                    "B01001F_001E", "B01001G_001E"]]
    .rename(columns={"B01001_001E": "Population",
                     "B01001B_001E": "Black",
                     "B01001C_001E": "American Indian/Alaskan Native",
                     "B01001D_001E": "Asian",
                     "B01001E_001E": "Native Hawaiian / Pacific Islander",
                     "B01001I_001E": "Hispanic/Latino",
                     "B01001H_001E": "White",
                     "B01001G_001E": "Two or More",
                     "B01001F_001E": "Other"})
    .assign(
        # Remove one trailing place-type word. str.rstrip("CDPcity...") treats
        # its argument as a SET of characters, so it also ate real name
        # endings (e.g. "Carson City" -> "Carson"). An anchored regex removes
        # only a whole trailing word. The word list is the union of the
        # suffixes used across this notebook's demographic cells so that City
        # values are comparable between the exported CSVs.
        City=lambda frame: (
            frame["City"]
            .str.strip()
            .str.replace(r"\s+(?:CDP|city|County|town|Town|village)$", "", regex=True)
            .str.strip()
        ),
        # Vectorised strip replaces the per-row chained assignment
        # (df["State"][i] = ...), which is slow and may write to a temporary copy.
        State=lambda frame: frame["State"].str.strip(),
    )
    .sort_values("State")
)

race_clean_census_df.to_csv("../Analysis/race.csv", index=False)

In [4]:
# Income
# Fetch the total population and the median-income figure for every Census
# place, tidy the place/state labels, and export the table for the analysis step.
income_census_data = c.acs5.get(("NAME", "B01001_001E", "B06011_001E"), {'for': 'place:*'})
income_census_df = pd.DataFrame(income_census_data)

# Split NAME on its LAST comma only. A plain split(",") produces three or more
# pieces for any place whose own name contains a comma, and the two-column
# assignment below would then fail.
income_census_df[['City', 'State']] = income_census_df["NAME"].str.rsplit(",", n=1, expand=True)

income_clean_census_df = (
    income_census_df[['State', 'City', "B01001_001E", "B06011_001E"]]
    .rename(columns={"B01001_001E": "Population",
                     "B06011_001E": "Median Income"})
    .assign(
        # Remove one trailing place-type word. str.rstrip("CDPcity...") treats
        # its argument as a SET of characters, so it also ate real name
        # endings (e.g. "Carson City" -> "Carson"). An anchored regex removes
        # only a whole trailing word. The word list is the union of the
        # suffixes used across this notebook's demographic cells so that City
        # values are comparable between the exported CSVs.
        City=lambda frame: (
            frame["City"]
            .str.strip()
            .str.replace(r"\s+(?:CDP|city|County|town|Town|village)$", "", regex=True)
            .str.strip()
        ),
        # Vectorised strip replaces the per-row chained assignment
        # (df["State"][i] = ...), which is slow and may write to a temporary copy.
        State=lambda frame: frame["State"].str.strip(),
    )
    .sort_values("State")
)

income_clean_census_df.to_csv("../Analysis/income.csv", index=False)

In [5]:
# Marital_Status
# Fetch the total population plus five marital-status counts for every Census
# place, tidy the place/state labels, and export the table for the analysis step.
marital_status_census_data = c.acs5.get(
    ("NAME", "B01001_001E", "B06008_002E", "B06008_003E", "B06008_004E",
     "B06008_005E", "B06008_006E"),
    {'for': 'place:*'},
)
marital_status_census_df = pd.DataFrame(marital_status_census_data)

# Split NAME on its LAST comma only. A plain split(",") produces three or more
# pieces for any place whose own name contains a comma, and the two-column
# assignment below would then fail.
marital_status_census_df[['City', 'State']] = marital_status_census_df["NAME"].str.rsplit(",", n=1, expand=True)

marital_status_clean_census_df = (
    marital_status_census_df[['State', 'City', 'B01001_001E', 'B06008_002E', 'B06008_003E',
                              'B06008_004E', 'B06008_005E', 'B06008_006E']]
    .rename(columns={"B01001_001E": "Population",
                     "B06008_002E": "Never Married",
                     "B06008_003E": "Married",
                     "B06008_004E": "Divorced",
                     "B06008_005E": "Separated",
                     "B06008_006E": "Widowed"})
    .assign(
        # Remove one trailing place-type word. str.rstrip("CDPcity...") treats
        # its argument as a SET of characters, so it also ate real name
        # endings (e.g. "Carson City" -> "Carson"). An anchored regex removes
        # only a whole trailing word. The word list is the union of the
        # suffixes used across this notebook's demographic cells so that City
        # values are comparable between the exported CSVs.
        City=lambda frame: (
            frame["City"]
            .str.strip()
            .str.replace(r"\s+(?:CDP|city|County|town|Town|village)$", "", regex=True)
            .str.strip()
        ),
        # Vectorised strip replaces the per-row chained assignment
        # (df["State"][i] = ...), which is slow and may write to a temporary copy.
        State=lambda frame: frame["State"].str.strip(),
    )
    .sort_values("State")
)

marital_status_clean_census_df.to_csv("../Analysis/marital_status.csv", index=False)

In [27]:
# Unemployment Count
# Fetch the employment-status total and the unemployed count for every Census
# place, tidy the place/state labels, and export the table for the analysis step.
#
# In ACS table B23025 (Employment Status for the Population 16 Years and Over)
# the "Unemployed" estimate is B23025_005E. This cell previously requested
# B23025_007E, which is "Not in labor force" (retirees, students, ...), so the
# "Unemployment Count" column was overstated.
# NOTE: "Population" here is B23025_001E, the table's own total (people aged
# 16 and over), not the all-ages B01001_001E total used by the other cells.
unemployment_count_census_data = c.acs5.get(("NAME", "B23025_001E", "B23025_005E"), {'for': 'place:*'})
unemployment_count_census_df = pd.DataFrame(unemployment_count_census_data)

# Split NAME on its LAST comma only. A plain split(",") produces three or more
# pieces for any place whose own name contains a comma, and the two-column
# assignment below would then fail.
unemployment_count_census_df[['City', 'State']] = unemployment_count_census_df["NAME"].str.rsplit(",", n=1, expand=True)

unemployment_count_clean_census_df = (
    unemployment_count_census_df[['State', 'City', "B23025_001E", "B23025_005E"]]
    .rename(columns={"B23025_001E": "Population",
                     "B23025_005E": "Unemployment Count"})
    .assign(
        # Remove one trailing place-type word. str.rstrip("CDPcity...") treats
        # its argument as a SET of characters, so it also ate real name
        # endings (e.g. "Carson City" -> "Carson"). An anchored regex removes
        # only a whole trailing word. The word list is the union of the
        # suffixes used across this notebook's demographic cells so that City
        # values are comparable between the exported CSVs.
        City=lambda frame: (
            frame["City"]
            .str.strip()
            .str.replace(r"\s+(?:CDP|city|County|town|Town|village)$", "", regex=True)
            .str.strip()
        ),
        # Vectorised strip replaces the per-row chained assignment
        # (df["State"][i] = ...), which is slow and may write to a temporary copy.
        State=lambda frame: frame["State"].str.strip(),
    )
    .sort_values("State")
)

unemployment_count_clean_census_df.to_csv("../Analysis/unemployment.csv", index=False)

In [8]:
# School Enrollment
# Fetch the total population plus five school-enrollment counts for every
# Census place, tidy the place/state labels, and export the table for the
# analysis step.
school_enrollment_census_data = c.acs5.get(
    ("NAME", "B01001_001E", "B14001_003E", "B14001_004E",
     "B14001_005E", "B14001_006E", "B14001_007E"),
    {'for': 'place:*'},
)
school_enrollment_census_df = pd.DataFrame(school_enrollment_census_data)

# Split NAME on its LAST comma only. A plain split(",") produces three or more
# pieces for any place whose own name contains a comma, and the two-column
# assignment below would then fail.
school_enrollment_census_df[['City', 'State']] = school_enrollment_census_df["NAME"].str.rsplit(",", n=1, expand=True)

school_enrollment_clean_census_df = (
    school_enrollment_census_df[['State', 'City', 'B01001_001E', 'B14001_003E',
                                 'B14001_004E', 'B14001_005E', 'B14001_006E', "B14001_007E"]]
    .rename(columns={"B01001_001E": "Population",
                     "B14001_003E": "Nursery-Pre-School",
                     "B14001_004E": "Kindergarten",
                     "B14001_005E": "1st-4th",
                     "B14001_006E": "5th-8th",
                     "B14001_007E": "9th-12th"})
    .assign(
        # Remove one trailing place-type word. str.rstrip("CDPcity...") treats
        # its argument as a SET of characters, so it also ate real name
        # endings (e.g. "Carson City" -> "Carson"). An anchored regex removes
        # only a whole trailing word. The word list is the union of the
        # suffixes used across this notebook's demographic cells so that City
        # values are comparable between the exported CSVs.
        City=lambda frame: (
            frame["City"]
            .str.strip()
            .str.replace(r"\s+(?:CDP|city|County|town|Town|village)$", "", regex=True)
            .str.strip()
        ),
        # Vectorised strip replaces the per-row chained assignment
        # (df["State"][i] = ...), which is slow and may write to a temporary copy.
        State=lambda frame: frame["State"].str.strip(),
    )
    .sort_values("State")
)

school_enrollment_clean_census_df.to_csv("../Analysis/school_enrollment.csv", index=False)