In [1]:
# Import Dependencies
import pandas as pd
from census import Census
from config import census_key
import cpi # this will be used to determine the cost of everything in today's dollars. cpi.inflate(amount,year of amount)
#install cpi with pip install cpi on command line
cpi.update()
low_memory=False

In [2]:
# Import CSVs 
filepath = "Resources/"
kent_zipcodes = pd.read_csv(filepath+"kent_zipcodes.csv",squeeze=True,header=None)
oakland_zipcodes = pd.read_csv(filepath+"oakland_zipcodes.csv",squeeze=True,header=None)
saginaw_zipcodes = pd.read_csv(filepath+"saginaw_zipcodes.csv",squeeze=True,header=None)



In [3]:
# Create dataframe to add census data to. Needed to put in starter values for it to function properly later. Will remove further down.
census_df = pd.DataFrame({"Year":[0],
                          "Zipcode":["0"],
                          "Total Population":[0],
                          "White Population":[0],
                          "Black Population":[0],
                          "Employed Count":[0],
                          "Unemployed Count":[0],
                          "Poverty Count":[0],
                          "Poverty Count - White":[0],
                          "Poverty Count - Black":[0],
                          "Median Income":[0],
                          "Median Rent":[0],
                          "Median Home Value":[0]})

# Get data from API for Census
census_df = census_df.astype({"Year":"int64","Zipcode":"object"})

# For loop to iterate through the desired 10-year period
for year in [2011,2012,2013,2014,2015,2016,2017,2018,2019,2020]: 
    c = Census(census_key,year) 
    census_data = c.acs5.get(("NAME", "B19013_001E", "B23025_004E", "B23025_005E", "B17001_002E", 
            "B17001A_002E","B17001B_002E", "B01003_001E",
            "B02001_002E", "B02001_003E","B25058_001E", "B25077_001E"
           ), {"for": "zip code tabulation area:*"})
    census_raw_df = pd.DataFrame(census_data)
    
# Add the year to the year column so each iteration can be distinguished
    year_count = len(census_data)
    year_series = pd.Series([year for x in range(year_count)])    
    census_raw_df["Year"] = year_series
    
# Rename Columns
    census_raw_df = census_raw_df.rename(columns = {
            "B19013_001E" : "Median Income", 
            "B23025_004E" : "Employed Count", 
            "B23025_005E" : "Unemployed Count", 
            "B17001_002E" : "Poverty Count", 
            "B17001A_002E" : "Poverty Count - White" , 
            "B17001B_002E" : "Poverty Count - Black" , 
            "B01003_001E" : "Total Population", 
            "B02001_002E" : "White Population", 
            "B02001_003E" : "Black Population", 
            "B25058_001E" : "Median Rent", 
            "B25077_001E" : "Median Home Value", 
            "NAME" : "Name",
            "zip code tabulation area": "Zipcode"})
    
# Recast all variables in the output dataframe to the correct type 
    census_raw_df = census_raw_df.astype({"Year":"int64","Zipcode":"object","Median Income":"float","Employed Count":"float","Unemployed Count":"float","Poverty Count":"float",
             "Poverty Count - White":"float", "Poverty Count - Black":"float", "Total Population":"float",
             "White Population":"float","Black Population":"float", "Median Rent":"float","Median Home Value":"float"})
    
# Recast the Zipcode in census_df to object. Was running into issues with the zipcode repeatedly trying to cast as a float.
    census_df = census_df.astype({"Zipcode":"object"})
    
# Merge the new raw df into the finished df. Use outer to maintain all values
    census_df = pd.merge(census_df,census_raw_df,how="outer")

In [4]:
# Remove the name column, which is useless here, and the state column, which only functioned for some years
census_df = census_df.drop(columns = ["Name","state"])

#Remove the sample data added to initialize the dataframe
census_df = census_df.loc[census_df["Year"] != 0]

Unnamed: 0,Year,Zipcode,Total Population,White Population,Black Population,Employed Count,Unemployed Count,Poverty Count,Poverty Count - White,Poverty Count - Black,Median Income,Median Rent,Median Home Value
1,2011,00601,18533,17121,261,4400.0,1427.0,10102.0,9342.0,51.0,13318.0,295.0,103200.0
2,2011,00602,41930,36014,1794,11507.0,4437.0,23401.0,20126.0,1199.0,14947.0,310.0,89300.0
3,2011,00603,54475,48593,1767,12996.0,3922.0,27735.0,24646.0,902.0,14437.0,235.0,116700.0
4,2011,00606,6386,6232,43,1874.0,160.0,4117.0,3992.0,24.0,11155.0,218.0,101000.0
5,2011,00610,29111,26688,700,8430.0,781.0,14230.0,12986.0,505.0,16367.0,310.0,109400.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
331196,2020,16623,552,550,0,201.0,18.0,80.0,80.0,0.0,51667.0,621.0,147900.0
331197,2020,16627,2118,1980,9,988.0,124.0,324.0,275.0,0.0,45000.0,392.0,75900.0
331198,2020,16634,315,311,0,127.0,19.0,24.0,24.0,0.0,51500.0,-666666666.0,97500.0
331199,2020,16640,707,701,3,266.0,12.0,167.0,167.0,0.0,55982.0,511.0,117500.0


In [13]:
## This is not working.
# kent_zipcodes = kent_zipcodes.values.tolist()
kent_df = census_df.query("Zipcode in @kent_zipcodes")
kent_df

Unnamed: 0,Year,Zipcode,Total Population,White Population,Black Population,Employed Count,Unemployed Count,Poverty Count,Poverty Count - White,Poverty Count - Black,Median Income,Median Rent,Median Home Value


In [14]:
test = census_df.loc[census_df["Zipcode"] == 48430]
test

Unnamed: 0,Year,Zipcode,Total Population,White Population,Black Population,Employed Count,Unemployed Count,Poverty Count,Poverty Count - White,Poverty Count - Black,Median Income,Median Rent,Median Home Value
