In [21]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import the Census Data
from census import Census

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

In [22]:
# Import US Census API Key
from config import census_api_key

# Module-level Census client pinned to the 2018 vintage. call_census() builds
# its own client for each requested year.
c = Census(census_api_key, year=2018)

In [53]:
def call_census(year):
    """Download ZCTA-level ACS 5-year estimates for one survey year.

    Builds a Census client for ``year``, requests the ACS variables listed in
    ``acs_columns`` for every zip code tabulation area, and returns the
    response as a DataFrame with readable column names.

    Parameters
    ----------
    year : int
        ACS 5-year vintage handed to the ``Census`` client (e.g. 2018).

    Returns
    -------
    pandas.DataFrame
        The API response with the requested variables renamed per
        ``acs_columns`` and ``zip code tabulation area`` renamed to
        ``Zipcode``. Any other column the API returns is passed through
        unchanged.

    Notes
    -----
    NOTE(review): values are returned exactly as the API provides them. ACS
    reportedly marks unavailable estimates with large negative sentinel codes
    (e.g. -666666666); confirm whether downstream cells need to mask these.
    """
    # Single source of truth: the keys are the variables requested (in this
    # order) and the values are the output column names. Each variable appears
    # exactly once. The earlier version requested B17001_002E and B17012_003E
    # twice and mapped B17012_003E to both "Married" and
    # "poverty_family_married"; a dict keeps only the last duplicate key, so
    # "poverty_family_married" is the name that was actually produced and is
    # the one kept here.
    acs_columns = {
        "NAME": "Name",
        "B19013_001E": "Household Income",
        "B01003_001E": "Population",
        "B01002_001E": "Median Age",
        "B19301_001E": "Per Capita Income",
        "B17001_002E": "Poverty",
        "B15003_002E": "No Education",
        "B15003_017E": "High School Education",
        "B15003_018E": "GED",
        "B15003_021E": "Associates",
        "B15003_022E": "Bachelors",
        "B15003_023E": "Masters",
        "B15003_024E": "Professional",
        "B15003_025E": "Doctorate",
        "B17012_003E": "poverty_family_married",
        "B17012_009E": "Single Male",
        "B17012_014E": "Single Female",
        "B17001_017E": "poverty_female",
        "B17001_003E": "poverty_male",
        "B17012_002E": "poverty_family",
    }

    client = Census(census_api_key, year=year)
    records = client.acs5.get(
        tuple(acs_columns),
        {'for': 'zip code tabulation area:*'},
    )

    return pd.DataFrame(records).rename(
        columns={**acs_columns, "zip code tabulation area": "Zipcode"}
    )
        

In [54]:
# ACS 5-year vintages to download; call_census() is invoked once per vintage
# and the resulting frames are kept in the same order as `years`.
years = [2018, 2019, 2020, 2021, 2022]
data_list = [call_census(year) for year in years]

In [55]:
# Tag each downloaded frame with its survey year and preview it.
# Pairing `years` with `data_list` via zip keeps this loop correct when the
# list of vintages changes, instead of relying on a hard-coded count of 5.
for year, frame in zip(years, data_list):
    frame['Year'] = year
    print(frame.head())

          Name  Household Income  Population  Median Age  Per Capita Income  \
0  ZCTA5 00601           13092.0     17242.0        40.5             6999.0   
1  ZCTA5 00602           16358.0     38442.0        42.3             9277.0   
2  ZCTA5 00603           16603.0     48814.0        41.1            11307.0   
3  ZCTA5 00606           12832.0      6437.0        43.3             5943.0   
4  ZCTA5 00610           19309.0     27073.0        42.1            10220.0   

   Poverty  No Education  High School Education    GED  Associates  ...  \
0  10772.0         492.0                 2868.0  160.0       888.0  ...   
1  19611.0         901.0                 5751.0  947.0      3387.0  ...   
2  24337.0         924.0                 8745.0  996.0      2370.0  ...   
3   4163.0         185.0                 1284.0  138.0       241.0  ...   
4  11724.0         643.0                 4945.0  624.0      2241.0  ...   

   Doctorate  poverty_family_married  Single Male  Single Female  \
0     

In [63]:
# Stack every per-year frame into one long table. Passing the list itself
# (rather than five hand-indexed elements) keeps this in step with `years`.
combined_df = pd.concat(data_list)

In [62]:
# Drop rows that are missing any value outside the `state` column.
# `state` is deliberately left out of the null check.
# NOTE(review): it appears to be returned only for some ACS vintages (confirm
# against the API). pd.concat fills it with NaN for frames that lack it, so
# including it here would discard those years entirely.
required_cols = [col for col in combined_df.columns if col != "state"]
combined_df = combined_df.dropna(axis=0, how='any', subset=required_cols)

In [65]:
# Replace any missing poverty counts with 0. When the cells run in document
# order the preceding row drop has already removed rows with a null Poverty
# value, so this acts as a safeguard rather than changing data.
combined_df = combined_df.assign(Poverty=combined_df['Poverty'].fillna(0))

In [66]:
# Add a Poverty Rate column: 100 * (Poverty Count / Population).
# Rows with a population of 0 get NaN rather than inf (or a 0/0 NaN produced
# by accident), so the zero-denominator case is explicit and never leaks
# infinities into the exported data.
poverty_count = combined_df["Poverty"].astype(int)
population = combined_df["Population"].astype(int)
combined_df["Poverty Rate"] = 100 * poverty_count / population.where(population != 0)

In [70]:
# Persist the combined frame first, then end the cell on the per-year row
# counts so the notebook actually renders them: a bare expression is only
# displayed when it is the last statement in a cell.
combined_df.to_csv("combined_df.csv", encoding="utf-8", index=False)
combined_df['Year'].value_counts()

In [68]:
# combined_df.head()

Unnamed: 0,Name,Household Income,Population,Median Age,Per Capita Income,Poverty,No Education,High School Education,GED,Associates,...,poverty_family_married,Single Male,Single Female,poverty_female,poverty_male,poverty_family,state,Zipcode,Year,Poverty Rate
0,ZCTA5 00601,13092.0,17242.0,40.5,6999.0,10772.0,492.0,2868.0,160.0,888.0,...,1223.0,215.0,988.0,5905.0,4867.0,2426.0,72,601,2018,62.475351
1,ZCTA5 00602,16358.0,38442.0,42.3,9277.0,19611.0,901.0,5751.0,947.0,3387.0,...,2360.0,502.0,1665.0,10077.0,9534.0,4527.0,72,602,2018,51.014515
2,ZCTA5 00603,16603.0,48814.0,41.1,11307.0,24337.0,924.0,8745.0,996.0,2370.0,...,2393.0,697.0,3349.0,13467.0,10870.0,6439.0,72,603,2018,49.856599
3,ZCTA5 00606,12832.0,6437.0,43.3,5943.0,4163.0,185.0,1284.0,138.0,241.0,...,421.0,109.0,251.0,2110.0,2053.0,781.0,72,606,2018,64.672984
4,ZCTA5 00610,19309.0,27073.0,42.1,10220.0,11724.0,643.0,4945.0,624.0,2241.0,...,1307.0,278.0,1029.0,6074.0,5650.0,2614.0,72,610,2018,43.305138
