In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from config import api_key

## base_url = https://api.census.gov/data/2019/acs/acs5/subject?get=NAME,S0101_C01_001E&for=region:*&key=api_key

# api_key = "0e395bcf9cca78932f9c8e80c0a8a55fe1cba723"
# Census API Key

c = Census(api_key, year=2019)

## "0e395bcf9cca78932f9c8e80c0a8a55fe1cba723"

# STATE CODES:
# 04|AZ|Arizona|01779777
# 06|CA|California|01779778
# 08|CO|Colorado|01779779
# 16|ID|Idaho|01779783
# 30|MT|Montana|00767982
# 32|NV|Nevada|01779793
# 35|NM|New Mexico|00897535
# 41|OR|Oregon|01155107
# 49|UT|Utah|01455989
# 53|WA|Washington|01779804
# 56|WY|Wyoming|01779807


In [2]:
census_data = c.acs5.get(("NAME", "B01003_001E", "B19013_001E", "B01003_001E", "B01002_001E", "B11001_001E",
                          "B19301_001E", "B08303_001E", "B08015_001E",
                          "B17001_002E"), {'for': 'zip code tabulation area:*'})

# # Convert to DataFrame
census_pd = pd.DataFrame(census_data)
census_pd.head()


Unnamed: 0,NAME,B01003_001E,B19013_001E,B01002_001E,B11001_001E,B19301_001E,B08303_001E,B08015_001E,B17001_002E,state,zip code tabulation area
0,ZCTA5 00601,17113.0,14361.0,41.9,5509.0,7493.0,3504.0,3115.0,10552.0,72,601
1,ZCTA5 00602,37751.0,16807.0,42.9,12740.0,9694.0,10525.0,8645.0,18653.0,72,602
2,ZCTA5 00603,47081.0,16049.0,42.1,19228.0,11259.0,11479.0,10460.0,23691.0,72,603
3,ZCTA5 00606,6392.0,12119.0,44.3,1946.0,6093.0,1228.0,1085.0,4185.0,72,606
4,ZCTA5 00610,26686.0,19898.0,42.7,8795.0,10572.0,8065.0,6800.0,12204.0,72,610


In [3]:
# Column Reordering
census_pd = census_pd.rename(columns={"NAME": "State ID",
                                      "State": "State ID",
                                      "B01003_001E": "Population",
                                      "B11001_001E": "Number of Households",
                                      "B19013_001E": "Household Income",
#                                       "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B08303_001E": "Total Commute Time",
                                      "B08015_001E": "Total Vehicles",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})



census_pd.head()


Unnamed: 0,Name,Population,Household Income,B01002_001E,Number of Households,B19301_001E,Total Commute Time,Total Vehicles,Poverty Count,state,Zipcode
0,ZCTA5 00601,17113.0,14361.0,41.9,5509.0,7493.0,3504.0,3115.0,10552.0,72,601
1,ZCTA5 00602,37751.0,16807.0,42.9,12740.0,9694.0,10525.0,8645.0,18653.0,72,602
2,ZCTA5 00603,47081.0,16049.0,42.1,19228.0,11259.0,11479.0,10460.0,23691.0,72,603
3,ZCTA5 00606,6392.0,12119.0,44.3,1946.0,6093.0,1228.0,1085.0,4185.0,72,606
4,ZCTA5 00610,26686.0,19898.0,42.7,8795.0,10572.0,8065.0,6800.0,12204.0,72,610


In [4]:
census_pd["Poverty Rate"] = 100 * \
    census_pd["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

census_pd["Average Vehicles per Household"] = census_pd["Total Vehicles"] / census_pd["Number of Households"]

census_pd["Average Commute Time"] = census_pd["Total Commute Time"] / census_pd["Number of Households"]

census_pd = census_pd[["State ID", "Zipcode", "Population", "Number of Households", 
                       "Household Income", "Poverty Count", "Poverty Rate", 
                       "Total Commute Time", "Average Commute Time",
                       "Total Vehicles", "Average Vehicles per Household"]]

# Visualize
print(len(census_pd))
census_pd.head()
    

KeyError: "['State ID'] not in index"

In [None]:
census_pd.to_csv("census_data.csv", encoding="utf-8", index=False)

In [None]:
states = ["04", "06", "08", "16", "30", "32", "35", "41", "49", "53", "56"]

state_name = {"04": "AZ",
             "06": "CA",
             "08": "CO",
             "16": "ID",
             "30": "MT",
             "32": "NV",
             "35": "NM",
             "41": "OR",
             "49": "UT",
             "53": "WA",
             "56": "WY"}

state_census_df = census_pd.loc[census_pd["State ID"].isin (states)]
state_census_df

state_census_df["State"] = [state_name[state] for state in state_census_df["State ID"]]
state_census_df



In [None]:
state_census_df.to_csv("state_census_df.csv", encoding="utf-8", index=False)