In [1]:
import numpy as np
import pandas as pd
import requests
from census import Census

# Census API Key

from config import api_key
c = Census(api_key, year=2019)

In [2]:
# Run Census Search to retrieve data on all zip codes (2018 ACS5 Census)
# See: https://github.com/datamade/census for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
# Code sourced from Oregon Data Bootcamp Class Activities 6.3.8
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E", "B19301_001E", "B17001_002E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})

# Organize Columns
census_2019 = census_pd[["Zipcode", "Population", "Median Age", "Household Income", "Per Capita Income", 
                         "Poverty Count"]]

census_2019.head()

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count
0,601,17113.0,41.9,14361.0,7493.0,10552.0
1,602,37751.0,42.9,16807.0,9694.0,18653.0
2,603,47081.0,42.1,16049.0,11259.0,23691.0
3,606,6392.0,44.3,12119.0,6093.0,4185.0
4,610,26686.0,42.7,19898.0,10572.0,12204.0


In [3]:
# Keep only PDX zipcodes
# Zipcodes chosen based on information from City of Portland (https://www.portlandoregon.gov/revenue/article/373203)
census_2019_pdx = census_2019.loc[census_2019['Zipcode'].isin(['97201', '97202', '97203', '97204', '97205', '97206',
                                                               '97209', '97210', '97211', '97212', '97213', '97214', 
                                                               '97215', '97216', '97217', '97218', '97219', '97220', 
                                                               '97221', '97222', '97223', '97225', '97227', '97229',
                                                               '97230', '97231', '97232', '97233', '97236', '97239', 
                                                               '97258', '97266'])]

print(census_2019_pdx)

      Zipcode  Population  Median Age  Household Income  Per Capita Income  \
31968   97201     17993.0        31.3           55485.0            52646.0   
31969   97202     43371.0        36.4           81772.0            45481.0   
31970   97203     34890.0        32.0           68860.0            29909.0   
31971   97204      1036.0        43.5           20593.0            14679.0   
31972   97205      7462.0        46.6           37420.0            50633.0   
31973   97206     52996.0        36.9           69672.0            33320.0   
31975   97209     18982.0        39.9           66435.0            65726.0   
31976   97210     11877.0        35.8           77845.0            76225.0   
31977   97211     35429.0        36.1           81802.0            40644.0   
31978   97212     26562.0        40.5          106705.0            59553.0   
31979   97213     33011.0        39.9           77804.0            43818.0   
31980   97214     26853.0        35.3           69946.0         

In [4]:
# Calculate Poverty Rate
census_2019_pdx["Poverty Rate"] = 100 * \
    census_2019_pdx["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

# Add the column to the dataframe
census_2019_pdx = census_2019_pdx[["Zipcode", "Population", "Median Age", "Household Income", "Per Capita Income", 
                         "Poverty Count", "Poverty Rate"]]

census_2019_pdx.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
31968,97201,17993.0,31.3,55485.0,52646.0,4299.0,23.892625
31969,97202,43371.0,36.4,81772.0,45481.0,4457.0,10.276452
31970,97203,34890.0,32.0,68860.0,29909.0,6488.0,18.595586
31971,97204,1036.0,43.5,20593.0,14679.0,266.0,25.675676
31972,97205,7462.0,46.6,37420.0,50633.0,1896.0,25.408738


In [5]:
# Add column for year
census_2019_pdx["Year"] = 2019

census_2019_pdx.head()

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Year
31968,97201,17993.0,31.3,55485.0,52646.0,4299.0,23.892625,2019
31969,97202,43371.0,36.4,81772.0,45481.0,4457.0,10.276452,2019
31970,97203,34890.0,32.0,68860.0,29909.0,6488.0,18.595586,2019
31971,97204,1036.0,43.5,20593.0,14679.0,266.0,25.675676,2019
31972,97205,7462.0,46.6,37420.0,50633.0,1896.0,25.408738,2019


In [6]:
# Export file to json
census_2019_pdx.to_json("cleaned_census\census_2019_pdx.json", orient="records")