In [1]:
import numpy as np
import pandas as pd
import requests
from census import Census

# Census API Key

from config import api_key
c = Census(api_key, year=2017)

In [2]:
# Run Census Search to retrieve data on all zip codes (2018 ACS5 Census)
# See: https://github.com/datamade/census for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
# Code sourced from Oregon Data Bootcamp Class Activities 6.3.8
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E", "B19301_001E", "B17001_002E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})

# Organize Columns
census_2017 = census_pd[["Zipcode", "Population", "Median Age", "Household Income", "Per Capita Income", 
                         "Poverty Count"]]

census_2017.head()

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count
0,601,17599.0,38.9,11757.0,7041.0,11282.0
1,602,39209.0,40.9,16190.0,8978.0,20428.0
2,603,50135.0,40.4,16645.0,10897.0,25176.0
3,606,6304.0,42.8,13387.0,5960.0,4092.0
4,610,27590.0,41.4,18741.0,9266.0,12553.0


In [3]:
# Keep only PDX zipcodes
# Zipcodes chosen based on information from City of Portland (https://www.portlandoregon.gov/revenue/article/373203)
census_2017_pdx = census_2017.loc[census_2017['Zipcode'].isin(['97201', '97202', '97203', '97204', '97205', '97206',
                                                               '97209', '97210', '97211', '97212', '97213', '97214', 
                                                               '97215', '97216', '97217', '97218', '97219', '97220', 
                                                               '97221', '97222', '97223', '97225', '97227', '97229',
                                                               '97230', '97231', '97232', '97233', '97236', '97239', 
                                                               '97258', '97266'])]

print(census_2017_pdx)

      Zipcode  Population  Median Age  Household Income  Per Capita Income  \
31968   97201     17566.0        31.6           50420.0            48517.0   
31969   97202     42189.0        35.8           66017.0            40809.0   
31970   97203     34089.0        32.4           55174.0            25359.0   
31971   97204      1053.0        43.7      -666666666.0            17887.0   
31972   97205      7122.0        42.9           32276.0            42623.0   
31973   97206     50655.0        37.4           59622.0            29550.0   
31975   97209     16507.0        38.9           54747.0            57442.0   
31976   97210     11676.0        36.1           88947.0            75997.0   
31977   97211     34856.0        35.8           73312.0            36065.0   
31978   97212     26601.0        39.5           94192.0            52153.0   
31979   97213     32284.0        39.6           67882.0            38435.0   
31980   97214     25398.0        34.8           60754.0         

In [4]:
# Calculate Poverty Rate
census_2017_pdx["Poverty Rate"] = 100 * \
    census_2017_pdx["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

# Add the column to the dataframe
census_2017_pdx = census_2017_pdx[["Zipcode", "Population", "Median Age", "Household Income", "Per Capita Income", 
                         "Poverty Count", "Poverty Rate"]]

census_2017_pdx.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
31968,97201,17566.0,31.6,50420.0,48517.0,4018.0,22.873733
31969,97202,42189.0,35.8,66017.0,40809.0,5635.0,13.356562
31970,97203,34089.0,32.4,55174.0,25359.0,7877.0,23.107161
31971,97204,1053.0,43.7,-666666666.0,17887.0,345.0,32.763533
31972,97205,7122.0,42.9,32276.0,42623.0,2064.0,28.980623


In [5]:
# Add column for year
census_2017_pdx["Year"] = 2017

census_2017_pdx.head()

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Year
31968,97201,17566.0,31.6,50420.0,48517.0,4018.0,22.873733,2017
31969,97202,42189.0,35.8,66017.0,40809.0,5635.0,13.356562,2017
31970,97203,34089.0,32.4,55174.0,25359.0,7877.0,23.107161,2017
31971,97204,1053.0,43.7,-666666666.0,17887.0,345.0,32.763533,2017
31972,97205,7122.0,42.9,32276.0,42623.0,2064.0,28.980623,2017


In [6]:
# Export file to json
census_2017_pdx.to_json("cleaned_census\census_2017_pdx.json", orient="records")