In [1]:
import numpy as np
import pandas as pd
import requests
from census import Census

# Census API Key

from config import api_key
c = Census(api_key, year=2018)

In [5]:
# Run Census Search to retrieve data on all zip codes (2018 ACS5 Census)
# See: https://github.com/datamade/census for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
# Code sourced from Oregon Data Bootcamp Class Activities 6.3.8
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E", "B19301_001E", "B17001_002E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})

# Organize Columns
census_2018 = census_pd[["Zipcode", "Population", "Median Age", "Household Income", "Per Capita Income", 
                         "Poverty Count"]]

census_2018.head()

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count
0,601,17242.0,40.5,13092.0,6999.0,10772.0
1,602,38442.0,42.3,16358.0,9277.0,19611.0
2,603,48814.0,41.1,16603.0,11307.0,24337.0
3,606,6437.0,43.3,12832.0,5943.0,4163.0
4,610,27073.0,42.1,19309.0,10220.0,11724.0


In [13]:
# Keep only PDX zipcodes
# Zipcodes chosen based on information from City of Portland (https://www.portlandoregon.gov/revenue/article/373203)
census_2018_pdx = census_2018.loc[census_2018['Zipcode'].isin(['97201', '97202', '97203', '97204', '97205', '97206',
                                                               '97209', '97210', '97211', '97212', '97213', '97214', 
                                                               '97215', '97216', '97217', '97218', '97219', '97220', 
                                                               '97221', '97222', '97223', '97225', '97227', '97229',
                                                               '97230', '97231', '97232', '97233', '97236', '97239', 
                                                               '97258', '97266'])]

print(census_2018_pdx)

      Zipcode  Population  Median Age  Household Income  Per Capita Income  \
31933   97201     18145.0        31.1           51625.0            49825.0   
31934   97202     42822.0        36.5           73924.0            44482.0   
31935   97203     34365.0        32.1           60908.0            28020.0   
31936   97204      1100.0        43.8           11763.0            17851.0   
31937   97205      7284.0        44.9           33302.0            44912.0   
31938   97206     51698.0        37.4           63381.0            31341.0   
31940   97209     17585.0        39.2           61070.0            60527.0   
31941   97210     12059.0        36.4           79696.0            75531.0   
31942   97211     34962.0        36.0           78154.0            38143.0   
31943   97212     26699.0        40.0          101492.0            57032.0   
31944   97213     32845.0        39.7           71574.0            40283.0   
31945   97214     26207.0        35.2           65183.0         

In [14]:
# Calculate Poverty Rate
census_2018_pdx["Poverty Rate"] = 100 * \
    census_2018_pdx["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

# Add the column to the dataframe
census_2018_pdx = census_2018_pdx[["Zipcode", "Population", "Median Age", "Household Income", "Per Capita Income", 
                         "Poverty Count", "Poverty Rate"]]

census_2018_pdx.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
31933,97201,18145.0,31.1,51625.0,49825.0,4560.0,25.13089
31934,97202,42822.0,36.5,73924.0,44482.0,4602.0,10.746812
31935,97203,34365.0,32.1,60908.0,28020.0,6948.0,20.218245
31936,97204,1100.0,43.8,11763.0,17851.0,378.0,34.363636
31937,97205,7284.0,44.9,33302.0,44912.0,1980.0,27.182867


In [16]:
# Export file to json
census_2018_pdx.to_json("cleaned_census\census_2018_pdx.json", orient="records")