In [121]:
# import dependencies
import pandas as pd
import numpy as np
import requests
import json

from api_keys import census_apikey

In [122]:
# Assemble the pieces of the SAIPE time-series request URL
base_url = "https://api.census.gov/data/timeseries/poverty/saipe"
get = "?get="

# Columns requested from the API, joined into one comma-separated list
get_vars = ",".join([
    "NAME",
    "SAEPOVALL_PT",
    "SAEPOVALL_MOE",
    "SAEPOVRTALL_MOE",
    "SAEPOVRTALL_PT",
])

# Geography, time-range and API-key query parameters
county = "&for=county:*"
state = "&in=state:34"
time = "&time=from+1999+to+2016"
key = "&key={}".format(census_apikey)

In [123]:
# Request the poverty data from the Census API
print("Starting census search")
response = requests.get(
    f"{base_url}{get}{get_vars}{county}{state}{time}{key}",
    timeout = 30,  # seconds; without a timeout a stalled connection would hang the cell
)

# Stop here on an HTTP error status (4xx/5xx) instead of letting it surface
# later as a JSON decoding error or a malformed DataFrame
response.raise_for_status()

# Decode the JSON payload and pretty-print it for inspection
census_poverty = response.json()
print(json.dumps(census_poverty, indent = 4, sort_keys = True))


Starting census search
[
    [
        "NAME",
        "SAEPOVALL_PT",
        "SAEPOVALL_MOE",
        "SAEPOVRTALL_MOE",
        "SAEPOVRTALL_PT",
        "time",
        "state",
        "county"
    ],
    [
        "Atlantic County",
        "23797",
        "5404",
        "2.15",
        "9.6",
        "1999",
        "34",
        "001"
    ],
    [
        "Bergen County",
        "45644",
        "10345",
        "1.15",
        "5.2",
        "1999",
        "34",
        "003"
    ],
    [
        "Burlington County",
        "21995",
        "5043",
        "1.20",
        "5.3",
        "1999",
        "34",
        "005"
    ],
    [
        "Camden County",
        "53366",
        "12005",
        "2.40",
        "10.6",
        "1999",
        "34",
        "007"
    ],
    [
        "Cape May County",
        "9621",
        "2209",
        "2.15",
        "9.5",
        "1999",
        "34",
        "009"
    ],
    [
        "Cumberland County",
        "18481",
  

In [124]:
# Load the raw payload; its first row holds the column headers
raw_poverty_table = pd.DataFrame(census_poverty)
header_row = raw_poverty_table.iloc[0]

# Readable names for the API variable codes
readable_names = {
    "NAME": "County Name",
    "SAEPOVALL_PT": "Poverty Count Estimate",
    "SAEPOVALL_MOE": "Poverty Count MOE",
    "SAEPOVRTALL_MOE": "Poverty Rate MOE",
    "SAEPOVRTALL_PT": "Poverty Rate",
    "time": "Year",
}

# Final column layout
column_order = [
    "Year",
    "County Name",
    "Poverty Rate",
    "Poverty Rate MOE",
    "Poverty Count Estimate",
    "Poverty Count MOE",
]

# Skip the header row, label the columns from it, apply the readable names,
# discard the state/county code columns and put the columns in their final order
census_poverty_df = (
    raw_poverty_table.iloc[1:]
    .rename(columns = header_row)
    .rename(columns = readable_names)
    .drop(columns = ["state", "county"])
    .loc[:, column_order]
)
census_poverty_df

Unnamed: 0,Year,County Name,Poverty Rate,Poverty Rate MOE,Poverty Count Estimate,Poverty Count MOE
1,1999,Atlantic County,9.6,2.15,23797,5404
2,1999,Bergen County,5.2,1.15,45644,10345
3,1999,Burlington County,5.3,1.20,21995,5043
4,1999,Camden County,10.6,2.40,53366,12005
5,1999,Cape May County,9.5,2.15,9621,2209
...,...,...,...,...,...,...
374,2016,Salem County,12.6,2.00,7868,1224
375,2016,Somerset County,5.2,0.90,17318,2858
376,2016,Sussex County,6.0,1.10,8460,1539
377,2016,Union County,10.6,1.20,58425,6643


In [125]:
# Join the two-word county name "Cape May" with an underscore so that it
# stays in one piece when the "County Name" column is split on a space
county_name_fixes = {"Cape May County": "Cape_May County"}
census_poverty_df = census_poverty_df.replace({"County Name": county_name_fixes})

In [126]:
# List the distinct county names to check that the replacement took effect
census_poverty_df.loc[:, "County Name"].unique()

array(['Atlantic County', 'Bergen County', 'Burlington County',
       'Camden County', 'Cape_May County', 'Cumberland County',
       'Essex County', 'Gloucester County', 'Hudson County',
       'Hunterdon County', 'Mercer County', 'Middlesex County',
       'Monmouth County', 'Morris County', 'Ocean County',
       'Passaic County', 'Salem County', 'Somerset County',
       'Sussex County', 'Union County', 'Warren County'], dtype=object)

In [127]:
# Strip the trailing word "County" from each name, to match CDC data.
# Splitting once from the right separates only the final word, so a
# multi-word county name is kept whole rather than cut at its first space.
county_split = census_poverty_df["County Name"].str.rsplit(" ", n = 1, expand = True)

# Swap "County Name" for the shortened "County" column in the same position
census_poverty_df = (
    census_poverty_df
    .rename(columns = {"County Name": "County"})
    .assign(County = county_split[0])
)
census_poverty_df

Unnamed: 0,Year,County,Poverty Rate,Poverty Rate MOE,Poverty Count Estimate,Poverty Count MOE
1,1999,Atlantic,9.6,2.15,23797,5404
2,1999,Bergen,5.2,1.15,45644,10345
3,1999,Burlington,5.3,1.20,21995,5043
4,1999,Camden,10.6,2.40,53366,12005
5,1999,Cape_May,9.5,2.15,9621,2209
...,...,...,...,...,...,...
374,2016,Salem,12.6,2.00,7868,1224
375,2016,Somerset,5.2,0.90,17318,2858
376,2016,Sussex,6.0,1.10,8460,1539
377,2016,Union,10.6,1.20,58425,6643


In [128]:
# Order the rows by county, then year, and renumber the index from 0
census_poverty_df = (
    census_poverty_df
    .sort_values(by = ["County", "Year"])
    .reset_index(drop = True)
)
census_poverty_df


Unnamed: 0,Year,County,Poverty Rate,Poverty Rate MOE,Poverty Count Estimate,Poverty Count MOE
0,1999,Atlantic,9.6,2.15,23797,5404
1,2000,Atlantic,9.5,2.00,23804,4991
2,2001,Atlantic,9.7,2.10,24621,5281
3,2002,Atlantic,10.1,2.20,26111,5785
4,2003,Atlantic,10.8,2.40,28422,6296
...,...,...,...,...,...,...
373,2012,Warren,6.9,1.40,7285,1441
374,2013,Warren,8.7,1.50,9194,1551
375,2014,Warren,8.3,1.50,8747,1594
376,2015,Warren,7.7,1.40,8040,1478


In [129]:
# List the distinct county values before export, to check that Cape_May
# came through the column split in one piece
census_poverty_df.loc[:, "County"].unique()

array(['Atlantic', 'Bergen', 'Burlington', 'Camden', 'Cape_May',
       'Cumberland', 'Essex', 'Gloucester', 'Hudson', 'Hunterdon',
       'Mercer', 'Middlesex', 'Monmouth', 'Morris', 'Ocean', 'Passaic',
       'Salem', 'Somerset', 'Sussex', 'Union', 'Warren'], dtype=object)

In [130]:
# Write the cleaned data to CSV so it can be integrated with the main code.
# NOTE(review): the file name says 1999-2019, while the request built in this
# notebook asks for 1999 to 2016 -- confirm which is intended before renaming,
# since other code may read this exact path.
export_path = "output/final_census_poverty_data_1999-2019.csv"
census_poverty_df.to_csv(export_path)