In [50]:
# Dependencies
import requests
import pandas as pd
from census import Census

In [51]:
# Import U.S. Census API Key
from api_key import api_key

# Create an instance of the Census library
c = Census(
    api_key,
    year = 2020
)

## Retrieve data from the U.S. Census using the Census library

References:

* Review the following page to review the Python library documentation: <https://github.com/CommerceDataService/census-wrapper>

* Review the following page to learn more about the data labels: <https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b>

In [52]:
api_key

'9e43867a81ebd3ab527c43e714f159626edcd14e'

In [53]:
# Run Census Search to retrieve data on all zip codes (2020 ACS5 Census)
census_data = c.acs5.get(
    (
        "NAME",
        "B19013_001E",
        "B01003_001E",
        "B01002_001E",
        "B19301_001E",
        "B17001_002E"
    ),
    {'for': 'state:*'}
)

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column renaming
census_pd = census_pd.rename(
    columns = {
        "B01003_001E": "Population",
        "B01002_001E": "Median Age",
        "B19013_001E": "Household Income",
        "B19301_001E": "Per Capita Income",
        "B17001_002E": "Poverty Count",
        "NAME": "Name",
        "state": "State"
    }
)

# Add a Poverty Rate column (Poverty Count / Population)
census_pd["Poverty Rate"] = 100 * census_pd["Poverty Count"].astype(int) / census_pd["Population"].astype(int)

# Configure the final DataFrame
census_pd = census_pd[
    [
        "State",
        "Population",
        "Median Age",
        "Household Income",
        "Per Capita Income",
        "Poverty Count",
        "Poverty Rate"
    ]
]

# Display DataFrame length and sample data
print(f"Number of rows in the DataFrame: {len(census_pd)}")
census_pd

Number of rows in the DataFrame: 52


Unnamed: 0,State,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,42,12794885.0,40.9,63627.0,35518.0,1480430.0,11.570483
1,6,39346023.0,36.7,78672.0,38576.0,4853434.0,12.335259
2,54,1807426.0,42.7,48037.0,27346.0,300152.0,16.6066
3,49,3151239.0,31.1,74197.0,30986.0,283360.0,8.992019
4,36,19514849.0,39.0,71117.0,40898.0,2581048.0,13.226072
5,11,701974.0,34.1,90842.0,58659.0,103391.0,14.728608
6,2,736990.0,34.6,77790.0,37094.0,74369.0,10.09091
7,12,21216924.0,42.2,57703.0,32848.0,2772939.0,13.069468
8,45,5091517.0,39.7,54864.0,30727.0,726470.0,14.268243
9,38,760394.0,35.2,65315.0,36289.0,77491.0,10.190901


In [54]:
census_pd['State'].dtype

dtype('O')

In [56]:
# !pip install us



In [57]:
from us import states
state_list = {state.fips: state.name for state in states.STATES}

In [58]:
census_pd['State'] = census_pd['State'].map(state_list)

In [60]:
state_list

{'01': 'Alabama',
 '02': 'Alaska',
 '04': 'Arizona',
 '05': 'Arkansas',
 '06': 'California',
 '08': 'Colorado',
 '09': 'Connecticut',
 '10': 'Delaware',
 '12': 'Florida',
 '13': 'Georgia',
 '15': 'Hawaii',
 '16': 'Idaho',
 '17': 'Illinois',
 '18': 'Indiana',
 '19': 'Iowa',
 '20': 'Kansas',
 '21': 'Kentucky',
 '22': 'Louisiana',
 '23': 'Maine',
 '24': 'Maryland',
 '25': 'Massachusetts',
 '26': 'Michigan',
 '27': 'Minnesota',
 '28': 'Mississippi',
 '29': 'Missouri',
 '30': 'Montana',
 '31': 'Nebraska',
 '32': 'Nevada',
 '33': 'New Hampshire',
 '34': 'New Jersey',
 '35': 'New Mexico',
 '36': 'New York',
 '37': 'North Carolina',
 '38': 'North Dakota',
 '39': 'Ohio',
 '40': 'Oklahoma',
 '41': 'Oregon',
 '42': 'Pennsylvania',
 '44': 'Rhode Island',
 '45': 'South Carolina',
 '46': 'South Dakota',
 '47': 'Tennessee',
 '48': 'Texas',
 '49': 'Utah',
 '50': 'Vermont',
 '51': 'Virginia',
 '53': 'Washington',
 '54': 'West Virginia',
 '55': 'Wisconsin',
 '56': 'Wyoming'}

In [59]:
census_pd

Unnamed: 0,State,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,Pennsylvania,12794885.0,40.9,63627.0,35518.0,1480430.0,11.570483
1,California,39346023.0,36.7,78672.0,38576.0,4853434.0,12.335259
2,West Virginia,1807426.0,42.7,48037.0,27346.0,300152.0,16.6066
3,Utah,3151239.0,31.1,74197.0,30986.0,283360.0,8.992019
4,New York,19514849.0,39.0,71117.0,40898.0,2581048.0,13.226072
5,,701974.0,34.1,90842.0,58659.0,103391.0,14.728608
6,Alaska,736990.0,34.6,77790.0,37094.0,74369.0,10.09091
7,Florida,21216924.0,42.2,57703.0,32848.0,2772939.0,13.069468
8,South Carolina,5091517.0,39.7,54864.0,30727.0,726470.0,14.268243
9,North Dakota,760394.0,35.2,65315.0,36289.0,77491.0,10.190901


In [63]:
census_pd['Region'] = ['Northeast', 'West', 'Southeast', 'West', 'Northeast', 'Northeast', 'West', 'Southeast', 'Southeast', 'Midwest', 'Northeast', 'Southeast', 'Southeast', 'Northeast', 'West', 'West', 'Southwest', 'Southeast', 'Midwest', 'West', 'Northeast', 'West', 'Midwest', 'Northeast', 'Southwest', 'West', 'Midwest', 'Midwest', 'West', 'Northeast', 'Northeast', 'Southeast', 'Northeast', 'Southeast', 'Southeast', 'West', 'Midwest', 'Midwest', 'Midwest', 'Southeast', 'Midwest', 'Midwest', 'Northeast', 'Midwest', 'Midwest', 'Southwest', 'West', 'Northeast', 'Southeast', 'Southeast', 'Midwest', 'Southeast' ]
census_pd

Unnamed: 0,State,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Region
0,Pennsylvania,12794885.0,40.9,63627.0,35518.0,1480430.0,11.570483,Northeast
1,California,39346023.0,36.7,78672.0,38576.0,4853434.0,12.335259,West
2,West Virginia,1807426.0,42.7,48037.0,27346.0,300152.0,16.6066,Southeast
3,Utah,3151239.0,31.1,74197.0,30986.0,283360.0,8.992019,West
4,New York,19514849.0,39.0,71117.0,40898.0,2581048.0,13.226072,Northeast
5,,701974.0,34.1,90842.0,58659.0,103391.0,14.728608,Northeast
6,Alaska,736990.0,34.6,77790.0,37094.0,74369.0,10.09091,West
7,Florida,21216924.0,42.2,57703.0,32848.0,2772939.0,13.069468,Southeast
8,South Carolina,5091517.0,39.7,54864.0,30727.0,726470.0,14.268243,Southeast
9,North Dakota,760394.0,35.2,65315.0,36289.0,77491.0,10.190901,Midwest
