In [1]:
import requests
import json
import os
import pandas as pd
import numpy as np
from pprint import pprint
import censusdata as cd # This package allows for easier use of the US Census API

In [2]:
# Setup Base URL for US CENSUS DATA API
# Requests every variable of group S1101 from the 2021 ACS 1-year subject
# tables, limited to state code 04.
base_url = "https://api.census.gov/data/2021/acs/acs1/subject?get=group(S1101)&for=state:04"

# Call API (the timeout stops the notebook from hanging on a stalled connection)
r = requests.get(base_url, timeout=30)

# Preview Response
print(r)

# Stop here with a clear HTTPError on a 4xx/5xx reply instead of failing
# later with a confusing JSON-decoding or indexing error
r.raise_for_status()

# Jsonify response and keep only its first row; the next cell filters the
# variable names found in it
response = r.json()[0]

<Response [200]>


In [3]:
# Select the variable names that end with the wanted row suffix
row_get = "_001E"
get_cols = [
    col_name
    for col_name in response
    if col_name.endswith(row_get)
]

# Show the variable names picked from the US CENSUS reply
pprint(get_cols)

['S1101_C01_001E',
 'S1101_C02_001E',
 'S1101_C03_001E',
 'S1101_C04_001E',
 'S1101_C05_001E']


In [4]:
# Readable headers, listed in the same order as the variables in get_cols
household_labels = [
    'Total Estimate',
    'Married Households',
    'Male Unmarried Household',
    'Female Unmarried Household',
    'Nonfamily Household',
]
column_labels = dict(zip(get_cols, household_labels))

# Geography selector: every place within every state
all_places = cd.censusgeo([('state', '*'), ('place', '*')])

# Download through the censusdata package (API extension), then tidy the frame
city_households = (
    cd.download('acs1/subject', 2021, all_places, get_cols)
    .rename(columns=column_labels)  # swap variable codes for readable headers
    .reset_index()                  # move the geography index into an 'index' column
)

# Show the resulting frame
display(city_households)

Unnamed: 0,index,Total Estimate,Married Households,Male Unmarried Household,Female Unmarried Household,Nonfamily Household
0,"O'Fallon city, Missouri: Summary level: 160, s...",34412,20673,1885,3159,8695
1,"St. Louis city, Missouri: Summary level: 160, ...",139736,31536,6151,21260,80789
2,"Passaic city, New Jersey: Summary level: 160, ...",20446,7551,822,7018,5055
3,"Nashua city, New Hampshire: Summary level: 160...",36986,16347,2057,4264,14318
4,"Rochester city, Minnesota: Summary level: 160,...",49984,23578,1498,4250,20658
...,...,...,...,...,...,...
629,"Ankeny city, Iowa: Summary level: 160, state:1...",27720,15605,600,2677,8838
630,"Waterloo city, Iowa: Summary level: 160, state...",29948,11794,1305,2919,13930
631,"West Des Moines city, Iowa: Summary level: 160...",36066,12613,1365,1789,20299
632,"Wichita city, Kansas: Summary level: 160, stat...",156668,66238,10239,19718,60473


In [5]:
# Clean Data Frame Cities and States
# The 'index' column holds the geography objects returned by censusdata;
# their .name attribute is the place label that is parsed below.
geo_names = city_households['index'].map(lambda geo: geo.name)

# Parse "<City> city, <State>" / "<City> town, <State>" labels.
# str.extract with two named groups always returns exactly the columns
# 'City' and 'State' (NaN where the label does not match), so this step cannot
# fail with a column-count mismatch the way assigning a split(expand=True)
# result to two columns can when no label, or a label twice, hits the delimiter.
city_state = geo_names.str.extract(r'^(?P<City>.*?) (?:city|town), (?P<State>.*)$')

# Reorder Columns for CSV and Readability
output_columns = [
    'City',
    'State',
    'Total Estimate',
    'Married Households',
    'Male Unmarried Household',
    'Female Unmarried Household',
    'Nonfamily Household',
]

city_households = (
    city_households
    .drop(columns=['index'])
    .assign(City=city_state['City'], State=city_state['State'])
    .dropna(subset=['State'])  # Drop anything not a City or Town
    [output_columns]
    .sort_values(['State', 'City'], ascending=True)
)

# Display Data Frame
display(city_households)

Unnamed: 0,City,State,Total Estimate,Married Households,Male Unmarried Household,Female Unmarried Household,Nonfamily Household
539,Auburn,Alabama,29136,13886,1034,2484,11732
373,Birmingham,Alabama,87570,18910,4355,19476,44829
268,Dothan,Alabama,30301,11998,1208,5818,11277
574,Hoover,Alabama,36721,21668,1488,2059,11506
575,Huntsville,Alabama,96551,39641,4259,10487,42164
...,...,...,...,...,...,...,...
184,Milwaukee,Wisconsin,232362,58247,16688,50756,106671
238,Oshkosh,Wisconsin,28532,10450,1566,2232,14284
80,Racine,Wisconsin,30885,8965,1801,7624,12495
167,Waukesha,Wisconsin,31139,13132,1416,3694,12897


In [6]:
# Explore Data Set: print the frame summary (index range, column names,
# non-null counts, dtypes and memory usage) to confirm no values are missing
city_households.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 572 entries, 539 to 233
Data columns (total 7 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   City                        572 non-null    object
 1   State                       572 non-null    object
 2   Total Estimate              572 non-null    int64 
 3   Married Households          572 non-null    int64 
 4   Male Unmarried Household    572 non-null    int64 
 5   Female Unmarried Household  572 non-null    int64 
 6   Nonfamily Household         572 non-null    int64 
dtypes: int64(5), object(2)
memory usage: 35.8+ KB


In [7]:
# Push to CSV File
output_dir = os.path.join('..', 'Outputs')

# to_csv does not create missing parent folders, so make sure the target
# directory exists (no-op when it is already there)
os.makedirs(output_dir, exist_ok=True)

# index=False leaves the row labels out of the file
city_households.to_csv(os.path.join(output_dir, 'US_Census_Households.csv'), index=False)