In [1]:
#Dependecies
from config import api_key
import requests
import json
import pandas as pd
from census import Census
from us import states

In [2]:
c = Census(api_key)

In [3]:
# Years range from 2009 to 2020
years = range(2009, 2021)

# Initialize an empty list to store the data
data = []

def get_census_data(fields, year):
    response = c.acs5.state(fields, state.fips, year=year)

all_fields = ('NAME', 'B01003_001E', "B17001_002E", "B01002_001E", "B23025_005E","B19013_001E", "B19301_001E")
selected_fields = ('NAME', 'B01003_001E', "B17001_002E", "B01002_001E","B19013_001E", "B19301_001E")
# Iterate over each year and state

for year in years:
    for state in states.STATES:
        # Construct the API call for a particular year and state
        try:
            response = c.acs5.state(all_fields, state.fips, year=year)
        except Exception:
            response = c.acs5.state(selected_fields, state.fips, year=year)
        # Process the response and append the data to the list
        state_name = response[0]['NAME']
        total_population = response[0]['B01003_001E']
        poverty = response[0]['B17001_002E']
        age = response[0]["B01002_001E"]
        employment_unemployed = response[0].get("B23025_005E", 0)
        income = response[0]["B19013_001E"]
        income_per_capita = response[0]["B19301_001E"]
        data.append({'Year': year, 'State': state_name, 'Total Population': total_population, 
                     'Poverty': poverty, "Median Age": age, 
                     "Unemployed Civilians": employment_unemployed, 
                     "Median Household Income": income, "Per Capita Income": income_per_capita })

# Create a dataframe from the collected data
df = pd.DataFrame(data)

# Display the dataframe
df.head(10)

Unnamed: 0,Year,State,Total Population,Poverty,Median Age,Unemployed Civilians,Median Household Income,Per Capita Income
0,2009,Alabama,4633360.0,757833.0,37.2,0.0,41216.0,22732.0
1,2009,Alaska,683142.0,64038.0,32.7,0.0,64635.0,29382.0
2,2009,Arizona,6324865.0,914040.0,34.8,0.0,50296.0,25203.0
3,2009,Arkansas,2838143.0,488788.0,36.9,0.0,38542.0,20977.0
4,2009,California,36308527.0,4694423.0,34.6,0.0,60392.0,29020.0
5,2009,Colorado,4843211.0,563574.0,35.5,0.0,56222.0,29679.0
6,2009,Connecticut,3494487.0,295608.0,39.0,0.0,67721.0,36468.0
7,2009,Delaware,863832.0,88505.0,37.8,0.0,57618.0,28935.0
8,2009,Florida,18222420.0,2346946.0,39.7,0.0,47450.0,26503.0
9,2009,Georgia,9497667.0,1384518.0,34.5,0.0,49466.0,25098.0


In [4]:
# adding rate of poverty and rate of unemployment
df["% in Poverty"] = round(df["Poverty"]/df["Total Population"]*100, 2)
df["% in Unemployed"] = round(df["Unemployed Civilians"]/df["Total Population"]*100, 2)
df.head()

Unnamed: 0,Year,State,Total Population,Poverty,Median Age,Unemployed Civilians,Median Household Income,Per Capita Income,% in Poverty,% in Unemployed
0,2009,Alabama,4633360.0,757833.0,37.2,0.0,41216.0,22732.0,16.36,0.0
1,2009,Alaska,683142.0,64038.0,32.7,0.0,64635.0,29382.0,9.37,0.0
2,2009,Arizona,6324865.0,914040.0,34.8,0.0,50296.0,25203.0,14.45,0.0
3,2009,Arkansas,2838143.0,488788.0,36.9,0.0,38542.0,20977.0,17.22,0.0
4,2009,California,36308527.0,4694423.0,34.6,0.0,60392.0,29020.0,12.93,0.0


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Year                     600 non-null    int64  
 1   State                    600 non-null    object 
 2   Total Population         600 non-null    float64
 3   Poverty                  600 non-null    float64
 4   Median Age               600 non-null    float64
 5   Unemployed Civilians     600 non-null    float64
 6   Median Household Income  600 non-null    float64
 7   Per Capita Income        600 non-null    float64
 8   % in Poverty             600 non-null    float64
 9   % in Unemployed          600 non-null    float64
dtypes: float64(8), int64(1), object(1)
memory usage: 47.0+ KB


In [19]:
df = df.astype({"Total Population": int,
           "Poverty": int,
           "Unemployed Civilians": int,
           "Median Household Income": int,
           "Per Capita Income": int
          }, errors='raise')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 450 entries, 0 to 449
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Year                     450 non-null    int64  
 1   State                    450 non-null    object 
 2   Total Population         450 non-null    int32  
 3   Poverty                  450 non-null    int32  
 4   Median Age               450 non-null    float64
 5   Unemployed Civilians     450 non-null    int32  
 6   Median Household Income  450 non-null    int32  
 7   Per Capita Income        450 non-null    int32  
 8   % in Poverty             450 non-null    float64
 9   % in Unemployed          450 non-null    float64
dtypes: float64(3), int32(5), int64(1), object(1)
memory usage: 26.5+ KB


In [6]:
df.head()

Unnamed: 0,Year,State,Total Population,Poverty,Median Age,Unemployed Civilians,Median Household Income,Per Capita Income,% in Poverty,% in Unemployed
0,2009,Alabama,4633360.0,757833.0,37.2,0.0,41216.0,22732.0,16.36,0.0
1,2009,Alaska,683142.0,64038.0,32.7,0.0,64635.0,29382.0,9.37,0.0
2,2009,Arizona,6324865.0,914040.0,34.8,0.0,50296.0,25203.0,14.45,0.0
3,2009,Arkansas,2838143.0,488788.0,36.9,0.0,38542.0,20977.0,17.22,0.0
4,2009,California,36308527.0,4694423.0,34.6,0.0,60392.0,29020.0,12.93,0.0


In [7]:
# Save the DataFrame as a CSV
# Note: To avoid any issues later, use encoding="utf-8"
df.to_csv("../Resources/us_demogrpahics_2009to2020_cleaned.csv", encoding="utf-8", index=False)