In [3]:
#Dependecies
from config import api_key
import requests
import json
import pandas as pd
from census import Census
from us import states

In [4]:
c = Census(api_key)

In [10]:
# Years range from 2011 to 2019
years = range(2011, 2020)

# Initialize an empty list to store the data
data = []

# Iterate over each year and state
for year in years:
    for state in states.STATES:
        # Construct the API call for a particular year and state
        response = c.acs5.state(('NAME', 'B01003_001E', "B17001_002E", "B01002_001E", 
                                 "B23025_005E","B19013_001E", "B19301_001E" ), state.fips, year=year)
        
        # Process the response and append the data to the list
        state_name = response[0]['NAME']
        total_population = response[0]['B01003_001E']
        poverty = response[0]['B17001_002E']
        age = response[0]["B01002_001E"]
        employment_unemployed = response[0]["B23025_005E"]
        income = response[0]["B19013_001E"]
        income_per_capita = response[0]["B19301_001E"]
        data.append({'Year': year, 'State': state_name, 
                     'Total Population': total_population, 
                     'Poverty': poverty, 
                     "Median Age": age, 
                     "Unemployed Civilians": employment_unemployed, 
                     "Median Household Income": income, 
                     "Per Capita Income": income_per_capita })

# Create a dataframe from the collected data
df = pd.DataFrame(data)

# Display the dataframe
df.head(10)


Unnamed: 0,Year,State,Total Population,Poverty,Median Age,Unemployed Civilians,Median Household Income,Per Capita Income
0,2011,Alabama,4747424.0,813385.0,37.7,215543.0,42934.0,23483.0
1,2011,Alaska,700703.0,65111.0,33.8,30932.0,69014.0,31944.0
2,2011,Arizona,6337373.0,1003575.0,35.7,268363.0,50752.0,25784.0
3,2011,Arkansas,2895928.0,516822.0,37.3,114100.0,40149.0,21833.0
4,2011,California,36969200.0,5211481.0,35.1,1868871.0,61632.0,29634.0
5,2011,Colorado,4966061.0,607727.0,35.9,203279.0,57685.0,30816.0
6,2011,Connecticut,3558172.0,326771.0,39.8,163634.0,69243.0,37627.0
7,2011,Delaware,890856.0,96805.0,38.6,35231.0,59317.0,29659.0
8,2011,Florida,18688787.0,2679400.0,40.5,948078.0,47827.0,26733.0
9,2011,Georgia,9600612.0,1541462.0,35.2,469316.0,49736.0,25383.0


In [14]:
# adding rate of poverty and rate of unemployment
df["% in Poverty"] = round(df["Poverty"]/df["Total Population"]*100, 2)
df["% in Unemployed"] = round(df["Unemployed Civilians"]/df["Total Population"]*100, 2)
df.head()

Unnamed: 0,Year,State,Total Population,Poverty,Median Age,Unemployed Civilians,Median Household Income,Per Capita Income,% in Poverty,% in Unemployed
0,2011,Alabama,4747424.0,813385.0,37.7,215543.0,42934.0,23483.0,17.13,4.54
1,2011,Alaska,700703.0,65111.0,33.8,30932.0,69014.0,31944.0,9.29,4.41
2,2011,Arizona,6337373.0,1003575.0,35.7,268363.0,50752.0,25784.0,15.84,4.23
3,2011,Arkansas,2895928.0,516822.0,37.3,114100.0,40149.0,21833.0,17.85,3.94
4,2011,California,36969200.0,5211481.0,35.1,1868871.0,61632.0,29634.0,14.1,5.06


In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 450 entries, 0 to 449
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Year                     450 non-null    int64  
 1   State                    450 non-null    object 
 2   Total Population         450 non-null    float64
 3   Poverty                  450 non-null    float64
 4   Median Age               450 non-null    float64
 5   Unemployed Civilians     450 non-null    float64
 6   Median Household Income  450 non-null    float64
 7   Per Capita Income        450 non-null    float64
 8   % in Poverty             450 non-null    float64
 9   % in Unemployed          450 non-null    float64
dtypes: float64(8), int64(1), object(1)
memory usage: 35.3+ KB


In [19]:
df = df.astype({"Total Population": int,
           "Poverty": int,
           "Unemployed Civilians": int,
           "Median Household Income": int,
           "Per Capita Income": int
          }, errors='raise')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 450 entries, 0 to 449
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Year                     450 non-null    int64  
 1   State                    450 non-null    object 
 2   Total Population         450 non-null    int32  
 3   Poverty                  450 non-null    int32  
 4   Median Age               450 non-null    float64
 5   Unemployed Civilians     450 non-null    int32  
 6   Median Household Income  450 non-null    int32  
 7   Per Capita Income        450 non-null    int32  
 8   % in Poverty             450 non-null    float64
 9   % in Unemployed          450 non-null    float64
dtypes: float64(3), int32(5), int64(1), object(1)
memory usage: 26.5+ KB


In [20]:
df.head()

Unnamed: 0,Year,State,Total Population,Poverty,Median Age,Unemployed Civilians,Median Household Income,Per Capita Income,% in Poverty,% in Unemployed
0,2011,Alabama,4747424,813385,37.7,215543,42934,23483,17.13,4.54
1,2011,Alaska,700703,65111,33.8,30932,69014,31944,9.29,4.41
2,2011,Arizona,6337373,1003575,35.7,268363,50752,25784,15.84,4.23
3,2011,Arkansas,2895928,516822,37.3,114100,40149,21833,17.85,3.94
4,2011,California,36969200,5211481,35.1,1868871,61632,29634,14.1,5.06


In [23]:
# Save the DataFrame as a CSV
# Note: To avoid any issues later, use encoding="utf-8"
df.to_csv("../Resources/us_demogrpahics_2011to2019_cleaned.csv", encoding="utf-8", index=False)