In [3]:
# import dependencies
from api_keys import census_key
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from census import Census
import hvplot as hv


In [9]:
# use the census API to look at the population for each state over the last 10 years to compare the population growth of each state
base_url_2013 = "https://api.census.gov/data/2013/pep/natstprc?get=DATE_,STNAME,DOM"
key = "&key=" + census_key
all_states = "&for=state:*"
response = requests.get(base_url_2013 + all_states + key).json()
# Put the data into a dataframe
pop_2013 = pd.DataFrame(response)
# Rename the columns and set the index
pop_2013_df = pop_2013.rename(columns=pop_2013.iloc[0]).drop(pop_2013.index[0])
pop_2013_df = pop_2013_df.set_index("DATE_")
pop_2013_df


Unnamed: 0_level_0,STNAME,DOM,state
DATE_,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Alabama,,01
2,Alabama,,01
3,Alabama,542,01
4,Alabama,-154,01
5,Alabama,-759,01
...,...,...,...
2,Puerto Rico Commonwealth,0,72
3,Puerto Rico Commonwealth,0,72
4,Puerto Rico Commonwealth,0,72
5,Puerto Rico Commonwealth,0,72


In [11]:
# find the five states with the highest population growth in 2013
# convert the DOM column to a float
pop_2013_df["DOM"] = pop_2013_df["DOM"].astype(float)
growth_2013 = pop_2013_df.sort_values(by="DOM", ascending=False)
growth_2013.head(25)

Unnamed: 0_level_0,STNAME,DOM,state
DATE_,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5,Texas,142650.0,48
4,Texas,117521.0,48
6,Texas,113528.0,48
4,Florida,105305.0,12
5,Florida,100000.0,12
6,Florida,91484.0,12
6,North Carolina,37240.0,37
6,Colorado,36284.0,8
5,Arizona,34729.0,4
5,North Carolina,32791.0,37


In [15]:
# Get the population increase for each state in 2014
base_url_2014 = "https://api.census.gov/data/2014/pep/natstprc?get=DATE_,STNAME,DOM"
key = "&key=" + census_key
all_states = "&for=state:*"
response = requests.get(base_url_2014 + all_states + key).json()
# Put the data into a dataframe
pop_2014 = pd.DataFrame(response)
# Rename the columns and set the index
pop_2014_df = pop_2014.rename(columns=pop_2014.iloc[0]).drop(pop_2014.index[0])
pop_2014_df = pop_2014_df.set_index("DATE_")
pop_2014_df

Unnamed: 0_level_0,STNAME,DOM,state
DATE_,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Alabama,,01
2,Alabama,,01
3,Alabama,489,01
4,Alabama,-98,01
5,Alabama,-810,01
...,...,...,...
3,Puerto Rico Commonwealth,0,72
4,Puerto Rico Commonwealth,0,72
5,Puerto Rico Commonwealth,0,72
6,Puerto Rico Commonwealth,0,72


In [16]:
# find the five states with the highest population growth in 2014
# convert the DOM column to a float
pop_2014_df["DOM"] = pop_2014_df["DOM"].astype(float)
growth_2014 = pop_2014_df.sort_values(by="DOM", ascending=False)
growth_2014.head(25)

Unnamed: 0_level_0,STNAME,DOM,state
DATE_,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7,Texas,154467.0,48
5,Texas,145707.0,48
7,Florida,138546.0,12
4,Texas,116516.0,48
6,Texas,116464.0,48
4,Florida,105157.0,12
5,Florida,98568.0,12
6,Florida,95807.0,12
7,Arizona,41975.0,4
7,Colorado,40318.0,8


In [46]:
# Get the population for each state in 2015
base_url_2015 = "https://api.census.gov/data/2015/pep/population?get=GEONAME,POP"
key = "&key=" + census_key
all_states = "&for=state:*"
response = requests.get(base_url_2015 + all_states + key).json()
# Put the data into a dataframe
pop_2015 = pd.DataFrame(response)
# Rename the columns and set the index
pop_2015_df = pop_2015.rename(columns=pop_2015.iloc[0]).drop(pop_2015.index[0])

# edit GEONAME rows to include only the state name
pop_2015_df["GEONAME"] = pop_2015_df["GEONAME"].str.split(",").str[0]
# change the order to alphabetical by state name
pop_2015_df = pop_2015_df.sort_values(by="GEONAME")
pop_2015_df = pop_2015_df.set_index("GEONAME")
# rename POP column to POP_2015
pop_2015_df = pop_2015_df.rename(columns={"POP":"POP_2015"})
# convert POP_2015 column to a float
pop_2015_df["POP_2015"] = pop_2015_df["POP_2015"].astype(float)
pop_2015_df

Unnamed: 0_level_0,POP_2015,state
GEONAME,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,4858979.0,1
Alaska,738432.0,2
Arizona,6828065.0,4
Arkansas,2978204.0,5
California,39144818.0,6
Colorado,5456574.0,8
Connecticut,3590886.0,9
Delaware,945934.0,10
District of Columbia,672228.0,11
Florida,20271272.0,12


In [None]:
# Get the population for each state in 2016
base_url_2016 = "https://api.census.gov/data/2016/pep/population?get=GEONAME,POP"
key = "&key=" + census_key
all_states = "&for=state:*"
response = requests.get(base_url_2016 + all_states + key).json()
# Put the data into a dataframe
pop_2016 = pd.DataFrame(response)
# Rename the columns and set the index
pop_2016_df = pop_2016.rename(columns=pop_2016.iloc[0]).drop(pop_2016.index[0])
# change the order to alphabetical by state name
pop_2016_df = pop_2016_df.sort_values(by="GEONAME")
pop_2016_df = pop_2016_df.set_index("GEONAME")
# rename the POP column to POP_2016
pop_2016_df = pop_2016_df.rename(columns={"POP":"POP_2016"})
# convert POP_2016 column to a float
pop_2016_df["POP_2016"] = pop_2016_df["POP_2016"].astype(float)
pop_2016_df

In [None]:
# Get the population for each state in 2017
base_url_2017 = "https://api.census.gov/data/2017/pep/population?get=GEONAME,POP"
key = "&key=" + census_key
all_states = "&for=state:*"
response = requests.get(base_url_2017 + all_states + key).json()
# Put the data into a dataframe
pop_2017 = pd.DataFrame(response)
# Rename the columns and set the index
pop_2017_df = pop_2017.rename(columns=pop_2017.iloc[0]).drop(pop_2017.index[0])
# change the order to alphabetical by state name
pop_2017_df = pop_2017_df.sort_values(by="GEONAME")
pop_2017_df = pop_2017_df.set_index("GEONAME")
# rename the POP column to POP_2017
pop_2017_df = pop_2017_df.rename(columns={"POP":"POP_2017"})
# convert POP_2017 column to a float
pop_2017_df["POP_2017"] = pop_2017_df["POP_2017"].astype(float)
pop_2017_df

In [None]:
# Get the population for each state in 2018
base_url_2018 = "https://api.census.gov/data/2018/pep/population?get=GEONAME,POP"
key = "&key=" + census_key
all_states = "&for=state:*"
response = requests.get(base_url_2018 + all_states + key).json()
# Put the data into a dataframe
pop_2018 = pd.DataFrame(response)
# Rename the columns and set the index
pop_2018_df = pop_2018.rename(columns=pop_2018.iloc[0]).drop(pop_2018.index[0])
# change the order to alphabetical by state name
pop_2018_df = pop_2018_df.sort_values(by="GEONAME")
pop_2018_df = pop_2018_df.set_index("GEONAME")
# rename the POP column to POP_2018
pop_2018_df = pop_2018_df.rename(columns={"POP": "POP_2018"})
# convert POP_2018 column to a float
pop_2018_df["POP_2018"] = pop_2018_df["POP_2018"].astype(float)
pop_2018_df

In [None]:
# Get the population for each state in 2019
base_url_2019 = "https://api.census.gov/data/2019/pep/population?get=NAME,POP"
key = "&key=" + census_key
all_states = "&for=state:*"
response = requests.get(base_url_2019 + all_states + key).json()
# Put the data into a dataframe
pop_2019 = pd.DataFrame(response)
# Rename the columns and set the index
pop_2019_df = pop_2019.rename(columns=pop_2019.iloc[0]).drop(pop_2019.index[0])
# change the order to alphabetical by state name
pop_2019_df = pop_2019_df.sort_values(by="NAME")
pop_2019_df = pop_2019_df.set_index("NAME")
# rename POP column to POP_2019
pop_2019_df = pop_2019_df.rename(columns={"POP": "POP_2019"})
# convert POP_2019 column to a float
pop_2019_df["POP_2019"] = pop_2019_df["POP_2019"].astype(float)
pop_2019_df

In [53]:
# Get the population for each state in 2020 and 2021
base_url_2021 = "https://api.census.gov/data/2021/pep/population?get=NAME,POP_2020,POP_2021"
key = "&key=" + census_key
all_states = "&for=state:*"
response = requests.get(base_url_2021 + all_states + key).json()
# Put the data into a dataframe
pop_2021 = pd.DataFrame(response)
# Rename the columns and set the index
pop_2021_df = pop_2021.rename(columns=pop_2021.iloc[0]).drop(pop_2021.index[0])
# change the order to alphabetical by state name
pop_2021_df = pop_2021_df.sort_values(by="NAME")
pop_2021_df = pop_2021_df.set_index("NAME")
# convert POP_2020 and POP_2021 columns to a float
pop_2021_df["POP_2020"] = pop_2021_df["POP_2020"].astype(float)
pop_2021_df["POP_2021"] = pop_2021_df["POP_2021"].astype(float)
pop_2021_df

Unnamed: 0_level_0,POP_2020,POP_2021,state
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama,5024803.0,5039877.0,1
Alaska,732441.0,732673.0,2
Arizona,7177986.0,7276316.0,4
Arkansas,3012232.0,3025891.0,5
California,39499738.0,39237836.0,6
Colorado,5784308.0,5812069.0,8
Connecticut,3600260.0,3605597.0,9
Delaware,991886.0,1003384.0,10
District of Columbia,690093.0,670050.0,11
Florida,21569932.0,21781128.0,12


In [59]:
# Merge 2015-2021 dataframes
pop_change_df1 = pd.merge(pop_2015_df, pop_2016_df, on="GEONAME")
pop_change_df2 = pd.merge(pop_change_df1, pop_2017_df, on="GEONAME")
pop_change_df3 = pd.merge(pop_change_df2, pop_2018_df, on="GEONAME")
# change the index name to NAME
pop_change_df3.index.name = "NAME"
pop_change_df4 = pd.merge(pop_change_df3, pop_2019_df, on="NAME")
pop_change_df = pd.merge(pop_change_df4, pop_2021_df, on="NAME")
# change the index name to STATE
pop_change_df.index.name = "STATE"
# remove columns that are not needed
pop_change_df.drop(columns=["state_x", "state_y"], inplace=True)
pop_change_df


Unnamed: 0_level_0,POP_2015,POP_2016,POP_2017,POP_2018,POP_2019,POP_2020,POP_2021
STATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Alabama,4858979.0,4863300.0,4874747.0,4887871.0,4903185.0,5024803.0,5039877.0
Alaska,738432.0,741894.0,739795.0,737438.0,731545.0,732441.0,732673.0
Arizona,6828065.0,6931071.0,7016270.0,7171646.0,7278717.0,7177986.0,7276316.0
Arkansas,2978204.0,2988248.0,3004279.0,3013825.0,3017804.0,3012232.0,3025891.0
California,39144818.0,39250017.0,39536653.0,39557045.0,39512223.0,39499738.0,39237836.0
Colorado,5456574.0,5540545.0,5607154.0,5695564.0,5758736.0,5784308.0,5812069.0
Connecticut,3590886.0,3576452.0,3588184.0,3572665.0,3565287.0,3600260.0,3605597.0
Delaware,945934.0,952065.0,961939.0,967171.0,973764.0,991886.0,1003384.0
District of Columbia,672228.0,681170.0,693972.0,702455.0,705749.0,690093.0,670050.0
Florida,20271272.0,20612439.0,20984400.0,21299325.0,21477737.0,21569932.0,21781128.0
