In [2]:
# Standard library
import json
import os

# Third-party
# `plt` must alias the pyplot submodule; the bare `matplotlib` package does
# not expose the plotting functions (plot, subplots, ...) at its top level.
import matplotlib.pyplot as plt
import pandas as pd
import requests
from dotenv import load_dotenv
from fredapi import Fred

In [3]:
# Load environment configuration (load_dotenv is expected to populate
# os.environ, typically from a .env file), then read the FRED API key from
# the FRED_API_KEY variable and build the API client with it.
load_dotenv()
fred_api_key = os.environ.get("FRED_API_KEY")
fred = Fred(api_key=fred_api_key)

In [4]:
# Real per capita personal income by state.
# Definition: real personal income divided by midyear population.
# Search FRED for the phrase, then keep only the hits whose title actually
# contains it. No filter on the `units` column is applied here.
df_inc = (
    fred.search('Real Per Capita Personal Income')
    .loc[lambda hits: hits['title'].str.contains('Real Per Capita Personal Income')]
)

In [5]:
# Tally the matched series by their seasonal-adjustment label.
df_inc['seasonal_adjustment'].value_counts()

seasonal_adjustment
Not Seasonally Adjusted            537
Seasonally Adjusted Annual Rate      7
Name: count, dtype: int64

In [6]:
# Tally the matched series by their reported units.
df_inc['units'].value_counts()

units
Chained 2017 Dollars    540
Chained 2012 Dollars      2
Chained 2017 Dollar       1
Chained 2009 Dollars      1
Name: count, dtype: int64

In [7]:
# (rows, columns) of the title-filtered search result.
df_inc.shape

(544, 15)

In [8]:
# Display the title-filtered search result (series metadata, one row per series id).
df_inc

Unnamed: 0_level_0,id,realtime_start,realtime_end,title,observation_start,observation_end,frequency,frequency_short,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,popularity,notes
series id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
RPIPCUS,RPIPCUS,2024-01-26,2024-01-26,Real Per Capita Personal Income for United States,2008-01-01,2022-01-01,Annual,A,Chained 2017 Dollars,Chn. 2017 $,Not Seasonally Adjusted,NSA,2023-12-14 11:56:02-06:00,40,Real per capita personal income is the real pe...
CARPIPC,CARPIPC,2024-01-26,2024-01-26,Real Per Capita Personal Income for California,2008-01-01,2022-01-01,Annual,A,Chained 2017 Dollars,Chn. 2017 $,Not Seasonally Adjusted,NSA,2023-12-14 11:56:03-06:00,10,Real per capita personal income is the real pe...
RPIPC19820,RPIPC19820,2024-01-26,2024-01-26,Real Per Capita Personal Income for Detroit-Wa...,2008-01-01,2022-01-01,Annual,A,Chained 2017 Dollars,Chn. 2017 $,Not Seasonally Adjusted,NSA,2023-12-14 11:58:32-06:00,7,Real per capita personal income is the real pe...
NYRPIPC,NYRPIPC,2024-01-26,2024-01-26,Real Per Capita Personal Income for New York,2008-01-01,2022-01-01,Annual,A,Chained 2017 Dollars,Chn. 2017 $,Not Seasonally Adjusted,NSA,2023-12-14 11:56:37-06:00,5,Real per capita personal income is the real pe...
RPIPC12060,RPIPC12060,2024-01-26,2024-01-26,Real Per Capita Personal Income for Atlanta-Sa...,2008-01-01,2022-01-01,Annual,A,Chained 2017 Dollars,Chn. 2017 $,Not Seasonally Adjusted,NSA,2023-12-14 11:58:36-06:00,4,Real per capita personal income is the real pe...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NCNMPRPIPC,NCNMPRPIPC,2024-01-26,2024-01-26,Real Per Capita Personal Income: Nonmetropolit...,2008-01-01,2022-01-01,Annual,A,Chained 2017 Dollars,Chn. 2017 $,Not Seasonally Adjusted,NSA,2023-12-14 12:00:33-06:00,0,Real per capita personal income is the real pe...
RPIPC35840,RPIPC35840,2024-01-26,2024-01-26,Real Per Capita Personal Income for North Port...,2008-01-01,2022-01-01,Annual,A,Chained 2017 Dollars,Chn. 2017 $,Not Seasonally Adjusted,NSA,2023-12-14 12:00:00-06:00,0,Real per capita personal income is the real pe...
RPIPC22220,RPIPC22220,2024-01-26,2024-01-26,Real Per Capita Personal Income for Fayettevil...,2008-01-01,2022-01-01,Annual,A,Chained 2017 Dollars,Chn. 2017 $,Not Seasonally Adjusted,NSA,2023-12-14 11:59:38-06:00,0,Real per capita personal income is the real pe...
DCNMPRPIPC,DCNMPRPIPC,2024-01-26,2024-01-26,Real Per Capita Personal Income: Nonmetropolit...,2008-01-01,2022-01-01,Annual,A,Chained 2017 Dollars,Chn. 2017 $,Not Seasonally Adjusted,NSA,2023-12-14 12:00:16-06:00,0,Real per capita personal income is the real pe...


In [9]:
# Show the metadata row for the series whose id is 'TXRPIPC'.
df_inc.loc[df_inc['id'].eq('TXRPIPC')]

Unnamed: 0_level_0,id,realtime_start,realtime_end,title,observation_start,observation_end,frequency,frequency_short,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,popularity,notes
series id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
TXRPIPC,TXRPIPC,2024-01-26,2024-01-26,Real Per Capita Personal Income for Texas,2008-01-01,2022-01-01,Annual,A,Chained 2017 Dollars,Chn. 2017 $,Not Seasonally Adjusted,NSA,2023-12-14 11:56:45-06:00,4,Real per capita personal income is the real pe...


In [10]:
# Keep only the series whose id ends with 'RPIPC'; every other row is dropped.
inc_result = df_inc.loc[df_inc['id'].str.endswith('RPIPC')]

In [11]:
# List the distinct series ids that survived the 'RPIPC' suffix filter.
inc_result.id.unique()

array(['CARPIPC', 'NYRPIPC', 'TXRPIPC', 'WARPIPC', 'MARPIPC', 'CORPIPC',
       'FLRPIPC', 'MNRPIPC', 'DCRPIPC', 'WIRPIPC', 'MIRPIPC', 'KYMPRPIPC',
       'HINMPRPIPC', 'NMMPRPIPC', 'NDMPRPIPC', 'SDMPRPIPC', 'SCMPRPIPC',
       'NHNMPRPIPC', 'ALRPIPC', 'RIRPIPC', 'CTRPIPC', 'NJRPIPC',
       'VARPIPC', 'SCRPIPC', 'GARPIPC', 'WVRPIPC', 'AZRPIPC', 'TNRPIPC',
       'PARPIPC', 'MIMPRPIPC', 'NCRPIPC', 'MERPIPC', 'UTRPIPC',
       'IDNMPRPIPC', 'MDRPIPC', 'ILRPIPC', 'HIRPIPC', 'WYRPIPC',
       'MNNMPRPIPC', 'NENMPRPIPC', 'ARRPIPC', 'OHRPIPC', 'SDRPIPC',
       'NMRPIPC', 'MSRPIPC', 'IDRPIPC', 'GAMPRPIPC', 'LARPIPC', 'AKRPIPC',
       'AZMPRPIPC', 'OKRPIPC', 'VTNMPRPIPC', 'NDRPIPC', 'KYRPIPC',
       'ORRPIPC', 'NHRPIPC', 'ORMPRPIPC', 'MTRPIPC', 'KSRPIPC', 'VTRPIPC',
       'INRPIPC', 'NVRPIPC', 'MORPIPC', 'IARPIPC', 'DERPIPC', 'NERPIPC',
       'CTNMPRPIPC', 'GANMPRPIPC', 'AZNMPRPIPC', 'OKMPRPIPC',
       'NMNMPRPIPC', 'WIMPRPIPC', 'MTMPRPIPC', 'MTNMPRPIPC', 'TXMPRPIPC',
       'WINMPRPIPC

In [12]:
# Keep only ids that are exactly seven characters long. Combined with the
# 'RPIPC' suffix filter this leaves a two-character prefix (presumably the
# state abbreviation) and drops longer ids such as 'KYMPRPIPC'.
inc_result = inc_result.loc[inc_result['id'].str.len().eq(7)]

In [13]:
# (rows, columns) after both id filters.
inc_result.shape

(51, 15)

In [14]:
# List the distinct series ids that remain after both id filters.
inc_result.id.unique()
# Note: no id with a 'PR' prefix is present, i.e. PR (presumably Puerto Rico)
# data is missing from this selection.

array(['CARPIPC', 'NYRPIPC', 'TXRPIPC', 'WARPIPC', 'MARPIPC', 'CORPIPC',
       'FLRPIPC', 'MNRPIPC', 'DCRPIPC', 'WIRPIPC', 'MIRPIPC', 'ALRPIPC',
       'RIRPIPC', 'CTRPIPC', 'NJRPIPC', 'VARPIPC', 'SCRPIPC', 'GARPIPC',
       'WVRPIPC', 'AZRPIPC', 'TNRPIPC', 'PARPIPC', 'NCRPIPC', 'MERPIPC',
       'UTRPIPC', 'MDRPIPC', 'ILRPIPC', 'HIRPIPC', 'WYRPIPC', 'ARRPIPC',
       'OHRPIPC', 'SDRPIPC', 'NMRPIPC', 'MSRPIPC', 'IDRPIPC', 'LARPIPC',
       'AKRPIPC', 'OKRPIPC', 'NDRPIPC', 'KYRPIPC', 'ORRPIPC', 'NHRPIPC',
       'MTRPIPC', 'KSRPIPC', 'VTRPIPC', 'INRPIPC', 'NVRPIPC', 'MORPIPC',
       'IARPIPC', 'DERPIPC', 'NERPIPC'], dtype=object)

In [15]:
# Download the observations for every selected series from FRED and wrap each
# one in a single-column frame named after its series id.
# inc_result is indexed by series id, so iterating its index yields the ids.
# A comprehension keeps the loop variable local, so no notebook-global name
# such as `id` (which would shadow the builtin) is left behind.
all_result = [
    fred.get_series(series_id).to_frame(name=series_id)
    for series_id in inc_result.index
]

In [16]:
# Spot-check one downloaded frame (the second entry in the list).
all_result[1]

Unnamed: 0,NYRPIPC
2008-01-01,48246.0
2009-01-01,47526.0
2010-01-01,48800.0
2011-01-01,49369.0
2012-01-01,50801.0
2013-01-01,50345.0
2014-01-01,51565.0
2015-01-01,53135.0
2016-01-01,54059.0
2017-01-01,56703.0


In [17]:
# Place the per-series frames side by side so each series becomes one column.
inc_state = pd.concat(all_result, axis='columns')

In [18]:
# Show the last rows of the combined frame.
inc_state.tail()

Unnamed: 0,CARPIPC,NYRPIPC,TXRPIPC,WARPIPC,MARPIPC,CORPIPC,FLRPIPC,MNRPIPC,DCRPIPC,WIRPIPC,...,NHRPIPC,MTRPIPC,KSRPIPC,VTRPIPC,INRPIPC,NVRPIPC,MORPIPC,IARPIPC,DERPIPC,NERPIPC
2018-01-01,53481.0,57543.0,51578.0,54841.0,63345.0,56587.0,49932.0,55425.0,72433.0,52805.0,...,57229.0,50187.0,53374.0,50638.0,49297.0,50302.0,49937.0,52149.0,52109.0,54975.0
2019-01-01,55915.0,58669.0,52351.0,56828.0,65445.0,57850.0,52072.0,56672.0,74097.0,53670.0,...,59512.0,50986.0,54358.0,52739.0,50349.0,50824.0,50619.0,52319.0,52615.0,55874.0
2020-01-01,60011.0,60801.0,53456.0,60169.0,68032.0,59863.0,53888.0,59982.0,77515.0,57334.0,...,61785.0,55822.0,57114.0,54402.0,54014.0,53887.0,54141.0,55518.0,55233.0,58531.0
2021-01-01,63271.0,63666.0,56565.0,62680.0,71971.0,64282.0,57193.0,62468.0,80287.0,59557.0,...,64766.0,58704.0,58997.0,57077.0,56596.0,58810.0,56045.0,59451.0,55553.0,62680.0
2022-01-01,59103.0,60472.0,55382.0,59175.0,66701.0,63875.0,54746.0,60785.0,73383.0,57465.0,...,59247.0,58297.0,57958.0,53806.0,54811.0,55582.0,54753.0,58769.0,55709.0,61750.0


In [22]:
# Build a {series id: place name} lookup from the series titles by stripping
# the common 'Real Per Capita Personal Income for ' prefix. Titles that do not
# contain that prefix are kept unchanged.
# regex=False treats the prefix as a literal string on every pandas version
# (the default value of `regex` is not the same across pandas releases).
id_to_state = (
    df_inc['title']
    .str.replace('Real Per Capita Personal Income for ', '', regex=False)
    .to_dict()
)

In [23]:
# Inspect the current column labels of the combined frame.
inc_state.columns

Index(['CARPIPC', 'NYRPIPC', 'TXRPIPC', 'WARPIPC', 'MARPIPC', 'CORPIPC',
       'FLRPIPC', 'MNRPIPC', 'DCRPIPC', 'WIRPIPC', 'MIRPIPC', 'ALRPIPC',
       'RIRPIPC', 'CTRPIPC', 'NJRPIPC', 'VARPIPC', 'SCRPIPC', 'GARPIPC',
       'WVRPIPC', 'AZRPIPC', 'TNRPIPC', 'PARPIPC', 'NCRPIPC', 'MERPIPC',
       'UTRPIPC', 'MDRPIPC', 'ILRPIPC', 'HIRPIPC', 'WYRPIPC', 'ARRPIPC',
       'OHRPIPC', 'SDRPIPC', 'NMRPIPC', 'MSRPIPC', 'IDRPIPC', 'LARPIPC',
       'AKRPIPC', 'OKRPIPC', 'NDRPIPC', 'KYRPIPC', 'ORRPIPC', 'NHRPIPC',
       'MTRPIPC', 'KSRPIPC', 'VTRPIPC', 'INRPIPC', 'NVRPIPC', 'MORPIPC',
       'IARPIPC', 'DERPIPC', 'NERPIPC'],
      dtype='object')

In [24]:
# Replace the series-id column labels with the names stored in id_to_state.
# DataFrame.rename returns a new frame and leaves any label without a mapping
# entry untouched, so running this cell again after the columns have already
# been renamed is a no-op rather than a KeyError.
inc_state = inc_state.rename(columns=id_to_state)

In [25]:
# Confirm the column labels after renaming.
inc_state.columns

Index(['California', 'New York', 'Texas', 'Washington', 'Massachusetts',
       'Colorado', 'Florida', 'Minnesota', 'District of Columbia', 'Wisconsin',
       'Michigan', 'Alabama', 'Rhode Island', 'Connecticut', 'New Jersey',
       'Virginia', 'South Carolina', 'Georgia', 'West Virginia', 'Arizona',
       'Tennessee', 'Pennsylvania', 'North Carolina', 'Maine', 'Utah',
       'Maryland', 'Illinois', 'Hawaii', 'Wyoming', 'Arkansas', 'Ohio',
       'South Dakota', 'New Mexico', 'Mississippi', 'Idaho', 'Louisiana',
       'Alaska', 'Oklahoma', 'North Dakota', 'Kentucky', 'Oregon',
       'New Hampshire', 'Montana', 'Kansas', 'Vermont', 'Indiana', 'Nevada',
       'Missouri', 'Iowa', 'Delaware', 'Nebraska'],
      dtype='object')

In [20]:
# export inc_state to csv
# NOTE(review): this cell's execution count (20) is lower than that of the
# column-rename cell (24), so the export was last run before the rename.
# Re-run it after renaming if the CSV should carry the renamed column labels.
# The path is relative to the working directory; presumably the target
# directory must already exist — verify.
inc_state.to_csv('../cleansed data/RPIPC_income_state.csv')