In [2]:
import os
from dotenv import load_dotenv
import requests
import pandas as pd
import matplotlib as plt
import json
from fredapi import Fred

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
load_dotenv()
fred_api_key = os.getenv("FRED_API_KEY")
fred = Fred(api_key=fred_api_key)

In [4]:
# Real Personal Income by State
# Real personal income is personal income at RPPs divided by the national PCE price index.
df_inc = fred.search('Real Personal Income')
# df_inc = df_inc.query('units == "Dollars"')
# filter for only Real Personal Income in title
df_inc = df_inc.loc[df_inc['title'].str.contains('Real Personal Income')]

In [5]:
df_inc['seasonal_adjustment'].value_counts()

seasonal_adjustment
Not Seasonally Adjusted            287
Seasonally Adjusted Annual Rate      4
Name: count, dtype: int64

In [6]:
df_inc['units'].value_counts()

units
Thousands of Chained 2012 Dollars    246
Millions of Chained 2012 Dollars      40
Millions of Dollars                    3
Billions of Chained 2017 Dollars       1
Millions of Chained 2009 Dollars       1
Name: count, dtype: int64

In [7]:
df_inc.shape

(291, 15)

In [8]:
df_inc

Unnamed: 0_level_0,id,realtime_start,realtime_end,title,observation_start,observation_end,frequency,frequency_short,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,popularity,notes
series id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
RPI,RPI,2024-01-26,2024-01-26,Real Personal Income,1959-01-01,2023-12-01,Monthly,M,Billions of Chained 2017 Dollars,Bil. of Chn. 2017 $,Seasonally Adjusted Annual Rate,SAAR,2024-01-26 09:20:15-06:00,58,Calculated by the Federal Reserve Bank of St. ...
RPI17860,RPI17860,2024-01-26,2024-01-26,"Real Personal Income for Columbia, MO (MSA)",2008-01-01,2022-01-01,Annual,A,Thousands of Chained 2012 Dollars,Thous. of Chn. 2012 $,Not Seasonally Adjusted,NSA,2023-12-14 11:58:48-06:00,4,Real personal income is personal income at RPP...
KSRPI,KSRPI,2024-01-26,2024-01-26,Real Personal Income for Kansas,2008-01-01,2022-01-01,Annual,A,Millions of Chained 2012 Dollars,Mil. of Chn. 2012 $,Not Seasonally Adjusted,NSA,2023-12-14 11:59:06-06:00,3,Real personal income is personal income at RPP...
RPI27060,RPI27060,2024-01-26,2024-01-26,"Real Personal Income for Ithaca, NY (MSA)",2008-01-01,2022-01-01,Annual,A,Thousands of Chained 2012 Dollars,Thous. of Chn. 2012 $,Not Seasonally Adjusted,NSA,2023-12-14 11:56:23-06:00,3,Real personal income is personal income at RPP...
RPI11700,RPI11700,2024-01-26,2024-01-26,"Real Personal Income for Asheville, NC (MSA)",2008-01-01,2022-01-01,Annual,A,Thousands of Chained 2012 Dollars,Thous. of Chn. 2012 $,Not Seasonally Adjusted,NSA,2023-12-14 11:56:24-06:00,3,Real personal income is personal income at RPP...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
RPI48140,RPI48140,2024-01-26,2024-01-26,"Real Personal Income for Wausau, WI (MSA)",2008-01-01,2022-01-01,Annual,A,Thousands of Chained 2012 Dollars,Thous. of Chn. 2012 $,Not Seasonally Adjusted,NSA,2023-12-14 11:58:37-06:00,1,Real personal income is personal income at RPP...
ILNMPRPI,ILNMPRPI,2024-01-26,2024-01-26,Real Personal Income: Nonmetropolitan Portion ...,2008-01-01,2022-01-01,Annual,A,Thousands of Chained 2012 Dollars,Thous. of Chn. 2012 $,Not Seasonally Adjusted,NSA,2023-12-14 11:59:24-06:00,1,Real personal income is personal income at RPP...
ORMPRPI,ORMPRPI,2024-01-26,2024-01-26,Real Personal Income: Metropolitan Portion for...,2008-01-01,2022-01-01,Annual,A,Thousands of Chained 2012 Dollars,Thous. of Chn. 2012 $,Not Seasonally Adjusted,NSA,2023-12-14 11:58:52-06:00,1,Real personal income is personal income at RPP...
TNRPI,TNRPI,2024-01-26,2024-01-26,Real Personal Income for Tennessee,2008-01-01,2022-01-01,Annual,A,Millions of Chained 2012 Dollars,Mil. of Chn. 2012 $,Not Seasonally Adjusted,NSA,2023-12-14 11:56:47-06:00,1,Real personal income is personal income at RPP...


In [9]:
df_inc[df_inc['id'] == 'TXRPI']

Unnamed: 0_level_0,id,realtime_start,realtime_end,title,observation_start,observation_end,frequency,frequency_short,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,popularity,notes
series id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1


In [10]:
# drop rows in 'id' column that do not end with RPI
inc_result = df_inc[df_inc['id'].str.endswith('RPI')]

In [11]:
inc_result.id.unique()

array(['RPI', 'KSRPI', 'MDNMPRPI', 'OKRPI', 'CORPI', 'IDNMPRPI', 'CARPI',
       'VARPI', 'UTRPI', 'MTMPRPI', 'MARPI', 'SCRPI', 'VTNMPRPI', 'IARPI',
       'MNRPI', 'WYNMPRPI', 'ORRPI', 'NYMPRPI', 'CANMPRPI', 'OHRPI',
       'CTNMPRPI', 'INRPI', 'RIRPI', 'FLRPI', 'MENMPRPI', 'DERPI',
       'MIRPI', 'NCRPI', 'WYRPI', 'NYRPI', 'WVRPI', 'IDRPI', 'MORPI',
       'AKRPI', 'MINMPRPI', 'ALRPI', 'ALNMPRPI', 'KYNMPRPI', 'VANMPRPI',
       'SDRPI', 'CONMPRPI', 'ILRPI', 'LAMPRPI', 'NHRPI', 'ARNMPRPI',
       'NMNMPRPI', 'KYRPI', 'AZNMPRPI', 'TNNMPRPI', 'PARPI', 'AZMPRPI',
       'NDRPI', 'LANMPRPI', 'CTRPI', 'ARRPI', 'WIRPI', 'GAMPRPI',
       'MOMPRPI', 'MSMPRPI', 'NCNMPRPI', 'ILMPRPI', 'SCMPRPI', 'AZRPI',
       'SDNMPRPI', 'NMMPRPI', 'SDMPRPI', 'MIMPRPI', 'MDRPI', 'MSRPI',
       'OHMPRPI', 'SCNMPRPI', 'ILNMPRPI', 'ORMPRPI', 'TNRPI', 'HINMPRPI'],
      dtype=object)

In [12]:
# filter for rows with ids whose length is 5
inc_result = inc_result[inc_result['id'].str.len() == 5]

In [13]:
inc_result.shape

(38, 15)

In [14]:
inc_result.id.unique()
# missing some states' data

array(['KSRPI', 'OKRPI', 'CORPI', 'CARPI', 'VARPI', 'UTRPI', 'MARPI',
       'SCRPI', 'IARPI', 'MNRPI', 'ORRPI', 'OHRPI', 'INRPI', 'RIRPI',
       'FLRPI', 'DERPI', 'MIRPI', 'NCRPI', 'WYRPI', 'NYRPI', 'WVRPI',
       'IDRPI', 'MORPI', 'AKRPI', 'ALRPI', 'SDRPI', 'ILRPI', 'NHRPI',
       'KYRPI', 'PARPI', 'NDRPI', 'CTRPI', 'ARRPI', 'WIRPI', 'AZRPI',
       'MDRPI', 'MSRPI', 'TNRPI'], dtype=object)

In [15]:
# extract first two characters from 'id' column
list3 = [x[:2] for x in inc_result['id']]

In [16]:
# list1 contains 52 states
list1 = ['CAUR', 'TXUR', 'FLUR', 'NYUR', 'OHUR', 'MIUR', 'PAUR', 'NCUR', 'ILUR',
       'ALUR', 'NJUR', 'MAUR', 'WIUR', 'COUR', 'GAUR', 'VAUR', 'SCUR', 'AZUR',
       'IAUR', 'ORUR', 'KYUR', 'INUR', 'MDUR', 'TNUR', 'ARUR', 'NVUR', 'MNUR',
       'NMUR', 'MOUR', 'LAUR', 'PRUR', 'WVUR', 'UTUR', 'CTUR', 'WAUR', 'MSUR',
       'NDUR', 'OKUR', 'DEUR', 'NEUR', 'HIUR', 'KSUR', 'AKUR', 'NHUR', 'WYUR',
       'MTUR', 'IDUR', 'DCUR', 'SDUR', 'MEUR', 'RIUR', 'VTUR']

list1 = [x[:2] for x in list1]
len(list1)

52

In [17]:
# these are the missing states
list(set(list1) - set(list3))

['PR',
 'VT',
 'NV',
 'NJ',
 'LA',
 'NE',
 'WA',
 'GA',
 'MT',
 'ME',
 'TX',
 'DC',
 'HI',
 'NM']

In [18]:
# for each state in list1, search for Real Personal Income
# eg. TXRPI, NERPI, etc.
# add each result to inc_result
for state in list(set(list1) - set(list3)):
    df = fred.search(state + 'RPI')
    inc_result = pd.concat([inc_result,df],axis=0)

In [19]:
inc_result['id'].unique()

array(['KSRPI', 'OKRPI', 'CORPI', 'CARPI', 'VARPI', 'UTRPI', 'MARPI',
       'SCRPI', 'IARPI', 'MNRPI', 'ORRPI', 'OHRPI', 'INRPI', 'RIRPI',
       'FLRPI', 'DERPI', 'MIRPI', 'NCRPI', 'WYRPI', 'NYRPI', 'WVRPI',
       'IDRPI', 'MORPI', 'AKRPI', 'ALRPI', 'SDRPI', 'ILRPI', 'NHRPI',
       'KYRPI', 'PARPI', 'NDRPI', 'CTRPI', 'ARRPI', 'WIRPI', 'AZRPI',
       'MDRPI', 'MSRPI', 'TNRPI', 'VTRPI', 'NVRPI', 'NJRPI', 'LARPI',
       'NERPI', 'WARPI', 'GARPI', 'MTRPI', 'MERPI', 'TXRPI', 'DCRPI',
       'HIRPI', 'NMRPI'], dtype=object)

In [20]:
inc_result.shape
# does not contain data for Puerto Rico

(51, 15)

In [21]:
all_result =[]

for id in inc_result.index:
    result = fred.get_series(id)
    result = result.to_frame(name = id)
    all_result.append(result)

In [22]:
all_result[3]

Unnamed: 0,CARPI
2008-01-01,1603306.0
2009-01-01,1567285.7
2010-01-01,1624234.9
2011-01-01,1688931.8
2012-01-01,1753099.3
2013-01-01,1755250.2
2014-01-01,1834858.3
2015-01-01,1951176.5
2016-01-01,2033539.5
2017-01-01,2073859.0


In [23]:
inc_state = pd.concat(all_result, axis=1)

In [24]:
inc_state.tail()

Unnamed: 0,KSRPI,OKRPI,CORPI,CARPI,VARPI,UTRPI,MARPI,SCRPI,IARPI,MNRPI,...,LARPI,NERPI,WARPI,GARPI,MTRPI,MERPI,TXRPI,DCRPI,HIRPI,NMRPI
2018-01-01,156470.6,198041.5,321261.3,2114414.8,461792.7,147988.6,443038.1,234088.9,165388.8,313158.8,...,229692.2,106946.6,413969.6,508578.3,53527.0,64801.5,1470536.8,49662.2,66685.8,92359.5
2019-01-01,159473.4,205884.6,331824.7,2211331.7,485163.6,154949.1,459204.9,247779.1,166575.5,322277.0,...,232867.1,109229.0,434089.4,529702.2,54833.7,68701.4,1510731.8,50934.9,68043.2,94884.3
2020-01-01,167795.8,212939.7,346297.8,2370532.3,503603.4,170963.0,475930.5,263888.7,177134.1,342486.2,...,245610.4,114875.3,464745.7,560064.9,60683.1,72474.7,1562645.1,52002.4,70085.7,103129.2
2021-01-01,173329.5,225186.8,373562.1,2476630.0,519934.8,185146.0,503053.4,269398.1,190106.4,356783.2,...,253842.3,123075.9,485192.3,582054.7,64939.4,76408.4,1672001.0,53695.2,72682.1,110822.1
2022-01-01,170230.0,219977.8,373023.8,2306733.7,506086.2,183600.3,465702.2,261255.3,188090.5,347518.7,...,238346.8,121519.0,460723.4,556052.9,65459.2,71836.7,1663095.2,49299.0,69265.6,104613.6


In [43]:
inc_result[inc_result.title.str.contains('Vermont')]

Unnamed: 0_level_0,id,realtime_start,realtime_end,title,observation_start,observation_end,frequency,frequency_short,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,popularity,notes
series id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
VTRPI,VTRPI,2024-01-26,2024-01-26,Real Personal Income for Vermont,2008-01-01,2022-01-01,Annual,A,Millions of Chained 2012 Dollars,Mil. of Chn. 2012 $,Not Seasonally Adjusted,NSA,2023-12-14 12:00:33-06:00,0,Real personal income is personal income at RPP...


In [45]:
inc_result.title

series id
KSRPI                  Real Personal Income for Kansas
OKRPI                Real Personal Income for Oklahoma
CORPI                Real Personal Income for Colorado
CARPI              Real Personal Income for California
VARPI                Real Personal Income for Virginia
UTRPI                    Real Personal Income for Utah
MARPI           Real Personal Income for Massachusetts
SCRPI          Real Personal Income for South Carolina
IARPI                    Real Personal Income for Iowa
MNRPI               Real Personal Income for Minnesota
ORRPI                  Real Personal Income for Oregon
OHRPI                    Real Personal Income for Ohio
INRPI                 Real Personal Income for Indiana
RIRPI            Real Personal Income for Rhode Island
FLRPI                 Real Personal Income for Florida
DERPI                Real Personal Income for Delaware
MIRPI                Real Personal Income for Michigan
NCRPI          Real Personal Income for North Carolina


In [46]:
# create a mapping dict to change column names using list comprehension
id_to_state = inc_result['title'].str.replace('Real Personal Income for ','').to_dict()

In [47]:
id_to_state.get('VTRPI')

'Vermont'

In [48]:
inc_state.columns

Index(['KSRPI', 'OKRPI', 'CORPI', 'CARPI', 'VARPI', 'UTRPI', 'MARPI', 'SCRPI',
       'IARPI', 'MNRPI', 'ORRPI', 'OHRPI', 'INRPI', 'RIRPI', 'FLRPI', 'DERPI',
       'MIRPI', 'NCRPI', 'WYRPI', 'NYRPI', 'WVRPI', 'IDRPI', 'MORPI', 'AKRPI',
       'ALRPI', 'SDRPI', 'ILRPI', 'NHRPI', 'KYRPI', 'PARPI', 'NDRPI', 'CTRPI',
       'ARRPI', 'WIRPI', 'AZRPI', 'MDRPI', 'MSRPI', 'TNRPI', 'VTRPI', 'NVRPI',
       'NJRPI', 'LARPI', 'NERPI', 'WARPI', 'GARPI', 'MTRPI', 'MERPI', 'TXRPI',
       'DCRPI', 'HIRPI', 'NMRPI'],
      dtype='object')

In [49]:
# change column names using list comprehension
inc_state.columns = [id_to_state[d] for d in inc_state.columns]

In [54]:
inc_state.head()

Unnamed: 0,Kansas,Oklahoma,Colorado,California,Virginia,Utah,Massachusetts,South Carolina,Iowa,Minnesota,...,Louisiana,Nebraska,Washington,Georgia,Montana,Maine,Texas,District of Columbia,Hawaii,New Mexico
2008-01-01,144002.4,180162.8,228236.2,1603306.0,386846.7,104127.9,352308.8,183085.3,149119.1,261617.2,...,208684.8,90654.1,310770.0,399457.2,41419.1,55982.4,1120909.7,35121.3,57070.7,79607.8
2009-01-01,136817.7,169675.1,220217.9,1567285.7,384428.5,97472.5,353100.9,181659.9,141685.1,246937.4,...,206489.2,88747.4,299091.6,403761.1,40981.4,56183.4,1075852.1,35401.9,55419.6,78630.2
2010-01-01,135439.8,170857.7,226621.4,1624234.9,389491.6,101138.6,365865.6,181307.2,141341.1,256175.2,...,207753.8,90296.9,307765.6,395187.5,42683.6,57177.8,1113308.5,37023.1,57291.7,81361.8
2011-01-01,143276.0,178480.4,233652.0,1688931.8,402526.5,103644.8,374037.9,185814.6,150625.1,268028.4,...,207629.7,99235.9,316238.7,415469.5,43600.1,58920.9,1196635.6,39115.0,58168.3,81154.2
2012-01-01,147397.5,185729.8,242375.6,1753099.3,415618.4,108592.8,381927.0,190915.1,152906.0,277805.6,...,213483.3,98938.0,335705.6,410423.3,45954.1,58117.2,1239768.8,40381.8,59780.8,81911.4


In [None]:
inc_state.to_csv('../cleansed data/RPI_income_state.csv')