In [1]:
import os
import time

import pandas as pd
import numpy as np

from fredapi import Fred

# The FRED API key is a credential, so it is read from the FRED_API_KEY
# environment variable instead of being stored in the notebook. Export
# FRED_API_KEY in the environment before starting the kernel.
# NOTE(review): the key that used to be written here in plain text should be
# treated as leaked and rotated.
fred = Fred(api_key=os.environ.get('FRED_API_KEY'))

# NOTE(review): str_to_list and rename_str_to_dict, used throughout this
# notebook, presumably come from this star import -- confirm and import them
# by name.
from utilities import *

# FRED

Here I use the `fredapi` package to extract a large number of FRED variables and apply some very basic transformations.

In [2]:
# Helper function to extract multiple series from FRED at once and join (concatenate) them.
# Also returns the date as a column, not as the index.
def get_fred_series(series):
    """Download several FRED series and return them side by side in one frame.

    Parameters
    ----------
    series : iterable of str
        FRED series IDs. Each one is fetched with ``fred.get_series`` and
        becomes a column named after its ID.

    Returns
    -------
    pandas.DataFrame
        A frame whose first column is ``date`` (the former index), followed by
        one column per requested series. The series are aligned on their
        index, so a date missing from one series shows up as NaN in it.
    """
    columns = [fred.get_series(series_id).rename(series_id) for series_id in series]
    series_frame = pd.concat(columns, axis = 1)
    # Name the index explicitly before resetting it: renaming the default
    # 'index' column afterwards would silently do nothing if the downloaded
    # series carried a named index, leaving no 'date' column for callers.
    return series_frame.rename_axis('date').reset_index()

## Monthly data

In [3]:
# Download all monthly series into one frame
vars_m = 'CPIAUCSL AAA BAA TB3MS GS1 GS10 FII10 UNRATE EMRATIO POP AWHNONAG AWHI MICH'
data_m = get_fred_series(str_to_list(vars_m))

# Replace the timestamp column with separate month and year columns
data_m = (
    data_m
    .assign(month = data_m['date'].dt.month, year = data_m['date'].dt.year)
    .drop(columns = ['date'])
)

# Rescale: the listed series are divided by 100, POP by 1000
# (presumably percent -> fraction and thousands -> millions; confirm units)
data_m = data_m.assign(
    **{
        var: data_m[var] / 100
        for var in str_to_list('AAA BAA TB3MS GS1 GS10 FII10 UNRATE EMRATIO MICH')
    },
    POP = data_m['POP'] / 1000,
)

# Map the FRED series IDs to the short variable names used downstream
rename_m = '''
rename TB3MS r_tb3m
rename GS1 r_gs1
rename GS10 r_gs10
rename FII10 r_rty10
rename AAA r_aaa
rename BAA r_baa
rename CPIAUCSL p_cpi
rename UNRATE unr
rename EMRATIO emprate_civil
rename AWHNONAG hw_prod
rename AWHI htot_prod
rename POP pop
rename MICH infl_mich
'''

data_m = data_m.rename(columns = rename_str_to_dict(rename_m))

## Quarterly data

In [4]:
vars_q = '''GDP PCEC PCND PCDG PCESV A008RE1Q156NBEA B010RE1Q156NBEA GDPDEF 
IPDNBS A008RD3Q086SBEA A009RD3Q086SBEA A010RD3Q086SBEA Y001RD3Q086SBEA GDPC1 
GPDIC1 B191RA3Q086SBEA DPCERA3Q086SBEA DNDGRA3Q086SBEA B006RA3Q086SBEA 
B008RA3Q086SBEA B009RA3Q086SBEA B010RA3Q086SBEA Y001RA3Q086SBEA 
LFWA64TTUSQ647N LREM25TTUSQ156S LREM64TTUSQ156S Y001RE1Q156NBEA 
A011RE1Q156NBEA B009RE1Q156NBEA'''

# Download all quarterly series into one frame
data_q = get_fred_series(str_to_list(vars_q))

# Add quarter and year columns. The date column is kept for now: later cells
# merge further quarterly frames into data_q and need it as the key.
data_q['quarter'] = data_q['date'].dt.quarter
data_q['year'] = data_q['date'].dt.year

# Short names for nominal GDP and the consumption aggregates
rename_q = '''
rename GDP y
rename PCEC c
rename PCND cnd
rename PCDG cd
rename PCESV cs
'''
data_q = data_q.rename(columns = rename_str_to_dict(rename_q))

# Investment components: y times the corresponding *RE1Q156NBEA series / 100
# (presumably shares of GDP in percent -- TODO confirm against FRED)
data_q = data_q.eval('''
    i_nonres = y*A008RE1Q156NBEA/100
    i_equisoft = y*B010RE1Q156NBEA/100
    i_ip = y*Y001RE1Q156NBEA/100
    i_res = y*A011RE1Q156NBEA/100
    i_struc = y*B009RE1Q156NBEA/100
''')

# The share series are no longer needed once the levels exist. Each of the
# five series used above is listed exactly once (the list used to repeat
# B010RE1Q156NBEA).
drop_q = '''A008RE1Q156NBEA B009RE1Q156NBEA B010RE1Q156NBEA 
           Y001RE1Q156NBEA A011RE1Q156NBEA'''
data_q = data_q.drop(columns = str_to_list(drop_q))

# Short names for the remaining series, grouped as in the mapping below:
# p_* columns, then yc09 / ic09_tot and the *q columns, then population and
# employment rates
rename_q1 = '''
rename GDPDEF p_gdp
rename IPDNBS p_nonfarmbus
rename A008RD3Q086SBEA p_i_nonres
rename A009RD3Q086SBEA p_i_struct
rename A010RD3Q086SBEA p_i_equisoft
rename Y001RD3Q086SBEA p_i_intell

rename GDPC1 yc09
rename GPDIC1 ic09_tot
rename B191RA3Q086SBEA yq
rename DPCERA3Q086SBEA cq
rename DNDGRA3Q086SBEA cndq
rename B006RA3Q086SBEA iq_tot
rename B008RA3Q086SBEA iq_nonres
rename B009RA3Q086SBEA iq_struct
rename B010RA3Q086SBEA iq_equisoft
rename Y001RA3Q086SBEA iq_intell

rename LFWA64TTUSQ647N pop1564
rename LREM25TTUSQ156S emprate2554
rename LREM64TTUSQ156S emprate1564
'''

data_q = data_q.rename(columns = rename_str_to_dict(rename_q1))

# Rescale pop1564 by 1e-6 (presumably persons -> millions; confirm units)
data_q['pop1564'] = data_q['pop1564']/1000000

### Non-financial corporate sector

In [5]:
vars_q1 = '''TABSNNCB TTAABSNNCB NCBREMV ESABSNNCB NCBNIPPCCB IABSNNCB 
TFAABSNNCB MAABSNNCB MLLBSNNCB TLBSNNCB NCBDSL NCBLL NCBCEL TNWMVBSNNCB 
RCVSRNWMVBSNNCB RCSNNWMVBSNNCB B455RX1Q027SBEA N456RX1Q027SBEA NCBGVAQ027S 
NCBCCFQ027S NCBCEPQ027S NCBSCPQ027S W325RC1Q027SBEA NCBOSNQ027S B465RC1Q027SBEA 
W327RC1Q027SBEA NCBGCFQ027S NCBFNEQ027S'''

# Fetch this batch of series and outer-join it onto the main quarterly frame.
# No key is given, so pandas joins on the columns the two frames share.
data_q1 = get_fred_series(str_to_list(vars_q1))
data_q = pd.merge(data_q, data_q1, how = 'outer')

# Short names for the series in this batch; series not listed here keep
# their FRED IDs
rename_q2 = '''
rename TABSNNCB a_tot_nfcb
rename TTAABSNNCB a_nonfin_nfcb
rename NCBREMV a_realestate_mv_nfcb
rename ESABSNNCB a_equip_nfcb
rename NCBNIPPCCB a_intell_nfcb
rename IABSNNCB a_inventories_nfcb
rename TFAABSNNCB a_fin_nfcb
rename MAABSNNCB a_misc_nfcb
rename TLBSNNCB l_tot_nfcb
rename MLLBSNNCB l_misc_nfcb
rename NCBDSL l_debt_nfcb
rename NCBLL l_loans_nfcb
rename NCBCEL   mve_nfcb
rename TNWMVBSNNCB   nw_nfcb
rename RCVSRNWMVBSNNCB krc_res_nfcb
rename RCSNNWMVBSNNCB krc_nonres_nfcb

rename B455RX1Q027SBEA yc09_nfcb
rename N456RX1Q027SBEA depc09_nfcb
'''

data_q = data_q.rename(columns = rename_str_to_dict(rename_q2))

# Debt and loans are divided by 1000 (to billions, presumably from millions)
data_q = data_q.assign(
    l_debt_nfcb = data_q['l_debt_nfcb'] / 1000,
    l_loans_nfcb = data_q['l_loans_nfcb'] / 1000,
)

### Non-financial non-corporate sector

In [6]:
vars_q2 = '''TABSNNB TTAABSNNB NNBREMV ESABSNNB NNBNIPPCCB IABSNNB TFAABSNNB 
MAABSNNB MLLBSNNB TLBSNNB NNBLL RCVSRNWBSNNB RCVSNWBSNNB NNBGVAQ027S 
NNBCCFQ027S NNBCEPQ027S NNBGFNQ027S'''

# Get variables, merge into main dataset
data_q2 = get_fred_series(str_to_list(vars_q2))
data_q = data_q.merge(data_q2, how = 'outer')

# Short names for the series in this batch; series not listed here keep
# their FRED IDs
rename_q3 = '''
rename TABSNNB    a_tot_nfncb
rename TTAABSNNB  a_nonfin_nfncb
rename NNBREMV    a_realestate_mv_nfncb
rename ESABSNNB   a_equip_nfncb
rename NNBNIPPCCB   a_intell_nfncb
rename IABSNNB      a_inventories_nfncb
rename TFAABSNNB    a_fin_nfncb
rename MAABSNNB     a_misc_nfncb
rename TLBSNNB      l_tot_nfncb
rename MLLBSNNB     l_misc_nfncb
rename NNBLL        l_loans_nfncb
rename RCVSRNWBSNNB  krc_res_nfncb
rename RCVSNWBSNNB   krc_nonres_nfncb
'''
data_q = data_q.rename(columns = rename_str_to_dict(rename_q3))

# Loans are divided by 1000 (to billions, presumably from millions)
data_q['l_loans_nfncb'] = data_q['l_loans_nfncb']/1000

# quarter and year were filled in before the outer merges, so any row that
# exists only in a merged-in frame has NaN there and would be silently
# discarded by a later filter on quarter. Recompute both from the date while
# it is still available.
data_q['quarter'] = data_q['date'].dt.quarter
data_q['year'] = data_q['date'].dt.year

# The date is not needed once quarter and year are in place
data_q = data_q.drop(columns = ['date'])

## Annual data

In [7]:
# FRED IDs of the annual series to download
vars_a = '''GDPA PNFIA PRFIA K1NTOTL1ES000 M1NTOTL1ES000 K1X53101ES000 
M1X53101ES000 A438RC1A027NBEA NCBOSNA027N NNBBOSA027N A191RA3A086NBEA 
PNFICA A593RA3A086NBEA A594RA3A086NBEA A748RA3A086NBEA GDPCA PCECCA 
A593RX1A020NBEA A594RX1A020NBEA  B4701C0A222NBEA A191RD3A086NBEA 
DPCERD3A086NBEA A008RD3A086NBEA Y033RD3A086NBEA'''

# Short names for those series; IDs not listed here keep their FRED name
rename_a = '''
rename GDPA y
rename PNFIA i_nonres
rename PRFIA i_res
rename K1NTOTL1ES000    krc_nonres_priv
rename M1NTOTL1ES000    dep_nonres_priv
rename K1X53101ES000    krc_res_tot
rename M1X53101ES000    dep_res_tot
rename A438RC1A027NBEA  dep_cb
rename A191RA3A086NBEA  yq
rename A593RA3A086NBEA  niq_nonres
rename A594RA3A086NBEA  niq_struct
rename A748RA3A086NBEA  niq_res
rename GDPCA    yc09
rename PCECCA   cc09
rename PNFICA   ic09_nonres
rename A593RX1A020NBEA  nic09_nonres
rename A594RX1A020NBEA  nic09_struct
rename A191RD3A086NBEA  p_gdp
rename DPCERD3A086NBEA  p_pce
rename A008RD3A086NBEA  p_i_nonres
rename Y033RD3A086NBEA  p_i_equip
rename B4701C0A222NBEA  htot
'''

# Download, add the calendar year, then apply the short names. The date
# column is kept because a later cell merges more annual series on it.
data_a = get_fred_series(str_to_list(vars_a))
data_a = (
    data_a
    .assign(year = data_a['date'].dt.year)
    .rename(columns = rename_str_to_dict(rename_a))
)

### Non-financial corporate and non-corporate sectors

In [8]:
# Wait before sending the next batch of requests, to avoid the rate limit
RATE_LIMIT_PAUSE_SECONDS = 30
time.sleep(RATE_LIMIT_PAUSE_SECONDS)

In [9]:
vars_a1 = '''
NCBGVAA027N NCBCFCA027N NCBCEPA027N NCBSCPA027N NCBOSNA027N NCBGFCA027N 
NCBFNEA027N NCBPISA027N B465RC1A027NBEA W327RC1A027NBEA NNBGVAA027N NNBCFCA027N 
NNBCEPA027N NNBESCA027N NNBTPIA027N NNBBOSA027N NNBGFNA027N NNBCFNA027N
'''

# Get variables
data_a1 = get_fred_series(str_to_list(vars_a1))

# Some of these series are already in data_a (NCBOSNA027N and NNBBOSA027N are
# also listed in vars_a). Left in, a merge without an explicit key would join
# on those float columns as well as the date, and any difference between the
# two downloads would split a year into two rows. Keep the copy already in
# data_a and join on the date only.
already_fetched = data_a1.columns.intersection(data_a.columns).difference(['date'])
data_a = data_a.merge(
    data_a1.drop(columns = already_fetched), on = 'date', how = 'outer'
)

# year was filled in before this outer merge; recompute it so that rows
# contributed only by data_a1 do not end up with a missing year.
data_a['year'] = data_a['date'].dt.year

# Short names: *_nfcb for the NCB* series, *_nfncb for the NNB* series
rename_a1 = '''
rename NCBGVAA027N  y_nfcb
rename NCBCFCA027N  dep_nfcb
rename NCBCEPA027N  wn_nfcb
rename NCBSCPA027N  taxsocial_nfcb
rename NCBPISA027N  taxprod_nfcb
rename NCBOSNA027N  nos_nfcb
rename B465RC1A027NBEA  taxinc_nfcb
rename W327RC1A027NBEA  taxtrans_nfcb
rename NCBGFCA027N  i_nfcb
rename NCBFNEA027N  ni_nfcb

rename NNBGVAA027N  y_nfncb
rename NNBCFCA027N  dep_nfncb
rename NNBCEPA027N  wn_nfncb
rename NNBESCA027N  taxsocial_nfncb
rename NNBTPIA027N  taxprod_nfncb
rename NNBBOSA027N  nos_nfncb
rename NNBGFNA027N  i_nfncb
rename NNBCFNA027N  ni_nfncb
'''
data_a = data_a.rename(columns = rename_str_to_dict(rename_a1))

# Variables divided by 1000 (to billions, presumably from millions)
vars_ab = '''y_nfcb y_nfncb dep_nfcb dep_nfncb wn_nfcb wn_nfncb taxsocial_nfcb 
taxsocial_nfncb taxprod_nfcb taxprod_nfncb i_nfcb i_nfncb ni_nfcb ni_nfncb 
nos_nfcb nos_nfncb'''

for var in str_to_list(vars_ab):
    data_a[var] = data_a[var]/1000

# The date is not needed once year is in place
data_a = data_a.drop(columns = ['date'])

## Merge all data

In [10]:
# Replace selected monthly variables by their within-year mean. Columns not
# listed here keep their monthly values.
vars_agg = 'p_cpi r_aaa r_baa unr r_tb3m r_gs1 r_gs10 emprate_civil hw_prod htot_prod pop'
gby = data_m.groupby('year')

for var in str_to_list(vars_agg):
    data_m[var] = gby[var].transform('mean')

# Keep one row per year: the January row, which now carries the yearly means
# for the aggregated columns
data_m = data_m.query('month == 1')

# Keep only 4th quarters, again giving one row per year
data_q = data_q.query('quarter == 4')

# Merge all together on the calendar year
data = data_q.merge(data_m, on = 'year', how = 'outer')\
             .merge(data_a, on = 'year', how = 'outer')\
             .sort_values('year')

# Columns present on both sides of a merge come out with the pandas default
# suffixes _x (left) and _y (right). Keep the first occurrence (_x) and drop
# the second (_y). Match the suffix at the END of the name only, so a column
# that merely contains '_x' or '_y' somewhere else is left untouched.
y_cols = [col for col in data.columns if col.endswith('_y')]
data = data.drop(columns = y_cols)

x_cols = [col for col in data.columns if col.endswith('_x')]
data = data.rename(columns = {col: col[:-len('_x')] for col in x_cols})

# Save to stata
data.to_stata('Data/Intermediate/fred_mapped.dta', write_index = False)