# Extract Raw Data

## Extract FRED macroeconomic variables

Extract macroeconomic data from the FRED website and save it to `data/raw_FRED.csv`.

In [1]:
import os
import warnings
from datetime import datetime

import pandas as pd
from fredapi import Fred

pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', None)
warnings.filterwarnings('ignore')

# FRED credentials come from the environment so the secret never lives in the
# notebook: export FRED_API_KEY before starting the kernel.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated. If the variable is unset this is None; fredapi is then
# expected to reject the missing key when the client is constructed.
FRED_API_KEY = os.environ.get('FRED_API_KEY')

# Observation window requested from FRED: fixed start, end = today's date
# (formatted YYYY-MM-DD at the time this cell runs).
START_DATE = '2000-01-01'
END_DATE = datetime.now().strftime('%Y-%m-%d')

- Macroeconomic variable list

In [2]:
# Maps the column name used in this notebook to the FRED series ID that is
# requested for it. Insertion order determines the column order of the
# downloaded frame.
FRED_SERIES = {
    # Money Supply
    'M1': 'M1SL',
    'M2': 'M2SL',
    # Economic Activity
    'GDP': 'GDP',
    'REAL_GDP': 'GDPC1',
    'PERSONAL_INCOME': 'PI',
    'DISPOSABLE_INCOME': 'DSPIC96',
    'PCE': 'PCE',
    'SAVING_RATE': 'PSAVERT',
    # Employment
    'UNEMPLOYMENT': 'UNRATE',
    'NONFARM_PAYROLL': 'PAYEMS',
    'LABOR_FORCE': 'CLF16OV',
    # Interest Rates - Federal Funds
    'FED_FUNDS': 'FEDFUNDS',
    # Treasury Yields
    'TREASURY_3M': 'DGS3MO',
    'TREASURY_2Y': 'DGS2',
    'TREASURY_5Y': 'DGS5',
    'TREASURY_10Y': 'DGS10',
    'TREASURY_30Y': 'DGS30',
    # Corporate Bonds
    'BBB_CORPORATE': 'DBAA',
    # Inflation
    'CPI': 'CPIAUCSL',
    'CORE_CPI': 'CPILFESL',
    'PCE_INFLATION': 'PCEPI',
    # Additional Economic Indicators
    'INDUSTRIAL_PRODUCTION': 'INDPRO',
    'RETAIL_SALES': 'RSXFS',
    'HOUSE_PRICE_INDEX': 'CSUSHPISA',
}

- Extract macroeconomic variables

In [3]:
# Download every series listed in FRED_SERIES for [START_DATE, END_DATE] and
# collect them as columns of df_fred (one column per notebook-level name).
fred = Fred(api_key=FRED_API_KEY)
data_dict = {}
failed_series = []  # names of the series that could not be downloaded

for name, series_id in FRED_SERIES.items():
    try:
        series_data = fred.get_series(series_id, START_DATE, END_DATE)
        data_dict[name] = series_data
    except Exception as exc:
        # Best-effort download: one failing series must not abort the others,
        # but the failure is recorded and reported instead of being swallowed.
        # print() is used because a blanket warnings filter would hide
        # warnings.warn() output.
        failed_series.append(name)
        print(f"Failed to download {name} ({series_id}): {exc}")

if failed_series:
    print(f"{len(failed_series)} of {len(FRED_SERIES)} series failed: {failed_series}")

# With nothing downloaded the frame would be empty and later date-based
# processing would fail with an unrelated error, so stop here with a clear one.
if not data_dict:
    raise RuntimeError(
        "No FRED series could be downloaded; check FRED_API_KEY and network access."
    )

df_fred = pd.DataFrame(data_dict)

- Generate quarterly data

In [4]:
# Aggregate df_fred into quarter-end-labelled bins, keeping two views of each
# series: the last observation in the quarter and the quarterly mean.
# NOTE(review): pandas >= 2.2 deprecates the 'Q' alias in favour of 'QE';
# it is left unchanged here so the cell keeps working on older pandas.
quarterly_bins = df_fred.resample('Q')
fred_quarterly_end = quarterly_bins.last()   # quarter-end value
fred_quarterly_avg = quarterly_bins.mean()   # quarter-average value

# Side-by-side: end-of-quarter columns first, then the same names + '_AVG'.
fred_quarterly = pd.concat(
    (fred_quarterly_end, fred_quarterly_avg.add_suffix('_AVG')),
    axis=1,
)

# timekey = number of quarters elapsed since 1980Q1 (1980Q1 -> 0).
idx = fred_quarterly.index
quarters_since_1980 = (idx.year - 1980) * 4 + (idx.quarter - 1)
fred_quarterly.insert(loc=0, column='timekey', value=quarters_since_1980)

# The date index is not written; timekey is the only time identifier in the CSV.
fred_quarterly.to_csv('data/raw_FRED.csv', index=False)

## Extract FDIC deposit balances

Extract deposit balances from `data/qbp-time-series-spreadsheets-second-quarter-2025.xlsx` (downloaded from the FDIC website) and save them to `data/raw_FDIC_deposits.csv`.

In [5]:
filename = "data/qbp-time-series-spreadsheets-second-quarter-2025.xlsx"
sheet_name = "Balance Sheet"

# --- Load the sheet and reshape it to one row per period ---
# Skip the first four rows and the first column, then discard the row that
# still carries index label 5 (presumably header residue rather than data --
# TODO confirm against the workbook layout).
df = pd.read_excel(filename, sheet_name=sheet_name).iloc[4:, 1:]
df = df.loc[df.index != 5].reset_index(drop=True)

# Promote the first remaining row to the column headers.
colnames = df.iloc[0]
df = df.iloc[1:]
df.columns = colnames
df.columns.name = None
df = df.reset_index(drop=True)

# Use the first column as row labels, then transpose so those labels become
# the columns and the former headers become the index.
df = df.set_index(df.columns[0]).T
df.columns = df.columns.str.strip()
df.index.name = None

# --- Keep only the deposit line items ---
col_deposits = [
    'Domestic office deposits',
    'Interest-bearing deposits',
    'Noninterest-bearing deposits',
    'Time deposits',
]
df_deposits = df[col_deposits].reset_index()

# Generate time key: quarters elapsed since 1980Q1. The label is assumed to
# start with a 4-digit year and end with the quarter digit -- TODO confirm.
df_deposits['timekey'] = [
    (int(label[:4]) - 1980) * 4 + (int(label[-1]) - 1)
    for label in df_deposits['index']
]
df_deposits = df_deposits.drop(columns='index')

# Put timekey first, leaving the remaining columns in their existing order.
ordered_columns = ['timekey'] + [col for col in df_deposits.columns if col != 'timekey']
df_deposits = df_deposits[ordered_columns]

# Save to csv
df_deposits.to_csv('data/raw_FDIC_deposits.csv', index=False)