In [2]:
import pandas_datareader.data as web
import pandas as pd

# Configuration: query window passed to the reader and the raw output file name
start_date = "2016-01-01"
end_date = "2025-12-31"
file_name = "macro_data_raw.csv"

# Define web metrics: source series ID -> short column name used downstream
metrics = {
    "CPIAUCSL": "CPI",
    "M2SL": "M2"
}

# Extraction from the "fred" data source.
# rename() is chained (no inplace=True) so the cell builds macro_data in one
# expression and re-running it never depends on a previously mutated frame.
macro_data = (
    web.DataReader(list(metrics), "fred", start_date, end_date)
    .rename(columns=metrics)
)

# Save to CSV for cleaning (relative path: written to the current working directory)
macro_data.to_csv(file_name)

# Confirmation
print(f"Macro data saved to {file_name}")

Marco data saved to macro_data_raw.csv


In [None]:
# Load the raw macro extract from disk into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path and may not be the
# location the extraction cell writes to (that cell saves a relative
# "macro_data_raw.csv") — confirm how the file reaches this folder.
file_path = r"c:\Users\rober\Documents\Python\rg_project\data\raw\macro_data_raw.csv"

# index_col=0 takes the first CSV column as the index and parse_dates=True asks
# pandas to parse it as dates; the index is then labelled 'Date'.
df_macro_raw = (
    pd.read_csv(file_path, index_col=0, parse_dates=True)
    .rename_axis('Date')
)

print(df_macro_raw.head())

In [None]:
# Structural checks on the raw frame. They are printed explicitly because a
# notebook cell only auto-displays its last expression, so bare expressions in
# the middle of a cell show nothing. The commented values are the results
# recorded by the author.
print(df_macro_raw.shape)
# (120, 2)

print(df_macro_raw.dtypes)
# CPI    float64
# M2     float64

print(df_macro_raw.isna().sum())
# CPI    1
# M2     0

# Checking where missing value occurs
print(df_macro_raw[df_macro_raw['CPI'].isna()])

# Missing value for Oct 25 due to US Govt shutdown
# Plugging gap with linear interpolation.
# limit_area='inside' restricts the fill to NaNs surrounded by valid values, so
# a trailing month that has no observation yet stays NaN instead of being
# padded with the last known value.
df_macro_raw['CPI'] = df_macro_raw['CPI'].interpolate(method='linear', limit_area='inside')

# Verifying plug
print(df_macro_raw.loc['2025-09-01':'2025-11-01'])

In [None]:
# Write the frame (CPI gap already interpolated) out as the 'clean' dataset.
# Relative path: the file lands in the current working directory.
df_macro_raw.to_csv(path_or_buf="macro_data_clean.csv")

In [None]:
# Load the cleaned macro dataset from disk into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path and may not be the
# location the save step writes to (it saves a relative
# "macro_data_clean.csv") — confirm how the file reaches this folder.
file_path = r"c:\Users\rober\Documents\Python\rg_project\data\processed\macro_data_clean.csv"

# Use the 'Date' column as the index and let pandas parse it as dates
read_options = {"index_col": 'Date', "parse_dates": True}
df_macro_clean = pd.read_csv(file_path, **read_options)

print(df_macro_clean.head(5))

In [None]:
# Creating YoY change columns: each value is compared with the value 12 rows
# earlier (assumes one row per month — TODO confirm no months are missing).
# pct_change returns a fraction (0.03 means +3%), not a value scaled by 100.
# First 12 rows will be NaN but retaining them to keep 2016 actuals.
# fill_method=None makes the comparison use only actual observations; the
# legacy default forward-filled NaNs first and is deprecated in recent pandas.
for metric in ('CPI', 'M2'):
    df_macro_clean[f'{metric}_YoY'] = df_macro_clean[metric].pct_change(periods=12, fill_method=None)

# Checking
print(df_macro_clean.tail())

In [None]:
# Persist the frame, now including the YoY columns, as the final dataset.
# Relative path: the file lands in the current working directory.
df_macro_clean.to_csv(path_or_buf="macro_data_final.csv")