In [1]:
import pandas as pd
from pandas_datareader import data


In [5]:

# Query window passed to FRED (date strings, start and end)
start_date = '1990-01-01'
end_date = '2023-12-31'

# FRED series ID for US CPI (CPIAUCSL = All Urban Consumers)
series_id = 'CPIAUCSL'

# Download the raw index levels from FRED
cpi_raw = data.DataReader(series_id, 'fred', start_date, end_date)

# Month-over-month change, kept as a fraction (NOT multiplied by 100)
monthly_change = cpi_raw[series_id].pct_change()

# NOTE(review): `last_12m` is the plain sum of the last 12 monthly changes,
# i.e. an additive approximation of the 12-month change (no compounding).
cpi_data = cpi_raw.assign(
    MoM_Change=monthly_change,
    last_12m=monthly_change.rolling(window=12).sum(),
)

cpi_data

Unnamed: 0_level_0,CPIAUCSL,MoM_Change,last_12m
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1990-01-01,127.500,,
1990-02-01,128.000,0.003922,
1990-03-01,128.600,0.004687,
1990-04-01,128.900,0.002333,
1990-05-01,129.100,0.001552,
...,...,...,...
2023-08-01,306.187,0.005118,0.036585
2023-09-01,307.288,0.003596,0.036346
2023-10-01,307.531,0.000791,0.032001
2023-11-01,308.024,0.001603,0.030968


In [7]:
import plotly.express as px

# Main trace: the 12-month rolling figure stored in `last_12m`
fig = px.line(
    cpi_data,
    x=cpi_data.index,
    y='last_12m',
    title='US CPI Inflation Rate (YoY)',
    labels={'last_12m': 'YoY Inflation'},
)

# Overlay the monthly changes on the same axes for comparison
fig.add_scatter(
    x=cpi_data.index,
    y=cpi_data['MoM_Change'],
    mode='lines',
    name='MoM Change',
)

fig.show()

In [11]:
# Simulate future monthly CPI changes by sampling a normal distribution whose
# mean and standard deviation are taken from the historical MoM changes.
import numpy as np

# Sample moments of the historical month-over-month changes
std, mean = cpi_data['MoM_Change'].std(), cpi_data['MoM_Change'].mean()

np.random.seed(0)  # fixed seed so the simulated series is reproducible
years = 30
# Single source of truth for both the number of draws and the index length;
# previously the draw count was hard-coded as 30*12 and could drift from `years`.
n_months = years * 12
random_data = np.random.normal(mean, std, n_months)

# NOTE: this rebinds `start_date`, which an earlier cell used for the FRED query window.
start_date = '2023-04-01'
# One timestamp per simulated month ('ME' = month-end frequency)
years_from_start = pd.date_range(start_date, periods=n_months, freq='ME')

# Simulated series: one `value` per month, indexed by date
df = pd.DataFrame(random_data, columns=['value'], index=years_from_start)

f = px.line(df, x=df.index, y='value', title='Random Data')
f.show()


In [12]:
# Write the simulated series to CSV, labelling the index column `date`
# NBVAL_SKIP
simulated_cpi = df[['value']].rename_axis('date')
simulated_cpi.to_csv('../data/monthly_cpi_simulated_USD.csv')