# Forecasting Consensus Expectations: Consumer Price Index (CPI)
## Data Exploration

In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import seaborn as sns
import matplotlib.dates as mdates
import statsmodels.api as sm
import plotly.express as px

from tqdm.auto import tqdm
from scipy import stats, special
from scipy.optimize import brentq
from collections import defaultdict
from itertools import product
from scipy.stats import t as student_t, norm, binomtest, jarque_bera
from statsmodels.stats.diagnostic import het_breuschpagan

**Read the M/M and Y/Y forecast panels (base and `_full` variants) from parquet**

In [3]:
# Directory holding the prepared parquet panels (relative path, resolved
# against the kernel's current working directory).
OUT_DIR = "../out"

# File names of the four panels loaded below.
MOM_DF_FILE = "cpi_mom_df.parquet"
MOM_DF_FULL_FILE = "cpi_mom_df_full.parquet"
YOY_DF_FILE = "cpi_yoy_df.parquet"
YOY_DF_FULL_FILE = "cpi_yoy_df_full.parquet"


def _read_panel(file_name):
    """Load one parquet file from OUT_DIR using the pyarrow engine."""
    return pd.read_parquet(os.path.join(OUT_DIR, file_name), engine="pyarrow")


# Month-over-month panels.
mom_df = _read_panel(MOM_DF_FILE)
mom_df_full = _read_panel(MOM_DF_FULL_FILE)

# Year-over-year panels.
yoy_df = _read_panel(YOY_DF_FILE)
yoy_df_full = _read_panel(YOY_DF_FULL_FILE)

# Report (rows, columns) for each frame as a quick sanity check on the load.
for label, frame in (
    ("mom_df shape     :", mom_df),
    ("mom_df_full shape:", mom_df_full),
    ("yoy_df shape     :", yoy_df),
    ("yoy_df_full shape:", yoy_df_full),
):
    print(label, frame.shape)

mom_df shape     : (57710, 11)
mom_df_full shape: (68150, 11)
yoy_df shape     : (39400, 11)
yoy_df_full shape: (46492, 11)


In [4]:
# Preview the first five rows of mom_df (five is DataFrame.head's default).
mom_df.head(n=5)

Unnamed: 0,release_date,period,median_survey,actual,economist,firm,forecast,asof,error,surprise,series
0,2006-01-18,2005-12-31,0.2,0.2,Adam Chester,Lloyds Bank PLC,,NaT,,0.0,Core CPI M/M
1,2006-01-18,2005-12-31,0.2,0.2,Alessandro Truppia,Aletti Gestielle Sgr Spa,,NaT,,0.0,Core CPI M/M
2,2006-01-18,2005-12-31,0.2,0.2,Alison Lynn Reaser,Point Loma Nazarene University,0.2,2006-01-16,0.0,0.0,Core CPI M/M
3,2006-01-18,2005-12-31,0.2,0.2,Allan Von Mehren,Danske Bank AS,,NaT,,0.0,Core CPI M/M
4,2006-01-18,2005-12-31,0.2,0.2,Andreas Busch,Bantleon AG,0.2,2006-01-16,0.0,0.0,Core CPI M/M


In [5]:
# Preview the first five rows of mom_df_full (five is DataFrame.head's default).
mom_df_full.head(n=5)

Unnamed: 0,release_date,period,median_survey,actual,economist,firm,forecast,asof,error,surprise,series
0,2006-01-18,2005-12-31,0.2,0.2,Adam Chester,Lloyds Bank PLC,,NaT,,0.0,Core CPI M/M
1,2006-01-18,2005-12-31,0.2,0.2,Alessandro Truppia,Aletti Gestielle Sgr Spa,,NaT,,0.0,Core CPI M/M
2,2006-01-18,2005-12-31,0.2,0.2,Alison Lynn Reaser,Point Loma Nazarene University,0.2,2006-01-16,0.0,0.0,Core CPI M/M
3,2006-01-18,2005-12-31,0.2,0.2,Allan Von Mehren,Danske Bank AS,,NaT,,0.0,Core CPI M/M
4,2006-01-18,2005-12-31,0.2,0.2,Andreas Busch,Bantleon AG,0.2,2006-01-16,0.0,0.0,Core CPI M/M


In [6]:
# Preview the first five rows of yoy_df (five is DataFrame.head's default).
yoy_df.head(n=5)

Unnamed: 0,release_date,period,median_survey,actual,economist,firm,forecast,asof,error,surprise,series
0,2006-01-18,2005-12-31,2.2,2.2,Adam Chester,Lloyds Bank PLC,,NaT,,0.0,Core CPI Y/Y
1,2006-01-18,2005-12-31,2.2,2.2,Allan Von Mehren,Danske Bank AS,,NaT,,0.0,Core CPI Y/Y
2,2006-01-18,2005-12-31,2.2,2.2,Andreas Busch,Bantleon AG,,NaT,,0.0,Core CPI Y/Y
3,2006-01-18,2005-12-31,2.2,2.2,Andrew Gretzinger,Manulife Asset Management Limited,,NaT,,0.0,Core CPI Y/Y
4,2006-01-18,2005-12-31,2.2,2.2,Aneta Markowska,Moore Capital Management LP,,NaT,,0.0,Core CPI Y/Y


In [7]:
# Preview the first five rows of yoy_df_full (five is DataFrame.head's default).
yoy_df_full.head(n=5)

Unnamed: 0,release_date,period,median_survey,actual,economist,firm,forecast,asof,error,surprise,series
0,2006-01-18,2005-12-31,2.2,2.2,Adam Chester,Lloyds Bank PLC,,NaT,,0.0,Core CPI Y/Y
1,2006-01-18,2005-12-31,2.2,2.2,Allan Von Mehren,Danske Bank AS,,NaT,,0.0,Core CPI Y/Y
2,2006-01-18,2005-12-31,2.2,2.2,Andreas Busch,Bantleon AG,,NaT,,0.0,Core CPI Y/Y
3,2006-01-18,2005-12-31,2.2,2.2,Andrew Gretzinger,Manulife Asset Management Limited,,NaT,,0.0,Core CPI Y/Y
4,2006-01-18,2005-12-31,2.2,2.2,Aneta Markowska,Moore Capital Management LP,,NaT,,0.0,Core CPI Y/Y
