In [None]:
import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.ar_model import AutoReg
import matplotlib.pyplot as plt

In [None]:
# Load the quarterly dataset and index it by calendar quarter (PeriodIndex named "date").
df = pd.read_excel("data/QUARTERLY-1.xls")
df = df.set_index(pd.PeriodIndex(df["Date"], freq="Q", name="date"))

# s is the difference between the r10 and Tbill columns; keep only s and the raw Date column.
df = df.assign(s=df["r10"] - df["Tbill"])[["s", "Date"]]
df

In [None]:
#4a
# Time-series plot of the spread s = r10 - Tbill.
# The PeriodIndex is converted to timestamps so matplotlib can draw a date axis.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df.index.to_timestamp(), df['s'], marker='o')
ax.set_title('Quarterly Spread (r10 - Tbill)')
ax.set_xlabel('Date')
# The label previously read 'TB3mo', which does not describe the plotted series.
ax.set_ylabel('s = r10 - Tbill')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# it looks stationary

In [None]:
#4b
# Sample autocorrelation function of the spread for lags 0..25, drawn on an explicit axis.
fig, acf_axis = plt.subplots()
plot_acf(df['s'], lags=25, ax=acf_axis)
plt.show()

In [None]:
# Sample partial autocorrelation function of the spread for lags 0..25.
pacf_fig, pacf_axis = plt.subplots()
plot_pacf(df['s'], lags=25, ax=pacf_axis)
plt.show()


In [None]:
# The series appears to be autocorrelated out to about 4 lags, with a significant partial autocorrelation at lag 1

In [None]:
# Fit an AR(2) to the spread on the full sample (missing values dropped) and show the estimates.
spread = df['s'].dropna()
res_ar2 = AutoReg(spread, lags=2).fit()
print(res_ar2.summary())

In [None]:
# c
# Store the AR(2) residuals next to the data. Assignment aligns on the index, so any
# quarter without a residual (e.g. the initial observations used as lags) stays NaN.
df["residuals"] = res_ar2.resid

# Plot the residuals over time with a zero reference line.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df.index.to_timestamp(), df["residuals"], marker='o', linestyle='-')
ax.axhline(0, color='red', linestyle='--')
# The title previously described a CPINSA seasonal-dummy regression (copy-paste from
# another exercise); the plotted series is the AR(2) residual of the spread.
ax.set_title("AR(2) Residuals of the Spread (s = r10 - Tbill)")
ax.set_xlabel("Date")
ax.set_ylabel("Residual")
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
residuals = res_ar2.resid

# Ljung-Box test on the AR(2) residuals for lags 1..20.
# Because the residuals come from a fitted model, the chi-square degrees of freedom must be
# reduced by the number of estimated AR coefficients: model_df=2 gives dof = lag - 2.
# Lags 1 and 2 therefore have no valid p-value and are reported as NaN.
# NOTE(review): p-values differ from the unadjusted test, so re-check the written
# interpretation against the new output.
AR2_ORDER = 2
ljung_box_results = acorr_ljungbox(
    residuals, lags=range(1, 21), model_df=AR2_ORDER, return_df=True
)

# Print the test statistics
print(ljung_box_results)

# Plot p-values against the 5% significance level to visualize remaining autocorrelation
ljung_box_results['lb_pvalue'].plot(title='Ljung-Box Test P-Values: AR(2) residuals')
plt.axhline(y=0.05, color='red', linestyle='--', label='Significance Level (0.05)')
plt.xlabel('Lag')
plt.ylabel('P-value')
plt.legend()
plt.show()

In [None]:
# There seems to be residual autocorrelation at lags beyond 5

In [None]:
#4e
# Fit an AR(7) to the same full-sample spread series and show the estimates.
spread_full = df['s'].dropna()
res_ar7 = AutoReg(spread_full, lags=7).fit()
print(res_ar7.summary())

In [None]:
residuals = res_ar7.resid

# Ljung-Box test on the AR(7) residuals for lags 1..20.
# Because the residuals come from a fitted model, the chi-square degrees of freedom must be
# reduced by the number of estimated AR coefficients: model_df=7 gives dof = lag - 7.
# Lags 1 through 7 therefore have no valid p-value and are reported as NaN.
# NOTE(review): p-values differ from the unadjusted test, so re-check the written
# interpretation against the new output.
AR7_ORDER = 7
ljung_box_results = acorr_ljungbox(
    residuals, lags=range(1, 21), model_df=AR7_ORDER, return_df=True
)

# Print the test statistics
print(ljung_box_results)

# Plot p-values against the 5% significance level to visualize remaining autocorrelation
ljung_box_results['lb_pvalue'].plot(title='Ljung-Box Test P-Values: AR(7) residuals')
plt.axhline(y=0.05, color='red', linestyle='--', label='Significance Level (0.05)')
plt.xlabel('Lag')
plt.ylabel('P-value')
plt.legend()
plt.show()

In [None]:
# There no longer appears to be any residual autocorrelation

In [None]:
# Information criteria of the two fitted models (lower is better).
# NOTE(review): with default settings the AR(2) and AR(7) fits drop a different number of
# initial observations (2 vs 7), so the criteria are computed on different effective
# samples -- confirm that this comparison is intended.
fitted_models = {"AR(2)": res_ar2, "AR(7)": res_ar7}
for criterion in ("aic", "bic"):
    for label, fitted in fitted_models.items():
        print(f"{criterion.upper()} {label}: {getattr(fitted, criterion)}")



In [None]:
# Looking at the AIC, the AR(7) seems to perform better than the AR(2)

In [None]:
#h
# Estimation sample: every row except the last 10 quarters, which are held out for part (i).
df2 = df.iloc[:-10]
df2

In [None]:
# One-step-ahead forecast for in-sample data (estimation sample = all but the last 10 quarters)

y = df2['s'].dropna()

# Re-estimate the AR(2) on the truncated sample. The earlier res_ar2 was fitted on the full
# series, so it had already seen the 10 held-out quarters; predictions from it are not
# comparable with the AR(7), which is re-estimated on df2, and predicting past len(y) with
# it would return in-sample values rather than genuine out-of-sample forecasts.
res_ar2 = AutoReg(y, lags=2).fit()

# Position of the first observation that has a fitted value (initial obs are used as lags).
# Read from the model's private `_hold_back` attribute.
first_fitted = res_ar2.model._hold_back
forecast_ar2 = res_ar2.predict(start=first_fitted, end=len(y) - 1)

# Forecast error (positional slice so it lines up with the predicted quarters)
error_ar2 = y.iloc[first_fitted:] - forecast_ar2
error_ar2

In [None]:
# Estimation sample (all but the last 10 quarters), with missing values removed.
y = df2['s'].dropna()

# Re-estimate the AR(7) on the estimation sample; this replaces the full-sample res_ar7.
res_ar7 = AutoReg(y, lags=7).fit()

# In-sample one-step-ahead predictions, starting at the first observation the model fits
# (position read from the model's private `_hold_back` attribute).
skip = res_ar7.model._hold_back
forecast_ar7 = res_ar7.predict(start=skip, end=len(y) - 1)

# Forecast error
error_ar7 = y[skip:] - forecast_ar7
error_ar7

In [None]:
# Mean squared one-step-ahead forecast error of each model on the estimation sample.
mse_ar2 = error_ar2.pow(2).mean()
mse_ar7 = error_ar7.pow(2).mean()

for label, mse in (("AR(2)", mse_ar2), ("AR(7)", mse_ar7)):
    print(f'MSE {label}: {mse:.4f}')

In [None]:
# The AR(7) seems to have a smaller forecast error than the AR(2)

In [None]:
#i
# Predict the 10 quarters that follow the estimation sample with the AR(2).
# NOTE(review): these are genuine out-of-sample forecasts only when res_ar2 was estimated on
# the len(y)-observation sample; if res_ar2 is a fit on the full series, the same call
# returns in-sample one-step predictions instead.
holdout_start = len(y)
forecast_ar2 = res_ar2.predict(start=holdout_start, end=holdout_start + 9)

# Forecast error against the last 10 observed values (subtraction aligns on the index)
actual_tail = df['s'].tail(10)
error_ar2 = actual_tail - forecast_ar2
error_ar2

In [None]:
# Predict the 10 quarters that follow the estimation sample with the AR(7).
holdout_start = len(y)
forecast_ar7 = res_ar7.predict(start=holdout_start, end=holdout_start + 9)

# Forecast error against the last 10 observed values (subtraction aligns on the index)
actual_tail = df['s'].tail(10)
error_ar7 = actual_tail - forecast_ar7
# This cell displays the forecasts themselves rather than the errors.
forecast_ar7

In [None]:
# Mean squared forecast error of each model over the 10 held-out quarters.
mse_ar2, mse_ar7 = (err.pow(2).mean() for err in (error_ar2, error_ar7))

print(f'MSE AR(2): {mse_ar2:.4f}')
print(f'MSE AR(7): {mse_ar7:.4f}')

In [None]:
# The AR(2) seems to perform better than the AR(7) at forecasting the 10 steps ahead. This is surprising given that, looking at the entire series, the AR(7) fits the historical data better