**Author**: Alejandro M. Ouslan

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import statsmodels.stats.api as sms
from statsmodels.tsa.arima.model import ARIMA
import statsmodels.api as sm
from statsmodels.tsa.ar_model import AutoReg
import numpy as np
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Load the money-demand workbook and index it by calendar quarter.
# NOTE(review): assumes DATE is encoded as <year>.<quarter> (e.g. 1960.1) — confirm.
df = pd.read_excel("data/MONEYDEM-1.xls")
df['year'] = df['DATE'].astype(int)
fractional_part = df['DATE'] - df['year']
df['quarter'] = (fractional_part * 10).round().astype(int)
quarter_labels = df['year'].astype(str) + 'Q' + df['quarter'].astype(str)
df['date'] = pd.PeriodIndex(quarter_labels, freq='Q')
df = df.set_index('date')
df

# Problems

## Problem a

In [None]:
# Problem 1a: line plot of the TB3mo column against the quarter start dates.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df.index.to_timestamp(), df['TB3mo'], marker='o')
ax.set_title('Quarterly Data')
ax.set_xlabel('Date')
ax.set_ylabel('TB3mo')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Problem 1a (continued): the same plot for the TB1yr column.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df.index.to_timestamp(), df['TB1yr'], marker='o')
ax.set(title='Quarterly Data', xlabel='Date', ylabel='TB1yr')
ax.grid(True)
fig.tight_layout()
plt.show()

- No, they appear to have a changing mean and variance, given the violent spike around 1980.

## Problem b

In [None]:
# Problem b: overlay TB1yr and TB3mo on one set of axes.
# Each line carries a label and the axes a legend, so the two series can be
# told apart; without them the plot shows two anonymous lines.
fig, ax = plt.subplots(figsize=(8, 5))
quarter_starts = df.index.to_timestamp()
for column in ('TB1yr', 'TB3mo'):
    ax.plot(quarter_starts, df[column], marker='o', label=column)
ax.set_title('Quarterly Data')
ax.set_xlabel('Date')
ax.set_ylabel('Rate')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

- They appear to be practically identical; they seem to follow the same trend.

## Problem c

In [None]:
# Problem c: OLS regression of TB1yr on TB3mo via the formula interface.
ols_spec = smf.ols("TB1yr ~ TB3mo", data=df)
model = ols_spec.fit()
print(model.summary())

## Problem d
- A 1 percentage point increase in the 3-month Treasury rate (short-term) is associated with a 0.9167 percentage point increase in the 1-year Treasury rate (long-term), on average, holding other factors constant.

## Problem e 

In [None]:
# Problem e: t-test of the restriction that the TB3mo slope equals one.
slope_restriction = 'TB3mo = 1'
t_test_result = model.t_test(slope_restriction)
print(t_test_result)

- Since the p-value is 0.000, we reject the null hypothesis that the coefficient on TB3mo equals 1: there is strong but less-than-perfect pass-through from short- to long-term rates.

## Problem f

In [None]:
# Problem f: scatter the OLS residuals against the regressor to look for a pattern.
residuals = model.resid

fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(df['TB3mo'], residuals, alpha=0.7)
ax.axhline(0, color='red', linestyle='--')  # zero-residual reference line
ax.set_xlabel('TB3mo (3-month Treasury rate)')
ax.set_ylabel('Residuals')
ax.set_title('Residuals vs TB3mo')
ax.grid(True)
plt.show()

- No, there does not seem to be a pattern.

## Problem g

In [None]:
# Problem g: White test for heteroskedasticity on the OLS residuals,
# using the design matrix of the fitted model as the auxiliary regressors.
residuals = model.resid
exog = model.model.exog

white_test = sms.het_white(residuals, exog)
lm_stat, lm_pvalue, f_stat, f_pvalue = white_test

for label, value in (
    ("LM statistic", lm_stat),
    ("LM p-value", lm_pvalue),
    ("F statistic", f_stat),
    ("F p-value", f_pvalue),
):
    print(f"White test {label}: {value:.4f}")

- given that the p-value is 0.0000 there is evidence of heteroskedasticity

## Problem h

In [None]:
# Problem h: same regression, with HC0 heteroskedasticity-robust standard errors.
robust_spec = smf.ols("TB1yr ~ TB3mo", data=df)
model_robust = robust_spec.fit(cov_type='HC0')
print(model_robust.summary())

- The coefficients did not change; however, the standard errors increased for both the intercept and the slope coefficient. When heteroskedasticity is present, regular SEs underestimate the true variability of the coefficients, so the robust SEs tend to be larger and more reliable.

## Problem j


In [None]:
# Problem j: add a dummy D that is 1 when TB3mo exceeds 10.00 and 0 otherwise,
# then re-estimate the regression with it.
high_rate = df['TB3mo'].gt(10.00)
df['D'] = high_rate.astype(int)

dummy_spec = smf.ols("TB1yr ~ TB3mo + D", data=df)
model_with_dummy = dummy_spec.fit()
print(model_with_dummy.summary())

## Problem k
- Given that the p-value of delta is 0.004, the dummy is relevant.

## Problem l 
- The coefficient increased, but so did the standard error.

# Problem 2

## Problem A

In [None]:
# Problem 2A: load the simulated data set and plot Y1 against the observation number.
df = pd.read_excel("data/SIM_2-1.xls")
plt.figure(figsize=(8, 5))
plt.plot(df["OBS"], df['Y1'], marker='o')
# The labels describe what is plotted (Y1 against OBS); the previous
# 'Quarterly Data' / 'Date' / 'TB3mo' text was carried over from Problem 1.
plt.title('Simulated Series Y1')
plt.xlabel('OBS')
plt.ylabel('Y1')
plt.grid(True)
plt.tight_layout()
plt.show()

- Given that the series has not broken out downward or upward and it seems to hover around 1, we could say that it is stationary.

## Problem B

In [None]:
# Problem 2B: sample ACF (10 lags) and PACF (15 lags) of Y1.
y1 = df['Y1']
fig = plot_acf(y1, lags=10)
plt.show()
pacf_fig = plot_pacf(y1, lags=15)
plt.show()

- Given that the series has not broken out downward or upward and it seems to hover around 1, we could say that it is stationary.

## Problem C

In [None]:
# AR(1): fit the model, then compute R^2 and adjusted R^2 by hand from the
# fitted values.
res = AutoReg(df['Y1'], lags=1).fit()
print(res.summary())

# Skip the leading observations that have no fitted value so that the
# observed and fitted series line up.
hold_back = res.model._hold_back
y_true = res.model.endog[hold_back:]
y_pred = res.fittedvalues

squared_errors = np.square(y_true - y_pred)
ssr = np.sum(squared_errors)
squared_deviations = np.square(y_true - np.mean(y_true))
tss = np.sum(squared_deviations)

n = len(y_true)
k = res.df_model + 1

r2 = 1 - ssr / tss
r2_adj = 1 - (ssr / (n - k)) / (tss / (n - 1))

print("R^2:", round(r2, 4))
print("Adjusted R^2:", round(r2_adj, 4))

In [None]:
# AR(2): same fit-and-score recipe as the AR(1) cell, with two lags.
res = AutoReg(df['Y1'], lags=2).fit()
print(res.summary())

first_fitted = res.model._hold_back
y_true = res.model.endog[first_fitted:]
y_pred = res.fittedvalues

errors = y_true - y_pred
deviations = y_true - np.mean(y_true)
ssr = np.sum(errors ** 2)
tss = np.sum(deviations ** 2)

n = len(y_true)
k = res.df_model + 1

r2 = 1 - ssr / tss
r2_adj = 1 - (ssr / (n - k)) / (tss / (n - 1))

print("R^2:", round(r2, 4))
print("Adjusted R^2:", round(r2_adj, 4))

In [None]:
# ARMA(1,1): fit, then report hand-computed R^2 / adjusted R^2 plus AIC and BIC.
# `order` is (p, d, q): one AR lag, no differencing, one MA lag. The previous
# (1, 1, 0) estimated an ARIMA(1,1,0), i.e. an AR(1) on the first difference.
arma_mod = ARIMA(df['Y1'], order=(1, 0, 1)).fit()
print(arma_mod.summary())

# The fitted values are predictions of Y1 itself, so they are scored against
# Y1 directly. Scoring against `.diff()` compared different quantities, and its
# leading NaN also propagated through the NumPy mean into the total sum of squares.
y_true = df['Y1']
y_pred = arma_mod.fittedvalues

ssr = np.sum((y_true - y_pred) ** 2)
tss = np.sum((y_true - np.mean(y_true)) ** 2)

n = len(y_true)
# NOTE(review): confirm which parameters df_model counts before relying on
# the adjusted R^2 for close comparisons.
k = arma_mod.df_model

r2 = 1 - ssr / tss
r2_adj = 1 - (ssr / (n - k)) / (tss / (n - 1))

print("R^2:", round(r2, 4))
print("Adjusted R^2:", round(r2_adj, 4))

print("AIC:", arma_mod.aic)
print("BIC:", arma_mod.bic)

In [None]:
# ARMA(1,4): fit, then report hand-computed R^2 / adjusted R^2 plus AIC and BIC.
# `order` is (p, d, q): one AR lag, no differencing, four MA lags. The previous
# (1, 4, 0) differenced the series four times and had no MA terms at all.
# NOTE(review): if the assignment means an MA term at lag 4 only, restrict the
# MA lags accordingly — confirm against the problem statement.
arma_mod = ARIMA(df['Y1'], order=(1, 0, 4)).fit()
print(arma_mod.summary())

# Score the fitted values against Y1 itself; the previous `.diff()` target
# compared different quantities and carried a leading NaN into the NumPy mean.
y_true = df['Y1']
y_pred = arma_mod.fittedvalues

ssr = np.sum((y_true - y_pred) ** 2)
tss = np.sum((y_true - np.mean(y_true)) ** 2)

n = len(y_true)
k = arma_mod.df_model

r2 = 1 - ssr / tss
r2_adj = 1 - (ssr / (n - k)) / (tss / (n - 1))

print("R^2:", round(r2, 4))
print("Adjusted R^2:", round(r2_adj, 4))

print("AIC:", arma_mod.aic)
print("BIC:", arma_mod.bic)

In [None]:
# ARMA(2,1): fit, then report hand-computed R^2 / adjusted R^2 plus AIC and BIC.
# `order` is (p, d, q): two AR lags, no differencing, one MA lag. The previous
# (2, 1, 0) estimated an AR(2) on the first difference with no MA term.
arma_mod = ARIMA(df['Y1'], order=(2, 0, 1)).fit()
print(arma_mod.summary())

# Score the fitted values against Y1 itself; the previous `.diff()` target
# compared different quantities and carried a leading NaN into the NumPy mean.
y_true = df['Y1']
y_pred = arma_mod.fittedvalues

ssr = np.sum((y_true - y_pred) ** 2)
tss = np.sum((y_true - np.mean(y_true)) ** 2)

n = len(y_true)
k = arma_mod.df_model

r2 = 1 - ssr / tss
r2_adj = 1 - (ssr / (n - k)) / (tss / (n - 1))

print("R^2:", round(r2, 4))
print("Adjusted R^2:", round(r2_adj, 4))

print("AIC:", arma_mod.aic)
print("BIC:", arma_mod.bic)

## Problem D

In [None]:
# AR(2) without an intercept (trend="n"), with hand-computed fit statistics.
res = AutoReg(df['Y1'], lags=2, trend="n").fit()
print(res.summary())

# Score the model fitted in THIS cell. The previous version read fitted
# values, df_model, AIC and BIC from `arma_mod`, the result object left over
# from the preceding ARIMA cell, so the printed statistics described a
# different model than the summary above them.
y_true = res.model.endog[res.model._hold_back:]
y_pred = res.fittedvalues

ssr = np.sum((y_true - y_pred) ** 2)
tss = np.sum((y_true - np.mean(y_true)) ** 2)

n = len(y_true)
# NOTE(review): confirm how df_model counts parameters for a no-intercept
# specification before comparing adjusted R^2 across cells.
k = res.df_model
r2 = 1 - ssr / tss
r2_adj = 1 - (ssr / (n - k)) / (tss / (n - 1))

print("R^2:", round(r2, 4))
print("Adjusted R^2:", round(r2_adj, 4))

print("AIC:", res.aic)
print("BIC:", res.bic)

In [None]:
# ARMA(1,1) without an intercept (trend="n"), with hand-computed fit statistics.
# `order` is (p, d, q): one AR lag, no differencing, one MA lag. The previous
# (1, 1, 0) estimated an AR(1) on the first difference with no MA term.
arma_mod = ARIMA(df['Y1'], order=(1, 0, 1), trend="n").fit()
print(arma_mod.summary())

# Score the fitted values against Y1 itself; the previous `.diff()` target
# compared different quantities and carried a leading NaN into the NumPy mean.
y_true = df['Y1']
y_pred = arma_mod.fittedvalues

ssr = np.sum((y_true - y_pred) ** 2)
tss = np.sum((y_true - np.mean(y_true)) ** 2)

n = len(y_true)
k = arma_mod.df_model

r2 = 1 - ssr / tss
r2_adj = 1 - (ssr / (n - k)) / (tss / (n - 1))

print("R^2:", round(r2, 4))
print("Adjusted R^2:", round(r2_adj, 4))

print("AIC:", arma_mod.aic)
print("BIC:", arma_mod.bic)

## Problem E
- For part c, looking at the AIC, the best model is the AR(2), given that it has the smallest AIC. For part d, the best is still the AR(2), given that it has the smallest AIC.

## Problem F
- Yes, because the AIC suggests that the best model is the AR(2); given that the simulated series was created with an AR(1), one would expect the AR(1) to be the best model.

## Problem G

In [None]:
# AR(2)
model_ar2 = AutoReg(df['Y1'], lags=2, old_names=False)
res_ar2 = model_ar2.fit()

residuals = res_ar2.resid

fig, ax = plt.subplots(2, 1, figsize=(10, 6))

plot_acf(residuals, ax=ax[0], lags=20)
ax[0].set_title('ACF of AR(2) Residuals')

plot_pacf(residuals, ax=ax[1], lags=20, method='ywm')
ax[1].set_title('PACF of AR(2) Residuals')

plt.tight_layout()
plt.show()

- Yes, they look like random noise.

# Problem 3

## Problem A

In [None]:
# Load the quarterly workbook, index it by quarter, and keep only the CPI
# series together with the original Date column.
df = pd.read_excel("data/QUARTERLY-1.xls")
df['date'] = pd.PeriodIndex(df['Date'], freq='Q')
df = df.set_index('date')
kept_columns = ["CPINSA", "Date"]
df = df[kept_columns]
df

In [None]:
# Problem A: time-series plot of the CPINSA level.
plt.figure(figsize=(8, 5))
plt.plot(df.index.to_timestamp(), df['CPINSA'], marker='o')
plt.title('Quarterly Data')
plt.xlabel('Date')
# Label the axis with the plotted column; 'TB3mo' was left over from Problem 1.
plt.ylabel('CPINSA')
plt.grid(True)
plt.tight_layout()
plt.show()

- It does not look stationary.

## Problem B

In [None]:
# Problem B: sample ACF and PACF of the CPINSA level, 25 lags each.
cpi_level = df['CPINSA']
fig = plot_acf(cpi_level, lags=25)
plt.show()
pacf_fig = plot_pacf(cpi_level, lags=25)
plt.show()

## Problem C

In [None]:
# Problem C: quarterly log change of CPINSA, log(CPINSA_t / CPINSA_{t-1}).
# The first row is NaN because it has no previous quarter.
df["log_CPINSA"] = np.log((df["CPINSA"] / df["CPINSA"].shift(1)))
plt.figure(figsize=(8, 5))
plt.plot(df.index.to_timestamp(), df['log_CPINSA'], marker='o')
plt.title('Quarterly Data')
plt.xlabel('Date')
# Label the axis with the plotted series; 'TB3mo' was left over from Problem 1.
plt.ylabel('log(CPINSA_t / CPINSA_{t-1})')
plt.grid(True)
plt.tight_layout()
plt.show()

- There was a dip but it seems to be stationary given that there is no strong trend 

## Problem D

In [None]:
# Problem D: ACF and PACF of the quarterly log change (missing rows dropped).
quarterly_change = df['log_CPINSA'].dropna()
fig = plot_acf(quarterly_change, lags=25)
plt.show()
pacf_fig = plot_pacf(df['log_CPINSA'].dropna(), lags=25)
plt.show()

## Problem E


In [None]:
# Problem E: four-quarter log change of CPINSA, log(CPINSA_t / CPINSA_{t-4}).
# The first four rows are NaN because they have no value four quarters back.
df["log_CPINSA4"] = np.log((df["CPINSA"] / df["CPINSA"].shift(4)))
plt.figure(figsize=(8, 5))
plt.plot(df.index.to_timestamp(), df['log_CPINSA4'], marker='o')
plt.title('Quarterly Data')
plt.xlabel('Date')
# Label the axis with the plotted series; 'TB3mo' was left over from Problem 1.
plt.ylabel('log(CPINSA_t / CPINSA_{t-4})')
plt.grid(True)
plt.tight_layout()
plt.show()

- there was a dip but it seems to be stationary given that there is no strong trend 
  
## Problem F 

In [None]:
# Problem F: ACF and PACF of the four-quarter log change (missing rows dropped).
seasonal_change = df['log_CPINSA4'].dropna()
fig = plot_acf(seasonal_change, lags=25)
plt.show()
fig = plot_pacf(df['log_CPINSA4'].dropna(), lags=25)
plt.show()

## Problem G 

In [None]:
# Problem G: AR(5) on the four-quarter log change.
seasonal_change = df['log_CPINSA4'].dropna()
res = AutoReg(seasonal_change, lags=5).fit()
print(res.summary())


In [None]:
# Hand-computed R^2 and adjusted R^2 for the AutoReg result held in `res`.
start_at = res.model._hold_back
y_true = res.model.endog[start_at:]
y_pred = res.fittedvalues

ssr = np.sum(np.square(y_true - y_pred))
tss = np.sum(np.square(y_true - np.mean(y_true)))

n = len(y_true)
k = res.df_model + 1

r2 = 1 - ssr / tss
r2_adj = 1 - (ssr / (n - k)) / (tss / (n - 1))

print("R^2:", round(r2, 4))
print("Adjusted R^2:", round(r2_adj, 4))

In [None]:
# MA(10) on the four-quarter log change: order=(p, d, q)=(0, 0, 10).
ma10_order = (0, 0, 10)
arma_mod = ARIMA(df['log_CPINSA4'].dropna(), order=ma10_order).fit()
print(arma_mod.summary())

In [None]:
# ARMA(5,10) on the four-quarter log change: order=(p, d, q)=(5, 0, 10).
arma_5_10_order = (5, 0, 10)
arma_mod = ARIMA(df['log_CPINSA4'].dropna(), order=arma_5_10_order).fit()
print(arma_mod.summary())

In [None]:
# ARMA(6,7) on the four-quarter log change: order=(p, d, q)=(6, 0, 7).
arma_6_7_order = (6, 0, 7)
arma_mod = ARIMA(df['log_CPINSA4'].dropna(), order=arma_6_7_order).fit()
print(arma_mod.summary())

## Problem H
- Looking at the AIC, the MA(10) seems to perform the best.

## Problem I 

In [None]:
# Problem I: regress the four-quarter log change on quarter-of-year dummies.
# drop_first=True leaves the first quarter as the baseline absorbed by the constant.
df["quarter"] = df.index.quarter
quarter_dummies = pd.get_dummies(df["quarter"], prefix="Q", drop_first=True).astype(int)
df = pd.concat([df, quarter_dummies], axis=1)

dummy_columns = list(filter(lambda col: col.startswith("Q_"), df.columns))
X = sm.add_constant(df[dummy_columns])
y = df["log_CPINSA4"]

# missing='drop' discards the rows where the regressand is NaN.
ols_model = sm.OLS(y, X, missing='drop')
model = ols_model.fit()
print(model.summary())


## Problem J

In [None]:
# Problem J: plot the residuals of the quarterly-dummy regression over time.
# Rows dropped from the regression get NaN residuals through index alignment.
df["residuals"] = model.resid
plt.figure(figsize=(10, 5))
plt.plot(df.index.to_timestamp(), df["residuals"], marker='o', linestyle='-')
plt.axhline(0, color='red', linestyle='--')  # zero-residual reference line
# The regressand is log_CPINSA4, the four-quarter change, so the title says
# t-4; it previously said t-1.
# NOTE(review): if the intended regressand was the one-quarter change,
# change the regression instead of this title.
plt.title("Regression Residuals: log(CPINSA_t / CPINSA_{t-4}) ~ Quarterly Dummies")
plt.xlabel("Date")
plt.ylabel("Residual")
plt.grid(True)
plt.tight_layout()
plt.show()

- The residuals appear to be stationary.

## Problem K

In [None]:
# Problem K: ACF and PACF of the quarterly-dummy regression residuals.
# The previous version plotted df['log_CPINSA4'] again (the same figures as
# Problem F), not the residuals that the answer below discusses.
regression_residuals = model.resid.dropna()
fig = plot_acf(regression_residuals, lags=25)
plt.show()
fig = plot_pacf(regression_residuals, lags=25)
plt.show()

- The residuals seem to be autocorrelated for around 10 periods.

# Problem 4
## Problem A

In [None]:
# Reload the quarterly workbook and build the spread s = r10 - Tbill,
# keeping only the spread and the original Date column.
df = pd.read_excel("data/QUARTERLY-1.xls")
df['date'] = pd.PeriodIndex(df['Date'], freq='Q')
df = df.set_index('date')
df["s"] = df["r10"].sub(df["Tbill"])
spread_columns = ["s", "Date"]
df = df[spread_columns]
df

In [None]:
# Problem A: time-series plot of the spread s.
plt.figure(figsize=(8, 5))
plt.plot(df.index.to_timestamp(), df['s'], marker='o')
plt.title('Quarterly Data')
plt.xlabel('Date')
# Label the axis with the plotted series; 'TB3mo' was left over from Problem 1.
plt.ylabel('s (r10 - Tbill)')
plt.grid(True)
plt.tight_layout()
plt.show()

- it looks stationary

## Problem B

In [None]:
# Problem B: sample ACF and PACF of the spread, 25 lags each.
spread = df['s']
fig = plot_acf(spread, lags=25)
plt.show()
pacf_fig = plot_pacf(spread, lags=25)
plt.show()

- It seems to be autocorrelated out to 4 lags and partially autocorrelated at lag 1.

In [None]:
# Fit an AR(2) to the spread and plot its residuals over time.
res_ar2 = AutoReg(df['s'].dropna(), lags =2).fit()
print(res_ar2.summary())
# Quarters without a residual become NaN through index alignment.
df["residuals"] = res_ar2.resid
plt.figure(figsize=(10, 5))
plt.plot(df.index.to_timestamp(), df["residuals"], marker='o', linestyle='-')
plt.axhline(0, color='red', linestyle='--')  # zero-residual reference line
# The title names the model actually plotted; the previous text was copied
# from the CPINSA quarterly-dummy regression.
plt.title("AR(2) Residuals: s")
plt.xlabel("Date")
plt.ylabel("Residual")
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Ljung-Box test on the AR(2) residuals for lags 1 through 20.
residuals = res_ar2.resid
ljung_box_results = acorr_ljungbox(residuals, lags=range(1, 21), return_df=True)
print(ljung_box_results)

# Plot the p-values against the 5% line to see at which lags the null of no
# autocorrelation is rejected.
pvalue_ax = ljung_box_results['lb_pvalue'].plot(title='Ljung-Box Test P-Values')
pvalue_ax.axhline(y=0.05, color='red', linestyle='--', label='Significance Level (0.05)')
pvalue_ax.set_xlabel('Lag')
pvalue_ax.set_ylabel('P-value')
pvalue_ax.legend()
plt.show()

- There seems to be autocorrelation at lags beyond 5.

## Problem E

In [None]:
# Problem E: AR(7) on the spread.
spread = df['s'].dropna()
res_ar7 = AutoReg(spread, lags=7).fit()
print(res_ar7.summary())

## Problem F

In [None]:
# Problem F: Ljung-Box test on the AR(7) residuals for lags 1 through 20.
residuals = res_ar7.resid
test_lags = range(1, 21)
ljung_box_results = acorr_ljungbox(residuals, lags=test_lags, return_df=True)
print(ljung_box_results)

# P-values by lag, with the 5% threshold drawn for reference.
p_values = ljung_box_results['lb_pvalue']
p_values.plot(title='Ljung-Box Test P-Values')
plt.axhline(y=0.05, color='red', linestyle='--', label='Significance Level (0.05)')
plt.xlabel('Lag')
plt.ylabel('P-value')
plt.legend()
plt.show()

- There does not seem to be autocorrelation.

## Problem G

In [None]:
# Problem G: print AIC and then BIC for the AR(2) and AR(7) fits.
fitted_models = (("AR(2)", res_ar2), ("AR(7)", res_ar7))
for criterion in ("aic", "bic"):
    for model_name, fitted in fitted_models:
        print(f"{criterion.upper()} {model_name}: {getattr(fitted, criterion)}")

## Problem H

In [None]:
# Problem H: in-sample one-step errors of the AR(2) on the estimation sample.
# Hold out the last 10 quarters for the forecast comparison.
df2 = df.head(-10)
y = df2['s'].dropna()

# Re-estimate on the truncated sample. The earlier `res_ar2` was fitted on
# the full series, so its coefficients had already seen the held-out quarters
# and the comparison with the AR(7), which is refitted on `df2`, was uneven.
res_ar2 = AutoReg(y, lags=2).fit()

# Predictions start at the first observation that has both lags available.
first_fitted = res_ar2.model._hold_back
forecast_ar2 = res_ar2.predict(start=first_fitted, end=len(y)-1)

error_ar2 = y.iloc[first_fitted:] - forecast_ar2
error_ar2

In [None]:
# Problem H (continued): refit the AR(7) on the truncated sample `df2` and
# compute its in-sample prediction errors.
y = df2['s'].dropna()
res_ar7 = AutoReg(df2['s'].dropna(), lags=7).fit()

first_fitted_ar7 = res_ar7.model._hold_back
last_in_sample = len(y) - 1
forecast_ar7 = res_ar7.predict(start=first_fitted_ar7, end=last_in_sample)

error_ar7 = y[first_fitted_ar7:] - forecast_ar7
error_ar7

In [None]:
# Mean squared error of each model's in-sample prediction errors.
mse_ar2 = error_ar2.pow(2).mean()
mse_ar7 = error_ar7.pow(2).mean()

for model_name, mse in (("AR(2)", mse_ar2), ("AR(7)", mse_ar7)):
    print(f'MSE {model_name}: {mse:.4f}')

- The AR(7) seems to have a smaller forecast error than the AR(2).

## Problem I 

In [None]:
# Problem I: AR(2) predictions for the 10 positions following the truncated
# sample `y`, compared with the last 10 observed values of the spread.
horizon_start = len(y)
horizon_end = horizon_start + 9
forecast_ar2 = res_ar2.predict(start=horizon_start, end=horizon_end)

# Forecast error: observed minus predicted, aligned on the quarter index.
actual_tail = df['s'].tail(10)
error_ar2 = actual_tail - forecast_ar2
error_ar2

In [None]:
# Problem I (continued): AR(7) predictions for the same 10 positions.
horizon_start = len(y)
forecast_ar7 = res_ar7.predict(start=horizon_start, end=horizon_start + 9)

# Forecast error: observed minus predicted; the cell displays the forecasts.
actual_tail = df['s'].tail(10)
error_ar7 = actual_tail - forecast_ar7
forecast_ar7

In [None]:
# Mean squared error of each model's 10-step forecast errors.
squared_ar2 = error_ar2 ** 2
squared_ar7 = error_ar7 ** 2
mse_ar2 = squared_ar2.mean()
mse_ar7 = squared_ar7.mean()

print(f'MSE AR(2): {mse_ar2:.4f}')
print(f'MSE AR(7): {mse_ar7:.4f}')

- The AR(2) seems to perform better than the AR(7) at forecasting the 10 steps. This is surprising, given that over the entire series the AR(7) fits the historical data better.