In [None]:
import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
import statsmodels.api as sm
from statsmodels.tsa.ar_model import AutoReg
import matplotlib.pyplot as plt

In [None]:
# Load the quarterly workbook and index it by calendar quarter.
df = pd.read_excel("data/QUARTERLY-1.xls")
df.index = pd.PeriodIndex(df["Date"], freq="Q", name="date")
# Keep only the CPI series and the original date column.
df = df.loc[:, ["CPINSA", "Date"]]
df

In [None]:
#3a
# Plot the raw CPINSA level against time to judge stationarity by eye.
fig, ax = plt.subplots(figsize=(8, 5))
# The PeriodIndex is converted to timestamps so matplotlib can place it on a date axis.
ax.plot(df.index.to_timestamp(), df['CPINSA'], marker='o')
# The y-axis label names the plotted column (it previously read 'TB3mo').
ax.set(title='Quarterly Data', xlabel='Date', ylabel='CPINSA')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# The series does not look stationary.

In [None]:
#3b
fig = plot_acf(df['CPINSA'], lags=25)
plt.show()

In [None]:
# Partial autocorrelation plot of the CPINSA level, out to 25 lags.
plot_pacf(x=df["CPINSA"], lags=25)
plt.show()


In [None]:
#3c
# One-quarter log change: log(CPINSA_t / CPINSA_{t-1}); the first row is NaN.
cpinsa_lag1 = df["CPINSA"].shift(1)
df["log_CPINSA"] = np.log(df["CPINSA"] / cpinsa_lag1)
df

In [None]:
# Plot the one-quarter log change of CPINSA against time.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df.index.to_timestamp(), df['log_CPINSA'], marker='o')
# The y-axis label describes the plotted series (it previously read 'TB3mo').
ax.set(title='Quarterly Data', xlabel='Date', ylabel='log(CPINSA_t / CPINSA_{t-1})')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# there was a dip but it seems to be stationary given that there is no strong trend 

In [None]:
#3d
fig = plot_acf(df['log_CPINSA'].dropna(), lags=25)
plt.show()

In [None]:
# Partial autocorrelation plot of the one-quarter log change (NaN rows dropped).
plot_pacf(x=df["log_CPINSA"].dropna(), lags=25)
plt.show()


In [None]:
#3e
df["log_CPINSA4"] = np.log((df["CPINSA"] / df["CPINSA"].shift(4)))
df

In [None]:
# Plot the four-quarter log change of CPINSA against time.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df.index.to_timestamp(), df['log_CPINSA4'], marker='o')
# The y-axis label describes the plotted series (it previously read 'TB3mo').
ax.set(title='Quarterly Data', xlabel='Date', ylabel='log(CPINSA_t / CPINSA_{t-4})')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# there was a dip but it seems to be stationary given that there is no strong trend 

In [None]:
#f
#3f
# Autocorrelation plot of the four-quarter log change (NaN rows dropped).
fig = plot_acf(x=df["log_CPINSA4"].dropna(), lags=25)
plt.show()

In [None]:
# Partial autocorrelation plot of the four-quarter log change (NaN rows dropped).
fig = plot_pacf(x=df["log_CPINSA4"].dropna(), lags=25)
plt.show()

In [None]:
#3g
res = AutoReg(df['log_CPINSA4'].dropna(), lags =5).fit()
print(res.summary())

In [None]:
# In-sample R^2 and adjusted R^2 for the AutoReg fit stored in `res`.
y_pred = res.fittedvalues
n = len(y_pred)

# Pair the fitted values with the matching tail of the observed series.
# The length of `fittedvalues` is used instead of the model's private
# `_hold_back` attribute, and endog is flattened so the subtraction below
# is always element-wise.
y_true = np.asarray(res.model.endog).squeeze()[-n:]

# Residual and total sums of squares over the fitted sample.
ssr = np.sum((y_true - y_pred) ** 2)
tss = np.sum((y_true - np.mean(y_true)) ** 2)

# NOTE(review): k = df_model + 1 assumes df_model excludes the intercept;
# confirm against the statsmodels AutoRegResults documentation.
k = res.df_model + 1

r2 = 1 - ssr / tss
r2_adj = 1 - (ssr / (n - k)) / (tss / (n - 1))

print("R^2:", round(r2, 4))
print("Adjusted R^2:", round(r2_adj, 4))

In [None]:
# ARIMA with order (0, 0, 10): ten MA terms, no AR terms, no differencing.
ma10_spec = ARIMA(endog=df["log_CPINSA4"].dropna(), order=(0, 0, 10))
arma_mod = ma10_spec.fit()
print(arma_mod.summary())

In [None]:
# ARIMA with order (5, 0, 10): five AR terms, ten MA terms, no differencing.
# Note: this overwrites the `arma_mod` result from the previous cell.
arma_5_10_spec = ARIMA(endog=df["log_CPINSA4"].dropna(), order=(5, 0, 10))
arma_mod = arma_5_10_spec.fit()
print(arma_mod.summary())

In [None]:
# ARIMA with order (6, 0, 7): six AR terms, seven MA terms, no differencing.
# Note: this overwrites the `arma_mod` result from the previous cell.
arma_6_7_spec = ARIMA(endog=df["log_CPINSA4"].dropna(), order=(6, 0, 7))
arma_mod = arma_6_7_spec.fit()
print(arma_mod.summary())

In [None]:
#h

In [None]:
# Looking at the AIC, the MA(10) model seems to perform the best.

In [None]:
#i
# Step 1: Extract quarter from datetime index
df["quarter"] = df.index.quarter

# Step 2: Create dummy variables with 0/1 values
quarter_dummies = pd.get_dummies(df["quarter"], prefix="Q", drop_first=True).astype(int)

# Step 3: Concatenate with main DataFrame
df = pd.concat([df, quarter_dummies], axis=1)
df

In [None]:
# Regressors: every quarterly dummy column (named "Q_*") plus an intercept.
dummy_columns = [name for name in df.columns if name.startswith("Q_")]
X = sm.add_constant(df[dummy_columns])

# NOTE(review): the regressand is the four-quarter log change (log_CPINSA4);
# if the exercise is meant for the one-quarter change, use log_CPINSA — confirm.
y = df["log_CPINSA4"]

# missing='drop' discards the rows where the regressand is NaN.
ols_spec = sm.OLS(endog=y, exog=X, missing="drop")
model = ols_spec.fit()
print(model.summary())


In [None]:
#j
df["residuals"] = model.resid
plt.figure(figsize=(10, 5))
plt.plot(df.index.to_timestamp(), df["residuals"], marker='o', linestyle='-')
plt.axhline(0, color='red', linestyle='--')
plt.title("Regression Residuals: log(CPINSA_t / CPINSA_{t-1}) ~ Quarterly Dummies")
plt.xlabel("Date")
plt.ylabel("Residual")
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# The residuals appear to be stationary.

In [None]:
#k
fig = plot_acf(df['log_CPINSA4'].dropna(), lags=25)
plt.show()

In [None]:
# Partial autocorrelation plot of the dummy-regression residuals. The cell
# previously re-plotted log_CPINSA4 rather than the residuals. NaN rows
# (observations dropped from the regression) are removed.
fig = plot_pacf(df['residuals'].dropna(), lags=25)
plt.show()

In [None]:
# The residuals seem to be autocorrelated for around 10 periods.