**Author**: Alejandro M. Ouslan

# Problem 1

## Problem A

In [None]:
import polars as pl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import statsmodels.formula.api as smf
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.stattools import acf

In [None]:
# Read the quarterly macro workbook and derive three columns:
#   date     - "column_0" with its first ":0" replaced by "Q"
#              (presumably turning labels like "1963:01" into "1963Q1" -- TODO confirm)
#   PCECTPI2 - PCECTPI lagged by one row
#   infl     - 400 x the log difference between PCECTPI and its lag
price_index = pl.col("PCECTPI")
df = (
    pl.read_excel("data/us_macro_quarterly-1.xlsx")
    .with_columns(
        date=pl.col("column_0").str.replace(":0", "Q"),
        PCECTPI2=price_index.shift(1),
    )
    .with_columns(
        infl=400 * (price_index.log() - pl.col("PCECTPI2").log()),
    )
)
df

In [None]:
# Convert to pandas and parse the date labels as quarterly periods.
data = df.to_pandas()
data['date'] = pd.PeriodIndex(data['date'], freq='Q')

# Restrict the sample to 1963Q1 through 2012Q4 (both bounds inclusive),
# then index the frame by quarter.
in_sample = (data["date"] >= "1963Q1") & (data["date"] <= "2012Q4")
data = data[in_sample]
# NOTE(review): `data` is now a boolean-filtered frame that is mutated in
# place here and receives new columns in later cells; on some pandas versions
# this may emit SettingWithCopyWarning -- consider an explicit .copy().
data.set_index('date', inplace=True)

## Problem B

In [None]:
# Line plot of the PCECTPI column over the sample, one marker per quarter.
# NOTE(review): this plots the PCECTPI level, not the `infl` column derived
# from it earlier -- confirm which series the exercise asks for.
timestamps = data.index.to_timestamp()  # PeriodIndex -> timestamps for the x-axis
plt.figure(figsize=(8, 5))
plt.plot(timestamps, data['PCECTPI'], marker='o')
plt.grid(True)
plt.title('Quarterly Data')
plt.xlabel('Date')
plt.ylabel('PCECTPI')
plt.tight_layout()
plt.show()

- The data seem to follow a trend, but there are also some stochastic elements.

## Problem C

In [None]:
# Sample autocorrelations of the PCECTPI column, requested up to lag 4.
pcectpi = data['PCECTPI']
acf(pcectpi, nlags=4)

In [None]:
pcectpi_series = data['PCECTPI']

# Autocorrelation plot with 10 lags.
fig = plot_acf(pcectpi_series, lags=10)
plt.show()

# Partial autocorrelation plot with 15 lags.
plot_pacf(pcectpi_series, lags=15)
plt.show()

## Problem D


In [None]:
# pch  : period-over-period relative change of the `infl` column
# pch2 : pch lagged by one row (the regressor used in the next cell)
# NOTE(review): pct_change() returns infl_t / infl_{t-1} - 1, not the first
# difference infl_t - infl_{t-1}; confirm this is the intended transformation.
infl_pct_change = data["infl"].pct_change()
data["pch"] = infl_pct_change
data["pch2"] = infl_pct_change.shift(1)
data

In [None]:
# OLS regression of pch on its own first lag (pch2), with an intercept
# supplied by the formula interface.
lagged_model = smf.ols("pch ~ pch2", data=data)
results = lagged_model.fit()
print(results.summary())

- INFO: Missing the interpretation 

## Problem E 

In [None]:
# AR(1) for pch, estimated on the non-missing observations.
pch_observed = data['pch'].dropna()
ar1_model = AutoReg(pch_observed, lags=1)
res = ar1_model.fit()
print(res.summary())

In [None]:
# AR(2) for pch, estimated on the non-missing observations.
pch_observed = data['pch'].dropna()
ar2_model = AutoReg(pch_observed, lags=2)
res = ar2_model.fit()
print(res.summary())

- INFO: Missing interpretation 

## Problem F

In [None]:
# Compare AR(p) fits of pch for p = 0..8 by information criteria.
#
# Every model is estimated with hold_back=MAX_LAG so that all lag orders use
# the same estimation sample; without it each AR(p) drops a different number
# of initial observations and the BIC/AIC values are not comparable across p.
# The fitted model is bound to a loop-local name only, so the `res` object
# from the previous cell is no longer overwritten as a side effect.
MAX_LAG = 8
pch_series = data['pch'].dropna()  # hoisted: identical for every lag order
for p in range(MAX_LAG + 1):
    fit = AutoReg(pch_series, lags=p, hold_back=MAX_LAG).fit()
    print(f"lag {p} : BIC={fit.bic:.4f}  AIC={fit.aic:.4f}")

- Looking at the BIC values printed above, the chosen lag is 0.

## Problem G

In [None]:
# Display the working DataFrame as the cell output (nothing is modified here).
data

In [None]:
# Fit an AR(2) on the non-missing pch values and show the last prediction.
# NOTE(review): trend="n" estimates the model without a constant, whereas the
# earlier AutoReg fits leave `trend` at its default -- confirm this is intended.
y = data['pch'].dropna()
res = AutoReg(y, lags=2, trend="n").fit()

# Predict from the first usable observation through position len(y) + 1.
# NOTE(review): the last in-sample position is len(y) - 1, so len(y) + 1 is
# two positions past the sample and tail(1) shows the second out-of-sample
# step -- TODO confirm the intended forecast horizon.
# NOTE(review): `_hold_back` is a private attribute of the model object.
first_position = res.model._hold_back
last_position = len(y) + 1
forecast_ar = res.predict(start=first_position, end=last_position)
forecast_ar.tail(1)

# Problem 2

## Problem A

- The formula interpretation is incorrect: a monthly percentage change in IP would be
  $\frac{IP_{t} - IP_{t-1}}{IP_{t-1}}$, not $\ln\left(\frac{IP_{t}}{IP_{t-1}}\right)$. What the current model does is take the ratio of the current month's IP to the prior month's IP and then take its logarithm, which is not the monthly percentage change itself. 

## Problem B

- $$Y = 0.787 + 0.052(101.359) + 0.185(101.034) + 0.234(100.374) + 0.164(101.196) = 64.83$$

## Problem C

- Let N = 324 (27 years × 12 months per year).

The formulas for AIC (Akaike Information Criterion) and BIC (Bayesian Information Criterion) are:

- **AIC**(p) = ln(SSR(p) / N) + 2 (p + 1) / N  
- **BIC**(p) = ln(SSR(p) / N) + ln(N) (p + 1) / N  

Where:  
- SSR(p) = Sum of squared residuals of the AR(p) model  
- N = Number of observations  
- p = Autoregressive model order, so p + 1 is the number of estimated coefficients (p lags plus the intercept)  


Given (first row of the table, AR(0)):
- SSR = 19,533  
- N = 324  
- p = 0, so p + 1 = 1  

Then:

- **BIC** = ln(19,533 / 324) + (ln(324) * 1) / 324 ≈ 4.1170  
- **AIC** = ln(19,533 / 324) + (2 * 1) / 324 ≈ 4.1053  


| AR | SSR    | BIC         | AIC         |
|----|--------|-------------|-------------|
| 0  | 19,533 | 4.116958907 | 4.105289946 |
| 1  | 18,643 | 4.088166106 | 4.064828183 |
| 2  | 17,377 | 4.035684659 | 4.000677774 |
| 3  | 16,285 | 3.988623406 | 3.941947560 |
| 4  | 15,842 | 3.978885409 | 3.920540602 |
| 5  | 15,824 | 3.995590344 | 3.925576575 |
| 6  | 15,824 | 4.013432145 | 3.931749415 |


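Both criteria are minimized at 4 lags (BIC = 3.9789, AIC = 3.9205), so AIC and BIC select the same model here; the criterion values themselves differ only slightly, not dramatically.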

# Problem 3

## Problem A

In [None]:
def simulate_ar3_process(beta, n, reps, rng=None):
    """Monte Carlo moments of the process y_t = beta * y_{t-3} + eps_t.

    For each of ``reps`` replications a path of length ``n`` is simulated
    with standard-normal shocks and the start-up values y_0 = y_1 = y_2 = 0.
    The sample mean, the variance and the lag-1/2/3 autocovariances of each
    path (all computed around that path's own mean) are recorded and then
    averaged over the replications.

    Parameters
    ----------
    beta : float
        Coefficient on the third lag.
    n : int
        Length of each simulated path.
    reps : int
        Number of Monte Carlo replications.
    rng : numpy.random.Generator, optional
        Source of the normal draws. Pass a seeded generator (for example
        ``np.random.default_rng(787)``) to make the results reproducible.
        When omitted, the global ``np.random`` state is used, exactly as
        before this parameter existed.

    Returns
    -------
    tuple
        ``(mean_of_means, std_of_means, avg_cov_lag1, avg_cov_lag2,
        avg_cov_lag3, rho_1, rho_2, rho_3)`` where ``std_of_means`` is the
        sample standard deviation (``ddof=1``) of the per-path means and
        ``rho_k`` is the averaged lag-k autocovariance divided by the
        averaged variance.
    """
    # Fall back to the legacy global RNG so existing callers (including ones
    # that rely on np.random.seed) see unchanged behaviour.
    draw_normal = np.random.normal if rng is None else rng.normal

    means = []
    var_list = []
    cov_by_lag = {1: [], 2: [], 3: []}

    for _ in range(reps):
        epsilon = draw_normal(0, 1, n)

        # The first three values stay at zero; the recursion starts at t = 3.
        y = np.zeros(n)
        for t in range(3, n):
            y[t] = beta * y[t - 3] + epsilon[t]

        path_mean = np.mean(y)
        means.append(path_mean)
        y_centered = y - path_mean

        var_list.append(np.mean(y_centered ** 2))
        for lag, store in cov_by_lag.items():
            # Average of the n - lag available cross-products at this lag.
            store.append(np.mean(y_centered[lag:] * y_centered[:-lag]))

    mean_of_means = np.mean(means)
    std_of_means = np.std(means, ddof=1)

    avg_var = np.mean(var_list)
    avg_cov_lag1 = np.mean(cov_by_lag[1])
    avg_cov_lag2 = np.mean(cov_by_lag[2])
    avg_cov_lag3 = np.mean(cov_by_lag[3])

    # Autocorrelations as ratios of the Monte Carlo averages.
    rho_1 = avg_cov_lag1 / avg_var
    rho_2 = avg_cov_lag2 / avg_var
    rho_3 = avg_cov_lag3 / avg_var

    return mean_of_means, std_of_means, avg_cov_lag1, avg_cov_lag2, avg_cov_lag3, rho_1, rho_2, rho_3

# Run the AR(3) Monte Carlo once; the cells below print pieces of the result.
beta, n, reps = 0.7, 1000, 10000

(
    mean_estimate,
    std_estimate,
    cov1,
    cov2,
    cov3,
    acf1,
    acf2,
    acf3,
) = simulate_ar3_process(beta, n, reps)

## Problem B

In [None]:

# std_estimate is the second value returned by simulate_ar3_process above.
print(f"Std dev of E[y_t]: {std_estimate:.6f}")


## Problem C

In [None]:
# Report the simulated autocovariances at lags 1-3, one per line.
print(
    f"Estimated Covariance lag 1: {cov1:.6f}",
    f"Estimated Covariance lag 2: {cov2:.6f}",
    f"Estimated Covariance lag 3: {cov3:.6f}",
    sep="\n",
)

## Problem D 

In [None]:

# Report the simulated autocorrelations at lags 1-3, one per line.
print(
    f"Estimated Autocorrelation lag 1: {acf1:.6f}",
    f"Estimated Autocorrelation lag 2: {acf2:.6f}",
    f"Estimated Autocorrelation lag 3: {acf3:.6f}",
    sep="\n",
)

# Problem 4

## Problem A 

- $y_t = c + \phi_1 y_{t-1} + \epsilon_t + \theta_1 \epsilon_{t-1} + \theta_2 \epsilon_{t-2}$

## Problem B 

- $\hat{y}_{t+1|t} = \beta y_{t-2}$


## Problem C 
- $\lim_{h \to \infty} \hat{y}_{t+h|t} = 0$

## Problem D 

- $y_t = c + \phi_1 y_{t-1} + \phi_2 y_{t-2} + \phi_3 y_{t-3} + \epsilon_{t}$
- The model expresses the current value as a linear combination of its previous three values plus an error term.

# Problem 5

## Problem A

In [None]:
# Monte Carlo: regress one simulated random walk on another, independent one
# and collect the R-squared and the slope t-statistic from each regression.
T = 100
num_simulations = 1000
rng = np.random.default_rng(787)

r_squared_vals = []
t_stats = []

for _ in range(num_simulations):
    # Draw the shocks for Y first and for X second (keeps the RNG stream order).
    e = rng.normal(0, 1, T)
    a = rng.normal(0, 1, T)

    # Random walks: each series is the running sum of its shocks, so the
    # first value equals the first shock.
    Y = np.cumsum(e)
    X = np.cumsum(a)

    # Note: `df` and `results` are rebound on every pass through the loop.
    df = pd.DataFrame({'Y': Y, 'X': X})
    results = smf.ols("Y ~ X", data=df).fit()

    r_squared_vals.append(results.rsquared)
    t_stats.append(results.tvalues['X'])

r_squared_vals = np.array(r_squared_vals)
t_stats = np.array(t_stats)


# Side-by-side histograms of the simulated R-squared values and t-statistics.
plt.figure(figsize=(12, 5))

panels = (
    (r_squared_vals, "Histogram of R²", "R²"),
    (t_stats, "Histogram of t-statistics", "t-statistic"),
)
for position, (values, panel_title, x_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.hist(values, bins=30, edgecolor='k', alpha=0.7)
    plt.title(panel_title)
    plt.xlabel(x_label)
    plt.ylabel("Frequency")

plt.tight_layout()
plt.show()

# 5th, 50th and 95th percentiles of both simulated distributions.
percentile_levels = [5, 50, 95]
r2_percentiles = np.percentile(r_squared_vals, percentile_levels)
t_stat_percentiles = np.percentile(t_stats, percentile_levels)

# Share of simulations whose slope t-statistic exceeds 1.96 in absolute value.
t_stat_exceeds_1_96 = (np.abs(t_stats) > 1.96).mean()

print("R² percentiles (5%, 50%, 95%):", r2_percentiles)
print("t-statistic percentiles (5%, 50%, 95%):", t_stat_percentiles)
print(f"Fraction of |t| > 1.96: {t_stat_exceeds_1_96:.4f}")
