<a href="https://colab.research.google.com/github/safeai-snu/Econometrics/blob/main/Part.3/6.Time_Series_Regression_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 예제 6-1. 간단한 예측 방법 예시

### 호주 생산량 데이터 예측

#### 필요한 패키지 불러오기

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

#### 데이터 전처리

In [None]:
def parse_quarter_string(q):
    """
    Convert a quarter label 'YYYY Qq' into an ISO 8601 date string.

    Parameters
    ----------
    q : str
        Quarter label made of a year, the separator ' Q', and a quarter
        number from 1 to 4 (for example '2000 Q3').

    Returns
    -------
    str
        'YYYY-MM-01', where MM is the zero-padded first month of the quarter
        (01, 04, 07 or 10). The result is a string, not a datetime object;
        callers convert it with ``pd.to_datetime``.

    Raises
    ------
    ValueError
        If the label does not split into exactly a year and a quarter, or if
        the quarter is not an integer from 1 to 4.
    """
    year, quarter = q.split(' Q')
    quarter_number = int(quarter)
    if not 1 <= quarter_number <= 4:
        raise ValueError(f"quarter must be between 1 and 4, got {q!r}")
    first_month_of_quarter = 3 * quarter_number - 2
    # Zero-pad the month so every label maps to a strict ISO 8601 date.
    return f"{year.strip()}-{first_month_of_quarter:02d}-01"

# Load the quarterly production table, turn the 'YYYY Qq' labels of the
# Quarter column into timestamps, and index the frame by them.
aus_production = pd.read_csv('https://raw.githubusercontent.com/safeai-snu/Econometrics/refs/heads/main/Part.3/data/aus_production.csv')
aus_production['Quarter'] = pd.to_datetime(aus_production['Quarter'].apply(parse_quarter_string))
aus_production = aus_production.set_index('Quarter')

#### Naive forecasting

In [None]:
# Naive method: each row's forecast is the Bricks value of the previous row.
aus_production['Naive'] = aus_production['Bricks'].shift(1)

#### Naive seasonal forecasting

In [None]:
# Seasonal naive method: each row's forecast is the Bricks value `seasonality`
# rows earlier (4 rows, i.e. the same quarter of the previous year).
seasonality = 4
aus_production['Naive_Seasonal'] = aus_production['Bricks'].shift(seasonality)

#### Mean method

In [None]:
# Mean method: each row's forecast is the average of the Bricks values
# observed before it. The trailing shift(1) keeps a row's own value out of
# its forecast, in line with the Naive and Naive_Seasonal columns, which are
# also built only from earlier rows.
aus_production['Mean'] = aus_production['Bricks'].expanding().mean().shift(1)

#### 예측

In [None]:
forecast_start = '2000-01-01'

# Every method is displayed over the same window: rows dated on or after
# forecast_start. Each mask is kept as its own array.
naive_mask = aus_production.index >= forecast_start
naive_seasonal_mask = naive_mask.copy()
mean_mask = naive_mask.copy()

#### 시각화

In [None]:
# One panel per method: the full Bricks series in black, with that method's
# forecast drawn in blue over its masked window.
panels = [
    ('Naive', naive_mask, 'Naive Forecast', 'Naive Method'),
    ('Naive_Seasonal', naive_seasonal_mask, 'Naive Seasonal Forecast', 'Naive Seasonal Method'),
    ('Mean', mean_mask, 'Mean Forecast', 'Mean Method'),
]

plt.figure(figsize=(20,6))

for position, (column, mask, forecast_label, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.plot(aus_production['Bricks'], label='Actual', color='black')
    plt.plot(aus_production.index[mask], aus_production[column][mask], label=forecast_label, color='blue')
    plt.title(panel_title)
    plt.legend()

plt.tight_layout()
plt.show()

## 예제 6-2. 선형회귀 예시

### 호주 생산량 데이터 선형 회귀

#### 필요한 패키지 불러오기

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

#### 데이터 전처리

In [None]:
# Reload the production table and index it by quarter timestamps.
aus_production = pd.read_csv('https://raw.githubusercontent.com/safeai-snu/Econometrics/refs/heads/main/Part.3/data/aus_production.csv')
aus_production['Quarter'] = pd.to_datetime(aus_production['Quarter'].apply(parse_quarter_string))
aus_production = aus_production.set_index('Quarter')

# Trend regressor: 0, 1, 2, ... in row order.
n_rows = len(aus_production)
aus_production['Time_Index'] = np.arange(n_rows)

#### 선형회귀 모델

In [None]:
# Regress Bricks on an intercept plus the time index. Rows with missing
# values are left out of the fit (missing='drop').
y = aus_production['Bricks']
X = sm.add_constant(aus_production['Time_Index'])
model = sm.OLS(endog=y, exog=X, missing='drop').fit()

# Evaluate the fitted line on every row of X.
aus_production['Linear_Forecast'] = model.predict(X)

#### 시각화

In [None]:
# Actual Bricks series with the fitted linear trend overlaid.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(aus_production['Bricks'], label='Actual', color='black')
ax.plot(aus_production['Linear_Forecast'], label='Linear Forecast', color='red', linestyle='--')
ax.set(title='Linear Regression Forecast', xlabel='Quarter', ylabel='Bricks')
ax.legend()
plt.show()

## 예제 6-3. 부트스트랩 예시

### 애플 주가 데이터 부트스트랩

#### 필요한 패키지 불러오기

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### 애플(AAPL) 주식 데이터 가져오기

In [None]:
start_date = "2018-01-01"
end_date = "2023-12-31"

# Read the first CSV column as a parsed date index so that the label slice
# below selects rows by date. With the default integer index, slicing with
# date strings raises a TypeError.
# NOTE(review): assumes the first column of the file holds the dates — confirm.
stock = pd.read_csv(
    "https://raw.githubusercontent.com/safeai-snu/Econometrics/refs/heads/main/Part.3/data/AAPL_stock.csv",
    index_col=0,
    parse_dates=True,
)
# Sort first: label slicing on a date index expects it to be ordered.
stock = stock.sort_index().loc[start_date:end_date]
stock

HTTPError: HTTP Error 404: Not Found

#### 주식 데이터 다운로드

In [None]:
# NOTE(review): `yf` is not imported in any cell of this notebook; presumably
# it is the yfinance package (`import yfinance as yf`) — confirm and add it.
# `ticker` was never assigned either; the symbol below is the one named in
# this example's heading and in the CSV file name.
ticker = "AAPL"
tick = yf.Ticker(ticker)
stock = tick.history(start=start_date, end=end_date)

#### 종가 데이터 추출

In [None]:
# Work on an explicit copy of the Close column so the new columns are written
# to an independent frame rather than to a subset of the downloaded data.
stock = stock[['Close']].copy()
# Naive forecast: each row's forecast is the previous row's close.
stock['Naive_Forecast'] = stock['Close'].shift(1)
# Naive-forecast residual: close minus the previous close.
stock['Residuals'] = stock['Close'].diff(1)

#### 부트스트랩

In [None]:
np.random.seed(0)  # fixed seed so the simulated paths are reproducible
n_days = 100
n_simulations = 100
last_value = stock['Close'].iloc[-1]

# Pool of historical one-step residuals to resample from. It is built once
# here instead of calling dropna() again for every simulated step.
residual_pool = stock['Residuals'].dropna().to_numpy()

# paths[day, sim]: every path starts at the last observed close and then adds
# one randomly drawn historical residual per step. Draws are made path by
# path, step by step, so a given seed always yields the same paths.
paths = np.empty((n_days, n_simulations))
paths[0, :] = last_value
for sim in range(n_simulations):
    for day in range(1, n_days):
        paths[day, sim] = paths[day - 1, sim] + np.random.choice(residual_pool)

simulations = pd.DataFrame(paths, index=range(n_days), columns=range(n_simulations))

# A central p% interval runs from the (1 - p) / 2 quantile to the
# 1 - (1 - p) / 2 quantile, taken across simulations for each day (axis=1).
percentiles = [80, 95]
tail_80 = (1-percentiles[0]/100)/2
tail_95 = (1-percentiles[1]/100)/2
lower_bounds = simulations.quantile(q=tail_80, axis=1)
upper_bounds = simulations.quantile(q=1-tail_80, axis=1)
lower_bounds_95 = simulations.quantile(q=tail_95, axis=1)
upper_bounds_95 = simulations.quantile(q=1-tail_95, axis=1)

#### 시각화

In [None]:
# Dates of the simulated horizon: the last observed date plus 0 .. n_days-1
# calendar days. Shared by both figures.
horizon_dates = stock.index[-1] + pd.to_timedelta(range(n_days), unit='D')

# Figure 1: observed closes followed by every simulated path.
plt.figure(figsize=(14, 7))
plt.plot(stock.index, stock['Close'], label='Actual')
for sim in simulations.columns:
    plt.plot(horizon_dates, simulations[sim], alpha=0.1, color='grey')
plt.title('Apple Stock Price Simulation using Naive Method with Bootstrapped Residuals')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

# Figure 2: observed closes followed by the two interval bands.
bands = [
    (percentiles[0], lower_bounds, upper_bounds, 0.1),
    (percentiles[1], lower_bounds_95, upper_bounds_95, 0.2),
]
plt.figure(figsize=(14, 7))
plt.plot(stock.index, stock['Close'], label='Actual')
for level, band_low, band_high, shade in bands:
    plt.fill_between(horizon_dates, band_low, band_high, color='blue', alpha=shade, label=f'{level}% Interval')
plt.title('Apple Stock Price Forecast Intervals')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

## 예제 6-4. 푸리에 변환 예시

### 소매 판매량 데이터 푸리에 변환

#### 필요한 패키지 불러오기

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.api import OLS
from statsmodels.tsa.stattools import acf

#### 데이터 전처리

In [None]:
# Load the retail table, parse the dates in 'ds', strip the thousands
# separators from 'y' before converting it to float, and index by date.
retail = pd.read_csv("https://raw.githubusercontent.com/safeai-snu/Econometrics/refs/heads/main/Part.3/data/example_retail_sales.csv")
retail = retail.assign(
    ds=pd.to_datetime(retail['ds']),
    y=retail['y'].str.replace(",", "").astype(float),
).set_index('ds')
retail

#### 주기 P 자동 탐색

In [None]:
# The autocorrelation at lag 0 is always 1, so it must be left out of the
# search; with it included, argmax always returns 0 and the fallback below is
# always taken. The log series is first-differenced so that a trend in the
# level, if present, does not dominate the autocorrelations, and lag 1 is
# skipped because a period has to span at least two observations.
log_changes = np.log(retail["y"]).diff().dropna()
autocorr = acf(log_changes, nlags=24)
P = int(np.argmax(autocorr[2:])) + 2
if autocorr[P] <= 0:
    # No positively autocorrelated lag was found: fall back to 12.
    P = 12
print(f"Detected Periodicity (P): {P} months")

#### 푸리에 변환 특성 생성 함수

In [None]:
def create_fourier_features(t, P, K):
    """
    Build the regression design matrix for a Fourier series with linear trend.

    Parameters
    ----------
    t : array-like
        Time values, one per observation.
    P : number
        Period used in the sine and cosine terms.
    K : int
        Number of harmonics (sine/cosine pairs).

    Returns
    -------
    numpy.ndarray
        Array of shape (len(t), 2*K + 2) whose columns are, in order: a
        column of ones, then sin(2*pi*k*t/P) and cos(2*pi*k*t/P) for
        k = 1..K, and finally t itself.
    """
    columns = [np.ones(len(t))]
    for harmonic in range(1, K + 1):
        angle = 2 * np.pi * harmonic * t / P
        columns.append(np.sin(angle))
        columns.append(np.cos(angle))
    columns.append(t)
    return np.column_stack(columns)

#### 푸리에 변환

In [None]:
def fit_fourier_series(df, K, P, horizon=48):
    """
    Fit a log-linear Fourier regression and forecast the held-out tail.

    The last ``horizon`` rows of ``df`` are held out. On the remaining rows,
    log(y) is regressed by OLS on the design matrix returned by
    ``create_fourier_features`` and the fit is extrapolated ``horizon`` steps
    past the end of the training rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a column 'y' of positive values. The index is used to
        build the forecast dates.
    K : int
        Number of harmonics passed to ``create_fourier_features``.
    P : number
        Period passed to ``create_fourier_features``.
    horizon : int, default 48
        Number of trailing rows to hold out and to forecast.

    Returns
    -------
    tuple
        ``(dt, forecast_adjusted, future_dates)``: the training rows with an
        added 'fitted' column on the original scale; an array of ``horizon``
        forecasts on the original scale, rescaled so that the first forecast
        equals the last fitted value; and a DatetimeIndex of ``horizon``
        dates following the training rows.

    Raises
    ------
    ValueError
        If ``horizon`` is not between 1 and ``len(df) - 1``.
    """
    if not 0 < horizon < len(df):
        raise ValueError(
            f"horizon must be between 1 and len(df) - 1, got {horizon} for {len(df)} rows"
        )

    dt = df.iloc[:-horizon].copy()

    t = np.arange(len(dt))
    X = create_fourier_features(t, P, K)
    y = np.log(dt["y"])
    model = OLS(y, X).fit()

    # Back-transform the in-sample fit, shifting its log-mean onto that of y.
    in_sample = model.predict(X)
    dt["fitted"] = np.exp(in_sample) * np.exp(y.mean() - in_sample.mean())

    future_t = np.arange(len(dt), len(dt) + horizon)
    X_future = create_fourier_features(future_t, P, K)
    out_of_sample = model.predict(X_future)
    forecast = np.exp(out_of_sample) * np.exp(y.mean() - out_of_sample.mean())
    # Rescale so that the forecast starts at the last fitted value.
    adjustment_factor = dt["fitted"].iloc[-1] / forecast[0]
    forecast_adjusted = forecast * adjustment_factor

    # Continue the training index at its own spacing, so that forecast dates
    # fall on the same day-of-period as the data; fall back to calendar
    # months when no regular frequency can be inferred.
    try:
        step = pd.infer_freq(dt.index)
    except (TypeError, ValueError):
        step = None
    if step is None:
        step = pd.DateOffset(months=1)
    # The range starts at the last training date, which is then dropped.
    future_dates = pd.date_range(start=dt.index[-1], periods=horizon + 1, freq=step)[1:]

    return dt, forecast_adjusted, future_dates

#### 푸리에 예측 및 시각화

In [None]:
fig, axes = plt.subplots(3, 2, figsize=(15, 10))
axes = axes.flatten()

# One panel per number of harmonics, K = 1..6: actual series, in-sample fit
# and out-of-sample forecast.
for i, ax in enumerate(axes):
    K = i + 1
    modified_df, forecast, future_dates = fit_fourier_series(retail, K, P)

    ax.plot(retail.index, retail['y'], label='Actual', color='black')
    ax.plot(modified_df.index, modified_df['fitted'], label=f'Fourier(K={K})', color='red', linestyle="dashed")
    ax.plot(future_dates, forecast, label='Forecast', color='blue', linestyle="dotted")

    ax.set(title=f'Fourier Transform Prediction (K={K})', xlabel="Date", ylabel="Turnover")
    ax.legend()

plt.tight_layout()
plt.show()

## 예제 6-5. 추세 모델링 예시

### 소매 판매 데이터 결정론적 추세 모델링

#### 필요한 패키지 불러오기

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant

#### 데이터 전처리

In [None]:
# Load the retail table, parse the dates in 'ds' and strip the thousands
# separators from 'y' before converting it to float.
retail = pd.read_csv("https://raw.githubusercontent.com/safeai-snu/Econometrics/refs/heads/main/Part.3/data/example_retail_sales.csv")
retail = retail.assign(
    ds=pd.to_datetime(retail['ds']),
    y=retail['y'].str.replace(",", "").astype(float),
)

# Time regressor: elapsed time since the first observation, expressed in
# units of np.timedelta64(1, 'm').
# NOTE(review): 'm' is NumPy's unit code for minutes ('M' is months), so Time
# is measured in minutes — confirm that this is the intended unit.
start_date = retail['ds'].min()
elapsed = retail['ds'] - start_date
retail['Time'] = elapsed / np.timedelta64(1, 'm')

#### 모델 피팅

In [None]:
# Exponential trend as a log-linear regression: log(y) on an intercept and Time.
X = add_constant(retail['Time'])
y = np.log(retail['y'])
model = OLS(endog=y, exog=X).fit()

#### 추세 예측

In [None]:
# In-sample fit on the log scale (the model was fitted to log(y)).
retail['Fitted'] = model.predict(X)
# Back-transform the fitted values to the original scale of y.
retail['Exp_Fitted'] = np.exp(retail['Fitted'])

#### 시각화

In [None]:
# Residuals on the log scale: log(y) minus the fitted log values.
residuals = y - retail['Fitted']

# Figure 1: actual sales against the back-transformed fitted trend.
fit_fig, fit_ax = plt.subplots(figsize=(10, 5))
fit_ax.plot(retail['ds'], retail['y'], label='Actual')
fit_ax.plot(retail['ds'], retail['Exp_Fitted'], label='Fitted', linestyle='--')
fit_ax.set(xlabel='Date', ylabel='Sales', title='Retail Sales – Exponential Trend Regression')
fit_ax.legend()
plt.show()

# Figure 2: residuals over time, with a zero reference line.
resid_fig, resid_ax = plt.subplots(figsize=(10, 5))
resid_ax.plot(retail['ds'], residuals, label='Residuals')
resid_ax.axhline(y=0, color='black', linestyle='--')
resid_ax.set(xlabel='Date', ylabel='Residual', title='Retail Sales – Exponential Trend Residual Plot')
resid_ax.legend()
plt.show()

## 예제 6-6. 계절성 모델링 예시

### 호주 맥주 판매량 데이터 결정론적 계절성 모델링

#### 필요한 패키지 불러오기

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.dates as mdates

#### 데이터 불러오기

In [None]:
# The Quarter column holds 'YYYY Qq' labels. Convert them with
# parse_quarter_string, the same preprocessing the earlier examples apply to
# this file, instead of relying on read_csv's parse_dates, so that the index
# is a real DatetimeIndex. The quarter-start frequency is then declared
# explicitly so the time-series models below get a regular, dated index.
aus_beer = pd.read_csv('https://raw.githubusercontent.com/safeai-snu/Econometrics/refs/heads/main/Part.3/data/aus_production.csv')
aus_beer['Quarter'] = pd.to_datetime(aus_beer['Quarter'].apply(parse_quarter_string))
aus_beer = aus_beer.set_index('Quarter').asfreq('QS')

#### 시계열 분해

In [None]:
# Additive decomposition of Beer into trend, seasonal and residual
# components, using a seasonal cycle of 4 observations.
decomposition = seasonal_decompose(aus_beer['Beer'], model='additive', period=4)

#### 분해 시각화

In [None]:
fig, ax = plt.subplots(4, 1, figsize=(10, 8))

# One row per component. The residual is drawn as a scatter against the
# index; the other components are drawn as lines.
components = [
    ('Observed', decomposition.observed),
    ('Trend', decomposition.trend),
    ('Seasonal', decomposition.seasonal),
    ('Residual', decomposition.resid),
]

for axis, (component_name, component) in zip(ax, components):
    if component_name == 'Residual':
        axis.scatter(aus_beer.index, component, alpha=0.5)
    else:
        axis.plot(component)
    axis.set_title(component_name, fontsize=12)
    axis.set_ylabel('Value')

    # Yearly ticks labelled with the four-digit year, rotated for legibility.
    axis.xaxis.set_major_locator(mdates.YearLocator())
    axis.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.setp(axis.get_xticklabels(), rotation=45, ha='right')

plt.tight_layout()
plt.show()

#### 계절성 더미 변수 추가

In [None]:
# Seasonal dummy variables: one indicator column per season of the
# 4-observation cycle. Dummies of the raw index would instead give one column
# per timestamp, i.e. as many regressors as observations and none of them
# seasonal.
# Seasons are labelled by row position (0..3 repeating), which assumes
# consecutive quarterly rows, the same assumption behind period=4 in the
# decomposition. The first season is dropped and serves as the baseline.
# NOTE(review): the SARIMAX model fitted below also applies seasonal
# differencing (D=1), which presumably absorbs a fixed seasonal pattern —
# confirm whether both are intended.
season_of_row = pd.Series(np.arange(len(aus_beer)) % 4, index=aus_beer.index)
aus_beer_dummies = pd.get_dummies(season_of_row, prefix='season', drop_first=True, dtype=float)

#### 모델 피팅 (더미 변수 추가)

In [None]:
# SARIMA(1, 1, 1)x(1, 1, 1, 4) on Beer, with the dummy variables supplied as
# exogenous regressors.
sarima_model = SARIMAX(
    aus_beer['Beer'],
    exog=aus_beer_dummies,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 4),
)
sarima_results = sarima_model.fit()

# Residuals of the fitted model, examined in the following cell.
residuals = sarima_results.resid

#### 잔차 시각화

In [None]:
# Residuals over time.
plt.figure(figsize=(20, 8))
plt.plot(residuals)
plt.title('Residuals from SARIMA Model')
plt.show()

# Residual autocorrelation. The axes of the figure created here are passed to
# plot_acf explicitly; without `ax`, plot_acf opens a figure of its own and
# this one is shown empty.
plt.figure(figsize=(20, 8))
plot_acf(residuals, lags=40, ax=plt.gca())
plt.show()

# Residual distribution.
plt.figure(figsize=(20, 8))
sns.histplot(residuals, bins=20, kde=True)
plt.title('Histogram of Residuals')
plt.show()

#### 모델 시각화

In [None]:
# Scatter of the model's in-sample fitted values against the observed values.
fitted_values = sarima_results.fittedvalues
plt.figure(figsize=(20, 8))
scatter_ax = sns.scatterplot(x=aus_beer['Beer'], y=fitted_values)
scatter_ax.set(xlabel='Actual Values', ylabel='Fitted Values', title='Actual vs Fitted Values')
plt.show()

#### 예측 시각화

In [None]:
# Forecast 8 steps ahead, supplying the last 8 rows of the dummy table as the
# exogenous values for the forecast period.
future_exog = aus_beer_dummies.iloc[-8:]
forecast = sarima_results.get_forecast(steps=8, exog=future_exog)
forecast_ci = forecast.conf_int()
ci_lower = forecast_ci.iloc[:, 0]
ci_upper = forecast_ci.iloc[:, 1]

# Observed series, point forecast and its confidence band.
plt.figure(figsize=(20, 8))
plt.plot(aus_beer.index, aus_beer['Beer'], label='Actual', color='red')
plt.plot(forecast_ci.index, forecast.predicted_mean, label='Forecast')
plt.fill_between(forecast_ci.index, ci_lower, ci_upper, color='lightgrey', alpha=0.5)
plt.title('Beer Production Forecast')
plt.legend()
plt.show()