# Time Series Econometrics

This notebook covers advanced time series analysis techniques commonly used in econometrics.

## Topics Covered
1. Stationarity testing
2. Autocorrelation analysis
3. ARIMA modeling
4. Forecasting
5. Granger causality
6. Model selection with information criteria

## Setup

In [None]:
# Install required packages (uncomment if running in Google Colab)
# %pip install pandas numpy matplotlib seaborn scipy statsmodels -q

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller, acf, pacf, grangercausalitytests
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import warnings

# Notebook-wide display settings.
# NOTE: the 'ignore' filter silences every warning category for the rest of
# the session, not only cosmetic ones.
warnings.simplefilter('ignore')
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

print("Libraries imported successfully!")

## Load Data

In [None]:
# Read the sample dataset: try the repository-relative path first and fall
# back to the copy hosted on GitHub when the local file is not found.
try:
    df = pd.read_csv('../data/sample_economic_data.csv')
except FileNotFoundError:
    url = 'https://raw.githubusercontent.com/koiti-yano/colab_and_economics/main/data/sample_economic_data.csv'
    df = pd.read_csv(url)

# Parse the 'date' column as datetimes and promote it to the index
df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')

print("Data loaded successfully!")
print(df.head())

## 1. Stationarity Testing

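Many time series models require stationary data. The Augmented Dickey-Fuller (ADF) test checks for a unit root: its null hypothesis is that the series has a unit root (is non-stationary), so a small p-value is evidence in favor of stationarity.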

In [None]:
def test_stationarity(series, name='Series', alpha=0.05):
    """
    Run the Augmented Dickey-Fuller (ADF) test on a series and print a report.

    Parameters
    ----------
    series : object with a ``dropna()`` method (e.g. a pandas Series)
        Data to test. Missing values are dropped before the test is run.
    name : str, default 'Series'
        Label used in the printed report.
    alpha : float, default 0.05
        Significance level: the series is reported as stationary when the
        ADF p-value is strictly below this threshold.

    Returns
    -------
    bool
        True when the p-value is below ``alpha`` (null hypothesis rejected),
        False otherwise.
    """
    result = adfuller(series.dropna())
    # Positions 0, 1 and 4 hold the test statistic, the p-value and the
    # dictionary of critical values respectively.
    adf_stat, p_value, critical_values = result[0], result[1], result[4]

    print(f'\n{name} - Augmented Dickey-Fuller Test:')
    print(f'ADF Statistic: {adf_stat:.4f}')
    print(f'p-value: {p_value:.4f}')
    print('Critical Values:')
    for level, threshold in critical_values.items():
        print(f'  {level}: {threshold:.4f}')

    # Decide once so the printed verdict and the returned flag cannot diverge
    stationary = bool(p_value < alpha)
    if stationary:
        print(f'\nResult: {name} is STATIONARY (reject null hypothesis)')
    else:
        print(f'\nResult: {name} is NON-STATIONARY (fail to reject null hypothesis)')

    return stationary

In [None]:
# Run the ADF test on the GDP level series and keep the boolean verdict
gdp_levels = df['gdp_billions']
is_stationary = test_stationarity(gdp_levels, 'GDP')

In [None]:
# When the GDP levels are not stationary, repeat the test on the first
# difference and show both series one above the other.
if not is_stationary:
    gdp_diff = df['gdp_billions'].diff().dropna()
    test_stationarity(gdp_diff, 'GDP (First Difference)')

    fig, axes = plt.subplots(2, 1, figsize=(14, 10))

    # (axis, series, title, y-label) for each panel; both share the same styling
    panels = [
        (axes[0], df['gdp_billions'], 'GDP - Original Series', 'GDP (Billions)'),
        (axes[1], gdp_diff, 'GDP - First Difference', 'Change in GDP'),
    ]
    for panel_ax, panel_series, panel_title, panel_ylabel in panels:
        panel_ax.plot(panel_series.index, panel_series)
        panel_ax.set_title(panel_title, fontsize=14, fontweight='bold')
        panel_ax.set_ylabel(panel_ylabel, fontsize=12)
        panel_ax.grid(True, alpha=0.3)

    # Only the bottom panel carries the x-axis label
    axes[1].set_xlabel('Date', fontsize=12)

    plt.tight_layout()
    plt.show()

## 2. Autocorrelation Analysis

ACF and PACF plots help identify appropriate ARIMA model parameters.

In [None]:
# Prepare data - use unemployment rate (more likely to be stationary)
unemployment = df['unemployment_rate'].dropna()

# Partial autocorrelations can only be computed for lags below half the
# sample size, so cap the 40 requested lags when the sample is short
# instead of letting plot_pacf raise.
n_lags = min(40, len(unemployment) // 2 - 1)

# Plot ACF (top) and PACF (bottom) with the same number of lags
fig, axes = plt.subplots(2, 1, figsize=(14, 10))

plot_acf(unemployment, lags=n_lags, ax=axes[0])
axes[0].set_title('Autocorrelation Function (ACF)', fontsize=14, fontweight='bold')

plot_pacf(unemployment, lags=n_lags, ax=axes[1], method='ywm')
axes[1].set_title('Partial Autocorrelation Function (PACF)', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

## 3. ARIMA Modeling

ARIMA (AutoRegressive Integrated Moving Average) models are widely used for time series forecasting.

In [None]:
# Estimate an ARIMA model on the unemployment rate.
# The (p, d, q) = (1, 1, 1) order is only an illustrative choice.
arima_order = (1, 1, 1)
model = ARIMA(unemployment, order=arima_order)
results = model.fit()

print("ARIMA Model Summary:")
print(results.summary())

In [None]:
# Plot fitted values vs actual
# For a model with d > 0 the first d in-sample predictions come from the
# initial state rather than from observed data (they sit near zero), which
# would distort the y-axis. Drop them so both lines share the same scale.
n_skip = getattr(results.model, 'k_diff', 0)
fitted = results.fittedvalues.iloc[n_skip:]

fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(unemployment.index, unemployment.values, label='Actual', linewidth=2)
ax.plot(fitted.index, fitted.values,
        label='Fitted', linewidth=2, linestyle='--')

ax.set_title('ARIMA Model Fit - Unemployment Rate', fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Unemployment Rate (%)', fontsize=12)
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Residual diagnostics for the fitted ARIMA model
diag_fig = results.plot_diagnostics(figsize=(14, 10))
diag_fig.tight_layout()
plt.show()

## 4. Forecasting

Use the fitted ARIMA model to make predictions.

In [None]:
# Forecast 12 periods ahead
forecast_periods = 12

# A single get_forecast() call provides both the point forecasts and the
# matching confidence intervals, so the two can never get out of sync.
forecast_result = results.get_forecast(steps=forecast_periods)
forecast = forecast_result.predicted_mean
forecast_ci = forecast_result.conf_int(alpha=0.05)  # 95% interval

# Build the forecast dates.
# Prefer the dates attached to the forecast itself; when the forecast does not
# carry a DatetimeIndex, extend the historical index using its inferred
# frequency and fall back to one-month steps if none can be inferred.
# This avoids hard-coding a frequency alias that differs between pandas versions.
last_date = unemployment.index[-1]
if isinstance(forecast.index, pd.DatetimeIndex):
    forecast_dates = forecast.index
else:
    try:
        step = pd.infer_freq(unemployment.index)
    except (TypeError, ValueError):
        step = None
    if step is None:
        step = pd.DateOffset(months=1)
    # Start at the last observed date and drop it, keeping only future dates
    forecast_dates = pd.date_range(start=last_date,
                                   periods=forecast_periods + 1,
                                   freq=step)[1:]

# Plot forecast
fig, ax = plt.subplots(figsize=(14, 7))

# Plot historical data
ax.plot(unemployment.index, unemployment.values, label='Historical', linewidth=2)

# Plot forecast
ax.plot(forecast_dates, forecast.values, label='Forecast', linewidth=2, color='red')

# Plot confidence interval (column 0 = lower bound, column 1 = upper bound)
ax.fill_between(forecast_dates,
                forecast_ci.iloc[:, 0].values,
                forecast_ci.iloc[:, 1].values,
                alpha=0.3, color='red', label='95% Confidence Interval')

ax.set_title('Unemployment Rate Forecast (ARIMA)', fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Unemployment Rate (%)', fontsize=12)
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\nForecast values:")
for date, value in zip(forecast_dates, forecast.values):
    print(f"{date.strftime('%Y-%m')}: {value:.2f}%")

## 5. Granger Causality Test

Test whether past values of one time series help predict another, beyond what the second series' own past values already explain.

In [None]:
# Granger causality: does inflation help predict unemployment?
# NOTE(review): the column order matters for grangercausalitytests - confirm
# that "second column causes first column" is the direction being tested.
gc_columns = ['unemployment_rate', 'inflation_rate']
gc_data = df[gc_columns].dropna()

print("Granger Causality Test: Does Inflation cause Unemployment?\n")
print("Null Hypothesis: Inflation does NOT Granger-cause Unemployment\n")

# Evaluate every lag order from 1 up to max_lag
max_lag = 4
gc_results = grangercausalitytests(gc_data, maxlag=max_lag, verbose=True)

In [None]:
# Summarise the SSR-based F-test p-value for each lag order
print("\nSummary of p-values:")
for lag in range(1, max_lag + 1):
    lag_tests = gc_results[lag][0]
    p_value = lag_tests['ssr_ftest'][1]
    verdict = '(Significant)' if p_value < 0.05 else '(Not significant)'
    print(f"Lag {lag}: p-value = {p_value:.4f} {verdict}")

## 6. Model Selection with Information Criteria

Compare different ARIMA specifications using AIC and BIC.

In [None]:
# Grid-search ARIMA(p, d, q) specifications and rank them by AIC.
# NOTE(review): information criteria of models with different d are computed
# on differently differenced data and are generally not directly comparable -
# consider fixing d from the stationarity test and searching over p and q only.
results_list = []
failed_orders = []

for p in range(3):
    for d in range(2):
        for q in range(3):
            order = (p, d, q)
            try:
                fitted = ARIMA(unemployment, order=order).fit()
            except Exception as exc:
                # Keep going, but remember which specifications failed and why
                # instead of silently discarding the error.
                failed_orders.append((order, exc))
                continue
            results_list.append({
                'order': order,
                'AIC': fitted.aic,
                'BIC': fitted.bic
            })

if failed_orders:
    print("Specifications that could not be estimated:")
    for order, exc in failed_orders:
        print(f"  ARIMA{order}: {type(exc).__name__}: {exc}")

# Explicit columns keep the sort valid even when no specification succeeded
results_df = pd.DataFrame(results_list, columns=['order', 'AIC', 'BIC'])
results_df = results_df.sort_values('AIC')

print("\nTop 5 Models by AIC:")
print(results_df.head())

if results_df.empty:
    print("\nNo ARIMA specification could be estimated.")
else:
    print("\nBest model by AIC:", results_df.iloc[0]['order'])

## Conclusion

This notebook covered:
- Testing for stationarity with ADF test
- Analyzing autocorrelation with ACF and PACF
- Fitting ARIMA models
- Forecasting with confidence intervals
- Testing Granger causality
- Model selection using information criteria

## Further Reading

- Vector Autoregression (VAR) for multivariate time series
- Cointegration and Error Correction Models (ECM)
- GARCH models for volatility
- Seasonal ARIMA (SARIMA)
- State Space Models