# Load packages

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib as mpl
import yfinance as yf
import seaborn as sns
import missingno as msno
import warnings

from matplotlib import pyplot as plt
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.api import VAR

# Configuration

In [None]:
# Korean font
# Best-effort: point matplotlib at the Windows "malgun" font file so Korean
# text can be rendered. On machines without that file the default font is kept.
from matplotlib import font_manager, rc

font_path = "C:/Windows/Fonts/malgun.TTF"
try:
    Kfont = font_manager.FontProperties(fname=font_path).get_name()
except Exception as err:
    # Keep going with the default font, but say why instead of failing silently.
    # (A bare ``except`` would also have swallowed KeyboardInterrupt/SystemExit.)
    warnings.warn(f"Korean font not loaded from {font_path!r}: {err}")
else:
    rc('font', family=Kfont)

# Fix minus presentation
# Use an ASCII hyphen for negative numbers so they still render when the
# selected font has no glyph for the Unicode minus sign.
mpl.rcParams['axes.unicode_minus'] = False

In [None]:
# Fix random seed
def fix_random_seed(seed=42):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module and NumPy's global generator with
    ``seed`` and stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment
    variable.

    NOTE: ``PYTHONHASHSEED`` is read when an interpreter starts, so setting
    it here only influences Python processes launched afterwards.
    """
    import random

    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)


fix_random_seed()

# Load preprocessed data

In [None]:
# Read the preprocessed dataset, rename the Korean date column ('날짜') to
# 'date', and use it as the row index.
df_core = (
    pd.read_csv('data/preprocessed_data.csv')
    .rename(columns={'날짜': 'date'})
    .set_index('date')
)
df_core

# Analysis with VAR model

In [None]:
# Build a vector autoregression model with every column of df_core as an
# endogenous variable.
mdl_var = VAR(endog=df_core)

In [None]:
# Lag order passed to the VAR fit; the Granger-causality result table below
# reuses it to size its per-lag slots.
var_max_lag = 4
rslt_var = mdl_var.fit(maxlags=var_max_lag)
# Uncomment to inspect the full estimation summary:
# rslt_var.summary()

In [None]:
# Boolean matrix: True where a VAR coefficient's p-value is below 0.05.
# The vectorized comparison yields the same result as the former element-wise
# lambda (NaN compares as False either way) and avoids DataFrame.applymap,
# which recent pandas releases deprecate.
var_pval_tf_mat = rslt_var.pvalues < 0.05
var_pval_tf_mat.head()

In [None]:
# plot - VAR model coefficients p-value
# Draw the transposed True/False significance matrix as an annotated heatmap.
fig_var_pval, ax_var_pval = plt.subplots(1, 1, figsize=(24, 20))

heat_pval = sns.heatmap(
    var_pval_tf_mat.T,
    square=True,
    ax=ax_var_pval,
    annot=True,
    cbar=False,
    linecolor='grey',
    linewidths=0.1,  # seaborn's documented name for the cell-border width
)
# Re-apply the existing tick labels only to change their font size.
heat_pval.set_xticklabels(heat_pval.get_xticklabels(), fontsize=12)
heat_pval.set_yticklabels(heat_pval.get_yticklabels(), fontsize=12)
# 'bold' is a font weight, not a font family: passing it as ``family`` makes
# matplotlib look for a font called "bold" (and drops the configured font)
# without actually bolding the title.
ax_var_pval.set_title(
    'Statistical significances of VAR coefficients by p-values',
    fontsize=15,
    fontweight='bold',
)

In [None]:
# Analyze causality between two time series variables with Granger Causality Test
selc_granger_test = [
    # (feature x, feature y, time_lag)
    ('exchange_rate', 'kospi', 2),
    ('wti_crude_future', 'carbon_credit', 4),
]

# Row/column labels of the result table: all x features followed by all y
# features. Duplicates are dropped (first occurrence wins) so that a
# (row, column) label pair always addresses exactly one cell; with repeated
# labels the per-cell assignment below would silently write to a copy.
gct_features = list(dict.fromkeys(
    [x for x, _, _ in selc_granger_test] + [y for _, y, _ in selc_granger_test]
))

df_gct = pd.DataFrame(index=gct_features, columns=gct_features, dtype='object')

# Give every cell its own object Series with one slot per lag 1..var_max_lag.
# DataFrame.applymap is deprecated in favour of DataFrame.map in recent pandas;
# use whichever this pandas version provides.
_map_cells = getattr(pd.DataFrame, 'map', None) or pd.DataFrame.applymap
df_gct = _map_cells(
    df_gct,
    lambda _: pd.Series(index=np.arange(1, var_max_lag + 1), dtype='object'),
)

for factor_x, factor_y, time_lag in selc_granger_test:
    # Run the test in both directions. grangercausalitytests examines whether
    # the SECOND column Granger-causes the FIRST, hence the [effect, cause]
    # column order.
    for cause, effect in ((factor_x, factor_y), (factor_y, factor_x)):
        print(f'\n[{cause}][t-{time_lag}] -> [{effect}][t]', end='')
        lag_results = df_gct.at[cause, effect]
        # NOTE(review): `verbose` appears to be deprecated in recent
        # statsmodels releases; it is kept because the printed statistics
        # follow the header line printed above.
        lag_results[time_lag] = grangercausalitytests(
            df_core[[effect, cause]],
            maxlag=[time_lag],
            verbose=True,
        )

df_gct