In [2]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests
import numpy as np

In [3]:
# Load the normalized double-pendulum data from a path relative to the notebook.
dataset = pd.read_csv("./dataset/double_pendulum_data_ORIGINAL_NORMALIZED.csv")

In [4]:
# Keep only the columns whose label does not start with "Unnamed"
# (presumably a stray index column written into the CSV -- confirm against the file).
dataset = dataset.loc[:, ~dataset.columns.str.match("Unnamed")]

In [5]:
# Hold out the last n_obs rows as the test window; every earlier row is training data.
n_obs = 15
X_train = dataset[:-n_obs]
X_test = dataset[-n_obs:]
print(X_train.shape, X_test.shape)

(17552, 8) (15, 8)


In [6]:
# Largest lag examined by grangers_causality_matrix (it reads this notebook-level name).
maxlag=12
# Key of the statistic to read from each grangercausalitytests result.
# The result dict uses underscores ('ssr_chi2test'); a hyphenated key raises KeyError.
test = 'ssr_chi2test'


# One set: Granger causality test for a single pair of series

In [8]:
# List the column labels available for the pairwise tests below.
X_train.columns

Index(['0', '1', '2', '3', '4', '5', '6', '7'], dtype='object')

In [13]:
# Per the statsmodels documentation, this tests whether the second column ('1')
# Granger-causes the first column ('0'), once for each lag from 1 up to maxlag=3.
grangercausalitytests(X_train[['0','1']], maxlag=3)



Granger Causality
number of lags (no zero) 1
ssr based F test:         F=0.0049  , p=0.9443  , df_denom=17548, df_num=1
ssr based chi2 test:   chi2=0.0049  , p=0.9443  , df=1
likelihood ratio test: chi2=0.0049  , p=0.9443  , df=1
parameter F test:         F=0.0049  , p=0.9443  , df_denom=17548, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=0.0025  , p=0.9975  , df_denom=17545, df_num=2
ssr based chi2 test:   chi2=0.0050  , p=0.9975  , df=2
likelihood ratio test: chi2=0.0050  , p=0.9975  , df=2
parameter F test:         F=0.0025  , p=0.9975  , df_denom=17545, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=0.5386  , p=0.6558  , df_denom=17542, df_num=3
ssr based chi2 test:   chi2=1.6164  , p=0.6557  , df=3
likelihood ratio test: chi2=1.6163  , p=0.6557  , df=3
parameter F test:         F=0.5386  , p=0.6558  , df_denom=17542, df_num=3


{1: ({'ssr_ftest': (0.004882027822954043, 0.9442967260716967, 17548.0, 1),
   'ssr_chi2test': (0.004882862452739139, 0.9442911762863252, 1),
   'lrtest': (0.004882861743681133, 0.9442911803245854, 1),
   'params_ftest': (0.004882027822670938, 0.9442967260716967, 17548.0, 1.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x223eed98580>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x223eed98ac0>,
   array([[0., 1., 0.]])]),
 2: ({'ssr_ftest': (0.002475680462855399, 0.9975273818544483, 17545.0, 2),
   'ssr_chi2test': (0.0049527719718566255, 0.9975266777283458, 2),
   'lrtest': (0.0049527712690178305, 0.9975266780788961, 2),
   'params_ftest': (0.0024756804631747057, 0.9975273818544483, 17545.0, 2.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x223eed986a0>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x223eed986d0>,
   array([[0., 0., 1., 0., 0.],
          [0., 0., 0., 1., 0.]])]),
 3: ({'

# Granger Causality

In [6]:
def grangers_causality_matrix(X_train, variables, test = 'ssr_chi2test', verbose=False, maxlag=None):
    """Build a matrix of pairwise Granger-causality p-values.

    For every ordered pair ``(r, c)`` drawn from ``variables``, statsmodels'
    ``grangercausalitytests`` is run on ``X_train[[r, c]]`` for lags ``1..maxlag``.
    Per the statsmodels documentation this tests whether the second column (``c``)
    Granger-causes the first (``r``). The p-value of the chosen ``test`` is rounded
    to 4 decimals for each lag, and the minimum over all lags is stored.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Frame containing one column for each entry of ``variables``.
    variables : iterable
        Column labels to test against each other. Labels need not be strings.
    test : str, default 'ssr_chi2test'
        Key of the statistic to read from each lag's result dict.
    verbose : bool, default False
        If True, print the per-lag p-values for every pair.
    maxlag : int, optional
        Largest lag to test. When omitted, the notebook-level ``maxlag`` variable
        is used (the original behaviour); if that is not defined either, 12 is used.

    Returns
    -------
    pandas.DataFrame
        Square frame of minimum p-values. Rows are labelled ``'<var>_y'`` (the
        response) and columns ``'<var>_x'`` (the candidate cause).
    """
    if maxlag is None:
        # Backward compatibility: earlier versions read `maxlag` from notebook state.
        maxlag = globals().get('maxlag', 12)

    variables = list(variables)
    p_matrix = pd.DataFrame(
        np.zeros((len(variables), len(variables))),
        columns=variables,
        index=variables,
    )

    for c in variables:
        for r in variables:
            # verbose=False silences statsmodels' own printing; our `verbose`
            # flag only controls the one-line summary below.
            test_result = grangercausalitytests(X_train[[r, c]], maxlag=maxlag, verbose=False)
            # Results are keyed by lag (1-based); element [0] is the dict of statistics,
            # and index 1 of each statistic tuple is its p-value.
            p_values = [round(test_result[lag][0][test][1], 4) for lag in range(1, maxlag + 1)]
            if verbose:
                print(f'Y = {r}, X = {c}, P Values = {p_values}')
            p_matrix.loc[r, c] = np.min(p_values)

    # f-strings keep this working when the column labels are not strings.
    p_matrix.columns = [f'{var}_x' for var in variables]
    p_matrix.index = [f'{var}_y' for var in variables]
    return p_matrix

#grangers_causality_matrix(dataset, variables = dataset.columns)

In [7]:
# Pairwise p-value matrix over every column. Note that this is computed on the full
# `dataset`, not on the X_train split.
grangers_causality_matrix(dataset, variables = dataset.columns)

Unnamed: 0,0_x,1_x,2_x,3_x,4_x,5_x,6_x,7_x
0_y,1.0,0.0,0.0,0.0,0.0,0.0028,0.0,0.1434
1_y,0.0,1.0,0.0002,0.0013,0.0,0.0,0.3211,0.0
2_y,0.0,0.0,1.0,0.0,0.0,0.2688,0.0,0.1348
3_y,0.0,0.6628,0.0039,1.0,0.0,0.0,0.0,0.0
4_y,0.0,0.0,0.0,0.0,1.0,0.0,0.0002,0.0
5_y,0.006,0.0,0.5264,0.0,0.0,1.0,0.0,0.0
6_y,0.0,0.0934,0.0,0.0,0.0,0.0,1.0,0.0
7_y,0.6159,0.0,0.4198,0.0,0.0,0.0,0.0,1.0


In [8]:
# Display the full dataset frame (pandas truncates the middle rows in the output).
dataset

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.987464,0.493503,0.920531,1.000000,0.500039,0.514157,0.509739,0.499933
1,0.987464,0.493503,0.920531,1.000000,0.500039,0.514156,0.509739,0.499932
2,0.987464,0.493503,0.920531,1.000000,0.500039,0.514156,0.509739,0.499932
3,0.987464,0.493503,0.920531,1.000000,0.500039,0.514155,0.509739,0.499932
4,0.987464,0.493503,0.920531,1.000000,0.500039,0.514155,0.509739,0.499932
...,...,...,...,...,...,...,...,...
17562,0.818654,0.145605,0.481150,0.212124,0.727679,0.816785,0.620238,0.855509
17563,0.827591,0.155214,0.483312,0.222034,0.720622,0.821668,0.621282,0.853087
17564,0.827591,0.155214,0.483312,0.222034,0.720622,0.821668,0.621282,0.853087
17565,0.836250,0.164973,0.485492,0.231874,0.713773,0.826351,0.622121,0.850420


# VAR

In [9]:
import statsmodels.tsa.api as smt

In [10]:
# Fit a vector autoregression on the training split. With ic='aic', statsmodels
# chooses the lag order (at most maxlags=15) by the Akaike information criterion.
mod = smt.VAR(X_train)
res = mod.fit(maxlags=15, ic='aic')
print(res.summary())

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Fri, 05, Aug, 2022
Time:                     17:02:58
--------------------------------------------------------------------
No. of Equations:         8.00000    BIC:                   -96.4233
Nobs:                     17539.0    HQIC:                  -96.6730
Log likelihood:           650595.    FPE:                9.16672e-43
AIC:                     -96.7956    Det(Omega_mle):     8.73929e-43
--------------------------------------------------------------------
Results for equation 0
           coefficient       std. error           t-stat            prob
------------------------------------------------------------------------
const        -0.000918         0.000171           -5.368           0.000
L1.0          0.993663         0.009236          107.584           0.000
L1.1         -0.000124         0.007239           -0.017           0.986
L1.2         -0.003401 