In [34]:
import numpy as np
import statsmodels.api as sm
import pandas as pd
import os
import statsmodels.stats.diagnostic
import matplotlib.pyplot as plt
import scipy.sparse as sp
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import coint
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tsa.vector_ar.vecm import coint_johansen



In [35]:
raw_df=pd.read_csv('D:/var/var-demo/sample/VAR_Sample.csv')

In [36]:
print(raw_df.head())

        Time      Var1      Var2      Var3      Var4      Var5      Var6
0  1946/5/15 -0.449176 -0.452652 -0.441546 -0.549542  1.143237  0.874400
1  1946/5/15  1.416695 -0.452652 -0.441546 -0.549542 -0.628476  0.471380
2  1946/5/15 -0.759093 -0.452652 -0.441546 -0.549542  2.939909 -0.432916
3  1946/5/16 -0.613285 -0.452652 -0.441546 -0.434345  2.740102 -0.511441
4  1946/5/16 -0.759093 -0.452652 -0.441546  0.336319  2.321493 -0.660492


In [37]:
type(raw_df['Time'])

pandas.core.series.Series

In [38]:
print(raw_df['Time'].head())

0    1946/5/15
1    1946/5/15
2    1946/5/15
3    1946/5/16
4    1946/5/16
Name: Time, dtype: object


In [39]:
print(raw_df['Var1'].head())

0   -0.449176
1    1.416695
2   -0.759093
3   -0.613285
4   -0.759093
Name: Var1, dtype: float64


In [40]:
raw_df['Time']=pd.to_datetime(raw_df['Time'],errors='coerce',format='%Y/%m/%d')

In [41]:
print(raw_df['Time'].head())

0   1946-05-15
1   1946-05-15
2   1946-05-15
3   1946-05-16
4   1946-05-16
Name: Time, dtype: datetime64[ns]


In [42]:
month_df=raw_df.set_index('Time').resample('M')['Var1','Var2','Var3','Var4','Var5','Var6'].sum()

In [43]:
print(month_df.head())

                 Var1       Var2       Var3       Var4        Var5       Var6
Time                                                                         
1946-05-31  -2.281943 -13.191233 -13.546560 -10.475815   48.745735   1.455899
1946-06-30 -28.072774 -21.898958 -17.556361 -10.173858   79.373126  17.609288
1946-07-31 -36.668064 -29.920707 -26.042887 -33.321862  108.520508  38.330706
1946-08-31 -39.601941 -32.426416 -18.212683 -26.376097  109.402345  35.585008
1946-09-30 -48.226357 -32.703090 -34.956209 -34.921183  176.411931   4.906370


In [44]:
print(month_df.columns)

Index(['Var1', 'Var2', 'Var3', 'Var4', 'Var5', 'Var6'], dtype='object')


ADF检验


In [45]:
t_list=[]
name_list=[]
p_list=[]

for i in month_df.columns:
     name_list.append(i)
     test_output=list(adfuller(month_df.loc[:,i]))
     t_list.append(test_output[0])
     p_list.append(test_output[1])
     adf1_df=pd.DataFrame({'name':name_list,'t_value':t_list,'p_value':p_list})

In [46]:
print(adf1_df.head())

   name   t_value   p_value
0  Var1 -2.279710  0.178595
1  Var2 -1.911637  0.326607
2  Var3 -2.559231  0.101720
3  Var4 -1.276212  0.639984
4  Var5 -5.391270  0.000004


In [47]:
diff_month_df=month_df.diff(periods=1).dropna()
t_list1=[]
name_list1=[]
p_list1=[]
for i in diff_month_df.columns:
  name_list1.append(i)
  test_output1=list(adfuller(diff_month_df.loc[:,i]))
  t_list1.append(test_output1[0])
  p_list1.append(test_output1[1])
  adf2_df=pd.DataFrame({'name':name_list1,'t_value':t_list1,'p_value':p_list1})

In [48]:
print(adf2_df.head())

   name    t_value       p_value
0  Var1  -8.643798  5.337725e-14
1  Var2  -9.952142  2.498435e-17
2  Var3 -10.450722  1.440605e-18
3  Var4 -11.423953  6.766772e-21
4  Var5  -9.857478  4.320792e-17


In [49]:
print(diff_month_df.columns)

Index(['Var1', 'Var2', 'Var3', 'Var4', 'Var5', 'Var6'], dtype='object')


ols回归

In [50]:
def ols(x,y):
  est=sm.OLS(y,sm.add_constant(x)).fit()
  return est.summary()

In [51]:
def olsre(df):
  olsr=list()
  for i in df.columns:
    y=df[i]
    x=df.drop([i],axis=1)
    ols(x,y)
    olsr.append(ols(x,y))
  return olsr

In [52]:
olsre(diff_month_df)

[<class 'statsmodels.iolib.summary.Summary'>
 """
                             OLS Regression Results                            
 Dep. Variable:                   Var1   R-squared:                       0.177
 Model:                            OLS   Adj. R-squared:                  0.170
 Method:                 Least Squares   F-statistic:                     27.86
 Date:                Wed, 18 Nov 2020   Prob (F-statistic):           1.32e-25
 Time:                        22:40:39   Log-Likelihood:                -3260.5
 No. Observations:                 655   AIC:                             6533.
 Df Residuals:                     649   BIC:                             6560.
 Df Model:                           5                                         
 Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
 ---------------------------------------------------------------------

VAR


In [53]:
var_df=diff_month_df.dropna()
var_mod=sm.tsa.VAR(var_df.loc['1946-06-30':])




In [54]:
lag_order=var_mod.select_order()

In [55]:
ic=lag_order.summary()

In [56]:
res=var_mod.fit(ic='aic')

In [57]:
print(ic)

 VAR Order Selection (* highlights the minimums)  
       AIC         BIC         FPE         HQIC   
--------------------------------------------------
0        41.61       41.66   1.182e+18       41.63
1        41.02       41.32   6.550e+17       41.14
2        40.69      41.23*   4.678e+17       40.90
3        40.45       41.25   3.691e+17       40.76
4        40.20       41.25   2.869e+17      40.61*
5        40.18       41.48   2.806e+17       40.68
6        40.19       41.74   2.846e+17       40.79
7        40.18       41.99   2.834e+17       40.89
8        40.14       42.20   2.715e+17       40.94
9        40.12       42.43   2.660e+17       41.02
10       40.08       42.65   2.564e+17       41.08
11      40.01*       42.82  2.378e+17*       41.10
12       40.06       43.13   2.514e+17       41.25
13       40.10       43.42   2.614e+17       41.39
14       40.11       43.68   2.645e+17       41.49
15       40.14       43.96   2.736e+17       41.62
16       40.18       44.26   2.

In [58]:
print(res.summary())

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 18, Nov, 2020
Time:                     22:40:40
--------------------------------------------------------------------
No. of Equations:         6.00000    BIC:                    42.7228
Nobs:                     644.000    HQIC:                   41.0161
Log likelihood:          -17939.5    FPE:                2.21341e+17
AIC:                      39.9339    Det(Omega_mle):     1.22225e+17
--------------------------------------------------------------------
Results for equation Var1
              coefficient       std. error           t-stat            prob
---------------------------------------------------------------------------
const            0.002884         1.264081            0.002           0.998
L1.Var1         -0.532178         0.045088          -11.803           0.000
L1.Var2         -0.004849         0.061469           -0.079           0.937
L1.Va