In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.api import VAR, DynamicVAR
from statsmodels.tsa.base.datetools import dates_from_str
import datetime

from IO import get_data

In [21]:
# Check whether one time series G-causes the other
#    model:
#           y_t = Sum_{j=1}^p A_{11, j} x_{t-j} + Sum_{j=1}^p A_{12, j} y_{t-j} + Error
#    inputs:
#           data: T x 3 numpy array, where T = number of time steps
#               column 1: time index
#               column 2: x_t, regressor time trend
#               column 3: y_t: dependent variable time trend
#    output:
#           logarithm of F-statistic from F-Test with null hypothesis A_{12}| = 0
def granger_causes(ndata):
    data = pd.DataFrame(ndata[:,1:], columns=["aapl", "amzn", "goog", "adi"])
    date_col = [datetime.datetime.fromtimestamp(x) for x in ndata[:,[0]]]
    dates = pd.Series(date_col)
    data.index = dates
    print(data)
    model = VAR(data)
    results = model.fit(2)
    results.test_causality("adi", ["aapl"], kind='f')
    

In [22]:
# One CSV per ticker. granger_causes labels the value columns
# aapl, amzn, goog, adi in that order, so this list presumably has to keep
# the same order (assumes get_data emits one value column per file, in list
# order -- TODO confirm against IO.get_data).
price_files = [
    "data/AAPL.csv",
    "data/AMZN.csv",
    "data/GOOG.csv",
    "data/ADI.csv",
]
d = get_data(price_files)

Reading data/AAPL.csv
Reading data/AMZN.csv
Reading data/GOOG.csv
Reading data/ADI.csv


In [23]:
# Fit the VAR on the loaded price matrix and run the Granger-causality F-test.
granger_causes(d)

                           aapl        amzn        goog        adi
1970-01-09 06:16:52    2.250714   40.259998   49.813286  36.259998
1970-01-09 06:16:53    2.193571   38.459999   50.316402  36.630001
1970-01-09 06:16:56    2.204286   39.889999   55.168217  36.770000
1970-01-09 06:16:57    2.232857   39.720001   55.412300  36.970001
1970-01-09 06:16:58    2.276428   39.060001   52.284027  35.040001
1970-01-09 06:16:59    2.360000   40.180000   52.279045  35.500000
1970-01-09 06:17:00    2.477143   40.320000   53.848164  35.180000
1970-01-09 06:17:03    2.428571   39.880001   52.443428  35.090000
1970-01-09 06:17:04    2.433571   38.380001   50.958992  34.400002
1970-01-09 06:17:05    2.450000   38.230000   51.158245  34.750000
1970-01-09 06:17:06    2.535714   38.040001   49.409801  34.869999
1970-01-09 06:17:07    2.500714   39.009998   50.286514  35.000000
1970-01-09 06:17:11    2.528571   39.150002   50.316402  34.599998
1970-01-09 06:17:12    2.550000   38.490002   50.181908  33.59

In [106]:
# Load the macrodata example set bundled with statsmodels as a DataFrame.
macro_bundle = sm.datasets.macrodata.load_pandas()
mdata = macro_bundle.data

# Cast year/quarter to int and then to str before building the labels.
year_quarter = mdata[["year", "quarter"]]
dates = year_quarter.astype(int).astype(str)

# Build "<year>Q<quarter>" labels, e.g. "1959Q1".
# NOTE: converting these labels to datetimes with dates_from_str(quarterly)
# is currently disabled, so `quarterly` stays a Series of strings.
quarterly = dates["year"] + "Q" + dates["quarter"]

# Cell output: the type of one of the label components.
type(dates["quarter"])

pandas.core.series.Series

In [85]:
# Show the "<year>Q<quarter>" labels built from the macrodata year/quarter
# columns (prints the whole Series as plain text).
print(quarterly)

0      1959Q1
1      1959Q2
2      1959Q3
3      1959Q4
4      1960Q1
5      1960Q2
6      1960Q3
7      1960Q4
8      1961Q1
9      1961Q2
10     1961Q3
11     1961Q4
12     1962Q1
13     1962Q2
14     1962Q3
15     1962Q4
16     1963Q1
17     1963Q2
18     1963Q3
19     1963Q4
20     1964Q1
21     1964Q2
22     1964Q3
23     1964Q4
24     1965Q1
25     1965Q2
26     1965Q3
27     1965Q4
28     1966Q1
29     1966Q2
        ...  
173    2002Q2
174    2002Q3
175    2002Q4
176    2003Q1
177    2003Q2
178    2003Q3
179    2003Q4
180    2004Q1
181    2004Q2
182    2004Q3
183    2004Q4
184    2005Q1
185    2005Q2
186    2005Q3
187    2005Q4
188    2006Q1
189    2006Q2
190    2006Q3
191    2006Q4
192    2007Q1
193    2007Q2
194    2007Q3
195    2007Q4
196    2008Q1
197    2008Q2
198    2008Q3
199    2008Q4
200    2009Q1
201    2009Q2
202    2009Q3
dtype: object
