In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
import datetime

In [2]:
dataFull = pd.read_csv("data/NE_LINCOLN_2008_2024.csv")
dataNoNa = pd.read_csv("data/NE_LINCOLN_NO_NA.csv")

In [3]:
# dropping columns not used for VAR
dataFull.drop(columns = ["Unnamed: 0", "id", "location", "doseEquivalent", "status", "gammaSum"], axis = 1, inplace = True)
dataNoNa.drop(columns = ["Unnamed: 0", "id", "location", "doseEquivalent", "status", "gammaSum"], axis = 1, inplace = True)

In [4]:
# augmented dickey-fuller test to make sure data is stationary using 
# https://www.analyticsvidhya.com/blog/2021/08/vector-autoregressive-model-in-python/
def adfTest(series, title = ""):
    print(f'Augmented Dickey-Fuller Test: {title}')
    results = adfuller(series.dropna(), autolag = "AIC")
    labels = ['ADF test statistic', 'p-value', '# lags used', '# observations']
    out = pd.Series(results[0:4], index = labels)
    for key, val in results[4].items():
        out[f'critical value({key})']=val
    print(out.to_string())
    if results[1] <= 0.05:
        #print("Strong evidence against the null hypothesis")
        #print("Reject the null hypothesis")
        print("Data has no unit root and is stationary")
    else:
        #print("Weak evidence against the null hypothesis")
        #print("Fail to reject the null hypothesis")
        print("Data has a unit root and is non-stationary")

In [24]:
# testing if the data is stationary
# since dataNoNa is subset of dataFull, only dataFull is tested
for i in range(2, 10):
    adfTest(dataFull['gamma' + str(i)], i)

Augmented Dickey-Fuller Test: 2
ADF test statistic    -7.740945e+00
p-value                1.062356e-11
# lags used            6.900000e+01
# observations         1.220110e+05
critical value(1%)    -3.430404e+00
critical value(5%)    -2.861564e+00
critical value(10%)   -2.566783e+00
Data has no unit root and is stationary
Augmented Dickey-Fuller Test: 3
ADF test statistic    -1.444869e+01
p-value                7.158214e-27
# lags used            7.100000e+01
# observations         1.219930e+05
critical value(1%)    -3.430404e+00
critical value(5%)    -2.861564e+00
critical value(10%)   -2.566783e+00
Data has no unit root and is stationary
Augmented Dickey-Fuller Test: 4
ADF test statistic    -1.699695e+01
p-value                8.785375e-30
# lags used            7.100000e+01
# observations         1.219910e+05
critical value(1%)    -3.430404e+00
critical value(5%)    -2.861564e+00
critical value(10%)   -2.566783e+00
Data has no unit root and is stationary
Augmented Dickey-Fuller Test

In [5]:
# simple data conversions to usable types
dataFull.dropna(inplace = True)
dataFull["time"] = pd.to_datetime(dataFull["time"])
for i in range(2, 10):
    dataFull["gamma" + str(i)] = dataFull['gamma' + str(i)].astype(float)

In [33]:
# counting number of records from 2024 to make the test group
entries2024 = int(sum(dataFull[dataFull['time'].dt.year == 2024].count()) / 9)

train = dataFull[:-entries2024]
test = dataFull[-entries2024:]


In [34]:
train = np.asarray(train)
for i in range(1, 9):
    train[:, i] = train[:, i].astype(np.float64)

In [35]:
train[0:5, ]

array([[Timestamp('2008-05-21 17:41:00'), 2967.0, 394.0, 160.0, 186.0,
        231.0, 176.0, 15.0, 20.0],
       [Timestamp('2008-05-21 19:09:00'), 2826.0, 382.0, 158.0, 185.0,
        233.0, 173.0, 14.0, 21.0],
       [Timestamp('2008-05-21 19:42:00'), 2770.0, 376.0, 159.0, 181.0,
        229.0, 176.0, 14.0, 21.0],
       [Timestamp('2008-05-21 20:57:00'), 2737.0, 371.0, 159.0, 185.0,
        229.0, 173.0, 15.0, 21.0],
       [Timestamp('2008-05-21 21:57:00'), 2727.0, 372.0, 158.0, 183.0,
        232.0, 172.0, 15.0, 21.0]], dtype=object)

In [36]:
for i in range(1, 11):
    model = VAR(np.asarray(train))
    results = model.fit(i)
    print("Order: " + i)
    print("AIC: " + results.aic)
    print("BIC: " + results.bic)

TypeError: loop of ufunc does not support argument 0 of type float which has no callable sqrt method

In [60]:
dataFull.dtypes

time      datetime64[ns]
gamma2           float64
gamma3           float64
gamma4           float64
gamma5           float64
gamma6           float64
gamma7           float64
gamma8           float64
gamma9           float64
dtype: object