In [2]:
### Step 1: Read inflow data

# Read data
import pandas as pd
pd.set_option('display.max_rows', None)
inflow_path = "data/flow_data.csv"
# The first CSV column becomes the index and is parsed as dates.
inflow_data = pd.read_csv(inflow_path, parse_dates=True, index_col=0)

# Preprocessing: forward-fill gaps, then back-fill whatever is still missing
# at the start. .ffill()/.bfill() replace fillna(method=...), which is
# deprecated in pandas 2.1+.
inflow_data = inflow_data.ffill().bfill()
inflow_data.info()

# Add some noise (Gaussian, mean 0, standard deviation 0.01).
import numpy as np
# Fixed seed so that Restart Kernel -> Run All reproduces the same data.
rng = np.random.default_rng(42)
noise = rng.normal(0, 0.01, inflow_data.shape)
inflow_data = inflow_data + noise

# Normalize to 0-1 range (min/max are taken per column).
column_min = inflow_data.min()
column_max = inflow_data.max()
inflow_data = (inflow_data - column_min) / (column_max - column_min)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 61368 entries, 2014-12-31 23:00:00+00:00 to 2021-12-31 22:00:00+00:00
Columns: 222 entries, BE>NL to HR>SI
dtypes: float64(222)
memory usage: 104.4 MB


In [3]:
### Step 1a: Make data stationary
from statsmodels.tsa.stattools import adfuller

def adfuller_test(series, signif=0.05):
    """Tell whether `series` passes the Augmented Dickey-Fuller stationarity test.

    Calls `adfuller(series, autolag='AIC')` and compares the p-value (element 1
    of the returned tuple) with `signif`. Nothing is printed.

    Args:
        series: observations handed unchanged to `adfuller`.
        signif: significance threshold for the p-value (default 0.05).

    Returns:
        bool: True when p-value <= signif, i.e. the series is treated as
        stationary; False otherwise.
    """
    p_value = adfuller(series, autolag='AIC')[1]
    # Compare the unrounded p-value: rounding to 4 decimals before the check
    # would let borderline values such as 0.05004 pass at signif=0.05.
    return bool(p_value <= signif)

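# Difference until data is stationary (currently disabled; if re-enabled, use DataFrame.items() -- iteritems() was removed in pandas 2.0)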
# stationary = False
# while not stationary:
#     stationary = True
#     for name, column in inflow_data.iteritems():
#         if not adfuller_test(column, 0.05):
#             print("data not stationary, differencing...")
#             stationary = False
#             inflow_data = inflow_data.diff().dropna()
#             break

### TODO Step 1b: Split into training and testing data


In [4]:
# Step 2: Create Vector Autoregressive Model and determine best lag order
from statsmodels.tsa.vector_ar.var_model import VAR

# VAR over the whole inflow frame, declared with frequency 'H'.
var_model = VAR(endog=inflow_data, freq='H')
# Evaluate lag orders up to 32; the summary table is the cell's output.
x = var_model.select_order(maxlags=32)
x.summary()

In [None]:
# Step 3: Train model
# Put desired lag order here (passed as `maxlags`).
var_trained_model = var_model.fit(maxlags=1)

In [None]:
# Step 4: Evaluate model
# Report the fitted coefficient matrix shape next to the data shape,
# followed by the model's final prediction error.
print(f"Params shape: {var_trained_model.params.shape}")
print(f"Inflow data shape: {inflow_data.shape}")
print("VAR Final prediction error: ", var_trained_model.fpe)

In [None]:
# TODO Step 4b: Do Forecast with trained model

In [None]:
from statsmodels.tsa.statespace.varmax import VARMAX

# VARMA model with order (1, 2) over the whole inflow frame.
# NOTE(review): with the 222 columns reported by inflow_data.info(), this
# specification has a very large number of parameters -- confirm that fitting
# it is feasible before running the next cell.
varma_model = VARMAX(inflow_data, order=(1,2))
# No summary() call here: statsmodels documents summary() on the fitted
# results object, not on the unfitted model, so calling it on `varma_model`
# is expected to raise AttributeError. Call .summary() on the value
# returned by varma_model.fit() instead.

In [None]:
# Fit the VARMA model and keep the fitted results object.
trained_varma_model = varma_model.fit()

In [None]:
# Evaluate the fitted VARMA model: parameter vector shape vs. data shape.
print(f"Params shape: {trained_varma_model.params.shape}")
print(f"Inflow data shape: {inflow_data.shape}")
# `fpe` (final prediction error) is an attribute of VAR results; the
# state-space VARMAX results expose information criteria (aic / bic / hqic)
# instead, so report AIC rather than failing with AttributeError.
print("VARMA AIC: ", trained_varma_model.aic)

In [None]:
# from statsmodels.tsa.statespace.varmax import VARMAX
# from random import random

# def VARMA_model(train,test):
#     # fit model
#     model = VARMAX(train, order=(1, 2))
#     model_fit = model.fit(disp=False)
#     # make prediction
#     yhat = model_fit.forecast(steps=len(test))
#     res=pd.DataFrame({"Pred1":yhat['Act1'], "Pred2":yhat['Act2'], 
#                       "Act1":test["Act1"].values, "Act2":test["Act2"].values})
#     return res

# df_train = pd.DataFrame({'Act1':[x + random()*10 for x in range(0, 100)],
#                          'Act2':50+np.sin(np.linspace(0, 2*np.pi, 100))*50})
# df_test = pd.DataFrame({'Act1':[x + random()*10 for x in range(101, 201)],
#                          'Act2':50+np.sin(np.linspace(0, 2*np.pi, 100))*50})
# df_ret = VARMA_model(df_train, df_test)
# show_graph(df_train, df_ret, "Vector Autoregression Moving-Average (VARMA)")