In [2]:
import pandas as pd
import numpy as np
import optuna

In [3]:
from tg.datasets import DatasetFactoryLookupCallback
from tg.interactors import ModelInteractor
from tg.splitters import AnchoredSplitter
from tg.model_interfaces import OneAheadModel
from tg.utils import stack_lags
from typing import List, Tuple, Union, Type, Dict

In [4]:
# Key used to look up the dataset factory.
dataset_name = 'AIR_PASSENGERS'

data_factory = DatasetFactoryLookupCallback(dataset_name=dataset_name)

# The factory call below is parameterised by the model name.
model_name = 'ARIMA'
# NOTE(review): `params` is not read in this cell; the same values are passed
# as literals to ARIMA_RNN(...) further down - confirm whether it is needed.
params = {'epochs': 100, 'hidden_units': 15}

# The factory returns a pair, unpacked as (y, X); presumably the target series
# and the feature frame - TODO confirm against the factory implementation.
y, X = data_factory(model_name=model_name)

In [5]:
class HybridModel(OneAheadModel):
    """Two-stage hybrid forecaster built from two ``OneAheadModel`` instances.

    With ``method='residue'`` the first model is fitted on the series, its
    residuals are lagged and stacked next to lagged values of the series, and
    the second model is fitted on that combined matrix.

    This base class only stores the two model *classes*. Subclasses are
    expected to assign the model *instances* to ``self.first_model`` and
    ``self.second_model`` before ``fit`` is called.
    """

    # Combination strategies that ``fit`` knows how to handle.
    _SUPPORTED_METHODS = ('residue',)

    def __init__(self,
                 first_model_class: Type[OneAheadModel],
                 second_model_class: Type[OneAheadModel],
                 method: str = 'residue') -> None:
        """Store the stage classes and the combination method.

        Args:
            first_model_class: class of the first-stage model.
            second_model_class: class of the second-stage model.
            method: combination strategy; only ``'residue'`` is implemented.
        """
        super().__init__()
        self.first_model_class = first_model_class
        self.second_model_class = second_model_class
        self.method = method

    def fit(self,
            y: pd.Series,
            X: pd.DataFrame = None,
            timesteps: int = None) -> None:
        """Fit the first model on ``y`` and the second on lagged residuals + lagged ``y``.

        Args:
            y: target series.
            X: optional features, forwarded to the first model only.
            timesteps: number of lags used to build the second-stage inputs;
                required.

        Raises:
            AttributeError: if ``first_model`` / ``second_model`` were never set.
            ValueError: if the second model reports ``single_input``, if
                ``method`` is not supported, or if ``timesteps`` is ``None``.
        """
        first_model = getattr(self, 'first_model', None)
        second_model = getattr(self, 'second_model', None)
        if first_model is None or second_model is None:
            raise AttributeError(
                "HybridModel requires 'first_model' and 'second_model' "
                "instances to be set before calling fit()")

        # The second stage is fitted with an explicit X, so a model flagged
        # ``single_input`` is rejected.
        if second_model.single_input:
            raise ValueError("Second model not compatible with hybrid model")

        # Previously an unknown method made fit() a silent no-op, leaving both
        # models unfitted; fail loudly instead.
        if self.method not in self._SUPPORTED_METHODS:
            raise ValueError(
                f"Unsupported hybrid method {self.method!r}; "
                f"expected one of {self._SUPPORTED_METHODS}")

        # Validate before any (potentially expensive) fitting: the slicing
        # below needs an integer lag count.
        if timesteps is None:
            raise ValueError(
                "timesteps must be provided to fit a hybrid model")

        first_model.fit(y=y, X=X, timesteps=timesteps)

        # The first residual is dropped, and ``y`` is offset by one to match.
        # NOTE(review): presumably the first in-sample residual is not
        # meaningful - confirm against predict_residuals().
        first_model_residuals = first_model.predict_residuals()[1:]
        y_residuals = stack_lags(first_model_residuals, timesteps)
        y_lagged = stack_lags(y[1:], timesteps)

        # Second-stage inputs: lagged residuals side by side with lagged targets.
        second_stage_X = pd.DataFrame(np.hstack([y_residuals, y_lagged]))
        second_stage_y = y[timesteps + 1:]
        second_model.fit(y=second_stage_y, X=second_stage_X,
                         timesteps=timesteps)

    def predict_one_ahead(self) -> float:
        """Return the second-stage model's one-step-ahead prediction."""
        return self.second_model.predict_one_ahead()

In [6]:
from tg.models import ARIMAModel, RNNModel

class ARIMA_RNN(HybridModel):
    """Residual hybrid that chains an ``ARIMAModel`` into an ``RNNModel``."""

    def __init__(self, hidden_units: int, epochs: int) -> None:
        """Create both stage models.

        Args:
            hidden_units: forwarded to ``RNNModel``.
            epochs: forwarded to ``RNNModel``.
        """
        super().__init__(first_model_class=ARIMAModel,
                         second_model_class=RNNModel,
                         method='residue')
        # The stage instances are built eagerly here; the ARIMA stage takes no
        # constructor arguments, the RNN stage takes both hyper-parameters.
        self.first_model = ARIMAModel()
        self.second_model = RNNModel(hidden_units=hidden_units, epochs=epochs)

    def fit(self,
            y: pd.Series,
            X: pd.DataFrame = None,
            timesteps: int = None) -> None:
        """Fit both stages; delegates unchanged to ``HybridModel.fit``."""
        super().fit(y=y, X=X, timesteps=timesteps)

    def predict_one_ahead(self) -> float:
        """Return the one-step-ahead forecast from ``HybridModel``."""
        return super().predict_one_ahead()

    @staticmethod
    def suggest_params(trial: optuna.Trial) -> dict:
        """Merge the Optuna suggestions of both stages into one dict.

        ARIMA suggestions are collected first; RNN suggestions are applied on
        top, so an RNN key wins if both stages use the same name.
        """
        merged = dict(ARIMAModel.suggest_params(trial))
        merged.update(RNNModel.suggest_params(trial))
        return merged

In [7]:
# Build the hybrid with the same hyper-parameter values as the `params` dict
# defined in the configuration cell (15 hidden units, 100 epochs).
arima_rnn = ARIMA_RNN(hidden_units=15, epochs=100)

In [8]:
# Fit on the target only (X keeps its default of None), using 12 lags.
arima_rnn.fit(y=y, timesteps=12)

In [9]:
# One-step-ahead forecast; HybridModel returns the second-stage model's value.
arima_rnn.predict_one_ahead()



469.28067

In [None]:
from tg.interactors import ModelInteractor, DataInteractor
from tg.splitters import AnchoredSplitter
from tg.datasets import DatasetFactoryLookupCallback
from tg.ts_models import ModelClassLookupCallback
from tg.utils import stack_lags

In [None]:
# NOTE(review): the assignment below is commented out, so `dataset_name` must
# already be bound by an earlier cell for this cell to run on a fresh kernel.
# dataset_name = "PERFECT_SINE30"

di = DataInteractor(dataset_name=dataset_name)

# Alternative model configurations, kept for quick switching.
# model_name = "SARIMA"
# params = {"m": di.y.period}

# model_name = "NAIVE"
# params = {"constant": 0.0}

# Active configuration. This rebinds `model_name` and `params` from the
# earlier configuration cell.
model_name = "ARIMA"
params = {}

# model_name = "RNN"
# params = {"epochs": 700, "hidden_units": 25}

# Rebinds the notebook-level `y` and `X` with the interactor's data.
y, X = di.get_data(model_name=model_name)

In [None]:
from tg.datasets import DATASET_FACTORY_LOOKUP

In [None]:
DATASET_FACTORY_LOOKUP['AIR_PASSENGERS']().period

In [None]:
ModelClassLookupCallback('RNN').single_input

In [None]:
# First ten observations of y; used by the next cell to check whether the
# `period` attribute is still available on a slice.
x = y.iloc[0:10]

In [None]:
x.period

In [None]:
# Resolve the model class by name, then instantiate it with no arguments.
ARIMA_class = ModelClassLookupCallback("ARIMA")
ARIMA = ARIMA_class()


In [None]:
# Fit on the target series only; no X or timesteps are passed.
ARIMA.fit(y=y)

In [None]:
# Last row of X as a 2-D array with a single row.
one_ahead_input = X.iloc[-1].values.reshape(1, -1)

In [None]:
X

In [None]:
# Work on a copy so X itself is left untouched.
df = X.copy()
# Split off the last row as a single-row 2-D array and keep the remaining rows
# in `df`. The right-hand side is evaluated before either name is rebound, so
# both pieces are taken from the full copy.
one_ahead_input, df = df.iloc[-1].values.reshape(1, -1), df.iloc[:-1]

In [None]:
df

In [None]:
def _get_default_input(dataset) -> pd.Series:
    return y, None

In [None]:
# Use the series' own `period` attribute as the number of lags.
timesteps = y.period
# Preview the lag matrix produced by stack_lags as a DataFrame.
pd.DataFrame(stack_lags(y, timesteps))

In [None]:
# NOTE(review): in the visible notebook `first_model_residuals` is only
# assigned in a later cell, so this raises NameError on Restart & Run All.
first_model_residuals

In [None]:
# Residuals of the fitted ARIMA model with the first value dropped.
first_model_residuals = ARIMA.predict_residuals()[1:]

In [None]:
# Same assignment as the preceding cell, repeated so this cell can run alone.
first_model_residuals = ARIMA.predict_residuals()[1:]

# Lag both the residuals and the target (offset by one to match the dropped
# first residual). NOTE(review): 12 is hard-coded here, whereas an earlier
# cell derives `timesteps` from y.period - confirm they agree.
y_residuals = stack_lags(first_model_residuals, 12)
y_lagged = stack_lags(y[1:], 12)

# NOTE(review): this rebinds the notebook-level `X`; cells above that read X
# will see a different object if re-run after this one.
X = pd.DataFrame(np.hstack([y_residuals, y_lagged]))

In [None]:
X

In [None]:
first_model_residuals

In [None]:
stack_lags(first_model_residuals, 12)

In [None]:
# Take the last row of X, shift its values one position towards the front
# (dropping the first value and leaving NaN at the end), fill that NaN with
# the latest y, and reshape to a single-row 2-D array.
# NOTE(review): if X is the [lagged residuals | lagged y] matrix built above,
# shifting the whole row moves the first y-lag into the last residual slot -
# confirm this is intended.
X.iloc[-1].shift(-1).fillna(
            y.iloc[-1]).values.reshape(1, -1)

In [None]:
pd.DataFrame(X.iloc[-1].shift(-1).fillna(
            y.iloc[-1]).values.reshape(1, -1))

In [None]:
# NOTE(review): this looks like a reference snippet pasted from a method of
# another class. It uses `self`, `lagmat`, `arima_y_pred` and `y_len`, none of
# which are defined or imported in the visible cells, so it raises NameError
# as written unless they are bound elsewhere.
# Errors of the ARIMA predictions, with the first value dropped.
arima_errors = np.array(y - arima_y_pred)[1:]


# Training features: lagged errors side by side with lagged targets.
last_arima_errors = lagmat(arima_errors, self.arima_m, "both")
lagged_y = lagmat(y[1:], self.arima_m, "both")
svm_features = np.hstack([last_arima_errors, lagged_y])
svm_y_train = y[-(y_len - self.arima_m) + 1:].reshape(-1)

self.svr.fit(svm_features, svm_y_train)

# Rebuild the lag matrices with one fewer lag and the extra "in" argument,
# then keep only the last row of each as the input for the next prediction.
# (`lagmat` is presumably statsmodels' helper - TODO confirm.)
last_arima_errors = lagmat(arima_errors, self.arima_m - 1, "both",
                        "in")
lagged_y = lagmat(y, self.arima_m - 1, "both", "in")

self.svr_input = np.hstack([last_arima_errors[-1],
                            lagged_y[-1]]).reshape(1, -1)

In [None]:
ARIMA.model.predict_in_sample()

In [None]:
# NOTE(review): same pasted reference lines as in the earlier cell; `lagmat`
# and `self` are not defined in the visible cells, so this does not run as-is.
last_arima_errors = lagmat(arima_errors, self.arima_m, "both")
lagged_y = lagmat(y[1:], self.arima_m, "both")