In [46]:
# -*- coding: utf-8 -*-
"""Multi Layer Perceptron Network (MLP) for forecasting."""

from sktime.forecasting.deep_learning.base import BaseDeepForecastor
from sktime.networks.mlp import MLPNetwork
from sktime.utils.validation._dependencies import _check_dl_dependencies

_check_dl_dependencies(severity="warning")


class MLPForecaster(BaseDeepForecastor):
    """Multi Layer Perceptron Network (MLP) forecaster, derived from [1].

    The training series is cut into sliding windows of ``steps`` consecutive
    observations and the network is trained to map each window to the value
    that directly follows it. Multi-step forecasts are produced recursively:
    each prediction is appended to the window used for the next prediction.

    Parameters
    ----------
    n_epochs : int, default=200
        The number of epochs to train the model.
    batch_size : int, default=4
        The number of samples per gradient update.
    steps : int, default=3
        The lookback window, i.e. the number of past observations in each
        training sample.
    callbacks : list of keras callbacks or None, default=None
        Passed unchanged to the ``fit`` call of the keras model.
    verbose : bool, default=False
        Whether to print the model summary and the keras training log.
    loss : str, default="mse"
        Loss passed to the ``compile`` call of the keras model.
    metrics : list of str or None, default=None
        Metrics passed to the ``compile`` call of the keras model.
        ``None`` is replaced by ``["accuracy"]`` at build time.
    random_state : int or None, default=None
        Seed for random number generation. Stored on the instance; nothing
        in this class reads it.
    activation : str or a tf callable, default="relu"
        Activation function of the final single-unit dense output layer.
        List of available activation functions:
        https://keras.io/api/layers/activations/
    use_bias : bool, default=True
        Whether the layer uses a bias vector. Stored on the instance; nothing
        in this class forwards it to the network.
    optimizer : keras.optimizers object or None, default=None
        Optimizer used to compile the model. ``None`` is replaced by
        ``keras.optimizers.Adam(learning_rate=0.0001)`` at build time.

    Notes
    -----
    .. [1]  Network originally defined in:
    @inproceedings{wang2017time, title={Time series classification from
    scratch with deep neural networks: A strong baseline}, author={Wang,
    Zhiguang and Yan, Weizhong and Oates, Tim}, booktitle={2017
    International joint conference on neural networks (IJCNN)}, pages={
    1578--1585}, year={2017}, organization={IEEE} }

    Derived from the implementation from source code
    https://github.com/hfawaz/dl-4-tsc/blob/master/classifiers/mlp.py
    """

    def __init__(
        self,
        n_epochs=200,
        batch_size=4,
        steps=3,
        callbacks=None,
        verbose=False,
        loss="mse",
        metrics=None,
        random_state=None,
        activation="relu",
        use_bias=True,
        optimizer=None,
    ):
        _check_dl_dependencies(severity="error")
        super().__init__()
        self.callbacks = callbacks
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.steps = steps
        self.verbose = verbose
        self.loss = loss
        self.metrics = metrics
        self.random_state = random_state
        self.activation = activation
        self.use_bias = use_bias
        self.optimizer = optimizer
        self.history = None
        self._network = MLPNetwork()

    def build_model(self, input_shape, **kwargs):
        """Construct a compiled, un-trained, keras model that is ready for training.

        The hidden layers come from ``self._network.build_network``; a single
        dense unit with ``self.activation`` is stacked on top as the output.

        Parameters
        ----------
        input_shape : tuple
            The shape of one sample fed into the input layer. ``_fit`` passes
            ``source.shape[1:]``, i.e. (n_features, steps).
        **kwargs
            Accepted for signature compatibility; not used.

        Returns
        -------
        output : a compiled Keras Model
        """
        from tensorflow import keras

        # Resolve the default locally: constructor parameters must keep the
        # value the user passed so that parameter inspection/cloning of the
        # estimator is not affected by building the model.
        metrics = ["accuracy"] if self.metrics is None else self.metrics

        input_layer, output_layer = self._network.build_network(input_shape)
        output_layer = keras.layers.Dense(units=1, activation=self.activation)(
            output_layer
        )

        self.optimizer_ = (
            keras.optimizers.Adam(learning_rate=0.0001)
            if self.optimizer is None
            else self.optimizer
        )

        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
        model.compile(loss=self.loss, optimizer=self.optimizer_, metrics=metrics)
        return model

    def _fit(self, y, fh=None, X=None):
        """Fit the forecaster on the training set (y) with exog data (X).

        Parameters
        ----------
        y: np.array of shape = (n_instances (n))
            The main data which needs to be predicted.
        fh: list of int
            Forecasting Horizon for the forecaster. Not used during fitting.
        X: np.ndarray of shape = (n_instances (n), exog_dimensions (d))
            Exogeneous data for data prediction. Nested lists are accepted
            as well.

        Returns
        -------
        self: object

        Raises
        ------
        ValueError
            If the data is too short to build a single training window.
        """
        import numpy as np

        source, target = self.splitSeq(self.steps, y)
        if X is not None:
            src_x, _unused_targets = self.splitSeq(self.steps, np.asarray(X))
            # Append the y value of every time step to that step's exogenous
            # row, giving one [x_1, ..., x_d, y] feature row per step. The row
            # is unpacked explicitly because `row + [y]` only concatenates for
            # lists; for numpy rows it would be a broadcast addition.
            source = [
                [
                    [*np.atleast_1d(x_row), y_val]
                    for x_row, y_val in zip(x_window, y_window)
                ]
                for x_window, y_window in zip(src_x, source)
            ]

        if len(source) == 0:
            raise ValueError(
                "Not enough observations to build a training window: more than "
                f"steps={self.steps} observations are required."
            )

        source, target = np.array(source), np.array(target)
        if X is None:
            # Give the univariate case an explicit feature axis of length 1.
            source = source.reshape((*source.shape, 1))
        # (n_windows, steps, n_features) -> (n_windows, n_features, steps)
        source = source.transpose(0, 2, 1)
        self.input_shape = source.shape[1:]
        # Kept on the instance: `_predict` starts from the last training window.
        self.source, self.target = source, target

        self.model_ = self.build_model(self.input_shape)
        if self.verbose:
            self.model_.summary()

        self.history = self.model_.fit(
            source,
            target,
            batch_size=self.batch_size,
            epochs=self.n_epochs,
            verbose=self.verbose,
            callbacks=self.callbacks,
        )
        return self

    def _predict(self, fh, X=None):
        """Get predictions for steps mentioned in fh based on given y and X.

        Forecasting is recursive: starting from the last training window, the
        model predicts one step, the oldest step is dropped from the window and
        the new step (exogenous values, if any, followed by the prediction) is
        appended, until ``max(fh)`` steps have been produced.

        Parameters
        ----------
        fh: list of int
            Forecasting Horizon for the forecaster (1-based steps ahead).
        X: np.ndarray of shape = (n_instances (n), exog_dimensions (d))
            Exogeneous data for data prediction. Row ``k - 1`` is used for
            step ``k``, so at least ``max(fh)`` rows are needed.

        Returns
        -------
        fvalues: list with predictions of relevant fh, each as returned by the
            keras model's ``predict``.
        """
        import numpy as np

        exog = None if X is None else np.asarray(X)
        currentPred = 1
        lastPred = max(fh)
        fvalues = []
        fh = set(fh)
        # Work on a float copy: the stored training windows can have an integer
        # dtype, and writing predictions back into an integer window would
        # truncate them before they are fed to the next step.
        window = np.array(self.source[-1], dtype="float64")[np.newaxis, :, :]
        while currentPred <= lastPred:
            yhat = self.model_.predict(window)
            y_next = np.ravel(yhat)[0]
            if exog is not None:
                # Same feature order as in `_fit`: exogenous values, then y.
                next_step = [*np.atleast_1d(exog[currentPred - 1]), y_next]
            else:
                next_step = [y_next]
            next_step = np.asarray(next_step, dtype=window.dtype).reshape(1, -1, 1)
            # Slide the window: drop the oldest step, append the new one.
            window = np.concatenate([window[:, :, 1:], next_step], axis=2)
            if currentPred in fh:
                fvalues.append(yhat)

            currentPred += 1
        return fvalues

    def splitSeq(self, steps, seq):
        """Get window sized instances of sequence.

        Window ``i`` is ``seq[i : i + steps]`` and its target is
        ``seq[i + steps]``; windows without a following element are skipped.

        Parameters
        ----------
        steps: int
            Window Size of the forecaster.
        seq: np.ndarray of shape = (n_instances (n), n_dimensions (d))
            Data to split in window-sized instances.

        Returns
        -------
        source: list containing the data on which model is trained.
        target: list of future predictions of data.
        """
        source, target = [], []
        for i in range(len(seq)):
            end_idx = i + steps
            if end_idx > len(seq) - 1:
                # No element left to serve as the target of this window.
                break
            seq_src, seq_tgt = seq[i:end_idx], seq[end_idx]
            source.append(seq_src)
            target.append(seq_tgt)
        return source, target

In [47]:
# Toy target series: 10, 20, ..., 90.
raw_seq = list(range(10, 100, 10))

In [48]:
# Two exogenous features per observation of raw_seq: its position and position + 1.
exog = [[i, i + 1] for i in range(len(raw_seq))]

In [49]:
# Forecaster with the constructor defaults (200 epochs, batch size 4, lookback of 3 steps).
fcn = MLPForecaster()

In [50]:
# Train on the toy series together with its exogenous rows.
# NOTE(review): this calls the private `_fit` hook directly; presumably the base
# class exposes a public `fit` that should be used instead — confirm.
fcn._fit(y=raw_seq, X=exog)

In [51]:
# Per-epoch values recorded by keras while fitting (the object returned by model.fit).
fcn.history.history

{'loss': [3883.087890625,
  3189.367919921875,
  3026.829833984375,
  2806.221435546875,
  2514.705078125,
  2194.936767578125,
  2227.121826171875,
  1252.4317626953125,
  1096.2259521484375,
  981.8132934570312,
  719.13623046875,
  636.0213012695312,
  319.41802978515625,
  221.5382843017578,
  333.3432922363281,
  493.6846008300781,
  119.61406707763672,
  196.69720458984375,
  64.91850280761719,
  258.434326171875,
  304.7683410644531,
  218.7667694091797,
  65.25494384765625,
  70.20673370361328,
  88.60517120361328,
  165.02186584472656,
  127.563720703125,
  268.02886962890625,
  148.12831115722656,
  265.0610046386719,
  168.50340270996094,
  109.50827026367188,
  111.7464828491211,
  220.85565185546875,
  143.10516357421875,
  136.03765869140625,
  38.81959533691406,
  124.9878158569336,
  176.6869659423828,
  81.2987289428711,
  181.4322509765625,
  96.5024642944336,
  198.1737518310547,
  162.8346710205078,
  183.6537628173828,
  112.6746826171875,
  112.7409896850586,
  58

In [52]:
# Exogenous rows for the four steps after the end of raw_seq,
# built with the same [position, position + 1] pattern as the training rows.
exog_p = [[i, i + 1] for i in range(len(raw_seq), len(raw_seq) + 4)]
exog_p

[[9, 10], [10, 11], [11, 12], [12, 13]]

In [53]:
# Recursive forecast for steps 1-4 ahead, passing the future exogenous rows as X.
fcn._predict(fh=[1, 2, 3, 4], X=exog_p)



ic| source.shape: (1, 3, 3)
ic| source.shape: (1, 3, 2)
ic| exog_p[currentPred-1]: [9, 10]
ic| source.shape: (1, 3, 3)




ic| source.shape: (1, 3, 3)
ic| source.shape: (1, 3, 2)
ic| exog_p[currentPred-1]: [10, 11]
ic| source.shape: (1, 3, 3)




ic| source.shape: (1, 3, 3)
ic| source.shape: (1, 3, 2)
ic| exog_p[currentPred-1]: [11, 12]
ic| source.shape: (1, 3, 3)




ic| source.shape: (1, 3, 3)
ic| source.shape: (1, 3, 2)
ic| exog_p[currentPred-1]: [12, 13]
ic| source.shape: (1, 3, 3)


[array([[79.4205]], dtype=float32),
 array([[86.35225]], dtype=float32),
 array([[92.918106]], dtype=float32),
 array([[97.81372]], dtype=float32)]