# Time-series Forecasting

## Imports

In [125]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, Sequential

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [27]:
# Synthetic network metrics, one value per sample: two uniformly random
# series and a monotonically increasing ramp (0, 1, 2, ...) for jitter.
N_SAMPLES = 100
packetLossRate = np.random.random(size=(N_SAMPLES,))
jitter = np.arange(0, N_SAMPLES)
latency = np.random.random(size=(N_SAMPLES,))

In [24]:
from datetime import datetime, timedelta

# One formatted timestamp string per second, for 100 consecutive seconds
# starting at the moment this cell runs.
current = datetime.now()
times = [
    stamp.strftime("%Y-%m-%d %H:%M:%S")
    for stamp in (current + timedelta(seconds=offset) for offset in range(100))
]

In [37]:
# Assemble the three metric series and the timestamp strings into one frame
# (column order: packetLossRate, jitter, latency, timestamp).
df = pd.DataFrame(
    dict(
        packetLossRate=packetLossRate,
        jitter=jitter,
        latency=latency,
        timestamp=times,
    )
)
# Parse the timestamp strings back into datetime values, in place.
df['timestamp'] = pd.to_datetime(df['timestamp'], format="%Y-%m-%d %H:%M:%S")

In [53]:
# NOTE(review): the line below is disabled, so `df` is passed as-is here;
# confirm whether the timestamp column should be removed first.
# df.pop('timestamp')

# Sliding windows of 21 consecutive rows, advancing one row at a time,
# kept in order and grouped into batches of 32. No targets are produced.
window_kwargs = {
    'sequence_length': 21,
    'sequence_stride': 1,
    'batch_size': 32,
    'shuffle': False,
}
ds = tf.keras.utils.timeseries_dataset_from_array(
    data=df,
    targets=None,
    **window_kwargs
)

In [91]:
# Number of rows in the frame.
df.shape[0]

100

In [126]:
class WindowGenerator():
    """Cut a time-ordered table into (inputs, labels) windows for forecasting.

    Every window covers ``input_width + shift + label_width`` consecutive
    rows: the first ``input_width`` rows are the inputs, the following
    ``shift`` rows are skipped, and the final ``label_width`` rows are the
    labels.

    Args:
        input_width: Number of time steps in the input part of a window.
        label_width: Number of time steps in the label part of a window.
        shift: Number of time steps skipped between inputs and labels.
        train_df: Training data; its column order defines ``column_indices``.
            May be ``None``, in which case ``column_indices`` is empty.
        val_df: Validation data (stored only).
        test_df: Test data (stored only).
        input_columns: Column names to keep, in this order, as input
            features. ``None`` keeps every column.
        label_columns: Column names to keep, in this order, as label
            features. ``None`` keeps every column.
        timestamp_column: Name of the timestamp column. It is only stored;
            ``make_dataset`` casts all data to float32, so the frame passed
            in must already be fully numeric.
    """

    def __init__(self,
                 input_width, label_width, shift=0,
                 train_df=None, val_df=None, test_df=None,
                 input_columns=None, label_columns=None, timestamp_column='timestamp'):

        # Store the raw data
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df

        self.input_columns = input_columns
        self.label_columns = label_columns
        self.timestamp_column = timestamp_column

        # Position of every column in the raw data. Guarded so that the
        # default ``train_df=None`` does not crash the constructor.
        if train_df is None:
            self.column_indices = {}
        else:
            self.column_indices = {name: i for i, name in enumerate(train_df.columns)}

        # Position of every column inside the tensors returned by
        # ``split_window``. When no subset is requested the tensors keep all
        # raw columns, so the mapping is the raw one. Always defining these
        # attributes lets ``plot`` use them unconditionally.
        if input_columns is not None:
            self.input_columns_indices = {name: i for i, name in enumerate(input_columns)}
        else:
            self.input_columns_indices = dict(self.column_indices)
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in enumerate(label_columns)}
        else:
            self.label_columns_indices = dict(self.column_indices)

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift + label_width

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def split_window(self, features):
        """Split a batch of full windows into an (inputs, labels) pair.

        Args:
            features: Tensor indexed as (batch, time, feature) holding whole
                windows of ``total_window_size`` time steps.

        Returns:
            Tuple ``(inputs, labels)``; each keeps only its time slice and,
            if a column subset was configured, only those columns in the
            configured order.

        Raises:
            KeyError: If a configured column name is not in
                ``column_indices``.
        """
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]
        if self.input_columns is not None:
            inputs = tf.stack(
                [inputs[:, :, self.column_indices[name]] for name in self.input_columns],
                axis=-1
            )
        if self.label_columns is not None:
            labels = tf.stack(
                [labels[:, :, self.column_indices[name]] for name in self.label_columns],
                axis=-1
            )

        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
        # The feature dimension is left unspecified (None) when no column
        # subset was configured, instead of calling len() on None.
        num_inputs = None if self.input_columns is None else len(self.input_columns)
        num_labels = None if self.label_columns is None else len(self.label_columns)
        inputs.set_shape([None, self.input_width, num_inputs])
        labels.set_shape([None, self.label_width, num_labels])

        return inputs, labels

    def make_dataset(self, data, shuffle=True):
        """Build a dataset of (inputs, labels) windows from tabular data.

        Args:
            data: Array-like of rows ordered in time; converted to float32.
            shuffle: Whether the windows are shuffled.

        Returns:
            A dataset yielding one window per batch (``batch_size=1``),
            already split by ``split_window``.
        """
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.utils.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            shuffle=shuffle,
            batch_size=1
        )

        ds = ds.map(self.split_window)

        return ds

    def plot(self, sample_set, plot_col, model=None, max_subplots=3):
        """Plot inputs, labels and optional predictions for a few windows.

        One subplot is drawn per element taken from ``sample_set``; only the
        first window of each batch is shown.

        Args:
            sample_set: Dataset yielding ``(inputs, labels)`` batches, e.g.
                the result of ``make_dataset``.
            plot_col: Name of the column to draw.
            model: Optional callable mapping an inputs batch to predictions
                laid out like the labels.
            max_subplots: Maximum number of windows (subplots) to draw.

        Raises:
            KeyError: If ``plot_col`` is neither an input nor a label column.
        """
        # The tensors in ``sample_set`` were re-stacked by ``split_window``,
        # so columns must be looked up by their position inside the
        # input/label subsets rather than in the raw data.
        input_col_index = self.input_columns_indices.get(plot_col)
        label_col_index = self.label_columns_indices.get(plot_col)
        if input_col_index is None and label_col_index is None:
            raise KeyError(plot_col)

        plt.figure(figsize=(12, 8))
        for n, (inputs, labels) in enumerate(sample_set.take(max_subplots)):
            plt.subplot(max_subplots, 1, n + 1)
            plt.ylabel(f'{plot_col} [normed]')

            if input_col_index is not None:
                plt.plot(self.input_indices, inputs[0, :, input_col_index],
                         label='Inputs', marker='.', zorder=-10)

            if label_col_index is not None:
                plt.scatter(self.label_indices, labels[0, :, label_col_index],
                            edgecolors='k', label='Labels', c='#2ca02c', s=64)
                if model is not None:
                    predictions = model(inputs)
                    plt.scatter(self.label_indices, predictions[0, :, label_col_index],
                                marker='X', edgecolors='k', label='Predictions',
                                c='#ff7f0e', s=64)

            # A single legend on the first subplot is enough.
            if n == 0:
                plt.legend()

        plt.xlabel('Time [sec]')

    @property
    def train(self):
        """Windowed dataset built from ``train_df`` (shuffled by default)."""
        return self.make_dataset(self.train_df)

In [89]:
# 20 input steps, a gap of 1 step, then 2 label steps, all taken from `df`.
s1 = WindowGenerator(
    input_width=20,
    label_width=2,
    shift=1,
    train_df=df,
    input_columns=['jitter', 'packetLossRate', 'jitter'],
    label_columns=['packetLossRate', 'jitter'],
)
s1.train

<MapDataset element_spec=(TensorSpec(shape=(None, 20, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 2, 2), dtype=tf.float32, name=None))>

In [90]:
# Print the first two (inputs, labels) windows of the training dataset.
for batch in s1.train.take(2):
    inputs, outputs = batch
    print(*(tensor.numpy() for tensor in (inputs, outputs)))

[[[20.          0.04142274 20.        ]
  [21.          0.93170613 21.        ]
  [22.          0.26347166 22.        ]
  [23.          0.49446526 23.        ]
  [24.          0.09528789 24.        ]
  [25.          0.8497314  25.        ]
  [26.          0.752091   26.        ]
  [27.          0.42919093 27.        ]
  [28.          0.48214233 28.        ]
  [29.          0.33425155 29.        ]
  [30.          0.22185107 30.        ]
  [31.          0.5295909  31.        ]
  [32.          0.15594777 32.        ]
  [33.          0.9462705  33.        ]
  [34.          0.9260007  34.        ]
  [35.          0.88813335 35.        ]
  [36.          0.5067773  36.        ]
  [37.          0.9992331  37.        ]
  [38.          0.7014517  38.        ]
  [39.          0.60993737 39.        ]]] [[[ 0.38814342 41.        ]
  [ 0.27783567 42.        ]]]
[[[7.5000000e+01 1.0215090e-01 7.5000000e+01]
  [7.6000000e+01 6.7611349e-01 7.6000000e+01]
  [7.7000000e+01 6.3840276e-01 7.7000000e+01]
  

In [80]:
# With a single argument, slice() stores it as `stop`, whatever its type.
stop_only = [1, 2, 3]
slice(stop_only)

slice(None, [1, 2, 3], None)

In [83]:
# start=1, stop=2, step=3: only index 1 lies in the half-open range [1, 2).
picker = slice(1, 2, 3)
[0, 1, 2, 3, 4][picker]

[1]

In [106]:
# 24 random values arranged as 4 rows by 6 columns.
test = np.reshape(np.random.random(24), (4, 6))
test.shape

(4, 6)

In [121]:
# Turn the 1-D range 0..3 into a 4x1 column vector.
np.arange(4)[:, np.newaxis]

array([[0],
       [1],
       [2],
       [3]])

In [124]:
# Append the row numbers 0..3 to `test` as one extra right-hand column.
np.concatenate((test, np.arange(4)[:, np.newaxis]), axis=1)

array([[0.80587177, 0.12688409, 0.42749691, 0.27178802, 0.35268006,
        0.01014872, 0.        ],
       [0.23899643, 0.29177204, 0.28929395, 0.34374339, 0.04585866,
        0.05445515, 1.        ],
       [0.23098239, 0.65763688, 0.7426619 , 0.92468782, 0.03925473,
        0.15305659, 2.        ],
       [0.98729009, 0.77883367, 0.99945088, 0.6316951 , 0.52600888,
        0.04926711, 3.        ]])

In [118]:
# Shape of the frame once converted to a float32 array.
as_float32 = np.array(df, dtype=np.float32)
as_float32.shape

(100, 3)