# Modelling Libraries - Tensorflow

In [42]:
# Import to be able to import python package from src
import sys
sys.path.insert(0, '../src')

In [43]:
import pandas as pd
import numpy as np
import ontime as on
from darts.datasets import EnergyDataset

---
## Load data

In [44]:
ts = EnergyDataset().load()
ts = ts.astype(np.float32)

---

## Preprocessing

- [x] Normalize
- [x] Split train, test, val
- [ ] Feature engineering
        - add weather for location
        - add day of the week, month, year, etc.
        - add whatever
- [x] Windowing
- [x] Windowing - Split (parts to train as X, parts to predict as y)
- [ ] Windowing - to tf.data.Dataset
- [ ] Windowing - to Pytorch DataLoaders

In [45]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from darts.dataprocessing.transformers import Scaler

def normalize(ts: on.TimeSeries, type='minmax', return_transformer=False):
    match type:
        case 'minmax':
            scaler = MinMaxScaler()
        case 'zscore':
            scaler = StandardScaler()
    transformer = Scaler(scaler)
    ts_transformed = transformer.fit_transform(ts)
    if return_transformer:
        return ts_transformed, transformer
    else:
        return ts_transformed

In [46]:
def train_test_split(ts: on.TimeSeries, test_split=None, train_split=None) -> tuple:
    """
    Description
    
    :param ts: TimeSeries to split
    :param test_split: float, int or pd.TimeStamp
    :param train_split: float, int or pd.TimeStamp
    """
    
    if train_split is not None and test_split is not None:
        raise Exception('Only one of those two parameters can be set : train_split, test_split.')

    if train_split is None and test_split is None:
        test_split = 0.25
    
    # split ts in subts : train, test
    if test_split is not None: 
        train_set, test_set = ts.split_after(1-test_split)
    
    if train_split is not None:
        train_set, test_set = ts.split_after(train_split)

    return train_set, test_set

In [47]:
def split_by_n(ts, n, drop_last=True):

    # Get DataFrame
    df = ts.pd_dataframe()
    
    # Calculate the total number of splits needed
    total_splits = -(-len(df) // n)  # Ceiling division to get the number of parts
    
    # Initialize a list to hold the DataFrame splits
    splits_df = []
    
    # Loop through the DataFrame and split it
    for split in range(total_splits):
        start_index = split * n
        end_index = start_index + n
        # Append the part to the list, using slicing with .iloc
        splits_df.append(df.iloc[start_index:end_index])

    # If the last dataframe has a different length, then drop it.
    if drop_last:
        last_df = splits_df[-1]
        second_last = splits_df[-2]        
        if len(last_df) != len(second_last):
            splits_df = splits_df[:-1]

    # Change the data sctructure from DataFrame to TimeSeries
    return list(map(on.TimeSeries.from_dataframe, splits_df))
    

In [48]:
def split_inputs_from_targets(ts_list, input_len, target_len):

    # Change inner data structure to DataFrame
    dfs = [ts.pd_dataframe() for ts in ts_list]

    # Create initial arrays
    input_series_list = []
    target_series_list = []
    
    # Iterate over each DataFrame in the list
    for df in dfs:
        # Check if the DataFrame is large enough to accommodate input_len and label_len
        if len(df) >= input_len + target_len:
            # Get the first input_len items
            input_series = df.iloc[:input_len]
            input_series_list.append(input_series)
            
            # Get the last label_len items
            target_series = df.iloc[-target_len:]
            target_series_list.append(target_series)
        else:
            raise Exception('input_len + label_len is longer that the total length of the DataFrame')

    input_ts_list = list(map(on.TimeSeries.from_dataframe, input_series_list))
    target_ts_list = list(map(on.TimeSeries.from_dataframe, target_series_list))
    
    return input_ts_list, target_ts_list

In [49]:
def to_numpy(ts_list):
    return np.array([ts.pd_dataframe().to_numpy() for ts in ts_list])  

In [50]:
import numpy as np
import tensorflow as tf


class WindowGenerator:
    def __init__(self, input_width, target_width, offset, ts, target_columns=None):
        # Store the raw data.
        self.ts = ts
        self.df = ts.pd_dataframe()

        # Work out the target column indices.
        self.target_columns = target_columns
        if target_columns is not None:
            self.target_columns_indices = {name: i for i, name in
                                           enumerate(target_columns)}
        self.column_indices = {name: i for i, name in
                               enumerate(self.df.columns)}

        # Work out the window parameters.
        self.input_width = input_width
        self.target_width = target_width
        self.offset = offset

        self.total_window_size = input_width + offset

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.target_start = self.total_window_size - self.target_width
        self.targets_slice = slice(self.target_start, None)
        self.target_indices = np.arange(self.total_window_size)[self.targets_slice]

    def __repr__(self):
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Target indices: {self.target_indices}',
            f'Target column name(s): {self.target_columns}'])

    def split_window(self, features):
        inputs = features[:, self.input_slice, :]
        targets = features[:, self.targets_slice, :]
        if self.target_columns is not None:
            targets = tf.stack(
                [targets[:, :, self.column_indices[name]] for name in self.target_columns],
                axis=-1)

        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
        inputs.set_shape([None, self.input_width, None])
        targets.set_shape([None, self.target_width, None])

        return inputs, targets

    def make_dataset(self, data):
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.utils.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            shuffle=True,
            batch_size=32,)
        return ds.map(self.split_window)

    @property
    def dataset(self):
        return self.make_dataset(self.df)

    @property
    def example(self):
        """Get and cache an example batch of `inputs, targets` for plotting."""
        result = getattr(self, '_example', None)
        if result is None:
            # No example batch was found, so get one from the dataset
            result = next(iter(self.dataset))
            # And cache it for next time
            self._example = result
        return result



### Test with WindowGenerator

In [74]:
df = ts.pd_dataframe()
df = df.interpolate()
ts = on.TimeSeries.from_dataframe(df)

In [75]:
ts_t = normalize(ts)

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


In [76]:
train, test = train_test_split(ts_t, train_split=0.8)
train, val = train_test_split(train, train_split=0.8)

In [77]:
target_columns = ['generation solar']

train_window = WindowGenerator(
    input_width=5, 
    target_width=1, 
    offset=1, 
    target_columns=target_columns,
    ts=train)

val_window = WindowGenerator(
    input_width=5, 
    target_width=1, 
    offset=1, 
    target_columns=target_columns,
    ts=val)

test_window = WindowGenerator(
    input_width=5, 
    target_width=1, 
    offset=1, 
    target_columns=target_columns,
    ts=test)

In [78]:
train_window

Total window size: 6
Input indices: [0 1 2 3 4]
Target indices: [5]
Target column name(s): ['generation solar']

In [79]:
train_window.dataset.element_spec

(TensorSpec(shape=(None, 5, 28), dtype=tf.float32, name=None),
 TensorSpec(shape=(None, 1, 1), dtype=tf.float32, name=None))

In [80]:
test_window.dataset.element_spec

(TensorSpec(shape=(None, 5, 28), dtype=tf.float32, name=None),
 TensorSpec(shape=(None, 1, 1), dtype=tf.float32, name=None))

## TensorFlow Modelling

### Define data

In [81]:
dataset = {
    'train': train_window.dataset,
    'val': val_window.dataset,
    'test': test_window.dataset,
}

### Define model

In [82]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1)
])

### Training

In [86]:
MAX_EPOCHS = 20

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    mode='min'
)

model.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()]
)

history = model.fit(
    dataset['train'], 
    epochs=MAX_EPOCHS,
    validation_data=dataset['val'],
    #callbacks=[early_stopping]
)




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Evaluate

In [84]:
performance = model.evaluate(dataset['test'], verbose=0)

In [85]:
performance

[nan, nan]

---
## Models

- [x] Darts
- [x] Scikit-learn API compatible regressor
- [ ] GluonTS
- [ ] Kats
- [ ] Custom PyTorch
- [ ] Custom TensorFlow

### Darts models

In [4]:
from darts.models import BlockRNNModel

In [15]:
model = on.Model(BlockRNNModel,
    input_chunk_length=12,
    output_chunk_length=6,
    n_rnn_layers=2,
    n_epochs=50
)
model.fit(ts)
model.predict(5)

darts.models.forecasting.torch_forecasting_model INFO  Train dataset contains 348 samples.
darts.models.forecasting.torch_forecasting_model INFO  Time series values are 32-bits; casting model to float32.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | RNN              | 2.0 K 
4 | fc            | Sequential       | 156   
---------------------------------------------------
2.2 K     Trainable params
0         Non-trainable params
2.2 K     Total params
0.009     Total estimated model params size (MB)


Epoch 49: 100%|██████████████████████████████| 11/11 [00:00<00:00, 46.29it/s, train_loss=4.480]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████████████████████████| 11/11 [00:00<00:00, 46.16it/s, train_loss=4.480]


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 124.78it/s]


### Scikit-learn API compatible models

In [9]:
from sklearn.neural_network import MLPRegressor

In [14]:
model = on.Model(MLPRegressor,
                 lags=30)
model.fit(ts)
model.predict(5)

