<a href="https://colab.research.google.com/github/lsteffenel/CHPS0704/blob/main/TP5/4-CNN-UK_Minimal_temperature.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# UK minimal temperature prediction

Uploader le fichier "MET_Office_Weather_Data.csv" (séparateur « ; ») avant d'exécuter les cellules suivantes.

In [None]:
import pandas as pd
import os

import torch
import torch.nn.functional as F

import copy
import random
import sys

import matplotlib.pyplot as plt


# Fix the seeds of Python's and PyTorch's random number generators.
random.seed(1)
torch.manual_seed(1)

# `features` is the sliding-window length (also the network's input width);
# `ts_len` is passed as `test_len` to get_training_datasets further down.
features = 120
ts_len = 60



In [None]:
def raw_time_series():
    """Return the 'tmin' column of the 'oxford' station as a plain list.

    The values are read from the semicolon-delimited file
    'MET_Office_Weather_Data.csv' in the current directory and returned
    as-is, missing values included.
    """
    frame = pd.read_csv('MET_Office_Weather_Data.csv', delimiter=';')
    is_oxford = frame['station'] == 'oxford'
    oxford_rows = frame.loc[is_oxford]
    return oxford_rows['tmin'].tolist()

In [None]:
def interpolated_time_series():
    """Return the 'oxford' 'tmin' series with gaps interpolated, as a list.

    The values are read from the semicolon-delimited file
    'MET_Office_Weather_Data.csv'. Missing values are filled with pandas'
    default interpolation, and whatever is still missing afterwards is
    dropped.
    """
    frame = pd.read_csv('MET_Office_Weather_Data.csv', delimiter=';')
    is_oxford = frame['station'] == 'oxford'
    oxford_tmin = frame.loc[is_oxford]['tmin']
    filled = oxford_tmin.interpolate()
    return filled.dropna().tolist()

In [None]:
def sliding_window(ts, features):
    """Cut a series into (window, next value) training pairs.

    Args:
        ts: sequence of values, oldest first.
        features: number of consecutive values in each input window.

    Returns:
        ``(X, Y)`` where ``X[k]`` is ``ts[k:k + features]`` and ``Y[k]`` is
        the one-element list holding the value right after that window.
        Both lists are empty when the series has no value following a
        full window.
    """
    n_windows = len(ts) - features
    X = [ts[start:start + features] for start in range(n_windows)]
    Y = [[ts[start + features]] for start in range(n_windows)]
    return X, Y


In [None]:
def get_training_datasets(features, test_len):
    """Build the train / validation / test tensors from the interpolated series.

    The series is cut into sliding windows of `features` values, each paired
    with the value that follows it. The last `test_len` windows form the test
    set. Of the remaining windows, the first ``round(len(ts) * 0.7)`` are used
    for training and the rest for validation.

    Args:
        features: number of past values in each input window.
        test_len: number of trailing windows held out for testing. Zero (or a
            negative value) holds out nothing.

    Returns:
        Tuple ``(x_train, x_val, x_test, y_train, y_val, y_test)`` of tensors
        built with ``torch.tensor`` from the corresponding window lists.
    """
    ts = interpolated_time_series()
    X, Y = sliding_window(ts, features)

    # Split on an explicit index: `X[0:-test_len]` is empty when test_len == 0,
    # which would silently discard every training window.
    test_start = max(len(X) - max(test_len, 0), 0)
    X_rest, X_test = X[:test_start], X[test_start:]
    Y_rest, Y_test = Y[:test_start], Y[test_start:]

    # NOTE: the training size is 70% of the raw series length, not of the
    # number of windows, so the effective train share of the windows is larger.
    train_len = round(len(ts) * 0.7)

    X_train, X_val = X_rest[:train_len], X_rest[train_len:]
    Y_train, Y_val = Y_rest[:train_len], Y_rest[train_len:]

    x_train = torch.tensor(data=X_train)
    y_train = torch.tensor(data=Y_train)

    x_val = torch.tensor(data=X_val)
    y_val = torch.tensor(data=Y_val)

    x_test = torch.tensor(data=X_test)
    y_test = torch.tensor(data=Y_test)

    return x_train, x_val, x_test, y_train, y_val, y_test

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

class HwesPredictor(torch.nn.Module):
    """Holt-Winters exponential smoothing baseline with a module-style call.

    A fresh ExponentialSmoothing model (no trend, additive seasonality with
    a period of 12) is fitted on every row of the input, and its one-step
    forecast is used as the prediction for that row.
    """

    def forward(self, x):
        """Return a tensor holding one single-value forecast per row of `x`."""

        def next_value(window):
            # Fit on this window only and keep the first forecast value.
            fitted = ExponentialSmoothing(
                window,
                trend=None,
                seasonal="add",
                seasonal_periods=12,
            ).fit()
            return fitted.forecast()[0]

        return torch.tensor(data=[[next_value(row)] for row in x.tolist()])

In [None]:
#TODO -> Remplir la classe suivante pour avoir un predicteur SARIMA

from statsmodels.tsa.statespace.sarimax import SARIMAX

class SarimaxPredictor(torch.nn.Module):
    """Seasonal ARIMA baseline with a module-style call.

    A fresh SARIMAX model with ``order=(1, 1, 1)`` and
    ``seasonal_order=(1, 1, 1, 12)`` is fitted on every row of the input, and
    its one-step forecast is used as the prediction for that row.

    SARIMAX reference:
    https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html
    """

    def forward(self, x):
        """Return a tensor holding one single-value forecast per row of `x`."""
        last_values = []
        for window in x.tolist():
            model = SARIMAX(window,
                            order=(1, 1, 1),
                            seasonal_order=(1, 1, 1, 12))
            # disp=False silences the optimizer's per-fit console output.
            results = model.fit(disp=False)
            forecast = results.forecast()
            last_values.append([forecast[0]])
        return torch.tensor(data=last_values)

In [None]:
class DL(torch.nn.Module):
    """1-D convolutional regressor producing one value per input row.

    Two Conv1d + max-pool + ReLU stages extract features from the input
    window; three linear layers (with dropout after the first) map the
    flattened features to a single output.

    Args:
        n_inp: length of each input window.
        l_1: width of the first linear layer.
        l_2: width of the second linear layer.
        conv1_out: output channels of the first convolution; the second
            convolution outputs twice as many.
        conv1_kernel: kernel size of the first convolution.
        conv2_kernel: kernel size of the second convolution.
        drop1: dropout probability applied after the first linear layer.
    """

    def __init__(self, n_inp, l_1, l_2, conv1_out, conv1_kernel, conv2_kernel, drop1 = 0):
        super().__init__()
        conv1_out_ch = conv1_out
        conv2_out_ch = conv1_out * 2
        self.dropout_lin1 = drop1

        self.pool = torch.nn.MaxPool1d(kernel_size = 2)

        self.conv1 = torch.nn.Conv1d(in_channels = 1, out_channels = conv1_out_ch, kernel_size = conv1_kernel,
                                     padding = conv1_kernel - 1)

        self.conv2 = torch.nn.Conv1d(in_channels = conv1_out_ch, out_channels = conv2_out_ch,
                                     kernel_size = conv2_kernel,
                                     padding = conv2_kernel - 1)

        # Push a dummy row through the convolutional stack to discover the
        # flattened feature size that the first linear layer must accept.
        with torch.no_grad():
            n_flat = self.feature_stack(torch.zeros(1, n_inp)).size(1)
        self.lin1 = torch.nn.Linear(n_flat, l_1)
        self.lin2 = torch.nn.Linear(l_1, l_2)
        self.lin3 = torch.nn.Linear(l_2, 1)

    def feature_stack(self, x):
        """Run the convolutional stages on `x` (batch, n_inp) and flatten per row."""
        x = x.unsqueeze(1)  # add the single input channel expected by conv1
        x = F.relu(self.pool(self.conv1(x)))
        x = F.relu(self.pool(self.conv2(x)))
        x = x.flatten(start_dim = 1)
        return x

    def fclassification_stack(self, x):
        """Map flattened features to one output value per row."""
        # F.dropout defaults to training=True; tie it to the module mode so
        # that dropout is switched off after .eval().
        x1 = F.dropout(F.relu(self.lin1(x)), p = self.dropout_lin1,
                       training = self.training)
        x2 = F.relu(self.lin2(x1))
        y = self.lin3(x2)
        return y

    def forward(self, x):
        """Return predictions of shape (batch, 1) for input of shape (batch, n_inp)."""
        x = self.feature_stack(x)
        y = self.fclassification_stack(x)
        return y

In [None]:
# Build the datasets, the two statistical baselines and the network.
x_train, x_val, x_test, y_train, y_val, y_test = get_training_datasets(features, ts_len)

sarima_predictor = SarimaxPredictor()
hwes_predictor = HwesPredictor()

net = DL( n_inp = features,
    l_1 = 400,
    l_2 = 48,
    conv1_out = 6,
    conv1_kernel = 36,
    conv2_kernel = 12,
    drop1 = .1
)

optimizer = torch.optim.Adam(params = net.parameters())
abs_loss = torch.nn.L1Loss()

# Snapshot of the network with the lowest validation loss seen so far.
best_model = None
min_val_loss = float('inf')

training_loss = []
validation_loss = []

for t in range(150):

    # Full-batch training step.
    net.train()
    prediction = net(x_train)
    loss = abs_loss(prediction, y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Validation: evaluation mode and no autograd graph, so the recorded
    # loss does not come from a training-mode forward pass.
    net.eval()
    with torch.no_grad():
        val_prediction = net(x_val)
        val_loss = abs_loss(val_prediction, y_val)

    training_loss.append(loss.item())
    validation_loss.append(val_loss.item())

    if val_loss.item() < min_val_loss:
        best_model = copy.deepcopy(net)
        min_val_loss = val_loss.item()

    if t % 10 == 0:
        print(f'epoch {t}: train - {round(loss.item(), 4)}, val: - {round(val_loss.item(), 4)}')

best_model.eval()

In [None]:
# Inference only: no autograd graph is needed for the test predictions.
with torch.no_grad():
    dl_prediction = best_model(x_test)
    sarima_prediction = sarima_predictor(x_test)
    hwes_prediction = hwes_predictor(x_test)

# Mean absolute error of each predictor on the test set, rounded for display.
dl_abs_loss = round(abs_loss(dl_prediction, y_test).item(), 4)
sarima_abs_loss = round(abs_loss(sarima_prediction, y_test).item(), 4)
hwes_abs_loss = round(abs_loss(hwes_prediction, y_test).item(), 4)

print('===')
print('Results on Test Dataset')
print(f'DL Loss: {dl_abs_loss}')
print(f'SARIMA Loss: {sarima_abs_loss}')
print(f'HWES Loss: {hwes_abs_loss}')

In [None]:
# Plot the per-epoch training and validation losses on one figure.
plt.title("Training progress")
for curve, curve_label in ((training_loss, 'training loss'),
                           (validation_loss, 'validation loss')):
    plt.plot(curve, label = curve_label)
plt.legend()
plt.show()


## Écart entre valeurs réelles et valeurs prédites

In [None]:
# Absolute deviation between each predictor's output and the test targets.
test_n = len(y_test)
dl_abs_dev = torch.abs(dl_prediction - y_test)
sarima_abs_dev = torch.abs(sarima_prediction - y_test)
hwes_abs_dev = torch.abs(hwes_prediction - y_test)

# One bar chart per predictor, stacked vertically on the same figure.
fig = plt.figure()
ax1, ax2, ax3 = (fig.add_subplot(3, 1, row) for row in (1, 2, 3))

bar_positions = list(range(test_n))
panels = (
    (ax1, f'Deep Learning Model: {dl_abs_loss}', dl_abs_dev, 'g'),
    (ax2, f'SARIMA Model: {sarima_abs_loss}', sarima_abs_dev, 'r'),
    (ax3, f'HWES Model: {hwes_abs_loss}', hwes_abs_dev, 'brown'),
)
for panel, panel_title, deviation, bar_color in panels:
    panel.set_title(panel_title)
    panel.bar(bar_positions, deviation.view(test_n).tolist(), color = bar_color)

plt.show()

# Challenge

Modifiez votre réseau de neurones pour obtenir un écart absolu moyen (perte L1) inférieur à 1.25 sur le jeu de test.