# Bonus assignment

**Joris LIMONIER**

---

In this assignment, we try to predict the number of passengers through time. We will use the airline dataset.


## Data Preprocessing

In [44]:
from pathlib import Path

import airline
import airline_simple as asi
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torch.utils.data import DataLoader, Dataset, TensorDataset
from tqdm import tqdm

pio.templates.default = "plotly_white"

In [45]:
%reload_ext autoreload
%autoreload 2

### Load the dataset


In [46]:
filepath = Path("airline_passenger.txt")
air = pd.read_csv(
  filepath,
  parse_dates=["month"],
  names=["month", "passengers"],
  index_col="month",
  header=0,
  dtype={"passengers": "float32"},
)
air

Unnamed: 0_level_0,passengers
month,Unnamed: 1_level_1
1949-01-01,112.0
1949-02-01,118.0
1949-03-01,132.0
1949-04-01,129.0
1949-05-01,121.0
...,...
1960-08-01,606.0
1960-09-01,508.0
1960-10-01,461.0
1960-11-01,390.0


### Split the dataset into train and test
We use 1/3 of the dataset for testing. The remaining 2/3 is further split into 80% training and 20% validation.

In [102]:
test_size = 1/3  # % of the data is used for testing
val_size = 0.2  # % of the training set is used for validation

train_val, test = train_test_split(air, test_size=test_size, shuffle=False)

# Scale the data
# scaler = StandardScaler()
scaler = MinMaxScaler(feature_range=(0, 1))
train_val = pd.DataFrame(
  data=scaler.fit_transform(train_val),
  columns=train_val.columns,
  index=train_val.index,
)
test = pd.DataFrame(
  scaler.transform(test),
  columns=test.columns,
  index=test.index,
)
train, val = train_test_split(train_val, test_size=val_size, shuffle=False)
# train = train_val

In [103]:
scaler.__dict__

{'feature_range': (0, 1),
 'copy': True,
 'clip': False,
 'feature_names_in_': array(['passengers'], dtype=object),
 'n_features_in_': 1,
 'n_samples_seen_': 96,
 'scale_': array([0.00323625], dtype=float32),
 'min_': array([-0.33656958], dtype=float32),
 'data_min_': array([104.], dtype=float32),
 'data_max_': array([413.], dtype=float32),
 'data_range_': array([309.], dtype=float32)}

We plot the train, validation and test sets with different colors.

In [105]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=train.index, y=train.passengers, name="train", mode="lines"))
fig.add_trace(go.Scatter(x=val.index, y=val.passengers, name="val", mode="lines"))
fig.add_trace(go.Scatter(x=test.index, y=test.passengers, name="test", mode="lines"))
fig.update_layout(title="Airline passengers")
fig.show()


We define constants to use for training:
- `SEQ_LENGTH`: the number of time steps to use for training, *i.e.* the number of previous months to use to predict the next month
- `N_EPOCHS`: the maximum number of epochs to train for
- `BATCH_SIZE`: the batch size to use for training

In [133]:
SEQ_LENGTH = 2
N_EPOCHS = 150
BATCH_SIZE = 4


In [134]:
# Generate train, validation and test sequences
train_sequences = airline.create_sequences(data=train, seq_length=SEQ_LENGTH)
val_sequences = airline.create_sequences(data=val, seq_length=SEQ_LENGTH)
test_sequences = airline.create_sequences(data=test, seq_length=SEQ_LENGTH)
train_sequences[0]

(            passengers
 month                 
 1949-01-01    0.025890
 1949-02-01    0.045307,
 passengers    0.090615
 Name: 1949-03-01 00:00:00, dtype: float32)

In [135]:
train_dataset = airline.AirlineDataset(sequences=train_sequences)
val_dataset = airline.AirlineDataset(sequences=val_sequences)
test_dataset = airline.AirlineDataset(sequences=test_sequences)

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

for batch in train_dataloader:
  print(batch)
  break

{'sequence': tensor([[[0.0259],
         [0.0453]],

        [[0.0453],
         [0.0906]],

        [[0.0906],
         [0.0809]],

        [[0.0809],
         [0.0550]]]), 'label': tensor([[0.0906],
        [0.0809],
        [0.0550],
        [0.1003]])}


In [136]:
lstm = asi.CustomLSTM(input_size=1, hidden_size=50, num_layers=3, output_size=1)
print(lstm)
optimizer = optim.Adam(lstm.parameters(), lr=0.0002)
loss_fn = nn.MSELoss()
predictor = asi.PassengerPredictor(model=lstm, optimizer=optimizer, loss_fn=loss_fn)
train_losses, val_losses = predictor.train(
  model=lstm,
  train_dataloader=train_dataloader,
  val_dataloader=val_dataloader,
  optimizer=optimizer,
  loss_fn=loss_fn,
  n_epochs=N_EPOCHS,
)

losses = pd.DataFrame({"train": train_losses, "val": val_losses})
px.line(losses, y=["train", "val"], title="Losses")

CustomLSTM(
  (lstm): LSTM(1, 50, num_layers=3, batch_first=True)
  (fc): Linear(in_features=50, out_features=1, bias=True)
)
Epoch 0: train loss 0.0713, val loss 0.3923
Epoch 1: train loss 0.0579, val loss 0.3539
Epoch 2: train loss 0.0468, val loss 0.3169
Epoch 3: train loss 0.0376, val loss 0.2805
Epoch 4: train loss 0.0306, val loss 0.2465
Epoch 5: train loss 0.0264, val loss 0.2184
Epoch 6: train loss 0.0249, val loss 0.1991
Epoch 7: train loss 0.0248, val loss 0.1882
Epoch 8: train loss 0.0251, val loss 0.1828
Epoch 9: train loss 0.0252, val loss 0.1802
Epoch 10: train loss 0.0252, val loss 0.1789
Epoch 11: train loss 0.0252, val loss 0.1780
Epoch 12: train loss 0.0251, val loss 0.1773
Epoch 13: train loss 0.0251, val loss 0.1765
Epoch 14: train loss 0.0250, val loss 0.1758
Epoch 15: train loss 0.0249, val loss 0.1751
Epoch 16: train loss 0.0249, val loss 0.1743
Epoch 17: train loss 0.0248, val loss 0.1735
Epoch 18: train loss 0.0247, val loss 0.1727
Epoch 19: train loss 0.0246, 

In [137]:
pred.head(), test.head()

(            passengers
 month                 
 1957-06-01    0.427235
 1957-07-01    0.426979
 1957-08-01    0.425136
 1957-09-01    0.425196
 1957-10-01    0.423515,
             passengers
 month                 
 1957-01-01    0.682848
 1957-02-01    0.637540
 1957-03-01    0.815534
 1957-04-01    0.789644
 1957-05-01    0.812298)

In [139]:
y_pred = predictor.predict(lstm, test_dataloader)
pred = pd.DataFrame(y_pred.flatten(), index=test.index[SEQ_LENGTH:], columns=["passengers"])

y_pred_val = predictor.predict(lstm, val_dataloader)
pred_val = pd.DataFrame(y_pred_val.flatten(), index=val.index[SEQ_LENGTH:], columns=["passengers"])

fig = go.Figure()
fig.add_trace(go.Scatter(x=train.index, y=train.passengers, name="train", mode="lines"))
fig.add_trace(go.Scatter(x=val.index, y=val.passengers, name="val", mode="lines"))
fig.add_trace(go.Scatter(x=test.index, y=test.passengers, name="test", mode="lines"))
fig.add_trace(go.Scatter(x=pred_val.index, y=pred_val.passengers, name="pred_val", mode="lines"))
fig.add_trace(go.Scatter(x=pred.index, y=pred.passengers, name="pred", mode="lines"))
fig.update_layout(title="Airline passengers")
fig.show()


In [254]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=train_data["Month"], y=train_data["Passengers"], name="train", mode="lines"))
fig.add_trace(go.Scatter(x=test_data["Month"], y=test_data["Passengers"], name="test", mode="lines"))
fig.add_trace(go.Scatter(x=test_data["Month"][1:], y=test_outputs.flatten(), name="pred", mode="lines"))
fig.update_layout(title="Airline passengers")



The behavior of `series[i:j]` with an integer-dtype index is deprecated. In a future version, this will be treated as *label-based* indexing, consistent with e.g. `series[i]` lookups. To retain the old behavior, use `series.iloc[i:j]`. To get the future behavior, use `series.loc[i:j]`.



## Model

We define the model and start training.

In [None]:
lr = 0.001 # learning rate

# Create the model
model = airline.PassengerPredictor(n_features=1, lr=lr, weight_decay=1e-2)
model


dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.1 and num_layers=1



PassengerPredictor(
  (model): PassengerPredictionModel(
    (lstm): LSTM(1, 20, batch_first=True, dropout=0.1)
    (linear): Linear(in_features=20, out_features=1, bias=True)
  )
  (criterion): MSELoss()
)

For monitoring the training, we use TensorBoard. We also save the best model based on the validation loss.

In [None]:
# Create a callback to save the best model
checkpoint_callback = pl.callbacks.ModelCheckpoint(
  monitor="val_loss",
  dirpath="checkpoints",
  save_top_k=1,
  mode="min",
  filename="best_checkpoint",
  verbose=True,
)

# Create a logger for TensorBoard
logger = pl.loggers.TensorBoardLogger("logs", name="airline")

# Create a callback to stop training early if the validation loss does not improve
early_stop_callback = pl.callbacks.EarlyStopping(
  monitor="val_loss",
  patience=10,
  min_delta=400,
  verbose=True,
  mode="min",
)

# Create the trainer
trainer = pl.Trainer(
  logger=logger,
  callbacks=[checkpoint_callback, early_stop_callback],
  max_epochs=N_EPOCHS,
  gpus=1,
)



Setting `Trainer(gpus=1)` is deprecated in v1.7 and will be removed in v2.0. Please use `Trainer(accelerator='gpu', devices=1)` instead.

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Train the model.
trainer.fit(model, data_module)


Checkpoint directory /media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints exists and is not empty.

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                     | Params
-------------------------------------------------------
0 | model     | PassengerPredictionModel | 1.9 K 
1 | criterion | MSELoss                  | 0     
-------------------------------------------------------
1.9 K     Trainable params
0         Non-trainable params
1.9 K     Total params
0.007     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]


The number of training batches (38) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.



Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved. New best score: 103895.609
Epoch 0, global step 38: 'val_loss' reached 103895.60938 (best 103895.60938), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 1, global step 76: 'val_loss' reached 103574.68750 (best 103574.68750), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 597.242 >= min_delta = 400. New best score: 103298.367
Epoch 2, global step 114: 'val_loss' reached 103298.36719 (best 103298.36719), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 3, global step 152: 'val_loss' reached 103110.75781 (best 103110.75781), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 4, global step 190: 'val_loss' reached 102909.85156 (best 102909.85156), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 899.141 >= min_delta = 400. New best score: 102399.227
Epoch 5, global step 228: 'val_loss' reached 102399.22656 (best 102399.22656), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 435.727 >= min_delta = 400. New best score: 101963.500
Epoch 6, global step 266: 'val_loss' reached 101963.50000 (best 101963.50000), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 417.211 >= min_delta = 400. New best score: 101546.289
Epoch 7, global step 304: 'val_loss' reached 101546.28906 (best 101546.28906), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 8, global step 342: 'val_loss' reached 101214.87500 (best 101214.87500), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 632.148 >= min_delta = 400. New best score: 100914.141
Epoch 9, global step 380: 'val_loss' reached 100914.14062 (best 100914.14062), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 10, global step 418: 'val_loss' reached 100630.14062 (best 100630.14062), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 557.492 >= min_delta = 400. New best score: 100356.648
Epoch 11, global step 456: 'val_loss' reached 100356.64844 (best 100356.64844), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 12, global step 494: 'val_loss' reached 100090.67188 (best 100090.67188), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


## Evaluation

We load the best model and evaluate it on the test set. However, we see that the model doesn't work since it always outputs the same value for all time steps. In our experiments, we tried to use different architectures (hidden size, number of stacked layers, dropout, learning rate, train/val/test splits), but we always got the same result. One possible explanation is that the data should be normalized before training, but I doubt it as, in my opinion, this should only be necessary if we have several features with different scales. In our case, we have only one feature (the number of passengers), so the network should be able to learn the scale.

It is also worth noting that I sent an email to abid.ali@inria.fr and francois.bremond@inria.fr to ask for help, but I didn't get any response. This is a pity as I would have liked to know what I did wrong and I believe there is not much change needed to make the model work.

In [None]:
checkpoint_number = 1
checkpoint_path = f"checkpoints/best_checkpoint-v{checkpoint_number}.ckpt"
trained_model = airline.PassengerPredictor.load_from_checkpoint(
  checkpoint_path=checkpoint_path, n_features=1
)
test_dataset = airline.AirlineDataset(sequences=test_sequences)

sequences = []
labels = []

for item in tqdm(test_dataset):
  sequence = item["sequence"]
  label = item["label"]
  _, prediction = trained_model(sequence)
  print(sequence, prediction, label)


100%|██████████| 47/47 [00:00<00:00, 939.77it/s]

tensor([[315.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([301.])
tensor([[301.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([356.])
tensor([[356.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([348.])
tensor([[348.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([355.])
tensor([[355.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([422.])
tensor([[422.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([465.])
tensor([[465.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([467.])
tensor([[467.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([404.])
tensor([[404.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([347.])
tensor([[347.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([305.])
tensor([[305.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([336.])
tensor([[336.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([340.])
tensor([[340.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([318.])
tensor([[318.]]) tensor([32.3449], gra


