# Bonus assignment

**Joris LIMONIER**

---

In this assignment, we try to predict the number of passengers through time. We will use the airline dataset.


## Data Preprocessing

In [62]:
from pathlib import Path

import airline
import airline_simple as asi
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset, TensorDataset
from tqdm import tqdm

pio.templates.default = "plotly_white"


In [63]:
%reload_ext autoreload
%autoreload 2

### Load the dataset


In [64]:
filepath = Path("airline_passenger.txt")
air = pd.read_csv(
  filepath,
  parse_dates=["month"],
  names=["month", "passengers"],
  index_col="month",
  header=0,
  dtype={"passengers": "float32"},
)
air.head()

Unnamed: 0_level_0,passengers
month,Unnamed: 1_level_1
1949-01-01,112.0
1949-02-01,118.0
1949-03-01,132.0
1949-04-01,129.0
1949-05-01,121.0


### Split the dataset into train and test
We use 1/3 of the dataset for testing. The remaining 2/3 is further split into 80% training and 20% validation.

In [65]:
test_size = 1 / 3 
val_size = 0.2 # 20% of the training set is used for validation

train_val, test = train_test_split(air, test_size=test_size, shuffle=False)

scaler = StandardScaler()
train_val = pd.DataFrame(scaler.fit_transform(train_val), columns=train_val.columns, index=train_val.index)
test = pd.DataFrame(scaler.transform(test), columns=test.columns, index=test.index)

# train_val = scaler.fit_transform(train_val)
# test = scaler.transform(test)

train, val = train_test_split(train_val, test_size=val_size, shuffle=False)

We plot the train, validation and test sets with different colors.

In [66]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=train.index, y=train.passengers, name="train", mode="lines"))
fig.add_trace(go.Scatter(x=val.index, y=val.passengers, name="val", mode="lines"))
fig.add_trace(go.Scatter(x=test.index, y=test.passengers, name="test", mode="lines"))
fig.update_layout(title="Airline passengers")
fig.show()


We define constants to use for training:
- `SEQ_LENGTH`: the number of time steps to use for training, *i.e.* the number of previous months to use to predict the next month
- `N_EPOCHS`: the maximum number of epochs to train for
- `BATCH_SIZE`: the batch size to use for training

In [77]:
SEQ_LENGTH = 1
N_EPOCHS = 200
BATCH_SIZE = 8


In [78]:

# Generate train, validation and test sequences
train_sequences = airline.create_sequences(data=train, seq_length=SEQ_LENGTH)
val_sequences = airline.create_sequences(data=val, seq_length=SEQ_LENGTH)
test_sequences = airline.create_sequences(data=test, seq_length=SEQ_LENGTH)
train_sequences[0]

(            passengers
 month                 
 1949-01-01   -1.421646,
 passengers   -1.33778
 Name: 1949-02-01 00:00:00, dtype: float32)

In [79]:
train_dataset = airline.AirlineDataset(sequences=train_sequences)
val_dataset = airline.AirlineDataset(sequences=val_sequences)
test_dataset = airline.AirlineDataset(sequences=test_sequences)

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

for batch in train_dataloader:
  print(batch)
  break

{'sequence': tensor([[[-1.4216]],

        [[-1.3378]],

        [[-1.1421]],

        [[-1.1840]],

        [[-1.2958]],

        [[-1.1002]],

        [[-0.9184]],

        [[-0.9184]]]), 'label': tensor([[-1.3378],
        [-1.1421],
        [-1.1840],
        [-1.2958],
        [-1.1002],
        [-0.9184],
        [-0.9184],
        [-1.0862]])}


In [80]:
lstm = asi.CustomLSTM(input_size=1, hidden_size=1, num_layers=1, output_size=1)
optimizer = optim.Adam(lstm.parameters(), lr=0.08)
loss_fn = nn.MSELoss()
predictor = asi.PassengerPredictor(model=lstm, optimizer=optimizer, loss_fn=loss_fn)
train_losses, val_losses = predictor.train(
  model=lstm,
  train_dataloader=train_dataloader,
  val_dataloader=val_dataloader,
  optimizer=optimizer,
  loss_fn=loss_fn,
  n_epochs=N_EPOCHS,
)


Epoch 0: train loss 0.7224, val loss 3.2247
Epoch 1: train loss 0.4406, val loss 2.4317
Epoch 2: train loss 0.2674, val loss 1.4010
Epoch 3: train loss 0.2244, val loss 1.0448
Epoch 4: train loss 0.1456, val loss 0.9895
Epoch 5: train loss 0.1191, val loss 1.2824
Epoch 6: train loss 0.1188, val loss 1.2749
Epoch 7: train loss 0.1349, val loss 1.2821
Epoch 8: train loss 0.1001, val loss 1.0758
Epoch 9: train loss 0.1028, val loss 1.1155
Epoch 10: train loss 0.0919, val loss 1.0884
Epoch 11: train loss 0.0996, val loss 1.0978
Epoch 12: train loss 0.0865, val loss 0.9864
Epoch 13: train loss 0.0964, val loss 1.0293
Epoch 14: train loss 0.0845, val loss 0.9199
Epoch 15: train loss 0.1018, val loss 0.9789
Epoch 16: train loss 0.0845, val loss 0.8462
Epoch 17: train loss 0.1037, val loss 0.9447
Epoch 18: train loss 0.0832, val loss 0.8001
Epoch 19: train loss 0.0992, val loss 0.9099
Epoch 20: train loss 0.0807, val loss 0.7720
Epoch 21: train loss 0.0932, val loss 0.8678
Epoch 22: train loss

In [81]:
def predict(model, dataloader):
  model.eval()
  preds = []
  for batch in dataloader:
    print(batch["sequence"])
    pred = model(batch["sequence"])
    preds.append(pred)
  return torch.cat(preds).detach().numpy()
predict(lstm, test_dataloader)

tensor([[[1.4158]],

        [[1.2201]],

        [[1.9889]],

        [[1.8771]],

        [[1.9749]],

        [[2.9114]],

        [[3.5125]],

        [[3.5404]]])
tensor([[[2.6598]],

        [[1.8631]],

        [[1.2760]],

        [[1.7094]],

        [[1.7653]],

        [[1.4578]],

        [[2.0728]],

        [[1.8771]]])
tensor([[[2.0868]],

        [[3.0931]],

        [[3.8759]],

        [[4.0716]],

        [[2.6598]],

        [[2.0308]],

        [[1.3459]],

        [[1.7233]]])
tensor([[[2.0448]],

        [[1.7932]],

        [[2.6878]],

        [[2.5480]],

        [[2.8835]],

        [[3.6103]],

        [[4.6726]],

        [[4.8264]]])
tensor([[[3.4845]],

        [[2.7018]],

        [[2.0728]],

        [[2.6738]],

        [[2.8415]],

        [[2.4781]],

        [[2.8695]],

        [[3.4566]]])
tensor([[[3.6103]],

        [[4.4909]],

        [[5.7070]],

        [[5.4833]],

        [[4.1135]],

        [[3.4566]],

        [[2.4641]]])


array([[0.98895586],
       [0.93959653],
       [1.0697905 ],
       [1.0593091 ],
       [1.0685829 ],
       [1.1103901 ],
       [1.1176673 ],
       [1.1178652 ],
       [1.104524  ],
       [1.0578591 ],
       [0.95525455],
       [1.039523  ],
       [1.0467268 ],
       [0.9977077 ],
       [1.0764824 ],
       [1.0593091 ],
       [1.077511  ],
       [1.1133622 ],
       [1.1196758 ],
       [1.1203736 ],
       [1.104524  ],
       [1.0732516 ],
       [0.97302234],
       [1.0413858 ],
       [1.0743531 ],
       [1.0500904 ],
       [1.1053001 ],
       [1.1010374 ],
       [1.10985   ],
       [1.1183239 ],
       [1.1215967 ],
       [1.1217741 ],
       [1.1174604 ],
       [1.1056753 ],
       [1.0764824 ],
       [1.1049167 ],
       [1.1089932 ],
       [1.0985113 ],
       [1.1095704 ],
       [1.1172446 ],
       [1.1183239 ],
       [1.1213318 ],
       [1.1223093 ],
       [1.1222249 ],
       [1.1204985 ],
       [1.1172446 ],
       [1.0979706 ]], dtype=float3

In [61]:
for idx in range(N_EPOCHS):
  for batch in tqdm(train_dataloader):
    optimizer.zero_grad()
    pred = lstm(batch["sequence"])
    loss = criterion(pred, batch["label"])
    loss.backward()
    optimizer.step()
  # sequence = torch.tensor(sequence.values).double()
  # label = torch.tensor(label.values).double()
  optimizer = optim.Adam(lstm.parameters(), lr=0.01)

  with torch.no_grad():
    future = 1
    pred = lstm(test_input, future=future)
    loss = criterion(pred[:, :-future], test_target)
    print("test loss:", loss.item())
    y = pred.detach().numpy()
  if idx >= 3:
    break


  0%|          | 0/37 [00:00<?, ?it/s]


NameError: name 'criterion' is not defined

In [None]:

data_module = airline.AirlineDataModule(
  train_sequences=train_sequences,
  val_sequences=val_sequences,
  test_sequences=test_sequences,
  batch_size=BATCH_SIZE,
)
data_module.setup()

# Create the dataset
train_dataset = airline.AirlineDataset(sequences=train_sequences)

# Print an example from the dataset
for item in train_dataset:
  print(f"""Using: {item["sequence"]}""")
  print(f"""The model is trying to predict: {item["label"]}""")
  break


## Model

We define the model and start training.

In [None]:
lr = 0.001 # learning rate

# Create the model
model = airline.PassengerPredictor(n_features=1, lr=lr, weight_decay=1e-2)
model


dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.1 and num_layers=1



PassengerPredictor(
  (model): PassengerPredictionModel(
    (lstm): LSTM(1, 20, batch_first=True, dropout=0.1)
    (linear): Linear(in_features=20, out_features=1, bias=True)
  )
  (criterion): MSELoss()
)

For monitoring the training, we use TensorBoard. We also save the best model based on the validation loss.

In [None]:
# Create a callback to save the best model
checkpoint_callback = pl.callbacks.ModelCheckpoint(
  monitor="val_loss",
  dirpath="checkpoints",
  save_top_k=1,
  mode="min",
  filename="best_checkpoint",
  verbose=True,
)

# Create a logger for TensorBoard
logger = pl.loggers.TensorBoardLogger("logs", name="airline")

# Create a callback to stop training early if the validation loss does not improve
early_stop_callback = pl.callbacks.EarlyStopping(
  monitor="val_loss",
  patience=10,
  min_delta=400,
  verbose=True,
  mode="min",
)

# Create the trainer
trainer = pl.Trainer(
  logger=logger,
  callbacks=[checkpoint_callback, early_stop_callback],
  max_epochs=N_EPOCHS,
  gpus=1,
)



Setting `Trainer(gpus=1)` is deprecated in v1.7 and will be removed in v2.0. Please use `Trainer(accelerator='gpu', devices=1)` instead.

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Train the model.
trainer.fit(model, data_module)


Checkpoint directory /media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints exists and is not empty.

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                     | Params
-------------------------------------------------------
0 | model     | PassengerPredictionModel | 1.9 K 
1 | criterion | MSELoss                  | 0     
-------------------------------------------------------
1.9 K     Trainable params
0         Non-trainable params
1.9 K     Total params
0.007     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]


The number of training batches (38) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.



Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved. New best score: 103895.609
Epoch 0, global step 38: 'val_loss' reached 103895.60938 (best 103895.60938), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 1, global step 76: 'val_loss' reached 103574.68750 (best 103574.68750), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 597.242 >= min_delta = 400. New best score: 103298.367
Epoch 2, global step 114: 'val_loss' reached 103298.36719 (best 103298.36719), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 3, global step 152: 'val_loss' reached 103110.75781 (best 103110.75781), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 4, global step 190: 'val_loss' reached 102909.85156 (best 102909.85156), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 899.141 >= min_delta = 400. New best score: 102399.227
Epoch 5, global step 228: 'val_loss' reached 102399.22656 (best 102399.22656), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 435.727 >= min_delta = 400. New best score: 101963.500
Epoch 6, global step 266: 'val_loss' reached 101963.50000 (best 101963.50000), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 417.211 >= min_delta = 400. New best score: 101546.289
Epoch 7, global step 304: 'val_loss' reached 101546.28906 (best 101546.28906), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 8, global step 342: 'val_loss' reached 101214.87500 (best 101214.87500), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 632.148 >= min_delta = 400. New best score: 100914.141
Epoch 9, global step 380: 'val_loss' reached 100914.14062 (best 100914.14062), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 10, global step 418: 'val_loss' reached 100630.14062 (best 100630.14062), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 557.492 >= min_delta = 400. New best score: 100356.648
Epoch 11, global step 456: 'val_loss' reached 100356.64844 (best 100356.64844), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 12, global step 494: 'val_loss' reached 100090.67188 (best 100090.67188), saving model to '/media/joris/uca-msc-dsai/dl_computer_vision/assignments/1/checkpoints/best_checkpoint-v1.ckpt' as top 1


## Evaluation

We load the best model and evaluate it on the test set. However, we see that the model doesn't work since it always outputs the same value for all time steps. In our experiments, we tried to use different architectures (hidden size, number of stacked layers, dropout, learning rate, train/val/test splits), but we always got the same result. One possible explanation is that the data should be normalized before training, but I doubt it as, in my opinion, this should only be necessary if we have several features with different scales. In our case, we have only one feature (the number of passengers), so the network should be able to learn the scale.

It is also worth noting that I sent an email to abid.ali@inria.fr and francois.bremond@inria.fr to ask for help, but I didn't get any response. This is a pity as I would have liked to know what I did wrong and I believe there is not much change needed to make the model work.

In [None]:
checkpoint_number = 1
checkpoint_path = f"checkpoints/best_checkpoint-v{checkpoint_number}.ckpt"
trained_model = airline.PassengerPredictor.load_from_checkpoint(
  checkpoint_path=checkpoint_path, n_features=1
)
test_dataset = airline.AirlineDataset(sequences=test_sequences)

sequences = []
labels = []

for item in tqdm(test_dataset):
  sequence = item["sequence"]
  label = item["label"]
  _, prediction = trained_model(sequence)
  print(sequence, prediction, label)


100%|██████████| 47/47 [00:00<00:00, 939.77it/s]

tensor([[315.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([301.])
tensor([[301.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([356.])
tensor([[356.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([348.])
tensor([[348.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([355.])
tensor([[355.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([422.])
tensor([[422.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([465.])
tensor([[465.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([467.])
tensor([[467.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([404.])
tensor([[404.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([347.])
tensor([[347.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([305.])
tensor([[305.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([336.])
tensor([[336.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([340.])
tensor([[340.]]) tensor([32.3449], grad_fn=<AddBackward0>) tensor([318.])
tensor([[318.]]) tensor([32.3449], gra


