# Reproducing the Experiment in "Prediction of Pedestrian Speed with Artificial Neural Networks" by Tordeux et al.

In [1]:
# Enable these if automatic reloading of modules is wanted

# Load extension for automatic reload of modules
%load_ext autoreload
# Enable autoreload for all modules
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import inspect

import torch
from torch.utils.data import DataLoader
import logging
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
import tensorboard
from sklearn.preprocessing import MinMaxScaler

import preprocessing
import plotting
import pedestrian_dataset
import pedestrian_net

## Tensorboard

Extension for visualizing the training results.
Should only be loaded once, otherwise there is probably an error message.

To start, run `tensorboard --logdir=dir --port 6006` in a terminal or run the following cell.

In [6]:
%load_ext tensorboard
%tensorboard --logdir lightning_logs --port 6009

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


## Logging

I used Logging to print messages.
If more messages are welcome, use the logging level `logging.INFO` or even `logging.DEBUG`.
If not, use `logging.WARNING`.

In [7]:
# Set Logging Level
logger_format = '%(levelname)s - %(funcName)s \t%(message)s'
logger_level = logging.WARNING
logging.basicConfig(level=logger_level, format=logger_format)

In [8]:
# Set a torch seed
torch.manual_seed(1234)

<torch._C.Generator at 0x1887efbeb70>

## Preprocessing and Loading the Dataset

The different files of data in the format
(`PedID FrameID X Y Z`)
are loaded and converted into the following dictionary format:

`distances` | `speeds`
-|-
Input of our neural network. Array of size $2k+1$ containing the median speed of the $k$ nearest neighbors as the first element and the relative $x$- and $y$-positions of the $k$ nearest neighbors in the following pattern afterwards: $x_1$, $y_1$, $x_2$, $y_2$, ... | Truth value for our neural network. The speed that the pedestrian had in that frame.

To load a list of files, the method `pedestrian_dataset.create_dataset()` is used.
As its first parameter it either takes a list of data files that it should load
or a `pedestrian_dataset.PedestrianDataType` value,
which can be either `BOTTLENECK`, which loads all bottleneck files,
`CORRIDOR`, which loads all corridor files,
or `ALL`, which loads all files.

In [9]:
# Creating datasets with only the smallest corridor scenario with 30 participants
c_015_path = "./Data/Corridor_Data/ug-180-030.txt"
# Note: even when only loading one dataset, it has to be given in a list
c_015_train_val_datasets, c_015_test_dataset = pedestrian_dataset.create_dataset([c_015_path])

# Print the first item from the first train/val dataset part
print(c_015_train_val_datasets[0][0])

{'distances': array([ 296.80893951,   36.1585    ,  -91.69      ,   76.8931    ,
         62.0273    ,   -2.372     ,  201.1667    ,  -21.5438    ,
       -241.436     ,   29.8531    , -262.229     ,   32.8202    ,
        295.624     ,  -18.6053    ,  316.108     ,  -26.2509    ,
        437.719     ,   43.2941    ,  445.954     ,   -0.6295    ,
        559.384     ]), 'speed': 6.112364455756874}


In [10]:
batch_size = 16

# Create a PyTorch dataloader with the dataset

# TODO: I don't know how to do cross validation, so we combine the first 4 train/val datasets
#   to build the train dataset and use the last train/val dataset as the val dataset
#   Maybe we just have to do this everytime (and switch it up)? Could be, but not sure
c_015_temp_train_dataset = torch.utils.data.ConcatDataset(c_015_train_val_datasets[:4])
c_015_temp_val_dataset = c_015_train_val_datasets[4]

c_015_train_loader = DataLoader(c_015_temp_train_dataset, batch_size=batch_size, drop_last=True)
c_015_val_loader = DataLoader(c_015_temp_train_dataset, batch_size=batch_size, drop_last=False)

c_015_test_loader = DataLoader(c_015_test_dataset, batch_size=batch_size, drop_last=False)

# # Print the first value given by the train loader
# for item in c_015_train_loader:
#     print(item)
#     break  # break after printing the first item

In [11]:
# Creating datasets with all scenarios loaded
all_train_val_datasets, all_test_dataset = pedestrian_dataset.create_dataset(
    pedestrian_dataset.PedestrianDataType.ALL
)

# Print the first item from the first train/val dataset part
print(all_train_val_datasets[0][0])

{'distances': array([ 83.28914988, -32.428     ,  24.169     ,  37.9846    ,
        38.714     ,  -2.5528    , -57.585     ,   4.0326    ,
        69.008     ,  81.5544    ,   6.506     , -71.6767    ,
       -54.426     ,  49.6994    , -84.303     , -57.71222   ,
        80.09      ,  86.0264    , -55.428     ,  80.5664    ,
        75.733     ]), 'speed': 2.5783409394414765}


In [12]:
batch_size = 10000

# Create a PyTorch dataloader with the dataset

# TODO: I don't know how to do cross validation, so we combine the first 4 train/val datasets
#   to build the train dataset and use the last train/val dataset as the val dataset
#   Maybe we just have to do this everytime (and switch it up)? Could be, but not sure
all_temp_train_dataset = torch.utils.data.ConcatDataset(all_train_val_datasets[:4])
all_temp_val_dataset = all_train_val_datasets[4]

all_train_loader = DataLoader(all_temp_train_dataset, batch_size=batch_size, drop_last=True)
all_val_loader = DataLoader(all_temp_train_dataset, batch_size=batch_size, drop_last=False)

all_test_loader = DataLoader(all_test_dataset, batch_size=batch_size, drop_last=False)

# # Currently Disabled because for batch_size=16 this get's large
# # Print the first value given by the train loader
# for item in all_train_loader:
#     print(item)
#     break  # break after printing the first item

In [13]:
# Create normalized DataLoaders

normalized_train_loader = DataLoader(preprocessing.normalize_data(all_temp_train_dataset), batch_size=batch_size, drop_last=True)
normalized_val_loader = DataLoader(preprocessing.normalize_data(all_temp_val_dataset), batch_size=batch_size, drop_last=True)
normalized_test_loader = DataLoader(preprocessing.normalize_data(all_test_dataset), batch_size=batch_size, drop_last=False)

## Implementing and Training the Model

Now we need to define our model.

@Parim:
I got quite a lot of work done on the model, but there are still TODOs.
I probably won't be able to work more on the practicum this week, but I think everything should be documented well enough to be extended by you.
(I'm using PyTorch Lightning now, which you'll probably remember from I2DL, and the Neural Network is in `pedestrian_net.py`.)

In [14]:
checkpoint_name = "./.name/checkpoints/2023-07-05--dataAll-ep100-it001.ckpt"

max_epochs = 50
k = 10
hidden_size = 3
learning_rate = 1e-3
optimizer = torch.optim.Adam

In [15]:
# Use GPU if available
if torch.cuda.is_available():
    device = torch.device('cuda')
    print("GPU is available.")
else:
    device = torch.device('cpu')
    print("CPU will be used.")

GPU is available.


In [16]:
# Choose our dataloaders
# train_loader = all_train_loader
# val_loader   = all_val_loader
# test_loader  = all_test_loader

train_loader = normalized_train_loader
val_loader = normalized_val_loader
test_loader = normalized_test_loader

In [17]:
# define an early stopping callback
early_stop_callback = EarlyStopping(monitor="val_loss", mode='min', patience=20)

In [18]:
# Define our model
model = pedestrian_net.PedestrianNet(k=k,
                                     hidden_size=hidden_size,
                                     learning_rate=learning_rate,
                                     optimizer=optimizer
                                     )
model.to(device)

PedestrianNet(
  (model): Sequential(
    (0): Linear(in_features=21, out_features=3, bias=True)
    (1): ReLU()
    (2): Linear(in_features=3, out_features=1, bias=True)
  )
)

In [19]:
trainer = pl.Trainer(
    max_epochs=max_epochs,
    devices='auto',
    accelerator='gpu',
    callbacks=[early_stop_callback],
    log_every_n_steps=1,
    enable_checkpointing=True
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [20]:
trainer.fit(model, train_loader, val_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 70    
-------------------------------------
70        Trainable params
0         Non-trainable params
70        Total params
0.000     Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|                                                              | 0/2 [00:01<?, ?it/s]

  rank_zero_warn(


                                                                                                                       

  rank_zero_warn(


Epoch 0:  81%|█████████████████████▊     | 42/52 [00:05<00:01,  7.22it/s, loss=0.0267, v_num=2, train_loss_step=0.0278]
Validation: 0it [00:00, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/10 [00:00<?, ?it/s][A
Epoch 0:  83%|██████████████████████▎    | 43/52 [00:06<00:01,  7.13it/s, loss=0.0267, v_num=2, train_loss_step=0.0278][A
Epoch 0:  85%|██████████████████████▊    | 44/52 [00:06<00:01,  7.23it/s, loss=0.0267, v_num=2, train_loss_step=0.0278][A
Epoch 0:  87%|███████████████████████▎   | 45/52 [00:06<00:00,  7.34it/s, loss=0.0267, v_num=2, train_loss_step=0.0278][A
Epoch 0:  88%|███████████████████████▉   | 46/52 [00:06<00:00,  7.45it/s, loss=0.0267, v_num=2, train_loss_step=0.0278][A
Epoch 0:  90%|████████████████████████▍  | 47/52 [00:06<00:00,  7.55it/s, loss=0.0267, v_num=2, train_loss_step=0.0278][A
Epoch 0:  92%|████████████████████████▉  | 48/52 [00:06<00:00,  7.65it/s, loss=0.0267, v_num=2, train_loss_s

Epoch 5:  87%|▊| 45/52 [00:21<00:03,  2.10it/s, loss=0.0195, v_num=2, train_loss_step=0.0217, val_loss_step=0.0221, val[A
Epoch 5:  88%|▉| 46/52 [00:21<00:02,  2.14it/s, loss=0.0195, v_num=2, train_loss_step=0.0217, val_loss_step=0.0221, val[A
Epoch 5:  90%|▉| 47/52 [00:21<00:02,  2.18it/s, loss=0.0195, v_num=2, train_loss_step=0.0217, val_loss_step=0.0221, val[A
Epoch 5:  92%|▉| 48/52 [00:21<00:01,  2.22it/s, loss=0.0195, v_num=2, train_loss_step=0.0217, val_loss_step=0.0221, val[A
Epoch 5:  94%|▉| 49/52 [00:21<00:01,  2.26it/s, loss=0.0195, v_num=2, train_loss_step=0.0217, val_loss_step=0.0221, val[A
Epoch 5:  96%|▉| 50/52 [00:21<00:00,  2.30it/s, loss=0.0195, v_num=2, train_loss_step=0.0217, val_loss_step=0.0221, val[A
Epoch 5:  98%|▉| 51/52 [00:21<00:00,  2.34it/s, loss=0.0195, v_num=2, train_loss_step=0.0217, val_loss_step=0.0221, val[A
Epoch 5: 100%|█| 52/52 [00:21<00:00,  2.38it/s, loss=0.0195, v_num=2, train_loss_step=0.0217, val_loss_step=0.0218, val[A
Epoch 6:  81%|▊|

Epoch 10:  96%|▉| 50/52 [00:36<00:01,  1.35it/s, loss=0.0184, v_num=2, train_loss_step=0.0207, val_loss_step=0.0211, va[A
Epoch 10:  98%|▉| 51/52 [00:37<00:00,  1.38it/s, loss=0.0184, v_num=2, train_loss_step=0.0207, val_loss_step=0.0211, va[A
Epoch 10: 100%|█| 52/52 [00:37<00:00,  1.40it/s, loss=0.0184, v_num=2, train_loss_step=0.0207, val_loss_step=0.0209, va[A
Epoch 11:  81%|▊| 42/52 [00:39<00:09,  1.06it/s, loss=0.0182, v_num=2, train_loss_step=0.0206, val_loss_step=0.0209, va[A
Validation: 0it [00:00, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/10 [00:00<?, ?it/s][A
Epoch 11:  83%|▊| 43/52 [00:39<00:08,  1.09it/s, loss=0.0182, v_num=2, train_loss_step=0.0206, val_loss_step=0.0209, va[A
Epoch 11:  85%|▊| 44/52 [00:39<00:07,  1.11it/s, loss=0.0182, v_num=2, train_loss_step=0.0206, val_loss_step=0.0209, va[A
Epoch 11:  87%|▊| 45/52 [00:39<00:06,  1.13it/s, loss=0.0182, v_num=2, train_loss_step=0.0206, val_loss_s

Validation DataLoader 0:   0%|                                                                  | 0/10 [00:00<?, ?it/s][A
Epoch 16:  83%|▊| 43/52 [00:54<00:11,  1.27s/it, loss=0.0177, v_num=2, train_loss_step=0.0201, val_loss_step=0.0204, va[A
Epoch 16:  85%|▊| 44/52 [00:54<00:09,  1.25s/it, loss=0.0177, v_num=2, train_loss_step=0.0201, val_loss_step=0.0204, va[A
Epoch 16:  87%|▊| 45/52 [00:54<00:08,  1.22s/it, loss=0.0177, v_num=2, train_loss_step=0.0201, val_loss_step=0.0204, va[A
Epoch 16:  88%|▉| 46/52 [00:54<00:07,  1.19s/it, loss=0.0177, v_num=2, train_loss_step=0.0201, val_loss_step=0.0204, va[A
Epoch 16:  90%|▉| 47/52 [00:54<00:05,  1.17s/it, loss=0.0177, v_num=2, train_loss_step=0.0201, val_loss_step=0.0204, va[A
Epoch 16:  92%|▉| 48/52 [00:55<00:04,  1.15s/it, loss=0.0177, v_num=2, train_loss_step=0.0201, val_loss_step=0.0204, va[A
Epoch 16:  94%|▉| 49/52 [00:55<00:03,  1.12s/it, loss=0.0177, v_num=2, train_loss_step=0.0201, val_loss_step=0.0204, va[A
Epoch 16:  96%|▉

Epoch 21:  90%|▉| 47/52 [01:10<00:07,  1.49s/it, loss=0.0173, v_num=2, train_loss_step=0.0198, val_loss_step=0.0201, va[A
Epoch 21:  92%|▉| 48/52 [01:10<00:05,  1.46s/it, loss=0.0173, v_num=2, train_loss_step=0.0198, val_loss_step=0.0201, va[A
Epoch 21:  94%|▉| 49/52 [01:10<00:04,  1.44s/it, loss=0.0173, v_num=2, train_loss_step=0.0198, val_loss_step=0.0201, va[A
Epoch 21:  96%|▉| 50/52 [01:10<00:02,  1.41s/it, loss=0.0173, v_num=2, train_loss_step=0.0198, val_loss_step=0.0201, va[A
Epoch 21:  98%|▉| 51/52 [01:10<00:01,  1.38s/it, loss=0.0173, v_num=2, train_loss_step=0.0198, val_loss_step=0.0201, va[A
Epoch 21: 100%|█| 52/52 [01:10<00:00,  1.36s/it, loss=0.0173, v_num=2, train_loss_step=0.0198, val_loss_step=0.020, val[A
Epoch 22:  81%|▊| 42/52 [01:13<00:17,  1.74s/it, loss=0.0173, v_num=2, train_loss_step=0.0197, val_loss_step=0.020, val[A
Validation: 0it [00:00, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/10 [0

Epoch 26: 100%|█| 52/52 [01:25<00:00,  1.65s/it, loss=0.017, v_num=2, train_loss_step=0.0195, val_loss_step=0.0197, val[A
Epoch 27:  81%|▊| 42/52 [01:28<00:21,  2.10s/it, loss=0.017, v_num=2, train_loss_step=0.0194, val_loss_step=0.0197, val[A
Validation: 0it [00:00, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/10 [00:00<?, ?it/s][A
Epoch 27:  83%|▊| 43/52 [01:28<00:18,  2.05s/it, loss=0.017, v_num=2, train_loss_step=0.0194, val_loss_step=0.0197, val[A
Epoch 27:  85%|▊| 44/52 [01:28<00:16,  2.01s/it, loss=0.017, v_num=2, train_loss_step=0.0194, val_loss_step=0.0197, val[A
Epoch 27:  87%|▊| 45/52 [01:28<00:13,  1.96s/it, loss=0.017, v_num=2, train_loss_step=0.0194, val_loss_step=0.0197, val[A
Epoch 27:  88%|▉| 46/52 [01:28<00:11,  1.92s/it, loss=0.017, v_num=2, train_loss_step=0.0194, val_loss_step=0.0197, val[A
Epoch 27:  90%|▉| 47/52 [01:28<00:09,  1.88s/it, loss=0.017, v_num=2, train_loss_step=0.0194, val_loss_st

Epoch 32:  85%|▊| 44/52 [01:43<00:18,  2.35s/it, loss=0.0168, v_num=2, train_loss_step=0.0192, val_loss_step=0.0195, va[A
Epoch 32:  87%|▊| 45/52 [01:43<00:16,  2.30s/it, loss=0.0168, v_num=2, train_loss_step=0.0192, val_loss_step=0.0195, va[A
Epoch 32:  88%|▉| 46/52 [01:43<00:13,  2.25s/it, loss=0.0168, v_num=2, train_loss_step=0.0192, val_loss_step=0.0195, va[A
Epoch 32:  90%|▉| 47/52 [01:43<00:11,  2.21s/it, loss=0.0168, v_num=2, train_loss_step=0.0192, val_loss_step=0.0195, va[A
Epoch 32:  92%|▉| 48/52 [01:43<00:08,  2.16s/it, loss=0.0168, v_num=2, train_loss_step=0.0192, val_loss_step=0.0195, va[A
Epoch 32:  94%|▉| 49/52 [01:43<00:06,  2.12s/it, loss=0.0168, v_num=2, train_loss_step=0.0192, val_loss_step=0.0195, va[A
Epoch 32:  96%|▉| 50/52 [01:43<00:04,  2.08s/it, loss=0.0168, v_num=2, train_loss_step=0.0192, val_loss_step=0.0195, va[A
Epoch 32:  98%|▉| 51/52 [01:43<00:02,  2.04s/it, loss=0.0168, v_num=2, train_loss_step=0.0192, val_loss_step=0.0195, va[A
Epoch 32: 100%|█

Epoch 37:  94%|▉| 49/52 [01:59<00:07,  2.43s/it, loss=0.0165, v_num=2, train_loss_step=0.019, val_loss_step=0.0193, val[A
Epoch 37:  96%|▉| 50/52 [01:59<00:04,  2.38s/it, loss=0.0165, v_num=2, train_loss_step=0.019, val_loss_step=0.0193, val[A
Epoch 37:  98%|▉| 51/52 [01:59<00:02,  2.34s/it, loss=0.0165, v_num=2, train_loss_step=0.019, val_loss_step=0.0193, val[A
Epoch 37: 100%|█| 52/52 [01:59<00:00,  2.29s/it, loss=0.0165, v_num=2, train_loss_step=0.019, val_loss_step=0.0193, val[A
Epoch 38:  81%|▊| 42/52 [02:01<00:28,  2.89s/it, loss=0.0165, v_num=2, train_loss_step=0.0189, val_loss_step=0.0193, va[A
Validation: 0it [00:00, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/10 [00:00<?, ?it/s][A
Epoch 38:  83%|▊| 43/52 [02:01<00:25,  2.83s/it, loss=0.0165, v_num=2, train_loss_step=0.0189, val_loss_step=0.0193, va[A
Epoch 38:  85%|▊| 44/52 [02:01<00:22,  2.76s/it, loss=0.0165, v_num=2, train_loss_step=0.0189, val_loss_s

Validation: 0it [00:00, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/10 [00:00<?, ?it/s][A
Epoch 43:  83%|▊| 43/52 [02:16<00:28,  3.17s/it, loss=0.0163, v_num=2, train_loss_step=0.0188, val_loss_step=0.0191, va[A
Epoch 43:  85%|▊| 44/52 [02:16<00:24,  3.10s/it, loss=0.0163, v_num=2, train_loss_step=0.0188, val_loss_step=0.0191, va[A
Epoch 43:  87%|▊| 45/52 [02:16<00:21,  3.04s/it, loss=0.0163, v_num=2, train_loss_step=0.0188, val_loss_step=0.0191, va[A
Epoch 43:  88%|▉| 46/52 [02:16<00:17,  2.97s/it, loss=0.0163, v_num=2, train_loss_step=0.0188, val_loss_step=0.0191, va[A
Epoch 43:  90%|▉| 47/52 [02:16<00:14,  2.91s/it, loss=0.0163, v_num=2, train_loss_step=0.0188, val_loss_step=0.0191, va[A
Epoch 43:  92%|▉| 48/52 [02:16<00:11,  2.85s/it, loss=0.0163, v_num=2, train_loss_step=0.0188, val_loss_step=0.0191, va[A
Epoch 43:  94%|▉| 49/52 [02:16<00:08,  2.80s/it, loss=0.0163, v_num=2, train_loss_step=0.0188, val_loss_s

Epoch 48:  88%|▉| 46/52 [02:31<00:19,  3.30s/it, loss=0.0161, v_num=2, train_loss_step=0.0186, val_loss_step=0.0189, va[A
Epoch 48:  90%|▉| 47/52 [02:31<00:16,  3.23s/it, loss=0.0161, v_num=2, train_loss_step=0.0186, val_loss_step=0.0189, va[A
Epoch 48:  92%|▉| 48/52 [02:32<00:12,  3.17s/it, loss=0.0161, v_num=2, train_loss_step=0.0186, val_loss_step=0.0189, va[A
Epoch 48:  94%|▉| 49/52 [02:32<00:09,  3.10s/it, loss=0.0161, v_num=2, train_loss_step=0.0186, val_loss_step=0.0189, va[A
Epoch 48:  96%|▉| 50/52 [02:32<00:06,  3.04s/it, loss=0.0161, v_num=2, train_loss_step=0.0186, val_loss_step=0.0189, va[A
Epoch 48:  98%|▉| 51/52 [02:32<00:02,  2.98s/it, loss=0.0161, v_num=2, train_loss_step=0.0186, val_loss_step=0.0189, va[A
Epoch 48: 100%|█| 52/52 [02:32<00:00,  2.93s/it, loss=0.0161, v_num=2, train_loss_step=0.0186, val_loss_step=0.0189, va[A
Epoch 49:  81%|▊| 42/52 [02:34<00:36,  3.68s/it, loss=0.0161, v_num=2, train_loss_step=0.0185, val_loss_step=0.0189, va[A
Validation: 0it 

In [21]:
print("Done")

Done


In [22]:
trainer.save_checkpoint(checkpoint_name)

## Testing the Model

**After** training and tuning the model, we can test the model.

In [None]:
trainer.test(model, dataloaders=test_loader)

# TODO

A list of some things that are still to do.
Not a complete list.

- [x] Implement Preprocessing
- [x] Implement Data Preparation
- [x] Implement Model basic structure
- [x] Set up basic training for model
- [ ] Implement cross validation
- [ ] Implement hyper parameter tuning
- [ ] ...
- [ ]

In [23]:
model(torch.tensor(all_temp_train_dataset[0]['distances']))

tensor([-1.8042], dtype=torch.float64, grad_fn=<AddBackward0>)

In [24]:
all_temp_train_dataset[0]['speed']

2.5783409394414765