# Reproducing the Experiment in "Prediction of Pedestrian Speed with Artificial Neural Networks" by Tordeux et al.

In [1]:
# Optional development aid: with autoreload enabled, edits to imported modules
# take effect without restarting the kernel.

# Load the IPython extension that provides automatic module reloading
%load_ext autoreload
# Mode 2: reload all modules before each cell is executed
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import inspect

import torch
from torch.utils.data import DataLoader
import logging
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
import tensorboard

import preprocessing
import plotting
import pedestrian_dataset
import pedestrian_net

## Tensorboard

Extension for visualizing the training results.
Load the extension only once per kernel session; loading it again will probably produce an error message.

To start, run `tensorboard --logdir=dir --port 6006` in a terminal or run the following cell.

In [3]:
# Load the TensorBoard notebook extension
%load_ext tensorboard
# Start TensorBoard inside the notebook on port 6006, reading logs from .name/lightning_logs
# NOTE(review): confirm that this is the directory the Lightning trainer actually logs to.
%tensorboard --logdir .name/lightning_logs --port 6006

## Logging

Messages are printed via Python's `logging` module.
For more messages, set the logging level to `logging.INFO` or even `logging.DEBUG`.
For fewer messages, use `logging.WARNING`.

In [None]:
# Configure the root logger for the whole notebook.
# Use logging.INFO or logging.DEBUG for more output, logging.WARNING for less.
logger_format = '%(levelname)s - %(funcName)s \t%(message)s'
logger_level = logging.WARNING
# force=True replaces any handlers that are already attached to the root logger.
# Without it, basicConfig() silently does nothing once a handler exists, so changing
# the level above and re-running this cell would have no effect.
logging.basicConfig(level=logger_level, format=logger_format, force=True)

In [4]:
# Fix the seed of PyTorch's random number generator so that runs are repeatable
torch.manual_seed(seed=1234)

{'distances': array([ 296.80893951,   36.1585    ,  -91.69      ,   76.8931    ,
         62.0273    ,   -2.372     ,  201.1667    ,  -21.5438    ,
       -241.436     ,   29.8531    , -262.229     ,   32.8202    ,
        295.624     ,  -18.6053    ,  316.108     ,  -26.2509    ,
        437.719     ,   43.2941    ,  445.954     ,   -0.6295    ,
        559.384     ]), 'speed': 6.112364455756874}


## Preprocessing and Loading the Dataset

The different files of data in the format
(`PedID FrameID X Y Z`)
are loaded and converted into the following dictionary format:

`distances` | `speed`
-|-
Input of our neural network. Array of size $2k+1$ containing the median speed of the $k$ nearest neighbors as the first element and the relative $x$- and $y$-positions of the $k$ nearest neighbors in the following pattern afterwards: $x_1$, $y_1$, $x_2$, $y_2$, ... | Truth value for our neural network. The speed that the pedestrian had in that frame.

To load a list of files, the method `pedestrian_dataset.create_dataset()` is used.
As its first parameter it either takes a list of data files that it should load
or a `pedestrian_dataset.PedestrianDataType` value,
which can be either `BOTTLENECK`, which loads all bottleneck files,
`CORRIDOR`, which loads all corridor files,
or `ALL`, which loads all files.

In [6]:
# Load only the smallest corridor scenario (30 participants)
c_015_path = "./Data/Corridor_Data/ug-180-030.txt"

# create_dataset() takes a list of files, so the single path is wrapped in a list
c_015_train_val_datasets, c_015_test_dataset = pedestrian_dataset.create_dataset(
    [c_015_path]
)

# Sanity check: show the first sample of the first train/val part
print(c_015_train_val_datasets[0][0])

{'distances': array([ 83.28914988, -32.428     ,  24.169     ,  37.9846    ,
        38.714     ,  -2.5528    , -57.585     ,   4.0326    ,
        69.008     ,  81.5544    ,   6.506     , -71.6767    ,
       -54.426     ,  49.6994    , -84.303     , -57.71222   ,
        80.09      ,  86.0264    , -55.428     ,  80.5664    ,
        75.733     ]), 'speed': 2.5783409394414765}


In [7]:
batch_size = 4

# Wrap the datasets in PyTorch dataloaders

# TODO: Proper cross validation is not implemented yet. For now the first 4 train/val parts
#   form the training set and the last part is held out for validation.
#   Real cross validation would presumably rotate which part is held out.
c_015_temp_train_dataset = torch.utils.data.ConcatDataset(c_015_train_val_datasets[:4])
c_015_temp_val_dataset = c_015_train_val_datasets[4]

c_015_train_loader = DataLoader(c_015_temp_train_dataset, batch_size=batch_size, drop_last=True)
# Validate on the held-out part. This loader used to wrap the training dataset, so the
# validation loss was computed on data the model had already been trained on.
c_015_val_loader = DataLoader(c_015_temp_val_dataset, batch_size=batch_size, drop_last=False)

c_015_test_loader = DataLoader(c_015_test_dataset, batch_size=batch_size, drop_last=False)

# Print the first batch produced by the train loader
for item in c_015_train_loader:
    print(item)
    break  # stop after the first batch

{'distances': tensor([[ 83.2891, -32.4280,  24.1690,  37.9846,  38.7140,  -2.5528, -57.5850,
           4.0326,  69.0080,  81.5544,   6.5060, -71.6767, -54.4260,  49.6994,
         -84.3030, -57.7122,  80.0900,  86.0264, -55.4280,  80.5664,  75.7330]],
       dtype=torch.float64), 'speed': tensor([2.5783], dtype=torch.float64)}


In [None]:
# Load every available scenario into one set of datasets
all_data_type = pedestrian_dataset.PedestrianDataType.ALL
all_train_val_datasets, all_test_dataset = pedestrian_dataset.create_dataset(all_data_type)

# Sanity check: show the first sample of the first train/val part
print(all_train_val_datasets[0][0])

In [None]:
batch_size = 16

# Wrap the datasets in PyTorch dataloaders

# TODO: Proper cross validation is not implemented yet. For now the first 4 train/val parts
#   form the training set and the last part is held out for validation.
#   Real cross validation would presumably rotate which part is held out.
all_temp_train_dataset = torch.utils.data.ConcatDataset(all_train_val_datasets[:4])
all_temp_val_dataset = all_train_val_datasets[4]

all_train_loader = DataLoader(all_temp_train_dataset, batch_size=batch_size, drop_last=True)
# Validate on the held-out part. This loader used to wrap the training dataset, so the
# validation loss (and with it early stopping) was based on data the model had already seen.
all_val_loader = DataLoader(all_temp_val_dataset, batch_size=batch_size, drop_last=False)

all_test_loader = DataLoader(all_test_dataset, batch_size=batch_size, drop_last=False)

# # Currently disabled because for batch_size=16 this gets large
# # Print the first value given by the train loader
# for item in all_train_loader:
#     print(item)
#     break  # break after printing the first item

## Implementing and Training the Model

Now we need to define our model.

@Parim:
I got quite a lot of work done on the model, but there are still TODOs.
I probably won't be able to work more on the practicum this week, but I think everything should be documented well enough to be extended by you.
(I'm using PyTorch Lightning now, which you'll probably remember from I2DL, and the Neural Network is in `pedestrian_net.py`.)

In [None]:
# File the trained model is written to after training
checkpoint_name = "./.name/checkpoints/2023-07-05--dataAll-ep100-it001.ckpt"

# Network architecture
k = 10           # number of nearest neighbors; the network input has 2k+1 entries
hidden_size = 3  # forwarded to PedestrianNet as `hidden_size`

# Training setup
max_epochs = 100
learning_rate = 1e-3
optimizer = torch.optim.Adam  # optimizer class, forwarded to PedestrianNet

In [None]:
# Prefer the GPU when CUDA is available and fall back to the CPU otherwise
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("GPU is available." if device.type == 'cuda' else "CPU will be used.")

In [None]:
# Select the set of dataloaders used for training below (here: all scenarios)
train_loader, val_loader, test_loader = all_train_loader, all_val_loader, all_test_loader

In [None]:
# Early stopping: watch "val_loss" (lower is better) with a patience of 20
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode='min',
    patience=20,
)

In [None]:
# Build the network from the hyperparameters
model = pedestrian_net.PedestrianNet(
    k=k,
    hidden_size=hidden_size,
    learning_rate=learning_rate,
    optimizer=optimizer,
)
# Move the model to the selected device; as the cell's last expression,
# the return value of .to() is displayed
model.to(device)

In [None]:
# Configure the Lightning trainer
trainer = pl.Trainer(
    max_epochs=max_epochs,
    devices='auto',
    # Request the GPU only when CUDA is actually present. A hard-coded 'gpu' makes the
    # Trainer raise on CPU-only machines.
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    callbacks=[early_stop_callback],
    log_every_n_steps=1,  # log after every training step
    enable_checkpointing=True
)

In [None]:
# Train the model on train_loader and validate on val_loader
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

In [None]:
# Print a marker so it is easy to see that execution has reached this point
print("Done")

In [None]:
# Write a checkpoint of the trainer's current state to the path in checkpoint_name
trainer.save_checkpoint(checkpoint_name)

## Testing the Model

**After** training and tuning the model, we can test the model.

In [None]:
# Disabled on purpose: only evaluate on the test set once training and tuning are finished.
# trainer.test(model, dataloaders=test_loader)

# TODO

A list of some things that are still to do.
Not a complete list.

- [x] Implement Preprocessing
- [x] Implement Data Preparation
- [x] Implement Model basic structure
- [x] Set up basic training for model
- [ ] Implement cross validation
- [ ] Implement hyperparameter tuning
- [ ] ...
- [ ]