# Driver State Anomaly Detection

[https://dagshub.com/matejfric/driver-state](https://dagshub.com/matejfric/driver-state)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import datetime
import os
import re
from pathlib import Path
from pprint import pprint

import albumentations as albu
import dagshub
import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch
import pytorch_lightning as L
import torch
import torch.nn as nn

# Pytorch Lightning EarlyStopping callback does not recover the best weights as in Keras!
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# https://github.com/Lightning-AI/pytorch-lightning/discussions/10399,
# https://pytorch-lightning.readthedocs.io/en/1.5.10/extensions/generated/pytorch_lightning.callbacks.ModelCheckpoint.html
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger

from model import (
    AnomalyDataset,
    AutoencoderModel,
    SegmentationDatasetLoader,
    SegmentationDatasetSplit,
    SegmentationModel,
)
from model.augmentation import (
    compose_transforms,
    hard_transforms,
    post_transforms,
    pre_transforms,
    resize_transforms,
)
from model.git import get_commit_id, get_current_branch
from model.plot import (
    plot_learning_curves,
    plot_predictions,
    plot_predictions_compact,
    show_examples,
    show_random,
)

  from .autonotebook import tqdm as notebook_tqdm
INFO:albumentations.check_version:A new version of Albumentations is available: 1.4.18 (you have 1.4.13). Upgrade using: pip install -U albumentations. To disable automatic update checks, set the environment variable NO_ALBUMENTATIONS_UPDATE to 1.


## Configuration

In [3]:
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({'font.size': 14})

# Experiment logging
REPO_NAME = 'driver-state'
USER_NAME = 'matejfric'
dagshub.init(REPO_NAME, USER_NAME, mlflow=True)

# Reproducibility
# https://lightning.ai/docs/pytorch/stable/common/trainer.html#reproducibility
SEED = 42
L.seed_everything(SEED, workers=True)

print(
    f'torch: {torch.__version__}, cuda: {torch.cuda.is_available()}, lightning: {L.__version__}'
)

INFO:dagshub:Accessing as matejfric


INFO:dagshub:Initialized MLflow to track repo "matejfric/driver-state"


INFO:dagshub:Repository matejfric/driver-state initialized!
INFO:lightning_fabric.utilities.seed:Seed set to 42


torch: 2.4.0+cu121, cuda: True, lightning: 2.2.4


In [14]:
# HYPERPARAMETERS
# ----------------------------------------
MAX_EPOCHS = 100  # default 500
MONITOR = 'val_loss'
PATIENCE = 20  # default 
IMAGE_SIZE = 128  # default 
BATCH_SIZE = 32  # default 8
LEARNING_RATE = 1e-4  # default 0.0001
AUGMENTATION = False  # default True

# LOGGING
# ----------------------------------------
LOG_DIR = Path('logs')
EXPERIMENT_NAME = f'{datetime.datetime.now().strftime("%Y-%m-%d-%H%M%S")}-anomaly-detection'
VERSION = 0
EXPERIMENT_DIR = LOG_DIR / EXPERIMENT_NAME / f'version_{VERSION}'
DATASET_NAME = '2024-09-15-driver-segmentation-dataset'

MLFLOW_ARTIFACT_DIR = 'outputs'
METRICS_CSV_NAME = 'metrics.csv'
LEARNING_CURVES_PDF_NAME = 'learning_curves.pdf'
PREDICTIONS_PNG_NAME = 'predictions.png'
TRAIN_TRANSFORMS_JSON_NAME = 'train_transforms.json'

# DATASET
# ----------------------------------------
DATASET_DIR = Path(f'/home/lanter/source/driver-dataset/{DATASET_NAME}')

TRAIN_SET_DIR = 'train'
VALID_SET_DIR = 'validation'
TEST_SET_DIR = 'test'

IMAGES_DIR = 'images'
MASKS_DIR = 'masks'
DEPTH_DIR = 'depth'

TRAIN_IMAGES = sorted((DATASET_DIR / TRAIN_SET_DIR / DEPTH_DIR).glob('*.png'))
TRAIN_MASKS = sorted((DATASET_DIR / TRAIN_SET_DIR / MASKS_DIR).glob('*.png'))

VALID_IMAGES = sorted((DATASET_DIR / VALID_SET_DIR / DEPTH_DIR).glob('*.png'))
VALID_MASKS = sorted((DATASET_DIR / VALID_SET_DIR / MASKS_DIR).glob('*.png'))

TEST_IMAGES = sorted((DATASET_DIR / TEST_SET_DIR / DEPTH_DIR).glob('*.png'))
TEST_MASKS = sorted((DATASET_DIR / TEST_SET_DIR / MASKS_DIR).glob('*.png'))

In [36]:
ENCODER = nn.Sequential(
    nn.Flatten(),
    nn.Linear(IMAGE_SIZE**2, 1024),
    nn.ReLU(),
    nn.Linear(1024, 512),
    nn.Linear(512, 256),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 128),
)
DECODER = nn.Sequential(
    # nn.Linear(128, 128),
    # nn.ReLU(),
    nn.Linear(128, 256),
    nn.Linear(256, 512),
    nn.ReLU(),
    nn.Linear(512, 1024),
    nn.ReLU(),
    nn.Linear(1024, IMAGE_SIZE**2),
    #nn.Linear(IMAGE_SIZE**2, IMAGE_SIZE**2),
    nn.Unflatten(1, torch.Size([1, IMAGE_SIZE, IMAGE_SIZE])),
    nn.Sigmoid(),
)
print(ENCODER)
print(DECODER)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=16384, out_features=1024, bias=True)
  (2): ReLU()
  (3): Linear(in_features=1024, out_features=512, bias=True)
  (4): Linear(in_features=512, out_features=256, bias=True)
  (5): Linear(in_features=256, out_features=128, bias=True)
  (6): ReLU()
  (7): Linear(in_features=128, out_features=128, bias=True)
)
Sequential(
  (0): Linear(in_features=128, out_features=256, bias=True)
  (1): Linear(in_features=256, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=1024, bias=True)
  (4): ReLU()
  (5): Linear(in_features=1024, out_features=16384, bias=True)
  (6): Linear(in_features=16384, out_features=16384, bias=True)
  (7): Unflatten(dim=1, unflattened_size=torch.Size([1, 128, 128]))
  (8): Sigmoid()
)


In [17]:
# https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
# https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

ENCODER = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=8, kernel_size=7, padding=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # 256->128
    nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # 128->64
    nn.Conv2d(in_channels=16, out_channels=16, kernel_size=5, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # 64->32
    nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # 32->16
)
DECODER = nn.Sequential(
    nn.ConvTranspose2d(
        in_channels=16,
        out_channels=16,
        kernel_size=3,
        stride=2,
        padding=1,
        output_padding=1,
    ),
    nn.ReLU(),
    nn.ConvTranspose2d(
        in_channels=16,
        out_channels=16,
        kernel_size=5,
        stride=2,
        padding=2,
        output_padding=1,
    ),
    nn.ReLU(),
    nn.ConvTranspose2d(
        in_channels=16,
        out_channels=16,
        kernel_size=5,
        stride=2,
        padding=2,
        output_padding=1,
    ),
    nn.ReLU(),
    nn.ConvTranspose2d(
        in_channels=16,
        out_channels=8,
        kernel_size=7,
        stride=2,
        padding=3,
        output_padding=1,
    ),
    nn.ReLU(),
    nn.Conv2d(in_channels=8, out_channels=1, kernel_size=3, padding='same'),
    nn.Sigmoid(),
)
print(ENCODER)
print(DECODER)

Sequential(
  (0): Conv2d(1, 8, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(16, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (7): ReLU()
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (9): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (10): ReLU()
  (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
Sequential(
  (0): ConvTranspose2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
  (1): ReLU()
  (2): ConvTranspose2d(16, 16, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), output_padding=(1, 1))
  (3): ReLU()
  (4): ConvTranspose2d(16, 16, kernel_size=(5, 5), stride=(2,

In [37]:
# Test input tensor of size (batch_size, channels, height, width)
x = torch.randn(BATCH_SIZE, 1, IMAGE_SIZE, IMAGE_SIZE)

# Forward pass through the encoder and decoder
encoded = ENCODER(x)
decoded = DECODER(encoded)

# Check the shapes
print(f'Input shape: {x.shape}')
print(f'Encoded shape: {encoded.shape}')
print(f'Decoded shape: {decoded.shape}')

assert x.shape == decoded.shape, 'Input and output shapes do not match!'

Input shape: torch.Size([32, 1, 128, 128])
Encoded shape: torch.Size([32, 128])
Decoded shape: torch.Size([32, 1, 128, 128])


In [10]:
# Exclude images from the training set
# TRAIN_IMAGES = [img for img in TRAIN_IMAGES if 'stribny' not in img.stem]
# TRAIN_MASKS = [mask for mask in TRAIN_MASKS if 'stribny' not in mask.stem]

In [11]:
# show_random(TRAIN_IMAGES, TRAIN_MASKS)

In [12]:
# show_random(VALID_IMAGES, VALID_MASKS)

In [13]:
# show_random(TEST_IMAGES, TEST_MASKS)

## Augmentations and Transforms

In [20]:
if AUGMENTATION:
    train_transforms = compose_transforms(
        [
            pre_transforms(image_size=IMAGE_SIZE),
            hard_transforms(),
            post_transforms(),
        ]
    )
else:
    train_transforms = compose_transforms(
        [
            pre_transforms(image_size=IMAGE_SIZE),
            post_transforms(),
        ]
    )
valid_transforms = compose_transforms(
    [
        pre_transforms(image_size=IMAGE_SIZE),
        post_transforms(),
    ]
)
test_transforms = compose_transforms(
    [
        pre_transforms(image_size=IMAGE_SIZE),
        post_transforms(),
    ]
)

show_transforms = compose_transforms([pre_transforms(image_size=IMAGE_SIZE), hard_transforms()])

In [15]:
#show_random(TRAIN_IMAGES, TRAIN_MASKS, transforms=show_transforms)

In [16]:
# train_transforms.transforms

## Loaders

In [21]:
dataset_loader = SegmentationDatasetLoader(
    train=SegmentationDatasetSplit(images=TRAIN_IMAGES, masks=TRAIN_MASKS),
    valid=SegmentationDatasetSplit(images=VALID_IMAGES, masks=VALID_MASKS),
    test=SegmentationDatasetSplit(images=TEST_IMAGES, masks=TEST_MASKS),
)

INFO:root:Dataset paths validated successfully!


INFO:root:Shapes of images and masks validated successfully!


In [22]:
BATCH_SIZE_DICT = {'train': BATCH_SIZE, 'valid': BATCH_SIZE, 'test': BATCH_SIZE}
loaders = dataset_loader.get_loaders(
    # set to zero if RuntimeError: Trying to resize storage that is not resizable
    dataset='anomaly',
    num_workers=int(os.cpu_count()),  # type: ignore
    batch_size=BATCH_SIZE_DICT,
    train_transforms=train_transforms,
    valid_transforms=valid_transforms,
    test_transforms=test_transforms,
)

train_dataloader = loaders['train']
valid_dataloader = loaders['valid']
test_dataloader = loaders['test']

In [34]:
for batch in train_dataloader:
    print(batch['image'].shape)
    break

torch.Size([32, 1, 256, 256])


## Training

In [32]:
model = AutoencoderModel(
    DECODER,
    ENCODER,
    batch_size_dict=BATCH_SIZE_DICT,
    learning_rate=LEARNING_RATE,
)

In [26]:
csv_logger = CSVLogger(LOG_DIR, name=EXPERIMENT_NAME, version=VERSION)
early_stopping = EarlyStopping(
    monitor=MONITOR,
    mode='min',
    patience=PATIENCE,
)
model_checkpoint = ModelCheckpoint(
    dirpath=EXPERIMENT_DIR,
    filename='{epoch}-{val_loss:3f}',
    monitor=MONITOR,
    save_top_k=1,  # save only the best model
    mode='min',
)

In [27]:
trainer = L.Trainer(
    logger=csv_logger,
    callbacks=[model_checkpoint, early_stopping],
    max_epochs=MAX_EPOCHS,
    log_every_n_steps=1,  # log every batch
    # https://lightning.ai/docs/pytorch/stable/common/trainer.html#reproducibility
    deterministic=True,
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [28]:
# https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
torch.set_float32_matmul_precision('medium')

In [38]:
trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=valid_dataloader,
)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 17.5 M
1 | decoder | Sequential | 17.5 M
2 | mse     | MSELoss    | 0     
---------------------------------------
35.0 M    Trainable params
0         Non-trainable params
35.0 M    Total params
139.935   Total estimated model params size (MB)


Image shape: torch.Size([32, 1, 128, 128])        


RuntimeError: unflatten: Provided sizes [1, 128, 128] don't multiply up to the size of dim 1 (1) in the input tensor

## Validation

In [None]:
# Load from MLflow
# model_name = 'pytorch-unet-resnet18'
# model_version = 2
# model_uri = f'models:/{model_name}/{model_version}'
# model_ = mlflow.pytorch.load_model(model_uri)

In [19]:
model_checkpoint_path = list(EXPERIMENT_DIR.glob('*.ckpt'))[0]
model_ = SegmentationModel.load_from_checkpoint(model_checkpoint_path)
trainer_ = L.Trainer(logger=False)  # no need to log anything for validation and testing

/home/lanter/source/driver-segmentation/segmentation-model/.venv/lib/python3.12/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
INFO:py

In [20]:
valid_metrics = trainer_.validate(model_, dataloaders=valid_dataloader, verbose=False)[0]
pprint(valid_metrics)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 68/68 [00:05<00:00, 12.72it/s]
{'val_f1s': 0.9934498071670532,
 'val_jaccard': 0.9870116114616394,
 'val_loss': 0.007266646716743708}


In [21]:
test_metrics = trainer_.test(
    model_, dataloaders=test_dataloader, verbose=False)[0]
pprint(test_metrics)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 68/68 [00:06<00:00, 11.27it/s]
{'test_f1s': 0.9930497407913208,
 'test_jaccard': 0.9862225651741028,
 'test_loss': 0.007655675057321787}


In [None]:
plot_predictions_compact(
    model_, test_dataloader, save_path=EXPERIMENT_DIR / PREDICTIONS_PNG_NAME, limit=10
)

In [None]:
# plot_predictions(model_, test_dataloader)

In [None]:
plot_learning_curves(
    EXPERIMENT_DIR / METRICS_CSV_NAME,
    save_path=EXPERIMENT_DIR / LEARNING_CURVES_PDF_NAME,
)

## Logging

In [24]:
# Save the transforms for experiment logging
albu.save(train_transforms, EXPERIMENT_DIR / TRAIN_TRANSFORMS_JSON_NAME)

In [25]:
def get_early_stopping_epoch() -> int | None:
    checkpoint = list(EXPERIMENT_DIR.glob('*.ckpt'))[0].stem
    pattern = r'epoch=(\d+)'
    match = re.search(pattern, checkpoint)
    if match:
        return int(match.group(1))
    else:
        return None

In [26]:
def log_dict_to_mlflow(dictionary: dict[str, float]) -> None:
    for k, v in dictionary.items():
        mlflow.log_metric(k, v)

In [None]:
with mlflow.start_run(run_name=f'{EXPERIMENT_NAME}') as run:
    try:
        mlflow.set_tag('Branch', get_current_branch())
        mlflow.set_tag('Commit ID', get_commit_id())
        mlflow.set_tag('Dataset', DATASET_NAME)
    except Exception as e:
        print(e)

    log_dict_to_mlflow(dict(valid_metrics))
    log_dict_to_mlflow(dict(test_metrics))

    mlflow.log_param('encoder', ENCODER)
    mlflow.log_param('decoder', DECODER)
    mlflow.log_param('batch_size', BATCH_SIZE)
    mlflow.log_param('max_epochs', MAX_EPOCHS)
    mlflow.log_param('early_stopping', get_early_stopping_epoch())
    mlflow.log_param('monitor', MONITOR)
    mlflow.log_param('patience', PATIENCE)
    mlflow.log_param('image_size', IMAGE_SIZE)
    mlflow.log_param('learning_rate', LEARNING_RATE)
    mlflow.log_param('augmentation', AUGMENTATION)

    mlflow.log_artifact(str(EXPERIMENT_DIR / METRICS_CSV_NAME), MLFLOW_ARTIFACT_DIR)
    mlflow.log_artifact(
        str(EXPERIMENT_DIR / LEARNING_CURVES_PDF_NAME), MLFLOW_ARTIFACT_DIR
    )
    mlflow.log_artifact(str(EXPERIMENT_DIR / PREDICTIONS_PNG_NAME), MLFLOW_ARTIFACT_DIR)
    mlflow.log_artifact(
        str(EXPERIMENT_DIR / TRAIN_TRANSFORMS_JSON_NAME), MLFLOW_ARTIFACT_DIR
    )
    mlflow.log_artifact('train.ipynb', MLFLOW_ARTIFACT_DIR)

    # Models are versioned by default
    mlflow.pytorch.log_model(
        pytorch_model=model_,
        artifact_path='model',
        registered_model_name=f'pytorch-{EXPERIMENT_NAME}',
    )