## Imports

In [1]:
import os

from mmaction.datasets import build_dataset, build_dataloader
from mmaction.models import build_model
from mmcv import Config
from mmaction.datasets import CutmixBlending
import torch.nn.functional as F
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def precision_score(y_true, y_pred):
    """Calculates precision score.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.

    Returns:
        Precision score.
    """

    tp = sum(y_true[i] == y_pred[i] for i in range(len(y_true)) if y_pred[i] == 1)
    fp = sum(y_true[i] != y_pred[i] for i in range(len(y_true)) if y_pred[i] == 1)
    precision = tp / (tp + fp) if tp + fp != 0 else 0
    return precision

def recall_score(y_true, y_pred):
    """Calculates recall score.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.

    Returns:
        Recall score.
    """

    tp = sum(y_true[i] == y_pred[i] for i in range(len(y_true)) if y_pred[i] == 1)
    fn = sum(y_true[i] == 1 and y_pred[i] != 1 for i in range(len(y_true)))
    recall = tp / (tp + fn) if tp + fn != 0 else 0
    return recall

def f1_score(y_true, y_pred):
    """Calculates F1 score.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.

    Returns:
        F1 score.
    """

    p = precision_score(y_true, y_pred)
    r = recall_score(y_true, y_pred)
    return 2 * p * r / (p + r) if p + r != 0 else 0

def weighted_f1_score(y_true, y_pred):
    """Calculates the weighted F1 score, assuming equal class weights.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.

    Returns:
        Weighted F1 score.
    """

    num_classes = len(set(y_true))
    f1_scores = []
    for i in range(num_classes):
        class_mask = [1 if y == i else 0 for y in y_true]
        class_f1 = f1_score(class_mask, [1 if y == i else 0 for y in y_pred])
        f1_scores.append(class_f1)
    return sum(f1_scores) / num_classes

def accuracy_score(y_true, y_pred):
    """Calculates the accuracy score.
    
    Args:
    y_true: True labels.
    y_pred: Predicted labels.
    
    Returns:
    Accuracy score.
    """
    
    correct_predictions = sum(np.array(y_true) == np.array(y_pred))
    total_predictions = len(y_true)
    accuracy = correct_predictions / total_predictions

    return accuracy

## Loading batches

In [3]:
cfg = Config.fromfile('./mixup.py')

In [4]:
os.chdir('../../..')

In [5]:
cfg.data.train

{'type': 'RawframeDataset',
 'ann_file': 'data/hmdb51/annotation_train.txt',
 'data_prefix': 'data/hmdb51/rawframes',
 'pipeline': [{'type': 'SampleFrames',
   'clip_len': 32,
   'frame_interval': 2,
   'num_clips': 1},
  {'type': 'RawFrameDecode'},
  {'type': 'Resize', 'scale': (-1, 256)},
  {'type': 'RandomResizedCrop'},
  {'type': 'Resize', 'scale': (224, 224), 'keep_ratio': False},
  {'type': 'Flip', 'flip_ratio': 0.5},
  {'type': 'Normalize',
   'mean': [123.675, 116.28, 103.53],
   'std': [58.395, 57.12, 57.375],
   'to_bgr': False},
  {'type': 'FormatShape', 'input_format': 'NCTHW'},
  {'type': 'Collect', 'keys': ['imgs', 'label'], 'meta_keys': []},
  {'type': 'ToTensor', 'keys': ['imgs', 'label']}]}

In [6]:
train_dataset = build_dataset(cfg=cfg.data.train)
train_loader = build_dataloader(
        train_dataset,
        videos_per_gpu=8,
        workers_per_gpu=4,
        persistent_workers=False,
        num_gpus=1,
        dist=False)

val_dataset = build_dataset(cfg=cfg.data.val)
val_loader = build_dataloader(
        val_dataset,
        videos_per_gpu=1,
        workers_per_gpu=4,
        persistent_workers=False,
        num_gpus=1,
        dist=False)

## Learning Hyperparameters

In [None]:
import optuna
import torch.nn as nn
import torch.optim as optim
import torch
import logging
import numpy as np
from sklearn.metrics import accuracy_score

# Best parameters from previous study
best_params = {
    'dropout_ratio': 0.6795542149013333,
    'lr': 7.886714129990479e-06,
    'max_norm': 41,
    'with_pool2': True,
    'bottleneck_mode': 'ir',
    'norm_eval': False,
    'bn_frozen': False
}

# Configure logging
logging.basicConfig(filename='optuna_training_CutmixBlending_hmdb.log', 
                    filemode='w', 
                    format='%(asctime)s - %(levelname)s - %(message)s', 
                    level=logging.INFO)

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Database file path for saving study
db_file = "sqlite:///optuna_study_CutmixBlending_hmdb.db"

sampler = optuna.samplers.TPESampler(seed=42)

# Set up study with the option to minimize validation loss
study = optuna.create_study(
    sampler=sampler,
    direction="maximize",
    study_name="CutmixBlending_hmdb", 
    storage=db_file,
    load_if_exists=True
)

def objective(trial):
    print("Starting a new trial...")

    # Hyperparameters to tune
    alpha = trial.suggest_float("alpha", 0.1, 10)  
    print(f"Trial {trial.number}: alpha = {alpha}")

    # Set hyperparameters
    dropout_ratio = best_params['dropout_ratio']
    lr = best_params['lr']
    max_norm = best_params['max_norm']

    # Backbone parameters
    cfg.model.backbone.with_pool2 = best_params['with_pool2']
    cfg.model.backbone.bottleneck_mode = best_params['bottleneck_mode']
    cfg.model.backbone.norm_eval = best_params['norm_eval']
    cfg.model.backbone.bn_frozen = best_params['bn_frozen']

    # Fixed pretrained URL
    cfg.model.backbone.pretrained = 'https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth'

    # Adjust config parameters
    cfg.model.cls_head.dropout_ratio = dropout_ratio

    # Initialize model, criterion, optimizer, scheduler
    print("Building the model...")
    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg')).to(device)
    
    optimizer = optim.Adam(
        model.parameters(),
        lr=lr,
        weight_decay=0.00001
    )

    print("Model built successfully!")
    
    # Early stopping parameters
    total_epochs = 60
    eval_interval = 1
    best_val_accuracy = 0

    # CutmixBlending Blending instance
    cutmix_blending = CutmixBlending(num_classes=cfg.model.cls_head.num_classes, alpha=alpha)

    print("Starting training...")
    for epoch in range(total_epochs):

        # Training loop
        model.train()
        epoch_loss = 0
        for batch_idx, data in enumerate(train_loader):
            inputs, labels = data['imgs'].to(device), data['label'].to(device)

            # Convert labels to one-hot encoding
            labels_one_hot = F.one_hot(labels, num_classes=cfg.model.cls_head.num_classes).float()

            # Apply CutmixBlending
            mixed_inputs, mixed_labels = cutmix_blending.do_blending(inputs, labels_one_hot)

            optimizer.zero_grad()
            outputs = model(mixed_inputs, mixed_labels, return_loss=True)
            loss = outputs['loss_cls']

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
            optimizer.step()

            epoch_loss += loss.item()
            if batch_idx % 10 == 0:
                print(f"Epoch [{epoch + 1}/{total_epochs}], Batch [{batch_idx}/{len(train_loader)}], Loss: {loss.item():.4f}")

        logging.info(f"Epoch [{epoch + 1}/{total_epochs}], Train Loss: {epoch_loss / len(train_loader):.4f}")
        print(f"Epoch [{epoch + 1}/{total_epochs}], Average Train Loss: {epoch_loss / len(train_loader):.4f}")

        # Validation loop (every `eval_interval` epochs)
        if (epoch + 1) % eval_interval == 0:
            model.eval()
            total_val_loss = 0
            all_preds = []
            all_labels = []
            with torch.no_grad():
                for val_data in val_loader:
                    val_inputs, val_labels = val_data['imgs'].to(device), val_data['label'].to(device)

                    val_results = model(val_inputs, return_loss=False)
                    val_loss = model(val_inputs, val_labels, return_loss=True)['loss_cls']

                    total_val_loss += val_loss.item()

                    # Collect predictions and true labels
                    predictions = np.argmax(val_results, axis=1)
                    true_labels = val_labels.cpu().numpy()

                    all_preds.extend(predictions)
                    all_labels.extend(true_labels)

            val_accuracy = accuracy_score(all_labels, all_preds)

            print(f"Epoch [{epoch + 1}/{total_epochs}], Validation Accuracy: {val_accuracy:.4f}, Validation Loss: {total_val_loss / len(val_loader):.4f}")
            logging.info(f"Epoch [{epoch + 1}/{total_epochs}], Validation Accuracy: {val_accuracy:.4f}")

            # Report validation loss to Optuna
            trial.report(val_accuracy, epoch)

            # Check if validation accuracy improved
            if val_accuracy > best_val_accuracy:
                best_val_accuracy = val_accuracy

            # Prune unpromising trials
            if trial.should_prune():
                print("Trial pruned due to lack of improvement.")
                raise optuna.exceptions.TrialPruned()

    return best_val_accuracy

# Run Optuna Study
print("Starting Optuna study...")
study.optimize(objective, n_trials=10)

print(f"Best hyperparameters: {study.best_params}")
print(f"Best validation accuracy: {study.best_value:.4f}")
logging.info("Best hyperparameters: %s", study.best_params)
logging.info("Best validation accuracy: %f", study.best_value)


[I 2024-12-13 14:47:28,990] Using an existing study with name 'CutmixBlending_hmdb' instead of creating a new one.


Starting Optuna study...
Starting a new trial...
Trial 2: alpha = 3.807947176588889
Building the model...


2024-12-13 14:47:29,341 - mmaction - INFO - load model from: https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth
2024-12-13 14:47:29,341 - mmaction - INFO - load checkpoint from http path: https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth


Model built successfully!
Starting training...


  bbx1 = torch.clamp(cx - cut_w // 2, 0, w)
  bby1 = torch.clamp(cy - cut_h // 2, 0, h)
  bbx2 = torch.clamp(cx + cut_w // 2, 0, w)
  bby2 = torch.clamp(cy + cut_h // 2, 0, h)


Epoch [1/60], Batch [0/447], Loss: 4.0017
Epoch [1/60], Batch [10/447], Loss: 3.8550
Epoch [1/60], Batch [20/447], Loss: 3.9037
Epoch [1/60], Batch [30/447], Loss: 4.0036
Epoch [1/60], Batch [40/447], Loss: 4.0364
Epoch [1/60], Batch [50/447], Loss: 3.9965
Epoch [1/60], Batch [60/447], Loss: 3.9191
Epoch [1/60], Batch [70/447], Loss: 3.8570
Epoch [1/60], Batch [80/447], Loss: 3.9293
Epoch [1/60], Batch [90/447], Loss: 3.8626
Epoch [1/60], Batch [100/447], Loss: 3.9847
Epoch [1/60], Batch [110/447], Loss: 3.9310
Epoch [1/60], Batch [120/447], Loss: 4.0001
Epoch [1/60], Batch [130/447], Loss: 3.9716
Epoch [1/60], Batch [140/447], Loss: 4.0392
Epoch [1/60], Batch [150/447], Loss: 3.9258
Epoch [1/60], Batch [160/447], Loss: 3.9586
Epoch [1/60], Batch [170/447], Loss: 3.8823
Epoch [1/60], Batch [180/447], Loss: 3.9372
Epoch [1/60], Batch [190/447], Loss: 3.9580
Epoch [1/60], Batch [200/447], Loss: 3.8277
Epoch [1/60], Batch [210/447], Loss: 3.8995
Epoch [1/60], Batch [220/447], Loss: 3.8848

  bbx1 = torch.clamp(cx - cut_w // 2, 0, w)
  bby1 = torch.clamp(cy - cut_h // 2, 0, h)
  bbx2 = torch.clamp(cx + cut_w // 2, 0, w)
  bby2 = torch.clamp(cy + cut_h // 2, 0, h)


Epoch [2/60], Batch [0/447], Loss: 3.8086
Epoch [2/60], Batch [10/447], Loss: 3.8130
Epoch [2/60], Batch [20/447], Loss: 3.9217
Epoch [2/60], Batch [30/447], Loss: 3.8896
Epoch [2/60], Batch [40/447], Loss: 3.6152
Epoch [2/60], Batch [50/447], Loss: 3.8402
Epoch [2/60], Batch [60/447], Loss: 3.8565
Epoch [2/60], Batch [70/447], Loss: 3.8753
Epoch [2/60], Batch [80/447], Loss: 3.8254
Epoch [2/60], Batch [90/447], Loss: 3.8829
Epoch [2/60], Batch [100/447], Loss: 3.9247
Epoch [2/60], Batch [110/447], Loss: 3.9302
Epoch [2/60], Batch [120/447], Loss: 3.7436
Epoch [2/60], Batch [130/447], Loss: 3.7009
Epoch [2/60], Batch [140/447], Loss: 3.7565
Epoch [2/60], Batch [150/447], Loss: 3.6470
Epoch [2/60], Batch [160/447], Loss: 3.7827
Epoch [2/60], Batch [170/447], Loss: 3.6660
Epoch [2/60], Batch [180/447], Loss: 3.7349
Epoch [2/60], Batch [190/447], Loss: 3.7314
Epoch [2/60], Batch [200/447], Loss: 3.7109
Epoch [2/60], Batch [210/447], Loss: 3.5050
Epoch [2/60], Batch [220/447], Loss: 3.7640

  bbx1 = torch.clamp(cx - cut_w // 2, 0, w)
  bby1 = torch.clamp(cy - cut_h // 2, 0, h)
  bbx2 = torch.clamp(cx + cut_w // 2, 0, w)
  bby2 = torch.clamp(cy + cut_h // 2, 0, h)


Epoch [3/60], Batch [0/447], Loss: 3.8049
Epoch [3/60], Batch [10/447], Loss: 3.6762
Epoch [3/60], Batch [20/447], Loss: 3.5102
Epoch [3/60], Batch [30/447], Loss: 3.7569
Epoch [3/60], Batch [40/447], Loss: 3.4445
Epoch [3/60], Batch [50/447], Loss: 3.7259
Epoch [3/60], Batch [60/447], Loss: 3.7490
Epoch [3/60], Batch [70/447], Loss: 3.6203
Epoch [3/60], Batch [80/447], Loss: 3.6655
Epoch [3/60], Batch [90/447], Loss: 3.6924
Epoch [3/60], Batch [100/447], Loss: 3.4790
Epoch [3/60], Batch [110/447], Loss: 3.8090
Epoch [3/60], Batch [120/447], Loss: 3.3918
Epoch [3/60], Batch [130/447], Loss: 3.4917
Epoch [3/60], Batch [140/447], Loss: 3.8106
Epoch [3/60], Batch [150/447], Loss: 3.6530
Epoch [3/60], Batch [160/447], Loss: 3.9826
Epoch [3/60], Batch [170/447], Loss: 3.6579
Epoch [3/60], Batch [180/447], Loss: 3.4836
Epoch [3/60], Batch [190/447], Loss: 3.9067
Epoch [3/60], Batch [200/447], Loss: 3.8630
Epoch [3/60], Batch [210/447], Loss: 3.7085
Epoch [3/60], Batch [220/447], Loss: 3.6508

  bbx1 = torch.clamp(cx - cut_w // 2, 0, w)
  bby1 = torch.clamp(cy - cut_h // 2, 0, h)
  bbx2 = torch.clamp(cx + cut_w // 2, 0, w)
  bby2 = torch.clamp(cy + cut_h // 2, 0, h)


Epoch [4/60], Batch [0/447], Loss: 3.6927
Epoch [4/60], Batch [10/447], Loss: 3.6496
Epoch [4/60], Batch [20/447], Loss: 3.5001
Epoch [4/60], Batch [30/447], Loss: 3.6296
Epoch [4/60], Batch [40/447], Loss: 3.5278
Epoch [4/60], Batch [50/447], Loss: 3.6392
Epoch [4/60], Batch [60/447], Loss: 3.5967
Epoch [4/60], Batch [70/447], Loss: 3.1779
Epoch [4/60], Batch [80/447], Loss: 3.6632
Epoch [4/60], Batch [90/447], Loss: 3.5288
Epoch [4/60], Batch [100/447], Loss: 3.8220
Epoch [4/60], Batch [110/447], Loss: 3.7019
Epoch [4/60], Batch [120/447], Loss: 3.2668
Epoch [4/60], Batch [130/447], Loss: 3.5615
Epoch [4/60], Batch [140/447], Loss: 3.5693
Epoch [4/60], Batch [150/447], Loss: 3.6382
Epoch [4/60], Batch [160/447], Loss: 3.2687
Epoch [4/60], Batch [170/447], Loss: 3.5777
Epoch [4/60], Batch [180/447], Loss: 3.7546
Epoch [4/60], Batch [190/447], Loss: 3.7040
Epoch [4/60], Batch [200/447], Loss: 3.2182
Epoch [4/60], Batch [210/447], Loss: 3.3139
Epoch [4/60], Batch [220/447], Loss: 3.3932

## Results

In [None]:
# Retrieve all trials and print their parameters
for trial in study.trials:
    print(f"Trial number: {trial.number}")
    print(f"Parameters: {trial.params}")
    print(f"Value (e.g., validation accuracy): {trial.value}")
    print("-" * 30)

In [None]:
best_trial = study.best_trial
print("Best trial number:", best_trial.number)
print("Best parameters:", best_trial.params)
print("Best validation loss:", best_trial.value)