In [1]:
# Install the third-party packages into the kernel's environment (%pip, unlike
# !pip, targets the running kernel). SoccerNet provides the dataset downloader
# imported in the next cell.
# NOTE(review): neither install is version-pinned, and pytorchvideo is not
# imported by any cell visible in this notebook -- confirm it is still needed.
%pip install SoccerNet
%pip install pytorchvideo

Collecting SoccerNet
  Downloading SoccerNet-0.1.61-py3-none-any.whl.metadata (13 kB)
Collecting scikit-video (from SoccerNet)
  Downloading scikit_video-1.1.11-py2.py3-none-any.whl.metadata (1.1 kB)
Collecting google-measurement-protocol (from SoccerNet)
  Downloading google_measurement_protocol-1.1.0-py2.py3-none-any.whl.metadata (845 bytes)
Collecting pycocoevalcap (from SoccerNet)
  Downloading pycocoevalcap-1.2-py3-none-any.whl.metadata (3.2 kB)
Collecting prices>=1.0.0 (from google-measurement-protocol->SoccerNet)
  Downloading prices-1.1.1-py3-none-any.whl.metadata (2.8 kB)
Collecting InquirerPy==0.3.4 (from huggingface-hub[cli]->SoccerNet)
  Downloading InquirerPy-0.3.4-py3-none-any.whl.metadata (8.1 kB)
Collecting pfzy<0.4.0,>=0.3.1 (from InquirerPy==0.3.4->huggingface-hub[cli]->SoccerNet)
  Downloading pfzy-0.3.4-py3-none-any.whl.metadata (4.9 kB)
Downloading SoccerNet-0.1.61-py3-none-any.whl (85 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.9/85.9 kB[

In [None]:
import os
import zipfile
from SoccerNet.Downloader import SoccerNetDownloader as SNdl

# Download the SoccerNet "mvfouls" task archives, unpack each split into its
# own folder and delete the archive once it has been unpacked.
from getpass import getpass

# Every step below works on the same splits, so the list is defined once.
SPLITS = ["train", "valid", "test", "challenge"]

# Set up the downloader
local_directory = "path/to/SoccerNet"
mySNdl = SNdl(LocalDirectory=local_directory)

# The dataset password must not live in the notebook source (it would end up
# in version control and in shared copies). Read it from the environment and
# fall back to an interactive prompt when the variable is not set.
soccernet_password = os.environ.get("SOCCERNET_PASSWORD") or getpass("SoccerNet mvfouls password: ")

# Download the data
mySNdl.downloadDataTask(task="mvfouls", split=SPLITS, password=soccernet_password)

# Unzip the downloaded files
task_directory = os.path.join(local_directory, "mvfouls")
for split in SPLITS:
    zip_file = os.path.join(task_directory, f"{split}.zip")
    if not os.path.exists(zip_file):
        print(f"{split}.zip not found")
        continue

    # Create a new folder with the same name as the zip file
    extract_folder = os.path.join(task_directory, split)
    os.makedirs(extract_folder, exist_ok=True)

    # Extract the contents to the new folder
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_folder)
    print(f"Extracted {split}.zip to {extract_folder}")

    # Remove the archive only after its extraction finished without raising,
    # so a failed extraction never costs a re-download.
    os.remove(zip_file)
    print(f"Removed {split}.zip")

Downloading path/to/SoccerNet/mvfouls/train.zip...:  14%|█▎        | 333M/2.46G [00:07<00:38, 54.7MiB/s]

In [24]:
import os
import torch
import json
import cv2
import numpy as np
from torchvision import transforms
from PIL import Image

# Prefer the GPU whenever PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Number of frames every clip is resampled (or padded) to.
DESIRED_FRAME_COUNT = 126

# Annotation string -> class index, one table per prediction task.
EVENT_DICTIONARY = dict(
    action_class={
        name: index
        for index, name in enumerate([
            "Tackling", "Standing tackling", "High leg", "Holding", "Pushing",
            "Elbowing", "Challenge", "Dive", "Dont know",
        ])
    },
    # Both spellings of "No offence" appear as keys and share one index.
    offence_class={"Offence": 0, "Between": 1, "No Offence": 2, "No offence": 2},
    # Severity grades "1.0" .. "5.0" map to indices 0 .. 4.
    severity_class={f"{grade}.0": grade - 1 for grade in range(1, 6)},
    bodypart_class={"Upper body": 0, "Under body": 1},
    offence_severity_class={
        name: index
        for index, name in enumerate([
            "No offence", "Offence + No card", "Offence + Yellow card", "Offence + Red card",
        ])
    },
)

# RGB frames: resize to 224x224, convert to a tensor, then normalise each
# channel with the mean/std constants below.
rgb_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Optical-flow frames: resize to 224x224 and convert to a tensor (no normalisation).
flow_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

# Card attached to each annotated severity grade; used to build the combined
# "<offence> + <card>" key looked up in EVENT_DICTIONARY['offence_severity_class'].
# NOTE(review): assumes the grading 1 = no card, 3 = yellow card, 5 = red card,
# with the in-between grades "2.0"/"4.0" rounded down to the milder card --
# confirm against the dataset's annotation guidelines.
_SEVERITY_TO_CARD = {
    "1.0": "No card",
    "2.0": "No card",
    "3.0": "Yellow card",
    "4.0": "Yellow card",
    "5.0": "Red card",
}


def _fit_frame_count(frames, target_count):
    """Return a list of exactly ``target_count`` frames built from ``frames``.

    Longer sequences are sub-sampled at evenly spaced indices; shorter ones are
    padded by repeating the last frame. ``frames`` must not be empty.
    """
    available = len(frames)
    if available > target_count:
        indices = np.linspace(0, available - 1, target_count).astype(int)
        return [frames[i] for i in indices]
    if available < target_count:
        return frames + [frames[-1]] * (target_count - available)
    return frames


def _read_clip_streams(clip_path):
    """Decode one video into per-frame RGB and optical-flow tensors.

    The first frame only seeds the optical-flow computation, so a clip with N
    readable frames yields N - 1 RGB/flow pairs.

    Returns:
        ``(rgb_frames, flow_frames)`` as two equally long lists of tensors, or
        ``None`` when the clip cannot be read or has fewer than two frames.
    """
    cap = cv2.VideoCapture(clip_path)
    try:
        ret, prev_frame = cap.read()
        if not ret:
            return None

        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        rgb_frames, flow_frames = [], []

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Process RGB frame (OpenCV decodes as BGR)
            rgb_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            rgb_frames.append(rgb_transform(rgb_frame).to(device))

            # Process Optical Flow between the previous and the current frame
            curr_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
            flow = np.clip(flow, -20, 20)  # Clipping to limit extreme values
            flow = ((flow + 20) * (255.0 / 40)).astype(np.uint8)  # Normalizing to 0-255 range
            flow_frame = Image.fromarray(flow[..., 0])  # Taking the horizontal component for simplicity
            flow_frames.append(flow_transform(flow_frame).to(device))
            prev_gray = curr_gray
    finally:
        # Always release the capture handle, including on the early return
        # above and when decoding raises.
        cap.release()

    if not rgb_frames:
        # Only the seed frame was readable: there is nothing to pad from.
        return None
    return rgb_frames, flow_frames


def load_filtered_clips_and_labels(DATA_PATH, split, max_samples_o, max_samples_no):
    """Load a class-capped subset of actions with their clips and labels.

    Reads ``<DATA_PATH>/<split>/annotations.json`` and, for every kept action,
    decodes ``clip_0.mp4`` and ``clip_1.mp4`` from ``action_<key>`` into an RGB
    and an optical-flow tensor of ``DESIRED_FRAME_COUNT`` frames each.

    Args:
        DATA_PATH: Dataset root containing one folder per split.
        split: Split folder name (e.g. ``"train"``).
        max_samples_o: Maximum number of loaded actions annotated ``"Offence"``.
        max_samples_no: Maximum number of loaded actions counted as
            "no offence". Every loaded action that is not ``"Offence"``
            (including ``"Between"``) counts towards this limit.

    Returns:
        A 7-tuple ``(rgb_clips, flow_clips, labels_action, labels_offence,
        labels_severity, labels_bodypart, labels_offence_severity)``. The first
        two are lists with one entry per action, each entry being a list of up
        to two stacked frame tensors; the remaining five are lists of integer
        class indices aligned with the clips.

    Notes:
        Actions whose labels cannot be mapped or whose folder is missing are
        counted as skipped. Frame tensors are moved to ``device`` while loading,
        so the whole loaded subset lives in that device's memory.
    """
    rgb_clips, flow_clips = [], []
    labels_action, labels_offence, labels_severity, labels_bodypart, labels_offence_severity = [], [], [], [], []

    annotations_path = os.path.join(DATA_PATH, split, "annotations.json")
    print(f"Loading annotations from: {annotations_path}")

    with open(annotations_path, 'r') as f:
        annotations = json.load(f)
    print(f"Total actions found in annotations: {len(annotations['Actions'])}")

    offence_count, no_offence_count, skipped_actions = 0, 0, 0

    for action_key, action_data in annotations['Actions'].items():
        offence_class = action_data['Offence']
        # Skip actions whose class already reached its quota.
        if (offence_class == "Offence" and offence_count >= max_samples_o) or \
           (offence_class in ["No offence", "No Offence"] and no_offence_count >= max_samples_no):
            continue

        # Map labels to indices using the dictionary
        severity_value = action_data.get('Severity', '1.0')
        action_label = EVENT_DICTIONARY['action_class'].get(action_data['Action class'])
        offence_label = EVENT_DICTIONARY['offence_class'].get(offence_class)
        severity_label = EVENT_DICTIONARY['severity_class'].get(severity_value)
        bodypart_label = EVENT_DICTIONARY['bodypart_class'].get(action_data.get('Bodypart', 'Upper body'))

        # The card is derived from the annotated severity *string*. Looking up
        # the integer severity index in the string-keyed severity table can
        # never match, which would turn every offence into "No card".
        card = _SEVERITY_TO_CARD.get(severity_value, 'No card')
        offence_severity = f"{offence_class} + {card}"
        # Any combination without an entry (all non-"Offence" cases) maps to class 0.
        offence_severity_label = EVENT_DICTIONARY['offence_severity_class'].get(offence_severity, 0)

        # Skip if any label is missing
        if None in [action_label, offence_label, severity_label, bodypart_label, offence_severity_label]:
            skipped_actions += 1
            continue

        action_folder = os.path.join(DATA_PATH, split, f"action_{action_key}")
        if not os.path.exists(action_folder):
            skipped_actions += 1
            continue

        rgb_action_clips, flow_action_clips = [], []
        for clip_idx in range(2):
            clip_path = os.path.join(action_folder, f"clip_{clip_idx}.mp4")
            if not os.path.exists(clip_path):
                continue

            streams = _read_clip_streams(clip_path)
            if streams is None:
                continue
            rgb_frames, flow_frames = streams

            # Adjust frame count (both lists have the same length, so they are
            # resampled with identical indices)
            rgb_frames = _fit_frame_count(rgb_frames, DESIRED_FRAME_COUNT)
            flow_frames = _fit_frame_count(flow_frames, DESIRED_FRAME_COUNT)

            rgb_action_clips.append(torch.stack(rgb_frames, dim=0))
            flow_action_clips.append(torch.stack(flow_frames, dim=0))

        if rgb_action_clips and flow_action_clips:
            rgb_clips.append(rgb_action_clips)
            flow_clips.append(flow_action_clips)
            labels_action.append(action_label)
            labels_offence.append(offence_label)
            labels_severity.append(severity_label)
            labels_bodypart.append(bodypart_label)
            labels_offence_severity.append(offence_severity_label)

            if offence_class == "Offence":
                offence_count += 1
            else:
                no_offence_count += 1

        # Stop scanning once both quotas are filled.
        if offence_count >= max_samples_o and no_offence_count >= max_samples_no:
            break

    print("\nSummary:")
    print(f"Total actions loaded: {len(rgb_clips)}")
    print(f"Total actions skipped: {skipped_actions}")
    return rgb_clips, flow_clips, labels_action, labels_offence, labels_severity, labels_bodypart, labels_offence_severity


In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class ImprovedTwoStreamNetwork(nn.Module):
    """Two-stream (RGB + optical flow) multi-task classifier for multi-view clips.

    Every frame of every view goes through a per-stream RegNet-Y backbone. The
    per-frame features are passed through a transformer encoder (one encoder
    instance is shared by both streams) and a scaled dot-product self-attention,
    averaged over time, fused across the two streams, averaged over views and
    finally fed to five classification heads.

    Args:
        num_classes_action: Output size of the action head.
        num_classes_offence: Output size of the offence head.
        num_classes_severity: Output size of the severity head.
        num_classes_bodypart: Output size of the body-part head.
        num_classes_offence_severity: Output size of the offence-severity head.
        freeze_backbone: When true, ``requires_grad`` is switched off for all
            parameters of both backbones.
    """

    def __init__(self, num_classes_action=9, num_classes_offence=3, num_classes_severity=5,
                 num_classes_bodypart=2, num_classes_offence_severity=4, freeze_backbone=True):
        super().__init__()

        # One ImageNet-pretrained RegNet-Y backbone per stream.
        self.rgb_backbone = models.regnet_y_32gf(weights='IMAGENET1K_V2')
        self.flow_backbone = models.regnet_y_32gf(weights='IMAGENET1K_V2')

        if freeze_backbone:
            for backbone in (self.rgb_backbone, self.flow_backbone):
                for param in backbone.parameters():
                    param.requires_grad = False

        num_ftrs = self.rgb_backbone.fc.in_features

        # Replace final classification layers with Identity so the backbones
        # return their pooled features.
        self.rgb_backbone.fc = nn.Identity()
        self.flow_backbone.fc = nn.Identity()

        # Temporal modelling using a Transformer Encoder
        encoder_layers = TransformerEncoderLayer(
            d_model=num_ftrs,
            nhead=8,
            dim_feedforward=2048,
            dropout=0.1,
            batch_first=True
        )
        self.temporal_encoder = TransformerEncoder(encoder_layers, num_layers=2)

        # Stream fusion module: concatenated RGB+flow features -> num_ftrs
        self.fusion_layer = nn.Sequential(
            nn.Linear(num_ftrs * 2, num_ftrs),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(num_ftrs, num_ftrs)
        )

        # Task-specific heads. They are created in this fixed order so that
        # parameter initialisation consumes the RNG in a stable sequence.
        self.fc_action = self._make_head(num_ftrs, num_classes_action)
        self.fc_offence = self._make_head(num_ftrs, num_classes_offence)
        self.fc_severity = self._make_head(num_ftrs, num_classes_severity)
        self.fc_bodypart = self._make_head(num_ftrs, num_classes_bodypart)
        self.fc_offence_severity = self._make_head(num_ftrs, num_classes_offence_severity)

    @staticmethod
    def _make_head(in_features, num_classes):
        """Build one classification head: Linear -> ReLU -> Dropout -> Linear."""
        return nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def temporal_attention(self, features):
        """
        Apply scaled dot-product attention over time.

        Args:
          features: Tensor of shape [batch_size * num_streams x frames x feature_dim]

        Returns:
          Attended features of shape [batch_size * num_streams x frames x feature_dim]
        """

        # Compute scaled dot-product attention (features act as query, key and value)
        attention_weights = torch.matmul(features, features.transpose(-2, -1)) / features.size(-1)**0.5
        attention_weights = torch.softmax(attention_weights, dim=-1)

        # Apply attention weights to the features
        attended_features = torch.matmul(attention_weights, features)

        return attended_features

    def forward(self, rgb_input, flow_input):
        """Classify a batch of multi-view clips.

        Args:
            rgb_input: Tensor of shape
                [batch_size, num_streams, num_frames, channels, height, width].
            flow_input: Tensor with the same three leading dimensions as
                ``rgb_input``. A single-channel flow is repeated to three
                channels before it reaches the backbone.

        Returns:
            Tuple ``(action, offence, severity, bodypart, offence_severity)`` of
            logits, each of shape [batch_size, num_classes_of_that_task].
        """
        batch_size, num_streams, num_frames, rgb_channels, rgb_height, rgb_width = rgb_input.shape
        flow_channels, flow_height, flow_width = flow_input.shape[-3:]
        num_images = batch_size * num_streams * num_frames

        # Merge batch, streams and frames into one dimension for the 2-D
        # backbones. The spatial size is taken from the input instead of being
        # fixed, and reshape (unlike view) also accepts non-contiguous input.
        rgb_input = rgb_input.reshape(num_images, rgb_channels, rgb_height, rgb_width)
        flow_input = flow_input.reshape(num_images, flow_channels, flow_height, flow_width)

        # Repeat a single-channel flow across channels to match the 3-channel
        # input the backbone expects (only if needed)
        if flow_channels == 1:
            flow_input = flow_input.repeat(1, 3, 1, 1)

        # Pass inputs through respective backbones (feature extraction)
        rgb_features = self.rgb_backbone(rgb_input)   # Shape: [batch_size * streams * frames x feature_dim]
        flow_features = self.flow_backbone(flow_input)

        # Reshape features back to [batch_size * streams x frames x feature_dim]
        rgb_features = rgb_features.reshape(batch_size * num_streams, num_frames, -1)
        flow_features = flow_features.reshape(batch_size * num_streams, num_frames, -1)

        # Transformer encoding over time (the same encoder serves both streams)
        rgb_features = self.temporal_encoder(rgb_features)
        flow_features = self.temporal_encoder(flow_features)

        # Apply temporal attention (scaled dot-product attention)
        rgb_features = self.temporal_attention(rgb_features)
        flow_features = self.temporal_attention(flow_features)

        # Global average pooling over frames
        rgb_features = rgb_features.mean(dim=1)  # Shape: [batch_size * streams x feature_dim]
        flow_features = flow_features.mean(dim=1)

        # Reshape to [batch_size x streams x features]
        rgb_features = rgb_features.reshape(batch_size, num_streams, -1)
        flow_features = flow_features.reshape(batch_size, num_streams, -1)

        # Concatenate and fuse streams
        combined_features = torch.cat((rgb_features, flow_features), dim=-1)
        combined_features = self.fusion_layer(combined_features)

        # Average across streams (if multiple views exist per action)
        combined_features = combined_features.mean(dim=1)

        # Forward through task-specific layers for multi-task learning
        action_out = self.fc_action(combined_features)
        offence_out = self.fc_offence(combined_features)
        severity_out = self.fc_severity(combined_features)
        bodypart_out = self.fc_bodypart(combined_features)
        offence_severity_out = self.fc_offence_severity(combined_features)

        return action_out, offence_out, severity_out, bodypart_out, offence_severity_out

In [5]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from sklearn.metrics import accuracy_score
from torchvision import transforms
import numpy as np
#from model import ImprovedTwoStreamNetwork
from preprocess import load_filtered_clips_and_labels

# Import your model
#from model import TwoStreamNetwork  # Assuming the model code is saved as model.py

# Custom Dataset class
class ActionDataset(Dataset):
    """Dataset pairing the RGB and optical-flow clips of an action with its labels.

    Args:
        rgb_clips: Sequence with one entry per action; each entry is a sequence
            of items that can be stacked with ``torch.stack``.
        flow_clips: Same layout as ``rgb_clips`` for the optical-flow stream.
        labels: Mapping from task name to a per-action sequence of labels.
        transform: Optional callable applied to every item that is not already
            a ``torch.Tensor``.
    """

    def __init__(self, rgb_clips, flow_clips, labels, transform=None):
        self.rgb_clips = rgb_clips
        self.flow_clips = flow_clips
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Number of actions, taken from the RGB clip list."""
        return len(self.rgb_clips)

    def _stack(self, items):
        """Transform the non-tensor items (if a transform is set) and stack along a new first axis."""
        if self.transform:
            items = [
                item if isinstance(item, torch.Tensor) else self.transform(item)
                for item in items
            ]
        return torch.stack(items, dim=0)

    def __getitem__(self, idx):
        """Return ``(rgb, flow, labels)`` for action ``idx``; ``labels`` maps task name to a tensor."""
        rgb_stack = self._stack(self.rgb_clips[idx])
        flow_stack = self._stack(self.flow_clips[idx])

        targets = {}
        for task_name, task_values in self.labels.items():
            targets[task_name] = torch.tensor(task_values[idx])

        return rgb_stack, flow_stack, targets


# Task names, in the order in which the model returns their logits.
_TASKS = ('action', 'offence', 'severity', 'bodypart', 'offence_severity')


def _ensure_batch_dim(clips):
    """Add a leading batch dimension to ``clips`` when it is missing.

    ImprovedTwoStreamNetwork.forward unpacks six dimensions
    ([batch, views, frames, channels, height, width]), so an input without a
    batch dimension is a 5-D tensor. Tensors of any other rank are returned
    unchanged.
    """
    if clips.dim() == 5:
        return clips.unsqueeze(0)
    return clips


def _run_epoch(model, dataloader, criterion, device, optimizer, training, desc):
    """Run one pass over ``dataloader`` and return ``(avg_loss, accuracy)``.

    The loop is shared by training and validation. When ``training`` is true,
    gradients are enabled and ``optimizer`` is stepped after every batch;
    otherwise the pass runs with gradients disabled and ``optimizer`` is not
    used. The caller is responsible for ``model.train()`` / ``model.eval()``.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    running_loss = 0.0
    num_batches = 0
    all_preds = {task: [] for task in _TASKS}
    all_labels = {task: [] for task in _TASKS}

    with torch.set_grad_enabled(training):
        for rgb_input, flow_input, labels in tqdm(dataloader, desc=desc):
            # Move to device and make sure the batch dimension is present
            rgb_input = _ensure_batch_dim(rgb_input.to(device))
            flow_input = _ensure_batch_dim(flow_input.to(device))
            labels = {key: val.to(device) for key, val in labels.items()}

            if training:
                optimizer.zero_grad()

            # Forward pass
            outputs = model(rgb_input, flow_input)

            # The total loss is the unweighted sum of the per-task losses
            loss = 0.0
            for i, task in enumerate(_TASKS):
                loss = loss + criterion(outputs[i], labels[task])
                all_preds[task].extend(outputs[i].argmax(dim=1).cpu().numpy())
                all_labels[task].extend(labels[task].cpu().numpy())

            if training:
                loss.backward()
                optimizer.step()

            running_loss += loss.item()
            num_batches += 1

    if num_batches == 0:
        # Fail with a clear message rather than a bare ZeroDivisionError.
        raise ValueError(f"{desc} dataloader yielded no batches; nothing to average.")

    # Average of the per-batch losses, and per-task accuracy over all samples
    avg_loss = running_loss / num_batches
    accuracy = {task: accuracy_score(all_labels[task], all_preds[task]) for task in _TASKS}

    return avg_loss, accuracy


def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one epoch.

    Args:
        model: Module called as ``model(rgb, flow)`` that returns one logits
            tensor per task, ordered as action, offence, severity, bodypart,
            offence_severity.
        dataloader: Iterable of ``(rgb_input, flow_input, labels)`` batches,
            where ``labels`` maps task name to a tensor of class indices.
        criterion: Loss applied to each task's logits and labels.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)``: the mean per-batch loss and a dict mapping
        each task name to its accuracy over the epoch.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.train()
    return _run_epoch(model, dataloader, criterion, device, optimizer, True, "Training")


# Validation function
def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating its parameters.

    Takes the same ``model``, ``dataloader``, ``criterion`` and ``device`` as
    ``train_one_epoch``; the model is switched to eval mode and gradients are
    disabled for the whole pass.

    Returns:
        ``(avg_loss, accuracy)`` with the same meaning as in ``train_one_epoch``.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.eval()
    return _run_epoch(model, dataloader, criterion, device, None, False, "Validation")

def main(data_path, num_epochs=10, batch_size=1, learning_rate=1e-4, max_samples_o=1, max_samples_no =1):
    """Load the train/valid splits, train the two-stream model and checkpoint it.

    Args:
        data_path: Dataset root handed to ``load_filtered_clips_and_labels``.
        num_epochs: Number of train + validate rounds.
        batch_size: Batch size of both data loaders.
        learning_rate: Learning rate of the Adam optimizer.
        max_samples_o: Forwarded to the loader (limit for "Offence" actions).
        max_samples_no: Forwarded to the loader (limit for no-offence actions).

    Side effects:
        Writes ``best_model.pth`` whenever the validation loss improves and
        rewrites ``final_model.pth`` at the end of every epoch.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    def load_split(split_name):
        """Load one split and bundle its five label lists into a per-task dict."""
        rgb, flow, action, offence, severity, bodypart, offence_severity = \
            load_filtered_clips_and_labels(data_path, split_name, max_samples_o, max_samples_no)
        per_task_labels = {
            "action": action,
            "offence": offence,
            "severity": severity,
            "bodypart": bodypart,
            "offence_severity": offence_severity
        }
        return rgb, flow, per_task_labels

    # Load data (train first, then valid)
    train_rgb_clips, train_flow_clips, train_labels = load_split("train")
    valid_rgb_clips, valid_flow_clips, valid_labels = load_split("valid")

    # Transform handed to the datasets; ActionDataset only applies it to items
    # that are not already tensors.
    imagenet_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        imagenet_normalize
    ])

    # Datasets and loaders: only the training loader shuffles.
    train_dataset = ActionDataset(train_rgb_clips, train_flow_clips, train_labels, transform=transform)
    valid_dataset = ActionDataset(valid_rgb_clips, valid_flow_clips, valid_labels, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Model, loss function and optimizer
    model = ImprovedTwoStreamNetwork().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Lowest validation loss seen so far; the first epoch always improves on it.
    best_val_loss = float('inf')
    for epoch_number in range(1, num_epochs + 1):
        print(f"\nEpoch {epoch_number}/{num_epochs}")

        # Train
        train_loss, train_accuracy = train_one_epoch(model, train_loader, criterion, optimizer, device)
        print(f"Train Loss: {train_loss:.4f} | Train Accuracies: {train_accuracy}")

        # Validate
        val_loss, val_accuracy = validate(model, valid_loader, criterion, device)
        print(f"Val Loss: {val_loss:.4f} | Val Accuracies: {val_accuracy}")

        # Keep a copy of the weights with the lowest validation loss
        improved = val_loss < best_val_loss
        if improved:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "best_model.pth")
            print("Saved best model.")

        # Overwritten every epoch, so the file always holds the latest weights
        torch.save(model.state_dict(), "final_model.pth")



# Entry point: run training with main()'s default hyper-parameters.
if __name__ == "__main__":
    # Update this path with your actual data path. main() loads the "train"
    # and "valid" splits from it.
    DATA_PATH = 'mvfouls'
    main(data_path=DATA_PATH)


Loading annotations from: mvfouls\train\annotations.json
Total actions found in annotations: 2916

Summary:
Total actions loaded: 2
Total actions skipped: 6
Loading annotations from: mvfouls\valid\annotations.json
Total actions found in annotations: 411

Summary:
Total actions loaded: 2
Total actions skipped: 0

Epoch 1/10


Training: 100%|██████████| 2/2 [00:07<00:00,  3.62s/it]


Train Loss: 7.1290 | Train Accuracies: {'action': 0.0, 'offence': 0.0, 'severity': 0.5, 'bodypart': 0.5, 'offence_severity': 0.0}


Validation: 100%|██████████| 2/2 [00:03<00:00,  1.87s/it]


Val Loss: 6.2780 | Val Accuracies: {'action': 0.5, 'offence': 0.5, 'severity': 0.5, 'bodypart': 1.0, 'offence_severity': 0.5}
Saved best model.

Epoch 2/10


Training: 100%|██████████| 2/2 [00:07<00:00,  3.70s/it]


Train Loss: 5.4933 | Train Accuracies: {'action': 0.0, 'offence': 0.0, 'severity': 1.0, 'bodypart': 0.0, 'offence_severity': 0.0}


Validation: 100%|██████████| 2/2 [00:03<00:00,  1.83s/it]


Val Loss: 6.3634 | Val Accuracies: {'action': 0.0, 'offence': 0.5, 'severity': 0.5, 'bodypart': 0.0, 'offence_severity': 0.5}

Epoch 3/10


Training: 100%|██████████| 2/2 [00:07<00:00,  3.88s/it]


Train Loss: 4.2114 | Train Accuracies: {'action': 0.5, 'offence': 0.5, 'severity': 1.0, 'bodypart': 0.5, 'offence_severity': 0.5}


Validation: 100%|██████████| 2/2 [00:03<00:00,  1.82s/it]


Val Loss: 7.3725 | Val Accuracies: {'action': 0.0, 'offence': 0.5, 'severity': 0.5, 'bodypart': 0.0, 'offence_severity': 0.5}

Epoch 4/10


Training: 100%|██████████| 2/2 [00:07<00:00,  3.76s/it]


Train Loss: 3.7978 | Train Accuracies: {'action': 0.0, 'offence': 0.0, 'severity': 1.0, 'bodypart': 0.0, 'offence_severity': 1.0}


Validation: 100%|██████████| 2/2 [00:03<00:00,  1.85s/it]


Val Loss: 8.3225 | Val Accuracies: {'action': 0.5, 'offence': 0.5, 'severity': 0.5, 'bodypart': 1.0, 'offence_severity': 0.5}

Epoch 5/10


Training: 100%|██████████| 2/2 [00:07<00:00,  3.70s/it]


Train Loss: 3.2467 | Train Accuracies: {'action': 0.5, 'offence': 0.5, 'severity': 1.0, 'bodypart': 0.5, 'offence_severity': 0.5}


Validation: 100%|██████████| 2/2 [00:03<00:00,  1.82s/it]


Val Loss: 9.5705 | Val Accuracies: {'action': 0.0, 'offence': 0.5, 'severity': 0.5, 'bodypart': 0.0, 'offence_severity': 0.5}

Epoch 6/10


Training: 100%|██████████| 2/2 [00:07<00:00,  3.73s/it]


Train Loss: 2.7976 | Train Accuracies: {'action': 0.5, 'offence': 0.5, 'severity': 1.0, 'bodypart': 0.5, 'offence_severity': 0.5}


Validation: 100%|██████████| 2/2 [00:03<00:00,  1.86s/it]


Val Loss: 10.5432 | Val Accuracies: {'action': 0.0, 'offence': 0.5, 'severity': 0.5, 'bodypart': 0.0, 'offence_severity': 0.5}

Epoch 7/10


Training: 100%|██████████| 2/2 [00:07<00:00,  3.73s/it]


Train Loss: 3.3452 | Train Accuracies: {'action': 0.5, 'offence': 0.0, 'severity': 1.0, 'bodypart': 0.5, 'offence_severity': 0.5}


Validation: 100%|██████████| 2/2 [00:03<00:00,  1.84s/it]


Val Loss: 11.5540 | Val Accuracies: {'action': 0.0, 'offence': 0.5, 'severity': 0.5, 'bodypart': 0.0, 'offence_severity': 1.0}

Epoch 8/10


Training: 100%|██████████| 2/2 [00:07<00:00,  3.72s/it]


Train Loss: 3.4611 | Train Accuracies: {'action': 0.5, 'offence': 0.0, 'severity': 1.0, 'bodypart': 0.5, 'offence_severity': 0.5}


Validation: 100%|██████████| 2/2 [00:03<00:00,  1.90s/it]


Val Loss: 11.0010 | Val Accuracies: {'action': 0.0, 'offence': 0.5, 'severity': 0.5, 'bodypart': 1.0, 'offence_severity': 0.5}

Epoch 9/10


Training: 100%|██████████| 2/2 [00:07<00:00,  3.73s/it]


Train Loss: 5.3813 | Train Accuracies: {'action': 0.5, 'offence': 0.0, 'severity': 1.0, 'bodypart': 0.5, 'offence_severity': 0.0}


Validation: 100%|██████████| 2/2 [00:03<00:00,  1.83s/it]


Val Loss: 10.0522 | Val Accuracies: {'action': 0.5, 'offence': 0.5, 'severity': 0.5, 'bodypart': 1.0, 'offence_severity': 0.5}

Epoch 10/10


Training: 100%|██████████| 2/2 [00:07<00:00,  3.74s/it]


Train Loss: 3.7819 | Train Accuracies: {'action': 1.0, 'offence': 0.0, 'severity': 1.0, 'bodypart': 0.5, 'offence_severity': 0.5}


Validation: 100%|██████████| 2/2 [00:03<00:00,  1.88s/it]


Val Loss: 9.0070 | Val Accuracies: {'action': 0.5, 'offence': 0.5, 'severity': 0.5, 'bodypart': 1.0, 'offence_severity': 0.5}
