In [None]:
%pip install SoccerNet

In [2]:
import os
import zipfile
from SoccerNet.Downloader import SoccerNetDownloader as SNdl

# Set up the downloader
local_directory = "path/to/SoccerNet"
mySNdl = SNdl(LocalDirectory=local_directory)

# Download the data
mySNdl.downloadDataTask(task="mvfouls", split=["train", "valid", "test", "challenge"], password="s0cc3rn3t")

# Unzip the downloaded files
task_directory = os.path.join(local_directory, "mvfouls")
for split in ["train", "valid", "test", "challenge"]:
    zip_file = os.path.join(task_directory, f"{split}.zip")
    if os.path.exists(zip_file):
        # Create a new folder with the same name as the zip file
        extract_folder = os.path.join(task_directory, split)
        os.makedirs(extract_folder, exist_ok=True)

        # Extract the contents to the new folder
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(extract_folder)
        print(f"Extracted {split}.zip to {extract_folder}")
    else:
        print(f"{split}.zip not found")

# Optionally, remove the zip files after extraction
for split in ["train", "valid", "test", "challenge"]:
    zip_file = os.path.join(task_directory, f"{split}.zip")
    if os.path.exists(zip_file):
        os.remove(zip_file)
        print(f"Removed {split}.zip")

Downloading path/to/SoccerNet/mvfouls/train.zip...: : 2.46GiB [04:17, 9.55MiB/s]                         
Downloading path/to/SoccerNet/mvfouls/valid.zip...: : 351MiB [00:23, 15.0MiB/s]                        
Downloading path/to/SoccerNet/mvfouls/test.zip...: : 268MiB [00:18, 14.6MiB/s]                        
Downloading path/to/SoccerNet/mvfouls/challenge.zip...: : 246MiB [00:16, 14.8MiB/s]                        


Extracted train.zip to path/to/SoccerNet/mvfouls/train
Extracted valid.zip to path/to/SoccerNet/mvfouls/valid
Extracted test.zip to path/to/SoccerNet/mvfouls/test
Extracted challenge.zip to path/to/SoccerNet/mvfouls/challenge
Removed train.zip
Removed valid.zip
Removed test.zip
Removed challenge.zip


In [3]:
import os
import torch
import json
import cv2
import numpy as np
from torchvision import transforms
from PIL import Image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set the desired frame count
DESIRED_FRAME_COUNT = 126

# Load the EVENT_DICTIONARY for mapping annotation labels
EVENT_DICTIONARY = {
    'action_class': {"Tackling": 0, "Standing tackling": 1, "High leg": 2, "Holding": 3, "Pushing": 4,
                     "Elbowing": 5, "Challenge": 6, "Dive": 7, "Dont know": 8},
    'offence_class': {"Offence": 0, "Between": 1, "No Offence": 2, "No offence": 2},
    'severity_class': {"1.0": 0, "2.0": 1, "3.0": 2, "4.0": 3, "5.0": 4},
    'bodypart_class': {"Upper body": 0, "Under body": 1},
    'offence_severity_class': {"No offence": 0, "Offence + No card": 1, "Offence + Yellow card": 2, "Offence + Red card": 3}
}

# Transformation for RGB preprocessing
rgb_transform = transforms.Compose([
    transforms.Resize((56, 56)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Transformation for flow preprocessing
flow_transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor()
])

def load_filtered_clips_and_labels(DATA_PATH, split, max_samples):
    rgb_clips, flow_clips = [], []
    labels_action, labels_offence, labels_severity, labels_bodypart, labels_offence_severity = [], [], [], [], []

    annotations_path = os.path.join(DATA_PATH, split, "annotations.json")
    print(f"Loading annotations from: {annotations_path}")

    with open(annotations_path, 'r') as f:
        annotations = json.load(f)
    print(f"Total actions found in annotations: {len(annotations['Actions'])}")

    offence_count, no_offence_count, skipped_actions = 0, 0, 0

    for action_index, (action_key, action_data) in enumerate(annotations['Actions'].items()):
        offence_class = action_data['Offence']
        if (offence_class == "Offence" and offence_count >= max_samples) or \
           (offence_class in ["No offence", "No Offence"] and no_offence_count >= max_samples):
            continue

        # Map labels to indices using the dictionary
        action_label = EVENT_DICTIONARY['action_class'].get(action_data['Action class'])
        offence_label = EVENT_DICTIONARY['offence_class'].get(offence_class)
        severity_label = EVENT_DICTIONARY['severity_class'].get(action_data.get('Severity', '1.0'))
        bodypart_label = EVENT_DICTIONARY['bodypart_class'].get(action_data.get('Bodypart', 'Upper body'))
        offence_severity = f"{offence_class} + {EVENT_DICTIONARY['severity_class'].get(severity_label, 'No card')}"
        offence_severity_label = EVENT_DICTIONARY['offence_severity_class'].get(offence_severity, 0)

        # Skip if any label is missing
        if None in [action_label, offence_label, severity_label, bodypart_label, offence_severity_label]:
            skipped_actions += 1
            continue

        action_folder = os.path.join(DATA_PATH, split, f"action_{action_key}")
        if not os.path.exists(action_folder):
            skipped_actions += 1
            continue

        rgb_action_clips, flow_action_clips = [], []
        for clip_idx in range(2):
            clip_path = os.path.join(action_folder, f"clip_{clip_idx}.mp4")
            if not os.path.exists(clip_path):
                continue

            cap = cv2.VideoCapture(clip_path)
            ret, prev_frame = cap.read()
            if not ret:
                continue

            prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
            rgb_frames, flow_frames = [], []

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Process RGB frame
                rgb_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                rgb_frame = rgb_transform(rgb_frame)
                rgb_frames.append(rgb_frame)

                # Process Optical Flow
                curr_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
                flow = np.clip(flow, -20, 20)  # Clipping to limit extreme values
                flow = ((flow + 20) * (255.0 / 40)).astype(np.uint8)  # Normalizing to 0-255 range
                flow_frame = Image.fromarray(flow[..., 0])  # Taking the horizontal component for simplicity
                flow_frame = flow_transform(flow_frame)
                flow_frames.append(flow_frame)
                prev_gray = curr_gray

            cap.release()

            # Adjust frame count
            if len(rgb_frames) > DESIRED_FRAME_COUNT:
                indices = np.linspace(0, len(rgb_frames) - 1, DESIRED_FRAME_COUNT).astype(int)
                rgb_frames = [rgb_frames[i] for i in indices]
                flow_frames = [flow_frames[i] for i in indices]
            elif len(rgb_frames) < DESIRED_FRAME_COUNT:
                rgb_frames += [rgb_frames[-1]] * (DESIRED_FRAME_COUNT - len(rgb_frames))
                flow_frames += [flow_frames[-1]] * (DESIRED_FRAME_COUNT - len(flow_frames))

            rgb_action_clips.append(torch.stack(rgb_frames, dim=0))
            flow_action_clips.append(torch.stack(flow_frames, dim=0))

        if rgb_action_clips and flow_action_clips:
            rgb_clips.append(rgb_action_clips)
            flow_clips.append(flow_action_clips)
            labels_action.append(action_label)
            labels_offence.append(offence_label)
            labels_severity.append(severity_label)
            labels_bodypart.append(bodypart_label)
            labels_offence_severity.append(offence_severity_label)

            if offence_class == "Offence":
                offence_count += 1
            else:
                no_offence_count += 1

        if offence_count >= max_samples and no_offence_count >= max_samples:
            break

    print("\nSummary:")
    print(f"Total actions loaded: {len(rgb_clips)}")
    print(f"Total actions skipped: {skipped_actions}")
    return rgb_clips, flow_clips, labels_action, labels_offence, labels_severity, labels_bodypart, labels_offence_severity


In [4]:
import torch
import torch.nn as nn
import torchvision.models as models

class TwoStreamNetwork(nn.Module):
    def __init__(self, num_classes_action=9, num_classes_offence=3, num_classes_severity=5,
                 num_classes_bodypart=2, num_classes_offence_severity=4, freeze_backbone=True):
        super(TwoStreamNetwork, self).__init__()

        # Load the backbone for both streams
        self.rgb_backbone = models.resnet50(weights='IMAGENET1K_V1')
        self.flow_backbone = models.resnet50(weights='IMAGENET1K_V1')

        # Optionally freeze backbone layers
        if freeze_backbone:
            for param in self.rgb_backbone.parameters():
                param.requires_grad = False
            for param in self.flow_backbone.parameters():
                param.requires_grad = False

        # Replace the final layer with Identity for both backbones
        num_ftrs = self.rgb_backbone.fc.in_features
        self.rgb_backbone.fc = nn.Identity()  # RGB Stream
        self.flow_backbone.fc = nn.Identity()  # Optical Flow Stream

        # Define fully connected layers for classification
        self.fc_action = nn.Linear(num_ftrs * 2, num_classes_action)
        self.fc_offence = nn.Linear(num_ftrs * 2, num_classes_offence)
        self.fc_severity = nn.Linear(num_ftrs * 2, num_classes_severity)
        self.fc_bodypart = nn.Linear(num_ftrs * 2, num_classes_bodypart)
        self.fc_offence_severity = nn.Linear(num_ftrs * 2, num_classes_offence_severity)

    def forward(self, rgb_input, flow_input):
        # Extract batch size and frame count from the RGB input
        batch_size, num_streams, num_frames, _, _, _ = rgb_input.shape  # Shape: [batch_size, num_streams, num_frames, 3, 56, 56]

        # Reshape input tensors for per-frame processing
        rgb_input = rgb_input.view(batch_size * num_streams * num_frames, 3, 112, 112)  # Flatten to [batch_size * num_streams * num_frames, 3, 56, 56]
        flow_input = flow_input.view(batch_size * num_streams * num_frames, 1, 112, 112)  # Flatten to [batch_size * num_streams * num_frames, 1, 56, 56]
        flow_input = flow_input.repeat(1, 3, 1, 1)  # Repeat the single channel three times

        # Process each frame through backbones
        rgb_features = self.rgb_backbone(rgb_input)  # Shape: [batch_size * num_streams * num_frames, num_ftrs]
        flow_features = self.flow_backbone(flow_input)  # Shape: [batch_size * num_streams * num_frames, num_ftrs]

        # Reshape back to [batch_size, num_streams, num_frames, num_ftrs]
        rgb_features = rgb_features.view(batch_size, num_streams, num_frames, -1)
        flow_features = flow_features.view(batch_size, num_streams, num_frames, -1)

        # Aggregate features across frames (mean pooling over frames)
        rgb_features = rgb_features.mean(dim=2)  # Shape: [batch_size, num_streams, num_ftrs]
        flow_features = flow_features.mean(dim=2)  # Shape: [batch_size, num_streams, num_ftrs]

        # Combine features from both streams
        combined_features = torch.cat((rgb_features, flow_features), dim=2)  # Shape: [batch_size, num_streams, num_ftrs * 2]

        # You may want to aggregate across streams as well, if applicable
        combined_features = combined_features.mean(dim=1)  # Optionally, take mean across streams

        # Forward through task-specific layers
        action_out = self.fc_action(combined_features)
        offence_out = self.fc_offence(combined_features)
        severity_out = self.fc_severity(combined_features)
        bodypart_out = self.fc_bodypart(combined_features)
        offence_severity_out = self.fc_offence_severity(combined_features)

        return action_out, offence_out, severity_out, bodypart_out, offence_severity_out

# # Example of model instantiation
# model = TwoStreamNetwork()

# # Move model to device
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# # Example input for a batch of 2 videos, each with 126 frames
# rgb_input = torch.randn(2, 126, 3, 224, 224).to(device)  # RGB frames
# flow_input = torch.randn(2, 126, 3, 224, 224).to(device)  # Flow frames

# # Forward pass
# outputs = model(rgb_input, flow_input)
# for output in outputs:
#     print(output.shape)  # Should print the shape of each output tensor


In [11]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from sklearn.metrics import accuracy_score
from torchvision import transforms
import numpy as np

# Import your model
#from model import TwoStreamNetwork  # Assuming the model code is saved as model.py

# Custom Dataset class
class ActionDataset(Dataset):
    def __init__(self, rgb_clips, flow_clips, labels, transform=None):
        self.rgb_clips = rgb_clips
        self.flow_clips = flow_clips
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.rgb_clips)

    def __getitem__(self, idx):
        rgb_frames = self.rgb_clips[idx]
        flow_frames = self.flow_clips[idx]

        # Apply transformation
        if self.transform:
            rgb_frames = [self.transform(frame) if not isinstance(frame, torch.Tensor) else frame for frame in rgb_frames]
            flow_frames = [self.transform(frame) if not isinstance(frame, torch.Tensor) else frame for frame in flow_frames]

        # Ensure dimensions are [num_frames, channels, height, width]
        rgb_frames = torch.stack(rgb_frames, dim=0)
        flow_frames = torch.stack(flow_frames, dim=0)

        label_dict = {key: torch.tensor(self.labels[key][idx]) for key in self.labels.keys()}

        return rgb_frames, flow_frames, label_dict


def train_one_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    all_preds = {key: [] for key in ['action', 'offence', 'severity', 'bodypart', 'offence_severity']}
    all_labels = {key: [] for key in all_preds.keys()}

    for rgb_input, flow_input, labels in tqdm(dataloader, desc="Training"):
        # Check input shapes and move to device
        rgb_input, flow_input = rgb_input.to(device), flow_input.to(device)

        # Verify dimensions; if missing batch dim, add it
        if len(rgb_input.shape) == 4:
            rgb_input = rgb_input.unsqueeze(0)  # Add batch dim if missing
        if len(flow_input.shape) == 4:
            flow_input = flow_input.unsqueeze(0)

        labels = {key: val.to(device) for key, val in labels.items()}

        optimizer.zero_grad()

        # Forward pass
        outputs = model(rgb_input, flow_input)

        # Compute losses for each task
        loss = 0.0
        for i, task in enumerate(all_preds.keys()):
            task_loss = criterion(outputs[i], labels[task])
            loss += task_loss
            all_preds[task].extend(outputs[i].argmax(dim=1).cpu().numpy())
            all_labels[task].extend(labels[task].cpu().numpy())

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(dataloader)
    accuracy = {task: accuracy_score(all_labels[task], all_preds[task]) for task in all_preds.keys()}

    return avg_loss, accuracy

# Validation function
def validate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_preds = {key: [] for key in ['action', 'offence', 'severity', 'bodypart', 'offence_severity']}
    all_labels = {key: [] for key in all_preds.keys()}

    with torch.no_grad():
        for rgb_input, flow_input, labels in tqdm(dataloader, desc="Validation"):
            rgb_input, flow_input = rgb_input.to(device), flow_input.to(device)
            labels = {key: val.to(device) for key, val in labels.items()}

            # Forward pass
            outputs = model(rgb_input, flow_input)

            # Compute losses and predictions for each task
            loss = 0.0
            for i, task in enumerate(all_preds.keys()):
                task_loss = criterion(outputs[i], labels[task])
                loss += task_loss
                all_preds[task].extend(outputs[i].argmax(dim=1).cpu().numpy())
                all_labels[task].extend(labels[task].cpu().numpy())

            running_loss += loss.item()

    # Calculate average loss and accuracy
    avg_loss = running_loss / len(dataloader)
    accuracy = {task: accuracy_score(all_labels[task], all_preds[task]) for task in all_preds.keys()}

    return avg_loss, accuracy

def main(data_path, num_epochs=20, batch_size=4, learning_rate=1e-3, max_samples=50):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load data
    train_rgb_clips, train_flow_clips, train_labels_action, train_labels_offence, train_labels_severity, train_labels_bodypart, train_labels_offence_severity = \
        load_filtered_clips_and_labels(data_path, "train", max_samples)

    valid_rgb_clips, valid_flow_clips, valid_labels_action, valid_labels_offence, valid_labels_severity, valid_labels_bodypart, valid_labels_offence_severity = \
        load_filtered_clips_and_labels(data_path, "valid", max_samples)

    # Organize labels in a dictionary format
    train_labels = {
        "action": train_labels_action,
        "offence": train_labels_offence,
        "severity": train_labels_severity,
        "bodypart": train_labels_bodypart,
        "offence_severity": train_labels_offence_severity
    }
    valid_labels = {
        "action": valid_labels_action,
        "offence": valid_labels_offence,
        "severity": valid_labels_severity,
        "bodypart": valid_labels_bodypart,
        "offence_severity": valid_labels_offence_severity
    }

    # Define transform
    transform = transforms.Compose([
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Create datasets and loaders
    train_dataset = ActionDataset(train_rgb_clips, train_flow_clips, train_labels, transform=transform)
    valid_dataset = ActionDataset(valid_rgb_clips, valid_flow_clips, valid_labels, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Initialize model, loss function, and optimizer
    model = TwoStreamNetwork().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training and validation loop
    best_val_loss = float('inf')
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")

        # Train
        train_loss, train_accuracy = train_one_epoch(model, train_loader, criterion, optimizer, device)
        print(f"Train Loss: {train_loss:.4f} | Train Accuracies: {train_accuracy}")

        # Validate
        val_loss, val_accuracy = validate(model, valid_loader, criterion, device)
        print(f"Val Loss: {val_loss:.4f} | Val Accuracies: {val_accuracy}")

        # Save the best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "best_model.pth")
            print("Saved best model.")



if __name__ == "__main__":
    # Update this path with your actual data path
    DATA_PATH = 'path/to/SoccerNet/mvfouls'
    main(data_path=DATA_PATH)


Loading annotations from: path/to/SoccerNet/mvfouls/train/annotations.json
Total actions found in annotations: 2916

Summary:
Total actions loaded: 100
Total actions skipped: 142
Loading annotations from: path/to/SoccerNet/mvfouls/valid/annotations.json
Total actions found in annotations: 411

Summary:
Total actions loaded: 64
Total actions skipped: 36

Epoch 1/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.79it/s]


Train Loss: 7.8979 | Train Accuracies: {'action': 0.17, 'offence': 0.52, 'severity': 0.75, 'bodypart': 0.46, 'offence_severity': 0.52}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.79it/s]


Val Loss: 16.4196 | Val Accuracies: {'action': 0.171875, 'offence': 0.203125, 'severity': 0.5625, 'bodypart': 0.421875, 'offence_severity': 0.21875}
Saved best model.

Epoch 2/20


Training: 100%|██████████| 25/25 [00:14<00:00,  1.77it/s]


Train Loss: 6.6775 | Train Accuracies: {'action': 0.18, 'offence': 0.57, 'severity': 0.77, 'bodypart': 0.43, 'offence_severity': 0.61}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.85it/s]


Val Loss: 7.4137 | Val Accuracies: {'action': 0.359375, 'offence': 0.359375, 'severity': 0.5625, 'bodypart': 0.421875, 'offence_severity': 0.625}
Saved best model.

Epoch 3/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.80it/s]


Train Loss: 5.5753 | Train Accuracies: {'action': 0.2, 'offence': 0.63, 'severity': 0.76, 'bodypart': 0.53, 'offence_severity': 0.64}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.86it/s]


Val Loss: 6.1830 | Val Accuracies: {'action': 0.28125, 'offence': 0.21875, 'severity': 0.5625, 'bodypart': 0.578125, 'offence_severity': 0.21875}
Saved best model.

Epoch 4/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.83it/s]


Train Loss: 5.9257 | Train Accuracies: {'action': 0.19, 'offence': 0.63, 'severity': 0.73, 'bodypart': 0.48, 'offence_severity': 0.72}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.87it/s]


Val Loss: 6.2357 | Val Accuracies: {'action': 0.234375, 'offence': 0.828125, 'severity': 0.5625, 'bodypart': 0.578125, 'offence_severity': 0.375}

Epoch 5/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.81it/s]


Train Loss: 5.6519 | Train Accuracies: {'action': 0.26, 'offence': 0.53, 'severity': 0.62, 'bodypart': 0.55, 'offence_severity': 0.61}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.84it/s]


Val Loss: 8.3467 | Val Accuracies: {'action': 0.296875, 'offence': 0.203125, 'severity': 0.5625, 'bodypart': 0.40625, 'offence_severity': 0.21875}

Epoch 6/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.81it/s]


Train Loss: 5.8886 | Train Accuracies: {'action': 0.2, 'offence': 0.58, 'severity': 0.72, 'bodypart': 0.58, 'offence_severity': 0.58}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.85it/s]


Val Loss: 6.9246 | Val Accuracies: {'action': 0.34375, 'offence': 0.265625, 'severity': 0.5625, 'bodypart': 0.5625, 'offence_severity': 0.796875}

Epoch 7/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.81it/s]


Train Loss: 4.8049 | Train Accuracies: {'action': 0.29, 'offence': 0.61, 'severity': 0.66, 'bodypart': 0.63, 'offence_severity': 0.72}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.85it/s]


Val Loss: 6.1599 | Val Accuracies: {'action': 0.15625, 'offence': 0.46875, 'severity': 0.5625, 'bodypart': 0.546875, 'offence_severity': 0.5}
Saved best model.

Epoch 8/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.81it/s]


Train Loss: 4.7196 | Train Accuracies: {'action': 0.3, 'offence': 0.74, 'severity': 0.78, 'bodypart': 0.57, 'offence_severity': 0.83}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.85it/s]


Val Loss: 5.4313 | Val Accuracies: {'action': 0.171875, 'offence': 0.78125, 'severity': 0.5625, 'bodypart': 0.546875, 'offence_severity': 0.734375}
Saved best model.

Epoch 9/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.81it/s]


Train Loss: 4.0278 | Train Accuracies: {'action': 0.45, 'offence': 0.65, 'severity': 0.79, 'bodypart': 0.59, 'offence_severity': 0.76}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.88it/s]


Val Loss: 5.8051 | Val Accuracies: {'action': 0.375, 'offence': 0.609375, 'severity': 0.5625, 'bodypart': 0.515625, 'offence_severity': 0.703125}

Epoch 10/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.83it/s]


Train Loss: 4.3324 | Train Accuracies: {'action': 0.34, 'offence': 0.76, 'severity': 0.76, 'bodypart': 0.73, 'offence_severity': 0.71}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.88it/s]


Val Loss: 8.1233 | Val Accuracies: {'action': 0.203125, 'offence': 0.296875, 'severity': 0.5625, 'bodypart': 0.359375, 'offence_severity': 0.234375}

Epoch 11/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.83it/s]


Train Loss: 4.2207 | Train Accuracies: {'action': 0.37, 'offence': 0.68, 'severity': 0.85, 'bodypart': 0.59, 'offence_severity': 0.75}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.87it/s]


Val Loss: 7.0404 | Val Accuracies: {'action': 0.203125, 'offence': 0.6875, 'severity': 0.578125, 'bodypart': 0.421875, 'offence_severity': 0.4375}

Epoch 12/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.84it/s]


Train Loss: 4.0509 | Train Accuracies: {'action': 0.41, 'offence': 0.73, 'severity': 0.75, 'bodypart': 0.69, 'offence_severity': 0.69}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.89it/s]


Val Loss: 5.8314 | Val Accuracies: {'action': 0.171875, 'offence': 0.71875, 'severity': 0.59375, 'bodypart': 0.453125, 'offence_severity': 0.65625}

Epoch 13/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.85it/s]


Train Loss: 3.5554 | Train Accuracies: {'action': 0.5, 'offence': 0.72, 'severity': 0.83, 'bodypart': 0.69, 'offence_severity': 0.83}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.90it/s]


Val Loss: 5.1290 | Val Accuracies: {'action': 0.328125, 'offence': 0.796875, 'severity': 0.578125, 'bodypart': 0.390625, 'offence_severity': 0.84375}
Saved best model.

Epoch 14/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.82it/s]


Train Loss: 4.7871 | Train Accuracies: {'action': 0.35, 'offence': 0.63, 'severity': 0.77, 'bodypart': 0.69, 'offence_severity': 0.63}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.87it/s]


Val Loss: 10.9261 | Val Accuracies: {'action': 0.140625, 'offence': 0.21875, 'severity': 0.5625, 'bodypart': 0.421875, 'offence_severity': 0.21875}

Epoch 15/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.83it/s]


Train Loss: 4.4412 | Train Accuracies: {'action': 0.42, 'offence': 0.64, 'severity': 0.84, 'bodypart': 0.54, 'offence_severity': 0.74}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.84it/s]


Val Loss: 5.5870 | Val Accuracies: {'action': 0.25, 'offence': 0.640625, 'severity': 0.59375, 'bodypart': 0.4375, 'offence_severity': 0.6875}

Epoch 16/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.81it/s]


Train Loss: 3.4598 | Train Accuracies: {'action': 0.42, 'offence': 0.78, 'severity': 0.85, 'bodypart': 0.65, 'offence_severity': 0.86}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.84it/s]


Val Loss: 6.3781 | Val Accuracies: {'action': 0.125, 'offence': 0.34375, 'severity': 0.578125, 'bodypart': 0.578125, 'offence_severity': 0.625}

Epoch 17/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.81it/s]


Train Loss: 3.4218 | Train Accuracies: {'action': 0.55, 'offence': 0.68, 'severity': 0.88, 'bodypart': 0.68, 'offence_severity': 0.83}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.87it/s]


Val Loss: 7.8406 | Val Accuracies: {'action': 0.296875, 'offence': 0.296875, 'severity': 0.5625, 'bodypart': 0.515625, 'offence_severity': 0.390625}

Epoch 18/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.80it/s]


Train Loss: 4.0153 | Train Accuracies: {'action': 0.45, 'offence': 0.67, 'severity': 0.85, 'bodypart': 0.66, 'offence_severity': 0.72}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.84it/s]


Val Loss: 7.1777 | Val Accuracies: {'action': 0.15625, 'offence': 0.71875, 'severity': 0.578125, 'bodypart': 0.4375, 'offence_severity': 0.578125}

Epoch 19/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.81it/s]


Train Loss: 3.5689 | Train Accuracies: {'action': 0.49, 'offence': 0.78, 'severity': 0.87, 'bodypart': 0.66, 'offence_severity': 0.81}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.84it/s]


Val Loss: 5.8281 | Val Accuracies: {'action': 0.328125, 'offence': 0.671875, 'severity': 0.578125, 'bodypart': 0.5625, 'offence_severity': 0.734375}

Epoch 20/20


Training: 100%|██████████| 25/25 [00:13<00:00,  1.81it/s]


Train Loss: 3.8561 | Train Accuracies: {'action': 0.51, 'offence': 0.75, 'severity': 0.8, 'bodypart': 0.64, 'offence_severity': 0.79}


Validation: 100%|██████████| 16/16 [00:08<00:00,  1.83it/s]

Val Loss: 7.0829 | Val Accuracies: {'action': 0.203125, 'offence': 0.375, 'severity': 0.609375, 'bodypart': 0.375, 'offence_severity': 0.640625}





In [10]:
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import accuracy_score

# Test function
def test(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_preds = {key: [] for key in ['action', 'offence', 'severity', 'bodypart', 'offence_severity']}
    all_labels = {key: [] for key in all_preds.keys()}

    with torch.no_grad():
        for rgb_input, flow_input, labels in tqdm(dataloader, desc="Testing"):
            rgb_input, flow_input = rgb_input.to(device), flow_input.to(device)
            labels = {key: val.to(device) for key, val in labels.items()}

            # Forward pass
            outputs = model(rgb_input, flow_input)

            # Compute losses and predictions for each task
            loss = 0.0
            for i, task in enumerate(all_preds.keys()):
                task_loss = criterion(outputs[i], labels[task])
                loss += task_loss
                all_preds[task].extend(outputs[i].argmax(dim=1).cpu().numpy())
                all_labels[task].extend(labels[task].cpu().numpy())

            running_loss += loss.item()

    # Calculate average loss and accuracy
    avg_loss = running_loss / len(dataloader)
    accuracy = {task: accuracy_score(all_labels[task], all_preds[task]) for task in all_preds.keys()}

    return avg_loss, accuracy

# Main testing function
def main_test(data_path, batch_size=4, max_samples=10):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load test data
    test_rgb_clips, test_flow_clips, test_labels_action, test_labels_offence, test_labels_severity, test_labels_bodypart, test_labels_offence_severity = load_filtered_clips_and_labels(data_path, "test", max_samples)

    # Organize labels in a dictionary format
    test_labels = {
        "action": test_labels_action,
        "offence": test_labels_offence,
        "severity": test_labels_severity,
        "bodypart": test_labels_bodypart,
        "offence_severity": test_labels_offence_severity
    }

    # Define transform (same as used in training)
    transform = transforms.Compose([
        transforms.Resize((56, 56)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Create test dataset and loader
    test_dataset = ActionDataset(test_rgb_clips, test_flow_clips, test_labels, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Initialize model and load best weights
    model = TwoStreamNetwork().to(device)
    model.load_state_dict(torch.load("best_model.pth"))

    # Define the loss criterion
    criterion = nn.CrossEntropyLoss()

    # Test the model
    test_loss, test_accuracy = test(model, test_loader, criterion, device)
    print(f"Test Loss: {test_loss:.4f} | Test Accuracies: {test_accuracy}")

# Run the test
if __name__ == "__main__":
    # Update this path with your actual data path
    DATA_PATH = 'path/to/SoccerNet/mvfouls'
    main_test(data_path=DATA_PATH)


Loading annotations from: path/to/SoccerNet/mvfouls/test/annotations.json
Total actions found in annotations: 301

Summary:
Total actions loaded: 20
Total actions skipped: 9


  model.load_state_dict(torch.load("best_model.pth"))
Testing: 100%|██████████| 5/5 [00:02<00:00,  1.79it/s]

Test Loss: 5.2123 | Test Accuracies: {'action': 0.3, 'offence': 0.55, 'severity': 0.75, 'bodypart': 0.4, 'offence_severity': 0.55}



