In [2]:
!pip -q install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip -q install opencv-python-headless scikit-learn matplotlib tqdm pandas
# optional but faster video decode
!pip -q install decord

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/13.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/13.6 MB[0m [31m29.0 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━[0m [32m7.8/13.6 MB[0m [31m113.8 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m13.6/13.6 MB[0m [31m260.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m139.9 MB/s[0m eta [36m0:00:00[0m
[?25h

### Create files

In [3]:
import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=FutureWarning)

In [4]:
import os, textwrap, pathlib

ROOT = pathlib.Path("/content/soccer_rnn")
ROOT.mkdir(parents=True, exist_ok=True)

def write(relpath: str, content: str):
    p = ROOT / relpath
    p.parent.mkdir(parents=True, exist_ok=True)
    p.write_text(textwrap.dedent(content).lstrip(), encoding="utf-8")

In [7]:
%%writefile soccer_rnn/__init__.py

Writing soccer_rnn/__init__.py


In [8]:
%%writefile soccer_rnn/config.py
from dataclasses import dataclass

@dataclass
class Config:
    # data
    dataset_source: str = "kaggle"  # "kaggle" or "soccernet"
    raw_root: str = "/content/data_raw"
    processed_root: str = "/content/data_processed"
    cache_root: str = "/content/feature_cache"

    # for clip-based classification
    clip_num_frames: int = 32
    clip_fps: int = 8
    clip_radius_sec: float = 1.5  # only for SoccerNet extraction (window = 2*radius)
    image_size: int = 224

    # split
    seed: int = 42
    test_size: float = 0.15
    val_size: float = 0.15  # from remaining

    # train
    batch_size: int = 16
    num_workers: int = 0
    epochs: int = 10
    lr: float = 1e-3
    weight_decay: float = 1e-4
    grad_clip: float = 1.0
    use_amp: bool = True

    # model
    cnn_backbone: str = "resnet18"  # fixed in this baseline
    feature_dim: int = 512
    hidden_size: int = 256
    num_layers: int = 1
    dropout: float = 0.2

    # output
    out_root: str = "/content/runs"

Overwriting soccer_rnn/config.py


In [9]:
os.mkdir("soccer_rnn/data")
os.mkdir("soccer_rnn/models")
os.mkdir("soccer_rnn/utils")

In [10]:
%%writefile soccer_rnn/utils/seed.py
import random, os
import numpy as np
import torch

def set_seed(seed: int):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

Writing soccer_rnn/utils/seed.py


In [11]:
%%writefile soccer_rnn/data/indexing.py
import os
from pathlib import Path
import pandas as pd

VIDEO_EXTS = {".mp4", ".mkv", ".avi", ".mov", ".webm"}

def build_index_folder_per_class(dataset_dir: str, out_csv: str):
    '''
    Expects:
      dataset_dir/
        Pass/*.mp4
        Shot/*.mp4
        Dribble/*.mp4
      or any set of class folders.
    '''
    dataset_dir = Path(dataset_dir)
    rows = []
    for class_dir in sorted([p for p in dataset_dir.iterdir() if p.is_dir()]):
        label = class_dir.name
        for vid in class_dir.rglob("*"):
            if vid.is_file() and vid.suffix.lower() in VIDEO_EXTS:
                rows.append({"path": str(vid), "label": label})
    if not rows:
        raise RuntimeError(f"No videos found under {dataset_dir}. Expected class subfolders.")
    df = pd.DataFrame(rows)
    df.to_csv(out_csv, index=False)
    return df

def ensure_dir(p: str):
    os.makedirs(p, exist_ok=True)

Writing soccer_rnn/data/indexing.py


In [12]:
%%writefile soccer_rnn/data/splits.py
import pandas as pd
from sklearn.model_selection import train_test_split

def stratified_split(df: pd.DataFrame, seed: int, test_size: float, val_size: float):
    '''
    val_size is relative to full set, but we do it as:
      train_val, test = split(df, test_size)
      train, val = split(train_val, val_size / (1 - test_size))
    '''
    y = df["label"].values
    train_val, test = train_test_split(df, test_size=test_size, random_state=seed, stratify=y)

    y_tv = train_val["label"].values
    rel_val = val_size / (1.0 - test_size)
    train, val = train_test_split(train_val, test_size=rel_val, random_state=seed, stratify=y_tv)

    return train.reset_index(drop=True), val.reset_index(drop=True), test.reset_index(drop=True)


Writing soccer_rnn/data/splits.py


In [13]:
%%writefile soccer_rnn/data/video_decode.py
import numpy as np

def _uniform_indices(num_frames_total: int, num_samples: int):
    if num_frames_total <= 0:
        return np.zeros((num_samples,), dtype=np.int64)
    if num_frames_total >= num_samples:
        return np.linspace(0, num_frames_total - 1, num_samples).round().astype(np.int64)
    # pad by repeating last frame
    idx = np.linspace(0, num_frames_total - 1, num_frames_total).round().astype(np.int64)
    pad = np.full((num_samples - num_frames_total,), idx[-1], dtype=np.int64)
    return np.concatenate([idx, pad], axis=0)

def decode_frames(path: str, num_frames: int, target_fps: int = None):
    '''
    Returns uint8 frames: (T, H, W, 3), RGB.
    Tries decord first, falls back to cv2.
    '''
    try:
        import decord
        from decord import VideoReader, cpu
        vr = VideoReader(path, ctx=cpu(0))
        total = len(vr)
        idx = _uniform_indices(total, num_frames)
        frames = vr.get_batch(idx).asnumpy()  # (T,H,W,3) RGB
        return frames.astype(np.uint8)
    except Exception:
        pass

    import cv2
    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        raise RuntimeError(f"Failed to open video: {path}")

    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    idx = _uniform_indices(total, num_frames)
    frames = []
    wanted = set(idx.tolist())
    i = 0
    grabbed = {}
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        if i in wanted:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            grabbed[i] = frame
        i += 1
    cap.release()

    # if total unknown or we missed, do a simpler fallback: reuse last available
    if not grabbed:
        raise RuntimeError(f"Could not decode frames from: {path}")

    last = grabbed[max(grabbed.keys())]
    for j in idx:
        frames.append(grabbed.get(int(j), last))
    return np.stack(frames, axis=0).astype(np.uint8)


Writing soccer_rnn/data/video_decode.py


In [14]:
%%writefile soccer_rnn/data/soccernet_extract.py
import json, os, re, subprocess
from pathlib import Path
from typing import Iterable, List, Dict

# SoccerNet label format often includes:
#  - "gameTime": "1 - 06:35" (half - mm:ss)
#  - "position": "395728" (milliseconds from start of match/half)
# We prefer "position" when present; otherwise parse gameTime.

GAME_TIME_RE = re.compile(r"^\s*([12])\s*-\s*(\d{1,2}):(\d{2})")

def _pos_seconds(ann: Dict):
    if "position" in ann:
        try:
            return float(ann["position"]) / 1000.0
        except Exception:
            pass
    gt = ann.get("gameTime", "")
    m = GAME_TIME_RE.match(gt)
    if not m:
        return None
    mm = int(m.group(2))
    ss = int(m.group(3))
    return float(mm * 60 + ss)

def _half_index(ann: Dict):
    gt = ann.get("gameTime", "")
    m = GAME_TIME_RE.match(gt)
    if m:
        return int(m.group(1))
    # fallback: guess 1
    return 1

def extract_clips_from_labels(
    soccernet_game_dir: str,
    labels_json_path: str,
    out_dir: str,
    keep_labels: Iterable[str],
    clip_radius_sec: float = 1.5,
    video_name_half1: str = "1_224p.mkv",
    video_name_half2: str = "2_224p.mkv",
    limit_per_label: int = 400,
):
    '''
    Creates trimmed mp4 clips in:
      out_dir/<label>/<game_id>_<half>_<posms>.mp4

    Requires ffmpeg (available on Colab).
    '''
    keep_labels = set(keep_labels)
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    with open(labels_json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    anns = data.get("annotations", [])
    counts = {k: 0 for k in keep_labels}

    game_dir = Path(soccernet_game_dir)
    v1 = game_dir / video_name_half1
    v2 = game_dir / video_name_half2
    if not v1.exists() or not v2.exists():
        raise RuntimeError(f"Missing half videos in {game_dir}. Expected {video_name_half1} and {video_name_half2}")

    for ann in anns:
        label = ann.get("label")
        if label not in keep_labels:
            continue
        if counts[label] >= limit_per_label:
            continue

        half = _half_index(ann)
        pos_sec = _pos_seconds(ann)
        if pos_sec is None:
            continue

        start = max(0.0, pos_sec - clip_radius_sec)
        dur = 2.0 * clip_radius_sec

        label_dir = out_dir / label
        label_dir.mkdir(parents=True, exist_ok=True)

        pos_ms = ann.get("position", "")
        fname = f"{game_dir.name}_H{half}_P{pos_ms if pos_ms else int(pos_sec*1000)}.mp4"
        out_path = label_dir / fname
        if out_path.exists():
            counts[label] += 1
            continue

        src = v1 if half == 1 else v2

        cmd = [
            "ffmpeg", "-hide_banner", "-loglevel", "error",
            "-ss", str(start), "-i", str(src),
            "-t", str(dur),
            "-vf", "scale=320:-2",
            "-r", "25",
            "-c:v", "libx264", "-preset", "veryfast", "-crf", "23",
            "-an",
            str(out_path)
        ]
        subprocess.run(cmd, check=False)
        if out_path.exists() and out_path.stat().st_size > 0:
            counts[label] += 1

    return counts

Writing soccer_rnn/data/soccernet_extract.py


In [15]:
%%writefile soccer_rnn/models/seq_models.py
import torch
import torch.nn as nn

class SeqClassifier(nn.Module):
    def __init__(self, rnn_type: str, input_dim: int, hidden_size: int, num_layers: int, num_classes: int, dropout: float, bidirectional: bool):
        super().__init__()
        self.rnn_type = rnn_type.lower()
        self.bidirectional = bidirectional
        rnn_dropout = dropout if num_layers > 1 else 0.0

        if self.rnn_type == "rnn":
            self.rnn = nn.RNN(
                input_size=input_dim,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True,
                dropout=rnn_dropout,
                bidirectional=bidirectional,
                nonlinearity="tanh",
            )
        elif self.rnn_type == "lstm":
            self.rnn = nn.LSTM(
                input_size=input_dim,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True,
                dropout=rnn_dropout,
                bidirectional=bidirectional,
            )
        else:
            raise ValueError("rnn_type must be one of: rnn, lstm")

        out_dim = hidden_size * (2 if bidirectional else 1)
        self.head = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(out_dim, num_classes),
        )

    def forward(self, x):
        # x: (B, T, D)
        if self.rnn_type == "lstm":
            out, (h_n, c_n) = self.rnn(x)
            # h_n: (num_layers * num_directions, B, H)
            if self.bidirectional:
                # take last layer forward + backward
                h_f = h_n[-2]
                h_b = h_n[-1]
                h = torch.cat([h_f, h_b], dim=-1)
            else:
                h = h_n[-1]
        else:
            out, h_n = self.rnn(x)
            if self.bidirectional:
                h_f = h_n[-2]
                h_b = h_n[-1]
                h = torch.cat([h_f, h_b], dim=-1)
            else:
                h = h_n[-1]

        logits = self.head(h)
        return logits

Writing soccer_rnn/models/seq_models.py


In [16]:
%%writefile soccer_rnn/models/frame_encoder.py
import torch
import torch.nn as nn
import torchvision.models as models

def build_resnet18_feature_extractor():
    m = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    # remove classifier
    backbone = nn.Sequential(*list(m.children())[:-1])  # (B,512,1,1)
    backbone.eval()
    for p in backbone.parameters():
        p.requires_grad = False
    return backbone

Writing soccer_rnn/models/frame_encoder.py


In [17]:
%%writefile soccer_rnn/data/dataset.py
import os, hashlib
from pathlib import Path
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
import torchvision.transforms as T

from .video_decode import decode_frames

class VideoSeqDataset(Dataset):
    def __init__(self, df: pd.DataFrame, label_to_idx: dict, feature_extractor, cfg, cache_dir: str):
        self.df = df.reset_index(drop=True)
        self.label_to_idx = label_to_idx
        self.feature_extractor = feature_extractor
        self.cfg = cfg
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        self.transform = T.Compose([
            T.ToPILImage(),
            T.Resize((cfg.image_size, cfg.image_size)),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225]),
        ])

    def _cache_path(self, video_path: str):
        key = f"{video_path}|T={self.cfg.clip_num_frames}|S={self.cfg.image_size}"
        h = hashlib.md5(key.encode("utf-8")).hexdigest()
        return self.cache_dir / f"{h}.npy"

    @torch.no_grad()
    def _compute_features(self, frames_u8: np.ndarray):
        # frames_u8: (T,H,W,3) RGB uint8
        xs = torch.stack([self.transform(fr) for fr in frames_u8], dim=0)  # (T,3,H,W)
        xs = xs.cuda(non_blocking=True)
        feats = self.feature_extractor(xs)  # (T,512,1,1)
        feats = feats.squeeze(-1).squeeze(-1)  # (T,512)
        return feats.float().cpu().numpy()

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        row = self.df.iloc[idx]
        video_path = row["path"]
        label = row["label"]
        y = self.label_to_idx[label]

        cache_p = self._cache_path(video_path)
        if cache_p.exists():
            feat = np.load(cache_p)
        else:
            frames = decode_frames(video_path, num_frames=self.cfg.clip_num_frames, target_fps=self.cfg.clip_fps)
            feat = self._compute_features(frames)
            np.save(cache_p, feat)

        x = torch.from_numpy(feat)  # (T,D)
        y = torch.tensor(y, dtype=torch.long)
        return x, y

Writing soccer_rnn/data/dataset.py


In [31]:
%%writefile /content/soccer_rnn/utils/metrics.py
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

def compute_metrics(y_true, y_pred, labels=None, target_names=None):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    acc = accuracy_score(y_true, y_pred)
    f1m = f1_score(y_true, y_pred, average="macro")
    f1w = f1_score(y_true, y_pred, average="weighted")

    report = classification_report(
        y_true, y_pred,
        labels=labels,
        target_names=target_names,
        digits=4,
        zero_division=0
    )

    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # per-class accuracy (recall)
    row_sum = cm.sum(axis=1)
    diag = np.diag(cm)
    per_class_acc = np.divide(diag, row_sum, out=np.zeros_like(diag, dtype=float), where=row_sum != 0)

    return {
        "acc": float(acc),
        "f1_macro": float(f1m),
        "f1_weighted": float(f1w),
        "report": report,
        "cm": cm,
        "per_class_acc": per_class_acc,
        "per_class_support": row_sum,
    }


Overwriting /content/soccer_rnn/utils/metrics.py


In [19]:
%%writefile soccer_rnn/utils/plots.py
import os
import numpy as np
import matplotlib.pyplot as plt

def plot_curves(history: dict, out_path: str, title: str):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    epochs = np.arange(1, len(history["train_loss"]) + 1)

    plt.figure()
    plt.plot(epochs, history["train_loss"], label="train_loss")
    plt.plot(epochs, history["val_loss"], label="val_loss")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.title(title + " - loss")
    plt.legend()
    plt.grid(True)
    plt.savefig(out_path.replace(".png", "_loss.png"), bbox_inches="tight")
    plt.close()

    plt.figure()
    plt.plot(epochs, history["train_acc"], label="train_acc")
    plt.plot(epochs, history["val_acc"], label="val_acc")
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.title(title + " - accuracy")
    plt.legend()
    plt.grid(True)
    plt.savefig(out_path.replace(".png", "_acc.png"), bbox_inches="tight")
    plt.close()

def plot_confusion_matrix(cm, class_names, out_path: str, title: str):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    cm = np.asarray(cm)
    n = len(class_names)

    plt.figure(figsize=(max(6, n * 1.6), max(5, n * 1.3)))
    plt.imshow(cm, interpolation="nearest")
    plt.title(title)
    plt.colorbar()

    ticks = np.arange(n)
    plt.xticks(ticks, class_names, rotation=45, ha="right")
    plt.yticks(ticks, class_names)
    plt.ylabel("true")
    plt.xlabel("pred")

    maxv = cm.max() if cm.size else 0
    thresh = maxv * 0.5

    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            val = int(cm[i, j])
            color = "white" if cm[i, j] > thresh else "black"
            plt.text(j, i, str(val), ha="center", va="center", color=color, fontsize=10)

    plt.tight_layout()
    plt.savefig(out_path, bbox_inches="tight")
    plt.close()

def bar_compare(names, values, out_path: str, title: str, ylabel: str):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.figure(figsize=(6,4))
    plt.bar(names, values)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xticks(rotation=20, ha="right")
    plt.grid(True, axis="y")
    plt.tight_layout()
    plt.savefig(out_path, bbox_inches="tight")
    plt.close()

def plot_split_distribution(train_counts, val_counts, test_counts, class_names, out_path: str, title: str):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    train_counts = np.array(train_counts, dtype=int)
    val_counts = np.array(val_counts, dtype=int)
    test_counts = np.array(test_counts, dtype=int)

    x = np.arange(len(class_names))
    width = 0.25

    plt.figure(figsize=(max(7, len(class_names) * 1.6), 4.8))
    plt.bar(x - width, train_counts, width, label="train")
    plt.bar(x,         val_counts,   width, label="val")
    plt.bar(x + width, test_counts,  width, label="test")

    plt.xticks(x, class_names, rotation=35, ha="right")
    plt.ylabel("count")
    plt.title(title)
    plt.grid(True, axis="y")
    plt.legend()
    plt.tight_layout()
    plt.savefig(out_path, bbox_inches="tight")
    plt.close()

Writing soccer_rnn/utils/plots.py


In [35]:
%%writefile /content/soccer_rnn/train_eval.py
import os, time
import numpy as np
import torch
from torch import nn
from tqdm import tqdm

def run_one_epoch(model, loader, optimizer, scaler, device, train: bool):
    if train:
        model.train()
    else:
        model.eval()

    ce = nn.CrossEntropyLoss()
    losses = []
    correct = 0
    total = 0

    use_amp = (scaler is not None) and (device.type == "cuda")

    # IMPORTANT: no tqdm here (keeps output clean)
    for x, y in loader:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        with torch.amp.autocast(device_type="cuda", enabled=use_amp):
            logits = model(x)
            loss = ce(logits, y)

        if train:
            optimizer.zero_grad(set_to_none=True)
            if use_amp:
                scaler.scale(loss).backward()
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                scaler.step(optimizer)
                scaler.update()
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()

        losses.append(loss.detach().float().cpu().item())
        pred = logits.argmax(dim=1)
        correct += (pred == y).sum().item()
        total += y.numel()

    return float(np.mean(losses)), float(correct / max(1, total))

@torch.no_grad()
def predict(model, loader, device):
    model.eval()
    ys, ps = [], []
    for x, y in loader:  # no tqdm to keep logs clean
        x = x.to(device, non_blocking=True)
        logits = model(x)
        pred = logits.argmax(dim=1).cpu().numpy().tolist()
        ps.extend(pred)
        ys.extend(y.numpy().tolist())
    return np.array(ys), np.array(ps)

def fit_model(model, train_loader, val_loader, cfg, run_dir: str, name: str):
    os.makedirs(run_dir, exist_ok=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay)
    use_amp = bool(cfg.use_amp and device.type == "cuda")
    scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

    history = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": []}
    best_val_acc = -1.0
    best_path = None

    pbar = tqdm(range(1, cfg.epochs + 1), desc=f"{name}", unit="epoch", leave=True)
    for epoch in pbar:
        t0 = time.time()
        tr_loss, tr_acc = run_one_epoch(model, train_loader, optimizer, scaler if use_amp else None, device, train=True)
        va_loss, va_acc = run_one_epoch(model, val_loader, optimizer, scaler if use_amp else None, device, train=False)
        dt = time.time() - t0

        history["train_loss"].append(tr_loss)
        history["val_loss"].append(va_loss)
        history["train_acc"].append(tr_acc)
        history["val_acc"].append(va_acc)

        pbar.set_postfix({
            "tr_loss": f"{tr_loss:.4f}",
            "va_loss": f"{va_loss:.4f}",
            "tr_acc": f"{tr_acc:.3f}",
            "va_acc": f"{va_acc:.3f}",
            "sec": f"{dt:.1f}",
        })

        ckpt = {
            "epoch": epoch,
            "model_state": model.state_dict(),
            "optimizer_state": optimizer.state_dict(),
            "history": history,
            "cfg": cfg.__dict__,
        }

        if va_acc > best_val_acc:
            best_val_acc = va_acc
            best_path = os.path.join(run_dir, f"{name}_best.pt")
            torch.save(ckpt, best_path)

    return history, best_path


Overwriting /content/soccer_rnn/train_eval.py


In [38]:
%%writefile run_colab.py
import os, json, shutil
import pandas as pd
import torch

from soccer_rnn.config import Config
from soccer_rnn.utils.seed import set_seed
from soccer_rnn.data.indexing import build_index_folder_per_class, ensure_dir
from soccer_rnn.data.splits import stratified_split
from soccer_rnn.models.frame_encoder import build_resnet18_feature_extractor
from soccer_rnn.data.dataset import VideoSeqDataset
from soccer_rnn.models.seq_models import SeqClassifier
from soccer_rnn.train_eval import fit_model, predict
from soccer_rnn.utils.metrics import compute_metrics
from soccer_rnn.utils.plots import plot_curves, plot_confusion_matrix, bar_compare, plot_split_distribution
from torch.utils.data import DataLoader

def main():
    cfg = Config()

    # edit these in the Colab cell that calls this script
    dataset_source = os.environ.get("DATASET_SOURCE", cfg.dataset_source)
    cfg.dataset_source = dataset_source

    RAW_ROOT = os.environ.get("RAW_ROOT", cfg.raw_root)
    PROC_ROOT = os.environ.get("PROC_ROOT", cfg.processed_root)
    cfg.raw_root = RAW_ROOT
    cfg.processed_root = PROC_ROOT

    set_seed(cfg.seed)
    ensure_dir(cfg.out_root)
    ensure_dir(cfg.cache_root)

    # 1) build index.csv (path,label)
    index_csv = os.path.join(cfg.processed_root, "index.csv")
    ensure_dir(cfg.processed_root)

    if not os.path.exists(index_csv):
        # user must ensure clips exist in RAW_ROOT/clips/<class>/*.mp4
        clips_dir = os.path.join(cfg.raw_root, "clips")
        df = build_index_folder_per_class(clips_dir, index_csv)
    else:
        df = pd.read_csv(index_csv)

    # 2) split
    train_df, val_df, test_df = stratified_split(df, cfg.seed, cfg.test_size, cfg.val_size)
    train_df.to_csv(os.path.join(cfg.processed_root, "train.csv"), index=False)
    val_df.to_csv(os.path.join(cfg.processed_root, "val.csv"), index=False)
    test_df.to_csv(os.path.join(cfg.processed_root, "test.csv"), index=False)

    classes = sorted(df["label"].unique().tolist())
    label_to_idx = {c: i for i, c in enumerate(classes)}
    idx_to_label = {i: c for c, i in label_to_idx.items()}
    print("classes:", classes)

    # class distribution plot (train/val/test) as grouped bars
    train_counts = [int((train_df["label"] == c).sum()) for c in classes]
    val_counts   = [int((val_df["label"] == c).sum()) for c in classes]
    test_counts  = [int((test_df["label"] == c).sum()) for c in classes]

    plot_split_distribution(
        train_counts, val_counts, test_counts,
        classes,
        out_path=os.path.join(cfg.out_root, "soccer_seq", "split_distribution.png"),
        title="Class distribution across splits",
    )
    print("Saved split distribution to:", os.path.join(cfg.out_root, "soccer_seq", "split_distribution.png"))

    # 3) feature extractor
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    feat_extractor = build_resnet18_feature_extractor().to(device)

    # 4) datasets/loaders
    train_ds = VideoSeqDataset(train_df, label_to_idx, feat_extractor, cfg, cache_dir=os.path.join(cfg.cache_root, "train"))
    val_ds   = VideoSeqDataset(val_df,   label_to_idx, feat_extractor, cfg, cache_dir=os.path.join(cfg.cache_root, "val"))
    test_ds  = VideoSeqDataset(test_df,  label_to_idx, feat_extractor, cfg, cache_dir=os.path.join(cfg.cache_root, "test"))

    train_loader = DataLoader(train_ds, batch_size=cfg.batch_size, shuffle=True,  num_workers=cfg.num_workers, pin_memory=True)
    val_loader   = DataLoader(val_ds,   batch_size=cfg.batch_size, shuffle=False, num_workers=cfg.num_workers, pin_memory=True)
    test_loader  = DataLoader(test_ds,  batch_size=cfg.batch_size, shuffle=False, num_workers=cfg.num_workers, pin_memory=True)

    # 5) train models
    run_root = os.path.join(cfg.out_root, "soccer_seq")
    ensure_dir(run_root)

    results = {}
    model_specs = [
        ("rnn",   dict(rnn_type="rnn",  bidirectional=False)),
        ("lstm",  dict(rnn_type="lstm", bidirectional=False)),
        ("bilstm",dict(rnn_type="lstm", bidirectional=True)),
    ]

    for name, spec in model_specs:
        model = SeqClassifier(
            rnn_type=spec["rnn_type"],
            input_dim=cfg.feature_dim,
            hidden_size=cfg.hidden_size,
            num_layers=cfg.num_layers,
            num_classes=len(classes),
            dropout=cfg.dropout,
            bidirectional=spec["bidirectional"],
        )
        run_dir = os.path.join(run_root, name)
        history, best_path = fit_model(model, train_loader, val_loader, cfg, run_dir, name)
        plot_curves(history, os.path.join(run_dir, f"{name}.png"), title=name)

        # load best
        ckpt = torch.load(best_path, map_location="cpu")
        model.load_state_dict(ckpt["model_state"])
        model = model.to(device)

        y_true, y_pred = predict(model, test_loader, device)
        metrics = compute_metrics(y_true, y_pred, labels=list(range(len(classes))), target_names=classes)
        print("\n", name, "test acc", metrics["acc"], "macro_f1", metrics["f1_macro"])
        print(metrics["report"])

        plot_confusion_matrix(metrics["cm"], classes, os.path.join(run_dir, f"{name}_cm.png"), title=f"{name} confusion matrix")

        # per-class accuracy
        pca = metrics["per_class_acc"]
        print("per-class accuracy:")
        for cls, v in zip(classes, pca):
            print(f"  {cls}: {v:.4f}")

        # plot per-class accuracy as a bar chart (uses bar_compare from plots.py) :contentReference[oaicite:0]{index=0}
        bar_compare(
            classes,
            pca.tolist(),
            os.path.join(run_dir, f"{name}_per_class_acc.png"),
            title=f"{name} per-class accuracy",
            ylabel="accuracy",
        )

        results[name] = {
            "acc": metrics["acc"],
            "f1_macro": metrics["f1_macro"],
            "f1_weighted": metrics["f1_weighted"],
            "best_ckpt": best_path,
        }

    # 6) comparison plots
    names = list(results.keys())
    bar_compare(names, [results[n]["acc"] for n in names], os.path.join(run_root, "compare_acc.png"), "model comparison", "test accuracy")
    bar_compare(names, [results[n]["f1_macro"] for n in names], os.path.join(run_root, "compare_f1macro.png"), "model comparison", "macro F1")

    # save results.json
    with open(os.path.join(run_root, "results.json"), "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    print("\nSaved everything to:", run_root)

if __name__ == "__main__":
    main()

Overwriting run_colab.py


## s1

### Kaggle download (trimmed clips dataset)

In [22]:
!mkdir -p /content/data_raw
!pip -q install kaggle

!mkdir -p ~/.kaggle
!cp /content/kaggle.json ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json

!kaggle config view

# download + unzip (overwrite without prompting)
!kaggle datasets download -d itarek898/football-match-actions-video-dataset -p /content/data_raw
!unzip -q -o /content/data_raw/football-match-actions-video-dataset.zip -d /content/data_raw/kaggle_ds

# recreate clips so nothing prompts later
!rm -rf /content/data_raw/clips
!mkdir -p /content/data_raw/clips

# symlink the class folders (spaces handled with quotes)
!ln -s "/content/data_raw/kaggle_ds/football match action video dataset/"* /content/data_raw/clips/

# verify
!ls -lah /content/data_raw/clips | head -n 30
!find /content/data_raw/clips -maxdepth 2 -type f | head -n 20

Configuration values from /root/.kaggle
- username: omidnaeej
- path: None
- proxy: None
- competition: None
Dataset URL: https://www.kaggle.com/datasets/itarek898/football-match-actions-video-dataset
License(s): MIT
Downloading football-match-actions-video-dataset.zip to /content/data_raw
100% 2.77G/2.78G [00:27<00:00, 83.3MB/s]
100% 2.78G/2.78G [00:27<00:00, 108MB/s] 
total 20K
drwxr-xr-x 2 root root 4.0K Dec 29 17:28 .
drwxr-xr-x 4 root root 4.0K Dec 29 17:28 ..
lrwxrwxrwx 1 root root   72 Dec 29 17:28 Red Card -> /content/data_raw/kaggle_ds/football match action video dataset/Red Card
lrwxrwxrwx 1 root root   71 Dec 29 17:28 scoring -> /content/data_raw/kaggle_ds/football match action video dataset/scoring
lrwxrwxrwx 1 root root   71 Dec 29 17:28 takling -> /content/data_raw/kaggle_ds/football match action video dataset/takling


### Train

In [39]:
!export DATASET_SOURCE="kaggle"   # or "soccernet" (doesn’t change training; only for your own tracking)
!export RAW_ROOT="/content/data_raw"
!export PROC_ROOT="/content/data_processed"
!python /content/run_colab.py


classes: ['Red Card', 'scoring', 'takling']
Saved split distribution to: /content/runs/soccer_seq/split_distribution.png
rnn: 100% 10/10 [00:02<00:00,  4.86epoch/s, tr_loss=0.0093, va_loss=0.1958, tr_acc=1.000, va_acc=0.951, sec=0.2]

 rnn test acc 0.8524590163934426 macro_f1 0.7670397257511595
              precision    recall  f1-score   support

    Red Card     0.8571    0.6667    0.7500         9
     scoring     0.8696    0.9756    0.9195        41
     takling     0.7500    0.5455    0.6316        11

    accuracy                         0.8525        61
   macro avg     0.8256    0.7292    0.7670        61
weighted avg     0.8462    0.8525    0.8426        61

per-class accuracy:
  Red Card: 0.6667
  scoring: 0.9756
  takling: 0.5455
lstm: 100% 10/10 [00:02<00:00,  4.81epoch/s, tr_loss=0.0775, va_loss=0.2199, tr_acc=0.968, va_acc=0.934, sec=0.2]

 lstm test acc 0.9344262295081968 macro_f1 0.8989495798319328
              precision    recall  f1-score   support

    Red Card    

### Save to Drive


In [40]:
!rsync -av \
  --exclude='/.config' \
  --exclude='/drive' \
  --exclude='/data_raw' \
  --exclude='/data_processed' \
  --exclude='/sample_data' \
  --exclude='/feature_cache' \
  /content/ /content/drive/MyDrive/GAR/Soccer-kaggle/

sending incremental file list
./
kaggle.json
run_colab.py
runs/
runs/soccer_seq/
runs/soccer_seq/compare_acc.png
runs/soccer_seq/compare_f1macro.png
runs/soccer_seq/results.json
runs/soccer_seq/split_distribution.png
runs/soccer_seq/bilstm/
runs/soccer_seq/bilstm/bilstm_acc.png
runs/soccer_seq/bilstm/bilstm_best.pt
runs/soccer_seq/bilstm/bilstm_cm.png
runs/soccer_seq/bilstm/bilstm_loss.png
runs/soccer_seq/bilstm/bilstm_per_class_acc.png
runs/soccer_seq/lstm/
runs/soccer_seq/lstm/lstm_acc.png
runs/soccer_seq/lstm/lstm_best.pt
runs/soccer_seq/lstm/lstm_cm.png
runs/soccer_seq/lstm/lstm_loss.png
runs/soccer_seq/lstm/lstm_per_class_acc.png
runs/soccer_seq/rnn/
runs/soccer_seq/rnn/rnn_acc.png
runs/soccer_seq/rnn/rnn_best.pt
runs/soccer_seq/rnn/rnn_cm.png
runs/soccer_seq/rnn/rnn_loss.png
runs/soccer_seq/rnn/rnn_per_class_acc.png
soccer_rnn/
soccer_rnn/__init__.py
soccer_rnn/config.py
soccer_rnn/train_eval.py
soccer_rnn/.ipynb_checkpoints/
soccer_rnn/__pycache__/
soccer_rnn/__pycache__/__init_

## SoccerNet

In [None]:
from getpass import getpass

pw = getpass("SoccerNet video password (from NDA): ")

SoccerNet video password (from NDA): ··········


In [None]:
import os, json
from soccer_rnn.data.soccernet_extract import extract_clips_from_labels
from soccer_rnn.data.indexing import ensure_dir

RAW_ROOT = "/content/data_raw"
ensure_dir(RAW_ROOT)

# 1) download SoccerNet via official pip package (requires password for videos)
!pip -q install SoccerNet

from SoccerNet.Downloader import SoccerNetDownloader as SNdl
dl = SNdl(LocalDirectory=f"{RAW_ROOT}/SoccerNet")
# pw = input("SoccerNet video password (from NDA): ").strip()

# ball spotting task download (as referenced in SoccerNet devkit)
dl.downloadDataTask(task="spotting-ball-2024", split=["train","valid","test"], password=pw)

# 2) extract short clips around timestamps to make a normal classification dataset
# expected per-game folders under RAW_ROOT/SoccerNet/... (depends on SoccerNet downloader layout)
# You may need to set GAME_DIRS manually if your folder structure differs.
KEEP = ["Pass", "Drive", "Shot"]  # Drive is often used as ball-carry/dribble-like
OUT_CLIPS = f"{RAW_ROOT}/clips"
ensure_dir(OUT_CLIPS)

# Example scan: find labels-ball.json and its parent game directory
import pathlib
labels = list(pathlib.Path(f"{RAW_ROOT}/SoccerNet").rglob("Labels-ball.json"))
print("found Labels-ball.json files:", len(labels))

total = {}
for lab_path in labels[:10]:  # keep it small first
    game_dir = str(lab_path.parent)
    counts = extract_clips_from_labels(
        soccernet_game_dir=game_dir,
        labels_json_path=str(lab_path),
        out_dir=OUT_CLIPS,
        keep_labels=KEEP,
        clip_radius_sec=1.5,
        video_name_half1="1_224p.mkv",
        video_name_half2="2_224p.mkv",
        limit_per_label=300,
    )
    for k,v in counts.items():
        total[k] = total.get(k, 0) + v

print("extracted:", total)
print("clips dir:", OUT_CLIPS)


Downloading /content/data_raw/SoccerNet/spotting-ball-2024/train.zip...: : 8.45GiB [08:25, 16.7MiB/s]                         
Downloading /content/data_raw/SoccerNet/spotting-ball-2024/valid.zip...:  27%|██▋       | 550M/2.04G [31:16<1:25:53, 290kiB/s]

KeyboardInterrupt: 

In [None]:
!export DATASET_SOURCE="soccernet"
!export RAW_ROOT="/content/data_raw"
!export PROC_ROOT="/content/data_processed"
!python /content/soccer_rnn_project/run_colab.py


In [None]:
!rsync -av \
  --exclude='/.config' \
  --exclude='/drive' \
  --exclude='/data_raw' \
  --exclude='/data_processed' \
  --exclude='/sample_data' \
  --exclude='/feature_cache' \
  /content/ /content/drive/MyDrive/GAR/Soccer-soccernet/