In [1]:
##############################################################################
# БЛОК 0‑A. Проверяем, что сессия идёт на GPU (T4 / A100 / …)
##############################################################################
import subprocess, json, sys, os, re, torch

def get_gpu_name():
    try:
        smi = subprocess.check_output(["nvidia-smi", "--query-gpu=name",
                                       "--format=csv,noheader"], text=True)
        return smi.strip()
    except Exception:
        return None

gpu_name = get_gpu_name()
if not torch.cuda.is_available() or gpu_name is None:
    sys.exit(
        "❌ GPU не подключён.\n"
        "Перейдите в Colab → Runtime → Change runtime type → Hardware accelerator: GPU."
    )

print(f"✅ GPU обнаружен: {gpu_name}")
if not re.search(r"T4", gpu_name, re.I):
    print("ℹ️ Это не T4")


✅ GPU обнаружен: Tesla T4


In [2]:
##############################################################################
# БЛОК 0. УСТАНОВКА ЗАВИСИМОСТЕЙ (PyTorch + kagglehub + остальное)
##############################################################################
# 0‑A. Сносим всё старое
!pip uninstall -y -q torch torchvision torchaudio pytorchvideo kaggle kagglehub

# 0‑B. Ставим согласованный стек под CUDA 11.8 (Colab)
!pip install -q --no-cache-dir \
  torch==2.3.0+cu118 torchvision==0.18.0+cu118 torchaudio==2.3.0+cu118 \
  -f https://download.pytorch.org/whl/cu118/torch_stable.html

# 0‑C. kaggle‑CLI, kagglehub и прочие утилиты
!pip install -q kaggle kagglehub numpy opencv-python tqdm optuna lightning torchmetrics

# 0‑D. PyTorchVideo (из GitHub, совместим со свежими torch/TV)
!pip install -q --no-cache-dir git+https://github.com/facebookresearch/pytorchvideo.git@main


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m839.7/839.7 MB[0m [31m268.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m291.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m121.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for pytorchvideo (setup.py) ... [?25l[?25hdone


In [3]:
import torch, torchvision, torchaudio, pytorchvideo
print("torch      :", torch.__version__)
print("torchvision:", torchvision.__version__)
print("torchaudio :", torchaudio.__version__)
print("pytorchvideo (git):", pytorchvideo.__version__)

torch      : 2.3.0+cu118
torchvision: 0.18.0+cu118
torchaudio : 2.3.0+cu118
pytorchvideo (git): 0.1.5


In [4]:
from pytorchvideo.transforms import UniformTemporalSubsample

In [5]:
##############################################################################
# БЛОК 1. Импорты и константы (обновлённые)
##############################################################################
import os, zipfile, random, warnings
from pathlib import Path

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from torchvision.io import read_video          # ⬅️ замена LabeledVideoDataset

import lightning as L
from torchmetrics.classification import Accuracy
import optuna

from pytorchvideo.transforms import (
    UniformTemporalSubsample,
    RandomShortSideScale
)
from pytorchvideo.models.hub import slowfast_r50

# собственные видео‑трансформы (NormalizeVideo, ShortSideScaleVideo, …)
# уже объявлены выше и остаются без изменений

DATA_ROOT = Path("/content/foul_detection_dataset")

In [6]:
##############################################################################
# БЛОК 2. НАДЁЖНАЯ ЗАГРУЗКА ДАТАСЕТА ЧЕРЕЗ kagglehub
##############################################################################
# ~/.kaggle/kaggle.json уже должен быть настроен
import kagglehub, zipfile
from pathlib import Path

DATASET_SLUG = "sesmlhs/foul-detection-test"

# 2‑A. Скачиваем; kagglehub.dataset_download вернёт:
#      • путь к .zip при первой загрузке
#      • путь к распакованной папке при повторном вызове
raw_path = Path(kagglehub.dataset_download(DATASET_SLUG))
print("kagglehub вернул:", raw_path)

# 2‑B. Определяем DATASET_ROOT независимо от того, zip это или уже директория
if raw_path.is_file():                       # ↔ *.zip
    DATASET_ROOT = raw_path.with_suffix("")  # …/foul-detection-test
    if not DATASET_ROOT.exists():
        with zipfile.ZipFile(raw_path, "r") as zf:
            zf.extractall(DATASET_ROOT)
        print("Распаковали архив в:", DATASET_ROOT)
    else:
        print("Папка уже распакована:", DATASET_ROOT)
else:
    DATASET_ROOT = raw_path                  # уже папка
    print("Датасет уже распакован:", DATASET_ROOT)

# 2‑C. (опционально) удаляем zip, чтобы не занимать место
# if raw_path.is_file(): raw_path.unlink()


kagglehub вернул: /kaggle/input/foul-detection-test
Датасет уже распакован: /kaggle/input/foul-detection-test


In [7]:
##############################################################################
# БЛОК 3. ПОСТРОЕНИЕ ИНДЕКСА (build_index) + train/val split
##############################################################################
import json, random, warnings
from pathlib import Path

def build_index(data_dir: Path, json_name: str = "24_videos.json"):
    """Читает JSON‑разметку и возвращает список (video_path, label_idx)."""
    meta = json.loads((data_dir / json_name).read_text(encoding="utf-8"))
    items = meta.get("root", meta) if isinstance(meta, dict) else meta
    mapping = []
    for item in items:
        raw = item.get("file_upload", "")
        if not raw.lower().endswith(".mp4"):          # пропускаем не‑видео
            continue
        fname = raw.split("-")[-1]                    # M2.mp4
        vpath = data_dir / fname
        if not vpath.exists():
            warnings.warn(f"Файл {vpath} не найден, пропускаем")
            continue
        # метка: foul → 0, not_foul/no_foul → 1
        lbl = 0
        ann = item.get("annotations", [])
        if ann:
            first = ann[0]["result"][0]["value"]["labels"][0]
            if first in ("not_foul", "no_foul"):
                lbl = 1
        mapping.append((str(vpath), lbl))
    return mapping

# 3‑A. Ищем JSON где‑угодно внутри DATASET_ROOT
json_path = next(DATASET_ROOT.rglob("24_videos.json"))
VIDEO_DIR = json_path.parent

# 3‑B. Строим индекс
index = build_index(VIDEO_DIR, json_name=json_path.name)
print("Всего клипов:", len(index))

# 3‑C. Делим на train/val (80/20) — дальше используйте эти переменные
random.seed(42)
random.shuffle(index)
split = int(0.8 * len(index))
train_samples, val_samples = index[:split], index[split:]

print(f"Train: {len(train_samples)}  |  Val: {len(val_samples)}")


Всего клипов: 22
Train: 17  |  Val: 5




In [8]:
##############################################################################
# БЛОК 3.5  (КАСТОМНЫЕ ВИДЕО‑ТРАНСФОРМЫ)  ← запустить ОДИН РАЗ!
##############################################################################
import torch
import torch.nn.functional as F

class NormalizeVideo:
    def __init__(self, mean, std):
        self.mean = torch.tensor(mean).view(-1,1,1,1)
        self.std  = torch.tensor(std).view(-1,1,1,1)
    def __call__(self, clip):
        return (clip - self.mean) / self.std

class ShortSideScaleVideo:
    """Ресайзирует так, чтобы короткая сторона = short_side."""
    def __init__(self, short_side):
        self.short_side = short_side
    def __call__(self, clip):
        c,t,h,w = clip.shape
        if h < w:
            new_h, new_w = self.short_side, int(w * self.short_side / h)
        else:
            new_w, new_h = self.short_side, int(h * self.short_side / w)
        clip = F.interpolate(clip.permute(1,0,2,3), size=(new_h,new_w),
                             mode="bilinear", align_corners=False)
        return clip.permute(1,0,2,3)

class CenterCropVideo:
    def __init__(self, size):
        self.th, self.tw = (size, size) if isinstance(size, int) else size
    def __call__(self, clip):
        c,t,h,w = clip.shape
        i, j = (h - self.th)//2, (w - self.tw)//2
        return clip[:,:, i:i+self.th, j:j+self.tw]

class RandomCropVideo:
    def __init__(self, size):
        self.th, self.tw = (size, size) if isinstance(size, int) else size
    def __call__(self, clip):
        c,t,h,w = clip.shape
        i = torch.randint(0, h - self.th + 1, ()).item()
        j = torch.randint(0, w - self.tw + 1, ()).item()
        return clip[:,:, i:i+self.th, j:j+self.tw]

class RandomHorizontalFlipVideo:
    def __init__(self, p=0.5): self.p = p
    def __call__(self, clip):
        return torch.flip(clip, dims=[3]) if torch.rand(()) < self.p else clip


In [9]:
##############################################################################
# БЛОК 4. Dataset + DataLoader  (добавили try/except  и num_workers=0)
##############################################################################
from torchvision.io import read_video

class FoulDataset(Dataset):
    def __init__(self, samples, num_frames=32, alpha=4, crop_size=224, mode="train"):
        self.samples     = samples
        self.num_frames  = num_frames
        self.alpha       = alpha
        is_train         = mode == "train"

        jitter = RandomShortSideScale(min_size=256, max_size=320)
        self.transform = T.Compose([
            UniformTemporalSubsample(num_frames),
            lambda x: x / 255.0,
            NormalizeVideo((0.45,0.45,0.45),(0.225,0.225,0.225)),
            (jitter if is_train else ShortSideScaleVideo(256)),
            (RandomCropVideo(crop_size) if is_train else CenterCropVideo(crop_size)),
            (RandomHorizontalFlipVideo(0.5) if is_train else T.Lambda(lambda x: x)),
        ])

    def __len__(self): return len(self.samples)

    def __getitem__(self, idx):
        path, label = self.samples[idx]

        # 📉 Безопасное чтение: если mp4 кривой → возвращаем нули вместо падения
        try:
            video, _, _ = read_video(str(path), pts_unit="sec")  # (T,H,W,C)
            video = video.permute(3,0,1,2)                       # (C,T,H,W)
        except Exception as e:
            warnings.warn(f"Чтение {path} упало ({e}); заполняю нулями")
            video = torch.zeros((3, self.num_frames, 256, 256), dtype=torch.uint8)

        video = self.transform(video)
        fast  = video
        slow  = fast[:, torch.linspace(0, self.num_frames-1,
                                       self.num_frames//self.alpha).long()]
        return {"slow": slow, "fast": fast, "label": label}

# ---------- split и DataLoader (num_workers = 0!) ----------
random.seed(42); random.shuffle(index)
split = int(0.8 * len(index))
train_ds = FoulDataset(index[:split], mode="train")
val_ds   = FoulDataset(index[split:],  mode="val")

train_loader = DataLoader(train_ds, batch_size=4, shuffle=True,
                          num_workers=0, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=4, shuffle=False,
                          num_workers=0, pin_memory=True)

print("Train:", len(train_ds), "  Val:", len(val_ds))


Train: 17   Val: 5


In [10]:
# БЛОК 5. Модель SlowFast‑R50
def build_model(dropout: float, num_classes: int = 2):
    model = slowfast_r50(pretrained=True)
    in_feats = model.blocks[-1].proj.in_features
    model.blocks[-1].proj = torch.nn.Linear(in_feats, num_classes)
    if dropout > 0:
        model.blocks[-1].dropout.p = dropout
    return model


In [11]:
# БЛОК 6. LightningModule
class LitFoul(L.LightningModule):
    def __init__(self, lr=1e-4, dropout=0.2):
        super().__init__()
        self.save_hyperparameters()
        self.model = build_model(dropout)
        self.criterion = torch.nn.CrossEntropyLoss()
        self.train_acc = Accuracy(task="multiclass", num_classes=2)
        self.val_acc   = Accuracy(task="multiclass", num_classes=2)

    def forward(self, slow, fast):
        return self.model([slow, fast])

    def _step(self, batch, stage):
        logits = self(batch["slow"], batch["fast"])
        loss   = self.criterion(logits, batch["label"])
        acc    = (self.train_acc if stage=="train" else self.val_acc)(logits, batch["label"])
        self.log(f"{stage}_loss", loss, prog_bar=True)
        self.log(f"{stage}_acc",  acc,  prog_bar=True)
        return loss

    def training_step(self, batch, nb):   return self._step(batch, "train")
    def validation_step(self, batch, nb): return self._step(batch, "val")

    def configure_optimizers(self):
        opt   = torch.optim.AdamW(self.parameters(), lr=self.hparams.lr, weight_decay=1e-4)
        sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=self.trainer.max_epochs)
        return [opt], [sched]


In [12]:
##############################################################################
# БЛОК 7. Hyperparameter Search (Optuna) –‑ исправленный под lightning v2
##############################################################################
from lightning.pytorch.callbacks import EarlyStopping  # ⬅️ новый импорт

NUM_EPOCHS = 1    # demo‑режим; в проде 10‑20
N_TRIALS   = 2    # demo‑режим; в проде 30‑50

def objective(trial):
    lr      = trial.suggest_float("lr", 1e-5, 5e-4, log=True)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)
    model   = LitFoul(lr=lr, dropout=dropout)

    trainer = L.Trainer(
        max_epochs        = NUM_EPOCHS,
        accelerator       = "auto",
        devices           = "auto",
        enable_progress_bar=False,
        logger            = False,
        callbacks         = [EarlyStopping(monitor="val_acc", mode="max", patience=2)],
    )
    trainer.fit(model, train_loader, val_loader)
    return trainer.callback_metrics["val_acc"].item()

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=True)

best_params = study.best_params
print("🏆 Лучшие гиперпараметры:", best_params)



[I 2025-04-18 14:09:16,750] A new study created in memory with name: no-name-8454edb6-f5e5-4a6c-a460-ad06327ebae8


  0%|          | 0/2 [00:00<?, ?it/s]

INFO: You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:lightning.pytorch.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.11/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /content/checkpoints exists and is not empty.
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name

[I 2025-04-18 14:11:49,950] Trial 0 finished with value: 0.800000011920929 and parameters: {'lr': 3.634077352882493e-05, 'dropout': 0.14856771638394978}. Best is trial 0 with value: 0.800000011920929.


INFO: You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:lightning.pytorch.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | model     | Net                | 33.6 M | train
1 | cr

[I 2025-04-18 14:14:01,751] Trial 1 finished with value: 0.4000000059604645 and parameters: {'lr': 1.2334923576518062e-05, 'dropout': 0.49019644054819717}. Best is trial 0 with value: 0.800000011920929.
🏆 Лучшие гиперпараметры: {'lr': 3.634077352882493e-05, 'dropout': 0.14856771638394978}


In [13]:
##############################################################################
# БЛОК 8. Финальное обучение с лучшими гиперпараметрами (Lightning v2 API)
##############################################################################
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping

final_model = LitFoul(**best_params)

trainer = L.Trainer(
    max_epochs   = NUM_EPOCHS * 2,
    accelerator  = "auto",
    devices      = "auto",
    logger       = TensorBoardLogger("tb_logs", name="foul_detector"),
    callbacks    = [
        ModelCheckpoint(monitor="val_acc", mode="max"),
        EarlyStopping(monitor="val_acc", mode="max", patience=4),
    ],
)

trainer.fit(final_model, train_loader, val_loader)

# сохраняем веса
torch.save(final_model.model.state_dict(), "foul_detector_slowfast.pth")
print("✅ Сохранено: foul_detector_slowfast.pth")



INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | model     | Net                | 33.6 M | train
1 | criterion | CrossEntropyLoss   | 0      | train
2 | train_acc | MulticlassAccuracy | 0      | train
3 | val_acc   | MulticlassAccuracy | 0      | train
---------------------------------------------------------
33.6 M    Trainable params
0         Non-trainable params
33.6 M    Total params
134.596   Total estimated 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.11/dist-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (5) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=2` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.


✅ Сохранено: foul_detector_slowfast.pth


In [14]:
# БЛОК 9. Функция инференса
def predict_video(path: str, state_dict="foul_detector_slowfast.pth", device="cuda"):
    model = build_model(dropout=best_params["dropout"])
    model.load_state_dict(torch.load(state_dict, map_location=device))
    model.eval().to(device)
    ds = FoulDataset([(path,0)], mode="val")
    batch = next(iter(DataLoader(ds, batch_size=1)))
    slow, fast = batch["slow"].to(device), batch["fast"].to(device)
    with torch.no_grad():
        logits = model([slow, fast])
        prob   = torch.softmax(logits, dim=1).cpu()[0]
    cls = "foul" if prob[0] > prob[1] else "not_foul"
    print(f"{Path(path).name}: {cls}  (p={prob.max():.2f})")

# Пример:
predict_video(str(VIDEO_DIR / "M9.mp4"))


M9.mp4: foul  (p=0.68)
