In [1]:
import os
import glob
import random
import math
import json
import re
from pathlib import Path
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit
import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from PIL import Image
import torchvision.transforms as T
import timm
import torch.nn as nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
from torchmetrics.classification import MulticlassAccuracy
from tqdm.auto import tqdm
import torch.nn.functional as F
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np




In [2]:
DATA_ROOT = Path("/kaggle/input/behaviours-features-merged/Behaviors_Features_Final")

# Walk <behavior>/<person id>/<sequence>/ and emit one record per PNG frame.
# The behavior folder name is the class label; "group" identifies the
# behavior/person/sequence folder so that all frames of one sequence can be
# kept together when splitting (avoids near-duplicate leakage).
records = [
    {
        "path": str(frame_path),
        "label": class_dir.name,  # e.g., 'Looking_Forward'
        "group": f"{class_dir.name}/{person_dir.name}/{clip_dir.name}",
        "person": person_dir.name,
        "sequence": clip_dir.name,
    }
    for class_dir in sorted(entry for entry in DATA_ROOT.iterdir() if entry.is_dir())
    for person_dir in class_dir.glob("*")
    if person_dir.is_dir()
    for clip_dir in person_dir.glob("*")
    if clip_dir.is_dir()
    for frame_path in clip_dir.rglob("*.png")
]

df = pd.DataFrame(records)
print("Total images:", len(df))
df.head()


Total images: 632620


Unnamed: 0,path,label,group,person,sequence
0,/kaggle/input/behaviours-features-merged/Behav...,Looking_Forward,Looking_Forward/ID6/Forward44_id6_Act1_rgb,ID6,Forward44_id6_Act1_rgb
1,/kaggle/input/behaviours-features-merged/Behav...,Looking_Forward,Looking_Forward/ID6/Forward44_id6_Act1_rgb,ID6,Forward44_id6_Act1_rgb
2,/kaggle/input/behaviours-features-merged/Behav...,Looking_Forward,Looking_Forward/ID6/Forward44_id6_Act1_rgb,ID6,Forward44_id6_Act1_rgb
3,/kaggle/input/behaviours-features-merged/Behav...,Looking_Forward,Looking_Forward/ID6/Forward44_id6_Act1_rgb,ID6,Forward44_id6_Act1_rgb
4,/kaggle/input/behaviours-features-merged/Behav...,Looking_Forward,Looking_Forward/ID6/Forward44_id6_Act1_rgb,ID6,Forward44_id6_Act1_rgb


In [3]:
# Encode labels as integer ids; the sorted `class_names` list fixes the id order.
class_names = sorted(df["label"].unique())
class2idx = {name: position for position, name in enumerate(class_names)}
df["y"] = df["label"].map(class2idx)

# Split 1: hold out 15% of the groups (sequence folders) as the test set.
gss = GroupShuffleSplit(n_splits=1, test_size=0.15, random_state=42)
trainval_idx, test_idx = next(gss.split(df, groups=df["group"]))
df_trainval = df.iloc[trainval_idx].reset_index(drop=True)
df_test = df.iloc[test_idx].reset_index(drop=True)

# Split 2: carve the validation set out of the remainder, again by group so
# frames of one sequence never straddle train and val.
gss2 = GroupShuffleSplit(n_splits=1, test_size=0.15, random_state=123)
tr_idx, va_idx = next(gss2.split(df_trainval, groups=df_trainval["group"]))
df_train = df_trainval.iloc[tr_idx].reset_index(drop=True)
df_val = df_trainval.iloc[va_idx].reset_index(drop=True)

print(len(df_train), len(df_val), len(df_test))
class_names


464108 77042 91470


['Looking_Forward',
 'Raising_Hand',
 'Reading',
 'Sleeping',
 'Standing',
 'Turning_Around',
 'Writing']

In [4]:
# Number of distinct groups (sequence folders) per behavior label, largest first.
one_row_per_group = df.drop_duplicates('group')
groups_per_label = one_row_per_group.groupby('label').size()
groups_per_label.sort_values(ascending=False)


label
Reading            1894
Turning_Around      884
Sleeping            725
Looking_Forward     576
Raising_Hand        421
Writing             406
Standing            133
dtype: int64

In [5]:
def _natural_key(value: str):
    return [int(tok) if tok.isdigit() else tok.lower() for tok in re.findall(r"\d+|\D+", str(value))]

def to_sequence_df(df_imgs: pd.DataFrame) -> pd.DataFrame:
    """Collapse a per-image table into one row per sequence group.

    Args:
        df_imgs: Frame-level table with at least the columns "group", "path",
            "label" and "y".

    Returns:
        A DataFrame with columns "group", "paths", "label" and "y", where
        "paths" is the list of that group's image paths in natural
        (numeric-aware) order and "label"/"y" are taken from the group's
        first row.
    """
    def _order_frames(frame_paths):
        # Natural ordering so that e.g. frame 2 comes before frame 10.
        return sorted(frame_paths, key=_natural_key)

    per_group = df_imgs.groupby("group")
    sequences = per_group.agg(
        paths=("path", list),
        label=("label", "first"),
        y=("y", "first"),
    ).reset_index()
    sequences["paths"] = sequences["paths"].map(_order_frames)
    return sequences

# One row per sequence for each split.
df_train_seq, df_val_seq, df_test_seq = (
    to_sequence_df(split_df) for split_df in (df_train, df_val, df_test)
)
print(len(df_train_seq), len(df_val_seq), len(df_test_seq))

# Inverse-frequency class weights from the number of *training* sequences per
# class, rescaled so the weights sum to the number of classes (mean weight 1).
seq_counts = df_train_seq.groupby("label").size().reindex(class_names, fill_value=0)
# A class with no training sequences gets count 1 to avoid division by zero.
seq_counts_clipped = seq_counts.replace(0, 1)
inverse_frequency = 1.0 / seq_counts_clipped
sequence_class_weights = inverse_frequency / inverse_frequency.sum() * len(inverse_frequency)
sequence_class_weights

3640 643 756


label
Looking_Forward    0.698052
Raising_Hand       1.020230
Reading            0.213919
Sleeping           0.543363
Standing           3.104103
Turning_Around     0.457344
Writing            0.962989
dtype: float64

In [6]:
IMG_SIZE = 224  # Side length (pixels) of the square crops produced by both pipelines.

# Per-channel normalisation constants shared by the train and eval pipelines
# (the commonly used ImageNet RGB mean / std).
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)


def _tensor_and_normalize():
    """Return the common tail of both pipelines: PIL -> tensor -> normalised."""
    return [T.ToTensor(), T.Normalize(mean=_NORM_MEAN, std=_NORM_STD)]


# Training: random crop / flip / small rotation for augmentation.
train_tfms = T.Compose(
    [
        T.RandomResizedCrop(IMG_SIZE, scale=(0.7, 1.0)),
        T.RandomHorizontalFlip(p=0.5),
        T.RandomRotation(degrees=10),
    ]
    + _tensor_and_normalize()
)

# Validation / test: deterministic resize followed by a centre crop.
valid_tfms = T.Compose(
    [
        T.Resize(int(IMG_SIZE * 1.14)),
        T.CenterCrop(IMG_SIZE),
    ]
    + _tensor_and_normalize()
)

class SequenceDataset(Dataset):
    """Return a fixed-length clip of frames for sequence-level classification.

    Each item is one sequence: `clip_len` frames are picked from the sequence's
    ordered path list, loaded as RGB, passed through `transforms` one frame at
    a time, and stacked along a new leading (time) axis.

    Args:
        df_seq: Table with a "paths" column (list of image paths per sequence)
            and a "y" column (integer class id per sequence).
        transforms: Callable applied to each PIL image; must return a tensor.
        clip_len: Number of frames per clip; must be at least 1.

    Raises:
        ValueError: If `clip_len` is smaller than 1.
    """

    def __init__(self, df_seq: pd.DataFrame, transforms, clip_len: int = 8):
        if clip_len < 1:
            raise ValueError(f"clip_len must be >= 1, got {clip_len}")
        self.paths_list = df_seq["paths"].tolist()
        self.labels = df_seq["y"].astype(int).tolist()
        self.transforms = transforms
        self.clip_len = clip_len

    def __len__(self):
        """Number of sequences in the dataset."""
        return len(self.paths_list)

    def _sample_indices(self, n_frames: int):
        """Choose `clip_len` frame indices from a sequence of `n_frames` frames.

        Long sequences are sampled at evenly spaced positions from the first to
        the last frame; short sequences use every frame once and then repeat
        the last frame until the clip is full.

        Raises:
            ValueError: If the sequence has no frames (previously this silently
                produced index -1 and failed later with an IndexError).
        """
        if n_frames <= 0:
            raise ValueError("cannot sample a clip from a sequence with no frames")
        if n_frames >= self.clip_len:
            return np.linspace(0, n_frames - 1, self.clip_len).astype(int).tolist()
        padding = [n_frames - 1] * (self.clip_len - n_frames)
        return list(range(n_frames)) + padding

    def __getitem__(self, idx):
        """Load sequence `idx` and return `(clip, label)`.

        `clip` is the stack of transformed frames along dim 0; `label` is the
        integer class id of the sequence.
        """
        paths = self.paths_list[idx]
        label = self.labels[idx]
        frames = []
        # NOTE(review): transforms run independently per frame, so random
        # augmentations differ between frames of the same clip — confirm this
        # is intended.
        for frame_idx in self._sample_indices(len(paths)):
            # The context manager closes the underlying file promptly;
            # convert() returns a fully loaded copy that outlives it.
            with Image.open(paths[frame_idx]) as img:
                frames.append(self.transforms(img.convert("RGB")))
        clip = torch.stack(frames, dim=0)
        return clip, label

CLIP_LEN = 8  # Frames per clip.
train_ds = SequenceDataset(df_train_seq, train_tfms, clip_len=CLIP_LEN)
val_ds = SequenceDataset(df_val_seq, valid_tfms, clip_len=CLIP_LEN)
test_ds = SequenceDataset(df_test_seq, valid_tfms, clip_len=CLIP_LEN)

# Per-sequence sampling weight = weight of the sequence's class, so each class
# is drawn at a similar rate during training (sampling with replacement).
seq_weight_lookup = sequence_class_weights.to_dict()
train_sample_weights = df_train_seq["label"].map(seq_weight_lookup).astype(float).values
train_sampler = WeightedRandomSampler(
    torch.as_tensor(train_sample_weights, dtype=torch.double),
    num_samples=len(train_sample_weights),
    replacement=True,
)

BATCH_SIZE = 16
NUM_WORKERS = 2

# Settings shared by all three loaders. persistent_workers keeps the worker
# processes alive between epochs instead of tearing them down and re-forking
# them on every pass; it is only valid when there is at least one worker.
# NOTE(review): repeated worker start-up/teardown may also be what triggers the
# "_MultiProcessingDataLoaderIter.__del__" messages in the training log — confirm.
_loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=True,
    persistent_workers=NUM_WORKERS > 0,
)

train_dl = DataLoader(train_ds, sampler=train_sampler, **_loader_kwargs)
val_dl = DataLoader(val_ds, shuffle=False, **_loader_kwargs)
test_dl = DataLoader(test_ds, shuffle=False, **_loader_kwargs)


In [7]:
SEED = 42

# Seed torch, NumPy and Python's `random` with the same value, in that order.
for _seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    _seed_fn(SEED)

# Ask cuDNN for deterministic kernels and disable its autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

N_CLASSES = len(class_names)
# NOTE(review): the run log shows a warning emitted from timm's model factory
# right after model creation — possibly a deprecated-name notice for this
# identifier; confirm against the installed timm version.
MODEL_NAME = "convnext_small_in22ft1k"

class TemporalMeanNet(nn.Module):
    """Clip classifier: per-frame backbone features, averaged over time, then a linear head.

    Args:
        backbone_name: timm model identifier used to build the frame encoder.
        n_classes: Number of output classes of the linear head.
    """

    def __init__(self, backbone_name: str, n_classes: int):
        super().__init__()
        # num_classes=0 removes the classifier so the backbone returns pooled features.
        self.backbone = timm.create_model(
            backbone_name,
            pretrained=True,
            num_classes=0,
            global_pool="avg",
            drop_path_rate=0.2,
        )
        self.embed_dim = self.backbone.num_features
        self.head = nn.Linear(self.embed_dim, n_classes)

    def forward(self, clips):
        """Classify a batch of clips.

        Args:
            clips: Tensor of shape (batch, time, channels, height, width).

        Returns:
            Logits of shape (batch, n_classes).
        """
        b, t, c, h, w = clips.shape
        # Fold time into the batch axis so the 2D backbone sees ordinary images.
        # reshape() (rather than view()) also accepts non-contiguous inputs,
        # e.g. clips produced by a transpose/permute, copying only when needed.
        frames = clips.reshape(b * t, c, h, w)
        feats = self.backbone(frames)  # (b * t, feat_dim)
        # Restore the time axis and average the frame features of each clip.
        clip_feats = feats.reshape(b, t, -1).mean(dim=1)
        return self.head(clip_feats)

model = TemporalMeanNet(MODEL_NAME, N_CLASSES).to(device)
# Mixed precision is only enabled when running on a CUDA device.
use_amp = device.type == "cuda"

# Per-class loss weights, in `class_names` order (the order of
# `sequence_class_weights`).
# NOTE(review): the training sampler already draws sequences with these same
# inverse-frequency weights, so class imbalance is compensated twice (sampling
# and loss) — confirm this is intended.
class_weight_tensor = torch.tensor(
    sequence_class_weights.values, dtype=torch.float32, device=device
)
criterion = nn.CrossEntropyLoss(weight=class_weight_tensor, label_smoothing=0.1)
optimizer = AdamW(model.parameters(), lr=5e-4, weight_decay=1e-4)
EPOCHS = 10
# OneCycleLR is stepped once per training batch, so it needs the total number
# of optimizer steps: epochs * batches per epoch.
steps_per_epoch = len(train_dl)
scheduler = OneCycleLR(optimizer, max_lr=5e-4, epochs=EPOCHS, steps_per_epoch=steps_per_epoch)
metric_acc = MulticlassAccuracy(num_classes=N_CLASSES).to(device)
# torch.cuda.amp.GradScaler(...) is deprecated (see the FutureWarning in the
# run log); torch.amp.GradScaler("cuda", ...) is the replacement API.
scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

def run_one_epoch(dataloader, train=True):
    """Run one pass over `dataloader` and return `(average loss, accuracy)`.

    Uses the module-level `model`, `criterion`, `optimizer`, `scheduler`,
    `scaler`, `metric_acc`, `device` and `use_amp`.

    Args:
        dataloader: Iterable yielding `(clips, targets)` batches.
        train: If True, the model is put in training mode and parameters are
            updated (with gradient clipping and one scheduler step per batch);
            if False, the model is put in eval mode and no gradients are
            computed.

    Returns:
        Tuple `(avg_loss, avg_acc)`: the mean of the per-batch losses weighted
        by batch size over the samples actually processed, and the value of
        `metric_acc` accumulated over the pass.
    """
    model.train(train)
    running_loss = 0.0
    n_seen = 0
    metric_acc.reset()
    pbar = tqdm(dataloader, leave=False)
    for clips, targets in pbar:
        clips = clips.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        with torch.set_grad_enabled(train):
            # torch.autocast replaces the deprecated torch.cuda.amp.autocast.
            with torch.autocast(device_type=device.type, enabled=use_amp):
                logits = model(clips)
                loss = criterion(logits, targets)

        if train:
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            # Unscale first so the clipping threshold applies to true gradient norms.
            scaler.unscale_(optimizer)
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

        # Read the loss back from the device once per batch.
        batch_loss = loss.item()
        if train:
            current_lr = scheduler.get_last_lr()[0]
            pbar.set_postfix(loss=batch_loss, lr=f"{current_lr:.2e}")
        else:
            pbar.set_postfix(loss=batch_loss)

        batch_size = clips.size(0)
        running_loss += batch_loss * batch_size
        n_seen += batch_size
        preds = logits.argmax(dim=1)
        metric_acc.update(preds, targets)

    # Divide by the number of samples actually seen: with a sampler (or
    # drop_last) this can differ from len(dataloader.dataset), and the guard
    # avoids a ZeroDivisionError on an empty loader.
    avg_loss = running_loss / max(n_seen, 1)
    avg_acc = metric_acc.compute().item()
    return avg_loss, avg_acc

# Where the best checkpoint (by validation accuracy) is written.
CHECKPOINT_PATH = "/kaggle/working/best_model.pth"

# Start below any attainable accuracy so the first epoch always writes a
# checkpoint; starting at 0.0 with a strict ">" would save nothing if
# validation accuracy never rose above zero, and loading the checkpoint
# afterwards would fail.
best_val = float("-inf")
for epoch in range(1, EPOCHS + 1):
    tr_loss, tr_acc = run_one_epoch(train_dl, train=True)
    va_loss, va_acc = run_one_epoch(val_dl, train=False)
    print(f"Epoch {epoch:02d} | train loss {tr_loss:.4f} acc {tr_acc:.4f} | val loss {va_loss:.4f} acc {va_acc:.4f}")
    if va_acc > best_val:
        best_val = va_acc
        # Store everything needed to rebuild the model for inference.
        torch.save({
            "model_name": MODEL_NAME,
            "state_dict": model.state_dict(),
            "class_names": class_names,
            "clip_len": CLIP_LEN,
        }, CHECKPOINT_PATH)
        print("Saved new best model.")


  model = create_fn(


model.safetensors:   0%|          | 0.00/201M [00:00<?, ?B/s]

  scaler = torch.cuda.amp.GradScaler(enabled=use_amp)


  0%|          | 0/228 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast(enabled=use_amp):


  0%|          | 0/41 [00:00<?, ?it/s]

Epoch 01 | train loss 0.6887 acc 0.7874 | val loss 1.1343 acc 0.9832
Saved new best model.


  0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

Epoch 02 | train loss 0.4548 acc 0.9772 | val loss 1.1431 acc 0.9570


  0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

Epoch 03 | train loss 0.4549 acc 0.9789 | val loss 1.1252 acc 0.9717


  0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

Epoch 04 | train loss 0.4350 acc 0.9893 | val loss 1.0350 acc 0.9794


  0%|          | 0/228 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7bef345cb1a0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7bef345cb1a0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

  0%|          | 0/41 [00:00<?, ?it/s]

Epoch 05 | train loss 0.4189 acc 0.9934 | val loss 1.0101 acc 0.9868
Saved new best model.


  0%|          | 0/228 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7bef345cb1a0>Exception ignored in: 
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7bef345cb1a0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1564, in _shutdown_workers
    self._pin_memory_thread.join()
  File "/usr/lib/python3.11/threading.py", line 1116, in join
    raise RuntimeError("cannot join current thread")
RuntimeError: cannot join current thread
Traceback (most recent call last):
<function _MultiProcessingDataLoaderIter.__del__ at 0x7bef345cb1a0>
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    Traceback (most recent call last):
self._shutdown_workers()  File "/usr/local/lib/python3.11/dist-packages/torch/util

  0%|          | 0/41 [00:00<?, ?it/s]

Epoch 06 | train loss 0.4109 acc 0.9981 | val loss 0.9918 acc 0.9946
Saved new best model.


  0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

Epoch 07 | train loss 0.4100 acc 0.9995 | val loss 0.9869 acc 0.9979
Saved new best model.


  0%|          | 0/228 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7bef345cb1a0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
  Exception ignored in:    <function _MultiProcessingDataLoaderIter.__del__ at 0x7bef345cb1a0> 
  Traceback (most recent call last):
    File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
 ^    ^self._shutdown_workers()^
^^  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
^    ^if w.is_alive():^
^  ^  ^^  ^^^ ^^^^^^^^^^

  0%|          | 0/41 [00:00<?, ?it/s]

Epoch 08 | train loss 0.4024 acc 0.9997 | val loss 0.9832 acc 0.9979


  0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

Epoch 09 | train loss 0.4070 acc 0.9997 | val loss 0.9833 acc 0.9994
Saved new best model.


  0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

Epoch 10 | train loss 0.4070 acc 1.0000 | val loss 0.9833 acc 0.9994


In [8]:

# # Path to your saved version’s files
# MODEL_DIR = "/kaggle/input/classroom-behavior-model"  # ← change to your actual notebook input path

# # Load class names
# with open(f"{MODEL_DIR}/label_map.json", "r") as f:
#     class_names = json.load(f)

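# # Recreate the model.
# # NOTE: this snippet is stale. The checkpoint written by the training cell is a
# # TemporalMeanNet state dict (ConvNeXt backbone + linear head), so the plain
# # `efficientnet_b0` classifier below cannot load it. Rebuild the model from
# # ckpt["model_name"] and ckpt["class_names"] instead, as the evaluation cell does.
# MODEL_NAME = "efficientnet_b0"
# N_CLASSES = len(class_names)
# model = timm.create_model(MODEL_NAME, pretrained=False, num_classes=N_CLASSES)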

# # Load saved weights
# ckpt = torch.load(f"{MODEL_DIR}/best_model.pth", map_location="cuda")
# model.load_state_dict(ckpt["state_dict"])
# model.eval().to("cuda")

# print("✅ Model reloaded and ready for inference!")


In [9]:
# Reload the best checkpoint and rebuild the model from the metadata stored in it.
ckpt = torch.load("/kaggle/working/best_model.pth", map_location=device)
# Use the class list saved with the weights so report labels always match the
# head that produced the predictions, even if `class_names` changed since.
eval_class_names = list(ckpt["class_names"])
eval_model = TemporalMeanNet(ckpt["model_name"], len(eval_class_names)).to(device)
eval_model.load_state_dict(ckpt["state_dict"])
eval_model.eval()

# Collect predictions and targets over the whole test loader.
all_preds, all_targs = [], []
with torch.no_grad():
    for clips, targets in tqdm(test_dl):
        clips = clips.to(device)
        logits = eval_model(clips)
        preds = logits.argmax(1).cpu().numpy()
        all_preds.append(preds)
        all_targs.append(targets.numpy())

y_pred = np.concatenate(all_preds)
y_true = np.concatenate(all_targs)

# Pass the full label set explicitly: without `labels`, classification_report
# raises when a class is absent from both y_true and y_pred (target_names
# length mismatch), and the confusion matrix would silently change shape.
label_ids = list(range(len(eval_class_names)))
print(classification_report(
    y_true, y_pred, labels=label_ids, target_names=eval_class_names, zero_division=0
))
print(confusion_matrix(y_true, y_pred, labels=label_ids))


  0%|          | 0/48 [00:00<?, ?it/s]

                 precision    recall  f1-score   support

Looking_Forward       0.99      1.00      0.99        99
   Raising_Hand       1.00      1.00      1.00        68
        Reading       1.00      0.99      0.99       279
       Sleeping       0.97      1.00      0.98        95
       Standing       1.00      1.00      1.00        26
 Turning_Around       1.00      0.99      1.00       131
        Writing       1.00      1.00      1.00        58

       accuracy                           0.99       756
      macro avg       0.99      1.00      1.00       756
   weighted avg       0.99      0.99      0.99       756

[[ 99   0   0   0   0   0   0]
 [  0  68   0   0   0   0   0]
 [  0   0 276   3   0   0   0]
 [  0   0   0  95   0   0   0]
 [  0   0   0   0  26   0   0]
 [  1   0   0   0   0 130   0]
 [  0   0   0   0   0   0  58]]


In [10]:
# def _select_clip_indices(n_frames: int, clip_len: int):
#     if n_frames >= clip_len:
#         return np.linspace(0, n_frames - 1, clip_len).astype(int).tolist()
#     return list(range(n_frames)) + [n_frames - 1] * (clip_len - n_frames)

# def prepare_clip(paths, clip_len=CLIP_LEN, transforms=valid_tfms):
#     ordered_paths = sorted(paths, key=_natural_key)
#     indices = _select_clip_indices(len(ordered_paths), clip_len)
#     frames = []
#     for idx in indices:
#         img = Image.open(ordered_paths[idx]).convert("RGB")
#         frames.append(transforms(img))
#     return torch.stack(frames, dim=0)

# def predict_sequence(paths, model_instance=None, clip_len=CLIP_LEN):
#     if model_instance is None:
#         model_instance = eval_model if "eval_model" in globals() else model
#     model_instance.eval()
#     clip = prepare_clip(paths, clip_len=clip_len).unsqueeze(0).to(device)
#     with torch.no_grad():
#         logits = model_instance(clip)
#         probs = torch.softmax(logits, dim=1).squeeze(0).cpu().numpy()
#     top_idx = probs.argmax()
#     return class_names[top_idx], float(probs[top_idx]), {c: float(p) for c, p in zip(class_names, probs)}

# # Example usage:
# # seq_paths = df_test_seq.iloc[0]["paths"]
# # predict_sequence(seq_paths)


In [11]:
# # Save class names for future inference.
# with open("/kaggle/working/label_map.json", "w") as f:
#     json.dump(class_names, f, indent=2)

# # The model weights are already at /kaggle/working/best_model.pth


In [12]:
# # Check how many 'group' values overlap between train and test
# set_train_groups = set(df_train["group"])
# set_test_groups = set(df_test["group"])

# len(set_train_groups & set_test_groups)
