In [3]:
import os
import glob
import random
import math
import json
from pathlib import Path
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as T
import timm
import torch.nn as nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
from torchmetrics.classification import MulticlassAccuracy
from tqdm.auto import tqdm
import torch.nn.functional as F
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np


In [4]:
DATA_ROOT = Path("/kaggle/input/classroom-student-behaviors/Behaviors_Features")

# Directory layout walked below: <behavior>/<person id>/<sequence>/**/*.png
# The behavior folder name doubles as the class label.
behavior_dirs = sorted(d for d in DATA_ROOT.iterdir() if d.is_dir())

# One record per PNG. "group" identifies the (behavior, person, sequence) folder so that
# all frames of one sequence can later be kept on the same side of a split.
records = [
    {
        "path": str(img_path),
        "label": behavior_dir.name,  # e.g., 'Looking_Forward'
        "group": f"{behavior_dir.name}/{id_dir.name}/{seq_dir.name}",
        "person": id_dir.name,
        "sequence": seq_dir.name,
    }
    for behavior_dir in behavior_dirs
    for id_dir in behavior_dir.glob("*")
    if id_dir.is_dir()
    for seq_dir in id_dir.glob("*")
    if seq_dir.is_dir()
    for img_path in seq_dir.rglob("*.png")
]

df = pd.DataFrame(records)
print("Total images:", len(df))
df.head()


Total images: 252223


Unnamed: 0,path,label,group,person,sequence
0,/kaggle/input/classroom-student-behaviors/Beha...,Looking_Forward,Looking_Forward/ID4/Forward28_id4_Act1_rgb,ID4,Forward28_id4_Act1_rgb
1,/kaggle/input/classroom-student-behaviors/Beha...,Looking_Forward,Looking_Forward/ID4/Forward28_id4_Act1_rgb,ID4,Forward28_id4_Act1_rgb
2,/kaggle/input/classroom-student-behaviors/Beha...,Looking_Forward,Looking_Forward/ID4/Forward28_id4_Act1_rgb,ID4,Forward28_id4_Act1_rgb
3,/kaggle/input/classroom-student-behaviors/Beha...,Looking_Forward,Looking_Forward/ID4/Forward28_id4_Act1_rgb,ID4,Forward28_id4_Act1_rgb
4,/kaggle/input/classroom-student-behaviors/Beha...,Looking_Forward,Looking_Forward/ID4/Forward28_id4_Act1_rgb,ID4,Forward28_id4_Act1_rgb


In [5]:
# Alphabetically sorted class list -> integer targets; the list order defines the model head.
class_names = sorted(df["label"].unique())
class2idx = {name: idx for idx, name in enumerate(class_names)}
df["y"] = df["label"].map(class2idx)


def _grouped_holdout(frame, holdout_frac, seed):
    """Split `frame` into (kept, held-out) parts without splitting any "group" value."""
    splitter = GroupShuffleSplit(n_splits=1, test_size=holdout_frac, random_state=seed)
    keep_idx, holdout_idx = next(splitter.split(frame, groups=frame["group"]))
    kept = frame.iloc[keep_idx].reset_index(drop=True)
    held_out = frame.iloc[holdout_idx].reset_index(drop=True)
    return kept, held_out


# Stage 1: carve the test set off at sequence-group level.
df_trainval, df_test = _grouped_holdout(df, 0.15, 42)

# Stage 2: carve validation out of the remainder, again by group to prevent leakage.
df_train, df_val = _grouped_holdout(df_trainval, 0.15, 123)

print(len(df_train), len(df_val), len(df_test))
class_names


184571 31469 36183


['Looking_Forward',
 'Raising_Hand',
 'Reading',
 'Sleeping',
 'Standing',
 'Turning_Around',
 'Writting']

In [6]:
IMG_SIZE = 224  # You can try 256 or 384 later.

# ImageNet channel statistics, shared by the train and eval pipelines.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)


def _tensor_tail():
    """Final steps common to both pipelines: PIL image -> tensor -> normalised tensor."""
    return [T.ToTensor(), T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)]


# Training pipeline: random crop/flip/rotation augmentations for robustness.
train_tfms = T.Compose([
    T.RandomResizedCrop(IMG_SIZE, scale=(0.7, 1.0)),
    T.RandomHorizontalFlip(p=0.5),
    T.RandomRotation(degrees=10),
    *_tensor_tail(),
])

# Validation/Test pipeline: no randomness, so repeated evaluations see identical inputs.
valid_tfms = T.Compose([
    T.Resize(int(IMG_SIZE*1.14)),
    T.CenterCrop(IMG_SIZE),
    *_tensor_tail(),
])

class BehaviorDataset(Dataset):
    """Map-style dataset yielding ``(transformed image, integer label)`` pairs.

    File locations are taken from the frame's "path" column and targets from its
    "y" column; both are copied into plain Python lists at construction time.
    """

    def __init__(self, df, transforms):
        """Store the paths, the int-cast labels and the callable applied to each image."""
        path_column = df["path"]
        label_column = df["y"].astype(int)
        self.paths = path_column.tolist()
        self.labels = label_column.tolist()
        self.transforms = transforms

    def __len__(self):
        """Number of samples, i.e. one per stored path."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Open sample `idx`, force it to RGB, transform it and return it with its label."""
        image_path = self.paths[idx]
        target = self.labels[idx]
        rgb_image = Image.open(image_path).convert("RGB")
        return self.transforms(rgb_image), target

BATCH_SIZE = 64
NUM_WORKERS = 2  # Kaggle often limits >2; adjust if needed.

# Only the training split gets the augmenting transforms.
train_ds = BehaviorDataset(df_train, train_tfms)
val_ds = BehaviorDataset(df_val, valid_tfms)
test_ds = BehaviorDataset(df_test, valid_tfms)

# Settings shared by all three loaders; only the shuffle flag differs per split.
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=True)

train_dl = DataLoader(train_ds, shuffle=True, **loader_kwargs)
val_dl = DataLoader(val_ds, shuffle=False, **loader_kwargs)
test_dl = DataLoader(test_ds, shuffle=False, **loader_kwargs)


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the Python, NumPy and PyTorch RNGs before anything random happens in this cell
# (head initialisation, shuffling, augmentations). Best-effort reproducibility only:
# some GPU kernels may still be nondeterministic.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

N_CLASSES = len(class_names)
MODEL_NAME = "efficientnet_b0"  # Try 'convnext_tiny', 'vit_base_patch16_224' later.

# Create a timm model with a classification head sized to our classes.
model = timm.create_model(MODEL_NAME, pretrained=True, num_classes=N_CLASSES)
model.to(device)

# Loss, optimizer, scheduler, metrics.
criterion = nn.CrossEntropyLoss()
# NOTE(review): OneCycleLR below schedules the learning rate up to max_lr, so the lr
# given here appears to be superseded by the schedule -- confirm which value is intended.
optimizer = AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
EPOCHS = 1 # Better with 10
# scheduler.step() is called once per training batch, so take the step budget from the
# loader that is actually iterated instead of recomputing it from the dataset size;
# the two can no longer drift apart if the loader's batching options change.
steps_per_epoch = len(train_dl)
scheduler = OneCycleLR(optimizer, max_lr=1e-3, epochs=EPOCHS, steps_per_epoch=steps_per_epoch)
# NOTE(review): recent torchmetrics versions average MulticlassAccuracy per class by
# default -- confirm whether macro or micro accuracy is the number you want reported.
metric_acc = MulticlassAccuracy(num_classes=N_CLASSES).to(device)

def run_one_epoch(dataloader, train=True):
    """Make one full pass over `dataloader`, optimising only when `train` is true.

    Uses the notebook-level `model`, `criterion`, `optimizer`, `scheduler`,
    `metric_acc` and `device`.

    Returns:
        (avg_loss, avg_acc): the batch losses weighted by batch size and divided by
        ``len(dataloader.dataset)``, and the value computed by `metric_acc`.
    """
    model.train(train)
    metric_acc.reset()
    loss_sum = 0.0
    progress = tqdm(dataloader, leave=False)

    for images, targets in progress:
        images = images.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        # Gradients are only tracked during training passes.
        with torch.set_grad_enabled(train):
            logits = model(images)
            loss = criterion(logits, targets)

        if train:
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
            optimizer.step()
            scheduler.step()  # the schedule advances once per batch

        batch_loss = loss.item()
        loss_sum += batch_loss * images.size(0)
        metric_acc.update(logits.argmax(dim=1), targets)
        progress.set_postfix(loss=batch_loss)

    return loss_sum / len(dataloader.dataset), metric_acc.compute().item()

# Start below any reachable score so the first epoch always writes a checkpoint.
# With 0.0 and a strict ">", a validation score of exactly 0.0 would never be saved and
# the later torch.load of best_model.pth would fail.
best_val = float("-inf")
for epoch in range(1, EPOCHS+1):
    tr_loss, tr_acc = run_one_epoch(train_dl, train=True)
    va_loss, va_acc = run_one_epoch(val_dl,   train=False)
    print(f"Epoch {epoch:02d} | train loss {tr_loss:.4f} acc {tr_acc:.4f} | val loss {va_loss:.4f} acc {va_acc:.4f}")
    # Keep only the weights of the best validation epoch seen so far.
    if va_acc > best_val:
        best_val = va_acc
        # The checkpoint carries the architecture name and class order alongside the weights.
        torch.save({
            "model_name": MODEL_NAME,
            "state_dict": model.state_dict(),
            "class_names": class_names
        }, "/kaggle/working/best_model.pth")
        print("Saved new best model.")


model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

  0%|          | 0/2884 [00:00<?, ?it/s]

  0%|          | 0/492 [00:00<?, ?it/s]

Epoch 01 | train loss 0.0469 acc 0.9823 | val loss 0.0077 acc 0.9963
Saved new best model.


In [None]:
# Playground: reload a previously exported model instead of training one.
# This cell is optional; it skips itself when the exported files are not attached,
# so running the whole notebook top to bottom does not stop here.

# Path to your saved version’s files
MODEL_DIR = "/kaggle/input/classroom-behavior-model"  # ← change to your actual notebook input path

if Path(MODEL_DIR).is_dir():
    # Resolve the device here as well so the cell also works on CPU-only sessions and
    # without the training cell having been run.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load class names
    with open(f"{MODEL_DIR}/label_map.json", "r") as f:
        class_names = json.load(f)

    # Recreate the model (same architecture used before)
    MODEL_NAME = "efficientnet_b0"
    N_CLASSES = len(class_names)
    model = timm.create_model(MODEL_NAME, pretrained=False, num_classes=N_CLASSES)

    # Load saved weights
    # NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
    ckpt = torch.load(f"{MODEL_DIR}/best_model.pth", map_location=device)
    model.load_state_dict(ckpt["state_dict"])
    model.to(device).eval()

    print("✅ Model reloaded and ready for inference!")
else:
    print(f"Skipping model reload: {MODEL_DIR} is not attached.")


In [8]:
# Load best weights just in case.
ckpt = torch.load("/kaggle/working/best_model.pth", map_location=device)
model.load_state_dict(ckpt["state_dict"])
model.eval()

# Collect predictions and targets batch by batch on the CPU.
all_preds, all_targs = [], []
with torch.no_grad():
    for x, y in tqdm(test_dl):
        logits = model(x.to(device))
        all_preds.append(logits.argmax(1).cpu().numpy())
        all_targs.append(y.numpy())

y_pred = np.concatenate(all_preds)
y_true = np.concatenate(all_targs)

# Pass the full label set explicitly: without it, a class that is absent from the test
# split makes classification_report reject target_names (length mismatch) and makes the
# confusion matrix silently drop that class's row/column.
label_ids = list(range(len(class_names)))
print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))
print(confusion_matrix(y_true, y_pred, labels=label_ids))


  0%|          | 0/566 [00:00<?, ?it/s]

                 precision    recall  f1-score   support

Looking_Forward       0.99      1.00      1.00      4333
   Raising_Hand       1.00      0.99      0.99      2979
        Reading       1.00      1.00      1.00      6975
       Sleeping       1.00      1.00      1.00      8401
       Standing       1.00      1.00      1.00      1195
 Turning_Around       1.00      1.00      1.00      5939
       Writting       1.00      1.00      1.00      6361

       accuracy                           1.00     36183
      macro avg       1.00      1.00      1.00     36183
   weighted avg       1.00      1.00      1.00     36183

[[4333    0    0    0    0    0    0]
 [  22 2952    0    5    0    0    0]
 [   0    2 6966    0    0    6    1]
 [   0    2    0 8377    0   21    1]
 [   0    1    0    4 1190    0    0]
 [   0    0    0    0    0 5939    0]
 [   0    0    0    0    0    0 6361]]


In [9]:

# Reuse valid_tfms for deterministic preprocessing.
def predict_image(path):
    """Predict the behavior class for a single image file.

    Uses the notebook-level `model`, `valid_tfms`, `device` and `class_names`.

    Args:
        path: location of the image to classify.

    Returns:
        (label, confidence, distribution): the top class name, its softmax probability
        as a float, and a dict mapping every class name to its probability.
    """
    # Do not depend on an earlier cell having left the model in eval mode: force it
    # for this call and put the previous mode back afterwards.
    was_training = model.training
    model.eval()
    try:
        # The context manager closes the file as soon as the RGB copy exists.
        with Image.open(path) as raw:
            img = raw.convert("RGB")
        x = valid_tfms(img).unsqueeze(0).to(device)  # add the batch dimension
        with torch.no_grad():
            logits = model(x)
            probs = torch.softmax(logits, dim=1).squeeze(0).cpu().numpy()
    finally:
        model.train(was_training)
    top = int(probs.argmax())
    return class_names[top], float(probs[top]), {c: float(p) for c, p in zip(class_names, probs)}

# Example:
# predict_image(df_test.iloc[0]["path"])


In [10]:
# Export the ordered class list so a later session can map output indices back to names.
label_map_json = json.dumps(class_names, indent=2)
with open("/kaggle/working/label_map.json", "w") as label_file:
    label_file.write(label_map_json)

# The trained weights are stored separately at /kaggle/working/best_model.pth


In [11]:
# Leakage sanity check: count the 'group' values present in both the train and test splits.
set_test_groups = set(df_test["group"])
set_train_groups = set(df_train["group"])

len(set_train_groups.intersection(set_test_groups))


0