<a href="https://colab.research.google.com/github/tariqramzeengit/ML-COURSEWORK-2025/blob/main/DSGP_CCTV_Behaviour_Monitoring_XGBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DSGP — CCTV Passenger Behaviour Monitoring (Feature Engineering + XGBoost)

This Colab notebook implements:

- **Data preprocessing**
- **EDA**
- **CNN feature extraction** (EfficientNet embeddings)
- **Feature engineering** (brightness/contrast/blur)
- **XGBoost** classifier
- **Evaluation**
- **Basic prediction program**
- *(Optional)* Video inference (frame sampling)

## Dataset structure (recommended)

```
dataset/
  train/
    normal/
    stealing/
    fight/
    medical_emergency/
  val/
    normal/
    ...
  test/
    normal/
    ...
```

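The notebook requires at least `dataset/train/<class>/*`. If `val/` is missing (or contains no images), the notebook automatically moves 20% of each class from `train/` into `val/`. It does not create `train/` or `test/` for you from a flat `dataset/<class>/*` layout.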


In [22]:
!pip -q install ultralytics opencv-python

from google.colab import drive
drive.mount('/content/drive')

DATA_DIR = "/content/drive/MyDrive/frames_dataset"  #


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
import os, glob, random, shutil
from pathlib import Path

# Carve a validation split out of train/ by moving a fixed fraction of every
# class folder into val/<class>/. The split runs only when val/ holds no
# images yet, so re-running the cell never moves files a second time.
train_dir = Path(DATA_DIR) / "train"
val_dir   = Path(DATA_DIR) / "val"

IMG_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".webp")
SPLIT_SEED = 42  # fixed seed -> the same files are selected on the same dataset

# Test for actual images rather than for the folder itself: an empty val/
# (e.g. left behind by an interrupted run) must not suppress the split.
val_has_images = any(
    p.lower().endswith(IMG_EXTS) for p in glob.glob(str(val_dir / "*" / "*"))
)

if not val_has_images:
    # Validate train/ BEFORE creating val/, otherwise a wrong DATA_DIR leaves
    # an empty val/ on disk.
    if not train_dir.is_dir():
        raise FileNotFoundError(f"train/ not found at: {train_dir}\nMake sure DATA_DIR is correct.")

    val_dir.mkdir(parents=True, exist_ok=True)

    split = 0.2  # 20% to val
    rng = random.Random(SPLIT_SEED)  # private RNG: leaves the global random state untouched
    classes = sorted(d.name for d in train_dir.iterdir() if d.is_dir())

    moved = 0
    for c in classes:
        (val_dir / c).mkdir(parents=True, exist_ok=True)
        # Sort first: glob order is filesystem-dependent, and a seeded shuffle
        # is only reproducible when it starts from a stable order.
        imgs = sorted(glob.glob(str(train_dir / c / "*")))
        imgs = [p for p in imgs if p.lower().endswith(IMG_EXTS)]
        rng.shuffle(imgs)
        k = int(len(imgs) * split)
        for p in imgs[:k]:
            shutil.move(p, str(val_dir / c / Path(p).name))
        moved += k

    if moved == 0:
        # Nothing matched train/<class>/<image>; surface it here instead of
        # letting training fail later with an empty dataset.
        print(f"⚠️ No images were moved. Check that {train_dir} contains <class>/<image> files.")
    else:
        print("✅ Created val split automatically.")
else:
    print("✅ val/ already exists. Skipping split.")



âœ… Created val split automatically.


In [38]:

# YOLOv8-CLS Train + Real-time Video Label Overlay (ALL-IN-ONE)


!pip -q install ultralytics opencv-python

import os, glob, random, shutil
from pathlib import Path
import cv2
from collections import deque
from ultralytics import YOLO

from google.colab import drive
drive.mount('/content/drive')

# ======== User settings (edit as needed) ========
DATA_DIR = "/content/drive/MyDrive/frames_dataset"
VIDEO_IN = "/content/drive/MyDrive/test_video.mp4"   # video to label
VIDEO_OUT = "/content/drive/MyDrive/annotated_out.mp4"
EPOCHS, IMGSZ, BATCH = 30, 224, 64
# ================================================

# Both split folders live directly under the dataset root.
train_dir, val_dir = (Path(DATA_DIR, sub) for sub in ("train", "val"))

# 1) Sanity checks: the train folder must exist and contain class images.
if not train_dir.exists():
    raise FileNotFoundError(f"train/ not found at: {train_dir}\nMake sure DATA_DIR is correct.")

# Collect every train/<class>/<file> whose extension is a supported image type.
train_imgs = [
    candidate
    for candidate in glob.glob(str(train_dir / "*" / "*.*"))
    if candidate.lower().endswith((".jpg",".jpeg",".png",".bmp",".webp"))
]
print(" Train images found:", len(train_imgs))

if not train_imgs:
    raise RuntimeError(" No training images found. Your folder must be train/<class>/*.jpg")

# 2) Auto-create val/ when it holds no images (20% split)
IMG_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".webp")

# Count only image files: a val/ folder that contains stray non-image files
# (e.g. a labels.txt) must still be treated as empty.
val_imgs = [p for p in glob.glob(str(val_dir / "*" / "*.*")) if p.lower().endswith(IMG_EXTS)]

if not val_imgs:
    print("val/ missing or empty -> creating 20% validation split from train/ ...")
    val_dir.mkdir(parents=True, exist_ok=True)

    split = 0.2
    rng = random.Random(42)  # seeded private RNG -> reproducible split, global state untouched
    classes = sorted(d.name for d in train_dir.iterdir() if d.is_dir())
    for c in classes:
        (val_dir / c).mkdir(parents=True, exist_ok=True)
        # Sort before shuffling: glob order depends on the filesystem, and the
        # seeded shuffle is only repeatable from a stable starting order.
        imgs = sorted(glob.glob(str(train_dir / c / "*")))
        imgs = [p for p in imgs if p.lower().endswith(IMG_EXTS)]
        rng.shuffle(imgs)
        k = int(len(imgs) * split)
        for p in imgs[:k]:
            shutil.move(p, str(val_dir / c / Path(p).name))

    # Re-scan so the count below reflects the files that were just moved.
    val_imgs = [p for p in glob.glob(str(val_dir / "*" / "*.*")) if p.lower().endswith(IMG_EXTS)]

print("✅ Val images found:", len(val_imgs))

# 3) Train YOLOv8 classification
print("\n Training YOLOv8-CLS ...")
!yolo task=classify mode=train model=yolov8n-cls.pt data={DATA_DIR} epochs={EPOCHS} imgsz={IMGSZ} batch={BATCH}

# 4) Auto-find latest best.pt
best_paths = sorted(glob.glob("/content/runs/classify/train*/weights/best.pt"))
if len(best_paths) == 0:
    raise FileNotFoundError("best.pt not found. Training failed.")

MODEL_PATH = best_paths[-1]
print("\n Using model:", MODEL_PATH)

# 5) YOLO-style overlay on video
# Reads VIDEO_IN frame by frame, classifies every PRED_EVERY_N_FRAMES-th frame,
# smooths the prediction over the most recent classifications, draws a status
# banner on every frame and writes the result to VIDEO_OUT.
from collections import Counter  # local import: majority vote over recent labels

if not os.path.exists(VIDEO_IN):
    raise FileNotFoundError(f" VIDEO_IN not found: {VIDEO_IN}")

model = YOLO(MODEL_PATH)

ALERT_CLASSES = None        # None -> alert on every class not named "normal"
ALERT_THRESH = 0.65         # minimum smoothed confidence required to raise an alert
PRED_EVERY_N_FRAMES = 30    # run the classifier on one frame out of this many

# Lower-cased alert classes, built once instead of once per frame.
alert_set = None if ALERT_CLASSES is None else {c.lower() for c in ALERT_CLASSES}

cap = cv2.VideoCapture(VIDEO_IN)
if not cap.isOpened():
    # The file exists but could not be opened/decoded; without this check the
    # loop would end immediately and leave an empty output file.
    raise RuntimeError(f"Could not open video: {VIDEO_IN}")

fps = cap.get(cv2.CAP_PROP_FPS) or 25  # fall back to 25 when the container reports 0
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

writer = cv2.VideoWriter(VIDEO_OUT, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
if not writer.isOpened():
    cap.release()
    raise RuntimeError(f"Could not open video writer for: {VIDEO_OUT}")

# Rolling window over the last 15 classifications (labels and their confidences).
label_hist = deque(maxlen=15)
conf_hist  = deque(maxlen=15)

frame_id = 0
last_label, last_conf = "unknown", 0.0
smooth_label, smooth_conf = last_label, last_conf

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        if frame_id % PRED_EVERY_N_FRAMES == 0:
            r = model.predict(frame, imgsz=IMGSZ, verbose=False)[0]
            probs = r.probs
            top_i = int(probs.top1)
            conf  = float(probs.top1conf)
            label = model.names[top_i]

            last_label, last_conf = label, conf
            label_hist.append(label)
            conf_hist.append(conf)

            # The history only changes here, so smoothing is recomputed here
            # rather than on every frame. Counter.most_common orders equal
            # counts by first occurrence, which makes ties deterministic
            # (max over a set depends on set iteration order).
            smooth_label = Counter(label_hist).most_common(1)[0][0]
            # Average only the confidences that belong to the winning label so
            # the number shown next to the label actually refers to that label.
            winning_confs = [cf for lbl, cf in zip(label_hist, conf_hist) if lbl == smooth_label]
            smooth_conf = float(sum(winning_confs) / len(winning_confs))

        # Determine alert logic
        if alert_set is None:
            # auto: alert on anything not named "normal"
            alert = (smooth_label.lower() != "normal") and (smooth_conf >= ALERT_THRESH)
        else:
            alert = (smooth_label.lower() in alert_set) and (smooth_conf >= ALERT_THRESH)

        # Colours are BGR: red banner on alert, green otherwise.
        banner = (0,0,255) if alert else (0,200,0)
        cv2.rectangle(frame, (0,0), (w, 70), banner, -1)

        text = f"{smooth_label.upper()} | conf={smooth_conf:.2f}"
        if alert:
            text += " | ALERT!"
        cv2.putText(frame, text, (15, 45), cv2.FONT_HERSHEY_SIMPLEX, 1.1, (255,255,255), 2)

        writer.write(frame)
        frame_id += 1
finally:
    # Release both handles even if inference or drawing raises, so the output
    # file is finalised and the input is not left open.
    cap.release()
    writer.release()

print("\n🎥 Done! Saved annotated video to:", VIDEO_OUT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 Train images found: 0


RuntimeError:  No training images found. Your folder must be train/<class>/*.jpg