In [None]:


from google.colab import files
# Upload the inputs used by the later cells: HC_Dataset.zip (unzipped in STEP 2)
# and the video that STEP 6 processes. The call's return value is kept in
# `uploaded`; no later cell in this notebook reads it.
uploaded = files.upload()  # Upload HC_Dataset.zip and your video file


Saving AdobeStock_362880886_Video_4K_Preview.mp4 to AdobeStock_362880886_Video_4K_Preview.mp4


In [None]:
# STEP 0: Install dependencies
!pip install ultralytics opencv-python deep_sort_realtime

# STEP 1: Import libraries
from ultralytics import YOLO
import os
import shutil
import cv2
from google.colab.patches import cv2_imshow
from google.colab import files
from deep_sort_realtime.deepsort_tracker import DeepSort

# STEP 2: Extract dataset
if not os.path.exists('dataset'):
    # Pure-Python extraction: a missing or corrupt archive raises right here
    # instead of only printing a shell error and failing later at os.listdir.
    shutil.unpack_archive('/content/HC_Dataset.zip', 'dataset')

print("✅ Dataset contents:")
print(os.listdir('dataset'))
print(os.listdir('dataset/HC_Dataset'))

# STEP 3: Prepare dataset for classification
def prepare_dataset():
    """Rebuild ``temp_dataset/train`` from the extracted HC_Dataset images.

    Any existing ``temp_dataset`` directory is deleted first. Then, for each
    of the ``staff`` and ``non_staff`` categories, every regular file in
    ``dataset/HC_Dataset/<category>`` is copied (with metadata) into
    ``temp_dataset/train/<category>``.

    Raises:
        FileNotFoundError: If a category folder does not exist under
            ``dataset/HC_Dataset``.
    """
    # Start from a clean slate so files from a previous run cannot leak in.
    if os.path.exists("temp_dataset"):
        shutil.rmtree("temp_dataset")

    for category in ['staff', 'non_staff']:
        src = f'dataset/HC_Dataset/{category}'
        dst = f'temp_dataset/train/{category}'
        os.makedirs(dst, exist_ok=True)
        for entry in os.scandir(src):
            # Only copy regular files: shutil.copy2 raises on a directory,
            # and folders such as .ipynb_checkpoints or __MACOSX can appear
            # next to the images.
            if entry.is_file():
                shutil.copy2(entry.path, dst)

# STEP 4: Train classification model
def train_model():
    """Fine-tune the YOLOv8n classifier on the staff / non_staff images.

    Calls ``prepare_dataset()`` to build ``temp_dataset/train``, trains
    ``yolov8n-cls.pt`` on it (10 epochs, 224 px images, batch size 16),
    copies the resulting weights to ``yolov8n-cls.pt`` in the working
    directory, deletes ``temp_dataset`` and returns a model loaded from the
    copied weights.

    Returns:
        YOLO: The classifier loaded from the freshly trained weights.
    """
    prepare_dataset()
    model = YOLO('yolov8n-cls.pt')
    model.train(data='temp_dataset/train', epochs=10, imgsz=224, batch=16, val=False, split='train')

    # Ultralytics auto-increments the run folder (train, train2, ...), so a
    # fixed "runs/classify/train/..." path can point at an older run. Ask the
    # trainer where this run's weights actually are.
    trainer = model.trainer
    trained_weights = trainer.best if trainer.best.exists() else trainer.last

    # NOTE(review): this overwrites the pretrained checkpoint name, so a later
    # call starts from the fine-tuned weights rather than the pretrained ones.
    # Kept because the download step expects this filename.
    shutil.copy(trained_weights, "yolov8n-cls.pt")
    shutil.rmtree('temp_dataset')
    return YOLO("yolov8n-cls.pt")

# STEP 5: Process video with detection, ByteTrack tracking & classification
def _classify_person(cls_model, frame, x1, y1, x2, y2):
    """Classify the frame region [y1:y2, x1:x2]; return its class name, or None if the region is empty."""
    person_crop = frame[y1:y2, x1:x2]
    if person_crop.size == 0:
        return None
    cls_results = cls_model(cv2.resize(person_crop, (224, 224)))
    return cls_model.names[cls_results[0].probs.top1]


def _annotate_detections(frame, boxes, cls_model, id_to_class, staff_ids, non_staff_ids):
    """Draw a labelled box for every person in `boxes` and update the per-track class bookkeeping in place."""
    frame_h, frame_w = frame.shape[:2]
    for box in boxes:
        # Class 0 is "person" for the COCO-trained yolov8n.pt detector.
        if int(box.cls[0]) != 0:
            continue

        x1, y1, x2, y2 = map(int, box.xyxy[0])
        # Clamp to the frame: a negative coordinate would otherwise slice
        # from the opposite edge and produce a wrong crop.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, frame_w), min(y2, frame_h)

        # Detections without a tracker ID are labelled for this frame only.
        # Caching them under a made-up ID (e.g. the loop index) would collide
        # with real track IDs and corrupt the labels and counts.
        track_id = int(box.id[0]) if box.id is not None else None

        class_name = id_to_class.get(track_id) if track_id is not None else None
        if class_name is None:
            class_name = _classify_person(cls_model, frame, x1, y1, x2, y2)
            if class_name is None:
                continue
            if track_id is not None:
                # Classification happens only once per track ID.
                id_to_class[track_id] = class_name
                if class_name == "staff":
                    staff_ids.add(track_id)
                else:
                    non_staff_ids.add(track_id)

        color = (0, 255, 0) if class_name == "staff" else (0, 0, 255)
        label = f"{track_id}: {class_name}" if track_id is not None else class_name
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)


def process_video_with_bytetrack(video_path, cls_model, output_path='output.mp4', frame_size=(1280, 720)):
    """Detect and track people in a video, label each as staff / non-staff, and write an annotated copy.

    Every frame is resized to `frame_size`, passed through the `yolov8n.pt`
    detector with the built-in ByteTrack tracker, and each tracked person is
    classified once (the first time its track ID is seen) with `cls_model`.
    Boxes, labels and a running "STAFF | NON-STAFF" count of distinct track
    IDs are drawn on the frame before it is written to `output_path`.

    Args:
        video_path: Path of the input video.
        cls_model: Classifier that is called on a 224x224 BGR crop and exposes
            `names`; a class named "staff" is drawn in green, any other in red.
        output_path: Destination of the annotated mp4 (default 'output.mp4').
        frame_size: (width, height) every frame is resized to before
            processing and writing (default (1280, 720)).

    Returns:
        None. If the first frame cannot be read, an error is printed and no
        output file is written.

    Raises:
        RuntimeError: If the output video writer cannot be opened.
    """
    width, height = frame_size
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        # Read first frame
        ret, frame = cap.read()
        if not ret:
            print("❌ Cannot open video.")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        # `not fps > 0` is also true for NaN, so this covers 0, NaN and
        # negative values reported by broken containers.
        if not fps > 0:
            fps = 25

        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
        if not out.isOpened():
            raise RuntimeError(f"Cannot open video writer for {output_path!r}")

        det_model = YOLO('yolov8n.pt')  # Person detection

        # Track database: class per track ID plus the distinct IDs per class.
        staff_ids, non_staff_ids, id_to_class = set(), set(), {}

        frame_idx = 0
        while ret:
            frame = cv2.resize(frame, (width, height))
            frame_idx += 1
            print(f"🧠 Processing frame {frame_idx}")

            # Built-in ByteTrack; persist=True keeps track IDs across calls.
            # classes=[0] keeps non-person detections out of the tracker.
            result = det_model.track(
                frame, persist=True, tracker="bytetrack.yaml", conf=0.25, classes=[0]
            )[0]

            boxes = getattr(result, "boxes", None)
            if boxes is not None:
                _annotate_detections(frame, boxes, cls_model, id_to_class, staff_ids, non_staff_ids)

            # Add count
            count_text = f"STAFF: {len(staff_ids)} | NON-STAFF: {len(non_staff_ids)}"
            cv2.putText(frame, count_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)

            out.write(frame)
            ret, frame = cap.read()
    finally:
        # Release handles on every exit path (early return and exceptions
        # included) so the output file is finalized and the capture is freed.
        cap.release()
        if out is not None:
            out.release()

    print("✅ ByteTrack video processing complete.")


# STEP 6: Execute everything
# Path of the uploaded video; update this to match the file you uploaded.
VIDEO_PATH = "/content/AdobeStock_1018209226_Video_HD_Preview.mp4"

# Fail fast: a wrong path should surface before the training run,
# not after it.
if not os.path.exists(VIDEO_PATH):
    raise FileNotFoundError(f"Video not found: {VIDEO_PATH} - upload it or update VIDEO_PATH.")

# Drop any output from an earlier run so a failed run cannot ship a stale video.
if os.path.exists("output.mp4"):
    os.remove("output.mp4")

print("📦 Training classification model...")
cls_model = train_model()

print("🎥 Processing video and classifying roles...")
process_video_with_bytetrack(VIDEO_PATH, cls_model)

# STEP 7: Download results
for artifact in ("output.mp4", "yolov8n-cls.pt"):
    if os.path.exists(artifact):
        files.download(artifact)
    else:
        print(f"⚠️ {artifact} was not produced; skipping download.")


✅ Dataset contents:
['HC_Dataset']
['staff', 'non_staff']
📦 Training classification model...
Ultralytics 8.3.120 🚀 Python-3.11.12 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0mtask=classify, mode=train, model=yolov8n-cls.pt, data=temp_dataset/train, epochs=10, time=None, patience=100, batch=16, imgsz=224, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train5, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=False, split=train, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_tx

[34m[1mtrain: [0mScanning /content/temp_dataset/train_split/train... 286 images, 0 corrupt: 100%|██████████| 286/286 [00:00<00:00, 4637.89it/s]

[34m[1mtrain: [0mNew cache created: /content/temp_dataset/train_split/train.cache
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1089.0±336.5 MB/s, size: 37.2 KB)



[34m[1mval: [0mScanning /content/temp_dataset/train_split/val... 72 images, 0 corrupt: 100%|██████████| 72/72 [00:00<00:00, 4124.93it/s]

[34m[1mval: [0mNew cache created: /content/temp_dataset/train_split/val.cache
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 26 weight(decay=0.0), 27 weight(decay=0.0005), 27 bias(decay=0.0)
Image sizes 224 train, 224 val
Using 0 dataloader workers
Logging results to [1mruns/classify/train5[0m
Starting training for 10 epochs...

      Epoch    GPU_mem       loss  Instances       Size



       1/10         0G    0.03889         14        224: 100%|██████████| 18/18 [00:16<00:00,  1.07it/s]


      Epoch    GPU_mem       loss  Instances       Size



       2/10         0G    0.04191         14        224: 100%|██████████| 18/18 [00:17<00:00,  1.04it/s]


      Epoch    GPU_mem       loss  Instances       Size



       3/10         0G    0.01868         14        224: 100%|██████████| 18/18 [00:16<00:00,  1.09it/s]


      Epoch    GPU_mem       loss  Instances       Size



       4/10         0G    0.01692         14        224: 100%|██████████| 18/18 [00:16<00:00,  1.10it/s]


      Epoch    GPU_mem       loss  Instances       Size



       5/10         0G   0.009211         14        224: 100%|██████████| 18/18 [00:17<00:00,  1.04it/s]


      Epoch    GPU_mem       loss  Instances       Size



       6/10         0G   0.008677         14        224: 100%|██████████| 18/18 [00:17<00:00,  1.05it/s]


      Epoch    GPU_mem       loss  Instances       Size



       7/10         0G    0.03784         14        224: 100%|██████████| 18/18 [00:16<00:00,  1.11it/s]


      Epoch    GPU_mem       loss  Instances       Size



       8/10         0G    0.01134         14        224: 100%|██████████| 18/18 [00:16<00:00,  1.06it/s]


      Epoch    GPU_mem       loss  Instances       Size



       9/10         0G     0.0194         14        224: 100%|██████████| 18/18 [00:16<00:00,  1.10it/s]


      Epoch    GPU_mem       loss  Instances       Size



      10/10         0G    0.02379         14        224: 100%|██████████| 18/18 [00:16<00:00,  1.11it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 3/3 [00:01<00:00,  1.82it/s]


                   all      0.986          1

10 epochs completed in 0.048 hours.
Optimizer stripped from runs/classify/train5/weights/last.pt, 3.0MB
Optimizer stripped from runs/classify/train5/weights/best.pt, 3.0MB

Validating runs/classify/train5/weights/best.pt...
Ultralytics 8.3.120 🚀 Python-3.11.12 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
YOLOv8n-cls summary (fused): 30 layers, 1,437,442 parameters, 0 gradients, 3.3 GFLOPs
Found 358 images in subdirectories. Attempting to split...
Splitting /content/temp_dataset/train (2 classes, 358 images) into 80% train, 20% val...
Split complete in /content/temp_dataset/train_split ✅
[34m[1mtrain:[0m /content/temp_dataset/train_split/train... found 342 images in 2 classes ✅ 
[34m[1mval:[0m /content/temp_dataset/train_split/val... found 128 images in 2 classes ✅ 
[34m[1mtest:[0m None...


               classes   top1_acc   top5_acc: 100%|██████████| 3/3 [00:02<00:00,  1.23it/s]


                   all      0.986          1
Speed: 0.0ms preprocess, 19.0ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns/classify/train5[0m
🎥 Processing video and classifying roles...
🧠 Processing frame 1

0: 384x640 3 persons, 152.7ms
Speed: 2.5ms preprocess, 152.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 224x224 non_staff 0.73, staff 0.27, 18.3ms
Speed: 3.1ms preprocess, 18.3ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 staff 0.97, non_staff 0.03, 19.2ms
Speed: 3.2ms preprocess, 19.2ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 staff 0.98, non_staff 0.02, 17.6ms
Speed: 3.1ms preprocess, 17.6ms inference, 0.1ms postprocess per image at shape (1, 3, 224, 224)
🧠 Processing frame 2

0: 384x640 3 persons, 152.0ms
Speed: 3.1ms preprocess, 152.0ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)
🧠 Processing frame 3

0: 384x640 3 persons, 152.9m

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Copy the most recent classification run's weights to yolov8n-cls.pt.
# Ultralytics numbers run folders (train, train2, ...), so the fixed path
# runs/classify/train may be missing or belong to an older run.
import glob
import os
import shutil

_best_weights = sorted(glob.glob("runs/classify/train*/weights/best.pt"), key=os.path.getmtime)
if not _best_weights:
    raise FileNotFoundError("No runs/classify/train*/weights/best.pt found; run training first.")
shutil.copy(_best_weights[-1], "yolov8n-cls.pt")


In [None]:
# Download the classifier weights file; relies on `files` having been imported
# from google.colab in an earlier cell.
files.download("yolov8n-cls.pt")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>