In [1]:
import cv2
import mediapipe as mp
import numpy as np
from ultralytics import YOLO
from collections import deque

# Initialize models
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(refine_landmarks=True)
mp_drawing = mp.solutions.drawing_utils
yolo = YOLO("yolov8n.pt")  # Replace with fine-tuned weights later

# Counters
phone_count = 0
background_person_count = 0

# Filter thresholds
PHONE_ASPECT_RATIO_RANGE = (1.3, 2.5)  # Typical phone shape
PHONE_MIN_AREA = 5000  # Minimum pixel area for phone detection

def is_probable_phone(x1, y1, x2, y2):
    w, h = x2 - x1, y2 - y1
    aspect_ratio = h / w if w > 0 else 0
    area = w * h
    return (PHONE_ASPECT_RATIO_RANGE[0] <= aspect_ratio <= PHONE_ASPECT_RATIO_RANGE[1]) and (area >= PHONE_MIN_AREA)

def draw_stop_button(frame):
    btn_color = (0, 0, 255)
    btn_pos = (frame.shape[1] - 120, 20)
    btn_size = (100, 50)
    cv2.rectangle(frame, btn_pos, (btn_pos[0]+btn_size[0], btn_pos[1]+btn_size[1]), btn_color, -1)
    cv2.putText(frame, "STOP", (btn_pos[0]+10, btn_pos[1]+35),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    return btn_pos, btn_size

def is_button_clicked(event, x, y, flags, param):
    global stop_requested
    bx, by, bw, bh = param
    if event == cv2.EVENT_LBUTTONDOWN:
        if bx <= x <= bx + bw and by <= y <= by + bh:
            stop_requested = True

# Camera & window
cap = cv2.VideoCapture(0)
cv2.namedWindow("Detection")
stop_requested = False
cv2.setMouseCallback("Detection", is_button_clicked, param=(0, 0, 0, 0))

while True:
    ret, frame = cap.read()
    if not ret or stop_requested:
        break

    results = yolo.predict(source=frame, save=False, conf=0.5, verbose=False)

    for r in results:
        for box in r.boxes:
            cls = int(box.cls[0])
            label = yolo.names[cls]
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = box.conf[0]

            if label == "cell phone" and is_probable_phone(x1, y1, x2, y2):
                phone_count += 1
                color = (0, 0, 255)
                cv2.putText(frame, "PHONE", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
            elif label == "person":
                background_person_count += 1
                color = (255, 255, 0)
                cv2.putText(frame, "PERSON", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
            else:
                continue

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

    btn_pos, btn_size = draw_stop_button(frame)
    cv2.setMouseCallback("Detection", is_button_clicked,
                         param=(btn_pos[0], btn_pos[1], btn_size[0], btn_size[1]))

    cv2.imshow("Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q') or stop_requested:
        break

cap.release()
cv2.destroyAllWindows()

print("\n📊 Detection Summary")
print(f"📱 Phones detected: {phone_count}")
print(f"🧍‍♂️ Background people detected: {background_person_count}")



📊 Detection Summary
📱 Phones detected: 141
🧍‍♂️ Background people detected: 1415


In [7]:
import cv2
import os

# 📂 Paths
BASE_PATH = r"C:\Users\sagni\Downloads\Exam Checker\dataset"
TRAIN_PATH = os.path.join(BASE_PATH, "train")
VALID_PATH = os.path.join(BASE_PATH, "validation")
CATEGORIES = ["phone", "not_phone"]

# 🎯 Number of images
TRAIN_COUNT = 50
VALID_COUNT = 20

# 🛠 Create folders if not exist
for category in CATEGORIES:
    os.makedirs(os.path.join(TRAIN_PATH, category), exist_ok=True)
    os.makedirs(os.path.join(VALID_PATH, category), exist_ok=True)

def capture_images(category):
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("🚨 Error: Could not access the camera")
        return

    print(f"🎥 Capturing images for category: '{category}'")
    print("Press SPACE to capture, ESC to quit")

    count_train = 0
    count_valid = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            print("🚨 Failed to grab frame")
            break

        cv2.imshow("Capture Dataset", frame)

        k = cv2.waitKey(1)
        if k % 256 == 27:  # ESC pressed
            print("❌ Exiting capture")
            break
        elif k % 256 == 32:  # SPACE pressed
            # Decide train or validation
            if count_train < TRAIN_COUNT:
                save_path = os.path.join(TRAIN_PATH, category, f"{category}_{count_train}.jpg")
                count_train += 1
                print(f"✅ Saved to Train: {save_path}")
            elif count_valid < VALID_COUNT:
                save_path = os.path.join(VALID_PATH, category, f"{category}_{count_valid}.jpg")
                count_valid += 1
                print(f"✅ Saved to Validation: {save_path}")
            else:
                print("🎯 Collected enough images!")
                break

            cv2.imwrite(save_path, frame)

    cap.release()
    cv2.destroyAllWindows()

# 🏃‍♂️ Main
print("Select Category:")
print("1: phone")
print("2: not_phone")
choice = input("Enter 1 or 2: ")

if choice == "1":
    capture_images("phone")
elif choice == "2":
    capture_images("not_phone")
else:
    print("🚨 Invalid choice. Exiting.")


Select Category:
1: phone
2: not_phone


Enter 1 or 2:  2


🎥 Capturing images for category: 'not_phone'
Press SPACE to capture, ESC to quit
✅ Saved to Train: C:\Users\sagni\Downloads\Exam Checker\dataset\train\not_phone\not_phone_0.jpg
✅ Saved to Train: C:\Users\sagni\Downloads\Exam Checker\dataset\train\not_phone\not_phone_1.jpg
✅ Saved to Train: C:\Users\sagni\Downloads\Exam Checker\dataset\train\not_phone\not_phone_2.jpg
✅ Saved to Train: C:\Users\sagni\Downloads\Exam Checker\dataset\train\not_phone\not_phone_3.jpg
✅ Saved to Train: C:\Users\sagni\Downloads\Exam Checker\dataset\train\not_phone\not_phone_4.jpg
✅ Saved to Train: C:\Users\sagni\Downloads\Exam Checker\dataset\train\not_phone\not_phone_5.jpg
✅ Saved to Train: C:\Users\sagni\Downloads\Exam Checker\dataset\train\not_phone\not_phone_6.jpg
✅ Saved to Train: C:\Users\sagni\Downloads\Exam Checker\dataset\train\not_phone\not_phone_7.jpg
✅ Saved to Train: C:\Users\sagni\Downloads\Exam Checker\dataset\train\not_phone\not_phone_8.jpg
✅ Saved to Train: C:\Users\sagni\Downloads\Exam Checker

In [8]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV3Small
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
import os

# Paths
TRAIN_DIR = r"C:\Users\sagni\Downloads\Exam Checker\dataset\train"
VAL_DIR = r"C:\Users\sagni\Downloads\Exam Checker\dataset\validation"

# Data augmentation for training
train_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    zoom_range=0.2,
    horizontal_flip=True
)

val_gen = ImageDataGenerator(rescale=1./255)

train_data = train_gen.flow_from_directory(
    TRAIN_DIR, target_size=(224, 224), batch_size=8, class_mode='categorical'
)
val_data = val_gen.flow_from_directory(
    VAL_DIR, target_size=(224, 224), batch_size=8, class_mode='categorical'
)

# Build model
base_model = MobileNetV3Small(
    include_top=False, weights="imagenet", input_shape=(224, 224, 3)
)
base_model.trainable = False  # Freeze base

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(2, activation='softmax')  # 2 classes: phone, not_phone
])

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# Train
model.fit(train_data, validation_data=val_data, epochs=5)

# Save
output_dir = r"C:\Users\sagni\Downloads\Exam Checker\models"
if not os.path.exists(output_dir): os.makedirs(output_dir)
model.save(os.path.join(output_dir, "phone_classifier.h5"))
print("✅ Saved as models/phone_classifier.h5")


Found 100 images belonging to 2 classes.
Found 40 images belonging to 2 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/weights_mobilenet_v3_small_224_1.0_float_no_top_v2.h5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
✅ Saved as models/phone_classifier.h5


In [10]:
from ultralytics import YOLO

yolo = YOLO("yolov8n.pt")  # or yolov10n.pt
print(yolo.names)


{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microw

In [11]:
import cv2
import numpy as np
from ultralytics import YOLO
from tensorflow.keras.applications.mobilenet_v3 import preprocess_input
from tensorflow.keras.models import load_model

# 📦 Load YOLO and MobileNetV3 models
yolo = YOLO("yolov8n.pt")  # Use YOLOv8 COCO weights (supports 'cell phone', 'person')
classifier = load_model(r"C:\Users\sagni\Downloads\Exam Checker\models\phone_classifier.h5")

# Class labels for MobileNetV3
CLASS_LABELS = ['not_phone', 'phone']

# Detection counters
phone_count = 0
person_count = 0

def verify_phone(crop):
    """Use MobileNetV3 to verify if crop is actually a phone."""
    crop_resized = cv2.resize(crop, (224, 224))
    crop_array = np.expand_dims(crop_resized, axis=0)
    crop_array = preprocess_input(crop_array)
    pred = classifier.predict(crop_array, verbose=0)
    label_idx = np.argmax(pred)
    confidence = pred[0][label_idx]
    return CLASS_LABELS[label_idx], confidence

# 🎥 Start video capture
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("🚨 Error: Cannot access webcam")
    exit()

print("📡 Starting detection... Press ESC to quit.")

while True:
    ret, frame = cap.read()
    if not ret:
        print("🚨 Failed to grab frame")
        break

    # Run YOLO detection (lower confidence threshold to 0.3)
    results = yolo.predict(source=frame, save=False, conf=0.3, verbose=False)

    for r in results:
        for box in r.boxes:
            cls = int(box.cls[0])
            label = yolo.names[cls]
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            color = (0, 255, 0)  # Default box color: green

            if label in ["cell phone", "mobile phone"]:
                # Pass crop to MobileNetV3 for verification
                crop = frame[y1:y2, x1:x2]
                verified_label, confidence = verify_phone(crop)
                if verified_label == "phone" and confidence > 0.7:
                    phone_count += 1
                    color = (0, 0, 255)  # Red for phone
                    label_text = f"PHONE ({confidence:.2f})"
                else:
                    label_text = f"Not Phone ({confidence:.2f})"

            elif label == "person":
                person_count += 1
                color = (255, 255, 0)  # Cyan for person
                label_text = "PERSON"

            else:
                # Draw other YOLO classes (for debugging)
                label_text = label

            # Draw box and label
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, label_text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

    # Display frame
    cv2.imshow("Exam Monitoring", frame)

    # Exit on ESC
    k = cv2.waitKey(1)
    if k % 256 == 27:
        print("❌ Exiting...")
        break

cap.release()
cv2.destroyAllWindows()

# 📊 Print detection summary
print("\n📊 Detection Summary")
print(f"📱 Phones detected: {phone_count}")
print(f"🧍‍♂️ People detected: {person_count}")


📡 Starting detection... Press ESC to quit.
❌ Exiting...

📊 Detection Summary
📱 Phones detected: 1
🧍‍♂️ People detected: 2875


In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from tensorflow.keras.applications.mobilenet_v3 import preprocess_input
from tensorflow.keras.models import load_model
import time

# 📦 Load YOLO (use medium or large weights for better accuracy)
yolo = YOLO("yolov8m.pt")  # or yolov8l.pt if you have good GPU

# 📦 Load MobileNetV3 classifier
mobilenet = load_model(r"C:\Users\sagni\Downloads\Exam Checker\models\phone_classifier.h5")

# Labels
CLASS_LABELS = ['not_phone', 'phone']

# Counters
phone_count = 0
person_count = 0

# 📌 Helper: Verify phone with MobileNetV3
def verify_phone(crop):
    try:
        crop_resized = cv2.resize(crop, (224, 224))
        crop_array = np.expand_dims(crop_resized, axis=0)
        crop_array = preprocess_input(crop_array)
        pred = mobilenet.predict(crop_array, verbose=0)
        label_idx = np.argmax(pred)
        confidence = pred[0][label_idx]
        return CLASS_LABELS[label_idx], confidence
    except:
        return "not_phone", 0.0

# 🎥 Open webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("🚨 Error: Cannot open webcam")
    exit()

print("📡 Starting detection... Press ESC to quit.")

while True:
    ret, frame = cap.read()
    if not ret:
        print("🚨 Failed to grab frame")
        break

    # Run YOLO detection (conf threshold = 0.25)
    results = yolo.predict(source=frame, save=False, conf=0.25, verbose=False)

    for r in results:
        for box in r.boxes:
            cls_id = int(box.cls[0])
            yolo_label = yolo.names[cls_id]
            confidence = float(box.conf[0])
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            color = (0, 255, 0)  # Default green box
            label_text = f"{yolo_label} ({confidence:.2f})"

            if yolo_label == "cell phone":
                # If YOLO confidence < 0.5, verify with MobileNetV3
                if confidence < 0.5:
                    crop = frame[y1:y2, x1:x2]
                    verified_label, verify_conf = verify_phone(crop)
                    if verified_label == "phone" and verify_conf > 0.7:
                        phone_count += 1
                        color = (0, 0, 255)  # Red box for phone
                        label_text = f"PHONE ✔ ({verify_conf:.2f})"
                    else:
                        label_text = f"Not Phone ✖ ({verify_conf:.2f})"
                else:
                    phone_count += 1
                    color = (0, 0, 255)  # Red box for phone

            elif yolo_label == "person":
                person_count += 1
                color = (255, 255, 0)  # Cyan box for person

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, label_text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

    # Show frame
    cv2.imshow("YOLO + MobileNetV3 Detection", frame)

    # Exit on ESC
    if cv2.waitKey(1) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()

# 📊 Summary
print("\n📊 Detection Summary:")
print(f"📱 Phones detected: {phone_count}")
print(f"🧍‍♂️ People detected: {person_count}")


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt to 'yolov8m.pt'...


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 49.7M/49.7M [02:06<00:00, 411kB/s]


📡 Starting detection... Press ESC to quit.
