# Handheld Phone Detection

- Imports

In [None]:
# !pip install -r requirements.txt

import os, json, shutil, cv2, math, numpy as np, pandas as pd
from pathlib import Path
from shutil import copy2
from tqdm import tqdm
from sklearn.model_selection import train_test_split

- Download training set from Kaggle

In [None]:
# Optional: uncomment to fetch the dataset with kagglehub instead of using a local copy.
# import kagglehub

# # Download the latest version of the dataset
# dataset_root = kagglehub.dataset_download("lakshyataragi/mobilephoneusagedatasetiitr")

# print("Path to dataset files:", dataset_root)
# Root directory of the local dataset copy; the conversion cell reads
# `positive/`, `negative/` and `labels.csv` beneath it.
dataset_root = "dataset/muid-iitr"

- Structure files for YOLO training

In [None]:
# Source layout: two image folders plus one CSV holding the bounding boxes.
pos_dir = f"{dataset_root}/positive"
neg_dir = f"{dataset_root}/negative"
csv_path = f"{dataset_root}/labels.csv"

# Destination tree in the images/<split> + labels/<split> layout used for YOLO training.
out_root = "train_data/muid-iitr"
img_out  = Path(out_root)/"images"
lbl_out  = Path(out_root)/"labels"

# Create every split directory up front so the conversion step can write freely.
for split_name in ("train", "val"):
    (img_out/split_name).mkdir(parents=True, exist_ok=True)
    (lbl_out/split_name).mkdir(parents=True, exist_ok=True)

# Bounding-box annotations; the conversion step looks rows up by `filename`.
df = pd.read_csv(csv_path)

from sklearn.model_selection import train_test_split

# Share of images held out for validation. It must lie strictly between 0 and 1:
# train_test_split raises ValueError for 0.0, and an empty "val" split would
# leave training without validation images.
val_fraction = 0.2

# Keep regular files only and sort them: glob order depends on the filesystem,
# so without sorting the fixed random_state would not give a reproducible split.
all_files = sorted(
    p
    for folder in (pos_dir, neg_dir)
    for p in Path(folder).glob("*")
    if p.is_file()
)
train_files, val_files = train_test_split(all_files, test_size=val_fraction, random_state=42)

def process_split(files, split):
    """Copy images into the YOLO tree for ``split`` and write one label file each.

    Args:
        files: iterable of ``pathlib.Path`` objects pointing at the source images.
        split: name of the sub-directory under ``img_out`` / ``lbl_out``
            (the notebook uses "train" and "val").

    Reads the notebook globals ``df`` (annotation table with the columns
    ``filename``, ``width``, ``height``, ``xmin``, ``ymin``, ``xmax``, ``ymax``),
    ``img_out`` and ``lbl_out``.

    For every image a ``<stem>.txt`` file is written next to the other labels of
    the split. Images without annotation rows (negative samples) get an empty
    file. Each remaining box becomes one line
    ``0 x_center y_center width height`` normalised by the image size taken
    from the first annotation row of that image.
    """
    # Index the annotations once instead of masking the whole table per image.
    boxes_by_file = {name: rows for name, rows in df.groupby("filename", sort=False)}

    for img_path in files:
        copy2(img_path, img_out/split/img_path.name)

        lines = []
        rows = boxes_by_file.get(img_path.name)
        if rows is not None:
            first = rows.iloc[0]
            W, H = first.width, first.height
            for r in rows.itertuples(index=False):
                # Clamp the corners to the image: normalised values outside
                # [0, 1] make the trainer reject the label file.
                xmin, xmax = min(max(r.xmin, 0), W), min(max(r.xmax, 0), W)
                ymin, ymax = min(max(r.ymin, 0), H), min(max(r.ymax, 0), H)
                if xmax <= xmin or ymax <= ymin:
                    # Zero or negative area after clamping: not a usable box.
                    continue
                # convert to yolo format (class 0, centre + size, all normalised)
                x_center = (xmin + xmax)/2.0 / W
                y_center = (ymin + ymax)/2.0 / H
                w = (xmax - xmin) / W
                h = (ymax - ymin) / H
                lines.append(f"0 {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}")

        # An empty file marks the image as a negative (background) sample.
        (lbl_out/split/(img_path.stem + ".txt")).write_text("\n".join(lines))

# Convert both splits, then report the directory the data was actually written to.
process_split(train_files, "train")
process_split(val_files, "val")
print(f"Conversion done -> YOLO format ready in {out_root}/")

- Create YAML for YOLO training

In [None]:
import yaml

# Dataset root as an absolute path: Ultralytics may resolve a relative `path`
# against its configured datasets directory instead of the current working
# directory, which would make the images impossible to find.
dataset_dir = Path("train_data/muid-iitr").resolve()
dataset_dir.mkdir(parents=True, exist_ok=True)
yaml_path = dataset_dir/"data.yaml"

data_yaml = {
    "path": str(dataset_dir),  # dataset root dir
    "train": "images/train",   # relative to "path"
    "val": "images/val",       # relative to "path"
    "names": ["phone"],        # single class, index 0
}
with open(yaml_path, "w") as f:
    yaml.safe_dump(data_yaml, f)

# Echo the generated file; read_text() closes the handle, unlike a bare open().read().
print(yaml_path.read_text())

In [None]:
# YOLO is the model class instantiated in the training cell below.
from ultralytics import YOLO
import torch

# Record the installed PyTorch version alongside the run output.
print("Torch version:", torch.__version__)

- Start Training

In [None]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# instead of failing on machines without a GPU.
device = 0 if torch.cuda.is_available() else "cpu"

model = YOLO("yolov8n.pt")  # start small for speed
model.train(
    data="train_data/muid-iitr/data.yaml",
    epochs=50,
    imgsz=640,
    batch=16,
    device=device,
)

# Optional follow-up: continue training from the best checkpoint of the run above.
# model = YOLO("runs/detect/train/weights/best.pt")
# model.train(data="train_data/muid-iitr/data.yaml", epochs=10, imgsz=640, batch=16)

- Run inference on test videos

In [None]:
# Video clips to run the trained detector on.
sources = [
    "test_videos/20250715_142638_e37e7821.mp4",
    "test_videos/20250715_144536_9170ec05.mp4",
    "test_videos/20250715_160539_6c132ed0.mp4",
    "test_videos/20250715_173316_8cb090ab.mp4",
    "test_videos/20250718_145802_46039155.mp4",
    "test_videos/20250718_150650_075a44fc.mp4"
]

# Invoke test.py once per clip through the IPython shell escape; `{source}` is
# interpolated from the Python loop variable.
# NOTE(review): -w / -c / -o presumably mean weights file, confidence threshold
# and output directory; test.py is not visible here, so confirm against it.
# NOTE(review): the weights path assumes the training run was saved under
# runs/detect/train; confirm this matches the run you want to evaluate.
for source in sources:
    !python test.py {source} -w runs/detect/train/weights/best.pt -c 0.35 -o results