In [None]:
# 1. Install / import
!pip install ultralytics
from ultralytics import YOLO
import os
from pathlib import Path
from sklearn.model_selection import train_test_split
import yaml

# 2. Dataset paths (change if your root is somewhere else)
dataset_root = Path('/mnt/data/IITR_dataset')
pos_img_dir = dataset_root / 'positive' / 'images'
neg_img_dir = dataset_root / 'negative' / 'images'

# 3. Gather all image paths
# Compare the lower-cased suffix instead of globbing '*.jpg' etc., so files
# named e.g. 'IMG_001.JPG' are not silently skipped on case-sensitive
# filesystems.
image_suffixes = {'.jpg', '.jpeg', '.png'}
all_images = []
for img_dir in (pos_img_dir, neg_img_dir):
    all_images.extend(
        p for p in img_dir.glob('*') if p.suffix.lower() in image_suffixes
    )
# Directory listing order depends on the filesystem; sorting gives the seeded
# train/val split below the same input order on every run.
all_images.sort()

# 4. Split into train/val
# Shuffled 80/20 partition of the image paths (as plain strings), seeded with 42.
train_imgs, val_imgs = train_test_split(
    list(map(str, all_images)), test_size=0.2, random_state=42, shuffle=True
)

# 5. Ensure every image has a corresponding label file (create empty .txt if missing)
# The label for '<root>/<split>/images/<name>.<ext>' is looked up at
# '<root>/<split>/labels/<name>.txt'.
for image_file in map(Path, train_imgs + val_imgs):
    labels_dir = image_file.parent.parent / 'labels'
    label_file = labels_dir / f"{image_file.stem}.txt"
    if label_file.exists():
        continue  # keep existing annotations untouched
    labels_dir.mkdir(parents=True, exist_ok=True)
    label_file.write_text('')  # empty file for negative/no-object images

# 6. Write a data.yaml for YOLOv8
# Ultralytics documents each `train`/`val` entry as an image directory or a
# text file listing image paths (or a list of such entries). Individual image
# files placed directly in the YAML are therefore not loaded as images, so the
# split is written to list files (one image path per line) and the YAML points
# at those. Absolute paths keep the entries independent of the YAML's location.
train_list_path = Path('iitr_train.txt').resolve()
val_list_path = Path('iitr_val.txt').resolve()
train_list_path.write_text('\n'.join(map(str, train_imgs)) + '\n', encoding='utf-8')
val_list_path.write_text('\n'.join(map(str, val_imgs)) + '\n', encoding='utf-8')

data_dict = {
    'train': str(train_list_path),
    'val': str(val_list_path),
    'nc': 1,                      # number of classes
    'names': ['object']           # class names; rename 'object' as appropriate
}
with open('iitr_data.yaml', 'w', encoding='utf-8') as f:
    yaml.dump(data_dict, f, default_flow_style=False)

# 7. Train!
# Training settings collected in one mapping so they are easy to scan and tweak.
train_settings = {
    'data': 'iitr_data.yaml',     # our YAML file
    'epochs': 100,
    'imgsz': 640,
    'batch': 16,                  # adjust to your GPU
    'project': 'iitr_yolov8',     # output folder
    'name': 'exp1',
}
model = YOLO('yolov8s.pt')       # or yolov8n.pt, yolov8m.pt… whichever backbone you prefer
model.train(**train_settings)
