## Import libraries

In [None]:
# Install YOLOv8 (shipped in the `ultralytics` package, imported below).
# The leading "!" runs pip as a shell command from the notebook; -q reduces pip's output.
!pip install ultralytics -q

In [None]:
# Import libraries

from ultralytics import YOLO
from ultralytics.utils.plotting import plot_images

import os, shutil, glob, cv2, random
from pathlib import Path
from collections import Counter
from google.colab import files
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


## Import data

In [None]:
# import data
import kagglehub

# Download latest version
# The returned value is kept in `path`; the configuration cell wraps it in
# Path(...) as BASE_DATA_DIR, so this cell must run before that one.
path = kagglehub.dataset_download("ahmedsorour1/mri-for-brain-tumor-with-bounding-boxes")

print("Path to dataset files:", path)

## Data preparation

Prepare the data so that it is compatible with the YOLO format: all images for a split must be in one folder and all corresponding label files in another folder, regardless of class.

In [None]:
# --- CONFIGURATION ---
# Root of the downloaded dataset; `path` is set by the kagglehub download cell.
BASE_DATA_DIR = Path(path) # kagglehub path
# Root folder that will receive the restructured, YOLO-style dataset.
YOLO_ROOT_DIR = Path("/content/yolov8_data")

# Per-class folder names read by restructure_dataset (note the space in 'No Tumor').
# NOTE(review): the list order presumably matches the class ids stored in the
# label files -- confirm against the dataset before training.
CLASS_NAMES = ['Glioma', 'Meningioma', 'No Tumor', 'Pituitary']

In [None]:
# --- DEFINE YOLOV8 DIRECTORY STRUCTURE ---
def create_yolo_dirs(yolo_root):
    """Create ``<yolo_root>/{images,labels}/{train,val}`` if they are missing.

    Each folder path is printed after it has been created; folders that
    already exist are left untouched.
    """
    targets = [
        os.path.join(yolo_root, kind, split_name)
        for split_name in ('train', 'val')
        for kind in ('images', 'labels')
    ]
    for target in targets:
        os.makedirs(target, exist_ok=True)
        print(f"Created: {target}")

In [None]:
# --- RESTRUCTURE FILES ---
def restructure_dataset(base_dir, yolo_root, class_names=None):
    """Copy the per-class dataset folders into the flat YOLO layout.

    Reads ``base_dir/<Train|Val>/<class>/{images,labels}`` and writes to
    ``yolo_root/{images,labels}/<train|val>``. Every copied file is renamed
    to ``<Class_Name>__<original name>`` (spaces in the class name become
    underscores) so files from different classes cannot overwrite each other.

    Args:
        base_dir: Root folder of the source dataset.
        yolo_root: Root folder of the YOLO-style output tree.
        class_names: Class folder names to process. Defaults to the
            module-level ``CLASS_NAMES``; pass it explicitly to avoid
            depending on whatever that global currently holds.
    """
    if class_names is None:
        class_names = CLASS_NAMES

    print("\n--- Starting Data Restructuring ---")
    splits = {'Train': 'train', 'Val': 'val'}
    image_exts = ('.jpg', '.jpeg', '.png')

    for source_split, target_split in splits.items():
        dest_images_path = os.path.join(yolo_root, 'images', target_split)
        dest_labels_path = os.path.join(yolo_root, 'labels', target_split)
        # Create the targets here so the copy below also works when
        # create_yolo_dirs() has not been run beforehand.
        os.makedirs(dest_images_path, exist_ok=True)
        os.makedirs(dest_labels_path, exist_ok=True)

        for class_name in class_names:
            source_class_path = os.path.join(base_dir, source_split, class_name)
            source_images_path = os.path.join(source_class_path, 'images')
            source_labels_path = os.path.join(source_class_path, 'labels')

            if not os.path.isdir(source_images_path):
                print(f"‚ö†Ô∏è Missing: {source_images_path}")
                continue

            # prefix with class name (replace spaces with underscores to avoid issues)
            safe_class = class_name.replace(" ", "_")

            # Sorted so repeated runs process files in the same order.
            for filename in sorted(os.listdir(source_images_path)):
                if not filename.lower().endswith(image_exts):
                    continue

                new_name = f"{safe_class}__{filename}"
                shutil.copy(os.path.join(source_images_path, filename),
                            os.path.join(dest_images_path, new_name))

                # copy label with the same renamed basename
                lbl_file = os.path.splitext(filename)[0] + ".txt"
                lbl_src = os.path.join(source_labels_path, lbl_file)
                lbl_dst = os.path.join(dest_labels_path, os.path.splitext(new_name)[0] + ".txt")

                # An image without a label file is still copied, just without
                # a label (presumably meant as a negative sample -- confirm).
                if os.path.exists(lbl_src):
                    shutil.copy(lbl_src, lbl_dst)

        print(f"‚úÖ Finished restructuring {target_split} split.")

In [None]:
# --- CREATE data.yaml ---
def create_data_yaml(yolo_root, class_names):
    """Write ``data.yaml`` describing the dataset under ``yolo_root``.

    The file declares ``path``, the ``images/train`` and ``images/val``
    folders, the number of classes and the class names. Names are
    lower-cased and spaces are replaced by underscores.

    Args:
        yolo_root: Root folder of the YOLO-style dataset; created if missing.
        class_names: Class names, in class-id order.

    Returns:
        The path of the written ``data.yaml`` as a string.
    """
    names = [c.replace(" ", "_").lower() for c in class_names]
    yaml_content = f"""
# YOLOv8 Data Configuration
path: {yolo_root}
train: images/train
val: images/val

nc: {len(class_names)}
names: {names}
"""
    # Make the function usable on its own, before any folders were created.
    os.makedirs(yolo_root, exist_ok=True)
    yaml_path = os.path.join(yolo_root, 'data.yaml')
    # Explicit encoding so the result does not depend on the platform default.
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write(yaml_content)

    print(f"\n‚úÖ Created data.yaml at: {yaml_path}")
    print(yaml_content)
    return yaml_path

In [None]:
# --- EXECUTION ---
# Order matters: the target folders are created first, the files are then
# copied into them, and finally the data.yaml describing the result is written.
create_yolo_dirs(YOLO_ROOT_DIR)
restructure_dataset(BASE_DATA_DIR, YOLO_ROOT_DIR)
# Keep the returned yaml path so it can be reported below.
DATA_YAML_PATH = create_data_yaml(YOLO_ROOT_DIR, CLASS_NAMES)

print(f"\nüöÄ Data ready! Use this yaml for YOLOv8 training: {DATA_YAML_PATH}")

## Data cleaning

In [None]:
# Locations inside the restructured dataset: one image folder and one label
# folder per split, plus the dataset description file.
YOLO_ROOT = Path("/content/yolov8_data")
IMG_TRAIN = YOLO_ROOT.joinpath("images", "train")
LBL_TRAIN = YOLO_ROOT.joinpath("labels", "train")
IMG_VAL = YOLO_ROOT.joinpath("images", "val")
LBL_VAL = YOLO_ROOT.joinpath("labels", "val")
DATA_YAML = YOLO_ROOT.joinpath("data.yaml")

In [None]:
def verify_pairing(img_dir, lbl_dir, preview_missing=10):
    """Report image/label mismatches for one split and count labelled objects.

    Images (.jpg/.jpeg/.png) and labels (.txt) are matched by file stem. The
    number of files, the number of unmatched stems on each side and up to
    ``preview_missing`` example stems are printed.

    Args:
        img_dir: ``pathlib.Path`` of the image folder.
        lbl_dir: ``pathlib.Path`` of the label folder.
        preview_missing: Maximum number of unmatched stems to print per side.

    Returns:
        A ``Counter`` mapping class id to the number of label lines carrying
        that id; lines with fewer than five fields are ignored.
    """
    image_suffixes = ('.jpg','.jpeg','.png')
    imgs = sorted(p for p in img_dir.iterdir() if p.suffix.lower() in image_suffixes)
    lbls = sorted(p for p in lbl_dir.iterdir() if p.suffix.lower() == '.txt')

    image_stems = {p.stem for p in imgs}
    label_stems = {p.stem for p in lbls}
    missing_labels = sorted(image_stems - label_stems)
    missing_images = sorted(label_stems - image_stems)

    print(f"{img_dir}: {len(imgs)} images, {len(lbls)} labels")
    print(f"  Images missing labels: {len(missing_labels)}")
    if missing_labels:
        print("   Example missing label files (images):", missing_labels[:preview_missing])
    print(f"  Labels without images: {len(missing_images)}")
    if missing_images:
        print("   Example missing image files (labels):", missing_images[:preview_missing])

    # Tally the leading class id of every well-formed label line.
    class_counts = Counter()
    for label_file in lbls:
        with open(label_file) as handle:
            rows = (raw.strip().split() for raw in handle)
            class_counts.update(int(float(row[0])) for row in rows if len(row) >= 5)
    return class_counts

# Check both splits; each call prints a pairing summary and returns the
# per-class-id object counts read from the label files.
print("Training split verification:")
train_counts = verify_pairing(IMG_TRAIN, LBL_TRAIN)

print("\nValidation split verification:")
val_counts = verify_pairing(IMG_VAL, LBL_VAL)

# Sorted by class id so the two splits can be compared side by side.
print("\nLabel object counts (train):", dict(sorted(train_counts.items())))
print("Label object counts (val):", dict(sorted(val_counts.items())))

# Prints whether data.yaml exists (True/False) followed by its path.
print("\nData YAML path:", DATA_YAML.exists(), DATA_YAML)
# Stop here rather than later in training if the dataset description is missing.
if not DATA_YAML.exists():
    raise FileNotFoundError(f"data.yaml not found at {DATA_YAML}. Create it pointing to images/train and images/val.")

## Data visualisation

In [None]:
# Paths
# Training images and labels of the restructured dataset.
IMG_DIR = Path("/content/yolov8_data/images/train")
LBL_DIR = Path("/content/yolov8_data/labels/train")

# Class
# File-name prefixes used to find images per class (underscore spelling).
# NOTE(review): this rebinds CLASS_NAMES, which the data-preparation cells
# defined with 'No Tumor' (with a space) and which restructure_dataset reads;
# re-running the restructuring after this cell would look for a 'No_Tumor'
# source folder instead.
CLASS_NAMES = ['Glioma', 'Meningioma', 'No_Tumor', 'Pituitary']
DISPLAY_NAMES = ['glioma','meningioma','no_tumor','pituitary']  # for labels

In [None]:
def plot_with_boxes(img_path, lbl_path, class_names):
    """Show an image on the current Matplotlib axes with its YOLO boxes drawn.

    Each usable label line is read as ``class x_center y_center width height``
    with coordinates relative to the image size, converted to pixels and drawn
    as a red rectangle with the class name next to it.

    Args:
        img_path: Path of the image file.
        lbl_path: ``pathlib.Path`` of the label file; when it does not exist
            the image is shown without boxes.
        class_names: Sequence indexed by class id, used for the box captions.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(str(img_path))
    if img is None:
        # cv2.imread reports an unreadable file by returning None; fail here
        # with a clear message instead of inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h, w = img.shape[:2]
    plt.imshow(img)
    ax = plt.gca()

    if lbl_path.exists():
        with open(lbl_path) as f:
            for line in f:
                parts = line.split()
                # Skip blank or truncated lines, mirroring the >= 5 field
                # rule used by verify_pairing; extra fields are ignored.
                if len(parts) < 5:
                    continue
                cls, x, y, bw, bh = map(float, parts[:5])

                # Centre/size (relative) -> top-left corner and size (pixels).
                x1 = (x - bw/2) * w
                y1 = (y - bh/2) * h
                x2 = (x + bw/2) * w
                y2 = (y + bh/2) * h
                ax.add_patch(plt.Rectangle((x1, y1), x2-x1, y2-y1,
                                           fill=False, color='red', linewidth=2))

                # Fall back to the raw id when it has no entry in class_names.
                cls_id = int(cls)
                caption = class_names[cls_id] if 0 <= cls_id < len(class_names) else str(cls_id)
                ax.text(x1, y1, caption, color='yellow',
                        fontsize=8, bbox=dict(facecolor='black', alpha=0.5))

    plt.axis('off')

In [None]:
# Show up to three random training images per class with their boxes drawn.
for idx, cname in enumerate(CLASS_NAMES):
    # Images are prefixed with the class name, spaces replaced by underscores
    # (e.g. "No_Tumor__"). Normalising here keeps the lookup working whether
    # CLASS_NAMES holds 'No Tumor' or 'No_Tumor'.
    candidates = list(IMG_DIR.glob(f"{cname.replace(' ', '_')}__*"))
    if not candidates:
        print(f"‚ö†Ô∏è No images found for class {cname}")
        continue

    chosen = random.sample(candidates, min(3, len(candidates)))
    plt.figure(figsize=(12, 4))
    plt.suptitle(f"Class: {DISPLAY_NAMES[idx]}", fontsize=14)

    for i, img_path in enumerate(chosen):
        # The label file shares the image's stem.
        lbl_path = LBL_DIR / (img_path.stem + ".txt")
        plt.subplot(1, 3, i+1)
        plot_with_boxes(img_path, lbl_path, DISPLAY_NAMES)

    plt.show()

## Model training

In [None]:
# The dataset description is passed on as a plain string path.
data_yaml = str(DATA_YAML)
# "yolov8s.pt" is presumably the pretrained small YOLOv8 checkpoint -- swap the
# file name to start from a different model.
model = YOLO("yolov8s.pt") # select model

In [None]:
# Training config
train_params = dict(
    data=data_yaml,   # path to data.yaml
    epochs=20,        # start small; increase later (50-200)
    imgsz=416,        # training image size; the evaluation cell uses the same value
    batch=4,          # batch size
    lr0=0.001,        # learning rate
    name="yolov8s_brain_tumor_v1",  # run name; later cells read runs/detect/<name>
    # workers=2  # you can set if needed
)

print("Starting training (this will print progress).")
model.train(**train_params)

# After training, best weights are in runs/detect/<name>/weights/best.pt
print("Training finished. Check runs/detect for logs and weights.")

## Model evaluation

In [None]:
# Locate the weights of the training run, preferring best.pt over last.pt.
run_dir = Path("runs/detect/yolov8s_brain_tumor_v1")
best_weights = run_dir.joinpath("weights", "best.pt")
if not best_weights.exists():
    print("Warning: best.pt not found, using last.pt if available.")
    last = run_dir.joinpath("weights", "last.pt")
    if not last.exists():
        # Neither checkpoint is present, so there is nothing to evaluate.
        raise FileNotFoundError(f"No trained weights in {run_dir / 'weights'}")
    best_weights = last

print("Using weights:", best_weights)

# Rebuild the model from the selected checkpoint.
model = YOLO(str(best_weights))

# Validate with the same image size and batch size as the training cell,
# then show the returned metrics object.
metrics = model.val(data=data_yaml, imgsz=416, batch=4)
print(metrics)

## Predicting on a new MRI image

In [None]:
# Load trained best model
# NOTE(review): the path is hard-coded to best.pt, so the last.pt fallback
# chosen in the evaluation cell is not carried over here.
best_weights = "runs/detect/yolov8s_brain_tumor_v1/weights/best.pt"  # adjust if you trained under a different run name
model = YOLO(best_weights)

In [None]:
# Ask for one or more image files, then predict and display each of them.
uploaded = files.upload()
for fname in uploaded:
    test_img = fname
    print(f"‚úÖ Uploaded: {test_img}")

    # Predict on the uploaded file and keep the first result.
    results = model.predict(source=test_img, imgsz=640, conf=0.25)
    res0 = results[0]

    # Rendered image with the predicted boxes drawn on it.
    out_img = res0.plot()

    # The channel axis is reversed before display (plot() output is
    # presumably BGR while Matplotlib expects RGB).
    plt.figure(figsize=(8,8))
    plt.imshow(out_img[:,:,::-1])
    plt.axis('off')
    plt.show()

    # Report the class name of every detected box, or say that none was found.
    if len(res0.boxes) == 0:
        print("‚ö†Ô∏è No tumor detected in this image.")
    else:
        probs = [model.names[int(c)] for c in res0.boxes.cls]
        print("Predicted tumor types in this image:", probs)