<a href="https://www.kaggle.com/code/rebekadina/assignment1-yolov11-475?scriptVersionId=287210293" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Overview
This notebook illustrates the complete implementation of a YOLOv11 object detection pipeline using the Dental X-ray dataset. The project transitions from raw dataset handling to a fully trained model capable of identifying dental features in X-ray imagery.

## Steps Covered
1. Environment Setup

* Updating the ultralytics framework to the latest version to leverage YOLOv11 capabilities.

* Data Organization & Output Configuration

* Defining the source image directory and creating a dedicated output folder (/kaggle/working/dental_xray_boxed) for processed and annotated data.

2. Bounding Box Annotation Process

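* Implementing a processing loop that reads YOLO-formatted labels (one `class x_center y_center width height` row per box, with coordinates normalized to the image size).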

* Scaling normalized coordinates to absolute pixel values and drawing bounding rectangles using OpenCV.

3. Label Management

* Automating the movement and matching of label files to their corresponding annotated images in the working directory.

4. Sample Visualization

* Generating a 2x2 visual grid of processed X-rays to verify that the bounding boxes correctly align with dental structures.

## Environment Setup

In [None]:
# Install the ultralytics package only; --no-deps tells pip not to install its
# dependencies (presumably to leave the environment's preinstalled packages
# untouched - confirm). Without -U/--upgrade, an already-installed ultralytics
# is left at its current version.
!pip install ultralytics --no-deps

## Data Processing and Visualization

In [None]:
import os
import cv2
import random
import shutil
import matplotlib.pyplot as plt
from pathlib import Path

# Input locations: dataset folder with an "images" and a "labels" subfolder.
root = Path(
    "/kaggle/input/dental-x-ray/Multi-Source Dental X-Ray Dataset Using Image-to-I/Teeth Xray Image Dataset/Teeth Xray Image Dataset/Teeth View Xray Image Dataset/Dental Xray (Ojected Detection)"
)
image_dir, label_dir = root / "images", root / "labels"

# Output locations: annotated images and label copies are written here.
output_root = Path("/kaggle/working/dental_xray_boxed")
output_images, output_labels = output_root / "images", output_root / "labels"

# Create both output folders (and any missing parents); existing ones are kept.
for out_dir in (output_images, output_labels):
    out_dir.mkdir(parents=True, exist_ok=True)


#  Random color generator

def random_color():
    """Return a 3-tuple of random integers, each drawn from 50..255 inclusive.

    The tuple is used as the drawing color for one bounding box and its label.
    """
    return tuple(random.randint(50, 255) for _ in range(3))


# Main loop: for every .jpg that has a matching label file, draw each labelled
# box (plus a "class N" caption) on the image, save the result to
# output_images, and copy the label file to output_labels.

print(" Processing images...")

# Sorted so the files are visited in the same order on every run.
for img_file in sorted(image_dir.glob("*.jpg")):
    label_file = label_dir / f"{img_file.stem}.txt"
    if not label_file.exists():
        print(f" Missing label for {img_file.name}")
        continue

    img = cv2.imread(str(img_file))
    if img is None:
        print(f" Could not read {img_file}")
        continue

    h, w = img.shape[:2]

    with open(label_file, "r") as f:
        for line in f:
            parts = line.split()
            # Each row must be: class x_center y_center width height
            if len(parts) != 5:
                continue
            try:
                cls, x_c, y_c, bw, bh = map(float, parts)
            except ValueError:
                # A corrupt row should not abort the whole run.
                print(f" Skipping non-numeric label line in {label_file.name}")
                continue

            # Coordinates are fractions of the image size; scale to pixels.
            x_c, y_c, bw, bh = x_c * w, y_c * h, bw * w, bh * h
            x1, y1 = int(x_c - bw / 2), int(y_c - bh / 2)
            x2, y2 = int(x_c + bw / 2), int(y_c + bh / 2)

            color = random_color()
            cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)

            # Keep the caption on the canvas when the box touches the top or
            # left edge; otherwise the text would be drawn outside the image.
            text_origin = (max(x1, 0), max(y1 - 5, 15))
            cv2.putText(img, f"class {int(cls)}", text_origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

    # Save boxed image; imwrite reports failure through its return value.
    save_img_path = output_images / img_file.name
    if not cv2.imwrite(str(save_img_path), img):
        print(f" Could not write {save_img_path}")
        continue

    # Copy corresponding label only once the image is safely on disk, so the
    # output folders never hold a label without its image.
    shutil.copy(label_file, output_labels / label_file.name)

print(f" All done!\n Output directory: {output_root}")


#  Preview: display up to four of the saved, annotated images in a 2x2 grid.

sample_images = list(output_images.glob("*.jpg"))[:4]
if not sample_images:
    print("No images found to display ")
else:
    plt.figure(figsize=(12, 8))
    for slot, img_path in enumerate(sample_images, start=1):
        # OpenCV loads BGR; matplotlib expects RGB.
        rgb = cv2.cvtColor(cv2.imread(str(img_path)), cv2.COLOR_BGR2RGB)
        plt.subplot(2, 2, slot)
        plt.imshow(rgb)
        plt.title(img_path.name)
        plt.axis("off")
    plt.tight_layout()
    plt.show()


## Train/Val/Test Split and Dataset YAML

In [None]:
import os
import random
import shutil
import yaml
from pathlib import Path
from tqdm import tqdm

# -----------------------------
# Paths
# -----------------------------
# NOTE(review): the annotation cell writes images with the boxes and class
# captions already drawn on them into this folder, so splitting from here puts
# the ground truth into the training pixels. Consider pointing IMG_DIR at the
# raw images instead.
DATA_ROOT = Path("/kaggle/working/dental_xray_boxed")
IMG_DIR = DATA_ROOT / "images"
LBL_DIR = DATA_ROOT / "labels"

OUTPUT_ROOT = Path("/kaggle/working/dental_xray_split")
splits = ["train", "val", "test"]
split_ratios = [0.7, 0.15, 0.15]
SPLIT_SEED = 42  # fixed so the same images land in the same split on every run

# -----------------------------
# Gather and shuffle images
# -----------------------------
valid_exts = [".jpg", ".jpeg", ".png"]
# Sorted first: iterdir() order is arbitrary, and a seeded shuffle is only
# reproducible when it starts from the same ordering.
all_images = sorted(p for p in IMG_DIR.iterdir() if p.suffix.lower() in valid_exts)
if not all_images:
    raise FileNotFoundError(f"No images found in {IMG_DIR}!")

# A private Random instance keeps the split reproducible without touching the
# global random state used elsewhere in the notebook.
random.Random(SPLIT_SEED).shuffle(all_images)
n = len(all_images)
n_train = int(split_ratios[0] * n)
n_val = int(split_ratios[1] * n)

# The test split takes whatever is left after train and val.
splits_files = {
    "train": all_images[:n_train],
    "val": all_images[n_train:n_train + n_val],
    "test": all_images[n_train + n_val:]
}

# -----------------------------
# Create output directories
# -----------------------------
# Start each split folder empty: files left over from an earlier run could
# otherwise put the same image into more than one split.
for s in splits:
    split_dir = OUTPUT_ROOT / s
    if split_dir.exists():
        shutil.rmtree(split_dir)
    (split_dir / "images").mkdir(parents=True, exist_ok=True)
    (split_dir / "labels").mkdir(parents=True, exist_ok=True)

# -----------------------------
# Copy images and labels
# -----------------------------
# Each image goes to <split>/images; its label file, when one exists, goes to
# <split>/labels under the same stem.
for split_name, files in splits_files.items():
    print(f"\n Copying {split_name} set ({len(files)} images)...")
    split_dir = OUTPUT_ROOT / split_name
    img_out_dir, lbl_out_dir = split_dir / "images", split_dir / "labels"

    for img_path in tqdm(files, desc=split_name, ncols=80):
        shutil.copy(img_path, img_out_dir / img_path.name)
        lbl_path = LBL_DIR / (img_path.stem + ".txt")
        if not lbl_path.exists():
            continue  # image is copied without a label file
        shutil.copy(lbl_path, lbl_out_dir / lbl_path.name)

# -----------------------------
# Create YOLO data.yaml
# -----------------------------
# Class names in index order; "nc" below is the number of entries.
class_names = [
    "BDC-BDR Teeth",
    "Caries Teeth",
    "Fractured Teeth",
    "Healthy Teeth",
    "Impacted Teeth",
    "Inflection Teeth",
]

# Absolute, forward-slash image-folder path for each split, followed by the
# class count and names. Key order is kept as written (sort_keys=False).
data_yaml = {
    split: str((OUTPUT_ROOT / split / "images").resolve()).replace("\\", "/")
    for split in ("train", "val", "test")
}
data_yaml["nc"] = len(class_names)
data_yaml["names"] = class_names

yaml_path = OUTPUT_ROOT / "dental.yaml"
with yaml_path.open("w") as f:
    yaml.dump(data_yaml, f, sort_keys=False)

# -----------------------------
# Done
# -----------------------------
# Report where each split and the dataset YAML were written.
print("\n Dataset split completed successfully!")
print(" Split directories:")
for s in splits:
    print(f"   - {s.upper():5}: {(OUTPUT_ROOT / s).resolve()}")

# The leading emoji was mis-encoded in the original message; restored here.
print(f"\n📄 YAML file created at: {yaml_path.resolve()}")


## Model Training

In [None]:
from ultralytics import YOLO
from pathlib import Path

# -----------------------------
# Paths
# -----------------------------
DATA_YAML = "/kaggle/working/dental_xray_split/dental.yaml"  # dataset YAML
SAVE_DIR = "/kaggle/working/yolo_output"                     # project output
PROJECT_NAME = "dental_counting"                             # run name

# -----------------------------
# Load pretrained YOLO model
# -----------------------------
# Nano version for Kaggle GPU
model = YOLO("yolo11n.pt")

# -----------------------------
# Train the model
# -----------------------------
# Training options, gathered in one place so they are easy to adjust.
train_args = {
    "data": DATA_YAML,
    "epochs": 50,           # adjust if needed
    "batch": 16,            # reduce if memory issues
    "imgsz": 640,
    "project": SAVE_DIR,    # project folder
    "name": PROJECT_NAME,
    "exist_ok": True,       # overwrite if folder exists
    "save": True,           # ensure results are saved (results.csv & plots)
}
model.train(**train_args)


## Training Results Table

In [None]:
import pandas as pd

# Path to the results file written by the training run
path = "/kaggle/working/yolo_output/dental_counting/results.csv"

# Read CSV
df = pd.read_csv(path)
# Some Ultralytics versions pad the column names with spaces; strip them so
# the headers display and index cleanly (no effect when they are not padded).
df.columns = df.columns.str.strip()

# Show last few rows (contains precision/recall/mAP)
print(df.tail())

# Or just display nicely. The caption names the model this notebook actually
# trains (YOLOv11), not YOLOv8.
df.style.set_caption("YOLOv11 Evaluation Results").format(precision=3)


## Per-Class Evaluation Metrics

In [None]:
import pandas as pd
from io import StringIO

# Raw YOLO result text: one row per class, preceded by the overall "all" row.
data = """all        247       2783      0.719      0.741      0.793      0.684
BDC-BDR Teeth        217        684       0.84      0.813        0.9      0.774
Caries Teeth        218        624      0.668      0.659       0.74      0.635
Fractured Teeth        140        311      0.638      0.598      0.669      0.558
Healthy Teeth        144        260      0.691      0.719      0.756      0.621
Impacted Teeth        183        384      0.839       0.87      0.911      0.821
Inflection Teeth        207        520       0.64      0.786      0.783      0.693"""

# Convert into a DataFrame.
# The text has no header line, so header=None is required: without it pandas
# would consume the "all" row as column names and that row would be lost.
# Columns are split on runs of two or more spaces so that class names
# containing a single space stay in one field.
metric_columns = ["Class", "Images", "Instances", "Precision", "Recall", "mAP50", "mAP50-95"]
df = pd.read_csv(
    StringIO(data),
    sep=r"\s{2,}",
    engine="python",
    header=None,
    names=metric_columns,
)

print(df)


## Data Saving and Organization

In [None]:
# -----------------------------
# 5. Print saved paths
# -----------------------------
# Report where the training run stored its outputs. The leading check mark
# was mis-encoded in the original message; restored here.
print("✅ Training completed!")
print(f"Logs, results.csv, plots, and weights saved at: {SAVE_DIR}/{PROJECT_NAME}")
print(f"Best weights: {SAVE_DIR}/{PROJECT_NAME}/weights/best.pt")
print(f"Final weights: {SAVE_DIR}/{PROJECT_NAME}/weights/last.pt")

## Final Results Visualization

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# -----------------------------
# Paths
# -----------------------------
results_path = "/kaggle/working/yolo_output/dental_counting/results.csv"  # training results
run_dir = "/kaggle/working/yolo_output/dental_counting"

# Loss components that may appear in results.csv as "train/<key>_loss" and
# "val/<key>_loss", mapped to the short label used in legends and printouts.
LOSS_LABELS = {"box": "Box", "cls": "Cls", "dfl": "DFL", "seg": "Seg"}


def present_loss_components(columns):
    """Return the LOSS_LABELS keys that have both a train and a val column.

    Args:
        columns: iterable of column names from the results table.

    Returns:
        List of component keys, in LOSS_LABELS order.
    """
    available = set(columns)
    return [
        key for key in LOSS_LABELS
        if f"train/{key}_loss" in available and f"val/{key}_loss" in available
    ]


def total_loss(frame, split, components):
    """Return the per-epoch sum of the given loss components for one split.

    Args:
        frame: results table with "<split>/<key>_loss" columns.
        split: "train" or "val".
        components: component keys to add up.
    """
    return sum(frame[f"{split}/{key}_loss"] for key in components)


def plot_loss_pair(ax, frame, key, title):
    """Plot the train and val curves of one loss component on the given axes."""
    label = LOSS_LABELS[key]
    ax.plot(frame['epoch'], frame[f'train/{key}_loss'], label=f'Train {label} Loss', linewidth=2)
    ax.plot(frame['epoch'], frame[f'val/{key}_loss'], label=f'Val {label} Loss', linewidth=2)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    ax.grid(True, alpha=0.3)


# -----------------------------
# Check if results CSV exists
# -----------------------------
if os.path.exists(results_path):
    # Read the results CSV
    results = pd.read_csv(results_path)
    # Some Ultralytics versions pad the column names with spaces; strip them
    # so the lookups below work either way.
    results.columns = results.columns.str.strip()
    components = present_loss_components(results.columns)

    # Create subplots
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('YOLO Dental X-ray - Training Metrics', fontsize=16, fontweight='bold')

    # Plot 1: Box Loss
    plot_loss_pair(axes[0, 0], results, 'box', 'Box Loss')

    # Plot 2: Segmentation Loss when the run logged one; otherwise the DFL
    # loss, so the panel is not left blank for a detection run.
    if 'seg' in components:
        plot_loss_pair(axes[0, 1], results, 'seg', 'Segmentation Loss')
    elif 'dfl' in components:
        plot_loss_pair(axes[0, 1], results, 'dfl', 'DFL Loss')
    else:
        axes[0, 1].axis('off')  # nothing to show

    # Plot 3: Classification Loss
    plot_loss_pair(axes[1, 0], results, 'cls', 'Classification Loss')

    # Plot 4: Total Loss - the sum of every component that was logged, with a
    # title that lists exactly those components.
    train_total = total_loss(results, 'train', components)
    val_total = total_loss(results, 'val', components)
    total_title = 'Total Loss (' + ' + '.join(LOSS_LABELS[key] for key in components) + ')'

    axes[1, 1].plot(results['epoch'], train_total, label='Train Total Loss', linewidth=2)
    axes[1, 1].plot(results['epoch'], val_total, label='Val Total Loss', linewidth=2)
    axes[1, 1].set_title(total_title)
    axes[1, 1].set_xlabel('Epoch')
    axes[1, 1].set_ylabel('Loss')
    axes[1, 1].legend()
    axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # -----------------------------
    # Print final loss values
    # -----------------------------
    print("=== Final Loss Values ===")
    for key in components:
        label = LOSS_LABELS[key]
        print(f"Train {label} Loss: {results[f'train/{key}_loss'].iloc[-1]:.4f}")
        print(f"Val {label} Loss:   {results[f'val/{key}_loss'].iloc[-1]:.4f}")

else:
    print(f"Results file not found at: {results_path}")
    print("Available files in run directory:")
    if os.path.exists(run_dir):
        for file in os.listdir(run_dir):
            print(f"  - {file}")
    else:
        # Say so explicitly instead of printing an empty listing.
        print(f"  (run directory not found: {run_dir})")
