In [None]:
!pip install -q -U ultralytics

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import xml.etree.ElementTree as ET
import yaml
import shutil
from pathlib import Path
from collections import Counter
from ultralytics import YOLO
from sklearn.model_selection import train_test_split

In [None]:
# Dataset locations: everything lives under one root directory.
dataset_dir = '/kaggle/input/super-ai-engineer-5-DENTEX/DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023'
train_images_dir, test_images_dir, sample_submission_path = (
    os.path.join(dataset_dir, leaf)
    for leaf in ('train', 'test', 'sample_submission.csv')
)

# Annotation label -> integer class id (ids follow the order listed here).
class_mapping = {
    label: class_id
    for class_id, label in enumerate(
        ('Caries', 'Deep Caries', 'Periapical Lesion', 'Impacted')
    )
}

# Function to parse XML annotations
def parse_xml(xml_path):
    """Read image size and bounding boxes from an XML annotation file.

    The file is expected to contain a ``size`` element (with ``width``,
    ``height`` and ``depth`` children) and zero or more ``object`` elements,
    each with a ``name`` and a ``bndbox`` (``xmin``/``ymin``/``xmax``/``ymax``).

    Args:
        xml_path: Path to the annotation file.

    Returns:
        A ``(width, height, depth, objects)`` tuple where ``objects`` is a
        list of dicts with keys ``name``, ``xmin``, ``ymin``, ``xmax`` and
        ``ymax``. All numeric values are ints.

    Raises:
        AttributeError: If a required element is missing.
        ValueError: If a numeric field cannot be parsed as a number.
    """

    def _int_field(parent, tag):
        text = parent.find(tag).text
        try:
            return int(text)
        except ValueError:
            # Values written as floats ("12.0") are rejected by int();
            # go through float and truncate toward zero instead of crashing.
            return int(float(text))

    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    width = _int_field(size, 'width')
    height = _int_field(size, 'height')
    depth = _int_field(size, 'depth')

    objects = []
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            'xmin': _int_field(bndbox, 'xmin'),
            'ymin': _int_field(bndbox, 'ymin'),
            'xmax': _int_field(bndbox, 'xmax'),
            'ymax': _int_field(bndbox, 'ymax'),
        })
    return width, height, depth, objects

# Function to convert XML to YOLO format
def xml_to_yolo(xml_path, output_dir, class_mapping):
    """Convert one XML annotation file into a YOLO-format label file.

    Each object becomes one line ``class_id x_center y_center width height``
    with the four box values divided by the image width/height. The output
    file is written to ``output_dir`` under the annotation's base name with a
    ``.txt`` extension; an annotation with no objects yields an empty file.

    Args:
        xml_path: Path to the source annotation file.
        output_dir: Existing directory that receives the ``.txt`` file.
        class_mapping: Dict mapping object names to integer class ids.

    Raises:
        KeyError: If an object's name is not a key of ``class_mapping``.
    """
    width, height, _depth, objects = parse_xml(xml_path)
    yolo_lines = []
    for obj in objects:
        class_id = class_mapping[obj['name']]
        x_center = (obj['xmin'] + obj['xmax']) / 2 / width
        y_center = (obj['ymin'] + obj['ymax']) / 2 / height
        bbox_width = (obj['xmax'] - obj['xmin']) / width
        bbox_height = (obj['ymax'] - obj['ymin']) / height
        yolo_lines.append(f"{class_id} {x_center} {y_center} {bbox_width} {bbox_height}")

    # Swap only the extension: str.replace('.xml', '.txt') would also rewrite
    # any '.xml' occurring in the middle of the file name.
    stem = os.path.splitext(os.path.basename(xml_path))[0]
    output_path = os.path.join(output_dir, stem + '.txt')
    with open(output_path, 'w') as f:
        f.write('\n'.join(yolo_lines))

# Create YOLO format annotations in the writeable directory
yolo_labels_dir = '/kaggle/working/yolo_labels'
os.makedirs(yolo_labels_dir, exist_ok=True)

# Convert all XML files to YOLO format
for xml_file in sorted(os.listdir(train_images_dir)):
    if xml_file.endswith('.xml'):
        xml_path = os.path.join(train_images_dir, xml_file)
        xml_to_yolo(xml_path, yolo_labels_dir, class_mapping)

# Split dataset into training and validation sets.
# os.listdir returns entries in arbitrary order, so sort first; otherwise
# random_state=42 does not give the same split from one run to the next.
image_files = sorted(f for f in os.listdir(train_images_dir) if f.endswith('.png'))
train_files, val_files = train_test_split(image_files, test_size=0.2, random_state=42)

# Directories for training and validation sets (images and labels side by side)
train_dir = '/kaggle/working/train'
val_dir = '/kaggle/working/val'


def _copy_split(files, dest_dir):
    """Copy each image and, when it exists, its label file into dest_dir.

    Returns the number of images that had no label file.
    """
    os.makedirs(dest_dir, exist_ok=True)
    unlabeled = 0
    for image_name in files:
        shutil.copy(os.path.join(train_images_dir, image_name),
                    os.path.join(dest_dir, image_name))
        label_name = os.path.splitext(image_name)[0] + '.txt'
        label_src = os.path.join(yolo_labels_dir, label_name)
        if os.path.isfile(label_src):
            shutil.copy(label_src, os.path.join(dest_dir, label_name))
        else:
            # An image without an XML annotation has no label file; copying it
            # unconditionally raised FileNotFoundError. Ultralytics documents
            # that images with no objects need no label file.
            unlabeled += 1
    return unlabeled


# Copy files to training and validation directories
for split_name, split_files, split_dir in (
    ('train', train_files, train_dir),
    ('val', val_files, val_dir),
):
    unlabeled_count = _copy_split(split_files, split_dir)
    if unlabeled_count:
        print(f"{split_name}: {unlabeled_count} image(s) without a label file")

# Create data.yaml in the writeable directory.
# Order names by class id rather than relying on dict insertion order, so
# list position always matches the id written into the label files.
class_names = sorted(class_mapping, key=class_mapping.get)
data_yaml_content = f"""
train: {train_dir}
val: {val_dir}
nc: {len(class_mapping)}
names: {class_names}
"""
data_yaml_path = '/kaggle/working/data.yaml'
with open(data_yaml_path, 'w') as f:
    f.write(data_yaml_content)

# Load data.yaml back as a sanity check that the file written above parses
with open(data_yaml_path, 'r') as file:
    data_yaml = yaml.safe_load(file)

# Model Training: start from the pretrained YOLOv8-small checkpoint
model = YOLO('yolov8s.pt')

# Define training parameters
epochs = 50
batch_size = 16
imgsz = 640
lr0 = 4e-4
# NOTE(review): Ultralytics documents lrf as a fraction of lr0 (final LR =
# lr0 * lrf), not an absolute learning rate -- confirm 1e-6 is intended.
lrf = 1e-6
patience = 7  # passed to model.train() as the early-stopping patience
# NOTE(review): factor is never passed to model.train() below; it has no
# effect on training. Kept only so the name stays defined.
factor = 0.5

# Train the model
results = model.train(
    data=data_yaml_path,
    epochs=epochs,
    batch=batch_size,
    imgsz=imgsz,
    lr0=lr0,
    lrf=lrf,
    patience=patience,
    project='dentex_detection',
    name='yolov8s'
)

# Evaluate the model.
# metrics.box.map is the mean AP averaged over IoU thresholds 0.50:0.95, so
# label it as such; the previous "mAP@0.7" label misreported the metric.
metrics = model.val()
print(f"mAP@0.5:0.95: {metrics.box.map}")

# Make predictions on test data.
# Keep only image files (any other entry in the directory would be handed to
# the predictor) and sort them so the submission row order is reproducible.
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')
test_images = sorted(
    os.path.join(test_images_dir, img)
    for img in os.listdir(test_images_dir)
    if img.lower().endswith(image_extensions)
)
# Skip the predictor entirely when there is nothing to predict on.
predictions = model.predict(test_images) if test_images else []

# Create submission DataFrame: one row per test image, with parallel lists of
# boxes (xyxy pixel coordinates), integer class labels and confidence scores.
submission_data = [
    {
        'id': os.path.basename(image_path),
        'boxes': pred.boxes.xyxy.cpu().numpy().tolist(),
        'labels': pred.boxes.cls.cpu().numpy().astype(int).tolist(),
        'scores': pred.boxes.conf.cpu().numpy().tolist(),
    }
    for image_path, pred in zip(test_images, predictions)
]

# Explicit columns keep the CSV header intact even when there are no rows.
submission_df = pd.DataFrame(submission_data, columns=['id', 'boxes', 'labels', 'scores'])
submission_df.to_csv('/kaggle/working/submission_dentex_detect_yolo8.csv', index=False)