In [None]:
!pip install ultralytics

In [None]:
import pandas as pd
import numpy as np
from dataclasses import dataclass
import os
import math
import shutil
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchinfo import summary as torch_summary
import cv2
import plotly.express as px
from pathlib import Path
from ultralytics import YOLO
import wandb
import time

In [None]:
@dataclass
class Config:
    """Environment-specific paths plus the hyperparameters shared by training and inference.

    The dataclass fields are supplied per environment. All remaining attributes
    (hyperparameters, normalization tensors, model name) are populated by
    :meth:`init`, which has to be called before the config is used.
    """
    train_images_folder: str
    train_labels_folder: str
    val_images_folder: str
    val_labels_folder: str
    train_csv: str
    val_csv: str
    yolo_config_yaml: str
    training_output_folder: str
    saved_weights_filepath: str
    video_input_filepath: str
    video_output_filepath: str
    device: str

    # noinspection PyAttributeOutsideInit
    def init(self, training):
        """Populate the derived settings for this config.

        Args:
            training: when truthy, ``training_output_folder`` is created if missing.

        All device-dependent values are derived from ``self.device`` so that the
        method works on any instance, regardless of which config the module-level
        ``config`` variable currently points to.
        """
        self.training = training
        if self.training:
            os.makedirs(self.training_output_folder, exist_ok=True)

        # Loading the train/val ids from the CSV files is currently disabled.
        # self.train_ids = pd.read_csv(self.train_csv)['id'].to_numpy()
        # self.val_ids = pd.read_csv(self.val_csv)['id'].to_numpy()

        self.seed = 8675309
        self.batch_size = 32
        self.starting_learning_rate = 3e-4
        self.max_epochs = 40
        self.patience = 4
        self.num_workers = 8 if self.device == 'cuda' else 0
        self.pin_memory = self.num_workers > 0
        self.image_size = 640
        self.use_amp = self.device == 'cuda'
        self.verbose = False
        # Seeded CPU generator so DataLoader shuffling is reproducible
        # (create_dataloader passes config.generator to DataLoader).
        self.generator = torch.Generator().manual_seed(self.seed)

        # ImageNet statistics as flat (3,) tensors and as (3, 1, 1) views for
        # channel-wise broadcasting, both on the CPU and on the configured device.
        self.imagenet_mean_cpu_tensor = torch.tensor(imagenet_mean_array)
        self.imagenet_std_cpu_tensor = torch.tensor(imagenet_std_array)
        self.channelwise_imagenet_mean_cpu_tensor = self.imagenet_mean_cpu_tensor.view(3, 1, 1)
        self.channelwise_imagenet_std_cpu_tensor = self.imagenet_std_cpu_tensor.view(3, 1, 1)
        # Built from self.device directly instead of a helper that reads the global `config`.
        self.imagenet_mean_gpu_tensor = torch.tensor(imagenet_mean_array, device=self.device)
        self.imagenet_std_gpu_tensor = torch.tensor(imagenet_std_array, device=self.device)
        self.channelwise_imagenet_mean_gpu_tensor = self.imagenet_mean_gpu_tensor.view(3, 1, 1)
        self.channelwise_imagenet_std_gpu_tensor = self.imagenet_std_gpu_tensor.view(3, 1, 1)

        self.model_name = 'yolo26s.pt'


# Active configuration for the notebook.
# Set to the environment-relevant config before training/inference.
config: Config = None

In [None]:
# Settings for a local run: relative paths under data/ and data_gen/, on the CPU.
local_config = Config(
    device='cpu',
    # Dataset images, labels and id lists
    train_images_folder='data/license_plates/images/train/',
    train_labels_folder='data/license_plates/labels/train/',
    val_images_folder='data/license_plates/images/val/',
    val_labels_folder='data/license_plates/labels/val/',
    train_csv='data/train.csv',
    val_csv='data/val.csv',
    yolo_config_yaml='data/license_plates.yaml',
    # Training outputs and weights
    training_output_folder='data_gen/',
    saved_weights_filepath='data_gen/yolo_best_weights.pt',
    # Video inference input/output
    video_input_filepath='data/license_plate_video.mp4',
    video_output_filepath='data_gen/license_plate_video_inference.mp4',
)
# Settings for a Kaggle run: inputs under /kaggle/input, outputs under /kaggle/working, on CUDA.
# Entries set to 'N/A' have no path configured for this environment.
kaggle_config = Config(
    device='cuda',
    # Dataset images, labels and id lists
    train_images_folder='N/A',
    train_labels_folder='N/A',
    val_images_folder='/kaggle/input/datasets/kyledunne/license-plates-dataset/data/license_plates/images/val/',
    val_labels_folder='N/A',
    train_csv='N/A',
    val_csv='N/A',
    yolo_config_yaml='/kaggle/input/datasets/kyledunne/license-plates-kaggle-yaml/license_plates_kaggle.yaml',
    # Training outputs and weights
    training_output_folder='/kaggle/working/',
    saved_weights_filepath='/kaggle/input/datasets/kyledunne/license-plates-yolo-best-weights-sanity-1/best.pt',
    # Video inference input/output
    video_input_filepath='/kaggle/input/license_plate_input_video/license_plate_input_video.mp4',
    video_output_filepath='/kaggle/working/license_plate_video_inference.mp4',
)

In [None]:
# ImageNet per-channel mean / standard deviation, kept both as plain tuples
# and as float32 numpy arrays built from those tuples.
imagenet_mean_tuple = (0.485, 0.456, 0.406)
imagenet_std_tuple = (0.229, 0.224, 0.225)
imagenet_mean_array = np.asarray(imagenet_mean_tuple, dtype=np.float32)
imagenet_std_array = np.asarray(imagenet_std_tuple, dtype=np.float32)

def gpu_tensor(numpy_array):
    """Return a new tensor holding ``numpy_array``'s data on ``config.device``.

    Reads the module-level ``config``, so that variable has to be set first.
    """
    target_device = config.device
    return torch.tensor(numpy_array, device=target_device)

def gpu_image_tensor_to_numpy_array(image_tensor):
    """Turn a normalized image tensor back into a uint8 numpy image.

    The tensor is denormalized with the channel-wise ImageNet statistics stored
    on ``config``, clamped to [0, 1], permuted with ``(1, 2, 0)`` (assumes a
    channels-first 3-D input - TODO confirm against callers), moved to the CPU,
    scaled by 255 and cast to ``np.uint8`` (the cast truncates).
    """
    mean = config.channelwise_imagenet_mean_gpu_tensor
    std = config.channelwise_imagenet_std_gpu_tensor
    restored = denormalize(image_tensor, mean, std)
    restored = torch.clamp(restored, 0, 1)
    channels_last = restored.permute(1, 2, 0).cpu().numpy()
    scaled = channels_last * 255
    return scaled.astype(np.uint8)

def normalize(tensor, mean, std):
    """Standardize ``tensor``: subtract ``mean``, then divide by ``std``."""
    centered = tensor - mean
    return centered / std

def denormalize(tensor, mean, std):
    """Invert :func:`normalize`: multiply ``tensor`` by ``std``, then add ``mean``."""
    rescaled = tensor * std
    return rescaled + mean

def print_model_torchinfo(model: nn.Module):
    """Print a torchinfo summary of ``model`` for a single square 3-channel input.

    The config only defines ``image_size`` (there is no ``image_width`` or
    ``image_height`` attribute), so the summary input is
    ``(1, 3, config.image_size, config.image_size)``.
    """
    side = config.image_size
    print(torch_summary(model, input_size=(1, 3, side, side)))

def print_model(model: nn.Module):
    """Print one line per entry of ``model.named_modules()``: ``<name> -> <class name>``."""
    for qualified_name, submodule in model.named_modules():
        class_name = submodule.__class__.__name__
        print(qualified_name, "->", class_name)

def create_dataloader(dataset, shuffle):
    """Build a ``DataLoader`` for ``dataset`` using the batching settings on ``config``.

    Args:
        dataset: the dataset to wrap.
        shuffle: forwarded to ``DataLoader(shuffle=...)``.

    Returns:
        A ``DataLoader`` configured with ``config.batch_size``,
        ``config.num_workers`` and ``config.pin_memory``.
    """
    return DataLoader(
        dataset,
        batch_size=config.batch_size,
        shuffle=shuffle,
        num_workers=config.num_workers,
        pin_memory=config.pin_memory,
        # `generator` is optional on the config; None makes DataLoader fall back
        # to its default random number generator instead of raising AttributeError.
        generator=getattr(config, 'generator', None),
    )

def _num_batches(dataloader):
    return math.ceil(len(dataloader.dataset) / config.batch_size)

In [None]:
def _box_corners(x_center, y_center, box_w, box_h, image_w, image_h):
    """Convert a normalized centre/size box into pixel corner coordinates (x0, y0, x1, y1)."""
    half_w = box_w / 2
    half_h = box_h / 2
    return (
        (x_center - half_w) * image_w,
        (y_center - half_h) * image_h,
        (x_center + half_w) * image_w,
        (y_center + half_h) * image_h,
    )


def plot_images_with_bounding_boxes(images, pred_bounding_boxes=None, pred_boxes_confidence=None, true_bounding_boxes=None):
    """Show each image in its own plotly figure with optional box overlays.

    Args:
        images: sequence of image arrays; height and width come from ``shape[:2]``.
        pred_bounding_boxes: optional per-image lists of ``(x_center, y_center, w, h)``
            boxes, scaled by the image width/height before drawing; drawn in blue.
        pred_boxes_confidence: optional per-image lists of confidences, aligned with
            ``pred_bounding_boxes``; rendered as an integer percentage at the
            top-left corner of each predicted box.
        true_bounding_boxes: optional per-image lists of boxes in the same format;
            drawn in orange.
    """
    for image_index, image in enumerate(images):
        image_h, image_w = image.shape[:2]
        figure = px.imshow(image)

        if true_bounding_boxes is not None:
            for x_center, y_center, bw, bh in true_bounding_boxes[image_index]:
                x0, y0, x1, y1 = _box_corners(x_center, y_center, bw, bh, image_w, image_h)
                figure.add_shape(type='rect', x0=x0, y0=y0, x1=x1, y1=y1, line_color='orange')

        if pred_bounding_boxes is not None:
            for box_index, (x_center, y_center, bw, bh) in enumerate(pred_bounding_boxes[image_index]):
                x0, y0, x1, y1 = _box_corners(x_center, y_center, bw, bh, image_w, image_h)
                figure.add_shape(type='rect', x0=x0, y0=y0, x1=x1, y1=y1, line_color='blue')
                if pred_boxes_confidence is None:
                    continue
                percent = int(pred_boxes_confidence[image_index][box_index] * 100)
                label = f"{percent}%"
                figure.add_annotation(x=x0, y=y0, text=label, showarrow=False,
                                      font=dict(color='blue'), xanchor='left', yanchor='bottom')

        figure.show()

In [None]:
def plot_val_images_with_ground_truth_bounding_boxes(num_images_to_show=3):
    """Plot a random sample of validation images with their ground-truth boxes.

    Image ids come from ``config.val_ids`` when that attribute exists; otherwise
    they are discovered from the ``*.jpg`` files in ``config.val_images_folder``.
    Each id maps to ``<val_images_folder>/<id>.jpg`` and ``<val_labels_folder>/<id>.txt``.
    Only the last four whitespace-separated fields of each label line are used
    as the box coordinates.

    Args:
        num_images_to_show: maximum number of distinct images to plot; clamped to
            the number of available ids.

    Raises:
        FileNotFoundError: if no validation ids are available or an image cannot be read.
    """
    val_ids = getattr(config, 'val_ids', None)
    if val_ids is None:
        # The config does not carry an id list; derive it from the image folder.
        val_ids = sorted(path.stem for path in Path(config.val_images_folder).glob('*.jpg'))
    if len(val_ids) == 0:
        raise FileNotFoundError(f'No validation images found in {config.val_images_folder}')

    # Sample without replacement so the same image is never shown twice.
    sample_size = min(num_images_to_show, len(val_ids))
    ids = np.random.choice(val_ids, sample_size, replace=False)

    images = []
    all_boxes = []
    for image_id in ids:
        image_path = os.path.join(config.val_images_folder, f'{image_id}.jpg')
        label_path = os.path.join(config.val_labels_folder, f'{image_id}.txt')

        # cv2.imread returns None instead of raising when the file is missing or unreadable.
        bgr_image = cv2.imread(image_path)
        if bgr_image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        images.append(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))

        boxes = []
        # An image without a label file is treated as having no boxes.
        if os.path.isfile(label_path):
            with open(label_path, 'r') as label_file:
                for line in label_file:
                    fields = line.split()
                    if len(fields) < 4:
                        continue  # skip blank or malformed lines
                    boxes.append([float(value) for value in fields[-4:]])
        all_boxes.append(boxes)

    plot_images_with_bounding_boxes(images, true_bounding_boxes=all_boxes)

In [None]:
def train_yolo():
    """Train the YOLO model named by ``config.model_name`` and log the run to Weights & Biases.

    Steps:
        1. Start a wandb run that records the hyperparameters taken from ``config``.
        2. Train with ``YOLO.train`` using ``config.yolo_config_yaml`` as the dataset definition.
        3. Copy ``<results.save_dir>/weights/best.pt`` to
           ``config.training_output_folder`` and upload it as a wandb artifact.

    The wandb run is always closed, even when training or the copy fails, so a
    failed attempt does not leave an open run behind in the kernel.

    Returns:
        The object returned by ``YOLO.train``.
    """
    start_time = time.time()
    print('t=0: Initializing training setup...')

    # Initialize wandb run (logs hyperparams and enables automatic metric logging)
    wandb.init(
        project="license-plate-detection",
        name=f'run={int(start_time)}',
        config={
            "model": config.model_name,
            "epochs": config.max_epochs,
            "batch_size": config.batch_size,
            "image_size": config.image_size,
            "lr0": config.starting_learning_rate,
            "patience": config.patience,
            "seed": config.seed,
            "amp": config.use_amp,
            "device": config.device,
        }
    )

    try:
        model = YOLO(config.model_name)

        # verbose=True is passed explicitly here; config.verbose is not consulted.
        results = model.train(
            data=config.yolo_config_yaml,
            epochs=config.max_epochs,
            imgsz=config.image_size,
            batch=config.batch_size,
            lr0=config.starting_learning_rate,
            patience=config.patience,
            seed=config.seed,
            amp=config.use_amp,
            workers=config.num_workers,
            device=config.device,
            verbose=True,
            plots=True,
        )

        # Copy best model weights to config.training_output_folder
        best_src = Path(results.save_dir) / 'weights' / 'best.pt'
        os.makedirs(config.training_output_folder, exist_ok=True)
        best_dst = Path(config.training_output_folder) / 'best.pt'
        shutil.copy(str(best_src), str(best_dst))
        print(f'Best model saved to: {best_dst}')

        # Upload best model to wandb as an artifact (replaces add_wandb_callback checkpointing,
        # which is broken in kaggle environment due to unresolved imports in the callback module)
        artifact = wandb.Artifact(name='best-model', type='model')
        artifact.add_file(str(best_dst))
        wandb.log_artifact(artifact)
    finally:
        # Close the run on success and on failure alike.
        wandb.finish()

    return results


In [None]:
def predict_yolo():
    """Validate the saved weights, print headline AP metrics, and predict on the val images.

    Loads ``config.saved_weights_filepath``, runs ``YOLO.val`` on the dataset
    described by ``config.yolo_config_yaml``, prints ``box.map``, ``box.map50``
    and ``box.map75`` from the validation result, then runs ``YOLO.predict``
    over ``config.val_images_folder``.

    Returns:
        Whatever ``YOLO.predict`` returns for the validation image folder.
    """
    detector = YOLO(config.saved_weights_filepath)
    shared_args = dict(imgsz=config.image_size, device=config.device)

    # Validation pass. Per the original author's notes, save_json=True is meant to
    # trigger the pycocotools COCOeval printout when COCO-format annotations are
    # available - NOTE(review): not verifiable from this notebook, confirm.
    val_results = detector.val(
        data=config.yolo_config_yaml,
        batch=config.batch_size,
        workers=config.num_workers,
        verbose=True,
        plots=True,
        save_json=True,
        **shared_args,
    )

    # Headline AP figures read from the validation result's `box` metrics.
    print('\n--- COCO Metrics (ultralytics-native) ---')
    box_metrics = val_results.box
    print(f'  AP @[IoU=0.50:0.95 | area=all | maxDets=100] = {box_metrics.map:.3f}')
    print(f'  AP @[IoU=0.50      | area=all | maxDets=100] = {box_metrics.map50:.3f}')
    print(f'  AP @[IoU=0.75      | area=all | maxDets=100] = {box_metrics.map75:.3f}')

    # Separate prediction pass over the val folder to obtain per-image results
    # (with bounding boxes) for visualization.
    return detector.predict(
        source=config.val_images_folder,
        verbose=False,
        **shared_args,
    )

In [None]:
def predict_yolo_with_visualizations(num_to_visualize=3):
    """Run :func:`predict_yolo` and plot a random sample of its predictions.

    Args:
        num_to_visualize: maximum number of distinct predictions to plot. It is
            clamped to the number of predictions available, so a small validation
            folder no longer makes ``np.random.choice(..., replace=False)`` raise.
    """
    predictions = predict_yolo()
    if len(predictions) == 0:
        print('No predictions available to visualize.')
        return

    sample_size = min(num_to_visualize, len(predictions))
    indices = np.random.choice(len(predictions), sample_size, replace=False)
    subset = [predictions[i] for i in indices]

    # orig_img is converted from BGR to RGB before plotting.
    images = [cv2.cvtColor(r.orig_img, cv2.COLOR_BGR2RGB) for r in subset]
    pred_bounding_boxes = [r.boxes.xywhn.cpu().numpy().tolist() for r in subset]
    pred_boxes_confidence = [r.boxes.conf.cpu().numpy().tolist() for r in subset]

    plot_images_with_bounding_boxes(
        images,
        pred_bounding_boxes=pred_bounding_boxes,
        pred_boxes_confidence=pred_boxes_confidence,
    )

In [None]:
def predict_video():
    """Run detection on every frame of the input video and write an annotated copy.

    Reads ``config.video_input_filepath`` frame by frame, predicts with the
    weights at ``config.saved_weights_filepath``, draws each detected box and its
    confidence percentage onto the frame, and writes the frames to
    ``config.video_output_filepath`` with the input's frame size and FPS.

    The capture and the writer are released even if inference fails part-way, so
    the output file handle is not left open. If the input video or the output
    writer cannot be opened, an error is printed and the function returns
    without processing.
    """
    model = YOLO(config.saved_weights_filepath)

    video = cv2.VideoCapture(config.video_input_filepath)
    output_file = None
    try:
        if not video.isOpened():
            print("Error opening video file")
            return

        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)

        out_dir = os.path.dirname(config.video_output_filepath)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        output_file = cv2.VideoWriter(
            filename=config.video_output_filepath,
            fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
            fps=float(fps),
            frameSize=(width, height),
            isColor=True,
        )
        # Without this check a failed writer would drop every frame while the
        # function still reported success.
        if not output_file.isOpened():
            print(f'Error opening video writer for: {config.video_output_filepath}')
            return

        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break

            results = model.predict(
                source=frame,
                imgsz=config.image_size,
                device=config.device,
                verbose=False,
            )

            for result in results:
                boxes = result.boxes.xyxy.cpu().numpy()  # [N, 4] pixel coords (x1, y1, x2, y2)
                confs = result.boxes.conf.cpu().numpy()  # [N]
                for (x1, y1, x2, y2), conf in zip(boxes, confs):
                    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                    cv2.rectangle(frame, (x1, y1), (x2, y2), color=(0, 165, 255), thickness=2)
                    label = f'{int(conf * 100)}%'
                    cv2.putText(frame, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX,
                                fontScale=0.6, color=(0, 165, 255), thickness=2)

            output_file.write(frame)
    finally:
        # Always release both handles, including on early return or exception.
        video.release()
        if output_file is not None:
            output_file.release()

    print(f'Video saved to: {config.video_output_filepath}')

In [None]:
# Kaggle-only cell: kaggle_secrets is provided by the Kaggle runtime, so the import
# stays here rather than in the shared import cell.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
wandb_key = user_secrets.get_secret("wandb_key")
# Log in through the Python API instead of `!wandb login $wandb_key`, so the API key
# is never interpolated into a shell command line.
wandb.login(key=wandb_key);

In [None]:
# Select the Kaggle environment. The global `config` has to be bound before anything
# else runs, because helpers in this notebook (e.g. gpu_tensor, train_yolo) read it
# at call time.
config = kaggle_config
# Populate the derived settings; training=True also creates the training output folder.
config.init(training=True)
# Launch training: logs the run to Weights & Biases and copies the best weights
# into config.training_output_folder.
train_yolo()