In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

KeyboardInterrupt: 

In [None]:
!pip install osw

In [None]:
import os
import shutil
import cv2
import numpy as np
import hashlib
from pathlib import Path
import logging

# Configure the root logger: INFO level, "time - level - message" lines.
# Records below INFO are dropped; nothing here filters per-image messages
# specifically.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger for this cell, named after the running module.
logger = logging.getLogger(__name__)

class DatasetCleaner:
    """Clean a YOLO-style detection dataset on a working copy.

    ``clean()`` copies the ``*.jpg`` images and ``*.txt`` labels into
    ``output_dir`` and then, on that copy only, removes corrupted images,
    invalid or empty label files (each after confirmation), unmatched
    images/labels and byte-identical duplicate images.  Whenever a file is
    removed, its image/label partner (same stem) is removed with it.

    The lists ``duplicates``, ``corrupted_images``, ``invalid_labels``,
    ``empty_labels``, ``unmatched_images`` and ``unmatched_labels`` hold
    ``Path`` objects and are rebuilt by the matching ``find_*`` method each
    time it runs.
    """

    # Looked up here so the class does not depend on a module-level global;
    # it is the same logger object ``logging.getLogger(__name__)`` returns
    # anywhere else in this module.
    _log = logging.getLogger(__name__)

    def __init__(self, input_img_dir='/kaggle/input/license-detection/license_images', 
                 input_label_dir='/kaggle/input/license-detection/license_labels', 
                 output_dir='/kaggle/working/license-detection-cleaned'):
        """Store the input/output locations; no file system access happens here."""
        self.input_img_dir = Path(input_img_dir)
        self.input_label_dir = Path(input_label_dir)
        self.output_dir = Path(output_dir)
        self.output_img_dir = self.output_dir / 'license_images'
        self.output_label_dir = self.output_dir / 'license_labels'
        self.duplicates = []
        self.corrupted_images = []
        self.invalid_labels = []
        self.empty_labels = []
        self.unmatched_images = []
        self.unmatched_labels = []

    def _info(self, message):
        """Send *message* to the logger and echo it on stdout."""
        self._log.info(message)
        print(f"INFO: {message}")

    def _confirm(self, question, auto_confirm=None):
        """Return True when removal is approved.

        ``auto_confirm`` of True/False answers without prompting; ``None``
        asks on stdin.  A closed stdin (``EOFError``) counts as "no" so a
        non-interactive run keeps the files instead of crashing.
        """
        if auto_confirm is not None:
            return bool(auto_confirm)
        try:
            answer = input(question)
        except EOFError:
            return False
        return answer.strip().lower() == 'y'

    def _remove_if_confirmed(self, files, found_message, question,
                             declined_message, file_type, auto_confirm):
        """Announce *files*, ask for consent and remove them; return the count removed."""
        if not files:
            return 0
        print(f"INFO: {found_message}")
        if self._confirm(question, auto_confirm):
            return self.remove_files(files, file_type)
        print(f"INFO: {declined_message}")
        return 0

    def copy_dataset(self):
        """Copy the dataset to the output directory (``*.jpg`` and ``*.txt`` only)."""
        self._info("Copying dataset to output directory...")
        self.output_img_dir.mkdir(parents=True, exist_ok=True)
        self.output_label_dir.mkdir(parents=True, exist_ok=True)

        # Copy images
        for img_file in self.input_img_dir.glob('*.jpg'):
            shutil.copy(img_file, self.output_img_dir / img_file.name)

        # Copy labels
        for label_file in self.input_label_dir.glob('*.txt'):
            shutil.copy(label_file, self.output_label_dir / label_file.name)

        self._info("Dataset copied successfully.")

    def compute_image_hash(self, img_path):
        """Return the SHA256 hex digest of the file's bytes, or None if it cannot be read."""
        digest = hashlib.sha256()
        try:
            with open(img_path, 'rb') as f:
                # Hash in 1 MiB chunks so large files are not held in memory at once.
                for chunk in iter(lambda: f.read(1 << 20), b''):
                    digest.update(chunk)
        except OSError:
            return None
        return digest.hexdigest()

    def is_valid_image(self, img_path):
        """Return True if OpenCV can decode the image and it is not uniformly 0 or 255.

        Images that decode to all-zero or all-255 pixels are treated as corrupted.
        """
        try:
            img = cv2.imread(str(img_path))
            if img is None or np.all(img == 0) or np.all(img == 255):
                return False
            return True
        except Exception:
            # Best effort: any decoder failure means the image is unusable.
            return False

    def is_valid_label(self, label_path, num_classes=1):
        """Validate a YOLO label file.

        Blank lines are ignored.  Every remaining line must hold five numbers
        ``class x y w h`` with an integral class in ``[0, num_classes)``,
        ``x``/``y`` in ``[0, 1]`` and ``w``/``h`` in ``(0, 1]``.

        Returns:
            ``(True, "valid")`` or ``(False, reason)`` where reason is one of
            ``"empty"``, ``"invalid format"``, ``"non-numeric values"``,
            ``"invalid class_id"``, ``"invalid coordinates"`` or
            ``"error reading file"``.
        """
        try:
            with open(label_path, 'r') as f:
                rows = [line.split() for line in f if line.strip()]
        except (OSError, ValueError):
            # ValueError covers undecodable bytes (UnicodeDecodeError).
            return False, "error reading file"

        if not rows:
            return False, "empty"

        for parts in rows:
            if len(parts) != 5:
                return False, "invalid format"
            try:
                class_id, x, y, w, h = map(float, parts)
            except ValueError:
                return False, "non-numeric values"
            if not (0 <= class_id < num_classes and class_id.is_integer()):
                return False, "invalid class_id"
            if not (0 <= x <= 1 and 0 <= y <= 1 and 0 < w <= 1 and 0 < h <= 1):
                return False, "invalid coordinates"
        return True, "valid"

    def find_duplicates(self):
        """Collect images whose bytes match an earlier image (by sorted file name)."""
        self._info("Checking for duplicate images...")
        self.duplicates = []
        first_seen = {}
        # Sorted so the copy that is kept does not depend on directory order.
        for img_file in sorted(self.output_img_dir.glob('*.jpg')):
            img_hash = self.compute_image_hash(img_file)
            if not img_hash:
                continue
            if img_hash in first_seen:
                self.duplicates.append(img_file)
            else:
                first_seen[img_hash] = img_file

    def find_corrupted_images(self):
        """Collect images that fail ``is_valid_image``."""
        self._info("Checking for corrupted images...")
        self.corrupted_images = [
            img_file for img_file in sorted(self.output_img_dir.glob('*.jpg'))
            if not self.is_valid_image(img_file)
        ]

    def find_invalid_labels(self):
        """Collect label files that are empty or fail validation."""
        self._info("Checking for invalid or empty labels...")
        self.invalid_labels = []
        self.empty_labels = []
        for label_file in sorted(self.output_label_dir.glob('*.txt')):
            is_valid, reason = self.is_valid_label(label_file)
            if is_valid:
                continue
            if reason == "empty":
                self.empty_labels.append(label_file)
            else:
                self.invalid_labels.append(label_file)

    def find_unmatched_files(self):
        """Collect images without a label and labels without an image (matched by stem)."""
        self._info("Checking for unmatched images and labels...")
        images = sorted(self.output_img_dir.glob('*.jpg'))
        labels = sorted(self.output_label_dir.glob('*.txt'))
        image_stems = {img_file.stem for img_file in images}
        label_stems = {label_file.stem for label_file in labels}

        self.unmatched_images = [p for p in images if p.stem not in label_stems]
        self.unmatched_labels = [p for p in labels if p.stem not in image_stems]

    def remove_files(self, files, file_type):
        """Delete each file in *files* together with its image/label partner.

        Returns the number of listed files that were actually deleted
        (partners are not counted).  Failures are logged and skipped.
        """
        removed_count = 0
        for file in files:
            # The partner lives in the other directory under the same stem.
            is_image = file.suffix == '.jpg'
            if is_image:
                pair_file = self.output_label_dir / f'{file.stem}.txt'
            else:
                pair_file = self.output_img_dir / f'{file.stem}.jpg'
            try:
                if file.exists():
                    file.unlink()
                    removed_count += 1
                if pair_file.exists():
                    pair_file.unlink()
            except Exception as e:
                message = f"Failed to remove {file_type} file {file} or its pair: {e}"
                self._log.warning(message)
                print(f"WARNING: {message}")
        return removed_count

    def clean(self, auto_confirm=None):
        """Run the whole cleaning pipeline and write ``cleaning_report.txt``.

        Args:
            auto_confirm: ``None`` (default) prompts on stdin before removing
                corrupted images, invalid labels and empty labels; ``True`` or
                ``False`` answers every prompt with that value so the pipeline
                can run unattended.

        Unmatched files and duplicates are always removed without asking.
        """
        # Step 1: Copy dataset
        self.copy_dataset()

        # Step 2: Find corrupted images
        self.find_corrupted_images()
        corrupted_image_count = len(self.corrupted_images)

        # Step 3: Find invalid or empty labels
        self.find_invalid_labels()
        invalid_label_count = len(self.invalid_labels)
        empty_label_count = len(self.empty_labels)

        # Step 4: Find unmatched images and labels
        self.find_unmatched_files()

        # Step 5: Handle corrupted images
        removed_corrupted_images = self._remove_if_confirmed(
            self.corrupted_images,
            f"Found {corrupted_image_count} corrupted images.",
            "Do you want to remove corrupted images and their labels? (y/n): ",
            "Corrupted images will not be removed.",
            "corrupted image",
            auto_confirm,
        )

        # Step 6: Handle invalid labels
        removed_invalid_labels = self._remove_if_confirmed(
            self.invalid_labels,
            f"Found {invalid_label_count} invalid label files.",
            "Do you want to remove invalid labels and their images? (y/n): ",
            "Invalid labels will not be removed.",
            "invalid label",
            auto_confirm,
        )

        # Step 7: Handle empty labels
        removed_empty_labels = self._remove_if_confirmed(
            self.empty_labels,
            f"Found {empty_label_count} empty label files.",
            "Do you want to remove empty labels and their images? (y/n): ",
            "Empty labels will not be removed.",
            "empty label",
            auto_confirm,
        )

        # Step 8: Handle unmatched images
        removed_unmatched_images = self.remove_files(self.unmatched_images, "unmatched image")

        # Step 9: Handle unmatched labels
        removed_unmatched_labels = self.remove_files(self.unmatched_labels, "unmatched label")

        # Step 10: Find and remove duplicates among the files that survived.
        # Doing this last means the copy that is kept can no longer be deleted
        # by an earlier step, which would have lost every copy of the image.
        self.find_duplicates()
        removed_duplicates = self.remove_files(self.duplicates, "duplicate")

        # Step 11: Generate report
        total_images = len(list(self.output_img_dir.glob('*.jpg')))
        total_labels = len(list(self.output_label_dir.glob('*.txt')))
        report = (
            f"Cleaning Report:\n"
            f"- Total images after cleaning: {total_images}\n"
            f"- Total labels after cleaning: {total_labels}\n"
            f"- Duplicate images found and removed: {removed_duplicates}\n"
            f"- Corrupted images found: {corrupted_image_count}\n"
            f"- Corrupted images removed: {removed_corrupted_images}\n"
            f"- Invalid labels found: {invalid_label_count}\n"
            f"- Invalid labels removed: {removed_invalid_labels}\n"
            f"- Empty labels found: {empty_label_count}\n"
            f"- Empty labels removed: {removed_empty_labels}\n"
            f"- Unmatched images found and removed: {removed_unmatched_images}\n"
            f"- Unmatched labels found and removed: {removed_unmatched_labels}\n"
        )
        self._info(report)

        # Save report to file
        report_path = self.output_dir / 'cleaning_report.txt'
        with open(report_path, 'w') as f:
            f.write(report)
        print(f"INFO: Report saved to {report_path}")

def main():
    """Run the dataset cleaning pipeline with the default Kaggle paths."""
    DatasetCleaner().clean()


# Only run the pipeline when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

In [None]:
import os
import cv2
import numpy as np
import albumentations as A
from shutil import copyfile
import logging


# Configure logging (INFO level, timestamped lines). logging.basicConfig does
# nothing when the root logger already has handlers, e.g. when an earlier cell
# has already configured it.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class DatasetAugmenter:
    """Build an augmented copy of a YOLO-format detection dataset.

    For every image in ``image_dir`` that has a valid label file in
    ``label_dir``, the original pair is copied to the output directories and
    ``augmentations_per_image`` augmented variants (``aug_<i>_<name>``) are
    written next to it.  Images without a usable label are skipped and counted
    in ``skipped_files``.
    """

    def __init__(self, image_dir, label_dir, output_image_dir, output_label_dir, image_exts=('.jpg', '.jpeg', '.png', '.bmp'), augmentations_per_image=1):
        """Initialize the augmenter with directories and settings.

        Args:
            image_dir: directory holding the source images.
            label_dir: directory holding the YOLO ``.txt`` labels.
            output_image_dir: destination for original and augmented images.
            output_label_dir: destination for original and augmented labels.
            image_exts: file extensions (with leading dot) treated as images;
                matched case-insensitively.
            augmentations_per_image: augmented variants to create per image.
        """
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.output_image_dir = output_image_dir
        self.output_label_dir = output_label_dir
        # Stored as a lower-cased tuple: immutable (no shared default list)
        # and directly usable with str.endswith.
        self.image_exts = tuple(ext.lower() for ext in image_exts)
        self.augmentations_per_image = augmentations_per_image
        self.augmented_images = 0
        self.augmented_labels = 0
        self.skipped_files = 0
        self.valid_class_ids = {0}  # License plate class only

        # Create output directories
        os.makedirs(self.output_image_dir, exist_ok=True)
        os.makedirs(self.output_label_dir, exist_ok=True)

        # Define augmentation pipeline
        self.transform = A.Compose([
            A.HorizontalFlip(p=0.5),  # Matches fliplr: 0.5
            A.VerticalFlip(p=0.5),    # Matches flipud: 0.5
            A.Rotate(limit=10, p=0.5),  # Matches degrees: 10.0
            A.HueSaturationValue(hue_shift_limit=0.015*360, sat_shift_limit=0.7*100, val_shift_limit=0.4*100, p=0.5),  # Matches hsv_h, hsv_s, hsv_v
            A.Affine(translate_percent=0.1, scale=(0.5, 1.5), shear=2.0, p=0.5),  # Matches translate, scale, shear
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.3),
            A.GaussNoise(p=0.2),
        ], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'], min_visibility=0.3))

    def _is_image(self, file_name):
        """Return True if *file_name* ends with one of the configured extensions."""
        return file_name.lower().endswith(tuple(self.image_exts))

    def _read_labels(self, label_path):
        """Parse a YOLO label file into ``(bboxes, class_labels)``.

        Blank lines are ignored.  Returns ``None`` (after logging a warning)
        when the file is unreadable, empty, or contains a malformed line, an
        unknown class id or out-of-range coordinates.
        """
        try:
            with open(label_path, 'r') as f:
                rows = [line.split() for line in f if line.strip()]
        except (OSError, ValueError) as e:
            # ValueError covers undecodable bytes (UnicodeDecodeError).
            logging.warning(f"Failed to read label {label_path}: {e}")
            return None

        if not rows:
            logging.warning(f"Empty label file {label_path}, skipping")
            return None

        bboxes = []
        class_labels = []
        for parts in rows:
            if len(parts) != 5:
                logging.warning(f"Invalid label format in {label_path}, skipping")
                return None
            try:
                # Parsed as float so "0.0" is accepted as class 0.
                class_value = float(parts[0])
            except ValueError:
                logging.warning(f"Non-numeric values in {label_path}, skipping")
                return None
            if not class_value.is_integer() or int(class_value) not in self.valid_class_ids:
                logging.warning(f"Invalid class_id {parts[0]} in {label_path}, skipping")
                return None
            try:
                x_center, y_center, width, height = map(float, parts[1:])
            except ValueError:
                logging.warning(f"Non-numeric values in {label_path}, skipping")
                return None
            if not (0 <= x_center <= 1 and 0 <= y_center <= 1 and 0 < width <= 1 and 0 < height <= 1):
                logging.warning(f"Invalid coordinates in {label_path}, skipping")
                return None
            bboxes.append([x_center, y_center, width, height])
            class_labels.append(int(class_value))
        return bboxes, class_labels

    def _save_augmented(self, index, image_file, stem, aug_image, aug_bboxes, aug_class_labels):
        """Write one augmented image and its label file; update the counters."""
        aug_image_path = os.path.join(self.output_image_dir, f"aug_{index}_{image_file}")
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(aug_image_path, cv2.cvtColor(aug_image, cv2.COLOR_RGB2BGR)):
            logging.warning(f"Failed to write augmented image {aug_image_path}, skipping")
            return
        self.augmented_images += 1

        aug_label_path = os.path.join(self.output_label_dir, f"aug_{index}_{stem}.txt")
        with open(aug_label_path, 'w') as f:
            for class_id, bbox in zip(aug_class_labels, aug_bboxes):
                x_center, y_center, width, height = bbox[:4]
                # int() keeps the class column integral even if the transform
                # hands the labels back as floats.
                f.write(f"{int(class_id)} {x_center} {y_center} {width} {height}\n")
        self.augmented_labels += 1

    def augment_image(self, image, bboxes, class_labels):
        """Apply the augmentation pipeline to one image and its boxes.

        Returns ``(image, bboxes, class_labels)`` from the transform, or
        ``(None, None, None)`` if the transform raised (a warning is logged).
        """
        try:
            augmented = self.transform(image=image, bboxes=bboxes, class_labels=class_labels)
            return augmented['image'], augmented['bboxes'], augmented['class_labels']
        except Exception as e:
            # Best effort: a failed augmentation should not abort the whole run.
            logging.warning(f"Augmentation failed: {e}")
            return None, None, None

    def process_dataset(self):
        """Process all images and labels to create the augmented dataset."""
        logging.info("Starting dataset augmentation...")
        print("INFO: Starting dataset augmentation...")

        # Sorted so the processing order does not depend on the file system.
        for image_file in sorted(os.listdir(self.image_dir)):
            if not self._is_image(image_file):
                continue

            stem = os.path.splitext(image_file)[0]
            image_path = os.path.join(self.image_dir, image_file)
            label_path = os.path.join(self.label_dir, stem + '.txt')

            # Check if label exists
            if not os.path.exists(label_path):
                logging.warning(f"No label found for {image_file}, skipping")
                self.skipped_files += 1
                continue

            # Read image
            image = cv2.imread(image_path)
            if image is None:
                logging.warning(f"Failed to read image {image_file}, skipping")
                self.skipped_files += 1
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Read labels
            parsed = self._read_labels(label_path)
            if parsed is None:
                self.skipped_files += 1
                continue
            bboxes, class_labels = parsed

            # Copy original image and label
            copyfile(image_path, os.path.join(self.output_image_dir, image_file))
            copyfile(label_path, os.path.join(self.output_label_dir, stem + '.txt'))

            # Create augmented versions
            for i in range(self.augmentations_per_image):
                aug_image, aug_bboxes, aug_class_labels = self.augment_image(image, bboxes, class_labels)
                if aug_image is not None:
                    self._save_augmented(i, image_file, stem, aug_image, aug_bboxes, aug_class_labels)

        logging.info("Dataset augmentation completed.")
        print("INFO: Dataset augmentation completed.")

    def get_report(self):
        """Return a text report of the augmentation counters and output totals."""
        total_images = sum(1 for f in os.listdir(self.output_image_dir) if self._is_image(f))
        total_labels = sum(1 for f in os.listdir(self.output_label_dir) if f.endswith('.txt'))
        return (
            f"Augmentation Report:\n"
            f"- Created {self.augmented_images} augmented image files.\n"
            f"- Created {self.augmented_labels} augmented label files.\n"
            f"- Skipped {self.skipped_files} files due to invalid labels or images.\n"
            f"- Total files in augmented dataset: {total_images} images and {total_labels} labels."
        )

# Usage
if __name__ == "__main__":
    # Ask Albumentations not to perform its update check.
    # NOTE(review): albumentations is imported at the top of this cell, before
    # this variable is set - confirm the setting still takes effect here.
    os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'

    # NOTE(review): images and labels are written under two different parent
    # directories whose names look swapped, and no later cell in this file
    # reads them - confirm these are the intended locations.
    augmenter_settings = {
        'image_dir': '/kaggle/working/license-detection-cleaned/license_images',
        'label_dir': '/kaggle/working/license-detection-cleaned/license_labels',
        'output_image_dir': '/kaggle/working/detection-license-labels-cleaned/augmented_images',
        'output_label_dir': '/kaggle/working/detection-license-images-cleaned/augmented_labels',
        'image_exts': ['.jpg', '.jpeg', '.png', '.bmp'],
        'augmentations_per_image': 1,
    }
    augmenter = DatasetAugmenter(**augmenter_settings)

    # Generate the augmented dataset, then show the summary.
    augmenter.process_dataset()
    print(augmenter.get_report())

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split
import logging

# Configure logging (INFO level, timestamped lines). logging.basicConfig does
# nothing when the root logger already has handlers, e.g. when an earlier cell
# has already configured it.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class DatasetSplitter:
    """Split an image/label dataset into ``train``/``val``/``test`` folders.

    Files are copied (not moved) into ``<output_dir>/<split>/images`` and
    ``<output_dir>/<split>/labels``.  Only images that have a label file with
    the same stem take part in the split.
    """

    def __init__(self, image_dir, label_dir, output_dir, train_ratio=0.8, val_ratio=0.15, test_ratio=0.05, image_exts=('.jpg', '.jpeg', '.png', '.bmp')):
        """Initialize the dataset splitter with directories and ratios.

        Args:
            image_dir: directory holding the source images.
            label_dir: directory holding the ``.txt`` labels.
            output_dir: root under which the split folders are created.
            train_ratio, val_ratio, test_ratio: intended share of each split.
                ``test_ratio`` is taken from the whole dataset; the remainder
                is divided by ``val_ratio / (train_ratio + val_ratio)``.
            image_exts: file extensions (with leading dot) treated as images;
                matched case-insensitively.
        """
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.output_dir = output_dir
        self.train_ratio = train_ratio
        self.val_ratio = val_ratio
        self.test_ratio = test_ratio
        # Stored as a lower-cased tuple: immutable (no shared default list)
        # and directly usable with str.endswith.
        self.image_exts = tuple(ext.lower() for ext in image_exts)

        # Create output directories
        self.train_img_dir = os.path.join(output_dir, 'train', 'images')
        self.train_label_dir = os.path.join(output_dir, 'train', 'labels')
        self.val_img_dir = os.path.join(output_dir, 'val', 'images')
        self.val_label_dir = os.path.join(output_dir, 'val', 'labels')
        self.test_img_dir = os.path.join(output_dir, 'test', 'images')
        self.test_label_dir = os.path.join(output_dir, 'test', 'labels')

        for directory in (self.train_img_dir, self.train_label_dir,
                          self.val_img_dir, self.val_label_dir,
                          self.test_img_dir, self.test_label_dir):
            os.makedirs(directory, exist_ok=True)

    def _is_image(self, file_name):
        """Return True if *file_name* ends with one of the configured extensions."""
        return file_name.lower().endswith(tuple(self.image_exts))

    @staticmethod
    def _holdout(pairs, fraction):
        """Split *pairs* into ``(kept, held_out)`` with *fraction* held out.

        When nothing can be held out (non-positive fraction or fewer than two
        items) everything is kept, instead of letting ``train_test_split``
        raise on the degenerate input.
        """
        if fraction <= 0 or len(pairs) < 2:
            return list(pairs), []
        kept, held_out = train_test_split(pairs, test_size=fraction, random_state=42)
        return kept, held_out

    def split_dataset(self):
        """Split the dataset into train, validation, and test sets."""
        logging.info("Starting dataset splitting...")
        print("INFO: Starting dataset splitting...")

        # Sorted so the fixed random_state yields the same split regardless of
        # the order in which the file system lists the directory.
        image_files = sorted(f for f in os.listdir(self.image_dir) if self._is_image(f))

        # Verify that each image has a corresponding label
        valid_pairs = []
        for img in image_files:
            lbl = os.path.splitext(img)[0] + '.txt'
            if os.path.exists(os.path.join(self.label_dir, lbl)):
                valid_pairs.append((img, lbl))
            else:
                logging.warning(f"No label found for {img}, skipping")

        if not valid_pairs:
            logging.error("No valid image-label pairs found!")
            return

        # Split into train+val and test
        train_val_pairs, test_pairs = self._holdout(valid_pairs, self.test_ratio)

        # Split train+val into train and val
        remaining_ratio = self.train_ratio + self.val_ratio
        val_fraction = self.val_ratio / remaining_ratio if remaining_ratio > 0 else 0
        train_pairs, val_pairs = self._holdout(train_val_pairs, val_fraction)

        # Copy files to respective directories
        self._copy_files(train_pairs, self.train_img_dir, self.train_label_dir)
        self._copy_files(val_pairs, self.val_img_dir, self.val_label_dir)
        self._copy_files(test_pairs, self.test_img_dir, self.test_label_dir)

        logging.info("Dataset splitting completed.")
        print("INFO: Dataset splitting completed.")

    def _copy_files(self, pairs, img_dir, label_dir):
        """Copy image and label files to the specified directories."""
        for img_file, lbl_file in pairs:
            shutil.copy(os.path.join(self.image_dir, img_file), os.path.join(img_dir, img_file))
            shutil.copy(os.path.join(self.label_dir, lbl_file), os.path.join(label_dir, lbl_file))

    def _count_images(self, directory):
        """Count files in *directory* that have an image extension."""
        return sum(1 for f in os.listdir(directory) if self._is_image(f))

    @staticmethod
    def _count_labels(directory):
        """Count ``.txt`` files in *directory*."""
        return sum(1 for f in os.listdir(directory) if f.endswith('.txt'))

    def get_report(self):
        """Return a text report with the image/label counts of each split folder."""
        train_images = self._count_images(self.train_img_dir)
        val_images = self._count_images(self.val_img_dir)
        test_images = self._count_images(self.test_img_dir)
        train_labels = self._count_labels(self.train_label_dir)
        val_labels = self._count_labels(self.val_label_dir)
        test_labels = self._count_labels(self.test_label_dir)

        return (
            f"Dataset Splitting Report:\n"
            f"- Train: {train_images} images, {train_labels} labels\n"
            f"- Validation: {val_images} images, {val_labels} labels\n"
            f"- Test: {test_images} images, {test_labels} labels\n"
        )

# Usage
if __name__ == "__main__":
    # Split the cleaned dataset in place: the train/val/test folders are
    # created inside the same directory that holds the cleaned images/labels.
    splitter_settings = {
        'image_dir': '/kaggle/working/license-detection-cleaned/license_images',
        'label_dir': '/kaggle/working/license-detection-cleaned/license_labels',
        'output_dir': '/kaggle/working/license-detection-cleaned',
        'train_ratio': 0.8,
        'val_ratio': 0.15,
        'test_ratio': 0.05,
    }
    splitter = DatasetSplitter(**splitter_settings)

    # Perform the split, then show the per-split counts.
    splitter.split_dataset()
    print(splitter.get_report())

In [None]:
import os
import yaml
import logging

# Configure logging (INFO level, timestamped lines). logging.basicConfig does
# nothing when the root logger already has handlers, e.g. when an earlier cell
# has already configured it.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def create_data_yaml(output_dir):
    """Create the ``data.yaml`` dataset description used for YOLO training.

    The file points at ``<output_dir>/{train,val,test}/images`` and declares a
    single class, ``license_plate``.

    Args:
        output_dir: dataset root; created if it does not exist yet.

    Returns:
        The path of the written ``data.yaml`` file.
    """
    logging.info("Creating data.yaml file...")
    print("INFO: Creating data.yaml file...")

    # Define the content of data.yaml
    data_yaml = {
        'train': os.path.join(output_dir, 'train', 'images'),
        'val': os.path.join(output_dir, 'val', 'images'),
        'test': os.path.join(output_dir, 'test', 'images'),
        'nc': 1,
        'names': ['license_plate']
    }

    # Make sure the target directory exists so open() cannot fail on a
    # missing parent.
    os.makedirs(output_dir, exist_ok=True)

    # Write to data.yaml file
    yaml_path = os.path.join(output_dir, 'data.yaml')
    with open(yaml_path, 'w') as f:
        yaml.dump(data_yaml, f, default_flow_style=False)

    logging.info(f"data.yaml created successfully at {yaml_path}")
    print(f"INFO: data.yaml created successfully at {yaml_path}")
    return yaml_path

if __name__ == "__main__":
    output_dir = '/kaggle/working/license-detection-cleaned'
    create_data_yaml(output_dir)

In [None]:
!pip -q install ultralytics

In [None]:
from ultralytics import YOLO
import os
import logging
import torch

# Configure logging (INFO level, timestamped lines). logging.basicConfig does
# nothing when the root logger already has handlers, e.g. when an earlier cell
# has already configured it.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def train_yolo(resume_from=None):
    """Train YOLOv8l on the license-plate dataset, or resume from a checkpoint.

    Args:
        resume_from: path to a checkpoint file.  When it exists, the model is
            loaded from it and training is started with ``resume=True`` so the
            interrupted run is continued rather than restarted from the
            checkpoint's weights.  Otherwise a new run starts from
            ``yolov8l.pt``.
            NOTE(review): a checkpoint from a run that already finished may
            not be resumable - confirm the caller's checkpoint choice.

    Returns:
        ``(results, metrics)``: the value returned by ``model.train`` and the
        value returned by ``model.val`` on the ``test`` split.

    Side effects:
        Saves the final model to ``<project>/platecheckpoints_last.pt`` and,
        if ``<project>/platecheckpoints/weights/best.pt`` exists, copies it to
        ``<project>/platecheckpoints_best.pt``.
    """
    import shutil  # local import: only needed for the best-model copy below

    logging.info("Starting YOLOv8l training...")
    print("INFO: Starting YOLOv8l training...")

    project_dir = '/kaggle/working/runs'
    run_name = 'platecheckpoints'

    # Load pre-trained YOLOv8l model or resume from checkpoint
    resuming = bool(resume_from) and os.path.exists(resume_from)
    if resuming:
        logging.info(f"Resuming training from checkpoint: {resume_from}")
        model = YOLO(resume_from)
    else:
        logging.info("Starting new training with yolov8l.pt")
        model = YOLO('yolov8l.pt')

    # Training parameters
    training_params = {
        'data': '/kaggle/working/license-detection-cleaned/data.yaml',
        'epochs': 30,  # Number of epochs
        'imgsz': 640,  # Image size
        'batch': 16,   # Batch size
        'optimizer': 'AdamW',  # Optimizer
        'lr0': 0.001,  # Initial learning rate
        'patience': 0,  # Disable early stopping
        # First GPU when CUDA is available, otherwise fall back to the CPU
        # instead of failing on a machine without a GPU.
        'device': 0 if torch.cuda.is_available() else 'cpu',
        'project': project_dir,  # Output directory for training logs
        'name': run_name,  # Experiment name for checkpoints
        'exist_ok': True,  # Overwrite existing runs
        'cos_lr': True,  # Use cosine learning rate scheduler
        'weight_decay': 0.0005,  # Weight decay for AdamW
        'augment': True,  # Enable YOLOv8 built-in augmentation
        'mosaic': 1.0,  # Enable mosaic augmentation
        'mixup': 0.2,  # Increased mixup for better generalization
        'hsv_h': 0.015,  # HSV augmentation (hue)
        'hsv_s': 0.7,  # HSV augmentation (saturation)
        'hsv_v': 0.4,  # HSV augmentation (value)
        'degrees': 10.0,  # Rotation augmentation
        'translate': 0.1,  # Translation augmentation
        'scale': 0.5,  # Scaling augmentation
        'shear': 2.0,  # Shear augmentation
        'perspective': 0.0,  # Perspective augmentation
        'flipud': 0.5,  # Vertical flip augmentation
        'fliplr': 0.5,  # Horizontal flip augmentation
        'save': True,  # Save checkpoints
        'save_period': 3,  # Save checkpoint every 3 epochs
        # NOTE(review): the original comment says this was "lowered to boost
        # recall"; confirm against the Ultralytics default for validation
        # that 0.25 really is a lower threshold than the default.
        'conf': 0.25,  # Confidence threshold
        'iou': 0.7,  # IoU threshold for NMS
        'max_det': 300,  # Maximum detections per image
        'rect': False,  # Disable rectangular training for better accuracy
        'amp': True,  # Enable Automatic Mixed Precision for faster training
        'verbose': True,  # Enable detailed logging to track recall
    }
    if resuming:
        # Without this flag the checkpoint is only used as initial weights and
        # a brand-new run of the full epoch count is started.
        training_params['resume'] = True

    # Start or resume training
    results = model.train(**training_params)

    logging.info("YOLOv8l training completed.")
    print("INFO: YOLOv8l training completed.")

    # Evaluate model on test set with lower confidence threshold to maximize recall
    metrics = model.val(data=training_params['data'], split='test', conf=0.1)
    logging.info(f"Test metrics: {metrics}")
    print(f"INFO: Test metrics: {metrics}")

    # Save final model (last)
    last_model_path = os.path.join(project_dir, f'{run_name}_last.pt')
    model.save(last_model_path)
    logging.info(f"Last model saved to {last_model_path}")
    print(f"INFO: Last model saved to {last_model_path}")

    # The best checkpoint is written inside the run's "weights" folder (the
    # same folder the entry script scans for checkpoints).  Copy it next to
    # the last model under a descriptive name; copying rather than renaming
    # leaves the run directory intact.
    best_model_path = os.path.join(project_dir, run_name, 'weights', 'best.pt')
    if os.path.exists(best_model_path):
        new_best_path = os.path.join(project_dir, f'{run_name}_best.pt')
        shutil.copyfile(best_model_path, new_best_path)
        logging.info(f"Best model saved to {new_best_path}")
        print(f"INFO: Best model saved to {new_best_path}")
    else:
        logging.warning(f"Best model not found at {best_model_path}")

    return results, metrics

if __name__ == "__main__":
    # Install/upgrade ultralytics from within the script as well.
    os.system("pip install -U ultralytics")

    # Collect the .pt checkpoints of a previous run, if there are any.
    checkpoint_dir = '/kaggle/working/runs/platecheckpoints/weights'
    if os.path.exists(checkpoint_dir):
        checkpoint_files = [f for f in os.listdir(checkpoint_dir) if f.endswith('.pt')]
    else:
        checkpoint_files = []

    def _epoch_of(file_name):
        """Epoch number embedded in names containing 'epoch' (e.g. epoch12.pt); 0 otherwise."""
        if 'epoch' not in file_name:
            return 0
        return int(file_name.split('epoch')[1].split('.')[0])

    # Resume from the checkpoint with the highest epoch number.
    resume_from = None
    if checkpoint_files:
        checkpoint_files = sorted(checkpoint_files, key=_epoch_of)
        resume_from = os.path.join(checkpoint_dir, checkpoint_files[-1])
        logging.info(f"Found checkpoint: {resume_from}")

    # Start training
    results, metrics = train_yolo(resume_from)