## Initial Setup

### Install Packages

In [280]:
# !pip install numpy
# !pip install opencv-python
# !pip install pillow
# !pip install matplotlib
# !pip install tqdm
# !pip install scikit-learn
# !pip install torch torchvision
# !pip install ultralytics



In [281]:
# Core packages
import os
import shutil
import json
import zipfile
import xml.etree.ElementTree as ET
from pathlib import Path

# Math and array handling
import numpy as np
from sklearn.model_selection import train_test_split

# Image and visualization
import cv2
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Progress bar
from tqdm.auto import tqdm

# Deep Learning Frameworks
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.models.segmentation as segmentation
import torchvision.transforms as transforms

# Object Detection and Segmentation
from ultralytics import YOLO

from torch.utils.data import DataLoader
import gc

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Automatically use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

%matplotlib inline

import random





### Download Datasets

In [282]:
def download_and_extract_from_gdrive(gdrive_url, extract_to="extracted"):
    """Download a ZIP archive from a Google Drive share link and unpack it.

    Args:
        gdrive_url: Share URL containing a ``/d/<file_id>/`` segment.
        extract_to: Directory that receives the archive contents; created if missing.

    Raises:
        ImportError: If the ``gdown`` package is not installed.
        IndexError: If ``gdrive_url`` has no ``/d/<file_id>`` segment.
    """
    # gdown is only needed for this optional download step and is not imported
    # at the top of the notebook, so bring it into scope here.
    import gdown

    # Convert shared drive URL to direct download URL
    file_id = gdrive_url.split("/d/")[1].split("/")[0]
    download_url = f"https://drive.google.com/uc?id={file_id}"

    # Create output folder
    os.makedirs(extract_to, exist_ok=True)

    zip_path = os.path.join(extract_to, "downloaded.zip")

    print("[INFO] Downloading ZIP from Google Drive...")
    gdown.download(download_url, zip_path, quiet=False)

    print("[INFO] Extracting ZIP...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

    # The archive is only a transport artefact; drop it once unpacked
    os.remove(zip_path)

    print(f"[DONE] Extracted files to: {extract_to}")



### Convert the downloaded dataset into YOLO format

In [283]:
# ----------------------------
# Parse polygon and convert to YOLO bbox
# ----------------------------
def parse_yolo_style_bbox_from_xml(xml_path, class_id_to_name):
    """Read polygon annotations from an XML file and return their bounding boxes.

    Only ``<object>`` elements whose ``<name>`` matches one of the class names
    in ``class_id_to_name`` are kept. Objects without a name, without a
    ``<polygon>``, or with a polygon that has no ``<pt>`` children are skipped
    instead of aborting the whole file.

    Args:
        xml_path: Path of the XML annotation file.
        class_id_to_name: Mapping ``class_id -> (class_name, colour)``.

    Returns:
        List of ``((x_min, y_min), (x_max, y_max), class_name)`` tuples in
        pixel coordinates, in document order.
    """
    # Build the lookup once instead of once per object
    wanted_names = {entry[0] for entry in class_id_to_name.values()}

    root = ET.parse(xml_path).getroot()
    bboxes = []
    for obj in root.findall('object'):
        name_node = obj.find('name')
        if name_node is None or name_node.text not in wanted_names:
            continue

        polygon = obj.find('polygon')
        if polygon is None:
            continue

        coords = [(float(pt.find('x').text), float(pt.find('y').text)) for pt in polygon.findall('pt')]
        if not coords:
            # min()/max() would raise on an empty polygon
            continue

        xs, ys = zip(*coords)
        bboxes.append(((min(xs), min(ys)), (max(xs), max(ys)), name_node.text))
    return bboxes


# ----------------------------
# Save YOLO-format txt
# ----------------------------
def save_yolo_format(image_id, bboxes, image_width, image_height, output_path, class_id_to_name):
    """Write bounding boxes to ``output_path`` as ``class cx cy w h`` lines.

    Each box is given as ``((x_min, y_min), (x_max, y_max), class_name)`` in
    pixels; centre and size are divided by the image dimensions and written
    with six decimals. ``image_id`` is accepted but not used.
    """
    with open(output_path, 'w') as label_file:
        for top_left, bottom_right, class_name in bboxes:
            x_min, y_min = top_left
            x_max, y_max = bottom_right

            # First class id whose name matches; StopIteration if there is none
            matching_ids = (cid for cid, (name, _) in class_id_to_name.items() if name == class_name)
            class_id = next(matching_ids)

            x_center = (x_min + x_max) / 2 / image_width
            y_center = (y_min + y_max) / 2 / image_height
            width = (x_max - x_min) / image_width
            height = (y_max - y_min) / image_height

            label_file.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")


# ----------------------------
# Convert dataset (YOLO only)
# ----------------------------
def convert_fulldataset_yolo_only(dataset_path, output_dir, class_id_to_name):
    """Convert every ``.jpg`` under ``dataset_path/images`` into a YOLO image/label pair.

    For each image the bounding-box XML and the semantic XML are parsed, their
    boxes are concatenated, the image is re-saved under ``output_dir/images``
    and the labels are written to ``output_dir/labels/<image_id>.txt``.
    Images whose XML cannot be parsed or whose pixels cannot be decoded are
    skipped with a warning.

    Args:
        dataset_path: Root containing ``images/`` and ``gt/.../label_me_xml/``.
        output_dir: Destination root; ``images/`` and ``labels/`` are created.
        class_id_to_name: Mapping ``class_id -> (class_name, colour)``.
    """
    # splitext keeps file names that contain extra dots intact
    image_ids = [
        os.path.splitext(fname)[0]
        for fname in os.listdir(f"{dataset_path}/images")
        if fname.endswith(".jpg")
    ]

    os.makedirs(f"{output_dir}/images", exist_ok=True)
    os.makedirs(f"{output_dir}/labels", exist_ok=True)

    for image_id in tqdm(image_ids, desc="Converting to YOLO"):
        img_path = f"{dataset_path}/images/{image_id}.jpg"
        bbox_xml_path = f"{dataset_path}/gt/bounding_box/label_me_xml/{image_id}.xml"
        semantic_xml_path = f"{dataset_path}/gt/semantic/label_me_xml/{image_id}.xml"

        if not os.path.exists(img_path):
            print(f"[WARNING] Image not found: {img_path}, skipping...")
            continue

        try:
            bboxes1 = parse_yolo_style_bbox_from_xml(bbox_xml_path, class_id_to_name)
            bboxes2 = parse_yolo_style_bbox_from_xml(semantic_xml_path, class_id_to_name)
            all_bboxes = bboxes1 + bboxes2
        except Exception as e:
            print(f"[WARNING] Skipping image {image_id} due to parse error: {e}")
            continue

        image = None
        try:
            image = Image.open(img_path)
            # Force decoding now so corrupt files are rejected before anything is written
            image.load()
        except Exception as e:
            if image is not None:
                image.close()
            print(f"[WARNING] Could not load image {image_id}: {e}")
            continue

        with image:
            # PIL reports (width, height); no array copy is needed for that
            image_width, image_height = image.size

            # Save image
            image.save(f"{output_dir}/images/{image_id}.jpg")

        # Save YOLO labels
        yolo_annotation_path = f"{output_dir}/labels/{image_id}.txt"
        save_yolo_format(image_id, all_bboxes, image_width, image_height, yolo_annotation_path, class_id_to_name)

    print("✅ YOLO-format annotation conversion complete!")

### Visualizing images with bounding boxes

In [284]:

# Function to load YOLO bounding box annotations
def load_yolo_annotations(anno_file):
    """Read a YOLO label file into a list of float tuples.

    Blank lines are ignored, and anything after a ``#`` on a line is treated
    as a comment (the video export in this notebook appends ``# <class name>``).

    Args:
        anno_file: Path of the ``.txt`` label file.

    Returns:
        List of ``(class_id, x_center, y_center, width, height)`` tuples, all floats.
    """
    boxes = []
    with open(anno_file, 'r') as f:
        for line in f:
            fields = line.split('#', 1)[0].split()
            if not fields:
                continue
            class_id, x_center, y_center, width, height = map(float, fields)
            boxes.append((class_id, x_center, y_center, width, height))
    return boxes

# Function to visualize image and bounding boxes
def visualize_data(image_id, class_id_to_name, image_dir, annotations_dir):
    """Plot one image with its YOLO boxes, class names and a colour legend.

    Args:
        image_id: File stem shared by ``<image_id>.jpg`` and ``<image_id>.txt``.
        class_id_to_name: Mapping ``class_id -> (class_name, rgb_colour)``.
        image_dir: Directory holding the ``.jpg`` images.
        annotations_dir: Directory holding the YOLO ``.txt`` labels.

    Prints a warning and returns without plotting when either file is missing.
    """
    img_path = os.path.join(image_dir, f"{image_id}.jpg")
    annotation_path = os.path.join(annotations_dir, f"{image_id}.txt")

    if not os.path.exists(img_path) or not os.path.exists(annotation_path):
        print(f"[WARNING] Missing files for {image_id}, skipping...")
        return

    # Load as 3-channel RGB so the 3-component class colours cover every channel
    with Image.open(img_path) as pil_image:
        image = np.array(pil_image.convert("RGB"))
    img_height, img_width = image.shape[:2]

    # Load annotations
    boxes = load_yolo_annotations(annotation_path)

    # Drawing parameters (sized for very large source images)
    rectangle_thickness = 15
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 5
    font_thickness = 5
    text_color = (255, 0, 0)  # red, since the array is in RGB order

    # Draw boxes
    image_with_boxes = image.copy()
    for class_id, x_center, y_center, width, height in boxes:
        class_name, color = class_id_to_name[int(class_id)]

        x_min = int((x_center - width / 2) * img_width)
        y_min = int((y_center - height / 2) * img_height)
        x_max = int((x_center + width / 2) * img_width)
        y_max = int((y_center + height / 2) * img_height)

        cv2.rectangle(image_with_boxes, (x_min, y_min), (x_max, y_max), color, rectangle_thickness)

        # Keep the label baseline low enough that the text stays on the canvas
        # even when the box touches the top edge.
        (_, text_height), _ = cv2.getTextSize(class_name, font, font_scale, font_thickness)
        text_y = max(y_min - 10, text_height + 10)
        cv2.putText(image_with_boxes, class_name, (x_min, text_y), font, font_scale, text_color, font_thickness)

    # Plot
    plt.figure(figsize=(10, 8))
    plt.imshow(image_with_boxes)
    plt.title(f"Image ID: {image_id}")

    # Add class legend
    legend_handles = []
    for class_id, (class_name, class_rgb) in class_id_to_name.items():
        legend_handles.append(mpatches.Patch(color=np.array(class_rgb)/255.0, label=f"{class_name} ({class_id})"))
    plt.legend(handles=legend_handles, loc='upper right', fontsize=10)
    plt.axis('off')
    plt.show()


### Split into train and val sets

In [285]:
import os
import shutil
import random
from tqdm import tqdm

def move_files(file_list,
               source_image_dir,
               source_annotation_dir,
               target_image_dir,
               target_annotation_dir):
    """Copy the image/label pair of every id in ``file_list`` to the target folders.

    Despite the name, files are copied, not moved. A missing image or label is
    reported with a warning and the other file of the pair is still copied.
    """
    for directory in (target_image_dir, target_annotation_dir):
        os.makedirs(directory, exist_ok=True)

    # Progress label is the folder above the image dir (e.g. "train" / "val")
    split_label = os.path.basename(os.path.dirname(target_image_dir))

    for image_id in tqdm(file_list, desc=f"Moving to {split_label}"):
        image_name = f"{image_id}.jpg"
        label_name = f"{image_id}.txt"

        image_path = os.path.join(source_image_dir, image_name)
        annotation_path = os.path.join(source_annotation_dir, label_name)

        if not os.path.exists(image_path):
            print(f"[Warning] Missing image: {image_path}")
        else:
            shutil.copy(image_path, os.path.join(target_image_dir, image_name))

        if not os.path.exists(annotation_path):
            print(f"[Warning] Missing annotation: {annotation_path}")
        else:
            shutil.copy(annotation_path, os.path.join(target_annotation_dir, label_name))

def split_and_move_dataset(source_base_dir="./datasets/new_dataset_yolo",
                           target_base_dir="./datasets/new_dataset_yolo_split",
                           split_ratio=0.8,
                           seed=42):
    """Shuffle the dataset and copy it into ``train``/``val`` sub-folders.

    Args:
        source_base_dir: Root containing ``images/`` and ``labels/``.
        target_base_dir: Root that receives ``train/`` and ``val/`` (each with
            ``images/`` and ``labels/``). Existing files there are not removed.
        split_ratio: Fraction of the images assigned to the training set.
        seed: Seed of the shuffle; the same seed and file set give the same split.
    """
    image_dir = os.path.join(source_base_dir, "images")
    label_dir = os.path.join(source_base_dir, "labels")

    # os.listdir returns entries in arbitrary order, so sort before shuffling;
    # otherwise the same seed can yield different splits on different systems.
    image_ids = sorted(os.path.splitext(f)[0] for f in os.listdir(image_dir) if f.endswith(".jpg"))

    # A private RNG keeps the split reproducible without reseeding the global
    # `random` state used elsewhere in the notebook.
    random.Random(seed).shuffle(image_ids)

    split_idx = int(len(image_ids) * split_ratio)
    train_ids = image_ids[:split_idx]
    val_ids = image_ids[split_idx:]

    for split_name, split_ids in (("train", train_ids), ("val", val_ids)):
        move_files(split_ids,
                   source_image_dir=image_dir,
                   source_annotation_dir=label_dir,
                   target_image_dir=os.path.join(target_base_dir, split_name, "images"),
                   target_annotation_dir=os.path.join(target_base_dir, split_name, "labels"))

    print(f"\n[✓] Dataset split completed: {len(train_ids)} train / {len(val_ids)} val samples")


### Normalize labels

In [286]:
import os
from PIL import Image

def normalize_label_file(label_file, img_width, img_height):
    """
    Clamp every coordinate of a YOLO label file to the [0, 1] range, in place.

    Each non-blank line must be ``class_id x_center y_center width height``.
    All lines are parsed before the file is reopened for writing, so a
    malformed line raises without destroying the existing labels.

    Args:
        label_file: Path of the label file to rewrite.
        img_width: Unused; kept so existing callers keep working.
        img_height: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If a non-blank line does not hold one integer class id
            followed by exactly four floats.
    """
    with open(label_file, 'r') as f:
        lines = f.readlines()

    normalized = []
    for line in lines:
        parts = line.split()
        if not parts:
            # Tolerate blank lines instead of failing on parts[0]
            continue

        class_id = int(parts[0])
        x_center, y_center, width, height = map(float, parts[1:])

        # Each value is clamped on its own.
        # NOTE(review): this does not guarantee that centre +/- size/2 stays
        # inside the image; confirm whether corner clamping is wanted.
        x_center = min(1.0, max(0.0, x_center))
        y_center = min(1.0, max(0.0, y_center))
        width = min(1.0, max(0.0, width))
        height = min(1.0, max(0.0, height))

        normalized.append(f"{class_id} {x_center} {y_center} {width} {height}\n")

    # Opening with 'w' truncates the file, so only do it once parsing succeeded
    with open(label_file, 'w') as f:
        f.writelines(normalized)


def get_image_size(img_path):
    """
    Open the image at ``img_path`` and return its ``(width, height)`` tuple.
    """
    with Image.open(img_path) as opened:
        dimensions = opened.size
    return dimensions


def normalize_all_labels(labels_dir, img_dir):
    """
    Run normalize_label_file on every ``.txt`` file in ``labels_dir``.

    The matching image is looked up in ``img_dir`` under the same name with
    ``.jpg`` in place of ``.txt``; labels without an image are reported and
    left untouched.
    """
    for entry in tqdm(os.listdir(labels_dir)):
        # Anything that is not a label file is ignored
        if not entry.endswith('.txt'):
            continue

        image_path = os.path.join(img_dir, entry.replace('.txt', '.jpg'))  # Assuming JPG images
        if not os.path.exists(image_path):
            print(f"Warning: Image for label {entry} not found!")
            continue

        width_px, height_px = get_image_size(image_path)
        normalize_label_file(os.path.join(labels_dir, entry), width_px, height_px)

    print("Normalize Complete")



### Training 

In [287]:
def train_yolo(data_yaml="uavdt_yolo.yaml", epochs=40, imgsz=640, batch=32, name="yolov8-uavdt",
               weights="yolov8n.pt", project="runs_yolo/train"):
    """Train a YOLO detector with Ultralytics.

    Args:
        data_yaml: Dataset description file passed to ``model.train``.
        epochs: Number of training epochs.
        imgsz: Training image size.
        batch: Batch size.
        name: Run name (sub-folder of ``project``).
        weights: Checkpoint or model file used to initialise ``YOLO``.
            Defaults to the previously hard-coded ``"yolov8n.pt"``.
        project: Directory under which the run folder is created. Defaults to
            the previously hard-coded ``"runs_yolo/train"``.
    """
    model = YOLO(weights)
    model.train(
        data=data_yaml,
        epochs=epochs,
        imgsz=imgsz,
        batch=batch,
        show=True,
        name=name,
        project=project
    )

### Finding best model path

In [288]:
def find_best_pt(base_dir='runs_yolo/'):
    """Return the path of the most recently modified ``best.pt`` under ``base_dir``.

    Args:
        base_dir: Directory searched recursively.

    Returns:
        The path as a string.

    Raises:
        FileNotFoundError: If no ``best.pt`` exists anywhere below ``base_dir``.
    """
    best_paths = list(Path(base_dir).rglob('best.pt'))
    if not best_paths:
        # Report the directory that was actually searched
        raise FileNotFoundError(f"No 'best.pt' file found under '{base_dir}'.")

    # Newest by modification time; no need to sort the whole list
    newest = max(best_paths, key=lambda p: p.stat().st_mtime)

    print(f"✅ Found best.pt at: {newest}")
    return str(newest)


### Prediction on val images

In [289]:
import os
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from ultralytics import YOLO

# -------- Load YOLO Model -------- #
def load_yolo_model(weight_path: str):
    """Build a ``YOLO`` model object from the weights file at ``weight_path``."""
    model = YOLO(weight_path)
    return model

# -------- Run YOLO Prediction -------- #
def run_yolo(model, image: "Image.Image"):
    """Run a single-image prediction and return the first result.

    The image is handed to ``model.predict`` directly. The previous version
    wrote it to a fixed ``temp_yolo.jpg`` in the working directory first; that
    file was left behind if prediction raised, could collide between runs, and
    added a lossy JPEG round trip.

    Args:
        model: Object exposing ``predict(source=..., conf=..., save=..., verbose=...)``
            and returning a sequence of results (an Ultralytics ``YOLO`` model here).
        image: PIL image to run detection on.

    Returns:
        The first element of the sequence returned by ``model.predict``.
    """
    return model.predict(source=image, conf=0.3, save=False, verbose=False)[0]

# -------- Draw Bounding Boxes on Image -------- #
def draw_yolo_boxes(draw: ImageDraw.ImageDraw, result):
    """Outline every detection in ``result`` in red and caption it with class name and confidence."""
    detections = result.boxes
    class_names = result.names
    for idx in range(len(detections)):
        corners = detections.xyxy[idx].cpu().numpy()
        caption = f"{class_names[int(detections.cls[idx])]} {float(detections.conf[idx]):.2f}"
        draw.rectangle(corners.tolist(), outline="red", width=2)
        # Caption is anchored at the box's top-left corner
        draw.text((corners[0], corners[1]), caption, fill="red")

# -------- Create Legend Image for Detected Classes -------- #
def generate_yolo_legend(result):
    """Build a legend image listing each class id detected in ``result``.

    One row (red swatch plus ``"<id>: <name>"``) is drawn per distinct class.
    The canvas is 256 px wide and at least 256 px tall; it grows when the rows
    would otherwise run past the bottom edge, which the fixed 256 px canvas
    could not hold once more than ten classes were detected.

    Args:
        result: Prediction result exposing ``boxes.cls`` and ``names``.

    Returns:
        A PIL RGB image.
    """
    margin = 10
    swatch_size = 20
    row_height = 25

    class_ids = result.boxes.cls.cpu().numpy().astype(int)
    unique_ids = np.unique(class_ids)

    canvas_height = max(256, 2 * margin + len(unique_ids) * row_height)
    legend_img = Image.new("RGB", (256, canvas_height), (0, 0, 0))
    draw = ImageDraw.Draw(legend_img)

    for idx, cls_id in enumerate(unique_ids):
        name = result.names[cls_id]
        top = margin + idx * row_height
        draw.rectangle([margin, top, margin + swatch_size, top + swatch_size], fill=(255, 0, 0))
        draw.text((40, top), f"{cls_id}: {name}", fill=(255, 255, 255))
    return legend_img

# -------- Visualize YOLO Predictions on 10 Images -------- #
def visualize_yolo_on_val_images(val_image_dir, yolo_model, max_images=10):
    """Show detections and a class legend for the first ``max_images`` JPEGs of a folder.

    Files are taken in sorted name order. Each image is converted to RGB and
    resized to 256x256 before detection; one two-panel figure is shown per image.
    """
    jpg_names = sorted(name for name in os.listdir(val_image_dir) if name.endswith(".jpg"))

    for image_file in jpg_names[:max_images]:  # Limit to max_images
        source_path = os.path.join(val_image_dir, image_file)
        original_image = Image.open(source_path).convert("RGB").resize((256, 256))

        # Detection on the resized image
        detection = run_yolo(yolo_model, original_image)

        # Boxes go on a copy so the resized original stays clean
        annotated = original_image.copy()
        draw_yolo_boxes(ImageDraw.Draw(annotated), detection)

        legend = generate_yolo_legend(detection)

        # Left panel: detections; right panel: legend
        fig, (detection_ax, legend_ax) = plt.subplots(1, 2, figsize=(12, 6))

        detection_ax.imshow(annotated)
        detection_ax.set_title(f"YOLO Detection: {image_file}")
        detection_ax.axis("off")

        legend_ax.imshow(legend)
        legend_ax.set_title("YOLO Class Legend")
        legend_ax.axis("off")

        plt.tight_layout()
        plt.show()


### Predictions on videos

In [290]:
import cv2
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
from ultralytics import YOLO


# Root folder for everything the video pipeline writes
output_base = './video_outputs'

# ================== OUTPUT FOLDERS ==================
# These three names are module-level globals: process_video() reads them
# directly instead of receiving them as arguments.
img_dir = os.path.join(output_base, 'images')    # per-frame images
label_dir = os.path.join(output_base, 'labels')  # per-frame label files
video_output_path = os.path.join(output_base, 'output_video.mp4')  # annotated output video

# Created when this cell runs, so the writers below can assume they exist
os.makedirs(img_dir, exist_ok=True)
os.makedirs(label_dir, exist_ok=True)

# ================== IMAGE TRANSFORM ==================
def get_transform():
    """Return a torchvision pipeline that resizes to 256x256 and then converts to a tensor."""
    steps = [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ]
    return transforms.Compose(steps)

# ================== PROCESS FRAME ==================
def process_frame(frame, yolo_model, w, h, class_id_to_name):
    """Detect objects in one video frame and draw them onto it.

    Args:
        frame: BGR frame as read by ``cv2.VideoCapture``; modified in place
            (boxes and class names are drawn on it).
        yolo_model: Callable model; ``yolo_model(frame)[0].boxes`` must expose
            ``xyxy`` and ``cls``.
        w: Unused; kept so existing callers keep working.
        h: Unused; kept so existing callers keep working.
        class_id_to_name: Mapping ``class_id -> (class_name, rgb_colour)``.

    Returns:
        ``(frame_rgb, boxes, class_ids)`` where ``frame_rgb`` is an RGB
        conversion of the frame taken before drawing, ``boxes`` holds the
        ``xyxy`` corners and ``class_ids`` the class indices, both as numpy arrays.
    """
    # Converted before anything is drawn, so this copy stays free of overlays
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # === YOLO Detection ===
    results = yolo_model(frame)
    boxes = results[0].boxes.xyxy.cpu().numpy()
    class_ids = results[0].boxes.cls.cpu().numpy()

    # Draw bounding boxes with corresponding colors and class names
    for box, cls_id in zip(boxes, class_ids):
        x1, y1, x2, y2 = map(int, box)

        class_name, color_rgb = class_id_to_name[int(cls_id)]

        # The mapping stores RGB, but `frame` is BGR: reverse the channels so
        # the drawn colour matches the one defined for the class.
        color_bgr = tuple(int(channel) for channel in reversed(color_rgb))

        cv2.rectangle(frame, (x1, y1), (x2, y2), color_bgr, 2)

        # Put class name above the bounding box, never above the top edge
        cv2.putText(frame, class_name, (x1, max(y1 - 10, 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 0), 2)

    return frame_rgb, boxes, class_ids

# ================== SAVE OUTPUTS ==================
def save_outputs(frame_rgb, boxes, class_ids, frame_count, w, h, img_dir, label_dir, class_id_to_name,
                 include_class_comment=True):
    """Write one frame and its detections to disk.

    The frame goes to ``img_dir/frame_<NNNN>.jpg`` and the detections to
    ``label_dir/frame_<NNNN>.txt`` as ``class cx cy w h`` lines normalised by
    the frame size.

    Args:
        frame_rgb: Frame in RGB channel order.
        boxes: Iterable of ``(x1, y1, x2, y2)`` pixel corners.
        class_ids: Class index per box.
        frame_count: Frame number used in the file names.
        w: Frame width in pixels.
        h: Frame height in pixels.
        img_dir: Output directory for images.
        label_dir: Output directory for label files.
        class_id_to_name: Mapping ``class_id -> (class_name, colour)``.
        include_class_comment: When True (the default, matching the previous
            behaviour) each label line ends with ``"  # <class name>"``.
            NOTE(review): that suffix is not part of the plain five-column
            YOLO layout; pass False for files meant for strict label parsers.
    """
    out_img = os.path.join(img_dir, f'frame_{frame_count:04d}.jpg')
    out_label = os.path.join(label_dir, f'frame_{frame_count:04d}.txt')

    # Save original frame
    cv2.imwrite(out_img, frame_rgb[..., ::-1])  # RGB -> BGR

    # Save YOLO labels
    with open(out_label, 'w') as f:
        for box, cls_id in zip(boxes, class_ids):
            x1, y1, x2, y2 = box
            w_box = x2 - x1
            h_box = y2 - y1
            cx = x1 + w_box / 2
            cy = y1 + h_box / 2
            cx /= w
            cy /= h
            w_box /= w
            h_box /= h

            line = f"{int(cls_id)} {cx:.6f} {cy:.6f} {w_box:.6f} {h_box:.6f}"
            if include_class_comment:
                line += f"  # {class_id_to_name[int(cls_id)][0]}"
            f.write(line + "\n")

# ================== VIDEO SETUP ==================
def setup_video_capture(video_path):
    """Open a video and collect the properties needed to write an output video.

    Args:
        video_path: Path (or other source accepted by ``cv2.VideoCapture``).

    Returns:
        ``(cap, total_frames, fps, w, h, fourcc)``: the opened capture, the
        frame count, frame rate, frame width and height reported by OpenCV,
        and the ``mp4v`` codec code.

    Raises:
        IOError: If the source cannot be opened. Previously this went
            unnoticed and the caller processed zero frames.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    return cap, total_frames, fps, w, h, fourcc

# ================== MAIN VIDEO PROCESSING ==================
def process_video(video_path, yolo_weights_path, class_id_to_name, max_frames=None):
    """Run YOLO over a video, saving per-frame outputs and an annotated video.

    Per-frame images and labels go to the module-level ``img_dir`` and
    ``label_dir``; the annotated video is written to the module-level
    ``video_output_path``.

    Args:
        video_path: Input video.
        yolo_weights_path: Weights file passed to ``YOLO``.
        class_id_to_name: Mapping ``class_id -> (class_name, colour)``.
        max_frames: Stop after this many frames; ``None`` processes the whole video.
    """
    yolo_model = YOLO(yolo_weights_path)

    cap, total_frames, fps, w, h, fourcc = setup_video_capture(video_path)
    video_writer = cv2.VideoWriter(video_output_path, fourcc, fps, (w, h))

    # The bar should not promise more frames than the video reports
    if max_frames is None:
        expected_frames = total_frames
    elif total_frames > 0:
        expected_frames = min(max_frames, total_frames)
    else:
        expected_frames = max_frames

    frame_count = 0
    pbar = tqdm(total=expected_frames, desc="Processing", leave=False)

    try:
        # Check the limit before reading so no extra frame is decoded
        while max_frames is None or frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb, boxes, class_ids = process_frame(frame, yolo_model, w, h, class_id_to_name)
            save_outputs(frame_rgb, boxes, class_ids, frame_count, w, h, img_dir, label_dir, class_id_to_name)

            # `frame` now carries the boxes drawn by process_frame
            video_writer.write(frame)

            frame_count += 1
            pbar.update(1)
    finally:
        # Release handles even if detection or saving fails mid-video,
        # so the partially written output file is finalised.
        cap.release()
        video_writer.release()
        pbar.close()

    print(f"\nDone! Processed {frame_count} frames.")

### Print Metrics

### Retrain and Quantization

## Full Pipeline

### Download and Convert Dataset

In [291]:
# Class ID -> (class name, RGB colour) mapping.
# Passed to the conversion, visualisation and video functions defined above.
class_id_to_name = {
    0: ('road', [28, 42, 168]),
    1: ('pool', [0, 50, 89]),
    2: ('vegetation', [107, 142, 35]),
    3: ('roof', [70, 70, 70]),
    4: ('wall', [102, 102, 156]),
    5: ('window', [254, 228, 12]),
    6: ('person', [255, 22, 96]),
    7: ('dog', [102, 51, 0]),
    8: ('car', [9, 143, 150]),
    9: ('bicycle', [119, 11, 32]),
    10: ('tree', [51, 51, 0]),
}



# Download the datasets

# gdrive_url = "https://drive.google.com/file/d/1UppumYqYOi-kto6BWPfFxwJK2Eph46oY/view?usp=sharing"
# download_and_extract_from_gdrive(gdrive_url, extract_to="datasets")

# # Path to the dataset
# dataset_path = "./datasets/semantic_drone_dataset/training_set"
# output_dir = "./datasets/new_dataset_yolo"

# convert_fulldataset_yolo_only(dataset_path, output_dir, class_id_to_name)



In [292]:
# dataset_path = "./datasets/new_dataset_yolo"
# image_dir = os.path.join(dataset_path, "images")
# annotations_dir = os.path.join(dataset_path, "labels")


# # Visualize 10 random images
# image_ids = [f.split('.')[0] for f in os.listdir(image_dir) if f.endswith('.jpg')]
# random_image_ids = random.sample(image_ids, min(10, len(image_ids)))

# for image_id in random_image_ids:
#     visualize_data(image_id, class_id_to_name, image_dir, annotations_dir)



In [293]:
# Paths of the converted (unsplit) YOLO dataset
dataset_path = "./datasets/new_dataset_yolo"
image_dir = os.path.join(dataset_path, "images")
annotations_dir = os.path.join(dataset_path, "labels")

# Rewrites the .txt label files under annotations_dir in place, clamping values to [0, 1]
normalize_all_labels(annotations_dir, image_dir)

100%|██████████| 399/399 [00:00<00:00, 717.17it/s]

Normalize Complete





In [294]:
# Run the split with the function defaults (split_ratio=0.8, seed=42, default source/target dirs)
split_and_move_dataset()

Moving to train: 100%|██████████| 319/319 [00:01<00:00, 207.14it/s]
Moving to val: 100%|██████████| 80/80 [00:00<00:00, 280.79it/s]


[✓] Dataset split completed: 319 train / 80 val samples





In [None]:
# print("[+] Training Start")

# Train YOLOv8
# NOTE(review): the saved output below reports epochs=20 and ends in a
# KeyboardInterrupt, so it was not produced by this call (epochs=2); re-run to refresh it.
train_yolo(data_yaml="uavdt_yolo.yaml",  epochs=2, imgsz=640, batch=32, name="yolov8-uavdt")


Ultralytics 8.3.107  Python-3.12.4 torch-2.5.1+cpu CPU (AMD Ryzen 7 5800H with Radeon Graphics)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=uavdt_yolo.yaml, epochs=20, time=None, patience=100, batch=32, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=runs_yolo/train, name=yolov8-uavdt, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=True, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=Tr

[34m[1mtrain: [0mScanning C:\Users\sande\OneDrive\Desktop\IITTP\2_Asgn\Sem2\2_dl\SemProject\Automated_SegmentAndYolo\datasets\new_dataset_yolo_split\train\labels.cache... 319 images, 2 backgrounds, 0 corrupt: 100%|██████████| 319/319 [00:00<?, ?it/s]
[34m[1mval: [0mScanning C:\Users\sande\OneDrive\Desktop\IITTP\2_Asgn\Sem2\2_dl\SemProject\Automated_SegmentAndYolo\datasets\new_dataset_yolo_split\val\labels.cache... 80 images, 1 backgrounds, 0 corrupt: 100%|██████████| 80/80 [00:00<?, ?it/s]

Plotting labels to runs_yolo\train\yolov8-uavdt\labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns_yolo\train\yolov8-uavdt[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/10 [00:19<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# # Load the model
# best_pt_path = find_best_pt()

# val_image_dir = "./datasets/new_dataset_yolo_split/val/images"  # <<-- make sure this path exists
    
# yolo_model = load_yolo_model(best_pt_path)
# visualize_yolo_on_val_images(val_image_dir, yolo_model, max_images=10)  # Show only 10 images


In [None]:
# # ================== CONFIG ==================
# video_path = './video.mp4'
# max_frames = None  # Set to None for full video

# yolo_weights_path = best_pt_path  # <<-- change this
# process_video(video_path, yolo_weights_path, class_id_to_name, max_frames=max_frames)  # Show only 10 frames

