In [26]:

import os
import yaml
from pathlib import Path
from ultralytics import YOLO
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import cv2
import numpy as np

In [27]:
# Root directory of the cleaned dataset, given relative to the current working directory.
base_dir = Path("cleaned_data")

In [28]:
# Report how many files each part of the dataset contains.
# Every row is (printed label, split folder, sub-folder, glob pattern).
lidar_root = base_dir / 'lidar_data'
file_count_specs = [
    ("Train images", 'train', 'images', '*.png'),
    ("Validation images", 'valid', 'images', '*.png'),
    ("Test images", 'test', 'images', '*.png'),
    ("Training labels", 'train', 'labels', '*.txt'),
]

print("Exploring dataset structure...")
for label, split_name, sub_folder, pattern in file_count_specs:
    matching_files = list((lidar_root / split_name / sub_folder).glob(pattern))
    print(f"{label}: {len(matching_files)}")

Exploring dataset structure...
Train images: 866
Validation images: 388
Test images: 192
Training labels: 1864


In [None]:
# Load the YOLOv8-small checkpoint and train it on the dataset described by data.yaml.
model = YOLO('yolov8s.pt')

# All training options are collected in one dict so they can be inspected or
# tweaked before the (long-running) training call is launched.
train_settings = dict(
    data="/cluster/home/pettdalh/tdt4265_project/data/data.yaml",
    epochs=100,         # upper bound on epochs; early stopping (patience) may end the run sooner
    batch=-1,           # -1 lets AutoBatch choose the batch size from available CUDA memory
    imgsz=640,
    amp=True,           # automatic mixed precision
    patience=10,        # early-stopping patience when validation does not improve
    device="cuda",
    optimizer='AdamW',  # author's choice for a small dataset
    lr0=0.001,          # initial learning rate
    weight_decay=0.05,  # regularization
    hsv_h=0.015,        # colour augmentation: hue
    hsv_s=0.7,          # colour augmentation: saturation
    hsv_v=0.4,          # colour augmentation: value
    degrees=5,          # only small rotations, since poles are vertical
    translate=0.1,
    scale=0.1,
    fliplr=0.5,         # horizontal flip probability
    mosaic=0.75,        # mosaic augmentation (author's rationale: helps with small objects)
    mixup=0.15,         # mild mixup regularization
)

# Kept as the last expression so the notebook still displays the training result.
model.train(**train_settings)

New https://pypi.org/project/ultralytics/8.3.113 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.86 🚀 Python-3.11.11 torch-2.5.0 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=/cluster/home/pettdalh/tdt4265_project/data/data.yaml, epochs=80, time=None, patience=10, batch=-1, imgsz=640, save=True, save_period=-1, cache=False, device=cuda, workers=8, project=None, name=train8, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_m

[34m[1mtrain: [0mScanning /cluster/home/pettdalh/tdt4265_project/data/lidar_data/train/labels.cache... 366 images, 0 backgrounds, 3 corrupt: 100%|██████████| 369/369 [00:00<?, ?it/s]






[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mAutoBatch: [0mComputing optimal batch size for imgsz=640 at 60.0% CUDA memory utilization.
[34m[1mAutoBatch: [0mCUDA:0 (Tesla P100-PCIE-16GB) 15.89G total, 13.41G reserved, 0.13G allocated, 2.34G free
      Params      GFLOPs  GPU_mem (GB)  forward (ms) backward (ms)                   input                  output
    11135987       28.65         0.856          8966     1.129e+04        (1, 3, 640, 640)                    list
    11135987       57.29         1.137         137.2           276        (2, 3, 640, 640)                    list
    11135987       114.6         1.881         38.14          1419        (4, 3, 640, 640)                    list
    11135987       229.2         2.974         53.65          1093        (8, 3, 640, 640)         

[34m[1mtrain: [0mScanning /cluster/home/pettdalh/tdt4265_project/data/lidar_data/train/labels.cache... 366 images, 0 backgrounds, 3 corrupt: 100%|██████████| 369/369 [00:00<?, ?it/s]






[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /cluster/home/pettdalh/tdt4265_project/data/lidar_data/valid/labels.cache... 388 images, 0 backgrounds, 2 corrupt: 100%|██████████| 390/390 [00:00<?, ?it/s]






Plotting labels to runs/detect/train8/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.05), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train8[0m
Starting training for 80 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/80     0.709G      2.392      37.22     0.8199          9        640:  18%|█▊        | 33/183 [04:29<1:26:22, 34.55s/it]

In [None]:
# Run inference on the test images with the model currently held in memory.
# Note: save_txt / save_conf request that detections (with confidence scores)
# are written to text files under the project/name directory given below.
# To predict from previously saved weights instead, uncomment:
# model = YOLO('runs/detect/train5/weights/best.pt')
test_images_dir = "/cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/"
predictions_root = "/cluster/home/pettdalh/tdt4265_project/lidar_predictions"

results = model.predict(
    source=test_images_dir,
    project=predictions_root,
    name="predict_5",
    device="cuda",
    save_txt=True,
    save_conf=True,
)


image 3/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1017.png: 96x640 1 pole, 51.2ms


libpng error: PNG input buffer is incomplete
libpng error: PNG input buffer is incomplete
libpng error: PNG input buffer is incomplete


image 5/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1037.png: 96x640 2 poles, 16.5ms
image 6/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1046.png: 96x640 2 poles, 11.7ms
image 7/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1056.png: 96x640 1 pole, 15.1ms
image 8/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1069.png: 96x640 1 pole, 17.0ms
image 9/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1077.png: 96x640 1 pole, 16.8ms
image 10/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1087.png: 96x640 2 poles, 11.6ms
image 11/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1098.png: 96x640 1 pole, 16.9ms
image 12/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1106.png: 96x640 1 pole, 10.7ms
image 13/197 /cluster/home/pettdalh/tdt4265_project/data/l

libpng error: PNG input buffer is incomplete
libpng error: PNG input buffer is incomplete


image 52/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1494.png: 96x640 1 pole, 15.9ms
image 53/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1500.png: 96x640 (no detections), 13.7ms
image 54/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1511.png: 96x640 2 poles, 19.2ms
image 55/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1519.png: 96x640 2 poles, 16.5ms
image 56/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1529.png: 96x640 1 pole, 16.9ms
image 57/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1537.png: 96x640 1 pole, 16.8ms
image 58/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1548.png: 96x640 1 pole, 16.7ms
image 59/197 /cluster/home/pettdalh/tdt4265_project/data/lidar_data/test/images/image_1556.png: 96x640 3 poles, 11.0ms
image 60/197 /cluster/home/pettdalh/tdt4265_

In [None]:
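# NOTE(review): everything below is disabled (commented-out) visualization code.
# It draws the boxes of results[0] on a hard-coded image (image_44.png, loaded from
# a different directory than the prediction source), so confirm that the image
# really corresponds to results[0] before re-enabling it.
# # Get result for first image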
# result = results[0]

# # Load the original image
# img = cv2.imread("/cluster/home/pettdalh/tdt4265_project/Poles/lidar/combined_color/test/image_44.png")
# img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB

# # Create figure and axes
# fig, ax = plt.subplots(1, figsize=(12, 9))
# ax.imshow(img)

# # Get the detection boxes
# boxes = result.boxes
# for box in boxes:
#     # Get coordinates
#     x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
#     confidence = float(box.conf[0])
    
#     # Create rectangle
#     rect = patches.Rectangle(
#         (x1, y1), x2-x1, y2-y1, 
#         linewidth=2, 
#         edgecolor='r', 
#         facecolor='none'
#     )
    
#     # Add rectangle to plot
#     ax.add_patch(rect)
    
#     # Add confidence label
#     ax.text(
#         x1, y1-10, 
#         f"Pole: {confidence:.2f}", 
#         color='white', 
#         fontweight='bold',
#         bbox=dict(facecolor='red', alpha=0.5)
#     )

# # Show plot
# plt.title(f"Detected {len(boxes)} poles")
# plt.axis('off')  # Hide axes
# plt.tight_layout()
# plt.show()