##### Import the libraries

In [1]:
import os
import json
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
from torchvision.ops import box_iou
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from sklearn.metrics import precision_recall_fscore_support
import rich
import shutil
from sklearn.model_selection import train_test_split
from ultralytics import YOLO

##### split the dataset & create annotation bsed on yolo requirements

In [None]:
# Paths for input data
image_dir = os.path.join("ZJU_dataset_2", "images")
annotation_dir = os.path.join("ZJU_dataset_2", "annotation")

# Paths for YOLO-formatted dataset
output_dir = "datasets"
train_images_dir = os.path.join(output_dir, "train", "images")
train_labels_dir = os.path.join(output_dir, "train", "labels")
val_images_dir = os.path.join(output_dir, "val", "images")
val_labels_dir = os.path.join(output_dir, "val", "labels")

# Create output directories
os.makedirs(train_images_dir, exist_ok=True)
os.makedirs(train_labels_dir, exist_ok=True)
os.makedirs(val_images_dir, exist_ok=True)
os.makedirs(val_labels_dir, exist_ok=True)

# Function to convert annotations to YOLO format
def convert_to_yolo_format(annotation_path, image_width, image_height):
    with open(annotation_path) as f:
        data = json.load(f)

    yolo_annotations = []
    for shape in data["shapes"]:
        if shape["label"] == "window":  # Filter for "window" objects
            points = shape["points"]
            x_coords = [p[0] for p in points]
            y_coords = [p[1] for p in points]
            x_min = min(x_coords)
            y_min = min(y_coords)
            x_max = max(x_coords)
            y_max = max(y_coords)

            # Convert to YOLO format [class_id, center_x, center_y, width, height]
            center_x = ((x_min + x_max) / 2) / image_width
            center_y = ((y_min + y_max) / 2) / image_height
            width = (x_max - x_min) / image_width
            height = (y_max - y_min) / image_height

            yolo_annotations.append(f"0 {center_x} {center_y} {width} {height}")

    return yolo_annotations

# Load all image filenames and split into train/val sets
image_filenames = [f for f in os.listdir(image_dir) if f.endswith(('.png', '.jpg', '.jpeg'))]
train_files, val_files = train_test_split(image_filenames, test_size=0.2, random_state=42)

# Process and save train and val data
for dataset, image_files, images_dir, labels_dir in [
    ("train", train_files, train_images_dir, train_labels_dir),
    ("val", val_files, val_images_dir, val_labels_dir)
]:
    for img_file in image_files:
        img_path = os.path.join(image_dir, img_file)
        annotation_path = os.path.join(annotation_dir, f"{os.path.splitext(img_file)[0]}.json")

        # Copy image to output directory
        shutil.copy(img_path, images_dir)

        # Open image to get dimensions
        with Image.open(img_path) as img:
            width, height = img.size

        # Convert annotations to YOLO format and save
        yolo_annotations = convert_to_yolo_format(annotation_path, width, height)
        label_path = os.path.join(labels_dir, f"{os.path.splitext(img_file)[0]}.txt")
        with open(label_path, "w") as label_file:
            label_file.write("\n".join(yolo_annotations))

print(f"Dataset organized successfully!")
print(f"Train images: {len(train_files)}, Validation images: {len(val_files)}")

##### checking the input data

In [None]:
def visualize_annotations_by_index(dataset_path, index):
    # Construct paths for images and labels
    images_path = os.path.join(dataset_path, "images")
    labels_path = os.path.join(dataset_path, "labels")

    # Get sorted list of image and label files
    image_files = sorted(os.listdir(images_path))
    label_files = sorted(os.listdir(labels_path))

    # Ensure the index is valid
    if index < 0 or index >= len(image_files):
        raise ValueError(f"Invalid index: {index}. Must be between 0 and {len(image_files) - 1}.")

    # Get the image and label file for the given index
    image_file = os.path.join(images_path, image_files[index])
    label_file = os.path.join(labels_path, label_files[index])

    # Load the image
    img = Image.open(image_file)
    img_width, img_height = img.size

    # Load annotations
    with open(label_file, 'r') as f:
        lines = f.readlines()

    # Create a figure and axis
    fig, ax = plt.subplots(1)
    ax.imshow(img)

    for line in lines:
        parts = line.strip().split()
        cls, x_center, y_center, width, height = map(float, parts)

        # Convert normalized coordinates to pixel values
        x_center *= img_width
        y_center *= img_height
        width *= img_width
        height *= img_height

        # Calculate box corners
        x_min = x_center - width / 2
        y_min = y_center - height / 2

        # Create a rectangle patch
        rect = patches.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='red', facecolor='none')

        # Add the rectangle to the plot
        ax.add_patch(rect)

    plt.axis('off')  # Hide axes for better visualization
    plt.show()

# Example usage
dataset_path = "datasets/train"
index = 3  # 0-based index
visualize_annotations_by_index(dataset_path, index)

##### Define The Model

In [2]:
# Load a pretrained YOLO model (e.g., YOLOv8n for a lightweight version)
model = YOLO('yolov8n.pt')  # Replace with yolov8s.pt, yolov8m.pt, etc., as needed

##### Train the model

In [5]:
# Train the model
model.train(
    data="yolo_data.yaml",         # Path to data.yaml or dict specifying train/val paths
    epochs=2,                # Number of epochs
    batch=16,            # Batch size
    project="FacadeDetection", # Project folder for saving results
    name="YOLOv8_training",    # Experiment name
    pretrained=False,           # Use pretrained weights
)



New https://pypi.org/project/ultralytics/8.3.65 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.58  Python-3.10.0 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce GTX 1660 Ti, 6144MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=FacadeDetection/YOLOv8_training11/weights/best.pt, data=yolo_data.yaml, epochs=2, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=FacadeDetection, name=YOLOv8_training17, exist_ok=False, pretrained=False, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=

[34m[1mtrain: [0mScanning C:\Users\mohamad\WWR\datasets\train\labels.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 200/200 [00:00<?, ?it/s]
[34m[1mval: [0mScanning C:\Users\mohamad\WWR\datasets\val\labels.cache... 50 images, 0 backgrounds, 0 corrupt: 100%|██████████| 50/50 [00:00<?, ?it/s]


Plotting labels to FacadeDetection\YOLOv8_training17\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mFacadeDetection\YOLOv8_training17[0m
Starting training for 2 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/2      4.19G      2.012      1.715      1.605        212        640: 100%|██████████| 13/13 [00:05<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  4.11it/s]

                   all         50        659     0.0403     0.0698     0.0363     0.0129






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/2      4.16G      1.579      1.317      1.371        157        640: 100%|██████████| 13/13 [00:03<00:00,  3.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  3.97it/s]

                   all         50        659      0.825     0.0379     0.0786     0.0318






2 epochs completed in 0.004 hours.
Optimizer stripped from FacadeDetection\YOLOv8_training17\weights\last.pt, 6.2MB
Optimizer stripped from FacadeDetection\YOLOv8_training17\weights\best.pt, 6.2MB

Validating FacadeDetection\YOLOv8_training17\weights\best.pt...
Ultralytics 8.3.58  Python-3.10.0 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce GTX 1660 Ti, 6144MiB)
Model summary (fused): 168 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.52it/s]


                   all         50        659      0.832     0.0379     0.0788     0.0317
Speed: 2.1ms preprocess, 4.2ms inference, 0.0ms loss, 3.0ms postprocess per image
Results saved to [1mFacadeDetection\YOLOv8_training17[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x000001BF00D2DBD0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.0480

##### load the trained weights

In [2]:
# Load the custom-trained weights
model = YOLO('FacadeDetection/YOLOv8_training11/weights/best.pt')

##### validate the model

In [6]:
results = model.val(data="yolo_data.yaml")
print(results)

Ultralytics 8.3.58  Python-3.10.0 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce GTX 1660 Ti, 6144MiB)
Model summary (fused): 168 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning C:\Users\mohamad\WWR\datasets\val\labels.cache... 50 images, 0 backgrounds, 0 corrupt: 100%|██████████| 50/50 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:05<00:00,  1.42s/it]


                   all         50        659      0.809     0.0386     0.0748     0.0315
Speed: 4.3ms preprocess, 16.5ms inference, 0.0ms loss, 2.0ms postprocess per image
Results saved to [1mFacadeDetection\YOLOv8_training172[0m
ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x000001C0410FAB60>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.

In [12]:
# Extract and display the IoU metric
mean_iou = results.box.map50  # mAP50 (mean Average Precision at IoU threshold 0.5)
print(f"Mean IoU (mIoU) at IoU=0.5: {mean_iou}")


Mean IoU (mIoU) at IoU=0.5: 0.0748355043930249


##### some checks to verify the results

In [None]:
# image_path = os.path.join("datasets", "train", "images", "00001.jpg")
image_path = os.path.join("Evaluation_subset", "rectified_facade_DENW11AL0000h3Gt.jpg")

# Run prediction
results = model.predict(source=image_path, conf=0.1, save=False, save_txt=False, show=False)

# Check results
for result in results:
    print("Number of detections:", len(result.boxes))
    print("Bounding boxes:", result.boxes.xyxy)  # Bounding box coordinates
    print("Confidence scores:", result.boxes.conf)  # Confidence scores
    print("Classes:", result.boxes.cls)  # Detected classes

In [None]:
print(model.names)

##### visualizing the results (predicitons)

In [None]:
# Define the image path
image_path = os.path.join("Evaluation_subset", "rectified_facade_DENW11AL0000h3Gt.jpg")
# image_path = os.path.join("datasets", "train", "images", "00001.jpg")
# image_path = os.path.join("Evaluation_subset", "rectified_facade_DENW11AL0000h3Ho.jpg")

# Load the YOLO model
model = YOLO('FacadeDetection/YOLOv8_training8/weights/best.pt')  # Load a pretrained YOLOv8 model

# Run prediction
results = model.predict(source=image_path, conf=0.1, save=False, save_txt=False, show=False)

print("results", results)

# Load and visualize the image with predictions
annotated_image = results[0].plot()  # Plot the results on the image

# Display the image using Matplotlibs
plt.figure(figsize=(10, 10))
plt.imshow(annotated_image)
plt.axis("off")
plt.show()