In [2]:
# -----------------------------------------------------------
# 🧩 Step 1: Chip TIFFs and Rasterize Masks
#
# This step creates 1600x1600 image/mask tiles from each map set
# - Uses 10% overlap (stride = 1440)
# - Includes background-only tiles
# - Rasterizes shapefile features to create mask chips
# - Outputs tile metadata for later preview and reconstruction
#
# Outputs:
# - /[Map Folder]/tiled/images/
# - /[Map Folder]/tiled/masks/
# - /[Map Folder]/tiled/tile_metadata.csv
# - /[Map Folder]/tiled/raster_shape.txt
# -----------------------------------------------------------

import os
import csv
import numpy as np
import rasterio
from rasterio.features import rasterize
from rasterio.windows import Window
from shapely.geometry import box
import geopandas as gpd
import cv2
from tqdm import tqdm

# --- Configuration ---
base_dir = "C:/QGIS"
chip_size = 1600
stride = 1440  # 10% overlap

all_maps = [
    "Bear_Creek_20250112",
    "Bear_Lane",
    "Flight_2",
    "Flight_2_25pct",
    "SFLBC",
    "Sugar_Refugia_20241112",
    "Wildcat_Creek"
]

# --- Main loop over all map sets ---
for map_folder in all_maps:
    print(f"\n🧩 Processing: {map_folder}")
    map_base = os.path.join(base_dir, map_folder)
    tif_path = os.path.join(map_base, f"{map_folder}.tiff")
    shp_path = os.path.join(map_base, f"{map_folder}.shp")

    out_img_dir = os.path.join(map_base, "tiled", "images")
    out_mask_dir = os.path.join(map_base, "tiled", "masks")
    meta_csv = os.path.join(map_base, "tiled", "tile_metadata.csv")
    shape_txt = os.path.join(map_base, "tiled", "raster_shape.txt")

    # Create fresh output folders
    os.makedirs(out_img_dir, exist_ok=True)
    os.makedirs(out_mask_dir, exist_ok=True)

    with rasterio.open(tif_path) as raster:
        labels = gpd.read_file(shp_path)
        if labels.crs != raster.crs:
            labels = labels.to_crs(raster.crs)

        # Save raster shape for stitching
        with open(shape_txt, "w") as f:
            f.write(f"{raster.height},{raster.width}")

        with open(meta_csv, mode="w", newline="") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["filename", "x", "y", "width", "height"])  # Updated header

            count = 0
            for y in tqdm(range(0, raster.height - chip_size + 1, stride), desc=f"Tiling {map_folder}"):
                for x in range(0, raster.width - chip_size + 1, stride):
                    window = Window(x, y, chip_size, chip_size)
                    transform = raster.window_transform(window)
                    bounds = box(*rasterio.windows.bounds(window, raster.transform))

                    intersecting = labels[labels.intersects(bounds)]

                    # --- Save image tile ---
                    image = raster.read([1, 2, 3], window=window)
                    image = np.transpose(image, (1, 2, 0))
                    img_filename = f"chip_{count}.png"
                    cv2.imwrite(os.path.join(out_img_dir, img_filename), cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

                    # --- Save mask (empty if no features) ---
                    if not intersecting.empty:
                        shapes = [(geom, cid) for geom, cid in zip(intersecting.geometry, intersecting["class_id"])]
                        mask = rasterize(
                            shapes,
                            out_shape=(chip_size, chip_size),
                            transform=transform,
                            fill=0,
                            dtype=np.uint8
                        )
                    else:
                        mask = np.zeros((chip_size, chip_size), dtype=np.uint8)

                    cv2.imwrite(os.path.join(out_mask_dir, img_filename), mask)

                    # --- Record metadata ---
                    writer.writerow([img_filename, x, y, chip_size, chip_size])
                    count += 1

    print(f"✅ Created {count} tile+mask pairs for {map_folder}")


🧩 Processing: Bear_Creek_20250112


Tiling Bear_Creek_20250112: 100%|██████████████████████████████████████████████████████| 61/61 [17:33<00:00, 17.28s/it]


✅ Created 2928 tile+mask pairs for Bear_Creek_20250112

🧩 Processing: Bear_Lane


Tiling Bear_Lane: 100%|████████████████████████████████████████████████████████████████| 22/22 [05:48<00:00, 15.86s/it]


✅ Created 572 tile+mask pairs for Bear_Lane

🧩 Processing: Flight_2


Tiling Flight_2: 100%|█████████████████████████████████████████████████████████████████| 32/32 [12:02<00:00, 22.59s/it]


✅ Created 1408 tile+mask pairs for Flight_2

🧩 Processing: Flight_2_25pct


Tiling Flight_2_25pct: 100%|███████████████████████████████████████████████████████████| 14/14 [01:54<00:00,  8.17s/it]


✅ Created 252 tile+mask pairs for Flight_2_25pct

🧩 Processing: SFLBC


Tiling SFLBC: 100%|████████████████████████████████████████████████████████████████████| 38/38 [14:20<00:00, 22.65s/it]


✅ Created 2052 tile+mask pairs for SFLBC

🧩 Processing: Sugar_Refugia_20241112


Tiling Sugar_Refugia_20241112: 100%|███████████████████████████████████████████████████| 30/30 [06:53<00:00, 13.78s/it]


✅ Created 780 tile+mask pairs for Sugar_Refugia_20241112

🧩 Processing: Wildcat_Creek


Tiling Wildcat_Creek: 100%|████████████████████████████████████████████████████████████| 43/43 [07:19<00:00, 10.21s/it]


✅ Created 1505 tile+mask pairs for Wildcat_Creek


In [1]:
# -----------------------------------------------------------
# ⚡ Step 2: Convert Mask Tiles to YOLOv8 Polygon Labels
#
# This step converts rasterized mask tiles into YOLOv8-style
# polygon .txt labels, enabling training with segmentation models.
#
# For each map folder:
# - Loads corresponding image and mask tiles
# - Resizes both to 640×640 (matching YOLOv8 input size)
# - Extracts external contours for each class in the mask
# - Writes polygons in YOLO format:
#     • class_id x1 y1 x2 y2 ... xn yn (normalized coordinates)
#
# Enhancements:
# - Runs in parallel using multithreading for speed
# - Background-only tiles are preserved with empty label files
# - Automatically splits 70/30 into train and val subsets
#
# Input:
# - /[map]/tiled/images/      ← RGB tiles
# - /[map]/tiled/masks/       ← Grayscale masks (class IDs)
#
# Output:
# - /[map]/yolo_dataset_640/images/train/val/
# - /[map]/yolo_dataset_640/labels/train/val/
#
# Notes:
# - Supports 3-class masks: 0 = Water, 1 = Road, 2 = PVeg
# - No remapping is needed if classes are already 0-based
# - This format is compatible with YOLOv8-seg training
# -----------------------------------------------------------

import os
import cv2
import numpy as np
import random
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# --- Configuration ---
base_dir = "C:/QGIS"
target_size = 640
num_threads = 8

all_maps = [
    "Bear_Creek_20250112",
    "Bear_Lane",
    "Flight_2",
    "Flight_2_25pct",
    "SFLBC",
    "Sugar_Refugia_20241112",
    "Wildcat_Creek"
]

def mask_to_polygons(mask):
    contours = {}
    for cls_id in np.unique(mask):
        if cls_id == 0:
            continue
        binary = (mask == cls_id).astype(np.uint8)
        cnts, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if cnts:
            contours[cls_id] = cnts
    return contours

def process_file(fname, img_input_dir, mask_input_dir, img_out, lbl_out):
    img_path = os.path.join(img_input_dir, fname)
    mask_path = os.path.join(mask_input_dir, fname)

    try:
        img = cv2.imread(img_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if img is None or mask is None:
            return False  # Skip bad data

        img_resized = cv2.resize(img, (target_size, target_size), interpolation=cv2.INTER_AREA)
        mask_resized = cv2.resize(mask, (target_size, target_size), interpolation=cv2.INTER_NEAREST)
        cv2.imwrite(os.path.join(img_out, fname), img_resized)

        label_path = os.path.join(lbl_out, fname.replace(".png", ".txt"))
        contours = mask_to_polygons(mask_resized)

        if not contours:
            open(label_path, "w").close()
            return True  # Background-only tile

        with open(label_path, "w") as f:
            for cls_id, cnts in contours.items():
                for cnt in cnts:
                    if len(cnt) < 3:
                        continue
                    pts = cnt.reshape(-1, 2).astype(np.float32) / target_size
                    coords = " ".join(f"{x:.6f} {y:.6f}" for x, y in pts)
                    f.write(f"{cls_id} {coords}\n")

        return False
    except Exception as e:
        print(f"⚠️ Error processing {fname}: {e}")
        return False

# --- Process each map ---
for map_folder in all_maps:
    print(f"\n🧩 Converting: {map_folder}")

    base_map_dir = os.path.join(base_dir, map_folder)
    img_input_dir = os.path.join(base_map_dir, "tiled", "images")
    mask_input_dir = os.path.join(base_map_dir, "tiled", "masks")
    out_base = os.path.join(base_map_dir, "yolo_dataset_640")

    out_dirs = {
        "train_img": os.path.join(out_base, "images", "train"),
        "val_img": os.path.join(out_base, "images", "val"),
        "train_lbl": os.path.join(out_base, "labels", "train"),
        "val_lbl": os.path.join(out_base, "labels", "val"),
    }
    for d in out_dirs.values():
        os.makedirs(d, exist_ok=True)

    chip_files = [f for f in os.listdir(img_input_dir) if f.endswith(".png")]
    random.shuffle(chip_files)
    split_idx = int(len(chip_files) * 0.7)
    train_files = chip_files[:split_idx]
    val_files = chip_files[split_idx:]

    for mode, files in [("train", train_files), ("val", val_files)]:
        img_out = out_dirs[f"{mode}_img"]
        lbl_out = out_dirs[f"{mode}_lbl"]
        bg_count = 0

        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            futures = [executor.submit(process_file, f, img_input_dir, mask_input_dir, img_out, lbl_out) for f in files]
            for f in tqdm(as_completed(futures), total=len(futures), desc=f"{map_folder} [{mode}]"):
                if f.result():
                    bg_count += 1

        print(f"✅ {map_folder} [{mode}]: {len(files)} tiles processed ({bg_count} background-only)")


🧩 Converting: Bear_Creek_20250112


Bear_Creek_20250112 [train]: 100%|█████████████████████████████████████████████████| 2049/2049 [01:12<00:00, 28.34it/s]


✅ Bear_Creek_20250112 [train]: 2049 tiles processed (1341 background-only)


Bear_Creek_20250112 [val]: 100%|█████████████████████████████████████████████████████| 879/879 [00:30<00:00, 28.60it/s]


✅ Bear_Creek_20250112 [val]: 879 tiles processed (585 background-only)

🧩 Converting: Bear_Lane


Bear_Lane [train]: 100%|█████████████████████████████████████████████████████████████| 400/400 [00:20<00:00, 19.29it/s]


✅ Bear_Lane [train]: 400 tiles processed (141 background-only)


Bear_Lane [val]: 100%|███████████████████████████████████████████████████████████████| 172/172 [00:09<00:00, 17.75it/s]


✅ Bear_Lane [val]: 172 tiles processed (53 background-only)

🧩 Converting: Flight_2


Flight_2 [train]: 100%|██████████████████████████████████████████████████████████████| 985/985 [00:51<00:00, 19.29it/s]


✅ Flight_2 [train]: 985 tiles processed (387 background-only)


Flight_2 [val]: 100%|████████████████████████████████████████████████████████████████| 423/423 [00:21<00:00, 19.25it/s]


✅ Flight_2 [val]: 423 tiles processed (169 background-only)

🧩 Converting: Flight_2_25pct


Flight_2_25pct [train]: 100%|████████████████████████████████████████████████████████| 176/176 [00:07<00:00, 24.32it/s]


✅ Flight_2_25pct [train]: 176 tiles processed (105 background-only)


Flight_2_25pct [val]: 100%|████████████████████████████████████████████████████████████| 76/76 [00:03<00:00, 22.38it/s]


✅ Flight_2_25pct [val]: 76 tiles processed (40 background-only)

🧩 Converting: SFLBC


SFLBC [train]: 100%|███████████████████████████████████████████████████████████████| 1436/1436 [01:01<00:00, 23.27it/s]


✅ SFLBC [train]: 1436 tiles processed (699 background-only)


SFLBC [val]: 100%|███████████████████████████████████████████████████████████████████| 616/616 [00:27<00:00, 22.62it/s]


✅ SFLBC [val]: 616 tiles processed (302 background-only)

🧩 Converting: Sugar_Refugia_20241112


Sugar_Refugia_20241112 [train]: 100%|████████████████████████████████████████████████| 546/546 [00:25<00:00, 21.62it/s]


✅ Sugar_Refugia_20241112 [train]: 546 tiles processed (232 background-only)


Sugar_Refugia_20241112 [val]: 100%|██████████████████████████████████████████████████| 234/234 [00:10<00:00, 22.01it/s]


✅ Sugar_Refugia_20241112 [val]: 234 tiles processed (108 background-only)

🧩 Converting: Wildcat_Creek


Wildcat_Creek [train]: 100%|███████████████████████████████████████████████████████| 1053/1053 [00:35<00:00, 29.44it/s]


✅ Wildcat_Creek [train]: 1053 tiles processed (803 background-only)


Wildcat_Creek [val]: 100%|███████████████████████████████████████████████████████████| 452/452 [00:15<00:00, 29.81it/s]

✅ Wildcat_Creek [val]: 452 tiles processed (346 background-only)





In [2]:
# -----------------------------------------------------------
# 📦 Unified YOLO Dataset Builder (Excludes Holdout Set)
#
# This script merges YOLO-formatted image and label tiles from
# multiple map sets into a single unified dataset structure.
#
# Excludes holdout map: Flight_2_25pct
# Output folders:
# - dataset/images/train/
# - dataset/images/val/
# - dataset/labels/train/
# - dataset/labels/val/
# - dataset/manifests/train_files.txt and val_files.txt
# -----------------------------------------------------------

import os
import shutil
import random
from tqdm import tqdm

# --- Configuration ---
base_dir = "C:/QGIS"
dataset_dir = os.path.join(base_dir, "dataset")
img_train_dir = os.path.join(dataset_dir, "images", "train")
img_val_dir = os.path.join(dataset_dir, "images", "val")
lbl_train_dir = os.path.join(dataset_dir, "labels", "train")
lbl_val_dir = os.path.join(dataset_dir, "labels", "val")
manifest_dir = os.path.join(dataset_dir, "manifests")
train_split = 0.8

holdout_set = "Flight_2_25pct"
all_sets = [
    "Bear_Creek_20250112", "Bear_Lane", "Flight_2",
    "SFLBC", "Sugar_Refugia_20241112", "Wildcat_Creek"
]  # Excludes holdout

for d in [img_train_dir, img_val_dir, lbl_train_dir, lbl_val_dir, manifest_dir]:
    os.makedirs(d, exist_ok=True)

train_list, val_list = [], []

# --- Merge sets ---
for set_name in tqdm(all_sets, desc="Merging sets"):
    base_map_dir = os.path.join(base_dir, set_name)
    img_base = os.path.join(base_map_dir, "yolo_dataset_640", "images")
    lbl_base = os.path.join(base_map_dir, "yolo_dataset_640", "labels")

    # Collect all labeled image paths
    all_images = []
    for split in ["train", "val"]:
        img_dir = os.path.join(img_base, split)
        lbl_dir = os.path.join(lbl_base, split)
        if os.path.exists(img_dir):
            for fname in os.listdir(img_dir):
                if fname.endswith(".png"):
                    all_images.append((os.path.join(img_dir, fname),
                                       os.path.join(lbl_dir, fname.replace(".png", ".txt"))))

    random.shuffle(all_images)
    split_idx = int(len(all_images) * train_split)
    split_files = [("train", all_images[:split_idx]), ("val", all_images[split_idx:])]

    for mode, files in split_files:
        img_out_dir = img_train_dir if mode == "train" else img_val_dir
        lbl_out_dir = lbl_train_dir if mode == "train" else lbl_val_dir
        manifest = train_list if mode == "train" else val_list

        for img_path, lbl_path in files:
            new_img_name = f"{set_name}_{os.path.basename(img_path)}"
            new_lbl_name = new_img_name.replace(".png", ".txt")

            shutil.copy(img_path, os.path.join(img_out_dir, new_img_name))
            if os.path.exists(lbl_path):
                shutil.copy(lbl_path, os.path.join(lbl_out_dir, new_lbl_name))
            else:
                open(os.path.join(lbl_out_dir, new_lbl_name), "w").close()

            manifest.append(f"images/{mode}/{new_img_name}")

# --- Save manifest files ---
with open(os.path.join(manifest_dir, "train_files.txt"), "w") as f:
    f.write("\n".join(sorted(train_list)) + "\n")

with open(os.path.join(manifest_dir, "val_files.txt"), "w") as f:
    f.write("\n".join(sorted(val_list)) + "\n")

print(f"✅ Merged {len(train_list)} train / {len(val_list)} val tiles into: {dataset_dir}")
print(f"ℹ️ Held-out map: {holdout_set}")

Merging sets: 100%|██████████████████████████████████████████████████████████████████████| 6/6 [03:12<00:00, 32.14s/it]

✅ Merged 7512 train / 1880 val tiles into: C:/QGIS\dataset
ℹ️ Held-out map: Flight_2_25pct





In [1]:
# --- Training done in Train.py 
import os
from ultralytics import YOLO
import torch

def main():
    torch.cuda.empty_cache()

    dataset_dir = "C:/QGIS/dataset"
    model_type = "yolov8s-seg.pt"
    img_size = 640
    save_dir = "C:/QGIS/runs/segment/train"
    class_names = ["Water", "Road", "PVeg"]

    data_yaml = os.path.join(dataset_dir, "data.yaml")
    with open(data_yaml, "w") as f:
        f.write(f"path: {dataset_dir}\n")
        f.write("train: images/train\n")
        f.write("val: images/val\n")
        f.write(f"nc: {len(class_names)}\n")
        f.write(f"names: {class_names}\n")

    print(f"✅ data.yaml written to: {data_yaml}")

    model = YOLO(model_type)
    model.train(
        data=data_yaml,
        imgsz=img_size,
        epochs=50,
        batch=16,
        workers=2,
        amp=True,
        patience=5,
        device="cuda",
        save=True,
        save_period=-1,
        project=save_dir,
        name="main_model",
        verbose=True,
        plots=False,
        cache=True
    )

if __name__ == "__main__":
    main()


✅ data.yaml written to: C:/QGIS/dataset\data.yaml
New https://pypi.org/project/ultralytics/8.3.124 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.111  Python-3.10.16 torch-1.12.1+cu113 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0mtask=segment, mode=train, model=yolov8n-seg.pt, data=C:/QGIS/dataset\data.yaml, epochs=50, time=None, patience=5, batch=16, imgsz=640, save=True, save_period=-1, cache=True, device=cuda, workers=2, project=C:/QGIS/runs/segment/train, name=main_model8, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=False, source=None, vid_stride=1, stream_buffer=False, visualize=False, augme

[34m[1mtrain: [0mScanning C:\QGIS\dataset\labels\train.cache... 6770 images, 4205 backgrounds, 0 corrupt: 100%|██████████| 6770/6[0m




[34m[1mtrain: [0mCaching images (7.7GB RAM): 100%|██████████| 6770/6770 [00:27<00:00, 248.50it/s][0m


[34m[1mval: [0mFast image access  (ping: 0.30.1 ms, read: 155.2209.9 MB/s, size: 338.2 KB)


[34m[1mval: [0mScanning C:\QGIS\dataset\labels\val.cache... 3077 images, 1808 backgrounds, 0 corrupt: 100%|██████████| 3077/3077 [0m




[34m[1mval: [0mCaching images (3.5GB RAM): 100%|██████████| 3077/3077 [00:13<00:00, 233.78it/s][0m


[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 66 weight(decay=0.0), 77 weight(decay=0.0005), 76 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mC:\QGIS\runs\segment\train\main_model8[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       1/50      3.01G      1.449      3.406      1.923      1.435         21        640: 100%|██████████| 424/424 [06:
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP


                   all       3077       9137      0.345      0.198      0.174     0.0911      0.331      0.171       0.14     0.0567

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       2/50      3.16G      1.284      2.959      1.328      1.315          6        640: 100%|██████████| 424/424 [05:
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP


                   all       3077       9137      0.298      0.261       0.19      0.106      0.294      0.243       0.18     0.0927

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       3/50      3.16G      1.189      2.829      1.133      1.251         17        640: 100%|██████████| 424/424 [05:
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP


                   all       3077       9137      0.301      0.211      0.173     0.0878      0.321       0.17      0.148     0.0639

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       4/50      3.16G      1.104      2.714      0.997      1.197         19        640: 100%|██████████| 424/424 [05:
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP


                   all       3077       9137      0.393      0.257      0.236      0.132      0.377      0.243      0.211      0.112

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       5/50      3.16G      1.019      2.598      0.892      1.147         18        640: 100%|██████████| 424/424 [04:
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-9


                   all       3077       9137       0.39      0.271      0.253      0.154      0.419      0.248      0.239      0.122

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       6/50      3.16G      0.952      2.509     0.8264      1.112         15        6
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-9


                   all       3077       9137       0.39      0.268      0.226      0.124      0.386      0.263       0.21      0.102

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       7/50      3.16G     0.9029      2.399     0.7708      1.086          8        6
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-9


                   all       3077       9137      0.426      0.279      0.274      0.173      0.417       0.26      0.248      0.119

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       8/50      3.16G     0.8693      2.353     0.7325      1.071        127        6


KeyboardInterrupt: 

In [None]:
# Model Report

In [7]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

# --- Paths ---
results_path = "C:/QGIS/runs/segment/train12/results.csv"
report_path = "C:/QGIS/diagnostics/model_report.pdf"
os.makedirs(os.path.dirname(report_path), exist_ok=True)

# --- Load results and clean columns ---
df = pd.read_csv(results_path)
df.columns = df.columns.str.strip()

# --- Key epoch metrics ---
final_epoch = df.iloc[-1]
best_map_epoch = df["metrics/mAP50(B)"].idxmax()
best_row = df.iloc[best_map_epoch]

# --- Basic training config (update if needed) ---
training_config = {
    "Model": "YOLOv8n-seg",
    "Input Size": "640x640",
    "Epochs": len(df),
    "Batch Size": 8,
    "Optimizer": "SGD (default)",
    "Confidence Threshold": 0.1,
    "Tile Size": "1024x1024",
    "Mask Source": "Polygon label → Raster mask via cv2.fillPoly"
}

# --- Metrics to plot ---
metrics = {
    "metrics/mAP50(B)": "mAP@0.5",
    "metrics/mAP50-95(B)": "mAP@0.5–0.95",
    "metrics/precision(B)": "Precision",
    "metrics/recall(B)": "Recall"
}
colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728"]

# --- Create PDF report ---
with PdfPages(report_path) as pdf:
    # Title + Summary page
    plt.figure(figsize=(11, 8.5))
    plt.text(0.5, 0.78, "Model Performance Report", ha="center", fontsize=24)
    plt.text(0.5, 0.70, "Project: River and Road Segmentation", ha="center", fontsize=14)
    plt.text(0.5, 0.63, f"Best mAP@0.5: {best_row['metrics/mAP50(B)']:.3f} at epoch {best_map_epoch}", ha="center", fontsize=12)
    plt.text(0.5, 0.57, f"Final Epoch Precision: {final_epoch['metrics/precision(B)']:.3f}", ha="center", fontsize=12)
    plt.text(0.5, 0.51, f"Final Epoch Recall: {final_epoch['metrics/recall(B)']:.3f}", ha="center", fontsize=12)
    plt.text(0.5, 0.45, "Report generated from YOLOv8 results.csv", ha="center", fontsize=10)
    plt.axis("off")
    pdf.savefig()
    plt.close()

    # Training configuration page
    fig, ax = plt.subplots(figsize=(11, 4))
    ax.axis("off")
    table_data = list(training_config.items())
    table = ax.table(cellText=table_data, colLabels=["Parameter", "Value"], loc="center")
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(1.2, 1.4)
    plt.title("Training Configuration", fontsize=14)
    pdf.savefig()
    plt.close()

    # Metric plots
    for i, (key, label) in enumerate(metrics.items()):
        if key in df.columns:
            plt.figure(figsize=(10, 4))
            plt.plot(df[key], marker='o', linewidth=2, color=colors[i])
            plt.title(f"{label} Over Epochs")
            plt.xlabel("Epoch")
            plt.ylabel(label)
            plt.grid(True)
            plt.tight_layout()
            pdf.savefig()
            plt.close()

    # Performance summary table
    summary_data = {
        "Metric": ["mAP@0.5", "mAP@0.5–0.95", "Precision", "Recall"],
        "Final Epoch": [
            f"{final_epoch.get('metrics/mAP50(B)', 0):.3f}",
            f"{final_epoch.get('metrics/mAP50-95(B)', 0):.3f}",
            f"{final_epoch.get('metrics/precision(B)', 0):.3f}",
            f"{final_epoch.get('metrics/recall(B)', 0):.3f}"
        ],
        f"Best Epoch ({best_map_epoch})": [
            f"{best_row.get('metrics/mAP50(B)', 0):.3f}",
            f"{best_row.get('metrics/mAP50-95(B)', 0):.3f}",
            f"{best_row.get('metrics/precision(B)', 0):.3f}",
            f"{best_row.get('metrics/recall(B)', 0):.3f}"
        ]
    }
    summary_df = pd.DataFrame(summary_data)
    fig, ax = plt.subplots(figsize=(10, 2.5))
    ax.axis("off")
    table = ax.table(cellText=summary_df.values, colLabels=summary_df.columns, loc="center")
    table.auto_set_font_size(False)
    table.set_fontsize(12)
    table.scale(1.2, 1.8)
    plt.title("Summary of Model Performance", fontsize=14)
    pdf.savefig()
    plt.close()

print(f"✅ PDF report saved to: {report_path}")

✅ PDF report saved to: C:/QGIS/diagnostics/model_report.pdf


In [3]:
# -----------------------------------------------------------
# 🤖 Inference: Run YOLOv8 Segmentation on Tiled Map Images
#
# This step uses a trained YOLOv8 segmentation model to:
# - Predict object masks for each 1024×1024 tile image
# - Save polygon .txt files (YOLO format)
# - Optionally save visualized overlays
#
# It processes all tiles from the specified map folder:
# - /[Map Folder]/tiled/images/
# and outputs:
# - Polygon .txt labels to: /[Map Folder]/predictions/predict_txt/labels/
# - Optional .jpg overlays to: /[Map Folder]/predictions/predict_txt/images/
#
# Classes:
#     • 0 = Water
#     • 1 = Road
#     • 2 = PVeg (Perennial Vegetation)
#
# Parameters:
# - imgsz: Image size used during training (e.g. 640)
# - conf: Confidence threshold (e.g. 0.1 for permissive detection)
# - retina_masks: True for high-quality mask rendering
# - save_txt: Enables saving polygon .txt files (for reconstruction)
#
# Output:
# - One .txt polygon label file per tile (normalized coordinates)
# - Used in downstream mask and shapefile generation
#
# Notes:
# - This step does not create mask images directly, but polygon .txt labels
# - Masks are generated in the following step by rasterizing these polygons
# - Use consistent `imgsz` and class mapping with training
# -----------------------------------------------------------

from ultralytics import YOLO
import torch
import os

# --- Config ---
map_folder = "Flight_2_25pct"
base_dir = "C:/QGIS"
image_dir = os.path.join(base_dir, map_folder, "tiled", "images")

output_name = "predict_txt"
output_dir = os.path.join(base_dir, map_folder, "predictions")

# --- Load model ---
model_path = os.path.join(base_dir, "runs", "segment", "train", "main_model9", "weights", "best.pt")
model = YOLO(model_path)

# --- Predict with manageable size ---
model.predict(
    source=image_dir,
    imgsz=640,  # Lower if needed: 640, 768, etc.
    conf=0.01,
    save=False,
    save_txt=True,
    save_conf=False,
    retina_masks=False,
    name=output_name,
    project=output_dir,
    device="cuda" if torch.cuda.is_available() else "cpu"
)

print(f"✅ Inference complete. Check: {output_dir}/{output_name}/labels/")


image 1/252 C:\QGIS\Flight_2_25pct\tiled\images\chip_0.png: 640x640 (no detections), 122.0ms
image 2/252 C:\QGIS\Flight_2_25pct\tiled\images\chip_1.png: 640x640 (no detections), 81.3ms
image 3/252 C:\QGIS\Flight_2_25pct\tiled\images\chip_10.png: 640x640 (no detections), 70.2ms
image 4/252 C:\QGIS\Flight_2_25pct\tiled\images\chip_100.png: 640x640 205 PVegs, 74.5ms
image 5/252 C:\QGIS\Flight_2_25pct\tiled\images\chip_101.png: 640x640 250 PVegs, 80.9ms
image 6/252 C:\QGIS\Flight_2_25pct\tiled\images\chip_102.png: 640x640 300 PVegs, 107.7ms
image 7/252 C:\QGIS\Flight_2_25pct\tiled\images\chip_103.png: 640x640 234 PVegs, 67.1ms
image 8/252 C:\QGIS\Flight_2_25pct\tiled\images\chip_104.png: 640x640 80 PVegs, 88.6ms
image 9/252 C:\QGIS\Flight_2_25pct\tiled\images\chip_105.png: 640x640 (no detections), 74.7ms
image 10/252 C:\QGIS\Flight_2_25pct\tiled\images\chip_106.png: 640x640 (no detections), 78.3ms
image 11/252 C:\QGIS\Flight_2_25pct\tiled\images\chip_107.png: 640x640 (no detections), 79.1

In [4]:
# -----------------------------------------------------------
# 🗺️ Final Map Output: Generate Georeferenced Shapefile from Predicted Masks
# -----------------------------------------------------------

import os
import cv2
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import rasterio
from tqdm import tqdm

# --- Config ---
map_folder = "Flight_2_25pct"
base_dir = f"C:/QGIS/{map_folder}"
mask_dir = os.path.join(base_dir, "predictions", "predict_txt", "labels")
tile_metadata_path = os.path.join(base_dir, "tiled", "tile_metadata.csv")
raster_shape_path = os.path.join(base_dir, "tiled", "raster_shape.txt")
tif_path = os.path.join(base_dir, f"{map_folder}.tiff")
shapefile_path = os.path.join(base_dir, f"{map_folder} Segmentation.shp")

chip_size = 1600        # Original chip size used during tiling
inference_size = 640    # Size used during YOLOv8 inference

# --- Load metadata and raster georeferencing ---
tile_meta = pd.read_csv(tile_metadata_path)
with open(raster_shape_path, "r") as f:
    height, width = map(int, f.read().strip().split(","))

with rasterio.open(tif_path) as src:
    transform = src.transform
    crs = src.crs

# --- Collect all polygons from predicted labels ---
features = []

for _, row in tqdm(tile_meta.iterrows(), total=len(tile_meta), desc="Stitching Tiles"):
    fname, x, y = row["filename"], int(row["x"]), int(row["y"])
    label_path = os.path.join(mask_dir, fname.replace(".png", ".txt"))
    if not os.path.exists(label_path):
        continue

    try:
        with open(label_path, "r") as f:
            lines = f.readlines()
    except:
        continue

    for line in lines:
        parts = line.strip().split()
        if len(parts) < 7:
            continue  # Skip degenerate polygons

        cls_id = int(float(parts[0]))
        coords = list(map(float, parts[1:]))

        pts = np.array(coords, dtype=np.float32).reshape(-1, 2)
        pts *= chip_size  # Scale to original chip size (not inference size)

        # Offset to global pixel coordinates
        pts[:, 0] += x
        pts[:, 1] += y

        # Convert pixel to geographic coordinates
        geo_pts = [rasterio.transform.xy(transform, y_, x_, offset='center') for x_, y_ in pts]
        poly = Polygon(geo_pts)

        if poly.is_valid and poly.area > 0:
            features.append({
                "geometry": poly,
                "class_id": cls_id
            })

# --- Export to shapefile ---
gdf = gpd.GeoDataFrame(features, crs=crs)
gdf.to_file(shapefile_path)

print(f"✅ Shapefile saved to: {shapefile_path}")

Stitching Tiles: 100%|███████████████████████████████████████████████████████████████| 252/252 [08:50<00:00,  2.10s/it]


✅ Shapefile saved to: C:/QGIS/Flight_2_25pct\Flight_2_25pct Segmentation.shp
