In [6]:
import os
import csv
import numpy as np
import rasterio
from rasterio.features import rasterize
from rasterio.windows import Window
from shapely.geometry import box
import geopandas as gpd
import cv2
from tqdm import tqdm

# --- Config ---
map_folder = "Project_2024_09_20"
base_dir = "C:/QGIS"
chip_size = 1024  # tile edge length in raster pixels
stride = 922      # step between tile origins; smaller than chip_size, so neighbouring tiles overlap

# Shapefile class_id → YOLO class index
class_remap = {
    1: 0,  # Road
    2: 1,  # PVeg
    3: 2,  # Water
}

map_base = os.path.join(base_dir, map_folder)
tif_path = os.path.join(map_base, f"{map_folder}.tiff")
shp_path = os.path.join(map_base, f"{map_folder}.shp")

# Everything this cell produces lives under <map>/tiled
tiled_dir = os.path.join(map_base, "tiled")
out_img_dir = os.path.join(tiled_dir, "images")
out_mask_dir = os.path.join(tiled_dir, "masks")
meta_csv = os.path.join(tiled_dir, "tile_metadata.csv")
shape_txt = os.path.join(tiled_dir, "raster_shape.txt")

for out_dir in (out_img_dir, out_mask_dir):
    os.makedirs(out_dir, exist_ok=True)

# --- Read the raster dimensions/CRS and persist the shape as "height,width" ---
with rasterio.open(tif_path) as raster:
    raster_height = raster.height
    raster_width = raster.width
    raster_crs = raster.crs
    with open(shape_txt, "w") as f:
        f.write(",".join(str(dim) for dim in (raster_height, raster_width)))

# --- Load the labels once, reprojected into the raster CRS ---
labels = gpd.read_file(shp_path).to_crs(raster_crs)
sindex = labels.sindex  # spatial index used to pre-filter candidate geometries per tile

# --- Enumerate tile origins row by row (only tiles that fit fully inside the raster) ---
tile_origins = []
for row_off in range(0, raster_height - chip_size + 1, stride):
    for col_off in range(0, raster_width - chip_size + 1, stride):
        tile_origins.append((row_off, col_off))

# --- Cut every tile: one RGB chip and one class mask per origin ---
results = []
with rasterio.open(tif_path) as raster:
    for idx, (y, x) in enumerate(tqdm(tile_origins, desc="🌍 Chipping tiles")):
        window = Window(x, y, chip_size, chip_size)
        transform = raster.window_transform(window)
        bounds = box(*rasterio.windows.bounds(window, raster.transform))

        # Bounding-box lookup through the spatial index, then an exact intersection test
        candidate_rows = list(sindex.intersection(bounds.bounds))
        possible_matches = labels.iloc[candidate_rows]
        intersecting = possible_matches[possible_matches.intersects(bounds)]

        # Bands 1-3 come back band-first; reorder to (rows, cols, bands) before writing
        img_name = f"chip_{idx}.png"
        img = np.transpose(raster.read([1, 2, 3], window=window), (1, 2, 0))
        cv2.imwrite(os.path.join(out_img_dir, img_name), cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

        # 255 marks "no label" both for empty tiles and for unlabeled pixels
        if intersecting.empty:
            mask = np.full((chip_size, chip_size), 255, dtype=np.uint8)
        else:
            shapes = []
            for geom, cid in zip(intersecting.geometry, intersecting["class_id"]):
                shapes.append((geom, class_remap.get(cid, 255)))  # unknown ids also become 255
            mask = rasterize(
                shapes,
                out_shape=(chip_size, chip_size),
                transform=transform,
                fill=255,
                dtype=np.uint8
            )

        cv2.imwrite(os.path.join(out_mask_dir, img_name), mask)
        results.append((img_name, x, y))

# --- Save metadata: chip file name plus its pixel origin in the source raster ---
with open(meta_csv, mode="w", newline="") as f:
    csv.writer(f).writerows([["filename", "x", "y"]] + results)

print(f"✅ {len(results)} tiles generated.")

🌍 Chipping tiles: 100%|███████████████████████████████████████████████████████████| 4560/4560 [12:14<00:00,  6.20it/s]


✅ 4560 tiles generated.


In [7]:
import os
import cv2
import numpy as np
import random
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# --- Configuration ---
base_dir = "C:/QGIS"  # root folder holding one sub-folder per map
target_size = 640     # edge length (pixels) that chips and masks are resized to
num_threads = 8       # worker threads used for the per-tile conversion

# Note: masks already use YOLO-aligned class IDs: 0 = Road, 1 = PVeg, 2 = Water
# Maps to convert; each is expected to contain tiled/images and tiled/masks.
all_maps = [
    "Bear_Creek_20250112", "Bear_Lane",
    "Flight_2", "Flight_2_25pct",
    "SFLBC", "Sugar_Refugia_20241112",
    "Wildcat_Creek", "Project_2024_09_20",
]

def mask_to_polygons(mask):
    """Extract the outer contours of every labelled class in a class-index mask.

    Args:
        mask: array of class indices; the value 255 is treated as void and ignored.

    Returns:
        dict mapping each class value found in ``mask`` (other than 255) to the
        contours ``cv2.findContours`` reports for that class. Classes without
        any contour are left out, so an all-void mask yields an empty dict.
    """
    polygons_by_class = {}
    labelled_classes = [value for value in np.unique(mask) if value != 255]
    for class_value in labelled_classes:
        # Binary image of this class only; RETR_EXTERNAL keeps outer boundaries only.
        class_pixels = np.equal(mask, class_value).astype(np.uint8)
        outlines, _hierarchy = cv2.findContours(
            class_pixels, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        if not outlines:
            continue
        polygons_by_class[class_value] = outlines
    return polygons_by_class

def process_file(fname, img_input_dir, mask_input_dir, img_out, lbl_out):
    """Resize one chip/mask pair to ``target_size`` and write it as a YOLO-seg sample.

    Args:
        fname: file name shared by the chip image and its mask (``*.png``).
        img_input_dir: directory containing the chip image.
        mask_input_dir: directory containing the class-index mask (255 = void).
        img_out: directory the resized image is written to.
        lbl_out: directory the ``.txt`` label file is written to.

    Returns:
        True  - the mask held no labelled class. The tile is either written as
                a negative sample (image plus empty label file) or, when the
                image is near-black and flat, dropped without writing anything.
        False - a labelled tile was written (image plus polygon label file).
        None  - the image or mask could not be read, or processing raised.
                The caller's progress loop counts every non-bool result as an
                error, so failures must not be reported as ``False``.
    """
    img_path = os.path.join(img_input_dir, fname)
    mask_path = os.path.join(mask_input_dir, fname)

    try:
        img = cv2.imread(img_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if img is None or mask is None:
            return None  # unreadable or missing input: report as an error, not a success

        # Nearest-neighbour for the mask so class indices are never blended.
        img_resized = cv2.resize(img, (target_size, target_size), interpolation=cv2.INTER_AREA)
        mask_resized = cv2.resize(mask, (target_size, target_size), interpolation=cv2.INTER_NEAREST)

        # Same ".png" -> ".txt" naming rule the merge step uses to find labels.
        label_path = os.path.join(lbl_out, fname.replace(".png", ".txt"))
        contours = mask_to_polygons(mask_resized)

        # --- Filter logic: keep only meaningful background tiles ---
        if not contours:
            # Near-black, flat image: most likely raster border padding.
            if np.mean(img_resized) < 5 and np.std(img_resized) < 2:
                return True  # Skip black/void tile entirely
            with open(label_path, "w"):
                pass  # empty label file marks a valid negative sample
            cv2.imwrite(os.path.join(img_out, fname), img_resized)
            return True

        # Write image and label file
        cv2.imwrite(os.path.join(img_out, fname), img_resized)
        with open(label_path, "w") as f:
            for cls_id, cnts in contours.items():
                for cnt in cnts:
                    if len(cnt) < 3:
                        continue  # fewer than three vertices cannot form a polygon
                    # Contour points are (x, y) pixels; normalise to [0, 1).
                    pts = cnt.reshape(-1, 2).astype(np.float32) / target_size
                    coords = " ".join(f"{x:.6f} {y:.6f}" for x, y in pts)
                    f.write(f"{int(cls_id)} {coords}\n")

        return False
    except Exception as e:
        print(f"⚠️ Error processing {fname}: {e}")
        return None

# --- Process each map ---
# Fixed seed + sorted listing make the 70/30 split deterministic. The output
# folders are not cleaned between runs, so a split that changed on every run
# would leave the same chip in both train and val after re-executing this cell.
split_seed = 42

for map_folder in all_maps:
    print(f"\n🧩 Converting: {map_folder}")

    base_map_dir = os.path.join(base_dir, map_folder)
    img_input_dir = os.path.join(base_map_dir, "tiled", "images")
    mask_input_dir = os.path.join(base_map_dir, "tiled", "masks")
    out_base = os.path.join(base_map_dir, "yolo_dataset_640")

    out_dirs = {
        "train_img": os.path.join(out_base, "images", "train"),
        "val_img": os.path.join(out_base, "images", "val"),
        "train_lbl": os.path.join(out_base, "labels", "train"),
        "val_lbl": os.path.join(out_base, "labels", "val"),
    }
    for d in out_dirs.values():
        os.makedirs(d, exist_ok=True)

    # os.listdir order is arbitrary, so sort before the seeded shuffle.
    chip_files = sorted(f for f in os.listdir(img_input_dir) if f.endswith(".png"))
    random.Random(split_seed).shuffle(chip_files)
    split_idx = int(len(chip_files) * 0.7)
    train_files = chip_files[:split_idx]
    val_files = chip_files[split_idx:]

    for mode, files in [("train", train_files), ("val", val_files)]:
        img_out = out_dirs[f"{mode}_img"]
        lbl_out = out_dirs[f"{mode}_lbl"]
        bg_count = 0
        skipped = 0

        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            futures = [
                executor.submit(process_file, chip, img_input_dir, mask_input_dir, img_out, lbl_out)
                for chip in files
            ]
            for future in tqdm(as_completed(futures), total=len(futures), desc=f"{map_folder} [{mode}]"):
                result = future.result()
                if result is True:
                    bg_count += 1      # tile without any labelled class
                elif result is False:
                    pass               # labelled tile written normally
                else:
                    skipped += 1       # any other value is counted as an error

        print(f"✅ {map_folder} [{mode}]: {len(files)} tiles processed ({bg_count} background-only, {skipped} errors)")


🧩 Converting: Bear_Creek_20250112


Bear_Creek_20250112 [train]: 100%|█████████████████████████████████████████████████| 5107/5107 [01:47<00:00, 47.67it/s]


✅ Bear_Creek_20250112 [train]: 5107 tiles processed (3632 background-only)


Bear_Creek_20250112 [val]: 100%|███████████████████████████████████████████████████| 2189/2189 [00:46<00:00, 47.17it/s]


✅ Bear_Creek_20250112 [val]: 2189 tiles processed (1537 background-only)

🧩 Converting: Bear_Lane


Bear_Lane [train]: 100%|███████████████████████████████████████████████████████████| 1029/1029 [00:31<00:00, 32.22it/s]


✅ Bear_Lane [train]: 1029 tiles processed (443 background-only)


Bear_Lane [val]: 100%|███████████████████████████████████████████████████████████████| 441/441 [00:14<00:00, 31.40it/s]


✅ Bear_Lane [val]: 441 tiles processed (176 background-only)

🧩 Converting: Flight_2


Flight_2 [train]: 100%|████████████████████████████████████████████████████████████| 2463/2463 [01:16<00:00, 32.12it/s]


✅ Flight_2 [train]: 2463 tiles processed (1232 background-only)


Flight_2 [val]: 100%|██████████████████████████████████████████████████████████████| 1056/1056 [00:33<00:00, 31.84it/s]


✅ Flight_2 [val]: 1056 tiles processed (539 background-only)

🧩 Converting: Flight_2_25pct


Flight_2_25pct [train]: 100%|████████████████████████████████████████████████████████| 466/466 [00:10<00:00, 44.95it/s]


✅ Flight_2_25pct [train]: 466 tiles processed (290 background-only)


Flight_2_25pct [val]: 100%|██████████████████████████████████████████████████████████| 201/201 [00:04<00:00, 43.28it/s]


✅ Flight_2_25pct [val]: 201 tiles processed (130 background-only)

🧩 Converting: SFLBC


SFLBC [train]: 100%|███████████████████████████████████████████████████████████████| 3469/3469 [01:28<00:00, 39.04it/s]


✅ SFLBC [train]: 3469 tiles processed (1913 background-only)


SFLBC [val]: 100%|█████████████████████████████████████████████████████████████████| 1487/1487 [00:39<00:00, 38.02it/s]


✅ SFLBC [val]: 1487 tiles processed (802 background-only)

🧩 Converting: Sugar_Refugia_20241112


Sugar_Refugia_20241112 [train]: 100%|██████████████████████████████████████████████| 1316/1316 [00:36<00:00, 36.17it/s]


✅ Sugar_Refugia_20241112 [train]: 1316 tiles processed (659 background-only)


Sugar_Refugia_20241112 [val]: 100%|██████████████████████████████████████████████████| 564/564 [00:16<00:00, 34.97it/s]


✅ Sugar_Refugia_20241112 [val]: 564 tiles processed (277 background-only)

🧩 Converting: Wildcat_Creek


Wildcat_Creek [train]: 100%|███████████████████████████████████████████████████████| 2570/2570 [00:49<00:00, 52.30it/s]


✅ Wildcat_Creek [train]: 2570 tiles processed (2127 background-only)


Wildcat_Creek [val]: 100%|█████████████████████████████████████████████████████████| 1102/1102 [00:21<00:00, 50.98it/s]


✅ Wildcat_Creek [val]: 1102 tiles processed (889 background-only)

🧩 Converting: Project_2024_09_20


Project_2024_09_20 [train]: 100%|██████████████████████████████████████████████████| 3192/3192 [01:09<00:00, 45.64it/s]


✅ Project_2024_09_20 [train]: 3192 tiles processed (2055 background-only)


Project_2024_09_20 [val]: 100%|████████████████████████████████████████████████████| 1368/1368 [00:28<00:00, 48.58it/s]

✅ Project_2024_09_20 [val]: 1368 tiles processed (873 background-only)





In [1]:
import os
import shutil
import random
from tqdm import tqdm

# --- Configuration ---
base_dir = "C:/QGIS"
dataset_dir = os.path.join(base_dir, "dataset")
img_train_dir = os.path.join(dataset_dir, "images", "train")
img_val_dir = os.path.join(dataset_dir, "images", "val")
lbl_train_dir = os.path.join(dataset_dir, "labels", "train")
lbl_val_dir = os.path.join(dataset_dir, "labels", "val")
manifest_dir = os.path.join(dataset_dir, "manifests")
train_split = 0.8
split_seed = 42  # fixed so the merged train/val assignment is reproducible

holdout_set = "Flight_2_25pct"
all_sets = [
    "Bear_Creek_20250112", "Bear_Lane", "Flight_2",
    "SFLBC", "Sugar_Refugia_20241112", "Wildcat_Creek", "Project_2024_09_20"
]  # Excludes holdout

# --- Create necessary directories ---
for d in [img_train_dir, img_val_dir, lbl_train_dir, lbl_val_dir, manifest_dir]:
    os.makedirs(d, exist_ok=True)

# --- Clean old contents (files only; sub-directories are left alone) ---
for folder in [img_train_dir, img_val_dir, lbl_train_dir, lbl_val_dir]:
    for f in os.listdir(folder):
        file_path = os.path.join(folder, f)
        if os.path.isfile(file_path):
            os.remove(file_path)

for manifest_file in ["train_files.txt", "val_files.txt"]:
    path = os.path.join(manifest_dir, manifest_file)
    if os.path.exists(path):
        os.remove(path)

train_list, val_list = [], []

# --- Merge YOLO image/label files from each map set ---
for set_name in tqdm(all_sets, desc="Merging sets"):
    base_map_dir = os.path.join(base_dir, set_name)
    img_base = os.path.join(base_map_dir, "yolo_dataset_640", "images")
    lbl_base = os.path.join(base_map_dir, "yolo_dataset_640", "labels")

    # Pool the per-map train and val tiles, keyed by file name. A chip found
    # in both source splits is kept once (first occurrence), so the same tile
    # can never be copied into both merged splits.
    pairs_by_name = {}
    for split in ["train", "val"]:
        img_dir = os.path.join(img_base, split)
        lbl_dir = os.path.join(lbl_base, split)
        if not os.path.exists(img_dir):
            continue
        for fname in os.listdir(img_dir):
            if fname.endswith(".png"):
                pairs_by_name.setdefault(fname, (
                    os.path.join(img_dir, fname),
                    os.path.join(lbl_dir, fname.replace(".png", ".txt"))
                ))

    # Sorted order + seeded RNG: the re-split does not depend on listing order.
    all_images = [pairs_by_name[fname] for fname in sorted(pairs_by_name)]
    random.Random(split_seed).shuffle(all_images)
    split_idx = int(len(all_images) * train_split)
    split_files = [("train", all_images[:split_idx]), ("val", all_images[split_idx:])]

    for mode, files in split_files:
        img_out_dir = img_train_dir if mode == "train" else img_val_dir
        lbl_out_dir = lbl_train_dir if mode == "train" else lbl_val_dir
        manifest = train_list if mode == "train" else val_list

        for img_path, lbl_path in files:
            # Prefix with the set name: chip file names repeat across maps.
            new_img_name = f"{set_name}_{os.path.basename(img_path)}"
            new_lbl_name = new_img_name.replace(".png", ".txt")

            shutil.copy(img_path, os.path.join(img_out_dir, new_img_name))
            if os.path.exists(lbl_path):
                shutil.copy(lbl_path, os.path.join(lbl_out_dir, new_lbl_name))
            else:
                # No label file: write an empty one so the image counts as background.
                open(os.path.join(lbl_out_dir, new_lbl_name), "w").close()

            manifest.append(f"images/{mode}/{new_img_name}")

# --- Save manifest files (manifest_dir was created above) ---
with open(os.path.join(manifest_dir, "train_files.txt"), "w") as f:
    f.write("\n".join(sorted(train_list)) + "\n")
with open(os.path.join(manifest_dir, "val_files.txt"), "w") as f:
    f.write("\n".join(sorted(val_list)) + "\n")

print(f"✅ Merged {len(train_list)} train / {len(val_list)} val tiles into: {dataset_dir}")
print(f"ℹ️ Held-out map: {holdout_set}")

Merging sets: 100%|██████████████████████████████████████████████████████████████████████| 7/7 [07:42<00:00, 66.06s/it]

✅ Merged 21880 train / 5473 val tiles into: C:/QGIS\dataset
ℹ️ Held-out map: Flight_2_25pct





In [None]:
# --- Training done in Train.py 
import os
from ultralytics import YOLO
import torch

def main():
    """Write ``data.yaml`` for the merged dataset and train a YOLOv8 segmentation model.

    Side effects: overwrites ``<dataset_dir>/data.yaml`` and starts a training
    run whose outputs go under ``save_dir`` with run name ``main_model``.
    """
    torch.cuda.empty_cache()

    dataset_dir = "C:/QGIS/dataset"
    model_type = "yolov8s-seg.pt"
    # NOTE(review): the merged tiles are 640x640 and inference runs with
    # imgsz=640; confirm that training at 1024 is intended.
    img_size = 1024
    save_dir = "C:/QGIS/runs/segment/train"
    # Position in this list is the class index used in the label files, so the
    # order must follow the mask encoding: 0 = Road, 1 = PVeg, 2 = Water.
    class_names = ["Road", "PVeg", "Water"]

    data_yaml = os.path.join(dataset_dir, "data.yaml")
    with open(data_yaml, "w") as f:
        f.write(f"path: {dataset_dir}\n")
        f.write("train: images/train\n")
        f.write("val: images/val\n")
        f.write(f"nc: {len(class_names)}\n")
        # The Python list repr is written as-is and read as a YAML flow sequence.
        f.write(f"names: {class_names}\n")

    print(f"✅ data.yaml written to: {data_yaml}")

    model = YOLO(model_type)
    model.train(
        data=data_yaml,
        imgsz=img_size,
        epochs=50,
        batch=16,
        workers=2,
        amp=True,
        patience=5,
        device="cuda",
        save=True,
        save_period=-1,
        project=save_dir,
        name="main_model",
        verbose=True,
        plots=False,
        cache=True
    )

if __name__ == "__main__":
    main()


In [None]:
# Model Report

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

# --- Paths ---
results_path = "C:/QGIS/runs/YOLOv8s_640downsized1024/results.csv"
report_path = "C:/QGIS/diagnostics/model_report.pdf"
os.makedirs(os.path.dirname(report_path), exist_ok=True)

# --- Load results and clean columns ---
df = pd.read_csv(results_path)
df.columns = df.columns.str.strip()  # strip whitespace padding from the CSV header names

# --- Key epoch metrics ---
final_epoch = df.iloc[-1]
# idxmax() returns an index label, so look the row up by label as well.
best_map_epoch = df["metrics/mAP50(B)"].idxmax()
best_row = df.loc[best_map_epoch]

# --- Basic training config (update if needed) ---
# These values are typed in by hand, not read from the run.
# NOTE(review): the training cell uses batch=16 and the masks are produced with
# rasterio.features.rasterize; confirm "Batch Size" and "Mask Source" for this run.
training_config = {
    "Model": "YOLOv8s-seg",  # matches the results folder name and the trained weights
    "Input Size": "640x640",
    "Epochs": len(df),
    "Batch Size": 8,
    "Optimizer": "SGD (default)",
    "Confidence Threshold": 0.1,
    "Tile Size": "1024x1024",
    "Mask Source": "Polygon label → Raster mask via cv2.fillPoly"
}

# --- Metrics to plot (results.csv column → display label) ---
metrics = {
    "metrics/mAP50(B)": "mAP@0.5",
    "metrics/mAP50-95(B)": "mAP@0.5–0.95",
    "metrics/precision(B)": "Precision",
    "metrics/recall(B)": "Recall"
}
colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728"]

# --- Create PDF report ---
with PdfPages(report_path) as pdf:
    # Title + Summary page
    plt.figure(figsize=(11, 8.5))
    plt.text(0.5, 0.78, "Model Performance Report", ha="center", fontsize=24)
    plt.text(0.5, 0.70, "Project: River and Road Segmentation", ha="center", fontsize=14)
    plt.text(0.5, 0.63, f"Best mAP@0.5: {best_row['metrics/mAP50(B)']:.3f} at epoch {best_map_epoch}", ha="center", fontsize=12)
    plt.text(0.5, 0.57, f"Final Epoch Precision: {final_epoch['metrics/precision(B)']:.3f}", ha="center", fontsize=12)
    plt.text(0.5, 0.51, f"Final Epoch Recall: {final_epoch['metrics/recall(B)']:.3f}", ha="center", fontsize=12)
    plt.text(0.5, 0.45, "Report generated from YOLOv8 results.csv", ha="center", fontsize=10)
    plt.axis("off")
    pdf.savefig()
    plt.close()

    # Training configuration page
    fig, ax = plt.subplots(figsize=(11, 4))
    ax.axis("off")
    table_data = list(training_config.items())
    table = ax.table(cellText=table_data, colLabels=["Parameter", "Value"], loc="center")
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(1.2, 1.4)
    plt.title("Training Configuration", fontsize=14)
    pdf.savefig()
    plt.close()

    # Metric plots: one page per metric column that exists in the results file
    for i, (key, label) in enumerate(metrics.items()):
        if key in df.columns:
            plt.figure(figsize=(10, 4))
            plt.plot(df[key], marker='o', linewidth=2, color=colors[i])
            plt.title(f"{label} Over Epochs")
            plt.xlabel("Epoch")
            plt.ylabel(label)
            plt.grid(True)
            plt.tight_layout()
            pdf.savefig()
            plt.close()

    # Performance summary table: final row vs. the row with the best mAP@0.5
    summary_data = {
        "Metric": ["mAP@0.5", "mAP@0.5–0.95", "Precision", "Recall"],
        "Final Epoch": [
            f"{final_epoch.get('metrics/mAP50(B)', 0):.3f}",
            f"{final_epoch.get('metrics/mAP50-95(B)', 0):.3f}",
            f"{final_epoch.get('metrics/precision(B)', 0):.3f}",
            f"{final_epoch.get('metrics/recall(B)', 0):.3f}"
        ],
        f"Best Epoch ({best_map_epoch})": [
            f"{best_row.get('metrics/mAP50(B)', 0):.3f}",
            f"{best_row.get('metrics/mAP50-95(B)', 0):.3f}",
            f"{best_row.get('metrics/precision(B)', 0):.3f}",
            f"{best_row.get('metrics/recall(B)', 0):.3f}"
        ]
    }
    summary_df = pd.DataFrame(summary_data)
    fig, ax = plt.subplots(figsize=(10, 2.5))
    ax.axis("off")
    table = ax.table(cellText=summary_df.values, colLabels=summary_df.columns, loc="center")
    table.auto_set_font_size(False)
    table.set_fontsize(12)
    table.scale(1.2, 1.8)
    plt.title("Summary of Model Performance", fontsize=14)
    pdf.savefig()
    plt.close()

print(f"✅ PDF report saved to: {report_path}")

✅ PDF report saved to: C:/QGIS/diagnostics/model_report.pdf


In [1]:
from ultralytics import YOLO
import torch
import os
import glob

# --- Config ---
map_folder = "Flight_2_25pct"
image_dir = rf"C:\QGIS\{map_folder}\tiled\images"

# Absolute path to trained model
model_path = r"C:\QGIS\runs\segment\train\main_model9\weights\best.pt"

# Output folder for predictions (label files end up in <output_dir>\<output_name>\labels)
output_dir = rf"C:\QGIS\{map_folder}\predictions"
output_name = "predict_txt"

# --- Load model directly ---
model = YOLO(model_path)

# --- Optional: Clean up old predictions ---
# The run folder is reused (exist_ok=True below), so label files left from an
# earlier run are removed before predicting.
label_dir = os.path.join(output_dir, output_name, "labels")
if os.path.exists(label_dir):
    old_labels = glob.glob(os.path.join(label_dir, "*.txt"))
    for f in old_labels:
        os.remove(f)
    print(f"🧹 Cleared {len(old_labels)} old prediction files from: {label_dir}")

# --- Predict ---
# Only the text labels are kept (save_txt=True); no rendered images, no confidences.
model.predict(
    source=image_dir,
    imgsz=640,
    conf=0.1,
    save=False,
    save_txt=True,
    save_conf=False,
    retina_masks=True,
    exist_ok=True,
    project=output_dir,
    name=output_name,
    device="cuda" if torch.cuda.is_available() else "cpu"
)

print(f"✅ Inference complete. Labels saved to: {output_dir}\\{output_name}\\labels\\")


image 1/667 C:\QGIS\Flight_2_25pct\tiled\images\chip_0.png: 640x640 1 Water, 98.2ms
image 2/667 C:\QGIS\Flight_2_25pct\tiled\images\chip_1.png: 640x640 1 Water, 101.3ms
image 3/667 C:\QGIS\Flight_2_25pct\tiled\images\chip_10.png: 640x640 1 Water, 89.7ms
image 4/667 C:\QGIS\Flight_2_25pct\tiled\images\chip_100.png: 640x640 1 Water, 95.2ms
image 5/667 C:\QGIS\Flight_2_25pct\tiled\images\chip_101.png: 640x640 1 Water, 76.8ms
image 6/667 C:\QGIS\Flight_2_25pct\tiled\images\chip_102.png: 640x640 1 Water, 71.3ms
image 7/667 C:\QGIS\Flight_2_25pct\tiled\images\chip_103.png: 640x640 1 Water, 1 PVeg, 73.1ms
image 8/667 C:\QGIS\Flight_2_25pct\tiled\images\chip_104.png: 640x640 4 Waters, 2 Roads, 3 PVegs, 70.5ms
image 9/667 C:\QGIS\Flight_2_25pct\tiled\images\chip_105.png: 640x640 4 Waters, 1 Road, 4 PVegs, 76.9ms
image 10/667 C:\QGIS\Flight_2_25pct\tiled\images\chip_106.png: 640x640 1 Water, 21 PVegs, 70.7ms
image 11/667 C:\QGIS\Flight_2_25pct\tiled\images\chip_107.png: 640x640 1 Water, 24 PVeg

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import rasterio
from tqdm import tqdm

# --- Config ---
map_folder = "Flight_2_25pct"
base_dir = f"C:/QGIS/{map_folder}"
mask_dir = os.path.join(base_dir, "predictions", "predict_txt", "labels")
tile_metadata_path = os.path.join(base_dir, "tiled", "tile_metadata.csv")
raster_shape_path = os.path.join(base_dir, "tiled", "raster_shape.txt")
tif_path = os.path.join(base_dir, f"{map_folder}.tiff")
shapefile_path = os.path.join(base_dir, f"{map_folder} Segmentation.shp")

chip_size = 1024        # Original chip size used during tiling
inference_size = 640    # YOLOv8 inference resolution (label coordinates are normalised, so it is not needed below)

# Optional: Remap YOLO class IDs → Original class IDs
reverse_remap = {
    0: 1,  # Road
    1: 2,  # PVeg
    2: 3   # Water
}

# --- Load metadata and raster georeferencing ---
tile_meta = pd.read_csv(tile_metadata_path)
with open(raster_shape_path, "r") as f:
    height, width = map(int, f.read().strip().split(","))

with rasterio.open(tif_path) as src:
    transform = src.transform
    crs = src.crs

# --- Collect all polygons from predicted labels ---
features = []

for _, row in tqdm(tile_meta.iterrows(), total=len(tile_meta), desc="Stitching Tiles"):
    fname, x, y = row["filename"], int(row["x"]), int(row["y"])
    label_path = os.path.join(mask_dir, fname.replace(".png", ".txt"))
    if not os.path.exists(label_path):
        continue  # no label file for this tile

    with open(label_path, "r") as f:
        lines = f.readlines()

    for line in lines:
        parts = line.strip().split()
        # A usable line is "<class> x1 y1 x2 y2 x3 y3 ...": at least three
        # vertices and an even number of coordinates after the class id.
        if len(parts) < 7 or len(parts) % 2 == 0:
            continue  # Skip degenerate or malformed polygons

        cls_id = int(float(parts[0]))  # Already YOLO-aligned: 0 = Road, 1 = PVeg, 2 = Water

        # Normalised tile coordinates → tile pixels → full-raster pixels
        pts = np.array(parts[1:], dtype=np.float64).reshape(-1, 2)
        pts *= chip_size
        pts[:, 0] += x
        pts[:, 1] += y

        # One transform call per polygon; xy() takes (rows, cols) = (y, x).
        xs, ys = rasterio.transform.xy(
            transform, pts[:, 1].tolist(), pts[:, 0].tolist(), offset='center'
        )
        poly = Polygon(list(zip(xs, ys)))

        if poly.is_valid and poly.area > 0:
            features.append({
                "geometry": poly,
                "class_id": reverse_remap.get(cls_id, cls_id)  # Optional remap
            })

# --- Export to shapefile ---
if features:
    gdf = gpd.GeoDataFrame(features, geometry="geometry", crs=crs)
    gdf.to_file(shapefile_path)
    print(f"✅ Shapefile saved to: {shapefile_path}")
else:
    # Without any record there is no geometry column to build the frame from.
    print(f"⚠️ No valid polygons found in: {mask_dir} (shapefile not written)")

Stitching Tiles: 100%|███████████████████████████████████████████████████████████████| 667/667 [01:16<00:00,  8.67it/s]


✅ Shapefile saved to: C:/QGIS/Flight_2_25pct\Flight_2_25pct Segmentation.shp
