In [8]:
# -----------------------------------------------------------
# 🧩 Step 1: Chip TIFFs and Rasterize Masks
#
# This step creates 1600x1600 image/mask tiles from each map set
# - Uses 10% overlap (stride = 1440)
# - Includes background-only tiles
# - Rasterizes shapefile features to create mask chips
# - Outputs tile metadata for later preview and reconstruction
#
# Outputs:
# - /[Map Folder]/tiled/images/
# - /[Map Folder]/tiled/masks/
# - /[Map Folder]/tiled/tile_metadata.csv
# - /[Map Folder]/tiled/raster_shape.txt
# -----------------------------------------------------------

import os
import csv
import numpy as np
import rasterio
from rasterio.features import rasterize
from rasterio.windows import Window
from shapely.geometry import box
import geopandas as gpd
import cv2
from tqdm import tqdm
import shutil

# --- List of all map sets (Updated) ---
"""
all_maps = [
    "Bear_Creek_20250112",
    "Bear_Lane",
    "Flight_2",
    "Flight_2_25pct",
    "Project_2024_09_20",
    "SFLBC",
    "Sugar_Refugia_20241112",
    "Wildcat_Creek"
]
"""
# --- Configuration ---
map_folder = "Wildcat_Creek"
base_dir = f"/home/znelson/TensorStream/Labeled Data/"
chip_size = 1600
stride = 1280

# Original → YOLO class IDs
class_remap = {
    1: 0,  # Road
    2: 1,  # PVeg
    3: 2   # Water
}

map_base = os.path.join(base_dir, map_folder)
tif_path = os.path.join(map_base, f"{map_folder}.tiff")
shp_path = os.path.join(map_base, f"{map_folder}.shp")

out_img_dir = os.path.join(map_base, "tiled", "images")
out_mask_dir = os.path.join(map_base, "tiled", "masks")
meta_csv = os.path.join(map_base, "tiled", "tile_metadata.csv")
shape_txt = os.path.join(map_base, "tiled", "raster_shape.txt")

# --- Clean old data ---
for path in [out_img_dir, out_mask_dir]:
    if os.path.exists(path):
        shutil.rmtree(path)
os.makedirs(out_img_dir, exist_ok=True)
os.makedirs(out_mask_dir, exist_ok=True)

if os.path.exists(meta_csv):
    os.remove(meta_csv)
if os.path.exists(shape_txt):
    os.remove(shape_txt)

# --- Open raster and store shape ---
with rasterio.open(tif_path) as raster:
    raster_height, raster_width = raster.height, raster.width
    raster_crs = raster.crs
    with open(shape_txt, "w") as f:
        f.write(f"{raster_height},{raster_width}")

# --- Load and reproject shapefile ---
labels = gpd.read_file(shp_path).to_crs(raster_crs)
sindex = labels.sindex

# --- Begin processing ---
results = []
skipped = 0

with rasterio.open(tif_path) as raster:
    for idx, (y, x) in enumerate(tqdm(
        [(y, x) for y in range(0, raster_height - chip_size + 1, stride)
                 for x in range(0, raster_width - chip_size + 1, stride)],
        desc="🌍 Chipping tiles"
    )):
        window = Window(x, y, chip_size, chip_size)
        transform = raster.window_transform(window)
        bounds = box(*rasterio.windows.bounds(window, raster.transform))

        # Read image chip
        img = raster.read([1, 2, 3], window=window)
        img = np.transpose(img, (1, 2, 0))
        img_name = f"chip_{idx}.png"

        # --- Skip writing black tiles, but keep metadata ---
        if np.mean(img) < 5 and np.std(img) < 2:
            skipped += 1
            results.append((img_name, x, y))
            continue

        # Save image
        cv2.imwrite(os.path.join(out_img_dir, img_name), cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

        # Rasterize mask
        possible_matches = labels.iloc[list(sindex.intersection(bounds.bounds))]
        intersecting = possible_matches[possible_matches.intersects(bounds)]
        if not intersecting.empty:
            shapes = [
                (geom, class_remap.get(cid, 255))
                for geom, cid in zip(intersecting.geometry, intersecting["class_id"])
            ]
            mask = rasterize(
                shapes,
                out_shape=(chip_size, chip_size),
                transform=transform,
                fill=255,
                dtype=np.uint8
            )
        else:
            mask = np.full((chip_size, chip_size), 255, dtype=np.uint8)

        cv2.imwrite(os.path.join(out_mask_dir, img_name), mask)
        results.append((img_name, x, y))

# --- Save metadata ---
with open(meta_csv, mode="w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["filename", "x", "y"])
    writer.writerows(results)

print(f"✅ {len(results)} tiles processed from {map_folder}.")
print(f"🧹 {skipped} black/void tiles skipped (not saved, but tracked for stitching).")

🌍 Chipping tiles: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1911/1911 [03:17<00:00,  9.70it/s]

✅ 1911 tiles processed from Wildcat_Creek.
🧹 1252 black/void tiles skipped (not saved, but tracked for stitching).





In [1]:
# -----------------------------------------------------------
# ⚡ Step 2: Convert Mask Tiles to YOLOv8 Polygon Labels
#
# This step converts rasterized mask tiles into YOLOv8-style
# polygon .txt labels, enabling training with segmentation models.
#
# For each map folder:
# - Loads corresponding image and mask tiles
# - Resizes both to 640×640 (matching YOLOv8 input size)
# - Extracts external contours for each class in the mask
# - Writes polygons in YOLO format:
#     • class_id x1 y1 x2 y2 ... xn yn (normalized coordinates)
#
# Enhancements:
# - Runs in parallel using multithreading for speed
# - Background-only tiles are preserved with empty label files
# - Automatically splits 70/30 into train and val subsets
#
# Input:
# - /[map]/tiled/images/      ← RGB tiles
# - /[map]/tiled/masks/       ← Grayscale masks (class IDs)
#
# Output:
# - /[map]/yolo_dataset_640/images/train/val/
# - /[map]/yolo_dataset_640/labels/train/val/
#
# Notes:
# - Supports 3-class masks: 0 = Water, 1 = Road, 2 = PVeg
# - No remapping is needed if classes are already 0-based
# - This format is compatible with YOLOv8-seg training
# -----------------------------------------------------------

import os
import cv2
import numpy as np
import random
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# --- Updated list of map sets ---
all_maps = [
    "Bear_Creek_20250112",
    "Bear_Lane",
    "Flight_2",
    "Flight_2_25pct",
    "Project_2024_09_20",
    "SFLBC",
    "Sugar_Refugia_20241112",
    "Wildcat_Creek"
]

validation_set = "Flight_2"

# --- Configuration ---
base_dir = f"/home/znelson/TensorStream/Labeled Data/"
target_size = 1024
num_threads = 8


def mask_to_polygons(mask):
    contours = {}
    for cls_id in np.unique(mask):
        if cls_id == 255:
            continue
        binary = (mask == cls_id).astype(np.uint8)
        cnts, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if cnts:
            contours[cls_id] = cnts
    return contours

def process_file(fname, img_input_dir, mask_input_dir, img_out, lbl_out):
    img_path = os.path.join(img_input_dir, fname)
    mask_path = os.path.join(mask_input_dir, fname)

    try:
        img = cv2.imread(img_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if img is None or mask is None:
            return "error"

        img_resized = cv2.resize(img, (target_size, target_size), interpolation=cv2.INTER_AREA)
        mask_resized = cv2.resize(mask, (target_size, target_size), interpolation=cv2.INTER_NEAREST)

        label_path = os.path.join(lbl_out, fname.replace(".png", ".txt"))
        contours = mask_to_polygons(mask_resized)

        # Save image
        cv2.imwrite(os.path.join(img_out, fname), img_resized)

        # Write label file
        with open(label_path, "w") as f:
            if not contours:
                return "background"
            for cls_id, cnts in contours.items():
                for cnt in cnts:
                    if len(cnt) < 3:
                        continue
                    pts = cnt.reshape(-1, 2).astype(np.float32) / target_size
                    coords = " ".join(f"{x:.6f} {y:.6f}" for x, y in pts)
                    f.write(f"{cls_id} {coords}\n")

        return "labeled"
    except Exception as e:
        print(f"⚠️ Error processing {fname}: {e}")
        return "error"


# --- Process each map ---
for map_folder in all_maps:
    print(f"\n🧩 Converting: {map_folder}")

    base_map_dir = os.path.join(base_dir, map_folder)
    img_input_dir = os.path.join(base_map_dir, "tiled", "images")
    mask_input_dir = os.path.join(base_map_dir, "tiled", "masks")
    out_base = os.path.join(base_map_dir, "yolo_dataset_1024")

    # Create output directories
    if map_folder == validation_set:
        img_out = os.path.join(out_base, "images", "val")
        lbl_out = os.path.join(out_base, "labels", "val")
    else:
        img_out = os.path.join(out_base, "images", "train")
        lbl_out = os.path.join(out_base, "labels", "train")
    
    os.makedirs(img_out, exist_ok=True)
    os.makedirs(lbl_out, exist_ok=True)

    # Process all files
    counts = {"labeled": 0, "background": 0, "error": 0}
    chip_files = [f for f in os.listdir(img_input_dir) if f.endswith(".png")]

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [executor.submit(process_file, f, img_input_dir, mask_input_dir, img_out, lbl_out) for f in chip_files]
        for f in tqdm(as_completed(futures), total=len(futures), desc=f"{map_folder}"):
            result = f.result()
            counts[result] += 1

    print(f"✅ {map_folder} summary:")
    print(f"   - {counts['labeled']} labeled tiles")
    print(f"   - {counts['background']} background-only tiles")
    print(f"   - {counts['error']} errors")


🧩 Converting: Bear_Creek_20250112


Bear_Creek_20250112: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1533/1533 [02:11<00:00, 11.66it/s]


✅ Bear_Creek_20250112 summary:
   - 1433 labeled tiles
   - 100 background-only tiles
   - 0 errors

🧩 Converting: Bear_Lane


Bear_Lane: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 548/548 [00:54<00:00,  9.98it/s]


✅ Bear_Lane summary:
   - 530 labeled tiles
   - 18 background-only tiles
   - 0 errors

🧩 Converting: Flight_2


Flight_2: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1710/1710 [02:22<00:00, 12.00it/s]


✅ Flight_2 summary:
   - 1188 labeled tiles
   - 522 background-only tiles
   - 0 errors

🧩 Converting: Flight_2_25pct


Flight_2_25pct: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 137/137 [00:15<00:00,  8.77it/s]


✅ Flight_2_25pct summary:
   - 133 labeled tiles
   - 4 background-only tiles
   - 0 errors

🧩 Converting: Project_2024_09_20


Project_2024_09_20: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1101/1101 [01:33<00:00, 11.80it/s]


✅ Project_2024_09_20 summary:
   - 966 labeled tiles
   - 135 background-only tiles
   - 0 errors

🧩 Converting: SFLBC


SFLBC: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1620/1620 [02:17<00:00, 11.77it/s]


✅ SFLBC summary:
   - 1321 labeled tiles
   - 299 background-only tiles
   - 0 errors

🧩 Converting: Sugar_Refugia_20241112


Sugar_Refugia_20241112: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 610/610 [00:59<00:00, 10.18it/s]


✅ Sugar_Refugia_20241112 summary:
   - 558 labeled tiles
   - 52 background-only tiles
   - 0 errors

🧩 Converting: Wildcat_Creek


Wildcat_Creek: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 659/659 [00:50<00:00, 12.96it/s]

✅ Wildcat_Creek summary:
   - 481 labeled tiles
   - 178 background-only tiles
   - 0 errors





In [2]:
# NEW DATASET BUILDER EXCLUDES BACKGROUND CHIPS
# -----------------------------------------------------------
# 📦 Unified YOLO Dataset Builder (Excludes Holdout Set)
#
# This script merges YOLO-formatted image and label tiles from
# multiple map sets into a single unified dataset structure.
#
# Excludes holdout map: Flight_2_25pct
# Output folders:
# - dataset/images/train/
# - dataset/images/val/
# - dataset/labels/train/
# - dataset/labels/val/
# - dataset/manifests/train_files.txt and val_files.txt
# -----------------------------------------------------------
# path: /home/znelson/TensorStream/Labeled Data/dataset
# train: images/train
# val: images/val
# nc: 4
# names: ["Background", "Road", "PVeg", "Water"]

import os
import shutil
from tqdm import tqdm

# --- Configuration ---
base_dir = f"/home/znelson/TensorStream/Labeled Data/"
dataset_dir = os.path.join(base_dir, "dataset")
img_train_dir = os.path.join(dataset_dir, "images", "train")
img_val_dir = os.path.join(dataset_dir, "images", "val")
lbl_train_dir = os.path.join(dataset_dir, "labels", "train")
lbl_val_dir = os.path.join(dataset_dir, "labels", "val")

validation_set = "Flight_2"
exclude_set = "Flight_2_25pct"

all_sets = [
    "Bear_Creek_20250112",
    "Bear_Lane",
    "Flight_2",
    "SFLBC",
    "Sugar_Refugia_20241112",
    "Wildcat_Creek",
    "Project_2024_09_20"
]

# --- Create necessary directories ---
for d in [img_train_dir, img_val_dir, lbl_train_dir, lbl_val_dir]:
    os.makedirs(d, exist_ok=True)

# --- Clean old contents ---
for folder in [img_train_dir, img_val_dir, lbl_train_dir, lbl_val_dir]:
    for f in os.listdir(folder):
        file_path = os.path.join(folder, f)
        if os.path.isfile(file_path):
            os.remove(file_path)

# --- Copy pre-sorted train/val files from each set ---
for set_name in tqdm(all_sets, desc="Copying datasets"):
    if set_name == exclude_set:
        print(f"⏭️ Skipping excluded set: {exclude_set}")
        continue

    base_map_dir = os.path.join(base_dir, set_name, "yolo_dataset_1024")
    for mode in ["train", "val"]:
        img_src_dir = os.path.join(base_map_dir, "images", mode)
        lbl_src_dir = os.path.join(base_map_dir, "labels", mode)

        if not os.path.exists(img_src_dir) or not os.path.exists(lbl_src_dir):
            continue

        if set_name == validation_set:
            img_dst_dir = img_val_dir
            lbl_dst_dir = lbl_val_dir
        else:
            img_dst_dir = img_train_dir
            lbl_dst_dir = lbl_train_dir

        for img_file in os.listdir(img_src_dir):
            if img_file.endswith(".png"):
                label_file = img_file.replace(".png", ".txt")
                lbl_src = os.path.join(lbl_src_dir, label_file)

                # Skip background-only tiles
                if not os.path.exists(lbl_src) or os.path.getsize(lbl_src) == 0:
                    continue

                prefix = set_name + "_"
                img_dst = os.path.join(img_dst_dir, prefix + img_file)
                lbl_dst = os.path.join(lbl_dst_dir, prefix + label_file)

                shutil.copy(os.path.join(img_src_dir, img_file), img_dst)
                shutil.copy(lbl_src, lbl_dst)

print(f"✅ Merged datasets into: {dataset_dir} (excluding background-only tiles)")

Copying datasets: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:07<00:00,  1.12s/it]

✅ Merged datasets into: /home/znelson/TensorStream/Labeled Data/dataset (excluding background-only tiles)





In [2]:
# -----------------------------------------------------------
# 📦 Unified YOLO Dataset Builder (Excludes Holdout Set)
#
# This script merges YOLO-formatted image and label tiles from
# multiple map sets into a single unified dataset structure.
#
# Excludes holdout map: Flight_2_25pct
# Output folders:
# - dataset/images/train/
# - dataset/images/val/
# - dataset/labels/train/
# - dataset/labels/val/
# - dataset/manifests/train_files.txt and val_files.txt
# -----------------------------------------------------------
# path: /home/znelson/TensorStream/Labeled Data/dataset
# train: images/train
# val: images/val
# nc: 4
# names: ["Background", "Road", "PVeg", "Water"]

import os
import shutil
from tqdm import tqdm

# --- Configuration ---
base_dir = f"/home/znelson/TensorStream/Labeled Data/"
dataset_dir = os.path.join(base_dir, "dataset")
img_train_dir = os.path.join(dataset_dir, "images", "train")
img_val_dir = os.path.join(dataset_dir, "images", "val")
lbl_train_dir = os.path.join(dataset_dir, "labels", "train")
lbl_val_dir = os.path.join(dataset_dir, "labels", "val")

validation_set = "Flight_2"
exclude_set = "Flight_2_25pct"

all_sets = [
    "Bear_Creek_20250112",
    "Bear_Lane",
    "Flight_2",
    "SFLBC",
    "Sugar_Refugia_20241112",
    "Wildcat_Creek",
    "Project_2024_09_20"
]

# --- Create necessary directories ---
for d in [img_train_dir, img_val_dir, lbl_train_dir, lbl_val_dir]:
    os.makedirs(d, exist_ok=True)

# --- Clean old contents ---
for folder in [img_train_dir, img_val_dir, lbl_train_dir, lbl_val_dir]:
    for f in os.listdir(folder):
        file_path = os.path.join(folder, f)
        if os.path.isfile(file_path):
            os.remove(file_path)

# --- Copy pre-sorted train/val files from each set ---
for set_name in tqdm(all_sets, desc="Copying datasets"):
    # Skip the excluded set
    if set_name == exclude_set:
        print(f"⏭️ Skipping excluded set: {exclude_set}")
        continue

    base_map_dir = os.path.join(base_dir, set_name, "yolo_dataset_640")
    for mode in ["train", "val"]:
        img_src_dir = os.path.join(base_map_dir, "images", mode)
        lbl_src_dir = os.path.join(base_map_dir, "labels", mode)

        if not os.path.exists(img_src_dir) or not os.path.exists(lbl_src_dir):
            continue

        # Determine output directories
        if set_name == validation_set:
            img_dst_dir = img_val_dir
            lbl_dst_dir = lbl_val_dir
        else:
            img_dst_dir = img_train_dir
            lbl_dst_dir = lbl_train_dir

        # Copy images and labels with prefix
        for img_file in os.listdir(img_src_dir):
            if img_file.endswith(".png"):
                prefix = set_name + "_"
                img_dst = os.path.join(img_dst_dir, prefix + img_file)
                lbl_dst = os.path.join(lbl_dst_dir, prefix + img_file.replace(".png", ".txt"))

                # Copy image
                shutil.copy(os.path.join(img_src_dir, img_file), img_dst)

                # Copy label
                lbl_src = os.path.join(lbl_src_dir, img_file.replace(".png", ".txt"))
                if os.path.exists(lbl_src):
                    shutil.copy(lbl_src, lbl_dst)
                else:
                    open(lbl_dst, "w").close()

print(f"✅ Merged datasets into: {dataset_dir}")

Copying datasets: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 7935.17it/s]

✅ Merged datasets into: /home/znelson/TensorStream/Labeled Data/dataset





In [2]:
import torch, os
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [3]:
import torch, os
torch.cuda.empty_cache()

In [7]:
import torch

torch.cuda.set_device(1)

In [10]:
import torch

print("CUDA Available:", torch.cuda.is_available())
print("CUDA Device Count:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("Device Name:", torch.cuda.get_device_name(1))
    print("Current Device:", torch.cuda.current_device())

CUDA Available: True
CUDA Device Count: 2
Device Name: NVIDIA GeForce RTX 2080 Ti
Current Device: 1


In [None]:
# Batch Size Memory Test Script
import torch
import os
from ultralytics import YOLO

torch.cuda.empty_cache()
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
torch.cuda.set_device(0)

# Load model
model = YOLO("yolov8s-seg.pt")

# Config
imgsz = 640
data_yaml = "/home/znelson/TensorStream/Labeled Data/dataset/data.yaml"

# Sweep from small to large batch sizes
#for batch in [3, 4, 6, 8, 10, 12]:
#for batch in [4, 8, 12, 16, 20, 24, 32]:
for batch in [32, 48, 64]:
#for batch in [16, 20, 24, 32]:
    print(f"\n🚀 Testing batch size: {batch}")
    try:
        model.train(
            data=data_yaml,
            imgsz=imgsz,
            epochs=1,           # just one epoch for testing
            batch=batch,
            device="0",
            workers=2,          # keep this low to minimize RAM pressure
            cache="ram",
            amp=True,
            verbose=False,
            plots=False
        )
        print(f"✅ Batch size {batch} succeeded.")
    except RuntimeError as e:
        if "out of memory" in str(e).lower():
            print(f"❌ OOM at batch size {batch}")
            torch.cuda.empty_cache()
            break
        else:
            raise e

In [None]:
# NEW MODEL REPORT
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

# --- Paths ---
results_path = f"/home/znelson/TensorStream/Labeled Data/runs/segment/train/yolov8s_1600to1024_model01/results.csv"
report_path = f"/home/znelson/TensorStream/Labeled Data/diagnostics/yolov8s_1600to1024_model01.pdf"
os.makedirs(os.path.dirname(report_path), exist_ok=True)

# --- Load results and clean columns ---
df = pd.read_csv(results_path)
df.columns = df.columns.str.strip()

# --- Calculate F1-score ---
df['metrics/f1(B)'] = 2 * (df['metrics/precision(B)'] * df['metrics/recall(B)']) / (
    df['metrics/precision(B)'] + df['metrics/recall(B)']).replace(0, 1e-8)

# --- Key epoch metrics ---
final_epoch = df.iloc[-1]
best_map_epoch = df["metrics/mAP50(B)"].idxmax()
best_row = df.iloc[best_map_epoch]

# --- Basic training config (update if needed) ---
training_config = {
    "Model": "YOLOv8s-seg",
    "Input Size": "1024x1024",
    "Epochs": len(df),
    "Batch Size": 12,
    "Optimizer": "AdamW",
    "Confidence Threshold": 0.1,
    "Tile Size": "1600x1600 (20% overlap stride: 1280x1280)",
    "Mask Source": "Polygon label → Raster mask via cv2.fillPoly"
}

# --- Metrics to plot ---
metrics = {
    "metrics/mAP50(B)": "mAP@0.5",
    "metrics/mAP50-95(B)": "mAP@0.5–095",
    "metrics/precision(B)": "Precision",
    "metrics/recall(B)": "Recall",
    "metrics/f1(B)": "F1 Score"
}
colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"]

# --- Create PDF report ---
with PdfPages(report_path) as pdf:
    # Title + Summary page
    plt.figure(figsize=(11, 8.5))
    plt.text(0.5, 0.78, "Model Performance Report", ha="center", fontsize=24)
    plt.text(0.5, 0.70, "Project: River and Road Segmentation", ha="center", fontsize=14)
    plt.text(0.5, 0.63, f"Best mAP@0.5: {best_row['metrics/mAP50(B)']:.3f} at epoch {best_map_epoch}", ha="center", fontsize=12)
    plt.text(0.5, 0.57, f"Final Epoch Precision: {final_epoch['metrics/precision(B)']:.3f}", ha="center", fontsize=12)
    plt.text(0.5, 0.51, f"Final Epoch Recall: {final_epoch['metrics/recall(B)']:.3f}", ha="center", fontsize=12)
    plt.text(0.5, 0.45, f"Final Epoch F1 Score: {final_epoch['metrics/f1(B)']:.3f}", ha="center", fontsize=12)
    plt.text(0.5, 0.39, "Report generated from YOLOv8s results.csv", ha="center", fontsize=10)
    plt.axis("off")
    pdf.savefig()
    plt.close()

    # Training configuration page
    fig, ax = plt.subplots(figsize=(11, 4))
    ax.axis("off")
    table_data = list(training_config.items())
    table = ax.table(cellText=table_data, colLabels=["Parameter", "Value"], loc="center")
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(1.2, 1.4)
    plt.title("Training Configuration", fontsize=14)
    pdf.savefig()
    plt.close()

    # Metric plots
    for i, (key, label) in enumerate(metrics.items()):
        if key in df.columns:
            plt.figure(figsize=(10, 4))
            plt.plot(df[key], marker='o', linewidth=2, color=colors[i % len(colors)])
            plt.title(f"{label} Over Epochs")
            plt.xlabel("Epoch")
            plt.ylabel(label)
            plt.grid(True)
            plt.tight_layout()
            pdf.savefig()
            plt.close()

    # Performance summary table
    summary_data = {
        "Metric": ["mAP@0.5", "mAP@0.5–095", "Precision", "Recall", "F1 Score"],
        "Final Epoch": [
            f"{final_epoch.get('metrics/mAP50(B)', 0):.3f}",
            f"{final_epoch.get('metrics/mAP50-95(B)', 0):.3f}",
            f"{final_epoch.get('metrics/precision(B)', 0):.3f}",
            f"{final_epoch.get('metrics/recall(B)', 0):.3f}",
            f"{final_epoch.get('metrics/f1(B)', 0):.3f}"
        ],
        f"Best Epoch ({best_map_epoch})": [
            f"{best_row.get('metrics/mAP50(B)', 0):.3f}",
            f"{best_row.get('metrics/mAP50-95(B)', 0):.3f}",
            f"{best_row.get('metrics/precision(B)', 0):.3f}",
            f"{best_row.get('metrics/recall(B)', 0):.3f}",
            f"{best_row.get('metrics/f1(B)', 0):.3f}"
        ]
    }
    summary_df = pd.DataFrame(summary_data)
    fig, ax = plt.subplots(figsize=(10, 2.8))
    ax.axis("off")
    table = ax.table(cellText=summary_df.values, colLabels=summary_df.columns, loc="center")
    table.auto_set_font_size(False)
    table.set_fontsize(12)
    table.scale(1.2, 1.8)
    plt.title("Summary of Model Performance", fontsize=14)
    pdf.savefig()
    plt.close()

print(f"✅ PDF report saved to: {report_path}")

In [None]:
# Model Report

In [3]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

# --- Paths ---
results_path = f"/home/znelson/TensorStream/Labeled Data/runs/segment/train/yolov8s_1600to1024_model01/results.csv"
report_path = f"/home/znelson/TensorStream/Labeled Data/diagnostics/yolov8s_1600to1024_model01.pdf"
os.makedirs(os.path.dirname(report_path), exist_ok=True)

# --- Load results and clean columns ---
df = pd.read_csv(results_path)
df.columns = df.columns.str.strip()

# --- Key epoch metrics ---
final_epoch = df.iloc[-1]
best_map_epoch = df["metrics/mAP50(B)"].idxmax()
best_row = df.iloc[best_map_epoch]

# --- Basic training config (update if needed) ---
training_config = {
    "Model": "YOLOv8s-seg",
    "Input Size": "1024x1024",
    "Epochs": len(df),
    "Batch Size": 12,
    "Optimizer": "AdamW",
    "Confidence Threshold": 0.1,
    "Tile Size": "1600x1600 (20%\ overlap stride: 1280x1280)",
    "Mask Source": "Polygon label → Raster mask via cv2.fillPoly"
}

# --- Metrics to plot ---
metrics = {
    "metrics/mAP50(B)": "mAP@0.5",
    "metrics/mAP50-95(B)": "mAP@0.5–0.95",
    "metrics/precision(B)": "Precision",
    "metrics/recall(B)": "Recall"
}
colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728"]

# --- Create PDF report ---
with PdfPages(report_path) as pdf:
    # Title + Summary page
    plt.figure(figsize=(11, 8.5))
    plt.text(0.5, 0.78, "Model Performance Report", ha="center", fontsize=24)
    plt.text(0.5, 0.70, "Project: River and Road Segmentation", ha="center", fontsize=14)
    plt.text(0.5, 0.63, f"Best mAP@0.5: {best_row['metrics/mAP50(B)']:.3f} at epoch {best_map_epoch}", ha="center", fontsize=12)
    plt.text(0.5, 0.57, f"Final Epoch Precision: {final_epoch['metrics/precision(B)']:.3f}", ha="center", fontsize=12)
    plt.text(0.5, 0.51, f"Final Epoch Recall: {final_epoch['metrics/recall(B)']:.3f}", ha="center", fontsize=12)
    plt.text(0.5, 0.45, "Report generated from YOLOv8s results.csv", ha="center", fontsize=10)
    plt.axis("off")
    pdf.savefig()
    plt.close()

    # Training configuration page
    fig, ax = plt.subplots(figsize=(11, 4))
    ax.axis("off")
    table_data = list(training_config.items())
    table = ax.table(cellText=table_data, colLabels=["Parameter", "Value"], loc="center")
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(1.2, 1.4)
    plt.title("Training Configuration", fontsize=14)
    pdf.savefig()
    plt.close()

    # Metric plots
    for i, (key, label) in enumerate(metrics.items()):
        if key in df.columns:
            plt.figure(figsize=(10, 4))
            plt.plot(df[key], marker='o', linewidth=2, color=colors[i])
            plt.title(f"{label} Over Epochs")
            plt.xlabel("Epoch")
            plt.ylabel(label)
            plt.grid(True)
            plt.tight_layout()
            pdf.savefig()
            plt.close()

    # Performance summary table
    summary_data = {
        "Metric": ["mAP@0.5", "mAP@0.5–0.95", "Precision", "Recall"],
        "Final Epoch": [
            f"{final_epoch.get('metrics/mAP50(B)', 0):.3f}",
            f"{final_epoch.get('metrics/mAP50-95(B)', 0):.3f}",
            f"{final_epoch.get('metrics/precision(B)', 0):.3f}",
            f"{final_epoch.get('metrics/recall(B)', 0):.3f}"
        ],
        f"Best Epoch ({best_map_epoch})": [
            f"{best_row.get('metrics/mAP50(B)', 0):.3f}",
            f"{best_row.get('metrics/mAP50-95(B)', 0):.3f}",
            f"{best_row.get('metrics/precision(B)', 0):.3f}",
            f"{best_row.get('metrics/recall(B)', 0):.3f}"
        ]
    }
    summary_df = pd.DataFrame(summary_data)
    fig, ax = plt.subplots(figsize=(10, 2.5))
    ax.axis("off")
    table = ax.table(cellText=summary_df.values, colLabels=summary_df.columns, loc="center")
    table.auto_set_font_size(False)
    table.set_fontsize(12)
    table.scale(1.2, 1.8)
    plt.title("Summary of Model Performance", fontsize=14)
    pdf.savefig()
    plt.close()

print(f"✅ PDF report saved to: {report_path}")

✅ PDF report saved to: /home/znelson/TensorStream/Labeled Data/diagnostics/yolov8n_1024to640_model02.pdf


In [None]:
from ultralytics import YOLO
import torch
import os
import glob

# --- Config ---
map_folder = "Flight_2_25pct"
image_dir = f"/home/znelson/TensorStream/Labeled Data/{map_folder}/tiled/images"

# Absolute path to trained model
model_path = f"/home/znelson/TensorStream/Labeled Data/runs/segment/train/yolov8s_1600to1024_model01/weights/best.pt"

# Output folder for predictions
output_dir = f"/home/znelson/TensorStream/Labeled Data/{map_folder}/predictions"
output_name = "predict_txt"

# --- Load model directly ---
model = YOLO(model_path)

# --- Optional: Clean up old predictions ---
label_dir = os.path.join(output_dir, output_name, "labels")
if os.path.exists(label_dir):
    old_labels = glob.glob(os.path.join(label_dir, "*.txt"))
    for f in old_labels:
        os.remove(f)
    print(f"🧹 Cleared {len(old_labels)} old prediction files from: {label_dir}")

# --- Predict with streaming ---
results = model.predict(
    source=image_dir,
    imgsz=1024,
    conf=0.3,
    iou=0.3,
    save=False,
    save_txt=True,
    save_conf=False,
    retina_masks=True,
    exist_ok=True,
    project=output_dir,
    name=output_name,
    device="cuda" if torch.cuda.is_available() else "cpu",
    stream=True
)

# Force the generator to run
for _ in results:
    pass

print(f"✅ Inference complete. Labels saved to: {output_dir}\\{output_name}\\labels\\")

🧹 Cleared 235 old prediction files from: /home/znelson/TensorStream/Labeled Data/Flight_2_25pct/predictions/predict_txt/labels

image 1/253 /home/znelson/TensorStream/Labeled Data/Flight_2_25pct/tiled/images/chip_103.png: 640x640 1 PVeg, 5.1ms
image 2/253 /home/znelson/TensorStream/Labeled Data/Flight_2_25pct/tiled/images/chip_104.png: 640x640 1 Road, 4 PVegs, 5.4ms
image 3/253 /home/znelson/TensorStream/Labeled Data/Flight_2_25pct/tiled/images/chip_105.png: 640x640 1 Road, 3 PVegs, 5.0ms
image 4/253 /home/znelson/TensorStream/Labeled Data/Flight_2_25pct/tiled/images/chip_106.png: 640x640 3 PVegs, 5.0ms
image 5/253 /home/znelson/TensorStream/Labeled Data/Flight_2_25pct/tiled/images/chip_107.png: 640x640 4 PVegs, 5.0ms
image 6/253 /home/znelson/TensorStream/Labeled Data/Flight_2_25pct/tiled/images/chip_108.png: 640x640 6 PVegs, 5.0ms
image 7/253 /home/znelson/TensorStream/Labeled Data/Flight_2_25pct/tiled/images/chip_109.png: 640x640 17 PVegs, 5.0ms
image 8/253 /home/znelson/TensorStrea

In [5]:
import os
import cv2
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import rasterio
from tqdm import tqdm

# --- Config ---
map_folder = "Flight_2_25pct"
base_dir = f"/home/znelson/TensorStream/Labeled Data/{map_folder}"
mask_dir = os.path.join(base_dir, "predictions", "predict_txt", "labels")
tile_metadata_path = os.path.join(base_dir, "tiled", "tile_metadata.csv")
raster_shape_path = os.path.join(base_dir, "tiled", "raster_shape.txt")
tif_path = os.path.join(base_dir, f"{map_folder}.tiff")
shapefile_path = os.path.join(base_dir, f"{map_folder} Segmentation.shp")

chip_size = 1600         # Original chip size used during tiling
inference_size = 1024    # YOLOv8 inference resolution

# Optional: Remap YOLO class IDs → Original class IDs
reverse_remap = {
    0: 1,  # Road
    1: 2,  # PVeg
    2: 3   # Water
}

# --- Load metadata and raster georeferencing ---
tile_meta = pd.read_csv(tile_metadata_path)
with open(raster_shape_path, "r") as f:
    height, width = map(int, f.read().strip().split(","))

with rasterio.open(tif_path) as src:
    transform = src.transform
    crs = src.crs

# --- Collect all polygons from predicted labels ---
features = []

for _, row in tqdm(tile_meta.iterrows(), total=len(tile_meta), desc="Stitching Tiles"):
    fname, x, y = row["filename"], int(row["x"]), int(row["y"])
    label_path = os.path.join(mask_dir, fname.replace(".png", ".txt"))
    if not os.path.exists(label_path):
        continue

    with open(label_path, "r") as f:
        lines = f.readlines()

    for line in lines:
        parts = line.strip().split()
        if len(parts) < 7:
            continue  # Skip degenerate polygons

        cls_id = int(float(parts[0]))  # Already YOLO-aligned: 0 = Road, 1 = PVeg, 2 = Water

        coords = list(map(float, parts[1:]))
        pts = np.array(coords, dtype=np.float32).reshape(-1, 2)
        pts *= chip_size  # Rescale to original resolution

        pts[:, 0] += x
        pts[:, 1] += y

        geo_pts = [rasterio.transform.xy(transform, y_, x_, offset='center') for x_, y_ in pts]
        poly = Polygon(geo_pts)

        if poly.is_valid and poly.area > 0:
            features.append({
                "geometry": poly,
                "class_id": reverse_remap.get(cls_id, cls_id)  # Optional remap
            })

# --- Export to shapefile ---
gdf = gpd.GeoDataFrame(features, crs=crs)
gdf.to_file(shapefile_path)

print(f"✅ Shapefile saved to: {shapefile_path}")

Stitching Tiles: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 667/667 [00:24<00:00, 27.60it/s]


✅ Shapefile saved to: /home/znelson/TensorStream/Labeled Data/Flight_2_25pct/Flight_2_25pct Segmentation.shp


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import rasterio
from tqdm import tqdm

# --- Configuration ---
map_folder = "Flight_2_25pct"
base_dir = f"/home/znelson/TensorStream/Labeled Data/{map_folder}"

reverse_remap = {
    0: "Road",
    1: "PVeg",
    2: "Water"
}

# --- Calculate areas for each map ---
print(f"Calculating areas for: {map_folder}")
base_map_dir = os.path.join(base_dir, map_folder)
tif_path = os.path.join(base_map_dir, f"{map_folder}.tiff")
mask_dir = os.path.join(base_map_dir, "predictions", "predict_txt", "labels")
tile_metadata_path = os.path.join(base_map_dir, "tiled", "tile_metadata.csv")
area_csv_path = os.path.join(base_map_dir, f"{map_folder}_area_summary.csv")

# --- Extract GSD from GeoTIFF ---
with rasterio.open(tif_path) as src:
    gsd_x = abs(src.transform.a)  # Meters per pixel (x direction)
    gsd_y = abs(src.transform.e)  # Meters per pixel (y direction)
    sqft_per_pixel = (gsd_x * gsd_y) * 10.7639  # Correct area calculation

print(f"🌎 {map_folder} GSD: {gsd_x:.4f} x {gsd_y:.4f} meters per pixel ({sqft_per_pixel:.4f} sqft per pixel)")

# --- Collect pixel counts per class ---
class_areas = {"Road": 0, "PVeg": 0, "Water": 0}
tile_meta = pd.read_csv(tile_metadata_path)
for _, row in tile_meta.iterrows():
    fname, x, y = row["filename"], int(row["x"]), int(row["y"])
    label_path = os.path.join(mask_dir, fname.replace(".png", ".txt"))
    if not os.path.exists(label_path):
        continue

    with open(label_path, "r") as f:
        for line in f.readlines():
            parts = line.strip().split()
            if len(parts) < 7:
                continue  # Skip degenerate polygons

            cls_id = int(float(parts[0]))
            original_class = reverse_remap.get(cls_id, cls_id)
            pixel_count = (len(parts) - 1) // 2  # Each (x, y) pair is 2 coordinates

            # Accumulate area
            class_areas[original_class] += pixel_count

# --- Convert to square feet and round ---
class_areas_sqft = {
    cls_name: round(count * sqft_per_pixel)
    for cls_name, count in class_areas.items()
}

# --- Save CSV for client ---
if os.path.exists(area_csv_path):
    os.remove(area_csv_path)

pd.DataFrame([
    {"label_name": cls_name, "area_sqft": area}
    for cls_name, area in class_areas_sqft.items()
]).to_csv(area_csv_path, index=False)

print(f"✅ {map_folder} area CSV saved to: {area_csv_path}")


In [2]:
# SCRIPT TO FIND AND DUPLICATE ROAD CHIPS (WITH SAFETY CHECK)
import os
import shutil
from collections import Counter

train_lbl_dir = "/home/znelson/TensorStream/Labeled Data/dataset/labels/train"
train_img_dir = "/home/znelson/TensorStream/Labeled Data/dataset/images/train"

# Set how many total copies (not additional) you want per qualifying tile
copies_per_tile = 3  # e.g., 3 means 1 original + 3 duplicates = 4 total versions

# Find Road-heavy tiles (YOLO class ID 0)
road_heavy_tiles = []
for fname in os.listdir(train_lbl_dir):
    if not fname.endswith(".txt"):
        continue
    path = os.path.join(train_lbl_dir, fname)
    with open(path, "r") as f:
        lines = f.readlines()
    count = Counter(int(line.split()[0]) for line in lines if line.strip())
    if count.get(0, 0) >= 3:  # Adjust this threshold if needed
        road_heavy_tiles.append(fname.replace(".txt", ""))

print(f"Found {len(road_heavy_tiles)} Road-heavy tiles.")

# Duplicate each qualifying tile safely
duplication_count = 0
for tile_name in road_heavy_tiles:
    img_path = os.path.join(train_img_dir, tile_name + ".png")
    lbl_path = os.path.join(train_lbl_dir, tile_name + ".txt")
    
    for i in range(1, copies_per_tile + 1):
        new_suffix = f"_dup{i}"
        new_img = os.path.join(train_img_dir, tile_name + new_suffix + ".png")
        new_lbl = os.path.join(train_lbl_dir, tile_name + new_suffix + ".txt")
        
        if not os.path.exists(new_img) and not os.path.exists(new_lbl):
            shutil.copyfile(img_path, new_img)
            shutil.copyfile(lbl_path, new_lbl)
            duplication_count += 1

print(f"✅ Duplicated {duplication_count} files across {len(road_heavy_tiles)} tiles")

Found 212 Road-heavy tiles.
✅ Duplicated 530 files across 212 tiles
