# Terralyte Model Benchmarking Suite

Relative + Absolute Benchmark Notebook  
Compares the predictions of `verifier1.pt` and `verifier2.pt` against each other (relative benchmark), and evaluates both models against real ground truth (absolute benchmark).


In [None]:
# IMPORTS

import os
import csv
import json
import math
import subprocess
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    mean_absolute_error
)

In [None]:
# CONFIGURATION

# Interpreter used to launch the helper scripts (imagenRunner.py, run_model.py)
# as subprocesses.
# NOTE(review): absolute path is machine-specific — confirm it exists where this runs.
PY = "/usr/local/bin/python3"  

# Weight files passed to run_model.py as its model argument.
MODEL1 = "verifier1.pt"
MODEL2 = "verifier2.pt"

# Benchmark input CSVs; the trailing comments list the columns each one provides.
RELATIVE_CSV = "rel_benchmark.csv"      # sample_id,lat,lon
ABSOLUTE_CSV = "abs_benchmark.csv"       # sample_id,lat,lon,has_solar,panel_count

print("Using Python:", PY)

In [None]:
# HELPERS 
def run_tile_download(lat, lon, zoom=18, radius=1):
    """Run imagenRunner.py for a coordinate and return the tile PNG path it prints.

    The script is launched with the PY interpreter as
    ``imagenRunner.py <lat> <lon> <zoom> <radius> esri --crop``. The last line
    of its standard output is taken to be the path of the produced tile.

    Args:
        lat: Latitude, converted with ``str()`` before being passed on.
        lon: Longitude, converted with ``str()`` before being passed on.
        zoom: Zoom argument forwarded to the script (default 18).
        radius: Radius argument forwarded to the script (default 1).

    Returns:
        The last line of the script's stripped stdout.

    Raises:
        RuntimeError: If the script exits with a non-zero status (the message
            is its stderr), or if it exits successfully but prints nothing,
            in which case there is no path to return.
    """
    p = subprocess.run(
        [PY, "imagenRunner.py", str(lat), str(lon), str(zoom), str(radius), "esri", "--crop"],
        capture_output=True,
        text=True
    )
    if p.returncode != 0:
        raise RuntimeError(p.stderr)

    # Only the final line matters; anything printed before it is ignored.
    tile_path = p.stdout.strip().rsplit("\n", 1)[-1]
    if not tile_path:
        # Fail here rather than handing an empty path to the model runner.
        raise RuntimeError(
            f"imagenRunner.py printed no tile path for ({lat}, {lon})"
        )
    return tile_path


def run_model(image_path, model_path):
    """Run run_model.py on one image with the given weights and parse its output.

    The script is launched with the PY interpreter as
    ``run_model.py <image_path> <model_path>``; its stripped stdout is decoded
    as JSON.

    Args:
        image_path: Path of the image to score.
        model_path: Path of the model weights to use.

    Returns:
        The object decoded from the script's JSON output.

    Raises:
        RuntimeError: If the script exits with a non-zero status; the message
            is the script's stderr.
    """
    command = [PY, "run_model.py", image_path, model_path]
    proc = subprocess.run(command, capture_output=True, text=True)

    if proc.returncode:
        raise RuntimeError(proc.stderr)

    raw_output = proc.stdout.strip()
    return json.loads(raw_output)

In [None]:
# RELATIVE BENCHMARKING

def relative_benchmark(csv_path):
    """Run both models on every row of a relative-benchmark CSV.

    For each row a tile is fetched with ``run_tile_download(lat, lon)`` and
    scored twice with ``run_model``: once with MODEL1 and once with MODEL2.
    Progress is printed per row.

    Args:
        csv_path: Path to a CSV with a header row providing the
            ``sample_id``, ``lat`` and ``lon`` columns.

    Returns:
        A dict of parallel lists with one entry per CSV row:
        ``ids`` (the ``sample_id`` values), ``m1_labels`` / ``m2_labels``
        (1 when the model's ``has_solar`` is truthy, else 0) and
        ``m1_counts`` / ``m2_counts`` (the models' ``panel_count_est``
        values, unmodified).

    Raises:
        RuntimeError: Propagated from the helpers when a subprocess fails.
    """
    ids, m1_labels, m2_labels = [], [], []
    m1_counts, m2_counts = [], []

    # newline="" leaves newline handling to the csv module, as its docs
    # require; otherwise newlines inside quoted fields are mis-parsed.
    with open(csv_path, "r", newline="") as f:
        reader = csv.DictReader(f)

        for r in reader:
            sid = r["sample_id"]
            lat, lon = float(r["lat"]), float(r["lon"])

            print(f"→ {sid} ({lat}, {lon})")
            img = run_tile_download(lat, lon)

            # Both models score the same downloaded tile.
            r1 = run_model(img, MODEL1)
            r2 = run_model(img, MODEL2)

            ids.append(sid)
            m1_labels.append(1 if r1["has_solar"] else 0)
            m2_labels.append(1 if r2["has_solar"] else 0)

            m1_counts.append(r1["panel_count_est"])
            m2_counts.append(r2["panel_count_est"])

    return {
        "ids": ids,
        "m1_labels": m1_labels,
        "m2_labels": m2_labels,
        "m1_counts": m1_counts,
        "m2_counts": m2_counts,
    }

# RUN RELATIVE BENCHMARK

# Runs as soon as this cell executes: processes every row of RELATIVE_CSV and
# keeps the per-sample outputs of both models in `rel` for the metrics cell.
rel = relative_benchmark(RELATIVE_CSV)
print("\nRelative Benchmark Complete!")

In [None]:
# RELATIVE METRICS

def relative_metrics(pred1, pred2):
    """Score one model's binary labels against another model's labels.

    ``pred1`` is passed to the scikit-learn scorers as the reference
    (``y_true``) and ``pred2`` as the prediction (``y_pred``), so the results
    describe how closely ``pred2`` agrees with ``pred1``.

    Args:
        pred1: Sequence of 0/1 labels used as the reference.
        pred2: Sequence of 0/1 labels compared against the reference.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``.
    """
    acc = accuracy_score(pred1, pred2)
    # zero_division=0 matches compute_abs: an undefined metric (e.g. no
    # positive labels at all) is reported as 0 without emitting a warning.
    prec = precision_score(pred1, pred2, zero_division=0)
    rec = recall_score(pred1, pred2, zero_division=0)
    f1 = f1_score(pred1, pred2, zero_division=0)
    return acc, prec, rec, f1

# Model 1's labels act as the reference; Model 2's labels are scored against them.
rel_metric = relative_metrics(rel["m1_labels"], rel["m2_labels"])

print("Relative Model2 vs Model1:")
# Captions are padded so the values line up in the printed report.
_captions = ("Accuracy :", "Precision:", "Recall   :", "F1 Score :")
for _caption, _score in zip(_captions, rel_metric):
    print(_caption, _score)

In [None]:
# ABSOLUTE BENCHMARKING

def absolute_benchmark(csv_path):
    """Run both models on every row of a ground-truth CSV.

    For each row a tile is fetched with ``run_tile_download(lat, lon)`` and
    scored with ``run_model`` using MODEL1 and MODEL2; the row's ground-truth
    values are recorded alongside the model outputs. Progress is printed per
    row.

    Args:
        csv_path: Path to a CSV with a header row providing the
            ``sample_id``, ``lat``, ``lon``, ``has_solar`` and
            ``panel_count`` columns. ``has_solar`` and ``panel_count`` must
            parse with ``int()``.

    Returns:
        A dict of parallel lists with one entry per CSV row:
        ``gt_labels`` / ``gt_counts`` (ground truth from the CSV),
        ``m1_labels`` / ``m2_labels`` (1 when the model's ``has_solar`` is
        truthy, else 0) and ``m1_counts`` / ``m2_counts`` (the models'
        ``panel_count_est`` values, unmodified).

    Raises:
        RuntimeError: Propagated from the helpers when a subprocess fails.
        ValueError: If a numeric column cannot be parsed.
    """
    gt_labels, gt_counts = [], []
    m1_labels, m1_counts = [], []
    m2_labels, m2_counts = [], []

    # newline="" leaves newline handling to the csv module, as its docs
    # require; otherwise newlines inside quoted fields are mis-parsed.
    with open(csv_path, "r", newline="") as f:
        reader = csv.DictReader(f)

        for r in reader:
            sid = r["sample_id"]
            lat = float(r["lat"])
            lon = float(r["lon"])
            gt_has = int(r["has_solar"])
            gt_cnt = int(r["panel_count"])

            print(f"→ {sid} ({lat}, {lon})")

            img = run_tile_download(lat, lon)

            # Both models score the same downloaded tile.
            r1 = run_model(img, MODEL1)
            r2 = run_model(img, MODEL2)

            gt_labels.append(gt_has)
            gt_counts.append(gt_cnt)

            m1_labels.append(1 if r1["has_solar"] else 0)
            m1_counts.append(r1["panel_count_est"])

            m2_labels.append(1 if r2["has_solar"] else 0)
            m2_counts.append(r2["panel_count_est"])

    return {
        "gt_labels": gt_labels,
        "gt_counts": gt_counts,
        "m1_labels": m1_labels,
        "m1_counts": m1_counts,
        "m2_labels": m2_labels,
        "m2_counts": m2_counts,
    }

# Runs as soon as this cell executes: processes every row of ABSOLUTE_CSV and
# keeps ground truth plus both models' outputs in `absb` for the cells below.
absb = absolute_benchmark(ABSOLUTE_CSV)
print("\nAbsolute Benchmark Complete!")

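[Recorded cell output] NameError: name 'csv' is not defined — `csv` is imported in the IMPORTS cell, so this error presumably came from running this cell before that one; run the cells in order.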

In [None]:
# ABSOLUTE METRICS

def compute_abs(gt, pred, gt_cnt, pred_cnt):
    """Score one model's labels and counts against ground truth.

    Args:
        gt: Ground-truth binary labels.
        pred: Predicted binary labels, aligned with ``gt``.
        gt_cnt: Ground-truth counts.
        pred_cnt: Predicted counts, aligned with ``gt_cnt``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, mae, rmse)``. The first
        four come from the labels (undefined precision/recall/F1 are reported
        as 0); MAE and RMSE come from the counts.
    """
    # Classification quality on the labels.
    accuracy = accuracy_score(gt, pred)
    precision, recall, f_score = (
        scorer(gt, pred, zero_division=0)
        for scorer in (precision_score, recall_score, f1_score)
    )

    # Regression quality on the counts.
    mean_abs_err = mean_absolute_error(gt_cnt, pred_cnt)
    squared_errors = [(truth - est) ** 2 for truth, est in zip(gt_cnt, pred_cnt)]
    root_mean_sq_err = math.sqrt(sum(squared_errors) / len(gt_cnt))

    return accuracy, precision, recall, f_score, mean_abs_err, root_mean_sq_err


# Both models are scored against the same ground-truth labels and counts.
_truth_labels, _truth_counts = absb["gt_labels"], absb["gt_counts"]

m1_abs = compute_abs(_truth_labels, absb["m1_labels"], _truth_counts, absb["m1_counts"])
m2_abs = compute_abs(_truth_labels, absb["m2_labels"], _truth_counts, absb["m2_counts"])

# Each tuple is (accuracy, precision, recall, f1, mae, rmse).
print("Model 1 Absolute:", m1_abs)
print("Model 2 Absolute:", m2_abs)

In [None]:
# PLOT

# Scatter of predicted vs. ground-truth counts for both models, with the
# y = x diagonal marking a perfect prediction.
_truth_counts = absb["gt_counts"]

plt.figure(figsize=(7, 7))
for _series_key, _series_name in (("m1_counts", "Model1"), ("m2_counts", "Model2")):
    plt.scatter(_truth_counts, absb[_series_key], label=_series_name, alpha=0.7)

# The diagonal runs from the origin to the largest ground-truth count.
_diagonal = [0, max(_truth_counts)]
plt.plot(_diagonal, _diagonal, 'k--', label="Ideal")

plt.title("Absolute Count Prediction Accuracy")
plt.xlabel("Ground Truth Count")
plt.ylabel("Predicted Count")
plt.legend()
plt.grid(True)
plt.show()