# Kiln Evolution: Presence Confusion Matrix

Uses ground truth and Gemini predictions to compute a presence confusion matrix.


In [1]:
import pandas as pd

gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
pred_path = (
    "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/model_prediction_csv/teochat_kiln_results_test (1)_lat_lon_presence.csv"
)

gt = pd.read_csv(gt_path)
pred = pd.read_csv(pred_path)

gt.head()
pred.head()

Unnamed: 0,lat_lon,presence,appearance_year,shape_change_detected
0,28.205600_77.105800,1,2014,0
1,28.205600_77.164500,0,0,0
2,28.205600_77.244400,0,0,0
3,28.205600_77.340900,0,0,0
4,28.205600_77.344400,0,0,0


In [2]:
# Confusion matrix for presence (GT vs Gemini)
import pandas as pd

gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
pred_path = (
    "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/model_prediction_csv/teochat_kiln_results_test (1)_lat_lon_presence.csv"
)

gt = pd.read_csv(gt_path)
pred = pd.read_csv(pred_path)

gt["key"] = gt["filename"].astype(str).str.replace(".png", "", regex=False)
pred["key"] = pred["lat_lon"].astype(str)

gt["presence"] = pd.to_numeric(gt["presence"], errors="coerce").fillna(0).astype(int)
pred["presence"] = pd.to_numeric(pred["presence"], errors="coerce").fillna(0).astype(int)

merged = gt.merge(pred[["key", "presence"]], on="key", how="inner", suffixes=("_gt", "_pred"))

cm = pd.crosstab(merged["presence_gt"], merged["presence_pred"], rownames=["gt"], colnames=["pred"], dropna=False)
for r in [0, 1]:
    if r not in cm.index:
        cm.loc[r] = 0
for c in [0, 1]:
    if c not in cm.columns:
        cm[c] = 0
cm = cm.sort_index().sort_index(axis=1)

TN = int(cm.loc[0, 0])
FP = int(cm.loc[0, 1])
FN = int(cm.loc[1, 0])
TP = int(cm.loc[1, 1])

metrics = {
    "TP": TP,
    "TN": TN,
    "FP": FP,
    "FN": FN,
    "accuracy": (TP + TN) / max(TP + TN + FP + FN, 1),
    "precision": TP / max(TP + FP, 1),
    "recall": TP / max(TP + FN, 1),
    "f1": (2 * TP) / max(2 * TP + FP + FN, 1),
}

print("merged rows", len(merged))
print("confusion matrix (gt rows, pred cols):")
print(cm)
print("metrics:")
for k, v in metrics.items():
    if isinstance(v, float):
        print(f"{k}: {v:.4f}")
    else:
        print(f"{k}: {v}")


merged rows 924
confusion matrix (gt rows, pred cols):
pred    0    1
gt            
0     125   46
1      38  715
metrics:
TP: 715
TN: 125
FP: 46
FN: 38
accuracy: 0.9091
precision: 0.9396
recall: 0.9495
f1: 0.9445


In [3]:
# Confusion matrix for appearance_year (GT vs Gemini)
import pandas as pd

gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
pred_path = (
    "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/model_prediction_csv/teochat_kiln_results_test (1)_lat_lon_presence.csv"
)

gt = pd.read_csv(gt_path)
pred = pd.read_csv(pred_path)

gt["key"] = gt["filename"].astype(str).str.replace(".png", "", regex=False)
pred["key"] = pred["lat_lon"].astype(str)

gt["appearance_year"] = pd.to_numeric(gt["appearance_year"], errors="coerce").fillna(0).astype(int)
pred["appearance_year"] = pd.to_numeric(pred["appearance_year"], errors="coerce").fillna(0).astype(int)

merged = gt.merge(
    pred[["key", "appearance_year"]],
    on="key",
    how="inner",
    suffixes=("_gt", "_pred"),
)

cm = pd.crosstab(
    merged["appearance_year_gt"],
    merged["appearance_year_pred"],
    rownames=["gt"],
    colnames=["pred"],
    dropna=False,
)

print("merged rows", len(merged))
print("appearance_year confusion matrix (gt rows, pred cols):")
print(cm)


merged rows 924
appearance_year confusion matrix (gt rows, pred cols):
pred  0     2014  2016  2018  2020  2022
gt                                      
0      127    47     0     0     0     0
2014    25   411     6   112    20    18
2016     9    26     7    44     8     8
2018     0     1     0    22     9     4
2020     1     1     0     0     2     4
2022     1     1     0     1     0     9


In [4]:
# Accuracy and F1 for appearance_year (GT vs Gemini)
import pandas as pd

gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
pred_path = (
    "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/model_prediction_csv/teochat_kiln_results_test (1)_lat_lon_presence.csv"
)

gt = pd.read_csv(gt_path)
pred = pd.read_csv(pred_path)

gt["key"] = gt["filename"].astype(str).str.replace(".png", "", regex=False)
pred["key"] = pred["lat_lon"].astype(str)

gt["appearance_year"] = pd.to_numeric(gt["appearance_year"], errors="coerce").fillna(0).astype(int)
pred["appearance_year"] = pd.to_numeric(pred["appearance_year"], errors="coerce").fillna(0).astype(int)

merged = gt.merge(
    pred[["key", "appearance_year"]],
    on="key",
    how="inner",
    suffixes=("_gt", "_pred"),
)

y_true = merged["appearance_year_gt"]
y_pred = merged["appearance_year_pred"]

accuracy = (y_true == y_pred).mean()

def f1_macro(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    f1s = []
    for label in labels:
        tp = ((y_true == label) & (y_pred == label)).sum()
        fp = ((y_true != label) & (y_pred == label)).sum()
        fn = ((y_true == label) & (y_pred != label)).sum()
        denom = (2 * tp + fp + fn)
        f1s.append((2 * tp / denom) if denom else 0.0)
    return sum(f1s) / len(f1s) if f1s else 0.0

def f1_weighted(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    total = len(y_true)
    f1_sum = 0.0
    for label in labels:
        tp = ((y_true == label) & (y_pred == label)).sum()
        fp = ((y_true != label) & (y_pred == label)).sum()
        fn = ((y_true == label) & (y_pred != label)).sum()
        denom = (2 * tp + fp + fn)
        f1 = (2 * tp / denom) if denom else 0.0
        support = (y_true == label).sum()
        f1_sum += f1 * support
    return f1_sum / total if total else 0.0

print("accuracy:", round(accuracy, 4))
print("f1_macro:", round(f1_macro(y_true, y_pred), 4))
print("f1_weighted:", round(f1_weighted(y_true, y_pred), 4))


accuracy: 0.6255
f1_macro: 0.3757
f1_weighted: 0.6564


In [5]:
# Confusion matrix + F1 for fcb_to_zigzag_Category vs shape_transition_year_after (±2 tolerance)
import pandas as pd

gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
pred_path = (
    "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/model_prediction_csv/teochat_kiln_results_test (1)_lat_lon_presence.csv"
)

gt = pd.read_csv(gt_path)
pred = pd.read_csv(pred_path)

gt["key"] = gt["filename"].astype(str).str.replace(".png", "", regex=False)
pred["key"] = pred["lat_lon"].astype(str)

gt_col = "fcb_to_zigzag_Category"
pred_col = "type_transition_year_after"

gt[gt_col] = pd.to_numeric(gt[gt_col], errors="coerce").fillna(0).astype(int)
pred[pred_col] = pd.to_numeric(pred[pred_col], errors="coerce").fillna(0).astype(int)

merged = gt.merge(pred[["key", pred_col]], on="key", how="inner", suffixes=("_gt", "_pred"))

y_true = merged[gt_col]
y_pred = merged[pred_col]

# Apply ±2 tolerance: treat predictions within 2 years as correct
y_pred_adj = y_pred.copy()
tol = 2
mask = (y_true - y_pred).abs() <= tol
y_pred_adj[mask] = y_true[mask]

cm = pd.crosstab(
    y_true,
    y_pred_adj,
    rownames=["gt"],
    colnames=["pred (±2 tol)"],
    dropna=False,
)

accuracy = (y_true == y_pred_adj).mean()

def f1_macro(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    f1s = []
    for label in labels:
        tp = ((y_true == label) & (y_pred == label)).sum()
        fp = ((y_true != label) & (y_pred == label)).sum()
        fn = ((y_true == label) & (y_pred != label)).sum()
        denom = (2 * tp + fp + fn)
        f1s.append((2 * tp / denom) if denom else 0.0)
    return sum(f1s) / len(f1s) if f1s else 0.0

def f1_weighted(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    total = len(y_true)
    f1_sum = 0.0
    for label in labels:
        tp = ((y_true == label) & (y_pred == label)).sum()
        fp = ((y_true != label) & (y_pred == label)).sum()
        fn = ((y_true == label) & (y_pred != label)).sum()
        denom = (2 * tp + fp + fn)
        f1 = (2 * tp / denom) if denom else 0.0
        support = (y_true == label).sum()
        f1_sum += f1 * support
    return f1_sum / total if total else 0.0

print("merged rows", len(merged))
print("confusion matrix (gt rows, pred cols, ±2 tol):")
print(cm)
print("accuracy:", round(accuracy, 4))
print("f1_macro:", round(f1_macro(y_true, y_pred_adj), 4))
print("f1_weighted:", round(f1_weighted(y_true, y_pred_adj), 4))


KeyError: 'type_transition_year_after'

In [6]:
# Appearance_year confusion matrix with ±2 tolerance (GT vs Gemini)
import pandas as pd

gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
pred_path = (
    "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/model_prediction_csv/teochat_kiln_results_test (1)_lat_lon_presence.csv"
)

gt = pd.read_csv(gt_path)
pred = pd.read_csv(pred_path)

gt["key"] = gt["filename"].astype(str).str.replace(".png", "", regex=False)
pred["key"] = pred["lat_lon"].astype(str)

gt_col = "appearance_year"
pred_col = "appearance_year"

gt[gt_col] = pd.to_numeric(gt[gt_col], errors="coerce").fillna(0).astype(int)
pred[pred_col] = pd.to_numeric(pred[pred_col], errors="coerce").fillna(0).astype(int)

merged = gt.merge(
    pred[["key", pred_col]],
    on="key",
    how="inner",
    suffixes=("_gt", "_pred"),
)

y_true = merged["appearance_year_gt"]
y_pred = merged["appearance_year_pred"]

# Apply ±2 tolerance: treat predictions within 2 years as correct
tol = 2
y_pred_adj = y_pred.copy()
mask = (y_true - y_pred).abs() <= tol
y_pred_adj[mask] = y_true[mask]

cm = pd.crosstab(
    y_true,
    y_pred_adj,
    rownames=["gt"],
    colnames=["pred (±2 tol)"],
    dropna=False,
)

accuracy = (y_true == y_pred_adj).mean()

def f1_macro(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    f1s = []
    for label in labels:
        tp = ((y_true == label) & (y_pred == label)).sum()
        fp = ((y_true != label) & (y_pred == label)).sum()
        fn = ((y_true == label) & (y_pred != label)).sum()
        denom = (2 * tp + fp + fn)
        f1s.append((2 * tp / denom) if denom else 0.0)
    return sum(f1s) / len(f1s) if f1s else 0.0

def f1_weighted(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    total = len(y_true)
    f1_sum = 0.0
    for label in labels:
        tp = ((y_true == label) & (y_pred == label)).sum()
        fp = ((y_true != label) & (y_pred == label)).sum()
        fn = ((y_true == label) & (y_pred != label)).sum()
        denom = (2 * tp + fp + fn)
        f1 = (2 * tp / denom) if denom else 0.0
        support = (y_true == label).sum()
        f1_sum += f1 * support
    return f1_sum / total if total else 0.0

print("merged rows", len(merged))
print("appearance_year confusion matrix (gt rows, pred cols, ±2 tol):")
print(cm)
print("accuracy:", round(accuracy, 4))
print("f1_macro:", round(f1_macro(y_true, y_pred_adj), 4))
print("f1_weighted:", round(f1_weighted(y_true, y_pred_adj), 4))


merged rows 924
appearance_year confusion matrix (gt rows, pred cols, ±2 tol):
pred (±2 tol)  0     2014  2016  2018  2020  2022
gt                                               
0               127    47     0     0     0     0
2014             25   417     0   112    20    18
2016              9     0    77     0     8     8
2018              0     1     0    31     0     4
2020              1     1     0     0     6     0
2022              1     1     0     1     0     9
accuracy: 0.7219
f1_macro: 0.5641
f1_weighted: 0.7619


In [7]:
# ============================================================
# Transition evaluation:
#  (A) Binary transition F1  (existence, FP vs FN explicit)
#  (B) Transition-year Macro-F1 with ±2-year tolerance
# ============================================================

import pandas as pd
import numpy as np

# --------------------
# Load data
# --------------------
gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
pred_path = (
    "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/model_prediction_csv/teochat_kiln_results_test (1)_lat_lon_presence.csv"
)

gt = pd.read_csv(gt_path)
pred = pd.read_csv(pred_path)

# --------------------
# Align keys
# --------------------
gt["key"] = gt["filename"].astype(str).str.replace(".png", "", regex=False)
pred["key"] = pred["lat_lon"].astype(str)

gt_col = "fcb_to_zigzag_Category"
pred_col = "shape_change_detected"

gt[gt_col] = pd.to_numeric(gt[gt_col], errors="coerce").fillna(0).astype(int)
pred[pred_col] = pd.to_numeric(pred[pred_col], errors="coerce").fillna(0).astype(int)

merged = gt.merge(
    pred[["key", pred_col]],
    on="key",
    how="inner",
)

print("Merged rows:", len(merged))

y_true_year = merged[gt_col].values
y_pred_year = merged[pred_col].values

# ============================================================
# (A) Binary transition detection (existence)
# ============================================================

y_true_bin = (y_true_year != 0).astype(int)
y_pred_bin = (y_pred_year != 0).astype(int)

cm_bin = pd.crosstab(
    y_true_bin,
    y_pred_bin,
    rownames=["GT transition"],
    colnames=["Pred transition"],
).reindex(index=[0, 1], columns=[0, 1], fill_value=0)

TN = cm_bin.loc[0, 0]
FP = cm_bin.loc[0, 1]
FN = cm_bin.loc[1, 0]
TP = cm_bin.loc[1, 1]

binary_f1 = (2 * TP) / (2 * TP + FP + FN) if (2 * TP + FP + FN) else 0.0

# ============================================================
# (B) Transition-year localization with ±2-year tolerance
# ============================================================

tol = 2

def apply_tolerance(y_true, y_pred, tol):
    """
    Returns adjusted predictions where
    predictions within ±tol years are treated as correct.
    """
    y_pred_adj = y_pred.copy()
    mask = (
        (y_true != 0)
        & (y_pred != 0)
        & (np.abs(y_pred - y_true) <= tol)
    )
    y_pred_adj[mask] = y_true[mask]
    return y_pred_adj

y_pred_tol = apply_tolerance(y_true_year, y_pred_year, tol)

cm_tol = pd.crosstab(
    y_true_year,
    y_pred_tol,
    rownames=["GT year"],
    colnames=["Pred year (±2 tol)"],
    dropna=False,
)

def macro_f1(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    f1s = []
    for lab in labels:
        tp = ((y_true == lab) & (y_pred == lab)).sum()
        fp = ((y_true != lab) & (y_pred == lab)).sum()
        fn = ((y_true == lab) & (y_pred != lab)).sum()
        denom = 2 * tp + fp + fn
        f1s.append((2 * tp / denom) if denom else 0.0)
    return float(np.mean(f1s)) if f1s else 0.0

macro_f1_tol = macro_f1(y_true_year, y_pred_tol)

# ============================================================
# Print results
# ============================================================

print("\n=== Binary transition (existence) ===")
print(cm_bin)
print("Binary Transition F1:", round(binary_f1, 4))

print("\n=== Transition year (±2-year tolerance) ===")
print(cm_tol)
print("Macro-F1 (±2 yr):", round(macro_f1_tol, 4))

Merged rows: 924

=== Binary transition (existence) ===
Pred transition    0    1
GT transition            
0                242   11
1                533  138
Binary Transition F1: 0.3366

=== Transition year (±2-year tolerance) ===
Pred year (±2 tol)    0   1
GT year                    
0                   242  11
2016                  1   1
2018                 39  10
2020                161  39
2022                289  75
2023                  1   0
2024                 42  13
Macro-F1 (±2 yr): 0.0589


In [117]:
# YOLO presence confusion matrix (binary) vs GT
import pandas as pd

gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
yolo_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/yolo_prediction_by_year.csv"

gt = pd.read_csv(gt_path)
yolo = pd.read_csv(yolo_path)

gt["key"] = gt["filename"].astype(str)
yolo["key"] = yolo["filename"].astype(str)

year_cols = [c for c in yolo.columns if c != "filename" and c != "key"]

def is_present(val):
    if pd.isna(val):
        return False
    s = str(val).strip().lower()
    return s not in {"negetive", "negative", "0", "none", "nan", ""}

yolo["presence"] = yolo[year_cols].apply(lambda row: any(is_present(v) for v in row), axis=1).astype(int)

gt["presence"] = pd.to_numeric(gt["presence"], errors="coerce").fillna(0).astype(int)

merged = gt.merge(yolo[["key", "presence"]], on="key", how="inner", suffixes=("_gt", "_pred"))

cm = pd.crosstab(merged["presence_gt"], merged["presence_pred"], rownames=["gt"], colnames=["pred"], dropna=False)
for r in [0, 1]:
    if r not in cm.index:
        cm.loc[r] = 0
for c in [0, 1]:
    if c not in cm.columns:
        cm[c] = 0
cm = cm.sort_index().sort_index(axis=1)

TN = int(cm.loc[0, 0])
FP = int(cm.loc[0, 1])
FN = int(cm.loc[1, 0])
TP = int(cm.loc[1, 1])

metrics = {
    "TP": TP,
    "TN": TN,
    "FP": FP,
    "FN": FN,
    "accuracy": (TP + TN) / max(TP + TN + FP + FN, 1),
    "precision": TP / max(TP + FP, 1),
    "recall": TP / max(TP + FN, 1),
    "f1": (2 * TP) / max(2 * TP + FP + FN, 1),
}

print("merged rows", len(merged))
print("YOLO presence confusion matrix (gt rows, pred cols):")
print(cm)
print("metrics:")
for k, v in metrics.items():
    if isinstance(v, float):
        print(f"{k}: {v:.4f}")
    else:
        print(f"{k}: {v}")


merged rows 924
YOLO presence confusion matrix (gt rows, pred cols):
pred    0    1
gt            
0     166    5
1       2  751
metrics:
TP: 751
TN: 166
FP: 5
FN: 2
accuracy: 0.9924
precision: 0.9934
recall: 0.9973
f1: 0.9954


In [118]:
# YOLO appearance_year vs GT with ±2 tolerance
import pandas as pd

gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
yolo_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/yolo_prediction_by_year.csv"

gt = pd.read_csv(gt_path)
yolo = pd.read_csv(yolo_path)

gt["key"] = gt["filename"].astype(str)
yolo["key"] = yolo["filename"].astype(str)

year_cols = [c for c in yolo.columns if c not in {"filename", "key"}]
year_cols_int = sorted([int(c) for c in year_cols])

def is_present(val):
    if pd.isna(val):
        return False
    s = str(val).strip().lower()
    return s not in {"negetive", "negative", "0", "none", "nan", ""}

def first_present_year(row):
    for y in year_cols_int:
        v = row.get(str(y))
        if is_present(v):
            return y
    return 0

yolo["appearance_year"] = yolo.apply(first_present_year, axis=1).astype(int)

gt["appearance_year"] = pd.to_numeric(gt["appearance_year"], errors="coerce").fillna(0).astype(int)

merged = gt.merge(yolo[["key", "appearance_year"]], on="key", how="inner", suffixes=("_gt", "_pred"))

y_true = merged["appearance_year_gt"]
y_pred = merged["appearance_year_pred"]

# Apply ±2 tolerance: treat predictions within 2 years as correct
tol = 2
y_pred_adj = y_pred.copy()
mask = (y_true - y_pred).abs() <= tol
y_pred_adj[mask] = y_true[mask]

cm = pd.crosstab(
    y_true,
    y_pred_adj,
    rownames=["gt"],
    colnames=["pred (±2 tol)"],
    dropna=False,
)

accuracy = (y_true == y_pred_adj).mean()

def f1_macro(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    f1s = []
    for label in labels:
        tp = ((y_true == label) & (y_pred == label)).sum()
        fp = ((y_true != label) & (y_pred == label)).sum()
        fn = ((y_true == label) & (y_pred != label)).sum()
        denom = (2 * tp + fp + fn)
        f1s.append((2 * tp / denom) if denom else 0.0)
    return sum(f1s) / len(f1s) if f1s else 0.0

def f1_weighted(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    total = len(y_true)
    f1_sum = 0.0
    for label in labels:
        tp = ((y_true == label) & (y_pred == label)).sum()
        fp = ((y_true != label) & (y_pred == label)).sum()
        fn = ((y_true == label) & (y_pred != label)).sum()
        denom = (2 * tp + fp + fn)
        f1 = (2 * tp / denom) if denom else 0.0
        support = (y_true == label).sum()
        f1_sum += f1 * support
    return f1_sum / total if total else 0.0

print("merged rows", len(merged))
print("YOLO appearance_year confusion matrix (gt rows, pred cols, ±2 tol):")
print(cm)
print("accuracy:", round(accuracy, 4))
print("f1_macro:", round(f1_macro(y_true, y_pred_adj), 4))
print("f1_weighted:", round(f1_weighted(y_true, y_pred_adj), 4))


merged rows 924
YOLO appearance_year confusion matrix (gt rows, pred cols, ±2 tol):
pred (±2 tol)  0     2014  2016  2018  2020  2022
gt                                               
0               167     7     0     0     0     0
2014              1   582     0     7     2     0
2016              0     0   100     0     1     1
2018              0     0     0    36     0     0
2020              0     1     0     0     7     0
2022              0     1     0     0     0    11
accuracy: 0.9773
f1_macro: 0.9261
f1_weighted: 0.9778


In [119]:
# YOLO FCBK->Zigzag transition existence (binary) vs GT fcb_to_zigzag_Category
import pandas as pd

gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
yolo_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/yolo_prediction_by_year.csv"

gt = pd.read_csv(gt_path)
yolo = pd.read_csv(yolo_path)

gt["key"] = gt["filename"].astype(str)
yolo["key"] = yolo["filename"].astype(str)

years = sorted([int(c) for c in yolo.columns if c not in {"filename", "key"}])

def is_fcbk(val):
    s = str(val).strip().lower()
    return s in {"fcbk", "cfcbk"}

def is_zz(val):
    s = str(val).strip().lower()
    return s == "zigzag"

def transition_exists(row):
    fcbk_years = [y for y in years if is_fcbk(row.get(str(y)))]
    if not fcbk_years:
        return 0
    t_fcbk = min(fcbk_years)

    zz_years_after = [y for y in years if y > t_fcbk and is_zz(row.get(str(y)))]
    if not zz_years_after:
        return 0
    t_zz = min(zz_years_after)

    # Consistency after first ZZ: no FCBK/CFCBK in later years
    for y in years:
        if y >= t_zz and is_fcbk(row.get(str(y))):
            return 0

    return 1

yolo["trans_pred"] = yolo.apply(transition_exists, axis=1).astype(int)

gt["trans_gt"] = (
    pd.to_numeric(gt["fcb_to_zigzag_Category"], errors="coerce").fillna(0).astype(int) > 0
).astype(int)

merged = gt.merge(yolo[["key", "trans_pred"]], on="key", how="inner")

cm = pd.crosstab(merged["trans_gt"], merged["trans_pred"], rownames=["gt"], colnames=["pred"], dropna=False)
for r in [0, 1]:
    if r not in cm.index:
        cm.loc[r] = 0
for c in [0, 1]:
    if c not in cm.columns:
        cm[c] = 0
cm = cm.sort_index().sort_index(axis=1)

TN = int(cm.loc[0, 0])
FP = int(cm.loc[0, 1])
FN = int(cm.loc[1, 0])
TP = int(cm.loc[1, 1])

metrics = {
    "TP": TP,
    "TN": TN,
    "FP": FP,
    "FN": FN,
    "accuracy": (TP + TN) / max(TP + TN + FP + FN, 1),
    "precision": TP / max(TP + FP, 1),
    "recall": TP / max(TP + FN, 1),
    "f1": (2 * TP) / max(2 * TP + FP + FN, 1),
}

print("merged rows", len(merged))
print("YOLO transition confusion matrix (gt rows, pred cols):")
print(cm)
print("metrics:")
for k, v in metrics.items():
    if isinstance(v, float):
        print(f"{k}: {v:.4f}")
    else:
        print(f"{k}: {v}")


merged rows 924
YOLO transition confusion matrix (gt rows, pred cols):
pred    0   1
gt           
0     249   4
1     611  60
metrics:
TP: 60
TN: 249
FP: 4
FN: 611
accuracy: 0.3344
precision: 0.9375
recall: 0.0894
f1: 0.1633


In [120]:
import pandas as pd
import numpy as np

gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
yolo_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/yolo_prediction_by_year.csv"

gt = pd.read_csv(gt_path)
yolo = pd.read_csv(yolo_path)

# -----------------------
# Key normalization
# -----------------------
def canon_key(s):
    s = str(s).strip()
    if s.endswith(".png"):
        s = s[:-4]
    return s

gt["key"] = gt["filename"].map(canon_key)
yolo["key"] = yolo["filename"].map(canon_key)

# -----------------------
# Years columns
# -----------------------
years = sorted([int(c) for c in yolo.columns if c not in {"filename", "key"}])
year_cols = [str(y) for y in years]

# -----------------------
# Class parsing
# -----------------------
def is_fcbk(val):
    s = str(val).strip().lower()
    return s in {"fcbk", "cfcbk"}  # extend if needed

def is_zz(val):
    s = str(val).strip().lower()
    return s in {"zigzag", "zz"}   # extend if needed

# -----------------------
# Canonicalized transition existence
# -----------------------
def trans_exists_canonical(row, K=1):
    # yearly binary signals
    f = np.array([1 if is_fcbk(row.get(c)) else 0 for c in year_cols], dtype=int)
    z = np.array([1 if is_zz(row.get(c)) else 0 for c in year_cols], dtype=int)

    # 1) if no FCBK ever observed -> no FCBK->ZZ transition observable
    if f.sum() == 0:
        return 0

    # first FCBK index
    i_f = int(np.argmax(f == 1))

    # 2) suppress ZZ before first FCBK (causality)
    z[:i_f] = 0

    # 3) find first ZZ after FCBK
    zz_after = np.where((np.arange(len(years)) > i_f) & (z == 1))[0]
    if len(zz_after) == 0:
        return 0
    i_z = int(zz_after[0])

    # 4) optional persistence: require K consecutive ZZ timepoints from i_z
    if K > 1:
        ok = True
        for j in range(K):
            if i_z + j >= len(z) or z[i_z + j] == 0:
                ok = False
                break
        if not ok:
            # try later ZZ runs
            for cand in zz_after[1:]:
                ok = True
                for j in range(K):
                    if cand + j >= len(z) or z[cand + j] == 0:
                        ok = False
                        break
                if ok:
                    i_z = int(cand)
                    break
            else:
                return 0

    # 5) enforce monotonic after ZZ by canonicalization (do NOT reject)
    f[i_z:] = 0
    z[i_z:] = 1

    return 1

# Choose K=1 or K=2 depending on how noisy YOLO is
K_CONSEC_ZZ = 1
yolo["trans_pred"] = yolo.apply(lambda r: trans_exists_canonical(r, K=K_CONSEC_ZZ), axis=1).astype(int)

# -----------------------
# GT binary transition
# -----------------------
gt["trans_gt"] = (
    pd.to_numeric(gt["fcb_to_zigzag_Category"], errors="coerce").fillna(0).astype(int) != 0
).astype(int)

# -----------------------
# Merge + confusion matrix
# -----------------------
merged = gt[["key", "trans_gt"]].merge(yolo[["key", "trans_pred"]], on="key", how="left")
merged["trans_pred"] = merged["trans_pred"].fillna(0).astype(int)

cm = pd.crosstab(
    merged["trans_gt"],
    merged["trans_pred"],
    rownames=["gt"],
    colnames=["pred"],
).reindex(index=[0, 1], columns=[0, 1], fill_value=0)

TN, FP = int(cm.loc[0, 0]), int(cm.loc[0, 1])
FN, TP = int(cm.loc[1, 0]), int(cm.loc[1, 1])

precision = TP / (TP + FP) if (TP + FP) else 0.0
recall    = TP / (TP + FN) if (TP + FN) else 0.0
f1        = (2 * TP) / (2 * TP + FP + FN) if (2 * TP + FP + FN) else 0.0
acc       = (TP + TN) / (TP + TN + FP + FN) if (TP + TN + FP + FN) else 0.0

print("rows (GT):", len(gt), "rows (YOLO):", len(yolo), "merged:", len(merged))
print("Confusion matrix (gt rows, pred cols):")
print(cm)
print(f"TP={TP} TN={TN} FP={FP} FN={FN}")
print(f"precision={precision:.4f} recall={recall:.4f} f1={f1:.4f} accuracy={acc:.4f}")

rows (GT): 924 rows (YOLO): 924 merged: 924
Confusion matrix (gt rows, pred cols):
pred    0   1
gt           
0     246   7
1     604  67
TP=67 TN=246 FP=7 FN=604
precision=0.9054 recall=0.0999 f1=0.1799 accuracy=0.3387


In [122]:
import pandas as pd
import numpy as np

gt_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/Delhi GT.csv"
yolo_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/yolo_prediction_by_year.csv"

gt = pd.read_csv(gt_path)
yolo = pd.read_csv(yolo_path)

# -----------------------
# Key normalization
# -----------------------
def canon_key(s):
    s = str(s).strip()
    if s.endswith(".png"):
        s = s[:-4]
    return s

gt["key"] = gt["filename"].map(canon_key)
yolo["key"] = yolo["filename"].map(canon_key)

# -----------------------
# Identify year columns
# -----------------------
years = sorted([int(c) for c in yolo.columns if c not in {"filename", "key"}])
year_cols = [str(y) for y in years]

# -----------------------
# Label parsing
# -----------------------
def is_fcbk(val):
    s = str(val).strip().lower()
    return s in {"fcbk", "cfcbk"}

def is_zz(val):
    s = str(val).strip().lower()
    return s in {"zigzag", "zz"}

# -----------------------
# YOLO predicted transition year
# -----------------------
def yolo_transition_year(row, K=1):
    f = np.array([1 if is_fcbk(row.get(c)) else 0 for c in year_cols], dtype=int)
    z = np.array([1 if is_zz(row.get(c)) else 0 for c in year_cols], dtype=int)

    # must observe FCBK at least once
    if f.sum() == 0:
        return 0

    i_f = int(np.argmax(f == 1))          # first FCBK index
    z[:i_f] = 0                           # suppress ZZ before FCBK (causality)

    # candidate ZZ indices after FCBK
    cand = np.where((np.arange(len(years)) > i_f) & (z == 1))[0]
    if len(cand) == 0:
        return 0

    # require K consecutive ZZ points starting at i_z (optional robustness)
    if K <= 1:
        i_z = int(cand[0])
        return years[i_z]

    for i_z in cand:
        ok = True
        for j in range(K):
            if i_z + j >= len(z) or z[i_z + j] == 0:
                ok = False
                break
        if ok:
            return years[int(i_z)]
    return 0

K_CONSEC_ZZ = 1   # set 2 if YOLO flickers a lot
yolo["pred_trans_year"] = yolo.apply(lambda r: yolo_transition_year(r, K=K_CONSEC_ZZ), axis=1).astype(int)

# -----------------------
# GT transition year
# -----------------------
gt["gt_trans_year"] = pd.to_numeric(gt["fcb_to_zigzag_Category"], errors="coerce").fillna(0).astype(int)

# -----------------------
# Merge (left join: missing YOLO -> 0)
# -----------------------
merged = gt[["key", "gt_trans_year"]].merge(
    yolo[["key", "pred_trans_year"]],
    on="key",
    how="left",
)
merged["pred_trans_year"] = merged["pred_trans_year"].fillna(0).astype(int)

y_true = merged["gt_trans_year"].values
y_pred = merged["pred_trans_year"].values

# -----------------------
# ±2-year tolerance adjustment (year localization only)
# -----------------------
tol = 2
y_pred_tol = y_pred.copy()

mask = (y_true != 0) & (y_pred != 0) & (np.abs(y_pred - y_true) <= tol)
y_pred_tol[mask] = y_true[mask]

# -----------------------
# Confusion matrix (tolerance-aware)
# -----------------------
cm = pd.crosstab(
    y_true,
    y_pred_tol,
    rownames=["gt"],
    colnames=[f"pred (±{tol}y tol)"],
    dropna=False,
)

# -----------------------
# Macro-F1 (tolerance-aware)
# -----------------------
def macro_f1(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    f1s = []
    for lab in labels:
        tp = ((y_true == lab) & (y_pred == lab)).sum()
        fp = ((y_true != lab) & (y_pred == lab)).sum()
        fn = ((y_true == lab) & (y_pred != lab)).sum()
        denom = 2 * tp + fp + fn
        f1s.append((2 * tp / denom) if denom else 0.0)
    return float(np.mean(f1s)) if f1s else 0.0

f1_mac_tol = macro_f1(y_true, y_pred_tol)

# Optional: tolerance accuracy (not your main metric)
tol_acc = (((y_true == 0) & (y_pred == 0)) | ((y_true != 0) & (y_pred != 0) & (np.abs(y_pred - y_true) <= tol))).mean()

print("Years:", years)
print("Merged rows:", len(merged))
print("\nConfusion matrix (tolerance-aware):")
print(cm)
print(f"\nMacro-F1 (±{tol}y): {f1_mac_tol:.4f}")
print(f"Tolerance accuracy (±{tol}y): {tol_acc:.4f}")

Years: [2014, 2016, 2018, 2020, 2022, 2024]
Merged rows: 924

Confusion matrix (tolerance-aware):
pred (±2y tol)  0     2016  2018  2020  2022  2024
gt                                                
0                246     3     0     0     1     3
2016               2     0     0     0     0     0
2018              45     0     3     0     1     0
2020             187     5     0     6     0     2
2022             325     8     9     0    22     0
2023               1     0     0     0     0     0
2024              44     1     2     1     0     7

Macro-F1 (±2y): 0.1317
Tolerance accuracy (±2y): 0.3074


In [124]:
# Print columns from qwen3_30b_kiln_stats_delhi_all_loc_bbox.csv
import pandas as pd

path = "model_prediction_csv/qwen3_30b_kiln_stats_delhi_all_loc_bbox.csv"
df = pd.read_csv(path)
print(df.columns.tolist())
print(df.head())


['lat', 'lon', 'lat_lon', 'presence', 'appearance_year', 'appearance_type', 'type_transition_year_before', 'type_transition_year_after', 'type_transition_note', 'shape_transition_year_before', 'shape_transition_year_after', 'shape_transition_note', 'demolished', 'demolished_year', 'negative_sample', 'confidence', 'inconsistent_presence', 'review_priority_score', 'monitoring_note_one_line', 'raw_output', 'status']
         lat        lon              lat_lon  presence  appearance_year  \
0  28.426401  77.593561  28.426401_77.593561      True             2014   
1  28.253880  77.441317  28.253880_77.441317      True             2018   
2  28.373744  77.014573  28.373744_77.014573      True             2014   
3  28.767656  77.376923  28.767656_77.376923      True             2016   
4  28.786954  77.572609  28.786954_77.572609      True             2014   

  appearance_type  type_transition_year_before  type_transition_year_after  \
0            FCBK                            0        

In [125]:
# Print monitoring_note_one_line column from qwen3_30b_kiln_stats_delhi_all_loc_bbox.csv
import pandas as pd

path = "model_prediction_csv/qwen3_30b_kiln_stats_delhi_all_loc_bbox.csv"
df = pd.read_csv(path)
print(df["monitoring_note_one_line"])


0      The site shows a circular FCBK structure from ...
1      A brick kiln structure appears in 2018 and rem...
2      A brick kiln structure is present from 2014 to...
3      Brick kiln structures appear in 2016 and remai...
4      A brick kiln structure is present from 2014 to...
                             ...                        
919    FCBK kiln present from 2014-2024, transitioned...
920    A brick kiln structure is present from 2014 to...
921    No kiln-like structures detected in any year a...
922    No kiln-like structures detected in any year f...
923    A brick kiln structure appears in 2016, is pre...
Name: monitoring_note_one_line, Length: 924, dtype: object


In [132]:
import pandas as pd
import numpy as np

# -------------------------------------------------
# Paths
# -------------------------------------------------
pred_path = (
    "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/model_prediction_csv/qwen_all_pred_stas_wo_bbox.csv"
)

gt_path = (
    "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/"
    "temporal-analysis/Delhi GT.csv"
)

# -------------------------------------------------
# Load
# -------------------------------------------------
pred = pd.read_csv(pred_path)
gt   = pd.read_csv(gt_path)

# -------------------------------------------------
# Key normalization
# -------------------------------------------------
def canon_key(x):
    x = str(x).strip()
    if x.endswith(".png"):
        x = x[:-4]
    return x

pred["key"] = pred["lat_lon"].map(canon_key)
gt["key"]   = gt["filename"].map(canon_key)

# -------------------------------------------------
# Select + normalize columns
# -------------------------------------------------
pred = pred[[
    "key",
    "presence",
    "appearance_year",
    "shape_transition_year_before",
    "shape_transition_year_after",
]]

gt = gt[[
    "key",
    "presence",
    "appearance_year",
    "fcb_to_zigzag_Category",
]]

# numeric cleanup
for c in [
    "appearance_year",
    "shape_transition_year_before",
    "shape_transition_year_after",
    "fcb_to_zigzag_Category",
]:
    if c in pred.columns:
        pred[c] = pd.to_numeric(pred[c], errors="coerce").fillna(0).astype(int)
    if c in gt.columns:
        gt[c] = pd.to_numeric(gt[c], errors="coerce").fillna(0).astype(int)

pred["presence"] = pred["presence"].astype(bool).astype(int)
gt["presence"]   = gt["presence"].astype(bool).astype(int)

# -------------------------------------------------
# Merge
# -------------------------------------------------
df = gt.merge(pred, on="key", how="inner", suffixes=("_gt", "_pred"))
print("Matched rows:", len(df))

# -------------------------------------------------
# Helper: Macro-F1
# -------------------------------------------------
def macro_f1(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    f1s = []
    for lab in labels:
        tp = ((y_true == lab) & (y_pred == lab)).sum()
        fp = ((y_true != lab) & (y_pred == lab)).sum()
        fn = ((y_true == lab) & (y_pred != lab)).sum()
        denom = 2 * tp + fp + fn
        f1s.append((2 * tp / denom) if denom else 0.0)
    return float(np.mean(f1s)) if f1s else 0.0

# -------------------------------------------------
# (1) Presence F1_bin
# -------------------------------------------------
y_true = df["presence_gt"].values
y_pred = df["presence_pred"].values

tp = ((y_true == 1) & (y_pred == 1)).sum()
fp = ((y_true == 0) & (y_pred == 1)).sum()
fn = ((y_true == 1) & (y_pred == 0)).sum()

presence_f1 = (2 * tp) / (2 * tp + fp + fn) if (2 * tp + fp + fn) else 0.0

# -------------------------------------------------
# (2) Appearance year Macro-F1 (±2y)
# -------------------------------------------------
tol = 2
yt = df["appearance_year_gt"].values
yp = df["appearance_year_pred"].values.copy()

mask = (yt != 0) & (yp != 0) & (np.abs(yp - yt) <= tol)
yp[mask] = yt[mask]

appearance_f1 = macro_f1(yt, yp)

# -------------------------------------------------
# (3) Transition existence F1_bin
# -------------------------------------------------
y_true = (df["fcb_to_zigzag_Category"] != 0).astype(int).values
y_pred = (df["shape_transition_year_after"] != 0).astype(int).values

tp = ((y_true == 1) & (y_pred == 1)).sum()
fp = ((y_true == 0) & (y_pred == 1)).sum()
fn = ((y_true == 1) & (y_pred == 0)).sum()

transition_f1 = (2 * tp) / (2 * tp + fp + fn) if (2 * tp + fp + fn) else 0.0

# -------------------------------------------------
# (4) Event year Macro-F1 (±2y)
# -------------------------------------------------
yt = df["fcb_to_zigzag_Category"].values
yp = df["shape_transition_year_after"].values.copy()

mask = (yt != 0) & (yp != 0) & (np.abs(yp - yt) <= tol)
yp[mask] = yt[mask]

event_year_f1 = macro_f1(yt, yp)

# -------------------------------------------------
# Final results
# -------------------------------------------------
print("\nQwen-3-30B metrics")
print(f"Presence F1_bin           : {presence_f1:.4f}")
print(f"Appearance Yr F1_mac(±2y) : {appearance_f1:.4f}")
print(f"Transition F1_bin         : {transition_f1:.4f}")
print(f"Event Yr F1_mac(±2y)      : {event_year_f1:.4f}")

Matched rows: 924

Qwen-3-30B metrics
Presence F1_bin           : 0.9508
Appearance Yr F1_mac(±2y) : 0.6929
Transition F1_bin         : 0.5583
Event Yr F1_mac(±2y)      : 0.2138


In [133]:
import numpy as np
import pandas as pd

tol = 2

y_true = df["fcb_to_zigzag_Category"].values          # GT year
y_pred = df["shape_transition_year_after"].values    # Pred year

y_pred_tol = y_pred.copy()
mask = (y_true != 0) & (y_pred != 0) & (np.abs(y_pred - y_true) <= tol)
y_pred_tol[mask] = y_true[mask]

In [134]:
cm_year = pd.crosstab(
    y_true,
    y_pred_tol,
    rownames=["GT year"],
    colnames=[f"Pred year (±{tol}y)"],
    dropna=False,
)
print(cm_year)

Pred year (±2y)  0     2016  2018  2020  2022  2024
GT year                                            
0                 170    30    22    16     9     6
2016                2     0     0     0     0     0
2018               22     0    26     0     1     0
2020              119    34     0    46     0     1
2022              202    57    43     0    62     0
2023                0     0     1     0     0     0
2024               34     7     8     3     0     3
