In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix,
)

# CSV of labelled matchups; read with pd.read_csv by the evaluation script below.
DATA_PATH = "FeatureEngineering/matchup_labels.csv"
# Fraction of rows held out as the test split (passed as test_size).
TEST_SIZE = 0.2
# Seed for train_test_split so the split is reproducible (passed as random_state).
RANDOM_STATE = 42

# Rules are informed by tree split features (pairwise_distance, ol_y_new, dl_y_new, ol_x, dl_x)
# but kept broad (no narrow bands).
# NOTE(review): units are whatever the CSV columns use -- not visible here.

# Base edge rule in predict_rules: a row is EDGE when
# abs(dl_y_new) >= DL_Y_EDGE_THRESHOLD or abs(ol_y_new) >= OL_Y_EDGE_THRESHOLD.
DL_Y_EDGE_THRESHOLD = 4.5
OL_Y_EDGE_THRESHOLD = 3.2

# Edge "pocket" rule in predict_rules: a row is EDGE only when ALL of these hold:
#   abs(dl_y_new - ol_y_new) <= Y_GAP_EDGE_MAX
#   abs(dl_y_new)            >= DL_Y_EDGE_MIN
#   abs(ol_y_new)            >= OL_Y_EDGE_MIN
#   dl_x                     <= DL_X_EDGE_MAX
#   ol_x                     <= OL_X_EDGE_MAX
#   pairwise_distance        <= PAIRWISE_EDGE_MAX
Y_GAP_EDGE_MAX = 1.6
DL_Y_EDGE_MIN = 2.4
OL_Y_EDGE_MIN = 1.0
DL_X_EDGE_MAX = -0.8
OL_X_EDGE_MAX = 0.3
PAIRWISE_EDGE_MAX = 2.0

# Interior override in predict_rules, applied after both edge rules: a row is
# forced back to INTERIOR when abs(dl_y_new - ol_y_new) <= Y_GAP_INTERIOR_MAX
# and pairwise_distance <= PAIRWISE_INTERIOR_MAX.
Y_GAP_INTERIOR_MAX = 0.8
PAIRWISE_INTERIOR_MAX = 1.6


def predict_rules(df: pd.DataFrame) -> np.ndarray:
    """Label every row of *df* as "EDGE" or "INTERIOR" with fixed threshold rules.

    Reads the columns ``dl_y_new``, ``ol_y_new``, ``dl_x``, ``ol_x`` and
    ``pairwise_distance`` and compares them against the module-level
    threshold constants.

    A row is "EDGE" when it satisfies the base rule or the pocket rule and
    does not satisfy the interior override; every other row is "INTERIOR".
    Comparisons against NaN evaluate to False, so rows with missing feature
    values end up "INTERIOR" unless another rule marks them as edge.

    Returns:
        An object-dtype array of labels, one per row, in row order.
    """
    dl_y_mag = df["dl_y_new"].abs()
    ol_y_mag = df["ol_y_new"].abs()
    y_gap = (df["dl_y_new"] - df["ol_y_new"]).abs()

    # Base rule: either player sits far out in Y.
    wide_offset = (dl_y_mag >= DL_Y_EDGE_THRESHOLD) | (ol_y_mag >= OL_Y_EDGE_THRESHOLD)

    # Pocket rule: every one of these conditions has to hold.
    small_gap_for_edge = y_gap <= Y_GAP_EDGE_MAX
    dl_y_far_enough = dl_y_mag >= DL_Y_EDGE_MIN
    ol_y_far_enough = ol_y_mag >= OL_Y_EDGE_MIN
    dl_x_in_range = df["dl_x"] <= DL_X_EDGE_MAX
    ol_x_in_range = df["ol_x"] <= OL_X_EDGE_MAX
    distance = df["pairwise_distance"]
    near_for_edge = distance <= PAIRWISE_EDGE_MAX
    edge_pocket = (
        small_gap_for_edge
        & dl_y_far_enough
        & ol_y_far_enough
        & dl_x_in_range
        & ol_x_in_range
        & near_for_edge
    )

    # Interior override: tight Y gap together with a short pairwise distance.
    tight_interior = (y_gap <= Y_GAP_INTERIOR_MAX) & (distance <= PAIRWISE_INTERIOR_MAX)

    # The override wins over both edge rules, so it is masked out of the edge set.
    is_edge = (wide_offset | edge_pocket) & ~tight_interior

    # astype(object) keeps the result an array of plain Python strings.
    return np.where(is_edge.to_numpy(), "EDGE", "INTERIOR").astype(object)


def print_metrics(y_true, y_pred, label: str) -> None:
    """Print an evaluation summary for one set of predictions.

    Writes, in order: a header containing *label*, accuracy and the
    macro-averaged precision/recall/F1 (three decimals each), the
    scikit-learn classification report, the confusion matrix with rows and
    columns ordered ["EDGE", "INTERIOR"], and a 60-character dashed rule.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with *y_true*.
        label: Name of the split shown in the header (e.g. "Train").
    """
    print(f"{label} metrics")

    # zero_division=0 reports 0 for a class with no predictions or samples.
    macro_opts = {"average": "macro", "zero_division": 0}
    headline_scores = (
        ("Accuracy", accuracy_score, {}),
        ("Precision (macro)", precision_score, macro_opts),
        ("Recall (macro)", recall_score, macro_opts),
        ("F1 (macro)", f1_score, macro_opts),
    )
    for title, scorer, options in headline_scores:
        print(f"{title}: {scorer(y_true, y_pred, **options):.3f}")
    print()

    print("Classification report:")
    report = classification_report(y_true, y_pred, zero_division=0)
    print(report)

    print("Confusion matrix (rows=true, cols=pred):")
    matrix = confusion_matrix(y_true, y_pred, labels=["EDGE", "INTERIOR"])
    print(matrix)

    print("-" * 60)


def get_misclassified(df: pd.DataFrame, y_pred: np.ndarray) -> pd.DataFrame:
    """Return the rows of *df* whose predicted label differs from the manual one.

    Args:
        df: Frame containing at least the ``ref_id`` and ``matchup_manual`` columns.
        y_pred: Predicted labels, one per row of *df*.

    Returns:
        A new frame with columns ``ref_id``, ``matchup_manual`` and ``pred``,
        limited to rows where ``matchup_manual`` and ``pred`` disagree.
        *df* itself is not modified.
    """
    report = df.loc[:, ["ref_id", "matchup_manual"]].assign(pred=y_pred)
    disagrees = report["matchup_manual"].ne(report["pred"])
    return report.loc[disagrees]


# Load the labelled matchups and fail fast, with a readable message, if any
# column used further down is absent. predict_rules reads the five feature
# columns and get_misclassified reads ref_id, so all of them are validated
# here rather than surfacing later as a bare KeyError.
df = pd.read_csv(DATA_PATH)
required = {
    "ref_id",
    "matchup_manual",
    "ol_y_new",
    "dl_y_new",
    "ol_x",
    "dl_x",
    "pairwise_distance",
}
missing = required - set(df.columns)
if missing:
    raise ValueError(f"Missing required columns: {sorted(missing)}")

# Rows without a manual label can be neither stratified on nor scored.
df = df.dropna(subset=["matchup_manual"]).copy()

# Stratify on the label so both splits keep the EDGE/INTERIOR proportions.
train_df, test_df = train_test_split(
    df,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
    stratify=df["matchup_manual"],
)

print(f"Rows: {len(df)} | Train: {len(train_df)} | Test: {len(test_df)}")
print("Rule set: base Y-thresholds + broad edge pocket + tight interior override")
print()

# The rules are fixed thresholds (nothing is fitted here), so the same
# function is applied unchanged to each split and to the full dataset.
train_pred = predict_rules(train_df)
test_pred = predict_rules(test_df)
full_pred = predict_rules(df)

print_metrics(train_df["matchup_manual"].values, train_pred, "Train")
print_metrics(test_df["matchup_manual"].values, test_pred, "Test")

train_mis = get_misclassified(train_df, train_pred)
print(f"Train misclassifications: {len(train_mis)}")
print(train_mis.to_string(index=False))
print("-" * 60)

# Full-dataset listing covers train and test rows together.
all_mis = get_misclassified(df, full_pred)
print(f"All misclassifications (full dataset): {len(all_mis)}")
print(all_mis.to_string(index=False))


Rows: 121 | Train: 96 | Test: 25
Thresholds -> abs(dl_y_new) >= 4.5, abs(ol_y_new) >= 3.2

Train metrics
Accuracy: 0.948
Precision (macro): 0.948
Recall (macro): 0.946
F1 (macro): 0.947

Classification report:
              precision    recall  f1-score   support

        EDGE       0.95      0.93      0.94        43
    INTERIOR       0.94      0.96      0.95        53

    accuracy                           0.95        96
   macro avg       0.95      0.95      0.95        96
weighted avg       0.95      0.95      0.95        96

Confusion matrix (rows=true, cols=pred):
[[40  3]
 [ 2 51]]
------------------------------------------------------------
Test metrics
Accuracy: 1.000
Precision (macro): 1.000
Recall (macro): 1.000
F1 (macro): 1.000

Classification report:
              precision    recall  f1-score   support

        EDGE       1.00      1.00      1.00        11
    INTERIOR       1.00      1.00      1.00        14

    accuracy                           1.00        25
   mac