In [None]:
import os
import pandas as pd
import json
import matplotlib.pyplot as plt

# Where each run stores its out-of-fold results, and the CSV that accumulates
# the leaderboard scores entered by hand.
OOF_DIR = "../experiments/oof_results"
LB_LOG_PATH = "../experiments/lb_scores.csv"

# Make sure the folder holding the LB log exists. exist_ok=True replaces the
# separate "exists?" check, so an already-present directory is not an error.
os.makedirs(os.path.dirname(LB_LOG_PATH), exist_ok=True)

# Initialize LB log if missing.
# Only the header is written: a placeholder row of None values would be read
# back by pd.read_csv as a real record full of NaN that every consumer of the
# log has to filter out again.
if not os.path.isfile(LB_LOG_PATH):
    pd.DataFrame(
        columns=["run_name", "mean_cv", "oof_score", "kaggle_lb"]
    ).to_csv(LB_LOG_PATH, index=False)


In [None]:
def load_oof_metrics(oof_dir=None):
    """Collect the summary metrics of every run stored under ``oof_dir``.

    The directory tree is walked recursively. Every directory that contains
    an ``oof_metrics.json`` file counts as one run, named after that
    directory's basename.

    Parameters
    ----------
    oof_dir : str or path-like, optional
        Root directory to scan. When omitted, the module-level ``OOF_DIR`` is
        used; it is looked up at call time, so re-running the configuration
        cell with a new path takes effect without redefining this function.

    Returns
    -------
    pandas.DataFrame
        One row per run with the columns ``run_name``, ``mean_cv`` and
        ``oof_score``. A metric absent from the JSON file is left missing.
        The three columns are present even when no run is found, so callers
        can sort or filter the result without special-casing an empty tree.
    """
    if oof_dir is None:
        oof_dir = OOF_DIR

    columns = ["run_name", "mean_cv", "oof_score"]
    records = []
    for root, dirs, files in os.walk(oof_dir):
        # Sorting in place fixes the order in which os.walk descends, which
        # makes the row order reproducible across file systems.
        dirs.sort()
        if "oof_metrics.json" not in files:
            continue
        metrics_path = os.path.join(root, "oof_metrics.json")
        with open(metrics_path, "r", encoding="utf-8") as f:
            metrics = json.load(f)
        records.append({
            "run_name": os.path.basename(root),
            "mean_cv": metrics.get("mean_cv"),
            "oof_score": metrics.get("oof_score"),
        })
    # Passing the columns explicitly keeps the schema stable for an empty list.
    return pd.DataFrame(records, columns=columns)

# Gather the CV summary of every local run, then display the runs with the
# highest mean CV score first.
cv_df = load_oof_metrics()
cv_df.sort_values(by="mean_cv", ascending=False)


In [None]:
# Ask for the submission to record. Both values are validated before the log
# file is touched, so a bad entry never reaches the CSV.
run_name = input("Run/experiment name: ").strip()
if not run_name:
    # An empty name is written as an empty CSV field, which pd.read_csv parses
    # back as a missing value: the score could no longer be tied to a run.
    raise ValueError("Run/experiment name must not be empty.")

lb_score = float(input("Kaggle LB score: "))
if pd.isna(lb_score):
    # float() accepts the text "nan"; a NaN score is not a usable LB entry.
    raise ValueError("Kaggle LB score must be a number, got NaN.")

lb_df = pd.read_csv(LB_LOG_PATH)

# Pull the matching experiment from CV results
if "run_name" in cv_df.columns:
    match = cv_df[cv_df["run_name"] == run_name]
else:
    # cv_df can be a frame without any columns when no local run was found;
    # treat that as "no match" instead of failing on the column lookup.
    match = cv_df.iloc[0:0]

if match.empty:
    print("⚠️ No local run found. Logging LB only.")
    new_row = {
        "run_name": run_name,
        "mean_cv": None,
        "oof_score": None,
        "kaggle_lb": lb_score
    }
else:
    # If several local runs share the name, the first one listed is used.
    m = match.iloc[0]
    new_row = {
        "run_name": run_name,
        "mean_cv": m.mean_cv,
        "oof_score": m.oof_score,
        "kaggle_lb": lb_score
    }

# Update CSV: append the new record and rewrite the whole log.
lb_df = pd.concat([lb_df, pd.DataFrame([new_row])], ignore_index=True)
lb_df.to_csv(LB_LOG_PATH, index=False)

print("Logged:")
new_row


In [None]:
# Reload the log from disk and discard records that have no run name, then
# show the remaining submissions with the highest LB score first.
lb_df = (
    pd.read_csv(LB_LOG_PATH)
    .dropna(subset=["run_name"], how="all")
)
lb_df.sort_values(by="kaggle_lb", ascending=False)


In [None]:
# Only runs that have both a local CV score and an LB score can be placed on
# the scatter plot.
df = lb_df.loc[lb_df[["mean_cv", "kaggle_lb"]].notna().all(axis=1)]

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(df["mean_cv"], df["kaggle_lb"])
ax.set_xlabel("Local Mean CV Score")
ax.set_ylabel("Kaggle LB Score")
ax.set_title("Local CV vs Kaggle LB")
# Label every point with the name of the run it belongs to.
for label, cv, lb in zip(df["run_name"], df["mean_cv"], df["kaggle_lb"]):
    ax.annotate(label, (cv, lb))
ax.grid(True)
plt.show()


In [None]:
# Leaderboard ranking: best LB score first, renumbered from 0 so the index
# reads as the rank.
lb_df.sort_values(by="kaggle_lb", ascending=False, ignore_index=True)


In [None]:
# Gap between the leaderboard score and the local CV score of each run.
# assign() returns a new frame, so lb_df itself is left untouched.
df = lb_df.assign(delta_lb_cv=lb_df["kaggle_lb"] - lb_df["mean_cv"])
df.sort_values(by="delta_lb_cv", ascending=False)


In [None]:
# Show the logged runs whose name contains the given text. Note that
# str.contains matches anywhere in the name, not only at its start; rows
# with a missing name are excluded (na=False).
prefix = "baseline"  # or "lr", "bs", etc.
lb_df.loc[lb_df["run_name"].str.contains(prefix, na=False)]
