In [3]:
import pandas as pd
import ast
from pprint import pformat

# Location of the hyperopt log read by this cell.
# NOTE(review): absolute, user-specific path; the cell only runs on a machine
# where this exact file exists.
csv_path = "/Users/mazin/Desktop/hyperopt_live_log.csv"

def parse_params(s):
    """Decode one raw ``params`` cell into a Python object when possible.

    A string whose first non-whitespace character is ``{`` is handed to
    ``ast.literal_eval``. Anything else, and any string that fails to
    evaluate, is returned unchanged so a malformed row never aborts the load.
    """
    looks_like_dict = isinstance(s, str) and s.lstrip().startswith("{")
    if not looks_like_dict:
        return s
    try:
        parsed = ast.literal_eval(s)
    except Exception:
        # Best effort: keep the raw text instead of failing the whole read.
        return s
    return parsed

def round_floats(obj, ndigits=6):
    """Return a copy of ``obj`` with every float rounded to ``ndigits`` places.

    Recurses through dicts (values only; keys are left untouched), lists and
    tuples. Any other object is returned as-is.

    Args:
        obj: Arbitrary, possibly nested, value.
        ndigits: Number of decimal places passed to ``round``.

    Returns:
        A float, a new dict, or a new container of the same list/tuple type
        as the input; non-float leaves are returned unchanged.
    """
    if isinstance(obj, float):
        return round(obj, ndigits)
    if isinstance(obj, dict):
        return {k: round_floats(v, ndigits) for k, v in obj.items()}
    if isinstance(obj, tuple) and hasattr(obj, "_fields"):
        # namedtuple constructors take one positional argument per field, so
        # the rounded items must be unpacked rather than passed as one iterable.
        return type(obj)(*(round_floats(v, ndigits) for v in obj))
    if isinstance(obj, (list, tuple)):
        return type(obj)(round_floats(v, ndigits) for v in obj)
    return obj

def print_row(row, title):
    """Print a labelled, human-readable report for one log row.

    Args:
        row: Mapping-like record (e.g. a ``pandas.Series``) holding
            ``iteration`` and the four metric fields; ``timestamp_dt``,
            ``timestamp`` and ``params`` are shown only when available.
        title: Text placed in the banner line above the report.
    """
    print(f"\n===== {title} =====")

    iteration = row['iteration']
    if pd.notna(iteration):
        iteration = int(iteration)
    print(f"Iteration   : {iteration}")

    # Prefer the already-converted datetime; otherwise convert the raw value,
    # which is read as seconds since the epoch.
    has_parsed_ts = 'timestamp_dt' in row and pd.notna(row['timestamp_dt'])
    if has_parsed_ts:
        print(f"Timestamp   : {row['timestamp_dt']}")
    else:
        has_raw_ts = 'timestamp' in row and pd.notna(row['timestamp'])
        if has_raw_ts:
            converted = pd.to_datetime(row['timestamp'], unit='s', errors='coerce')
            print(f"Timestamp   : {converted}")

    # Labels are left-aligned to 12 characters so the colons line up.
    for metric in ('train_mse', 'test_mse', 'train_r2', 'test_r2'):
        print(f"{metric:<12}: {row[metric]:.6f}")

    params = row.get('params', None)
    if params is None:
        return
    print("params      :")
    print(pformat(round_floats(params, 6), width=100, sort_dicts=True))

# --- load & clean ---
# parse_params is applied to every raw "params" cell while the CSV is read.
df = pd.read_csv(csv_path, converters={"params": parse_params})
# Coerce each metric column to numeric; cells that fail to parse become NaN.
for c in ("train_mse", "train_r2", "test_mse", "test_r2"):
    df[c] = pd.to_numeric(df[c], errors="coerce")
# Derive a datetime column, reading "timestamp" as seconds since the epoch.
if "timestamp" in df.columns:
    df["timestamp_dt"] = pd.to_datetime(df["timestamp"], unit="s", errors="coerce")

# --- locate winners ---
# Each entry: (metric column, Series method returning the best row's label, heading).
for _metric, _picker, _heading in (
    ("train_mse", "idxmin", "Lowest train_mse"),
    ("test_mse", "idxmin", "Lowest test_mse"),
    ("train_r2", "idxmax", "Highest train_r2"),
    ("test_r2", "idxmax", "Highest test_r2"),
):
    _best_label = getattr(df[_metric], _picker)()
    print_row(df.loc[_best_label], _heading)



===== Lowest train_mse =====
Iteration   : 249
Timestamp   : 2025-08-08 17:45:44.365824
train_mse   : 0.005010
test_mse    : 0.018217
train_r2    : 0.833303
test_r2     : -0.376188
params      :
{'colsample_bytree': 0.360569,
 'device': 'cuda',
 'gamma': 0.003303,
 'learning_rate': 0.184417,
 'max_depth': 9,
 'min_child_weight': 8,
 'n_estimators': 3250,
 'n_jobs': -1,
 'objective': 'reg:squarederror',
 'random_state': 42,
 'reg_alpha': 24.718484,
 'reg_lambda': 4214.830235,
 'subsample': 0.547832}

===== Lowest test_mse =====
Iteration   : 741
Timestamp   : 2025-08-08 18:07:16.230345472
train_mse   : 0.028682
test_mse    : 0.013105
train_r2    : 0.045608
test_r2     : 0.009950
params      :
{'colsample_bytree': 0.500413,
 'device': 'cuda',
 'gamma': 1.077876,
 'learning_rate': 0.208581,
 'max_depth': 14,
 'min_child_weight': 8,
 'n_estimators': 2150,
 'n_jobs': -1,
 'objective': 'reg:squarederror',
 'random_state': 42,
 'reg_alpha': 220.784026,
 'reg_lambda': 3650.307704,
 'subsample

In [5]:
import pandas as pd
import ast

# --- path ---
# Location of the hyperopt log read by this cell.
# NOTE(review): absolute, user-specific path; the cell only runs on a machine
# where this exact file exists.
csv_path = "/Users/mazin/Desktop/hyperopt_live_log.csv"

# ---------- helpers ----------
def parse_params(s):
    """Turn a stringified dict from the ``params`` column into a real object.

    Non-string values, and strings that do not begin with ``{`` once
    surrounding whitespace is removed, pass through untouched. A candidate
    string that ``ast.literal_eval`` rejects is also returned unchanged.
    """
    if not isinstance(s, str):
        return s
    if not s.strip().startswith("{"):
        return s
    try:
        return ast.literal_eval(s)
    except Exception:
        # Best effort: fall back to the raw text for unparseable rows.
        return s

def summarize_params(p, ndigits=5):
    """Build a compact ``name=value`` one-liner for selected hyperparameters.

    Args:
        p: Parameter dict. Any non-dict value is simply converted with ``str``.
        ndigits: Significant digits used when formatting float values
            (``g`` format); non-float values are shown as-is.

    Returns:
        Comma-separated ``name=value`` pairs, in a fixed order, for whichever
        of the listed names are present in ``p``.
    """
    if not isinstance(p, dict):
        return str(p)

    wanted = (
        "learning_rate", "n_estimators", "max_depth", "min_child_weight",
        "gamma", "subsample", "colsample_bytree", "reg_alpha", "reg_lambda",
    )

    def render(name):
        value = p[name]
        shown = f"{value:.{ndigits}g}" if isinstance(value, float) else f"{value}"
        return f"{name}={shown}"

    return ", ".join(render(name) for name in wanted if name in p)

def make_table(df, sort_by, ascending, title, topn=20):
    """Print and return a ranked leaderboard of the first ``topn`` sorted rows.

    Args:
        df: Frame with ``iteration``, ``params`` and the four metric columns;
            ``timestamp_dt`` or ``timestamp`` is used when present.
        sort_by: Column to order by.
        ascending: Sort direction handed to ``sort_values``.
        title: Heading printed between two 100-character ``=`` rules.
        topn: Number of leading rows to keep after sorting.

    Returns:
        The printed frame: rank, iteration, rounded metrics, a formatted
        timestamp string and a one-line parameter summary.
    """
    metric_cols = ["train_mse", "test_mse", "train_r2", "test_r2"]
    ts_format = "%Y-%m-%d %H:%M:%S"

    # NaNs are sorted to the end so they never outrank real values.
    ranked = df.sort_values(sort_by, ascending=ascending, na_position="last")
    top = ranked.head(topn).copy()
    top.insert(0, "rank", range(1, len(top) + 1))

    # Six decimals keeps the printed table readable.
    for name in metric_cols:
        top[name] = pd.to_numeric(top[name], errors="coerce").round(6)

    # Pick the datetime source: the pre-parsed column wins over the raw one,
    # which is read as seconds since the epoch.
    if "timestamp_dt" in top.columns:
        stamps = top["timestamp_dt"]
    elif "timestamp" in top.columns:
        stamps = pd.to_datetime(top["timestamp"], unit="s", errors="coerce")
    else:
        stamps = None
    top["timestamp"] = "" if stamps is None else stamps.dt.strftime(ts_format)

    top["params_summary"] = top["params"].apply(summarize_params)

    view = top[["rank", "iteration", *metric_cols, "timestamp", "params_summary"]]

    rule = "=" * 100
    print("\n" + rule)
    print(title)
    print(rule)
    print(view.to_string(index=False))
    return view

# ---------- load & clean ----------
# parse_params is applied to every raw "params" cell while the CSV is read.
df = pd.read_csv(csv_path, converters={"params": parse_params})
# Coerce each metric column to numeric; cells that fail to parse become NaN.
for c in ("train_mse", "test_mse", "train_r2", "test_r2"):
    df[c] = pd.to_numeric(df[c], errors="coerce")
# Derive a datetime column, reading "timestamp" as seconds since the epoch.
if "timestamp" in df.columns:
    df["timestamp_dt"] = pd.to_datetime(df["timestamp"], unit="s", errors="coerce")

# ---------- outputs ----------
# One leaderboard per metric: lowest first for MSE, highest first for R^2.
tbl_train_mse = make_table(
    df, sort_by="train_mse", ascending=True, title="Top 20 LOWEST train_mse"
)
tbl_test_mse = make_table(
    df, sort_by="test_mse", ascending=True, title="Top 20 LOWEST test_mse"
)
tbl_train_r2 = make_table(
    df, sort_by="train_r2", ascending=False, title="Top 20 HIGHEST train_r2"
)
tbl_test_r2 = make_table(
    df, sort_by="test_r2", ascending=False, title="Top 20 HIGHEST test_r2"
)

# (Optional) Save the readable tables to CSVs:
# tbl_train_mse.to_csv("/Users/mazin/Desktop/top20_train_mse.csv", index=False)
# tbl_test_mse.to_csv("/Users/mazin/Desktop/top20_test_mse.csv", index=False)
# tbl_train_r2.to_csv("/Users/mazin/Desktop/top20_train_r2.csv", index=False)
# tbl_test_r2.to_csv("/Users/mazin/Desktop/top20_test_r2.csv", index=False)



Top 20 LOWEST train_mse
 rank  iteration  train_mse  test_mse  train_r2   test_r2           timestamp                                                                                                                                                                params_summary
    1        249   0.005010  0.018217  0.833303 -0.376188 2025-08-08 17:45:44  learning_rate=0.18442, n_estimators=3250, max_depth=9, min_child_weight=8, gamma=0.0033028, subsample=0.54783, colsample_bytree=0.36057, reg_alpha=24.718, reg_lambda=4214.8
    2        681   0.008481  0.016640  0.717809 -0.257106 2025-08-08 18:04:30  learning_rate=0.12912, n_estimators=1850, max_depth=6, min_child_weight=10, gamma=0.032793, subsample=0.91169, colsample_bytree=0.44973, reg_alpha=14.534, reg_lambda=1177.8
    3        382   0.009588  0.015351  0.680952 -0.159725 2025-08-08 17:52:20    learning_rate=0.17935, n_estimators=3550, max_depth=11, min_child_weight=9, gamma=0.12222, subsample=0.9851, colsample_bytree=0.39052, reg