In [3]:
import pandas as pd
import ast
from pprint import pformat

csv_path = "/Users/mazin/Desktop/hyperopt_live_log.csv"

def parse_params(s):
    if isinstance(s, str) and s.strip().startswith("{"):
        try:
            return ast.literal_eval(s)
        except Exception:
            return s
    return s

def round_floats(obj, ndigits=6):
    """Recursively round every float inside a nested structure.

    Parameters
    ----------
    obj : object
        A float, a dict, a list or tuple (namedtuples included), or any
        other value. Dict values and sequence items are walked recursively;
        dict keys are left untouched.
    ndigits : int, default 6
        Number of decimal places passed to ``round``.

    Returns
    -------
    object
        Floats are returned rounded. Dicts come back as a new plain ``dict``;
        lists and tuples come back as a new instance of their own type.
        Any other value is returned unchanged.
    """
    if isinstance(obj, float):
        return round(obj, ndigits)
    if isinstance(obj, dict):
        return {k: round_floats(v, ndigits) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        rounded = [round_floats(v, ndigits) for v in obj]
        if isinstance(obj, tuple) and hasattr(obj, "_fields"):
            # namedtuple constructors take one positional argument per
            # field, not a single iterable, so the items must be unpacked.
            return type(obj)(*rounded)
        return type(obj)(rounded)
    return obj

def print_row(row, title):
    """Print a labelled, human-readable summary of one log row.

    ``row`` is indexed by column name (e.g. a row taken from the loaded
    frame). The summary shows the iteration, a timestamp when one is
    available, the four metrics to six decimals, and the rounded params.
    """
    print(f"\n===== {title} =====")

    iteration = row['iteration']
    if pd.notna(iteration):
        iteration = int(iteration)
    print(f"Iteration   : {iteration}")

    # Prefer the pre-converted datetime column; fall back to the raw epoch.
    if 'timestamp_dt' in row and pd.notna(row['timestamp_dt']):
        print(f"Timestamp   : {row['timestamp_dt']}")
    elif 'timestamp' in row and pd.notna(row['timestamp']):
        converted = pd.to_datetime(row['timestamp'], unit='s', errors='coerce')
        print(f"Timestamp   : {converted}")

    # Labels are left-padded to 12 characters so the colons line up.
    for metric in ("train_mse", "test_mse", "train_r2", "test_r2"):
        print(f"{metric:<12}: {row[metric]:.6f}")

    params = row.get('params', None)
    if params is None:
        return
    rounded = round_floats(params, 6)
    print("params      :")
    print(pformat(rounded, width=100, sort_dicts=True))

# --- load the hyperopt log and coerce the metric columns to numbers ---
df = pd.read_csv(csv_path, converters={"params": parse_params})
for c in ("train_mse", "train_r2", "test_mse", "test_r2"):
    # Unparseable cells become NaN instead of raising.
    df[c] = pd.to_numeric(df[c], errors="coerce")
if "timestamp" in df.columns:
    # Raw timestamps are treated as epoch seconds.
    df["timestamp_dt"] = pd.to_datetime(df["timestamp"], unit="s", errors="coerce")

# --- report the best row for each metric ---
for _metric, _picker, _title in (
    ("train_mse", "idxmin", "Lowest train_mse"),
    ("test_mse", "idxmin", "Lowest test_mse"),
    ("train_r2", "idxmax", "Highest train_r2"),
    ("test_r2", "idxmax", "Highest test_r2"),
):
    _best_label = getattr(df[_metric], _picker)()
    print_row(df.loc[_best_label], _title)



===== Lowest train_mse =====
Iteration   : 249
Timestamp   : 2025-08-08 17:45:44.365824
train_mse   : 0.005010
test_mse    : 0.018217
train_r2    : 0.833303
test_r2     : -0.376188
params      :
{'colsample_bytree': 0.360569,
 'device': 'cuda',
 'gamma': 0.003303,
 'learning_rate': 0.184417,
 'max_depth': 9,
 'min_child_weight': 8,
 'n_estimators': 3250,
 'n_jobs': -1,
 'objective': 'reg:squarederror',
 'random_state': 42,
 'reg_alpha': 24.718484,
 'reg_lambda': 4214.830235,
 'subsample': 0.547832}

===== Lowest test_mse =====
Iteration   : 741
Timestamp   : 2025-08-08 18:07:16.230345472
train_mse   : 0.028682
test_mse    : 0.013105
train_r2    : 0.045608
test_r2     : 0.009950
params      :
{'colsample_bytree': 0.500413,
 'device': 'cuda',
 'gamma': 1.077876,
 'learning_rate': 0.208581,
 'max_depth': 14,
 'min_child_weight': 8,
 'n_estimators': 2150,
 'n_jobs': -1,
 'objective': 'reg:squarederror',
 'random_state': 42,
 'reg_alpha': 220.784026,
 'reg_lambda': 3650.307704,
 'subsample

In [7]:
import pandas as pd
import ast
from math import isnan

# --- path ---
csv_path = "/Users/mazin/Desktop/hyperopt_live_log.csv"

# ---------- helpers ----------
def parse_params(s):
    if isinstance(s, str) and s.strip().startswith("{"):
        try:
            return ast.literal_eval(s)
        except Exception:
            return s
    return s

def fmt(x, nd=6):
    """Format a value for display.

    Floats are rendered with ``nd`` decimal places, except NaN, which is
    shown as the text ``"NaN"``. Any non-float value is passed through
    ``str``.
    """
    if not isinstance(x, float):
        return str(x)
    if x != x:  # NaN is the only float that is not equal to itself
        return "NaN"
    return format(x, f".{nd}f")

def pretty_params(p, nd=6, indent="  "):
    """Render a params dict as indented ``key: value`` lines.

    Keys are emitted in sorted order so the output is stable. Float values
    are reduced to ``nd`` significant digits. A non-dict ``p`` is returned
    as its ``str`` form prefixed with ``indent``.
    """
    if not isinstance(p, dict):
        return indent + str(p)

    def _display(value):
        # Round-trip through the "g" format to trim to nd significant digits.
        if isinstance(value, float):
            return float(f"{value:.{nd}g}")
        return value

    return "\n".join(f"{indent}{key}: {_display(p[key])}" for key in sorted(p.keys()))

def human_time(raw):
    """Interpret ``raw`` as epoch seconds and convert it with pandas.

    Values pandas cannot convert are coerced rather than raised; if the
    conversion call itself raises, ``pd.NaT`` is returned.
    """
    try:
        converted = pd.to_datetime(raw, errors="coerce", unit="s")
    except Exception:
        converted = pd.NaT
    return converted

def print_full_row(row, key_metric, rank):
    """Print one ranked entry: a header line, then every column of the row.

    ``row`` is indexed by column name, ``key_metric`` names the column whose
    value is shown in the header, and ``rank`` is the position printed as a
    zero-padded two-digit number.
    """
    metric_columns = ("train_mse", "test_mse", "train_r2", "test_r2")

    # --- header line ---
    iteration = row.get("iteration")
    it = int(row["iteration"]) if pd.notna(iteration) else "NA"

    # Prefer the pre-converted datetime; otherwise convert the raw timestamp.
    when = row.get("timestamp_dt", None)
    if when is None or when is pd.NaT:
        when = human_time(row.get("timestamp", None))
    ts_human_str = str(when) if not pd.isna(when) else ""

    headline_value = fmt(row[key_metric])
    print(f"#{rank:02d} — {key_metric}: {headline_value}  (iteration {it}, time {ts_human_str})")

    # --- every column, one per line ---
    print("  ── Full row:")
    for col in row.index:
        value = row[col]
        if col == "params":
            # params get their own indented multi-line block
            print(f"    {col}:")
            print(pretty_params(value, indent="      "))
            continue
        if col == "timestamp_dt":
            shown = ts_human_str
        elif col in metric_columns:
            shown = fmt(value)
        else:
            shown = value
        print(f"    {col}: {shown}")
    print("")  # blank separator after each entry

def print_section(df, sort_by, ascending, title, topn=20):
    """Print a titled banner followed by the top rows of ``df`` by one column.

    Rows are ordered by ``sort_by`` (direction given by ``ascending``, NaNs
    last), cut to at most ``topn`` rows, and each is printed with its
    1-based rank.
    """
    banner = "=" * 100
    print(banner)
    print(title)
    print(banner)

    # Sort first, then cap the count at the number of rows available.
    ordered = df.sort_values(sort_by, ascending=ascending, na_position="last")
    limit = min(topn, len(df))

    position = 0
    for _, row in ordered.head(limit).iterrows():
        position += 1
        print_full_row(row, key_metric=sort_by, rank=position)

# ---------- read the log and normalise column types ----------
df = pd.read_csv(csv_path, converters={"params": parse_params})

# metric columns must be numeric; unparseable cells become NaN
for c in ("train_mse", "train_r2", "test_mse", "test_r2"):
    df[c] = pd.to_numeric(df[c], errors="coerce")

# add a readable datetime column, treating raw timestamps as epoch seconds
if "timestamp" in df.columns:
    df["timestamp_dt"] = pd.to_datetime(df["timestamp"], unit="s", errors="coerce")

# ---------- emit the four ranked lists ----------
for _sort_by, _ascending, _title in (
    ("train_mse", True, "Top 20 LOWEST train_mse"),
    ("test_mse", True, "Top 20 LOWEST test_mse"),
    ("train_r2", False, "Top 20 HIGHEST train_r2"),
    ("test_r2", False, "Top 20 HIGHEST test_r2"),
):
    print_section(df, _sort_by, _ascending, _title)


Top 20 LOWEST train_mse
#01 — train_mse: 0.005010  (iteration 249, time 2025-08-08 17:45:44.365824)
  ── Full row:
    iteration: 249
    train_mse: 0.005010
    train_r2: 0.833303
    test_mse: 0.018217
    test_r2: -0.376188
    params:
      colsample_bytree: 0.360569
      device: cuda
      gamma: 0.00330282
      learning_rate: 0.184417
      max_depth: 9
      min_child_weight: 8
      n_estimators: 3250
      n_jobs: -1
      objective: reg:squarederror
      random_state: 42
      reg_alpha: 24.7185
      reg_lambda: 4214.83
      subsample: 0.547832
    timestamp: 1754675144.365824
    timestamp_dt: 2025-08-08 17:45:44.365824

#02 — train_mse: 0.008481  (iteration 681, time 2025-08-08 18:04:30.457833984)
  ── Full row:
    iteration: 681
    train_mse: 0.008481
    train_r2: 0.717809
    test_mse: 0.016640
    test_r2: -0.257106
    params:
      colsample_bytree: 0.449734
      device: cuda
      gamma: 0.0327927
      learning_rate: 0.129122
      max_depth: 6
      min_chi