
# BlendLab (блендинг/стакинг без утечек)

Цель: быстро собрать бленд/стакинг из уже обученных прогонов (**run_id**) с прозрачными артефактами.

План:
1) выбрать **RUN_TAG** (набор фич),
2) выбрать состав кандидатов (run_id) из `artifacts/models/index.json`,
3) посмотреть корреляции OOF и диверсификацию,
4) выбрать **blend-space** (proba / logit / rank) и режим бленда,
5) (опц.) калибровка (Platt/Isotonic) и/или τ (для binary),
6) сохранить бленд и сформировать сабмит.

Анти-утечки:
- Подбор весов `--cv-weights`: веса ищутся на train-OOF, применяются на val-OOF.
- Level-2 (стакинг) делается fold-safe.
- Для AUC сабмит — **вероятности**, не классы (без глобального τ).

Зависимости: `numpy, pandas, matplotlib, ipywidgets, scikit-learn`. Скрипт: `tools/run_blend.py`.


In [None]:
from IPython.display import display, HTML
# Inject CSS so that long button captions wrap onto several lines instead of
# being clipped; two selectors are provided (ipywidgets v8 and a v7 fallback).
display(HTML("""
<style>
/* ipywidgets v8 (JupyterLab 4) */
.jp-OutputArea .widget-button .widget-label { 
  white-space: normal !important; 
  overflow: visible !important; 
  text-overflow: clip !important;
  line-height: 1.2 !important;
}
/* fallback для ipywidgets v7 */
.jupyter-widgets.widget-button .widget-label {
  white-space: normal !important; 
  overflow: visible !important; 
  text-overflow: clip !important;
  line-height: 1.2 !important;
}
</style>
"""))


In [None]:

# Core
import os, sys, json, math, subprocess, shutil, gc, warnings
from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd

# Viz
import matplotlib.pyplot as plt

# Widgets
try:
    import ipywidgets as W
    from IPython.display import display, HTML, clear_output
except Exception:
    W = None
    print("[warn] ipywidgets не установлен — будет вариант без UI")

# Silence every warning for the rest of the session and set the default figure size.
warnings.filterwarnings("ignore")
plt.rcParams["figure.figsize"] = (8, 5)

# Artifact locations, all relative to the current working directory.
SETS_ROOT = Path("artifacts/sets")             # one sub-directory per RUN_TAG
MODELS_INDEX = Path("artifacts/models/index.json")  # run_id -> record mapping
BLENDS_ROOT = Path("artifacts/models/blends")  # one sub-directory per saved blend
SUBMITS_ROOT = Path("artifacts/submits")       # generated submissions

# ------- helpers --------

def ensure_dir(p: Path):
    """Create directory *p* together with any missing parents.

    Does nothing when the directory already exists. Returns ``None``.
    """
    p.mkdir(exist_ok=True, parents=True)
    return None

def read_json(p: Path):
    """Load a UTF-8 encoded JSON file.

    Best-effort reader: returns the decoded object, or ``None`` when the file
    is missing, cannot be read, or does not contain valid UTF-8 JSON.

    Only I/O errors (``OSError``, which covers a missing file) and decoding
    errors (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
    ``ValueError`` subclasses) are swallowed; unrelated failures propagate.
    """
    try:
        # EAFP: reading directly avoids the exists()/read race.
        return json.loads(p.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return None

def save_json(p: Path, obj: dict):
    """Write *obj* to *p* as indented UTF-8 JSON (non-ASCII characters kept as is).

    The parent directory of *p* is created first via ``ensure_dir``.
    """
    ensure_dir(p.parent)
    payload = json.dumps(obj, indent=2, ensure_ascii=False)
    p.write_text(payload, encoding="utf-8")

def read_parquet_any(p: Path):
    """Read a parquet file into a DataFrame, trying two engines.

    Returns ``None`` when *p* does not exist or when both the default pandas
    engine and the explicit ``fastparquet`` engine fail for any reason.
    """
    if not p.exists():
        return None

    try:
        frame = pd.read_parquet(p)
    except Exception:
        # The default engine failed; retry with fastparquet if it can be imported.
        try:
            import fastparquet  # noqa
            frame = pd.read_parquet(p, engine="fastparquet")
        except Exception:
            frame = None
    return frame

def run_cmd(cmd: list, cwd: Path | None = None, verbose=True) -> tuple[int, str, str]:
    """Run shell command and capture output."""
    proc = subprocess.Popen(
        cmd, cwd=str(cwd) if cwd else None,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
    out, err = proc.communicate()
    if verbose:
        print(" ".join(cmd))
        print(out)
        if err.strip():
            print("[stderr]", err)
    return proc.returncode, out, err

def detect_task_from_y(y: np.ndarray) -> str:
    """Guess the ML task from the target values.

    Rules:
      * integer or boolean targets -> ``"binary"`` for at most two distinct
        values, otherwise ``"multiclass"``;
      * string/object targets that cannot be converted to float are treated
        as class labels with the same two-value rule (previously they were
        reported as ``"regression"``);
      * float targets consisting only of 0.0/1.0 -> ``"binary"``;
      * everything else -> ``"regression"``.

    Args:
        y: target values; any array-like is accepted (lists included).

    Returns:
        One of ``"binary"``, ``"multiclass"``, ``"regression"``.
    """
    arr = np.asarray(y)

    if arr.dtype.kind in "OUS":
        # Object/str/bytes: numeric content continues through the float rules,
        # genuine labels are classified by the number of distinct values.
        try:
            arr = arr.astype(float)
        except (TypeError, ValueError):
            n_labels = len(set(arr.ravel().tolist()))
            return "binary" if n_labels <= 2 else "multiclass"

    uniq = np.unique(arr)
    if arr.dtype.kind in "iub":
        return "binary" if len(uniq) <= 2 else "multiclass"
    if len(uniq) <= 2 and set(uniq.tolist()).issubset({0.0, 1.0}):
        return "binary"
    return "regression"

def metric_fn(task: str, name: str):
    """Return scorer(y_true, y_pred) -> float (higher=better).

    The metric name is matched case-insensitively. Loss-type metrics
    (logloss, rmse, mae, mape) are returned negated so that a larger value is
    always better.

    For ``task`` in ("binary", "multiclass") the classification metrics are
    tried first; if the name is not one of them, lookup falls through to the
    regression metrics below. For any other task only the regression metrics
    are available.

    Raises:
        ValueError: if ``name`` matches none of the supported metrics.
    """
    # sklearn is imported lazily, at scorer-construction time.
    # NOTE(review): accuracy_score is imported but not used in this function.
    from sklearn.metrics import (
        roc_auc_score, average_precision_score, log_loss, accuracy_score,
        f1_score, mean_squared_error, mean_absolute_error
    )
    name = name.lower()
    if task in ("binary","multiclass"):
        if name in ("roc_auc","auc"):
            def f(y, p):
                if task=="binary":
                    return roc_auc_score(y, p.reshape(-1))
                else:
                    # multiclass: one-hot the labels over the classes present in y
                    from sklearn.preprocessing import label_binarize
                    cls = np.unique(y)
                    Y = label_binarize(y, classes=cls)
                    return roc_auc_score(Y, p, average="macro", multi_class="ovr")
            return f
        if name in ("pr_auc","ap","average_precision"):
            def f(y, p):
                if task=="binary":
                    return average_precision_score(y, p.reshape(-1))
                else:
                    from sklearn.preprocessing import label_binarize
                    cls = np.unique(y)
                    Y = label_binarize(y, classes=cls)
                    return average_precision_score(Y, p, average="macro")
            return f
        if name == "logloss":
            def f(y, p):
                if task=="binary":
                    # Clip, then build an explicit two-column matrix [P(0), P(1)].
                    q = np.clip(p.reshape(-1), 1e-15, 1-1e-15)
                    P = np.vstack([1-q, q]).T
                    return -log_loss(y, P, labels=[0,1])
                else:
                    return -log_loss(y, p)
            return f
        if name in ("accuracy","acc"):
            def f(y, p):
                if task=="binary":
                    # Fixed 0.5 decision threshold.
                    return ( (p.reshape(-1)>=0.5).astype(int) == y ).mean()
                else:
                    # Compares the argmax column index with y directly.
                    return ( np.argmax(p,1) == y ).mean()
            return f
        if name in ("f1","macro_f1"):
            def f(y, p):
                if task=="binary":
                    return f1_score(y, (p.reshape(-1)>=0.5).astype(int))
                else:
                    return f1_score(y, np.argmax(p,1), average="macro")
            return f
    # regression (also reached by classification tasks whose name matched nothing above)
    if name=="rmse":
        def f(y, p): return -math.sqrt(mean_squared_error(y, p))
        return f
    if name=="mae":
        def f(y, p): return -mean_absolute_error(y, p)
        return f
    if name=="mape":
        def f(y, p):
            y = np.asarray(y,float)
            p = np.asarray(p,float)
            # eps guards the division when |y| is zero; result is in percent.
            eps=1e-9
            return -np.mean(np.abs((y-p)/np.clip(np.abs(y),eps,None)))*100
        return f
    raise ValueError(f"Unknown metric {name}")

def normalize_pred_for_task(task: str, arr: np.ndarray) -> np.ndarray:
    """Bring raw predictions to the shape used for *task*.

    * ``"multiclass"``: must be 2-D and is returned unchanged.
    * ``"binary"``: 1-D is returned as is; (n, 1) is flattened; (n, 2) yields
      the second column; wider 2-D arrays yield the row-wise maximum; any
      other rank is flattened.
    * any other task: flattened to 1-D.
    """
    preds = np.asarray(arr)

    if task == "multiclass":
        assert preds.ndim==2, "multiclass ожидает (n, C)"
        return preds

    if task != "binary":
        return preds.reshape(-1)

    # binary
    if preds.ndim == 1:
        return preds
    if preds.ndim != 2:
        return preds.reshape(-1)

    width = preds.shape[1]
    if width == 1:
        return preds.reshape(-1)
    if width == 2:
        return preds[:, 1]
    return preds.max(1)

def clip_proba(p, eps=1e-6):
    """Clamp probabilities into the closed interval ``[eps, 1 - eps]``."""
    lower, upper = eps, 1 - eps
    return np.clip(p, lower, upper)

def to_blend_space(task: str, X: np.ndarray, space: str) -> np.ndarray:
    """Map predictions into the requested blend space.

    Spaces (case-insensitive):
      * ``"proba"``: clip into (0, 1) for binary and multiclass; others pass through.
      * ``"logit"``: ``log(p / (1 - p))`` on clipped values for binary and
        multiclass (element-wise); others pass through.
      * ``"rank"``: average ranks rescaled to [0, 1]; over the whole vector
        for binary/regression, per column for multiclass.

    Raises:
        ValueError: for an unknown space (carries the lower-cased name).
    """
    def _unit_rank(values):
        # Average ranks start at 1; shift and scale so the range is [0, 1].
        ranks = pd.Series(values).rank(method="average").to_numpy()
        return (ranks - 1) / max(len(ranks) - 1, 1)

    mode = space.lower()
    is_binary = task == "binary"
    is_multi = task == "multiclass"

    if mode == "proba":
        if is_binary:
            return clip_proba(X)
        if is_multi:
            return np.clip(X, 1e-8, 1-1e-8)
        return X

    if mode == "logit":
        if is_binary:
            q = clip_proba(X)
            return np.log(q/(1-q))
        if is_multi:
            q = np.clip(X, 1e-8, 1-1e-8)
            return np.log(q/np.clip(1-q, 1e-8, None))
        return X

    if mode == "rank":
        if task in ("binary", "regression"):
            return _unit_rank(X)
        if is_multi:
            ranked = np.zeros_like(X, float)
            for col in range(X.shape[1]):
                ranked[:, col] = _unit_rank(X[:, col])
            return ranked
        return X

    raise ValueError(mode)

def from_blend_space(task: str, yb: np.ndarray, space: str) -> np.ndarray:
    """Map a blended vector back from its blend space.

    Only logit space for binary/multiclass is transformed: a sigmoid is
    applied, and for multiclass each row is then renormalised to sum to one.
    Every other combination (rank, proba, regression) is returned unchanged.
    """
    needs_sigmoid = space.lower() == "logit" and task in ("binary", "multiclass")
    if not needs_sigmoid:
        return yb

    probs = 1/(1+np.exp(-yb))
    if task == "multiclass":
        row_sums = np.clip(probs.sum(1, keepdims=True), 1e-8, None)
        probs = probs/row_sums
    return probs


In [None]:
# Shared widget layouts. They are only built when ipywidgets imported
# successfully: in the no-UI fallback W is None and W.Layout would raise
# AttributeError, breaking the rest of the notebook.
if W:
    BTN_LAYOUT = W.Layout(min_width="220px", width="auto", height="36px", flex="0 0 auto")
    ROW_LAYOUT = W.Layout(flex_flow="row wrap", grid_gap="8px")
    GRID_LAYOUT = W.Layout(grid_template_columns="repeat(3, minmax(220px, 1fr))", grid_gap="8px")
else:
    # Placeholders so the names exist; they are only read inside `if W:` branches.
    BTN_LAYOUT = ROW_LAYOUT = GRID_LAYOUT = None

In [None]:

# Auto-discovered list of tags: every sub-directory of SETS_ROOT, sorted by name.
AVAILABLE_TAGS = [p.name for p in SETS_ROOT.glob("*") if p.is_dir()]
AVAILABLE_TAGS.sort()

# UI
if W:
    dd_tag = W.Dropdown(options=AVAILABLE_TAGS, description="RUN_TAG:", layout=W.Layout(width="400px"))
    btn_check = W.Button(description="Проверить набор", button_style="primary", layout=BTN_LAYOUT)
    out_check = W.Output()
    display(W.HBox([dd_tag, btn_check], layout=ROW_LAYOUT), out_check)
else:
    print("Доступные RUN_TAG:", AVAILABLE_TAGS)
    # Stand-in object exposing only `.value`: the last tag in sorted order, or None if there are none.
    dd_tag = type("Dummy", (), {"value": AVAILABLE_TAGS[-1] if AVAILABLE_TAGS else None})()
    print("Использую:", dd_tag.value)

# Notebook-wide state shared by the cells below; filled in by on_check / fix_set.
STATE = {
    "RUN_TAG": None,
    "task": None,
    "metric": "roc_auc",
    "y": None,
    "folds": None,
    "ids_test": None,
    "id_col": None,
    "target_col": None
}

def check_set(tag: str):
    """Load the set files of *tag* and print a short summary.

    Reads ``y_train.parquet``, ``ids_test.parquet`` and, when present,
    ``folds.pkl`` from ``SETS_ROOT / tag``. The first column of y_train is
    taken as the id column and the first remaining column as the target.

    Returns:
        ``(y, folds, ids, id_col, target_col)``; five ``None`` values when
        y_train is missing or has fewer than two columns.
    """
    set_dir = SETS_ROOT / tag
    ydf = read_parquet_any(set_dir / "y_train.parquet")
    ids = read_parquet_any(set_dir / "ids_test.parquet")

    folds_path = set_dir / "folds.pkl"
    if folds_path.exists():
        # NOTE: pickle executes code on load; folds.pkl must be a trusted local artifact.
        import pickle
        folds = pickle.loads(folds_path.read_bytes())
    else:
        folds = None

    y_usable = ydf is not None and ydf.shape[1] >= 2
    if not y_usable:
        print("[warn] Нет y_train.parquet с id+target — можно починить внизу.")
        return (None,) * 5

    id_col = ydf.columns[0]
    non_id_cols = [c for c in ydf.columns if c != id_col]
    target_col = non_id_cols[0]
    y = ydf[target_col].to_numpy()
    task = detect_task_from_y(y)

    print(f"RUN_TAG={tag} | y: {y.shape} | ids_test: {None if ids is None else ids.shape} | folds: {None if folds is None else len(folds)}")
    print(f"task: {task} | id_col: {id_col} | target_col: {target_col}")

    return y, folds, ids, id_col, target_col

def on_check(_):
    """Button handler: validate the selected RUN_TAG and refresh STATE.

    Output of the check is routed into ``out_check``, which is cleared first.
    """
    out_check.clear_output()
    with out_check:
        tag = dd_tag.value
        STATE["RUN_TAG"] = tag
        y, folds, ids, id_col, target_col = check_set(tag)
        # The task can only be detected when a target vector was loaded.
        task = None if y is None else detect_task_from_y(y)
        STATE.update(
            y=y,
            folds=folds,
            ids_test=ids,
            id_col=id_col,
            target_col=target_col,
            task=task,
        )

if W:
    btn_check.on_click(on_check)
elif dd_tag.value is not None:
    # No UI: check the automatically chosen tag right away.
    STATE["RUN_TAG"] = dd_tag.value
    y, folds, ids, id_col, target_col = check_set(dd_tag.value)
    STATE.update({"y": y, "folds": folds, "ids_test": ids, "id_col": id_col, "target_col": target_col, "task": detect_task_from_y(y) if y is not None else None})
else:
    # No tag directories were found; check_set(None) would fail on `SETS_ROOT / None`.
    print(f"[warn] Нет доступных RUN_TAG в {SETS_ROOT}")


In [None]:

# If set files are missing, y_train/ids_test can be rebuilt from CSV.
# Fill in the paths and column names, then press the "Собрать сет-файлы" button.

if W:
    # Inputs: data directory, train/test file names inside it, id and target column names.
    tb_data_dir = W.Text(value="data", description="DATA_DIR:", layout=W.Layout(width="400px"))
    tb_train = W.Text(value="train.csv", description="TRAIN:", layout=W.Layout(width="400px"))
    tb_test = W.Text(value="test.csv", description="TEST:", layout=W.Layout(width="400px"))
    tb_id = W.Text(value="id", description="ID_COL:", layout=W.Layout(width="300px"))
    tb_tgt = W.Text(value="target", description="TARGET_COL:", layout=W.Layout(width="300px"))
    btn_fix = W.Button(description="Собрать сет-файлы", button_style="", layout=BTN_LAYOUT)
    out_fix = W.Output()
    display(W.HBox([tb_data_dir, tb_train, tb_test], layout=ROW_LAYOUT), W.HBox([tb_id, tb_tgt], layout=ROW_LAYOUT), W.HBox([btn_fix], layout=ROW_LAYOUT), out_fix)
else:
    # Without widgets there is no repair form; the files have to be created by hand.
    print("Если нужно, вручную создай y_train.parquet (id+target) и ids_test.parquet (id).")

def fix_set(_):
    """Button handler: rebuild the set files of the current RUN_TAG from CSV.

    Reads the train/test CSV files named in the form, writes
    ``y_train.parquet`` (id + target) and ``ids_test.parquet`` (id), creates a
    default shuffled 5-fold ``folds.pkl`` and refreshes STATE through
    ``check_set``. Every problem with the input is reported with a message in
    ``out_fix`` and stops the handler, matching the existing missing-file
    checks.
    """
    out_fix.clear_output()
    with out_fix:
        if not STATE["RUN_TAG"]:
            print("Сначала выбери RUN_TAG и нажми Проверить.")
            return
        base = SETS_ROOT / STATE["RUN_TAG"]
        ensure_dir(base)

        data_dir = Path(tb_data_dir.value)
        train_p = data_dir / tb_train.value
        test_p = data_dir / tb_test.value
        idc = tb_id.value
        tgt = tb_tgt.value

        if not train_p.exists():
            print("Нет TRAIN CSV:", train_p)
            return
        if not test_p.exists():
            print("Нет TEST CSV:", test_p)
            return

        tr = pd.read_csv(train_p)
        te = pd.read_csv(test_p)

        # Explicit checks instead of asserts: asserts vanish under -O and the
        # other input problems in this handler are reported the same way.
        if idc not in tr.columns or idc not in te.columns:
            print("ID_COL не найден")
            return
        if tgt not in tr.columns:
            print("TARGET_COL не найден в train")
            return
        if idc == tgt:
            # Selecting the same column twice would give duplicate column names.
            print("ID_COL и TARGET_COL должны различаться")
            return

        ydf = tr[[idc, tgt]].copy()
        ids = te[[idc]].copy()

        y_path = base / "y_train.parquet"
        ids_path = base / "ids_test.parquet"
        ydf.to_parquet(y_path)
        ids.to_parquet(ids_path)
        print("Собрано:", y_path, ids_path)

        # Default split when no time/group information is available: shuffled 5-fold.
        from sklearn.model_selection import KFold
        import pickle
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        folds = list(kf.split(np.arange(len(ydf))))
        (base / "folds.pkl").write_bytes(pickle.dumps(folds))
        print("Создан folds.pkl (KFold=5)")

        # Refresh the shared state. check_set returns y=None when the written
        # file cannot be read back, so the task detection must be guarded.
        y, folds, ids, id_col, target_col = check_set(STATE["RUN_TAG"])
        STATE.update({
            "y": y,
            "folds": folds,
            "ids_test": ids,
            "id_col": id_col,
            "target_col": target_col,
            "task": detect_task_from_y(y) if y is not None else None,
        })

if W:
    btn_fix.on_click(fix_set)


In [None]:

# Metric selection (original note: roc_auc by default for binary/multiclass, rmse for regression).
# NOTE(review): the dropdown value below is always initialised to "roc_auc";
# no automatic switch to rmse for regression is visible in this cell.

if W:
    dd_metric = W.Dropdown(options=["roc_auc","pr_auc","logloss","accuracy","f1","rmse","mae","mape"],
                           description="metric:", value="roc_auc", layout=W.Layout(width="300px"))
    btn_load_runs = W.Button(description="Загрузить список кандидатов", button_style="primary", layout=BTN_LAYOUT)
    out_runs = W.Output()
    display(W.HBox([dd_metric, btn_load_runs], layout=ROW_LAYOUT), out_runs)
else:
    # Stand-in exposing only `.value`; there is no output widget without UI.
    dd_metric = type("Dummy", (), {"value":"roc_auc"})()
    out_runs = None

# Table of candidate runs for the current RUN_TAG; filled by load_runs().
RUNS_DF = None

def load_runs(_):
    """Load the candidate runs of the current RUN_TAG from the models index.

    Fills the global ``RUNS_DF`` (sorted by ``cv_mean``, best first) and shows
    it. The frame is built with explicit columns so that an empty result is a
    valid empty table rather than a ``KeyError`` in ``sort_values``; that
    makes the "no candidates" message reachable. Works both with the
    ``out_runs`` widget and in the no-UI fallback where ``out_runs`` is None.
    """
    global RUNS_DF
    if STATE["RUN_TAG"] is None:
        print("Сначала выбери RUN_TAG")
        return

    idx = read_json(MODELS_INDEX) or {}
    columns = ["run_id", "cand", "task", "metric", "cv_mean", "path", "blend"]
    rows = [
        {
            "run_id": rid,
            "cand": rec.get("cand"),
            "task": rec.get("task"),
            "metric": rec.get("metric"),
            "cv_mean": rec.get("cv_mean"),
            "path": rec.get("path"),
            "blend": bool(rec.get("blend", False)),
        }
        for rid, rec in idx.items()
        if rec.get("tag") == STATE["RUN_TAG"]
    ]
    RUNS_DF = (
        pd.DataFrame(rows, columns=columns)
        .sort_values("cv_mean", ascending=False)
        .reset_index(drop=True)
    )

    def _show():
        if RUNS_DF.empty:
            print("Нет кандидатов для этого RUN_TAG")
        else:
            display(RUNS_DF)

    if out_runs is None:
        # No-UI mode: there is no Output widget to capture into.
        _show()
    else:
        with out_runs:
            out_runs.clear_output()
            _show()

if W:
    btn_load_runs.on_click(load_runs)
else:
    load_runs(None)


In [None]:

# Basket of run_ids chosen for blending; mutated in place by the handlers below.
SELECTED_RUNS = []

if W:
    # Build the multi-select from the RUNS_DF contents at the moment of the call.
    def refresh_picklist():
        if RUNS_DF is None or len(RUNS_DF)==0:
            return W.SelectMultiple(options=[], description="runs")
        # Option text is "run_id | cand | cv_mean"; _on_pick parses the run_id back out of it.
        opts = [f'{r.run_id} | {r.cand} | {r.cv_mean:.6f}' for _, r in RUNS_DF.iterrows()]
        return W.SelectMultiple(options=opts, description="members:", rows=12, layout=W.Layout(width="750px"))

    # The list is created once, when this cell runs.
    pick = refresh_picklist()
    btn_pick = W.Button(description="Добавить в корзину", button_style="", layout=BTN_LAYOUT)
    btn_clear = W.Button(description="Очистить корзину", layout=BTN_LAYOUT)
    out_pick = W.Output()

    # Add the highlighted runs to the basket, skipping ones already present.
    def _on_pick(_):
        with out_pick:
            for s in pick.value:
                rid = s.split(" | ")[0].strip()
                if rid not in SELECTED_RUNS:
                    SELECTED_RUNS.append(rid)
            print("Корзина:", SELECTED_RUNS)

    # Empty the basket (same list object is kept) and reset the output area.
    def _on_clear(_):
        SELECTED_RUNS.clear()
        with out_pick:
            out_pick.clear_output()
            print("Корзина очищена.")

    btn_pick.on_click(_on_pick)
    btn_clear.on_click(_on_clear)

    display(pick, W.HBox([btn_pick, btn_clear], layout=ROW_LAYOUT), out_pick)
else:
    print("Укажи вручную SELECTED_RUNS = [...]")


In [None]:

# In-memory caches keyed by run_id.
OOF_CACHE = {}   # run_id -> np.ndarray
TEST_CACHE = {}  # run_id -> np.ndarray|None
LOCAL_SCORES = {}  # run_id -> float

def load_member_preds(run_id: str, task: str):
    """Load OOF and (optional) test predictions of one run.

    The run directory comes from the ``path`` field of the index record. When
    that field is missing, empty, or points to a non-existent location, the
    conventional ``artifacts/models/<run_id>`` directory is used instead.

    Args:
        run_id: key of the run in the models index.
        task: task name passed to ``normalize_pred_for_task``.

    Returns:
        ``(oof, test)`` with both arrays normalised for *task*; ``test`` is
        ``None`` when ``test_pred.npy`` does not exist.

    Raises:
        KeyError: if *run_id* is not present in the index.
    """
    idx = read_json(MODELS_INDEX) or {}
    rec = idx.get(run_id)
    if rec is None:
        raise KeyError(f"Нет в index.json: {run_id}")

    # A missing path must not be turned into Path(""): that is the current
    # directory, which always exists and would silently defeat the fallback.
    raw_path = rec.get("path")
    path = Path(raw_path) if raw_path else None
    if path is None or not path.exists():
        path = Path("artifacts/models") / run_id

    oof = np.load(path / "oof.npy")
    test_p = path / "test_pred.npy"
    test = np.load(test_p) if test_p.exists() else None
    return (
        normalize_pred_for_task(task, oof),
        None if test is None else normalize_pred_for_task(task, test),
    )

def compute_local_scores():
    """Recompute ``LOCAL_SCORES`` for every run in the basket.

    Scores each cached OOF vector against ``STATE["y"]`` with the currently
    selected metric. Does nothing when no target is loaded. A run whose
    scoring fails gets ``NaN`` and the reason is printed, so a bad
    metric/task combination is visible instead of silently producing NaNs.
    """
    y = STATE["y"]
    if y is None:
        return
    scorer = metric_fn(STATE["task"], dd_metric.value)
    LOCAL_SCORES.clear()
    for rid in SELECTED_RUNS:
        p = OOF_CACHE[rid]
        try:
            LOCAL_SCORES[rid] = float(scorer(y, p))
        except Exception as exc:
            # Best effort: keep going with the other runs, but say why this one failed.
            print(f"[warn] {rid}: метрика не посчитана ({type(exc).__name__}: {exc})")
            LOCAL_SCORES[rid] = float("nan")

def load_selected():
    """Load predictions for the runs in the basket and show their local scores.

    Runs already present in ``OOF_CACHE`` are not reloaded. An empty basket is
    reported with a message; previously it ended in a ``KeyError`` because an
    empty frame has no ``local_metric`` column to sort by.
    """
    if STATE["task"] is None:
        print("Сначала проверь сет (RUN_TAG)")
        return
    if not SELECTED_RUNS:
        print("Корзина пуста — добавь модели и перезапусти ячейку.")
        return

    for rid in SELECTED_RUNS:
        if rid not in OOF_CACHE:
            p_oof, p_test = load_member_preds(rid, STATE["task"])
            OOF_CACHE[rid] = p_oof
            TEST_CACHE[rid] = p_test

    compute_local_scores()
    df = pd.DataFrame(
        [{"run_id": rid, "local_metric": LOCAL_SCORES.get(rid)} for rid in SELECTED_RUNS],
        columns=["run_id", "local_metric"],
    ).sort_values("local_metric", ascending=False)
    display(df)

load_selected()


In [None]:

def oof_corr_matrix(task: str, run_ids: list[str]) -> pd.DataFrame:
    """Pearson correlation between the cached OOF predictions of *run_ids*.

    For multiclass the per-row maximum probability is used as a quick
    one-dimensional summary of each model. Returns an empty frame when fewer
    than two runs are given.
    """
    if len(run_ids) < 2:
        return pd.DataFrame()

    if task == "multiclass":
        columns = [OOF_CACHE[rid].max(1) for rid in run_ids]
    else:
        columns = [OOF_CACHE[rid].reshape(-1) for rid in run_ids]

    # One column per model, so the variables are the columns.
    stacked = np.column_stack(columns)
    corr = np.corrcoef(stacked, rowvar=False)
    return pd.DataFrame(corr, index=run_ids, columns=run_ids)

# Show the OOF correlation table and a heat map once at least two runs are selected.
if len(SELECTED_RUNS) >= 2:
    corr = oof_corr_matrix(STATE["task"], SELECTED_RUNS)
    display(corr.round(3))
    plt.imshow(corr.values, interpolation="nearest")
    # Label both axes with the run ids, in basket order.
    plt.xticks(range(len(SELECTED_RUNS)), SELECTED_RUNS, rotation=90)
    plt.yticks(range(len(SELECTED_RUNS)), SELECTED_RUNS)
    plt.colorbar()
    plt.title("OOF correlation")
    plt.show()
else:
    print("Добавь >=2 моделей для анализа корреляций.")


In [None]:

# Blend-space / mode selectors and the numeric and boolean blend options.
if W:
    rb_space = W.RadioButtons(options=["proba","logit","rank"], description="space:", value="rank")
    dd_mode = W.Dropdown(options=["equal","dirichlet","nnls","coord","level2"], description="mode:", value="dirichlet")

    # NOTE: run_blend_analyze / run_blend_save read these widgets by position
    # (hb_params.children[i]), so the order of the children must not change.
    hb_params = W.HBox([
        W.IntText(value=4000, description="dir_samples"),
        W.Checkbox(value=True, description="nnls (warm)"),
        W.IntText(value=2000, description="coord_iters"),
        W.FloatText(value=0.01, description="coord_step"),
        W.Checkbox(value=True, description="nonneg"),
        W.Checkbox(value=True, description="sum1"),
        W.Checkbox(value=True, description="cv_weights"),
        W.Checkbox(value=False, description="reopt_after_cv"),
    ], layout=ROW_LAYOUT)
    display(rb_space, dd_mode, hb_params)
else:
    # Stand-ins exposing only `.value`; hb_params is not defined in this branch.
    rb_space = type("Dummy", (), {"value":"rank"})()
    dd_mode = type("Dummy", (), {"value":"dirichlet"})()


In [None]:

def run_blend_analyze():
    """Run ``tools/run_blend.py`` in analyze-only mode for the current basket.

    The command line is assembled from STATE and the option widgets; without
    widgets the built-in defaults are used (4000 dirichlet samples, 2000
    coordinate iterations with step 0.01, all boolean flags on except
    ``--reopt-after-cv``).

    Returns:
        ``(returncode, stdout, stderr)`` as produced by ``run_cmd``.
    """
    assert STATE["RUN_TAG"], "RUN_TAG не выбран"
    assert len(SELECTED_RUNS)>=1, "Нет участников"

    controls = hb_params.children if W else None

    def option(position, fallback):
        # Widget value when the UI exists, otherwise the built-in default.
        return fallback if controls is None else controls[position].value

    mode = dd_mode.value
    cmd = [
        sys.executable, "tools/run_blend.py",
        "--tag", STATE["RUN_TAG"],
        "--members", ",".join(SELECTED_RUNS),
        "--mode", mode,
        "--metric", dd_metric.value,
        "--task", STATE["task"],
        "--blend-space", rb_space.value,
        "--sets-dir", str(SETS_ROOT/STATE["RUN_TAG"]),
        "--models-index", str(MODELS_INDEX),
        "--analyze-only",
        "--verbose",
    ]

    # Mode-specific numeric parameters.
    if mode == "dirichlet":
        cmd.extend(["--dirichlet-samples", str(option(0, 4000))])
    if mode == "coord":
        cmd.extend(["--coord-iters", str(option(2, 2000)), "--coord-step", str(option(3, 0.01))])

    # Boolean switches: (flag, widget position, default without UI).
    switches = (
        ("--nnls", 1, True),
        ("--nonneg", 4, True),
        ("--sum-to-one", 5, True),
        ("--cv-weights", 6, True),
        ("--reopt-after-cv", 7, False),
    )
    for flag, position, fallback in switches:
        if option(position, fallback):
            cmd.append(flag)

    return run_cmd(cmd, verbose=True)

rc, out, err = run_blend_analyze()


In [None]:

# Calibration and threshold controls are only offered for binary tasks;
# in every other case stand-in objects with value "off" are used.
if STATE["task"]=="binary":
    if W:
        dd_cal = W.Dropdown(options=["off","platt","isotonic"], value="off", description="calibrate:")
        # Free-text field (not a dropdown); its default value is "off".
        dd_thr = W.Text(value="off", description="threshold:")
        display(dd_cal, dd_thr)
    else:
        dd_cal = type("D", (), {"value":"off"})()
        dd_thr = type("D", (), {"value":"off"})()
else:
    dd_cal = type("D", (), {"value":"off"})()
    dd_thr = type("D", (), {"value":"off"})()

print("Подсказка: для AUC сабмита обычно калибровка/τ не применяются к файлу сабмита (только к отчёту).")


In [None]:

def run_blend_save(name: str, save_test: bool=True):
    """Run ``tools/run_blend.py`` and persist the blend under *name*.

    Uses the same option handling as the analyze step, plus the calibration
    and threshold settings for binary tasks and ``--save-test`` when
    *save_test* is true.

    Returns:
        ``(returncode, stdout)`` of the blend script.
    """
    assert STATE["RUN_TAG"], "RUN_TAG не выбран"
    assert len(SELECTED_RUNS)>=1, "Нет участников"

    controls = hb_params.children if W else None

    def option(position, fallback):
        # Widget value when the UI exists, otherwise the built-in default.
        return fallback if controls is None else controls[position].value

    mode = dd_mode.value
    cmd = [
        sys.executable, "tools/run_blend.py",
        "--tag", STATE["RUN_TAG"],
        "--members", ",".join(SELECTED_RUNS),
        "--mode", mode,
        "--metric", dd_metric.value,
        "--task", STATE["task"],
        "--blend-space", rb_space.value,
        "--sets-dir", str(SETS_ROOT/STATE["RUN_TAG"]),
        "--models-index", str(MODELS_INDEX),
        "--name", name,
    ]

    if mode == "dirichlet":
        cmd.extend(["--dirichlet-samples", str(option(0, 4000))])
    if mode == "coord":
        cmd.extend(["--coord-iters", str(option(2, 2000)), "--coord-step", str(option(3, 0.01))])

    # Boolean switches: (flag, widget position, default without UI).
    switches = (
        ("--nnls", 1, True),
        ("--nonneg", 4, True),
        ("--sum-to-one", 5, True),
        ("--cv-weights", 6, True),
        ("--reopt-after-cv", 7, False),
    )
    for flag, position, fallback in switches:
        if option(position, fallback):
            cmd.append(flag)

    if STATE["task"]=="binary":
        calibrate = dd_cal.value if W else "off"
        threshold = dd_thr.value if W else "off"
        cmd.extend(["--calibrate", calibrate, "--threshold", threshold])
    if save_test:
        cmd.append("--save-test")

    rc, out, _stderr = run_cmd(cmd, verbose=True)
    return rc, out

# UI
if W:
    tb_name = W.Text(value="auc_rank_dir", description="blend name:")
    cb_save_test = W.Checkbox(value=True, description="save test")
    btn_save = W.Button(description="Сохранить бленд", button_style="success", layout=BTN_LAYOUT)
    out_save = W.Output()
    display(W.HBox([tb_name, cb_save_test], layout=ROW_LAYOUT), btn_save, out_save)

    def _on_save(_):
        # Run the save inside the output widget and report the exit code.
        out_save.clear_output()
        with out_save:
            exit_code, _stdout = run_blend_save(tb_name.value, cb_save_test.value)
            print("RC:", exit_code)

    btn_save.on_click(_on_save)


In [None]:

def list_blends_for_tag(tag: str):
    """List saved blends whose config belongs to *tag*, best OOF metric first.

    Scans ``BLENDS_ROOT/*/metrics.json``. Blends without a readable
    ``config.json`` or with a different tag are skipped. An unreadable
    ``metrics.json`` yields empty metric fields instead of a crash, and the
    frame always has the full set of columns, so "no blends" produces an empty
    table rather than a ``KeyError`` in ``sort_values``.
    """
    columns = [
        "blend_id", "mode", "space", "metric", "metric_name",
        "calibration", "threshold", "members",
    ]
    rows = []
    for metrics_path in BLENDS_ROOT.glob("*/metrics.json"):
        blend_dir = metrics_path.parent
        cfg = read_json(blend_dir / "config.json")
        if not cfg or cfg.get("tag") != tag:
            continue
        # read_json returns None for a broken file; treat that as "no metrics".
        met = read_json(metrics_path) or {}
        rows.append({
            "blend_id": blend_dir.name,
            "mode": cfg.get("mode"),
            "space": cfg.get("blend_space"),
            "metric": met.get("oof_metric"),
            "metric_name": met.get("oof_metric_name"),
            "calibration": cfg.get("calibration"),
            "threshold": cfg.get("threshold"),
            "members": ",".join(map(str, cfg.get("members") or [])),
        })
    return pd.DataFrame(rows, columns=columns).sort_values("metric", ascending=False)

if STATE["RUN_TAG"]:
    display(list_blends_for_tag(STATE["RUN_TAG"]))
else:
    print("Выбери RUN_TAG")


In [None]:

# Choose the blend_id, the submission name and the name of the prediction column.

if W:
    # Collect blend ids: every directory under BLENDS_ROOT that has a config.json
    # (the list is not filtered by the current RUN_TAG).
    OPTS = [p.name for p in BLENDS_ROOT.glob("*") if (p/"config.json").exists()]
    dd_blend = W.Dropdown(options=OPTS, description="blend_id:", layout=W.Layout(width="500px"))
    # Default submission tag carries a month-day_hour-minute timestamp.
    tb_subname = W.Text(value=f"submit_{datetime.now().strftime('%m%d_%H%M')}", description="SUB_TAG:")
    # Default column name: the detected target column, or "target" if none is known yet.
    tb_col = W.Text(value=STATE["target_col"] or "target", description="submit_col:")
    btn_makesub = W.Button(description="Сформировать сабмит", button_style="primary", layout=BTN_LAYOUT)
    out_sub = W.Output()
    display(W.HBox([dd_blend, tb_subname], layout=ROW_LAYOUT), tb_col, btn_makesub, out_sub)
else:
    print("Укажи вручную переменные: SELECTED_BLEND_ID, SUB_TAG, SUBMIT_COL")

def make_submit(blend_id: str, sub_tag: str, submit_col: str):
    """Write ``submission.csv`` and ``manifest.json`` for a saved blend.

    Args:
        blend_id: directory name of the blend under ``BLENDS_ROOT``.
        sub_tag: sub-directory name for the submission under
            ``SUBMITS_ROOT/<RUN_TAG>``.
        submit_col: name of the prediction column in the CSV.

    For multiclass the arg-max class index is written as a baseline. For all
    other tasks the predictions go through ``normalize_pred_for_task``, so a
    binary prediction stored as an ``(n, 2)`` probability matrix is reduced to
    the positive-class column instead of being flattened to ``2n`` values.

    Raises:
        AssertionError: if RUN_TAG is not set, ``ids_test.parquet`` is
            missing, or the blend has no ``test_pred.npy``.
        ValueError: if the number of predictions differs from the number of
            test ids.
    """
    assert STATE["RUN_TAG"], "RUN_TAG?"
    base = SETS_ROOT/STATE["RUN_TAG"]
    ids = read_parquet_any(base/"ids_test.parquet")
    assert ids is not None, "Нет ids_test.parquet"
    id_col = ids.columns[0]

    bdir = BLENDS_ROOT/blend_id
    test_p = bdir/"test_pred.npy"
    assert test_p.exists(), "В бленде нет test_pred.npy (пересоздай с --save-test)"

    yhat = np.load(test_p)
    if STATE["task"]=="multiclass":
        # Whether classes or per-class probabilities are expected depends on
        # the competition rules; the arg-max class is the baseline.
        yout = np.argmax(yhat, 1)
    else:
        # Same shape handling as for member predictions (vector for binary/regression).
        yout = normalize_pred_for_task(STATE["task"], yhat)

    # Fail with a clear message rather than a pandas broadcasting error.
    if len(yout) != len(ids):
        raise ValueError(
            f"Длина предсказаний ({len(yout)}) не совпадает с числом test id ({len(ids)})"
        )

    sub = pd.DataFrame({id_col: ids[id_col].values, submit_col: yout})
    SUB_DIR = SUBMITS_ROOT/STATE["RUN_TAG"]/sub_tag
    ensure_dir(SUB_DIR)
    out_csv = SUB_DIR/"submission.csv"
    sub.to_csv(out_csv, index=False)

    manifest = {
        "run_tag": STATE["RUN_TAG"],
        "blend_id": blend_id,
        "submit_col": submit_col,
        "generated_at": datetime.now().isoformat(),
        "task": STATE["task"]
    }
    save_json(SUB_DIR/"manifest.json", manifest)
    print("Готово →", out_csv)
    display(sub.head())

if W:
    def _on_make(_):
        # Build the submission inside the output widget so messages and errors show there.
        out_sub.clear_output()
        with out_sub:
            make_submit(dd_blend.value, tb_subname.value, tb_col.value)
    btn_makesub.on_click(_on_make)


In [None]:

# Быстрая диагностика: бинарь — метрика по квинтилям/категориям относительно колонки из y_train.parquet.
# Для multiclass/regression можно расширить аналогично.

def slice_metric_binary(y, p, mask, scorer):
    """Score the rows selected by *mask*; return NaN if the scorer fails.

    Any exception from indexing, scoring or the float conversion is converted
    into ``np.nan``.
    """
    try:
        value = scorer(y[mask], p[mask])
        result = float(value)
    except Exception:
        result = np.nan
    return result

def quick_slices_binary(colname: str, bins: int=5):
    """Show the selected metric per slice of a column of ``y_train.parquet``.

    Numeric columns are cut into *bins* quantile bins (duplicate edges
    dropped); other columns use their 12 most frequent values. The reference
    predictions are the cached OOF of the first run in the basket.

    Missing inputs (no y_train file, empty basket, OOF not loaded, unknown
    column) are reported with a message instead of raising.
    """
    assert STATE["task"]=="binary", "Сейчас реализовано для binary"
    y_path = SETS_ROOT/STATE["RUN_TAG"]/"y_train.parquet"
    ydf = read_parquet_any(y_path)
    if ydf is None:
        # read_parquet_any returns None for a missing or unreadable file.
        print("Нет y_train.parquet:", y_path)
        return
    id_col = ydf.columns[0]
    tgt_col = [c for c in ydf.columns if c!=id_col][0]

    # Reference predictions: the first selected model is used as an example.
    if not SELECTED_RUNS or SELECTED_RUNS[0] not in OOF_CACHE:
        print("Добавь модели и перезагрузи OOF")
        return
    ref = OOF_CACHE[SELECTED_RUNS[0]].reshape(-1)

    scorer = metric_fn("binary", dd_metric.value)

    if colname not in ydf.columns:
        print("Колонки нет в y_train.parquet")
        return

    x = ydf[colname]
    y = ydf[tgt_col].values
    p = ref

    if pd.api.types.is_numeric_dtype(x):
        q = pd.qcut(x, q=bins, duplicates='drop')
        tab = []
        # Intervals in ascending order of their left edge.
        for lvl in sorted(q.cat.categories, key=lambda z: z.left):
            mask = (q == lvl).values
            tab.append({"bin": str(lvl), "n": int(mask.sum()), "metric": slice_metric_binary(y, p, mask, scorer)})
        df = pd.DataFrame(tab)
    else:
        # value_counts is ordered by frequency, so this keeps the 12 most common categories.
        cats = list(x.value_counts().index)
        tab = []
        for c in cats[:12]:
            mask = (x==c).values
            tab.append({"cat": str(c), "n": int(mask.sum()), "metric": slice_metric_binary(y, p, mask, scorer)})
        df = pd.DataFrame(tab, columns=["cat", "n", "metric"]).sort_values("metric", ascending=False)

    display(df)

# Пример вызова:
# quick_slices_binary("some_column", bins=5)


In [None]:

def panic_blend():
    """Emergency blend: top-3 runs by local metric, rank space + dirichlet.

    Uses ``--cv-weights`` and no calibration, and saves test predictions under
    the name ``panic_rank_dir``. Runs whose local score is NaN (scoring
    failed) are excluded first: NaN compares false with everything, so sorting
    with NaN keys gives an undefined order and could put a failed model into
    the top three.
    """
    if len(LOCAL_SCORES)==0:
        print("Нет локальных метрик (загрузи OOF)")
        return

    scored = [(rid, score) for rid, score in LOCAL_SCORES.items() if not np.isnan(score)]
    if not scored:
        print("Нет валидных локальных метрик (все NaN)")
        return

    top = sorted(scored, key=lambda kv: kv[1], reverse=True)[:3]
    members = [rid for rid, _ in top]
    print("panic members:", members)
    cmd = [
        sys.executable, "tools/run_blend.py",
        "--tag", STATE["RUN_TAG"], "--members", ",".join(members),
        "--mode", "dirichlet", "--blend-space", "rank",
        "--metric", dd_metric.value, "--task", STATE["task"],
        "--cv-weights", "--dirichlet-samples", "4000",
        "--sets-dir", str(SETS_ROOT/STATE["RUN_TAG"]),
        "--models-index", str(MODELS_INDEX),
        "--name", "panic_rank_dir", "--save-test"
    ]
    run_cmd(cmd, verbose=True)

# panic_blend()



### Траблшутинг
- **Нет `y_train.parquet` или `ids_test.parquet`** → собери их в ячейке ремонта (кнопка «Собрать сет-файлы»).
- **У участников разная длина OOF** → убедись, что все `run_id` обучались на одном `RUN_TAG`.
- **`level2` требует `folds.pkl`** → создай KFold в ячейке ремонта (кнопка «Собрать сет-файлы») или используй режимы весов.
- **`logit`-space** → до логита клиппим `p∈[1e-6, 1-1e-6]`, в multiclass возвращаемся к вероятностям нормировкой по строке.
- **Сабмит под AUC/PR** → в файл сабмита кладём **скоры/вероятности**, не классы; τ к сабмиту не применяется и хранится в `config.json` бленда.
