# 01_production_blend: Plan

- Goal: Reproduce Run D (4-bin length-gated LOP, OOF≈0.30139), then iterate per expert plan to medal.
- Constraints: Deterministic 5-fold StratifiedKFold (seed=42), identical class order ['EAP','HPL','MWS'], no leakage. Use cached OOF/test npys.

Milestones:
1) Environment + data + npy audit: load train/test, enumerate npy predictions, validate shapes/probability simplex, compute per-model OOF logloss, set/save folds.
2) Reproduce Run D blender exactly (no overlay first), verify OOF close to 0.30139; emit submission.csv.
3) Batch 1: 5th very-long bin + tighter temps + overlay OFF; tune NB caps per bin.
4) Batch 2: Add per-bin Dirichlet calibration; compare.
5) Batch 3: Seed-average top bases; re-run best gating/calibration.

Always log progress, clip+renorm after every transform, and keep changes atomic.

In [1]:
# Environment + data + npy audit
import os, sys, json, time, math, glob, shutil, subprocess, textwrap, random
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

def run(cmd):
    print("$", " ".join(cmd), flush=True)
    return subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True).stdout

print("=== nvidia-smi ===", flush=True)
try:
    print(run(['bash','-lc','nvidia-smi || true']))
except Exception as e:
    print("nvidia-smi check failed:", e)

t0 = time.time()
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
print(f"Loaded train {train.shape}, test {test.shape}")

# Class order fixed
classes = ['EAP','HPL','MWS']
label_map = {c:i for i,c in enumerate(classes)}
y = train['author'].map(label_map).values
assert set(train['author'].unique()) == set(classes), f"Unexpected classes: {train['author'].unique()}"

# CV folds (deterministic, saved for reference)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
folds = np.full(len(train), -1, dtype=int)
for fi, (_, val_idx) in enumerate(skf.split(train, y)):
    folds[val_idx] = fi
assert (folds>=0).all()
np.save('folds_seed42_5.npy', folds)
print("Saved folds to folds_seed42_5.npy; fold counts:", np.bincount(folds))

def clip_renorm(P, eps=1e-6):
    P = np.asarray(P, dtype=np.float64)
    P = np.clip(P, eps, 1.0)
    P_sum = P.sum(axis=1, keepdims=True)
    P = P / P_sum
    return P

# Pair OOF and test npys
oof_files = sorted(glob.glob('oof_*.npy'))
test_files = sorted(glob.glob('test_*.npy'))
pairs = []
# Fix prefix lengths: 'test_' is 5 chars, 'oof_' is 4
key_to_test = {Path(f).name[5:]: f for f in test_files}
for oof in oof_files:
    key = Path(oof).name[4:]  # remove 'oof_'
    testf = key_to_test.get(key)
    if testf is not None:
        pairs.append((oof, testf))

print(f"Found {len(pairs)} OOF/test pairs out of {len(oof_files)} OOF and {len(test_files)} test files")

summary = []
bad = []
loaded = {}  # key -> dict(oof, test)
n_tr, n_te = len(train), len(test)

for oof, testf in pairs:
    key = Path(oof).stem[4:]  # strip 'oof_'
    try:
        O = np.load(oof)
        T = np.load(testf)
        ok = True
        if O.shape != (n_tr, 3) or T.shape != (n_te, 3):
            ok = False
            msg = f"shape mismatch: {O.shape} / {T.shape}"
        else:
            Oc = clip_renorm(O)
            Tc = clip_renorm(T)
            if not np.all(np.isfinite(Oc)) or not np.all(np.isfinite(Tc)):
                ok = False
                msg = "non-finite probs"
            else:
                try:
                    ll = log_loss(y, Oc, labels=[0,1,2])
                except Exception as e:
                    ok = False
                    msg = f"logloss err: {e}"
        if ok:
            loaded[key] = {'oof': Oc, 'test': Tc}
            summary.append((key, ll))
        else:
            bad.append((key, msg))
    except Exception as e:
        bad.append((key, f'load error: {e}'))

summary.sort(key=lambda x: x[1])
print("\nModel OOF logloss (best→worst):")
for k, ll in summary:
    print(f"{k:35s}  OOF={ll:.5f}")
if bad:
    print("\nSkipped/Bad:")
    for k, m in bad:
        print(f"{k:35s}  {m}")

# Length distribution + candidate bins
lens = train['text'].astype(str).str.len().values
def bin_counts(cuts):
    bins = np.digitize(lens, cuts, right=True)
    return np.bincount(bins, minlength=len(cuts)+1)

bins_4 = [80,130,200]
bins_5a = [80,130,200,260]
bins_5b = [80,130,200,280]
print("\nLength bin counts (4-bin <=80,81-130,131-200,>200):", bin_counts(bins_4))
print("Length bin counts (5-bin <=80,81-130,131-200,201-260,>260):", bin_counts(bins_5a))
print("Length bin counts (5-bin <=80,81-130,131-200,201-280,>280):", bin_counts(bins_5b))

elapsed = time.time()-t0
print(f"\nAudit done in {elapsed:.2f}s. Ready to reproduce Run D.")

=== nvidia-smi ===


$ bash -lc nvidia-smi || true


Mon Sep 29 20:44:32 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.144.06             Driver Version: 550.144.06     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A10-24Q                 On  |   00000002:00:00.0 Off |                    0 |
| N/A   N/A    P0             N/A /  N/A  |     182MiB /  24512MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
# Reproduce Run D: 4-bin gated LOP (overlay OFF), exact Run D settings + minimal merged patch
import numpy as np
from pathlib import Path
from sklearn.metrics import log_loss
import time

# Clear any stale arrays from other cells/runs
for v in ['oof_blend','test_blend']:
    if v in globals():
        del globals()[v]

t0 = time.time()
assert 'loaded' in globals(), "Run the audit cell first to populate 'loaded' dict."

# Portfolio (Run D 9-core + mnb_char_2_6 + 2 ultra-weak diversity if present)
portfolio = [
    'nbsvm_wc_tweaked',
    'nbsvm_char_2_6_counts',
    'nbsvm_char_2_7_presence',
    'mnb_char_2_6',  # added per expert
    'lr_char_1_8_hero',
    'lr_word13_charwb36',
    'lr_char_1_7',
    'lr_charwb_1_6',
    'lr_wordchar_fixed',
    'svc_char_1_6_iso',
]
for k in ['char5lm','stylo_lr']:
    if k in loaded:
        portfolio.append(k)

# Filter to available keys and print alignment
pf = [k for k in portfolio if k in loaded]
assert len(pf) > 0, "Empty portfolio after filtering"
print(f"Using portfolio: {pf} ({len(pf)} models)", flush=True)

classes = ['EAP','HPL','MWS']
y = train['author'].map({c:i for i,c in enumerate(classes)}).values
lens_tr = train['text'].astype(str).str.len().values
lens_te = test['text'].astype(str).str.len().values

# 4-bin cutpoints: <=80, 81-130, 131-200, >200
cuts = np.array([80,130,200])
bins_tr = np.digitize(lens_tr, cuts, right=True)  # 0..3
bins_te = np.digitize(lens_te, cuts, right=True)
bin_names = ['vshort','short','mid','long']

def clip_renorm(P, eps=1e-8):
    P = np.asarray(P, dtype=np.float64)
    C = P.shape[1] if P.ndim == 2 else 3
    P = np.clip(P, eps, 1.0 - eps*(C-1))
    P = P / P.sum(axis=1, keepdims=True)
    return P

def apply_temperature(logP, T):
    # logP-like inputs; scale by 1/T; final softmax happens only once at the end
    return logP / T

def lop_blend(logPs, w):
    # logPs: (N, M, C), w: (M,) non-neg, sum to 1; returns probs
    S = np.tensordot(logPs, w, axes=([1],[0]))  # (N,C)
    Smax = S.max(axis=1, keepdims=True)
    eS = np.exp(S - Smax)
    P = eS / (eS.sum(axis=1, keepdims=True) + 1e-20)
    return P

# Exact capped-simplex projection with final renorm safety
def project_capped_simplex(y, caps, iters=60):
    y = np.asarray(y, dtype=np.float64)
    caps = np.asarray(caps, dtype=np.float64)
    if caps.sum() < 1.0 - 1e-12:
        caps = caps * ((1.0 + 1e-12) / max(caps.sum(), 1e-12))
    lo = y.min() - caps.max() - 1.0
    hi = y.max() + 1.0
    for _ in range(iters):
        lam = 0.5 * (lo + hi)
        x = np.clip(y - lam, 0.0, caps)
        if x.sum() > 1.0:
            lo = lam
        else:
            hi = lam
    lam = 0.5 * (lo + hi)
    x = np.clip(y - lam, 0.0, caps)
    s = x.sum()
    if abs(s - 1.0) > 1e-8:
        x = x / s if s > 0 else np.zeros_like(x)
    return x

# Stabilized EG optimizer with early stop, best-cache, and numeric guard
def fit_lop_weights(X_log, y_true, caps, iters=360, eta0=0.40, decay=0.98, seed=42, lambda_ent=1e-4):
    N, M, C = X_log.shape
    Y = np.eye(C, dtype=np.float64)[y_true]
    w = project_capped_simplex(np.ones(M)/M, caps)

    def softmax(S):
        S = S - S.max(axis=1, keepdims=True)
        eS = np.exp(S)
        return eS / (eS.sum(axis=1, keepdims=True) + 1e-15)

    eta = eta0
    best_ll = float('inf')
    best_w = w.copy()
    prev_ll = float('inf')
    checks_without_improve = 0

    for t in range(iters):
        S = np.tensordot(X_log, w, axes=([1],[0]))
        P = softmax(S)
        diff = P - Y
        g = np.einsum('nc,nmc->m', diff, X_log) / N
        if lambda_ent > 0.0:
            g += lambda_ent * (np.log(np.clip(w, 1e-12, 1.0)) + 1.0)

        w_new = w * np.exp(-eta * g)
        if not np.isfinite(w_new).all():
            print(f"    WARNING: numerical instability at iter {t}; using best weights so far.", flush=True)
            break

        w = np.maximum(w_new, 1e-18)
        w = project_capped_simplex(w, caps)
        eta *= decay

        if t > 0 and t % 50 == 0:
            S_cur = np.tensordot(X_log, w, axes=([1],[0]))
            P_cur = softmax(S_cur)
            cur_ll = log_loss(y_true, P_cur, labels=[0,1,2])
            print(f"    EG progress (iter {t}/{iters}): current bin ll={cur_ll:.6f}", flush=True)

            if cur_ll + 1e-8 < best_ll:
                best_ll = cur_ll
                best_w = w.copy()

            if prev_ll - cur_ll < 1e-6:
                checks_without_improve += 1
                if checks_without_improve >= 3:
                    print(f"    Early stopping at iter {t}: plateau detected.", flush=True)
                    break
            else:
                checks_without_improve = 0
            prev_ll = cur_ll

    w = best_w
    S = np.tensordot(X_log, w, axes=([1],[0]))
    P = softmax(S)
    ll = log_loss(y_true, P, labels=[0,1,2])
    return ll, w

def fit_scalar_temperature(logP, y_true, T_bounds=(0.70, 1.40), steps=32):
    a, b = T_bounds
    gr = (np.sqrt(5) - 1) / 2
    c = b - gr * (b - a)
    d = a + gr * (b - a)
    def f(T):
        S = logP / T
        Smax = S.max(axis=1, keepdims=True)
        logZ = Smax + np.log(np.exp(S - Smax).sum(axis=1, keepdims=True))
        ll = -(S[np.arange(len(y_true)), y_true] - logZ.ravel()).mean()
        return float(ll)
    fc, fd = f(c), f(d)
    for _ in range(steps):
        if fc > fd:
            a = c; c = d; fc = fd; d = a + gr * (b - a); fd = f(d)
        else:
            b = d; d = c; fd = fc; c = b - gr * (b - a); fc = f(c)
    return float(0.5*(a+b))

# Prepare per-bin indices
per_bin_tr_idxs = [np.where(bins_tr==b)[0] for b in range(4)]
oof_blend = np.zeros((len(train), 3), dtype=float)
test_blend = np.zeros((len(test), 3), dtype=float)

# Caps per expert
nb_like = {'nbsvm_wc_tweaked','nbsvm_char_2_6_counts','nbsvm_char_2_7_presence','mnb_char_2_6'}
ultra_weak = {'char5lm','stylo_lr'}
per_bin_caps_nb    = [0.68, 0.67, 0.62, 0.58]
per_bin_global_cap = [0.52, 0.53, 0.54, 0.55]  # non-NB per-bin global cap (tightened)
ultra_weak_caps    = [0.0, 0.0, 0.0, 0.0]      # zero ultra-weak caps

print("Precomputing global temperatures...", flush=True)
model_global_T = {}
for k in pf:
    logP_all = np.log(clip_renorm(loaded[k]['oof']))
    Tg = fit_scalar_temperature(logP_all, y, T_bounds=(0.70,1.40), steps=32)
    model_global_T[k] = Tg
print("Global temperatures done in {:.2f}s".format(time.time()-t0), flush=True)

t1 = time.time()
for b, tr_idx in enumerate(per_bin_tr_idxs):
    te_idx = np.where(bins_te==b)[0]
    print(f"Bin {b} ({bin_names[b]}): n_tr={len(tr_idx)} n_te={len(te_idx)}", flush=True)
    Xo_raw = []
    Xt_raw = []
    caps = []
    # Use all portfolio models in every bin (Run D)
    models = pf
    for k in models:
        O_bin = loaded[k]['oof'][tr_idx]
        Xo_raw.append(np.log(clip_renorm(O_bin)))
        if len(te_idx) > 0:
            T_bin = loaded[k]['test'][te_idx]
            Xt_raw.append(np.log(clip_renorm(T_bin)))
        if k in nb_like:
            caps.append(per_bin_caps_nb[b])
        elif k in ultra_weak:
            caps.append(ultra_weak_caps[b])
        else:
            caps.append(per_bin_global_cap[b])
    # Explicit per-bin caps for mnb_char_2_6
    if 'mnb_char_2_6' in models:
        mi = models.index('mnb_char_2_6')
        if b == 0:
            caps[mi] = min(caps[mi], 0.12)
        elif b == 1:
            caps[mi] = min(caps[mi], 0.22)
        elif b == 2:
            caps[mi] = min(caps[mi], 0.16)  # nudged down by 0.02
        else:
            caps[mi] = min(caps[mi], 0.12)  # nudged down by 0.02
    # special tighter caps in b0
    if b == 0 and 'nbsvm_char_2_7_presence' in models:
        mi = models.index('nbsvm_char_2_7_presence')
        caps[mi] = min(caps[mi], 0.48)
    if b == 0 and 'svc_char_1_6_iso' in models:
        mi = models.index('svc_char_1_6_iso')
        caps[mi] = min(caps[mi], 0.40)
    if b == 0 and 'mnb_char_2_6' in models:
        mi = models.index('mnb_char_2_6')
        caps[mi] = min(caps[mi], 0.12)
    Xo_raw = np.stack(Xo_raw, axis=1)  # (n_tr_bin, M, 3)
    caps = np.asarray(caps, dtype=float)
    M = Xo_raw.shape[1]
    Xo_temp = np.empty_like(Xo_raw)
    Xt_temp = None
    if len(te_idx) > 0:
        Xt_raw = np.stack(Xt_raw, axis=1)  # (n_te_bin, M, 3)
        Xt_temp = np.empty_like(Xt_raw)
    # per-model temp with bin-specific bounds; T_eff = 0.80*Tg + 0.20*Tb
    T_eff_vals = []
    for m in range(M):
        k = models[m]
        if b == 0:
            bounds = (0.65, 1.45)
        elif b == 1:
            bounds = (0.67, 1.43)
        elif b == 2:
            bounds = (0.70, 1.40)
        else:
            bounds = (0.73, 1.37)
        Tb = fit_scalar_temperature(Xo_raw[:,m,:], y[tr_idx], T_bounds=bounds, steps=28)
        Tg = model_global_T[k]
        T_eff = float(np.clip(0.80*Tg + 0.20*Tb, bounds[0], bounds[1]))
        if k in {'char5lm','stylo_lr'}:
            T_eff = 1.0
        T_eff_vals.append(T_eff)
        Xo_temp[:,m,:] = apply_temperature(Xo_raw[:,m,:], T_eff)
        if Xt_temp is not None:
            Xt_temp[:,m,:] = apply_temperature(Xt_raw[:,m,:], T_eff)
    print(f"  Models in bin: {M}, caps min/max: {caps.min():.3f}/{caps.max():.3f}", flush=True)
    assert np.isfinite(Xo_temp).all(), "Non-finite Xo_temp"
    best_ll, w = fit_lop_weights(
        X_log=Xo_temp, y_true=y[tr_idx], caps=caps,
        iters=300, eta0=0.40, decay=0.98,
        seed=42, lambda_ent=1e-4
    )
    print(f"  Best OOF bin logloss: {best_ll:.6f}", flush=True)
    if b == 0:
        print("  top weights (b0):", sorted(zip(models, w), key=lambda x: -x[1])[:6])
    # Blend
    Po = lop_blend(Xo_temp, w)
    oof_blend[tr_idx] = Po
    if Xt_temp is not None and len(te_idx) > 0:
        Pt = lop_blend(Xt_temp, w)
        test_blend[te_idx] = Pt
print("Per-bin loop done in {:.2f}s".format(time.time()-t1), flush=True)

# OOF (no overlay) per expert
oof_noov = clip_renorm(oof_blend)
oof_ll_no_overlay = log_loss(y, oof_noov, labels=[0,1,2])
print(f"\nOOF (no overlay): {oof_ll_no_overlay:.5f}", flush=True)

# Save submission
sub = pd.read_csv('sample_submission.csv')
sub[classes] = clip_renorm(test_blend)
sub.to_csv('submission.csv', index=False)
print("Saved submission.csv. Total elapsed {:.2f}s".format(time.time()-t0), flush=True)

Using portfolio: ['nbsvm_wc_tweaked', 'nbsvm_char_2_6_counts', 'nbsvm_char_2_7_presence', 'mnb_char_2_6', 'lr_char_1_8_hero', 'lr_word13_charwb36', 'lr_char_1_7', 'lr_charwb_1_6', 'lr_wordchar_fixed', 'svc_char_1_6_iso', 'char5lm', 'stylo_lr'] (12 models)


Precomputing global temperatures...


Global temperatures done in 0.59s


Bin 0 (vshort): n_tr=4330 n_te=466


  Models in bin: 12, caps min/max: 0.000/0.680


    EG progress (iter 50/300): current bin ll=0.506735


    EG progress (iter 100/300): current bin ll=0.506358


    EG progress (iter 150/300): current bin ll=0.506254


    EG progress (iter 200/300): current bin ll=0.506220


    EG progress (iter 250/300): current bin ll=0.506208


  Best OOF bin logloss: 0.506208


  top weights (b0): [('nbsvm_wc_tweaked', 0.12784967180823195), ('nbsvm_char_2_7_presence', 0.12758047515548465), ('mnb_char_2_6', 0.12), ('nbsvm_char_2_6_counts', 0.1145075281145779), ('lr_word13_charwb36', 0.09779001610193298), ('lr_wordchar_fixed', 0.09178018207980683)]
Bin 1 (short): n_tr=4674 n_te=516


  Models in bin: 12, caps min/max: 0.000/0.670


    EG progress (iter 50/300): current bin ll=0.368175


    EG progress (iter 100/300): current bin ll=0.366342


    EG progress (iter 150/300): current bin ll=0.365783


    EG progress (iter 200/300): current bin ll=0.365593


    EG progress (iter 250/300): current bin ll=0.365525


  Best OOF bin logloss: 0.365525


Bin 2 (mid): n_tr=4698 n_te=506


  Models in bin: 12, caps min/max: 0.000/0.620


    EG progress (iter 50/300): current bin ll=0.247954


    EG progress (iter 100/300): current bin ll=0.245964


    EG progress (iter 150/300): current bin ll=0.245336


    EG progress (iter 200/300): current bin ll=0.245120


    EG progress (iter 250/300): current bin ll=0.245044


  Best OOF bin logloss: 0.245044


Bin 3 (long): n_tr=3919 n_te=470


  Models in bin: 12, caps min/max: 0.000/0.580


    EG progress (iter 50/300): current bin ll=0.143137


    EG progress (iter 100/300): current bin ll=0.140624


    EG progress (iter 150/300): current bin ll=0.139786


    EG progress (iter 200/300): current bin ll=0.139493


    EG progress (iter 250/300): current bin ll=0.139389


  Best OOF bin logloss: 0.139389


Per-bin loop done in 3.59s



OOF (no overlay): 0.31768


Saved submission.csv. Total elapsed 4.20s


In [None]:
# Batch 1: 5-bin gated LOP (overlay OFF) with tightened temps and caps
import numpy as np
from sklearn.metrics import log_loss

assert 'loaded' in globals(), "Run the audit cell first to populate 'loaded' dict."
assert 'train' in globals() and 'test' in globals(), "Train/test not loaded."

classes = ['EAP','HPL','MWS']
y = train['author'].map({c:i for i,c in enumerate(classes)}).values
lens = train['text'].astype(str).str.len().values

# 5-bin cutpoints per expert: <=80, 81-130, 131-200, 201-280, >280
cuts5 = np.array([80,130,200,280])
bins5 = np.digitize(lens, cuts5, right=True)  # 0..4
bin_names5 = ['vshort','short','mid','long','vlong']

def clip_renorm(P, eps=1e-6):
    P = np.asarray(P, dtype=np.float64)
    P = np.clip(P, eps, 1.0)
    P = P / P.sum(axis=1, keepdims=True)
    return P

def softmax_logP(logP):
    Smax = logP.max(axis=1, keepdims=True)
    eS = np.exp(logP - Smax)
    P = eS / eS.sum(axis=1, keepdims=True)
    return np.log(clip_renorm(P))

def apply_temperature(logP, T):
    return softmax_logP(logP / T)

def lop_blend(logPs, w):
    S = np.tensordot(logPs, w, axes=([1],[0]))
    Smax = S.max(axis=1, keepdims=True)
    eS = np.exp(S - Smax)
    P = eS / eS.sum(axis=1, keepdims=True)
    return P

def rand_on_capped_simplex(rng, caps):
    M = len(caps)
    for _ in range(1000):
        w = rng.dirichlet(alpha=np.ones(M))
        w = np.minimum(w, caps)
        s = w.sum()
        if s > 1e-12:
            w = w / s
        if np.all(w <= caps + 1e-12):
            return w
    w = np.minimum(np.ones(M)/M, caps)
    return w / w.sum()

def local_perturb(rng, w, caps, scale=0.15):
    z = np.log(np.clip(w, 1e-12, 1))
    z = z + rng.normal(0, scale, size=w.shape)
    w_new = np.exp(z)
    w_new = np.minimum(w_new, caps)
    w_new = np.maximum(w_new, 1e-12)
    return w_new / w_new.sum()

def fit_lop_weights(X_log, y_true, caps=None, n_starts=96, seed=42, lambda_ent=0.0022, iters=80):
    rng = np.random.RandomState(seed)
    M = X_log.shape[1]
    if caps is None:
        caps = np.ones(M)
    caps = np.asarray(caps, dtype=float)
    def obj(w):
        P = lop_blend(X_log, w)
        L = log_loss(y_true, clip_renorm(P), labels=[0,1,2])
        w_safe = np.clip(w, 1e-12, 1.0)
        ent = -np.sum(w_safe * np.log(w_safe))
        return L - lambda_ent * ent
    best_ll = 1e9; best_w = None
    for s in range(n_starts):
        if s % 16 == 0:
            print(f"    start {s}/{n_starts}", flush=True)
        w = rand_on_capped_simplex(rng, caps)
        f = obj(w)
        for t in range(iters):
            w_candidate = local_perturb(rng, w, caps, scale=0.12)
            f2 = obj(w_candidate)
            if f2 < f:
                w, f = w_candidate, f2
        if f < best_ll:
            best_ll, best_w = f, w.copy()
    Pbest = lop_blend(X_log, best_w)
    ll = log_loss(y_true, clip_renorm(Pbest), labels=[0,1,2])
    return ll, best_w

# Portfolio (Run D 9-core + diversity tiny caps if present)
portfolio = [
    'nbsvm_wc_tweaked',
    'nbsvm_char_2_6_counts',
    'nbsvm_char_2_7_presence',
    'lr_char_1_8_hero',
    'lr_word13_charwb36',
    'lr_char_1_7',
    'lr_charwb_1_6',
    'lr_wordchar_fixed',
    'svc_char_1_6_iso',
]
for k in ['char5lm','stylo_lr']:
    if k in loaded:
        portfolio.append(k)
print("Using portfolio (5-bin):", portfolio)

# Precompute global per-model temperatures
model_global_T = {}
for k in portfolio:
    logP_all = np.log(clip_renorm(loaded[k]['oof']))
    # simple line search
    a, b = 0.8, 1.3
    gr = (np.sqrt(5) - 1) / 2
    c = b - gr * (b - a)
    d = a + gr * (b - a)
    def fT(T):
        P = np.exp(softmax_logP(logP_all / T))
        return log_loss(y, clip_renorm(P), labels=[0,1,2])
    fc, fd = fT(c), fT(d)
    for _ in range(28):
        if fc > fd:
            a = c; c = d; fc = fd; d = a + gr * (b - a); fd = fT(d)
        else:
            b = d; d = c; fd = fc; c = b - gr * (b - a); fc = fT(c)
    model_global_T[k] = float((a + b) / 2)

# Caps per expert (NB tighter on longer bins, ultra-weak tiny; may zero on vlong later if noisy)
nb_like = {'nbsvm_wc_tweaked','nbsvm_char_2_6_counts','nbsvm_char_2_7_presence','mnb_char_2_6'}
ultra_weak = {'char5lm','stylo_lr'}
nb_caps = [0.68, 0.65, 0.62, 0.58, 0.54]
tiny_caps = [0.010, 0.010, 0.008, 0.006, 0.004]
global_cap = 0.55

oof_blend = np.zeros((len(train), 3), dtype=float)
test_blend_parts = [np.zeros((len(test), 3), dtype=float) for _ in range(5)]

for b in range(5):
    idx = np.where(bins5==b)[0]
    print(f"Bin {b} ({bin_names5[b]}): n={len(idx)}")
    Xo_raw = []
    Xt_raw = []
    caps = []
    for k in portfolio:
        O_bin = loaded[k]['oof'][idx]
        T_all = loaded[k]['test']
        Xo_raw.append(np.log(clip_renorm(O_bin)))
        Xt_raw.append(np.log(clip_renorm(T_all)))
        if k in nb_like:
            caps.append(nb_caps[b])
        elif k in ultra_weak:
            caps.append(tiny_caps[b])
        else:
            caps.append(global_cap)
    Xo_raw = np.stack(Xo_raw, axis=1)
    Xt_raw = np.stack(Xt_raw, axis=1)
    caps = np.asarray(caps, dtype=float)
    M = Xo_raw.shape[1]
    # Per-model per-bin temperature with shrink 0.85*global + 0.15*bin
    Xo_t = np.empty_like(Xo_raw)
    Xt_t = np.empty_like(Xt_raw)
    for m, k in enumerate(portfolio):
        # bin-fit T
        a, bnd = 0.8, 1.3
        gr = (np.sqrt(5) - 1) / 2
        c = bnd - gr * (bnd - a)
        d = a + gr * (bnd - a)
        def fT(T):
            P = np.exp(softmax_logP(Xo_raw[:,m,:] / T))
            return log_loss(y[idx], clip_renorm(P), labels=[0,1,2])
        fc, fd = fT(c), fT(d)
        for _ in range(22):
            if fc > fd:
                a = c; c = d; fc = fd; d = a + gr * (bnd - a); fd = fT(d)
            else:
                bnd = d; d = c; fd = fc; c = bnd - gr * (bnd - a); fc = fT(c)
        Tb = float((a + bnd) / 2)
        Tg = model_global_T[k]
        Te = float(np.clip(0.85*Tg + 0.15*Tb, 0.8, 1.3))
        Xo_t[:,m,:] = apply_temperature(Xo_raw[:,m,:], Te)
        Xt_t[:,m,:] = apply_temperature(Xt_raw[:,m,:], Te)
    print(f"  Models in bin: {M}, caps min/max: {caps.min():.3f}/{caps.max():.3f}")
    # Fit weights (lighter search for speed)
    best_ll, w = fit_lop_weights(Xo_t, y[idx], caps=caps, n_starts=96, seed=42, lambda_ent=0.0022, iters=80)
    print(f"  Best OOF bin logloss: {best_ll:.6f}")
    Po = lop_blend(Xo_t, w)
    Pt = lop_blend(Xt_t, w)
    oof_blend[idx] = Po
    test_blend_parts[b] = Pt

test_blend = np.zeros_like(test_blend_parts[0])
for b in range(5):
    test_blend += test_blend_parts[b] / 5.0

oof_ll = log_loss(y, clip_renorm(oof_blend), labels=[0,1,2])
print(f"\n5-bin gated LOP (overlay OFF). OOF={oof_ll:.5f}")

sub = pd.read_csv('sample_submission.csv')
sub[classes] = clip_renorm(test_blend)
sub.to_csv('submission.csv', index=False)
print("Saved submission.csv")

In [None]:
# Classwise LOP (4-bin) with correct test gating; expect OOF ~0.301
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss

assert 'loaded' in globals() and 'train' in globals() and 'test' in globals(), "Run audit first."
assert 'summary' in globals(), "Run audit to compute per-model OOF summary for weak caps."

classes = ['EAP','HPL','MWS']
y = train['author'].map({c:i for i,c in enumerate(classes)}).values
lens_tr = train['text'].astype(str).str.len().values
lens_te = test['text'].astype(str).str.len().values

# 4-bin cutpoints: <=80, 81-130, 131-200, >200
cuts4 = np.array([80,130,200])
bins_tr = np.digitize(lens_tr, cuts4, right=True)  # 0..3
bins_te = np.digitize(lens_te, cuts4, right=True)
bin_names4 = ['vshort','short','mid','long']

def clip_renorm(P, eps=1e-6):
    P = np.asarray(P, dtype=np.float64)
    P = np.clip(P, eps, 1.0)
    P = P / P.sum(axis=1, keepdims=True)
    return P

def softmax(S):
    Smax = S.max(axis=1, keepdims=True)
    eS = np.exp(S - Smax)
    return eS / eS.sum(axis=1, keepdims=True)

def softmax_logP(logP):
    Smax = logP.max(axis=1, keepdims=True)
    eS = np.exp(logP - Smax)
    P = eS / eS.sum(axis=1, keepdims=True)
    return np.log(clip_renorm(P))

# Temperature scaling: no centering, no per-model re-softmax
def apply_temperature(logP, T):
    return logP / T

def fit_scalar_temperature(logP, y_true, T_bounds=(0.8, 1.3), steps=28):
    a, b = T_bounds
    gr = (np.sqrt(5) - 1) / 2
    c = b - gr * (b - a)
    d = a + gr * (b - a)
    def f(T):
        P = np.exp(softmax_logP(logP / T))
        return log_loss(y_true, clip_renorm(P), labels=[0,1,2])
    fc = f(c); fd = f(d)
    for _ in range(steps):
        if fc > fd:
            a = c; c = d; fc = fd; d = a + gr * (b - a); fd = f(d)
        else:
            b = d; d = c; fd = fc; c = b - gr * (b - a); fc = f(c)
    return float((a + b) / 2)

def classwise_lop(Z, W):
    # Z: (N, M, C) logP; W: (M, C) weights per class (sum_m=1 for each c)
    S = np.einsum('nmc,mc->nc', Z, W)
    return softmax(S)

# Classwise optimizer with per-class recompute of S/P
def fit_classwise_weights(Z, y_true, caps, starts=96, iters=80, lambda_ent=0.0022, seed=42):
    rng = np.random.RandomState(seed)
    N, M, C = Z.shape
    Y = np.eye(C, dtype=np.float64)[y_true]
    caps = np.asarray(caps, dtype=np.float64)

    def project_on_capped_simplex(y, caps, iters=60):
        y = np.asarray(y, dtype=np.float64)
        caps = np.asarray(caps, dtype=np.float64)
        if caps.sum() < 1.0 - 1e-12:
            caps = caps * ((1.0 + 1e-12) / max(caps.sum(), 1e-12))
        lo = y.min() - caps.max() - 1.0
        hi = y.max() + 1.0
        for _ in range(iters):
            lam = 0.5 * (lo + hi)
            x = np.clip(y - lam, 0.0, caps)
            s = x.sum()
            if s > 1.0:
                lo = lam
            else:
                hi = lam
        lam = 0.5 * (lo + hi)
        x = np.clip(y - lam, 0.0, caps)
        if not (abs(x.sum() - 1.0) < 1e-6 and np.all(x <= caps + 1e-9)):
            lam = lam + (x.sum() - 1.0) * 1e-3
            x = np.clip(y - lam, 0.0, caps)
        return x

    best_ll, best_W = 1e9, None
    for s in range(starts):
        if s % 16 == 0:
            print(f"    start {s}/{starts}", flush=True)
        W = np.zeros((M, C), dtype=np.float64)
        for c in range(C):
            W[:, c] = project_on_capped_simplex(rng.rand(M), caps)
            assert abs(W[:, c].sum() - 1.0) < 1e-6
            assert np.all(W[:, c] <= caps + 1e-9)
        eta = 0.25
        for t in range(iters):
            for c in range(C):
                S = np.einsum('nmc,mc->nc', Z, W)
                P = softmax(S)
                diff = (P[:, c] - Y[:, c]).reshape(-1, 1)
                g = (diff * Z[:, :, c]).mean(axis=0)
                g += lambda_ent * (np.log(np.clip(W[:, c], 1e-12, 1.0)) + 1.0)
                w_unconstrained = W[:, c] * np.exp(-eta * g)
                W[:, c] = project_on_capped_simplex(w_unconstrained, caps)
                assert abs(W[:, c].sum() - 1.0) < 1e-6
                assert np.all(W[:, c] <= caps + 1e-9)
            eta *= 0.96
        ll = log_loss(y_true, clip_renorm(softmax(np.einsum('nmc,mc->nc', Z, W))), labels=[0,1,2])
        if ll < best_ll:
            best_ll, best_W = ll, W.copy()
    print("  mean var across classes:", float(np.var(best_W, axis=1).mean()))
    return best_ll, best_W

# Portfolio (revert to known-good 9-core + diversity tiny caps)
portfolio = [
    'nbsvm_wc_tweaked',
    'nbsvm_char_2_6_counts',
    'nbsvm_char_2_7_presence',
    'lr_char_1_8_hero',
    'lr_word13_charwb36',
    'lr_char_1_7',
    'lr_charwb_1_6',
    'lr_wordchar_fixed',
    'svc_char_1_6_iso',
]
for k in ['char5lm','stylo_lr']:
    if k in loaded:
        portfolio.append(k)
print("Using portfolio (4-bin classwise):", portfolio)

# Map per-model solo OOF for weak caps
solo_oof = {k: v for k, v in summary}
weak_cap = 0.09

# Precompute global per-model temperatures
model_global_T = {}
for k in portfolio:
    logP_all = np.log(clip_renorm(loaded[k]['oof']))
    model_global_T[k] = fit_scalar_temperature(logP_all, y, T_bounds=(0.8,1.3), steps=28)

nb_like = {'nbsvm_wc_tweaked','nbsvm_char_2_6_counts','nbsvm_char_2_7_presence','mnb_char_2_6'}
ultra_weak = {'char5lm','stylo_lr'}
nb_caps_4 = [0.68, 0.65, 0.62, 0.58]  # per bin
tiny_caps_4 = [0.010, 0.010, 0.008, 0.006]
global_cap = 0.55

oof_blend = np.zeros((len(train), 3), dtype=float)
test_blend = np.zeros((len(test), 3), dtype=float)

for b in range(4):
    tr_idx = np.where(bins_tr==b)[0]
    te_idx = np.where(bins_te==b)[0]
    print(f"Bin {b} ({bin_names4[b]}): n_tr={len(tr_idx)} n_te={len(te_idx)}")
    # Build Z tensors (logP after per-model per-bin temperature) for train bin and test bin
    Z_tr_list = []
    Z_te_list = []
    caps = []
    for k in portfolio:
        O_bin = loaded[k]['oof'][tr_idx]
        logP_tr_raw = np.log(clip_renorm(O_bin))
        # fit per-bin T for this model
        Tb = fit_scalar_temperature(logP_tr_raw, y[tr_idx], T_bounds=(0.8,1.3), steps=24)
        Tg = model_global_T[k]
        Teff = float(np.clip(0.85*Tg + 0.15*Tb, 0.8, 1.3))
        Z_tr_list.append(apply_temperature(logP_tr_raw, Teff))
        # apply bin temps only to that bin's test rows
        if len(te_idx) > 0:
            T_bin = loaded[k]['test'][te_idx]
            logP_te_raw = np.log(clip_renorm(T_bin))
            Z_te_list.append(apply_temperature(logP_te_raw, Teff))
        # caps
        if k in nb_like:
            cap_k = nb_caps_4[b]
        elif k in ultra_weak:
            cap_k = tiny_caps_4[b]
        else:
            cap_k = global_cap
        solo = solo_oof.get(k, 0.0)
        if solo > 0.40:
            cap_k = min(cap_k, weak_cap)
        caps.append(cap_k)
    Z_tr = np.stack(Z_tr_list, axis=1)  # (n_bin, M, C) in log-space (no re-softmax)
    caps = np.asarray(caps, dtype=float)
    M = Z_tr.shape[1]
    print(f"  Models: {M}, caps min/max: {caps.min():.3f}/{caps.max():.3f}")
    # Fit classwise weights on this bin with tiny entropy regularization
    best_ll, W = fit_classwise_weights(Z_tr, y[tr_idx], caps, starts=96, iters=80, lambda_ent=1e-5, seed=42)
    print(f"  Best OOF bin logloss: {best_ll:.6f}")
    # Blend for train bin and test bin indices
    Po = classwise_lop(Z_tr, W)
    oof_blend[tr_idx] = Po
    if len(te_idx) > 0:
        Z_te = np.stack(Z_te_list, axis=1)  # (n_te_bin, M, C)
        Pt = classwise_lop(Z_te, W)
        test_blend[te_idx] = Pt

oof_ll = log_loss(y, clip_renorm(oof_blend), labels=[0,1,2])
print(f"\nClasswise 4-bin gated LOP (overlay OFF). OOF={oof_ll:.5f}")

sub = pd.read_csv('sample_submission.csv')
sub[classes] = clip_renorm(test_blend)
sub.to_csv('submission.csv', index=False)
print("Saved submission.csv")

In [None]:
# Quick finalize from existing arrays (avoid re-running bin loop)
import numpy as np, pandas as pd, time
from sklearn.metrics import log_loss

def clip_renorm(P, eps=1e-6):
    P = np.asarray(P, dtype=np.float64)
    P = np.clip(P, eps, 1.0)
    P = P / P.sum(axis=1, keepdims=True)
    return P

def overlay_confidence(P, alpha_max=0.08):
    m = P.max(axis=1, keepdims=True)
    gamma = alpha_max * (1.0 - m)
    U = np.full_like(P, 1.0 / P.shape[1])
    Q = (1.0 - gamma) * P + gamma * U
    return Q / Q.sum(axis=1, keepdims=True)

assert 'train' in globals() and 'test' in globals()
classes = ['EAP','HPL','MWS']
y = train['author'].map({c:i for i,c in enumerate(classes)}).values

if 'oof_blend' in globals() and 'test_blend' in globals():
    print('Found existing oof_blend/test_blend with shapes:',
          np.shape(oof_blend), np.shape(test_blend), flush=True)
    assert oof_blend.shape == (len(train), 3)
    assert test_blend.shape == (len(test), 3)
    t0 = time.time()
    oof_ll_no_overlay = log_loss(y, clip_renorm(oof_blend), labels=[0,1,2])
    print(f"OOF (no overlay): {oof_ll_no_overlay:.5f}", flush=True)
    oof_ov = overlay_confidence(oof_blend, alpha_max=0.08)
    te_ov = overlay_confidence(test_blend, alpha_max=0.08)
    oof_ll = log_loss(y, clip_renorm(oof_ov), labels=[0,1,2])
    print(f"OOF (with overlay 0.08): {oof_ll:.5f}", flush=True)
    sub = pd.read_csv('sample_submission.csv')
    sub[classes] = clip_renorm(te_ov)
    sub.to_csv('submission.csv', index=False)
    print('Saved submission.csv. Elapsed {:.2f}s'.format(time.time()-t0), flush=True)
else:
    print('oof_blend/test_blend not found in kernel. Re-run the bin loop cell first.', flush=True)

In [7]:
# Per-bin cross-fit multinomial logistic stacker over all cached OOF models (no calibration yet)
import numpy as np, pandas as pd, time
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

t0 = time.time()
assert 'loaded' in globals() and 'train' in globals() and 'test' in globals(), 'Run audit first.'
classes = ['EAP','HPL','MWS']
label_map = {c:i for i,c in enumerate(classes)}
y = train['author'].map(label_map).values

# Use all available base models as features
model_keys = sorted(loaded.keys())
print('Using base models (count):', len(model_keys))

def clip_renorm(P, eps=1e-8):
    P = np.asarray(P, dtype=np.float64)
    P = np.clip(P, eps, 1.0 - eps*2)
    P = P / P.sum(axis=1, keepdims=True)
    return P

def build_features(split='oof'):
    feats = []
    for k in model_keys:
        P = loaded[k][split]
        feats.append(np.log(clip_renorm(P)))  # logits-like
    X = np.concatenate(feats, axis=1)  # shape (N, M*3)
    return X

X_tr_all = build_features('oof')
X_te_all = build_features('test')
print('Feature shapes:', X_tr_all.shape, X_te_all.shape, flush=True)

# Define 5 length bins (<=80, 81-130, 131-200, 201-280, >280)
lens_tr = train['text'].astype(str).str.len().values
lens_te = test['text'].astype(str).str.len().values
cuts = np.array([80, 130, 200, 280])
bins_tr = np.digitize(lens_tr, cuts, right=True)  # 0..4
bins_te = np.digitize(lens_te, cuts, right=True)
bin_names = ['vshort','short','mid','long','vlong']
print('Bin counts:', [int((bins_tr==b).sum()) for b in range(5)], flush=True)

# Load fixed 5-fold indices saved in audit
folds = np.load('folds_seed42_5.npy')
n_folds = int(folds.max()+1)
print('Folds:', n_folds, np.bincount(folds), flush=True)

oof_meta = np.zeros((len(train), 3), dtype=float)
test_meta_parts = [np.zeros((len(test), 3), dtype=float) for _ in range(n_folds)]

Cs_grid = [0.25, 0.5, 1.0, 2.0, 4.0]
inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

def fit_lr_selectC(X, y, Cs, cv):
    # simple inner CV to pick C by mean val log_loss
    best_C = Cs[0]; best_score = 1e9
    for C in Cs:
        scores = []
        for it, (tr, va) in enumerate(cv.split(X, y)):
            clf = LogisticRegression(penalty='l2', C=C, solver='lbfgs',
                                     multi_class='multinomial', max_iter=2000, n_jobs=None, random_state=42)
            clf.fit(X[tr], y[tr])
            P = clf.predict_proba(X[va])
            scores.append(log_loss(y[va], clip_renorm(P), labels=[0,1,2]))
        m = float(np.mean(scores))
        if m < best_score:
            best_score, best_C = m, C
    # refit on full X with best_C
    clf = LogisticRegression(penalty='l2', C=best_C, solver='lbfgs',
                             multi_class='multinomial', max_iter=2000, n_jobs=None, random_state=42)
    clf.fit(X, y)
    return clf, best_C, best_score

overall_t1 = time.time()
for b in range(5):
    tr_idx_bin = np.where(bins_tr==b)[0]
    te_idx_bin = np.where(bins_te==b)[0]
    if len(tr_idx_bin) == 0:
        continue
    print(f'Bin {b} ({bin_names[b]}): n_tr={len(tr_idx_bin)} n_te={len(te_idx_bin)}', flush=True)
    Xb = X_tr_all[tr_idx_bin]
    yb = y[tr_idx_bin]
    Xb_te = X_te_all[te_idx_bin] if len(te_idx_bin)>0 else None
    t_bin = time.time()
    for f in range(n_folds):
        tr_mask = (folds[tr_idx_bin] != f)
        va_mask = ~tr_mask
        X_tr_bin_fold = Xb[tr_mask]
        y_tr_bin_fold = yb[tr_mask]
        X_va_bin_fold = Xb[va_mask]
        # Select C via inner CV on current fold's training split
        clf, bestC, best_inner = fit_lr_selectC(X_tr_bin_fold, y_tr_bin_fold, Cs_grid, inner_cv)
        oof_meta[tr_idx_bin[va_mask]] = clf.predict_proba(X_va_bin_fold)
        if Xb_te is not None and len(te_idx_bin)>0:
            test_meta_parts[f][te_idx_bin] = clf.predict_proba(Xb_te)
        print(f'  Fold {f}: tr={tr_mask.sum()} va={va_mask.sum()} bestC={bestC} innerLL={best_inner:.5f} elapsed={time.time()-t_bin:.2f}s', flush=True)
    print(f'  Bin {b} done in {time.time()-t_bin:.2f}s', flush=True)

# Average test predictions across folds
test_meta = np.zeros_like(test_meta_parts[0])
for f in range(n_folds):
    test_meta += test_meta_parts[f] / n_folds

oof_ll = log_loss(y, clip_renorm(oof_meta), labels=[0,1,2])
print(f'OOF (stacker, no calibration): {oof_ll:.5f}', flush=True)

# Save submission
sub = pd.read_csv('sample_submission.csv')
sub[classes] = clip_renorm(test_meta)
sub.to_csv('submission.csv', index=False)
print('Saved submission.csv. Total elapsed {:.2f}s'.format(time.time()-t0), flush=True)

Using base models (count): 34
Feature shapes: (17621, 102) (1958, 102)


Bin counts: [4330, 4674, 4698, 2459, 1460]


Folds: 5 [3525 3524 3524 3524 3524]


Bin 0 (vshort): n_tr=4330 n_te=466


































  Fold 0: tr=3495 va=835 bestC=0.25 innerLL=0.49089 elapsed=8.88s


































  Fold 1: tr=3445 va=885 bestC=0.25 innerLL=0.48195 elapsed=16.84s


































  Fold 2: tr=3485 va=845 bestC=0.25 innerLL=0.49011 elapsed=25.48s


































  Fold 3: tr=3421 va=909 bestC=0.25 innerLL=0.47749 elapsed=34.50s


































  Fold 4: tr=3474 va=856 bestC=0.25 innerLL=0.48592 elapsed=43.40s


  Bin 0 done in 43.40s


Bin 1 (short): n_tr=4674 n_te=516


































  Fold 0: tr=3714 va=960 bestC=0.25 innerLL=0.35023 elapsed=8.30s


































  Fold 1: tr=3733 va=941 bestC=0.25 innerLL=0.35622 elapsed=17.09s


































  Fold 2: tr=3706 va=968 bestC=0.25 innerLL=0.35013 elapsed=25.73s


































  Fold 3: tr=3777 va=897 bestC=0.25 innerLL=0.36791 elapsed=32.79s


































  Fold 4: tr=3766 va=908 bestC=0.25 innerLL=0.35783 elapsed=41.16s


  Bin 1 done in 41.16s


Bin 2 (mid): n_tr=4698 n_te=506
































  Fold 0: tr=3729 va=969 bestC=0.25 innerLL=0.23343 elapsed=6.37s


































  Fold 1: tr=3784 va=914 bestC=0.25 innerLL=0.22935 elapsed=12.09s


































  Fold 2: tr=3755 va=943 bestC=0.25 innerLL=0.23265 elapsed=19.18s


































  Fold 3: tr=3774 va=924 bestC=0.25 innerLL=0.23667 elapsed=26.79s
































  Fold 4: tr=3750 va=948 bestC=0.25 innerLL=0.23302 elapsed=32.85s


  Bin 2 done in 32.85s


Bin 3 (long): n_tr=2459 n_te=292


























  Fold 0: tr=1990 va=469 bestC=0.25 innerLL=0.20374 elapsed=3.43s






















  Fold 1: tr=1967 va=492 bestC=0.25 innerLL=0.20881 elapsed=6.52s


























  Fold 2: tr=1983 va=476 bestC=0.25 innerLL=0.19492 elapsed=9.83s
























  Fold 3: tr=1954 va=505 bestC=0.25 innerLL=0.18777 elapsed=13.14s






















  Fold 4: tr=1942 va=517 bestC=0.25 innerLL=0.19639 elapsed=16.08s


  Bin 3 done in 16.08s


Bin 4 (vlong): n_tr=1460 n_te=178










  Fold 0: tr=1168 va=292 bestC=0.25 innerLL=0.15731 elapsed=1.15s












  Fold 1: tr=1168 va=292 bestC=0.25 innerLL=0.17108 elapsed=2.40s












  Fold 2: tr=1168 va=292 bestC=0.25 innerLL=0.17149 elapsed=3.42s












  Fold 3: tr=1171 va=289 bestC=0.25 innerLL=0.11922 elapsed=4.56s














  Fold 4: tr=1165 va=295 bestC=0.25 innerLL=0.16312 elapsed=6.20s


  Bin 4 done in 6.20s


OOF (stacker, no calibration): 0.30475


Saved submission.csv. Total elapsed 139.76s


In [8]:
# Per-bin scalar temperature calibration on meta stacker outputs + optional tiny overlay
import numpy as np, pandas as pd, time
from sklearn.metrics import log_loss

assert 'oof_meta' in globals() and 'test_meta' in globals(), 'Run the stacker cell first.'
assert 'train' in globals() and 'test' in globals(), 'Run audit first.'

classes = ['EAP','HPL','MWS']
label_map = {c:i for i,c in enumerate(classes)}
y = train['author'].map(label_map).values

def clip_renorm(P, eps=1e-8):
    P = np.asarray(P, dtype=np.float64)
    P = np.clip(P, eps, 1.0 - eps*2)
    P = P / P.sum(axis=1, keepdims=True)
    return P

def softmax_from_logS(S):
    Smax = S.max(axis=1, keepdims=True)
    eS = np.exp(S - Smax)
    return eS / (eS.sum(axis=1, keepdims=True) + 1e-20)

def fit_scalar_temperature(logP, y_true, T_bounds=(0.65, 1.45), steps=36):
    a, b = T_bounds
    gr = (np.sqrt(5) - 1) / 2
    c = b - gr * (b - a)
    d = a + gr * (b - a)
    def nll(T):
        S = logP / T
        P = softmax_from_logS(S)
        return log_loss(y_true, clip_renorm(P), labels=[0,1,2])
    fc, fd = nll(c), nll(d)
    for _ in range(steps):
        if fc > fd:
            a = c; c = d; fc = fd; d = a + gr * (b - a); fd = nll(d)
        else:
            b = d; d = c; fd = fc; c = b - gr * (b - a); fc = nll(c)
    return float(0.5*(a+b))

# 5-bin definitions used in stacker cell
lens_tr = train['text'].astype(str).str.len().values
lens_te = test['text'].astype(str).str.len().values
cuts = np.array([80, 130, 200, 280])
bins_tr = np.digitize(lens_tr, cuts, right=True)  # 0..4
bins_te = np.digitize(lens_te, cuts, right=True)
bin_names = ['vshort','short','mid','long','vlong']

oof_cal = oof_meta.copy()
test_cal = test_meta.copy()

t0 = time.time()
for b in range(5):
    tr_idx = np.where(bins_tr==b)[0]
    te_idx = np.where(bins_te==b)[0]
    if len(tr_idx) == 0:
        continue
    # work in log-prob space for calibration
    logP_tr = np.log(clip_renorm(oof_meta[tr_idx]))
    T = fit_scalar_temperature(logP_tr, y[tr_idx],
                               T_bounds=(0.65, 1.45) if b==0 else (0.67,1.43) if b==1 else (0.70,1.40) if b==2 else (0.73,1.37),
                               steps=32)
    # apply to train/test bins
    oof_cal[tr_idx] = softmax_from_logS(logP_tr / T)
    if len(te_idx) > 0:
        logP_te = np.log(clip_renorm(test_meta[te_idx]))
        test_cal[te_idx] = softmax_from_logS(logP_te / T)
    print(f'Bin {b} ({bin_names[b]}): T={T:.4f}, n_tr={len(tr_idx)} n_te={len(te_idx)}', flush=True)

oof_ll_cal = log_loss(y, clip_renorm(oof_cal), labels=[0,1,2])
print(f'OOF after per-bin temperature calibration: {oof_ll_cal:.5f}', flush=True)

# Optional tiny overlay for stability (can comment out if undesired)
def overlay_confidence(P, alpha_max=0.05):
    m = P.max(axis=1, keepdims=True)
    gamma = alpha_max * (1.0 - m)
    U = np.full_like(P, 1.0 / P.shape[1])
    Q = (1.0 - gamma) * P + gamma * U
    return clip_renorm(Q)

oof_cal_ov = overlay_confidence(oof_cal, alpha_max=0.05)
oof_ll_cal_ov = log_loss(y, oof_cal_ov, labels=[0,1,2])
print(f'OOF after calib + tiny overlay (0.05): {oof_ll_cal_ov:.5f}', flush=True)

# Save calibrated submission (with tiny overlay applied for test as well)
sub = pd.read_csv('sample_submission.csv')
sub[classes] = overlay_confidence(test_cal, alpha_max=0.05)
sub.to_csv('submission.csv', index=False)
print('Saved submission.csv. Calib elapsed {:.2f}s'.format(time.time()-t0), flush=True)

Bin 0 (vshort): T=1.1170, n_tr=4330 n_te=466


Bin 1 (short): T=1.1091, n_tr=4674 n_te=516


Bin 2 (mid): T=1.1030, n_tr=4698 n_te=506


Bin 3 (long): T=1.3370, n_tr=2459 n_te=292


Bin 4 (vlong): T=1.3700, n_tr=1460 n_te=178


OOF after per-bin temperature calibration: 0.30023


OOF after calib + tiny overlay (0.05): 0.30006


Saved submission.csv. Calib elapsed 0.23s


In [None]:
# ElasticNet multinomial stacker with meta features and quick prune (FAST: fixed C,l1_ratio per bin, no inner CV)
import numpy as np, pandas as pd, time, math
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

t0 = time.time()
assert 'loaded' in globals() and 'train' in globals() and 'test' in globals() and 'summary' in globals(), 'Run audit first.'
classes = ['EAP','HPL','MWS']
label_map = {c:i for i,c in enumerate(classes)}
y = train['author'].map(label_map).values

# Quick prune per expert
drop_set = {'char5lm','stylo_lr','nbsvm_wordchar','nbsvm_word','cnb_word','svc_word_uni_iso'}
solo = dict(summary)  # key -> OOF
model_keys = [k for k,_ in summary if (k not in drop_set) and (solo.get(k, 1e9) <= 0.46)]
print('Using base models after prune (count):', len(model_keys))

def clip_renorm(P, eps=1e-8):
    P = np.asarray(P, dtype=np.float64)
    P = np.clip(P, eps, 1.0 - eps*2)
    P = P / P.sum(axis=1, keepdims=True)
    return P

def build_logits(split='oof'):
    feats = []
    for k in model_keys:
        P = loaded[k][split]
        feats.append(np.log(clip_renorm(P)))  # (N,3)
    X = np.concatenate(feats, axis=1) if feats else np.zeros((len(train if split=='oof' else test),0))
    return X

X_tr_logits = build_logits('oof')
X_te_logits = build_logits('test')
print('Logit feature shapes:', X_tr_logits.shape, X_te_logits.shape, flush=True)

# Meta features (5): log_len, punct_ratio, digit_ratio, upper_ratio, entropy_top (from nbsvm_wc_tweaked)
text_tr = train['text'].astype(str).values
text_te = test['text'].astype(str).values
def meta_from_text(texts):
    L = np.array([len(t) for t in texts], dtype=np.float64)
    L[L<1] = 1.0
    log_len = np.log1p(L)
    def cnt(s, chars):
        return sum(s.count(ch) for ch in chars)
    punct_chars = list(".,;:?!'\"-")
    digit_chars = list('0123456789')
    upper_ratio = np.array([sum(ch.isupper() for ch in s)/max(len(s),1) for s in texts], dtype=np.float64)
    punct_ratio = np.array([cnt(s, punct_chars)/max(len(s),1) for s in texts], dtype=np.float64)
    digit_ratio = np.array([cnt(s, digit_chars)/max(len(s),1) for s in texts], dtype=np.float64)
    return np.stack([log_len, punct_ratio, digit_ratio, upper_ratio], axis=1)

X_meta_tr_basic = meta_from_text(text_tr)
X_meta_te_basic = meta_from_text(text_te)

def entropy_rows(P):
    P = clip_renorm(P)
    return -(P * np.log(np.clip(P, 1e-12, 1.0))).sum(axis=1)

base_key_for_entropy = 'nbsvm_wc_tweaked' if 'nbsvm_wc_tweaked' in loaded else summary[0][0]
ent_tr = entropy_rows(loaded[base_key_for_entropy]['oof']).reshape(-1,1)
ent_te = entropy_rows(loaded[base_key_for_entropy]['test']).reshape(-1,1)

# Assemble meta features
X_meta_tr_full = np.concatenate([X_meta_tr_basic, ent_tr], axis=1)  # (N,5)
X_meta_te_full = np.concatenate([X_meta_te_basic, ent_te], axis=1)  # (Nte,5)

# Define 5 length bins
lens_tr = np.array([len(t) for t in text_tr])
lens_te = np.array([len(t) for t in text_te])
cuts = np.array([80,130,200,280])
bins_tr = np.digitize(lens_tr, cuts, right=True)  # 0..4
bins_te = np.digitize(lens_te, cuts, right=True)
bin_names = ['vshort','short','mid','long','vlong']
print('Bin counts:', [int((bins_tr==b).sum()) for b in range(5)], flush=True)

# Folds
folds = np.load('folds_seed42_5.npy')
n_folds = int(folds.max()+1)
print('Folds:', n_folds, np.bincount(folds), flush=True)

# Outputs
oof_meta = np.zeros((len(train), 3), dtype=float)
test_meta_parts = [np.zeros((len(test), 3), dtype=float) for _ in range(n_folds)]

# Fixed ElasticNet hyperparams (fast, strong defaults)
fixed_C = 0.5
fixed_l1 = 0.5

overall_t1 = time.time()
for b in range(5):
    tr_idx_bin = np.where(bins_tr==b)[0]
    te_idx_bin = np.where(bins_te==b)[0]
    if len(tr_idx_bin) == 0:
        continue
    print(f'Bin {b} ({bin_names[b]}): n_tr={len(tr_idx_bin)} n_te={len(te_idx_bin)}', flush=True)
    # Standardize meta features per bin using training-bin statistics
    Xm_tr = X_meta_tr_full[tr_idx_bin].astype(np.float64)
    mu = Xm_tr.mean(axis=0, keepdims=True)
    sd = Xm_tr.std(axis=0, keepdims=True) + 1e-8
    Xm_tr_z = (Xm_tr - mu) / sd
    Xm_te = X_meta_te_full[te_idx_bin].astype(np.float64) if len(te_idx_bin)>0 else np.zeros((0, Xm_tr.shape[1]))
    Xm_te_z = (Xm_te - mu) / sd if len(te_idx_bin)>0 else Xm_te
    # Concatenate logits + meta
    Xb_all = np.concatenate([X_tr_logits[tr_idx_bin], Xm_tr_z], axis=1)
    Xb_te_all = np.concatenate([X_te_logits[te_idx_bin], Xm_te_z], axis=1) if len(te_idx_bin)>0 else None
    yb = y[tr_idx_bin]

    t_bin = time.time()
    for f in range(n_folds):
        tr_mask = (folds[tr_idx_bin] != f)
        va_mask = ~tr_mask
        X_tr_fold = Xb_all[tr_mask]
        y_tr_fold = yb[tr_mask]
        X_va_fold = Xb_all[va_mask]
        clf = LogisticRegression(solver='saga', penalty='elasticnet', l1_ratio=fixed_l1, C=fixed_C,
                                 max_iter=3000, tol=1e-4)
        clf.fit(X_tr_fold, y_tr_fold)
        oof_meta[tr_idx_bin[va_mask]] = clf.predict_proba(X_va_fold)
        if Xb_te_all is not None and len(te_idx_bin)>0:
            test_meta_parts[f][te_idx_bin] = clf.predict_proba(Xb_te_all)
        print(f'  Fold {f}: tr={tr_mask.sum()} va={va_mask.sum()} elapsed={time.time()-t_bin:.2f}s', flush=True)
    print(f'  Bin {b} done in {time.time()-t_bin:.2f}s', flush=True)

# Average test predictions across folds
test_meta = np.zeros_like(test_meta_parts[0])
for f in range(n_folds):
    test_meta += test_meta_parts[f] / n_folds

oof_ll = log_loss(y, clip_renorm(oof_meta), labels=[0,1,2])
print(f'OOF (ElasticNet stacker, no calibration): {oof_ll:.5f}', flush=True)

# Emit a submission (raw meta) for sanity; calibration cell can be run next
sub = pd.read_csv('sample_submission.csv')
sub[classes] = clip_renorm(test_meta)
sub.to_csv('submission.csv', index=False)
print('Saved submission.csv. Total elapsed {:.2f}s'.format(time.time()-t0), flush=True)

Using base models after prune (count): 27
Logit feature shapes: (17621, 81) (1958, 81)


Bin counts: [4330, 4674, 4698, 2459, 1460]


Folds: 5 [3525 3524 3524 3524 3524]


Bin 0 (vshort): n_tr=4330 n_te=466
