In [1]:
# Final submission: per-model isotonic calibration ensemble for top-5 keys, strict thresholding, write submission.csv
import os, numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import minimize

print('Running per-model isotonic calibration ensemble...', flush=True)

# Explicit top-5 keys from best subset
keys = ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
}
# Allow fallbacks for test preds (pick first existing)
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
}

# Targets
y = None
for tgt in ['oof_targets_b4.npy', 'oof_targets.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float)
        break
if y is None:
    raise RuntimeError('OOF targets not found')

# Resolve test paths with fallbacks
paths_te = {}
for k in keys:
    opts = paths_te_opts.get(k, [])
    chosen = None
    for p in opts:
        if os.path.exists(p):
            chosen = p; break
    if chosen is not None:
        paths_te[k] = chosen

# Load arrays
oof_list = []; te_list = []; usable_keys = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)):
        print(f'Skip key {k}: missing OOF path', flush=True)
        continue
    if pt is None or (not os.path.exists(pt)):
        print(f'Skip key {k}: missing TEST path', flush=True)
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te = np.load(pt).reshape(-1).astype(float)
    # Ensure finite
    if not np.isfinite(a_oof).any():
        print(f'Skip key {k}: non-finite OOF', flush=True)
        continue
    if not np.isfinite(a_te).all():
        med = float(np.nanmedian(a_te[np.isfinite(a_te)]))
        a_te = np.where(np.isfinite(a_te), a_te, med).astype(float)
    oof_list.append(a_oof); te_list.append(a_te); usable_keys.append(k)

if len(oof_list) == 0:
    raise RuntimeError('No usable models for per-model isotonic')

print('Using keys:', usable_keys, flush=True)

# Per-model isotonic calibration to map EV->target on OOF, then transform both OOF and Test
cal_oof_list = []; cal_te_list = []
for i, (a_oof, a_te) in enumerate(zip(oof_list, te_list)):
    mask_fit = np.isfinite(y) & np.isfinite(a_oof)
    ir = IsotonicRegression(increasing=True, out_of_bounds='clip')
    ir.fit(a_oof[mask_fit], y[mask_fit])
    cal_o = a_oof.copy()
    cal_o[mask_fit] = ir.transform(a_oof[mask_fit])
    cal_o = np.where(np.isfinite(cal_o), cal_o, a_oof)
    cal_t = ir.transform(a_te)
    cal_o = np.clip(cal_o, 0.0, 4.0)
    cal_t = np.clip(cal_t, 0.0, 4.0)
    cal_oof_list.append(cal_o); cal_te_list.append(cal_t)

# Equal-weight mean of calibrated EVs
cal_oof_stack = np.stack(cal_oof_list, axis=1)
cal_te_stack = np.stack(cal_te_list, axis=1)
blend_oof = cal_oof_stack.mean(axis=1)
blend_te = cal_te_stack.mean(axis=1)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

def optimize_thresholds_strict(y_true, p, init=[0.57,1.42,2.43,3.19]):
    y_true = np.asarray(y_true).astype(float); p = np.asarray(p).astype(float)
    def _loss(th):
        th = np.sort(th); th = np.clip(th, 0.3, 3.7)
        for i in range(1,4):
            if th[i] - th[i-1] < 0.12: th[i] = th[i-1] + 0.12
        cls = preds_to_classes(p, th)
        return -cohen_kappa_score(y_true, cls, weights='quadratic')
    res = minimize(_loss, x0=np.array(init, dtype=float), method='Nelder-Mead', options={'maxiter':1500, 'xatol':1e-3, 'fatol':1e-3})
    th = np.sort(res.x)
    for i in range(1,4):
        if th[i] - th[i-1] < 0.12: th[i] = th[i-1] + 0.12
    th = np.clip(th, 0.3, 3.7)
    return th

# Optimize thresholds on calibrated OOF
mask_eval = np.isfinite(y) & np.isfinite(blend_oof)
th0 = optimize_thresholds_strict(y[mask_eval], blend_oof[mask_eval], init=[0.57,1.42,2.43,3.19])
q0 = cohen_kappa_score(y[mask_eval], preds_to_classes(blend_oof[mask_eval], th0), weights='quadratic')
print('Per-model-iso OOF QWK:', f'{q0:.5f}', 'th:', th0, flush=True)

# Optional small th3 safety nudge +0.02 if drop <= 0.0005
t_safe = th0.copy(); t_safe[3] = min(3.7, t_safe[3] + 0.02)
q_safe = cohen_kappa_score(y[mask_eval], preds_to_classes(blend_oof[mask_eval], t_safe), weights='quadratic')
chosen_th = t_safe if (q0 - q_safe) <= 0.0005 else th0
print('Chosen OOF QWK:', f"{(q_safe if (q0 - q_safe) <= 0.0005 else q0):.5f}", 'chosen_th:', chosen_th, flush=True)

# Apply to Test and write submission
cls = preds_to_classes(blend_te, chosen_th).astype(int)
sub = pd.DataFrame({'id_code': pd.read_csv('test.csv')['id_code'].values, 'diagnosis': cls})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_permodel_iso.npy', chosen_th)
print('submission.csv written (per-model isotonic ensemble).', flush=True)

Running per-model isotonic calibration ensemble...


Using keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']


Per-model-iso OOF QWK: 0.88606 th: [0.5878599  1.47499928 2.42169358 3.01298325]


Chosen OOF QWK: 0.88611 chosen_th: [0.5878599  1.47499928 2.42169358 3.03298325]


submission.csv written (per-model isotonic ensemble).


In [2]:
# NNLS-weighted blend on per-model isotonic calibrated EVs; write improved submission if any
import os, numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import minimize, nnls

print('Running NNLS-weighted per-model isotonic ensemble...', flush=True)

keys = ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
}
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
}

# Targets
y = None
for tgt in ['oof_targets_b4.npy', 'oof_targets.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float)
        break
if y is None:
    raise RuntimeError('OOF targets not found')

# Resolve test paths
paths_te = {}
for k in keys:
    for p in paths_te_opts.get(k, []):
        if os.path.exists(p):
            paths_te[k] = p; break

# Load arrays and fit per-model isotonic
oof_list = []; te_list = []; used_keys = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)) or pt is None or (not os.path.exists(pt)):
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te = np.load(pt).reshape(-1).astype(float)
    if not np.isfinite(a_oof).any():
        continue
    if not np.isfinite(a_te).all():
        med = float(np.nanmedian(a_te[np.isfinite(a_te)]))
        a_te = np.where(np.isfinite(a_te), a_te, med).astype(float)
    mask_fit = np.isfinite(y) & np.isfinite(a_oof)
    ir = IsotonicRegression(increasing=True, out_of_bounds='clip')
    ir.fit(a_oof[mask_fit], y[mask_fit])
    cal_o = a_oof.copy(); cal_o[mask_fit] = ir.transform(a_oof[mask_fit])
    cal_o = np.where(np.isfinite(cal_o), cal_o, a_oof); cal_o = np.clip(cal_o, 0.0, 4.0)
    cal_t = ir.transform(a_te); cal_t = np.clip(cal_t, 0.0, 4.0)
    oof_list.append(cal_o); te_list.append(cal_t); used_keys.append(k)

if len(oof_list) == 0:
    raise RuntimeError('No usable calibrated arrays')

X = np.stack(oof_list, axis=1)  # (n, m)
mask = np.isfinite(y) & np.isfinite(X).all(axis=1)
A = X[mask]; b = y[mask]

# NNLS to get non-negative weights
w_raw, _ = nnls(A, b)
if w_raw.sum() <= 0:
    w = np.ones_like(w_raw) / len(w_raw)
else:
    w = w_raw / w_raw.sum()
print('Used keys:', used_keys, 'weights:', np.round(w, 4), flush=True)

# Blends
blend_oof = (X * w.reshape(1, -1)).sum(axis=1)
te_stack = np.stack(te_list, axis=1)
blend_te = (te_stack * w.reshape(1, -1)).sum(axis=1)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

def optimize_thresholds_strict(y_true, p, init=[0.57,1.42,2.43,3.03]):
    y_true = np.asarray(y_true).astype(float); p = np.asarray(p).astype(float)
    def _loss(th):
        th = np.sort(th); th = np.clip(th, 0.3, 3.7)
        for i in range(1,4):
            if th[i] - th[i-1] < 0.12: th[i] = th[i-1] + 0.12
        cls = preds_to_classes(p, th)
        return -cohen_kappa_score(y_true, cls, weights='quadratic')
    res = minimize(_loss, x0=np.array(init, dtype=float), method='Nelder-Mead', options={'maxiter':1200, 'xatol':1e-3, 'fatol':1e-3})
    th = np.sort(res.x)
    for i in range(1,4):
        if th[i] - th[i-1] < 0.12: th[i] = th[i-1] + 0.12
    th = np.clip(th, 0.3, 3.7)
    return th

th = optimize_thresholds_strict(y[mask], blend_oof[mask], init=[0.5879, 1.4750, 2.4217, 3.0130])
q = cohen_kappa_score(y[mask], preds_to_classes(blend_oof[mask], th), weights='quadratic')
print('NNLS per-model-iso OOF QWK:', f'{q:.5f}', 'th:', th, flush=True)

# Write submission
cls = preds_to_classes(blend_te, th).astype(int)
sub = pd.DataFrame({'id_code': pd.read_csv('test.csv')['id_code'].values, 'diagnosis': cls})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_permodel_iso_nnls.npy', th); np.save('weights_permodel_iso_nnls.npy', w)
print('submission.csv written (per-model-isotonic + NNLS weights).', flush=True)

Running NNLS-weighted per-model isotonic ensemble...


Used keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640'] weights: [0.3242 0.2596 0.2151 0.1329 0.0682]


NNLS per-model-iso OOF QWK: 0.88934 th: [0.59176262 1.6049918  2.37433822 2.91447651]


submission.csv written (per-model-isotonic + NNLS weights).


In [3]:
# Extended set: per-model isotonic + NNLS over 8 models; write submission
import os, numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import minimize, nnls

print('Running extended per-model isotonic + NNLS over up to 8 models...', flush=True)

keys = [
    'b5_512_rrcema',
    'serx50_512_rrcema',
    'b5_512',
    'b4_512',
    'b4_640',
    'r200d_512_rrcema',
    'convnextb_512_rrcema',
    'serx50_512_rrcema_s2',
]
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
    'serx50_512_rrcema_s2': 'oof_preds_serx50_512_rrc_ema_seed2026.npy',
    'r200d_512_rrcema': 'oof_preds_r200d_512_rrc_ema.npy',
    'convnextb_512_rrcema': 'oof_preds_convnextb_512_rrc_ema.npy',
}
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
    'serx50_512_rrcema_s2': ['test_reg_preds_serx50_512_rrc_ema_seed2026.npy'],
    'r200d_512_rrcema': ['test_reg_preds_r200d_512_rrc_ema.npy'],
    'convnextb_512_rrcema': ['test_reg_preds_convnextb_512_rrc_ema.npy'],
}

y = None
for tgt in ['oof_targets.npy', 'oof_targets_b4.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float); break
if y is None: raise RuntimeError('OOF targets not found')

paths_te = {}
for k in keys:
    for p in paths_te_opts.get(k, []):
        if os.path.exists(p):
            paths_te[k] = p; break

oof_list = []; te_list = []; used_keys = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)) or pt is None or (not os.path.exists(pt)):
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te = np.load(pt).reshape(-1).astype(float)
    if not np.isfinite(a_oof).any():
        continue
    if not np.isfinite(a_te).all():
        med = float(np.nanmedian(a_te[np.isfinite(a_te)]))
        a_te = np.where(np.isfinite(a_te), a_te, med).astype(float)
    mask_fit = np.isfinite(y) & np.isfinite(a_oof)
    ir = IsotonicRegression(increasing=True, out_of_bounds='clip')
    ir.fit(a_oof[mask_fit], y[mask_fit])
    cal_o = a_oof.copy(); cal_o[mask_fit] = ir.transform(a_oof[mask_fit])
    cal_o = np.where(np.isfinite(cal_o), cal_o, a_oof); cal_o = np.clip(cal_o, 0.0, 4.0)
    cal_t = ir.transform(a_te); cal_t = np.clip(cal_t, 0.0, 4.0)
    oof_list.append(cal_o); te_list.append(cal_t); used_keys.append(k)

if len(oof_list) == 0: raise RuntimeError('No usable calibrated arrays')

X = np.stack(oof_list, axis=1)
mask = np.isfinite(y) & np.isfinite(X).all(axis=1)
A = X[mask]; b = y[mask]
w_raw, _ = nnls(A, b)
w = w_raw / (w_raw.sum() if w_raw.sum() > 0 else len(w_raw))
print('Used keys:', used_keys, 'weights:', np.round(w, 4), flush=True)

blend_oof = (X * w.reshape(1, -1)).sum(axis=1)
te_stack = np.stack(te_list, axis=1)
blend_te = (te_stack * w.reshape(1, -1)).sum(axis=1)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

def optimize_thresholds_strict(y_true, p, init=[0.58,1.48,2.42,3.03]):
    y_true = np.asarray(y_true).astype(float); p = np.asarray(p).astype(float)
    def _loss(th):
        th = np.sort(th); th = np.clip(th, 0.3, 3.7)
        for i in range(1,4):
            if th[i]-th[i-1] < 0.12: th[i] = th[i-1] + 0.12
        return -cohen_kappa_score(y_true, preds_to_classes(p, th), weights='quadratic')
    res = minimize(_loss, x0=np.array(init, dtype=float), method='Nelder-Mead', options={'maxiter':1200, 'xatol':1e-3, 'fatol':1e-3})
    th = np.sort(res.x)
    for i in range(1,4):
        if th[i]-th[i-1] < 0.12: th[i] = th[i-1] + 0.12
    th = np.clip(th, 0.3, 3.7)
    return th

th = optimize_thresholds_strict(y[mask], blend_oof[mask])
q = cohen_kappa_score(y[mask], preds_to_classes(blend_oof[mask], th), weights='quadratic')
print('Extended NNLS per-model-iso OOF QWK:', f'{q:.5f}', 'th:', th, flush=True)

# Write submission
cls = preds_to_classes(blend_te, th).astype(int)
sub = pd.DataFrame({'id_code': pd.read_csv('test.csv')['id_code'].values, 'diagnosis': cls})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_permodel_iso_nnls_ext.npy', th); np.save('weights_permodel_iso_nnls_ext.npy', w)
print('submission.csv written (extended per-model-isotonic + NNLS).', flush=True)

Running extended per-model isotonic + NNLS over up to 8 models...


Used keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640', 'r200d_512_rrcema', 'convnextb_512_rrcema', 'serx50_512_rrcema_s2'] weights: [0.3055 0.221  0.2014 0.1154 0.0595 0.     0.0971 0.    ]


Extended NNLS per-model-iso OOF QWK: 0.88895 th: [0.58037571 1.59126103 2.37320446 2.93342801]


submission.csv written (extended per-model-isotonic + NNLS).


In [15]:
# Top-5 per-model isotonic + NNLS with 2D (th2, th3) grid refinement; write submission
import os, numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import nnls

print('Running top-5 per-model isotonic + NNLS with 2D threshold refinement...', flush=True)

keys = ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
}
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
}

y = None
for tgt in ['oof_targets.npy', 'oof_targets_b4.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float); break
if y is None: raise RuntimeError('OOF targets not found')

paths_te = {}
for k in keys:
    for p in paths_te_opts.get(k, []):
        if os.path.exists(p):
            paths_te[k] = p; break

# Per-model isotonic
oof_list = []; te_list = []; used_keys = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)) or pt is None or (not os.path.exists(pt)):
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te = np.load(pt).reshape(-1).astype(float)
    if not np.isfinite(a_oof).any():
        continue
    if not np.isfinite(a_te).all():
        med = float(np.nanmedian(a_te[np.isfinite(a_te)]))
        a_te = np.where(np.isfinite(a_te), a_te, med).astype(float)
    mask_fit = np.isfinite(y) & np.isfinite(a_oof)
    ir = IsotonicRegression(increasing=True, out_of_bounds='clip')
    ir.fit(a_oof[mask_fit], y[mask_fit])
    cal_o = a_oof.copy(); cal_o[mask_fit] = ir.transform(a_oof[mask_fit])
    cal_o = np.where(np.isfinite(cal_o), cal_o, a_oof); cal_o = np.clip(cal_o, 0.0, 4.0)
    cal_t = ir.transform(a_te); cal_t = np.clip(cal_t, 0.0, 4.0)
    oof_list.append(cal_o); te_list.append(cal_t); used_keys.append(k)

if len(oof_list) == 0: raise RuntimeError('No usable calibrated arrays')

X = np.stack(oof_list, axis=1)
mask = np.isfinite(y) & np.isfinite(X).all(axis=1)
A = X[mask]; b = y[mask]
w_raw, _ = nnls(A, b)
w = w_raw / (w_raw.sum() if w_raw.sum() > 0 else len(w_raw))
print('Used keys:', used_keys, 'weights:', np.round(w, 4), flush=True)

blend_oof = (X * w.reshape(1, -1)).sum(axis=1)
te_stack = np.stack(te_list, axis=1)
blend_te = (te_stack * w.reshape(1, -1)).sum(axis=1)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

# Start from previous best NNLS per-model iso thresholds if exist, else use reasonable init
t0 = np.array([0.5918, 1.6050, 2.3743, 2.9145], dtype=float)
try:
    t_prev = np.load('thresholds_permodel_iso_nnls.npy')
    if t_prev.shape == (4,): t0 = t_prev.astype(float)
except Exception:
    pass

# 2D grid around th2, th3
th2_c, th3_c = float(t0[2]), float(t0[3])
th2_min = max(0.6, th2_c - 0.12); th2_max = min(3.4, th2_c + 0.12)
th3_min = max(th2_c + 0.12, th3_c - 0.12); th3_max = min(3.7, th3_c + 0.12)
g2 = np.arange(th2_min, th2_max + 1e-12, 0.005)
g3 = np.arange(th3_min, th3_max + 1e-12, 0.005)
best_q = -1.0; best_th = t0.copy()
y_m = y[mask]; p_m = blend_oof[mask]
for i, t2 in enumerate(g2):
    # enforce 0.12 gaps
    t1_min = max(0.3, t2 - 2.0); t1_max = min(t2 - 0.12, 2.8)
    t0_min = max(0.3, t1_min - 1.0); t0_max = min(t1_max - 0.12, 1.6)
    # keep t0,t1 clamped to previous for speed but valid
    th_try = best_th.copy()
    th_try[2] = t2
    # sweep th3 for this th2
    for j, t3 in enumerate(g3):
        if t3 - t2 < 0.12: continue
        th_try[3] = t3
        # ensure lower thresholds maintain gaps with new th2
        if th_try[1] >= th_try[2]: th_try[1] = max(th_try[1], th_try[2] - 0.12)
        if th_try[0] >= th_try[1]: th_try[0] = max(0.3, th_try[1] - 0.12)
        q = cohen_kappa_score(y_m, preds_to_classes(p_m, th_try), weights='quadratic')
        if q > best_q:
            best_q = q; best_th = th_try.copy()
    if (i+1) % 10 == 0:
        print(f'grid row {i+1}/{len(g2)} best_q={best_q:.5f}', flush=True)

print('2D grid best OOF QWK:', f'{best_q:.5f}', 'best_th:', best_th, flush=True)

# Apply to test and save
cls = preds_to_classes(blend_te, best_th).astype(int)
sub = pd.DataFrame({'id_code': pd.read_csv('test.csv')['id_code'].values, 'diagnosis': cls})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_permodel_iso_nnls_grid2d.npy', best_th); np.save('weights_permodel_iso_nnls_top5.npy', w)
print('submission.csv written (top-5 per-model-iso + NNLS + 2D grid refine).', flush=True)

Running top-5 per-model isotonic + NNLS with 2D threshold refinement...


Used keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640'] weights: [0.3242 0.2596 0.2151 0.1329 0.0682]


grid row 10/49 best_q=0.88690


grid row 20/49 best_q=0.88882


grid row 30/49 best_q=0.88941


grid row 40/49 best_q=0.88941


2D grid best OOF QWK: 0.88941 best_th: [0.59176262 1.6049918  2.37433822 2.92947651]


submission.csv written (top-5 per-model-iso + NNLS + 2D grid refine).


In [5]:
# Add ordinal EV model into per-model isotonic + NNLS top-5, then 2D refine; write submission
import os, numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import nnls

print('Running top-5 + ordinal per-model isotonic + NNLS with 2D threshold refinement...', flush=True)

keys = ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640', 'b5_ordinal']
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
    'b5_ordinal': 'oof_ev_b5_ordinal.npy',
}
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
    'b5_ordinal': ['test_ev_b5_ordinal.npy'],
}

y = None
for tgt in ['oof_targets.npy', 'oof_targets_b4.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float); break
if y is None: raise RuntimeError('OOF targets not found')

paths_te = {}
for k in keys:
    for p in paths_te_opts.get(k, []):
        if os.path.exists(p):
            paths_te[k] = p; break

# Per-model isotonic
oof_list = []; te_list = []; used_keys = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)) or pt is None or (not os.path.exists(pt)):
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te = np.load(pt).reshape(-1).astype(float)
    if not np.isfinite(a_oof).any():
        continue
    if not np.isfinite(a_te).all():
        med = float(np.nanmedian(a_te[np.isfinite(a_te)]))
        a_te = np.where(np.isfinite(a_te), a_te, med).astype(float)
    mask_fit = np.isfinite(y) & np.isfinite(a_oof)
    ir = IsotonicRegression(increasing=True, out_of_bounds='clip')
    ir.fit(a_oof[mask_fit], y[mask_fit])
    cal_o = a_oof.copy(); cal_o[mask_fit] = ir.transform(a_oof[mask_fit])
    cal_o = np.where(np.isfinite(cal_o), cal_o, a_oof); cal_o = np.clip(cal_o, 0.0, 4.0)
    cal_t = ir.transform(a_te); cal_t = np.clip(cal_t, 0.0, 4.0)
    oof_list.append(cal_o); te_list.append(cal_t); used_keys.append(k)

if len(oof_list) == 0: raise RuntimeError('No usable calibrated arrays')

X = np.stack(oof_list, axis=1)
mask = np.isfinite(y) & np.isfinite(X).all(axis=1)
A = X[mask]; b = y[mask]
w_raw, _ = nnls(A, b)
w = w_raw / (w_raw.sum() if w_raw.sum() > 0 else len(w_raw))
print('Used keys:', used_keys, 'weights:', np.round(w, 4), flush=True)

blend_oof = (X * w.reshape(1, -1)).sum(axis=1)
te_stack = np.stack(te_list, axis=1)
blend_te = (te_stack * w.reshape(1, -1)).sum(axis=1)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

# Start from last best if available
t0 = np.array([0.5918, 1.6050, 2.3743, 2.9295], dtype=float)
for cand in ['thresholds_permodel_iso_nnls_grid2d.npy', 'thresholds_permodel_iso_nnls.npy', 'thresholds_permodel_iso.npy']:
    if os.path.exists(cand):
        tt = np.load(cand)
        if getattr(tt, 'shape', None) == (4,):
            t0 = tt.astype(float); break

# 2D grid around th2, th3
th2_c, th3_c = float(t0[2]), float(t0[3])
th2_min = max(0.6, th2_c - 0.12); th2_max = min(3.4, th2_c + 0.12)
th3_min = max(th2_c + 0.12, th3_c - 0.12); th3_max = min(3.7, th3_c + 0.12)
g2 = np.arange(th2_min, th2_max + 1e-12, 0.005)
g3 = np.arange(th3_min, th3_max + 1e-12, 0.005)
best_q = -1.0; best_th = t0.copy()
y_m = y[mask]; p_m = blend_oof[mask]
for i, t2 in enumerate(g2):
    for j, t3 in enumerate(g3):
        if t3 - t2 < 0.12: continue
        th_try = best_th.copy(); th_try[2] = t2; th_try[3] = t3
        if th_try[1] >= th_try[2]: th_try[1] = max(th_try[1], th_try[2] - 0.12)
        if th_try[0] >= th_try[1]: th_try[0] = max(0.3, th_try[1] - 0.12)
        q = cohen_kappa_score(y_m, preds_to_classes(p_m, th_try), weights='quadratic')
        if q > best_q: best_q = q; best_th = th_try.copy()
    if (i+1) % 10 == 0:
        print(f'grid row {i+1}/{len(g2)} best_q={best_q:.5f}', flush=True)

print('2D grid (with ordinal) best OOF QWK:', f'{best_q:.5f}', 'best_th:', best_th, flush=True)

# Apply to test and save
cls = preds_to_classes(blend_te, best_th).astype(int)
sub = pd.DataFrame({'id_code': pd.read_csv('test.csv')['id_code'].values, 'diagnosis': cls})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_permodel_iso_nnls_withord_grid2d.npy', best_th); np.save('weights_permodel_iso_nnls_withord.npy', w)
print('submission.csv written (top-5+ordinal per-model-iso + NNLS + 2D grid refine).', flush=True)

Running top-5 + ordinal per-model isotonic + NNLS with 2D threshold refinement...


Used keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640', 'b5_ordinal'] weights: [0.3242 0.2596 0.2151 0.1329 0.0682 0.    ]


grid row 10/49 best_q=0.88690


grid row 20/49 best_q=0.88882


grid row 30/49 best_q=0.88941


grid row 40/49 best_q=0.88941


2D grid (with ordinal) best OOF QWK: 0.88941 best_th: [0.59176262 1.6049918  2.37433822 2.92947651]


submission.csv written (top-5+ordinal per-model-iso + NNLS + 2D grid refine).


In [6]:
# Rank-averaged ensemble over up to 8 models + strict thresholding; write submission
import os, numpy as np, pandas as pd
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import minimize

print('Running rank-averaged ensemble over up to 8 models...', flush=True)

keys = [
    'b5_512_rrcema',
    'serx50_512_rrcema',
    'b5_512',
    'b4_512',
    'b4_640',
    'r200d_512_rrcema',
    'convnextb_512_rrcema',
    'serx50_512_rrcema_s2',
]
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
    'serx50_512_rrcema_s2': 'oof_preds_serx50_512_rrc_ema_seed2026.npy',
    'r200d_512_rrcema': 'oof_preds_r200d_512_rrc_ema.npy',
    'convnextb_512_rrcema': 'oof_preds_convnextb_512_rrc_ema.npy',
}
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
    'serx50_512_rrcema_s2': ['test_reg_preds_serx50_512_rrc_ema_seed2026.npy'],
    'r200d_512_rrcema': ['test_reg_preds_r200d_512_rrc_ema.npy'],
    'convnextb_512_rrcema': ['test_reg_preds_convnextb_512_rrc_ema.npy'],
}

# Targets
y = None
for tgt in ['oof_targets.npy', 'oof_targets_b4.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float); break
if y is None: raise RuntimeError('OOF targets not found')

# Resolve test paths
paths_te = {}
for k in keys:
    for p in paths_te_opts.get(k, []):
        if os.path.exists(p):
            paths_te[k] = p; break

# Load arrays
oof_list = []; te_list = []; used = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)) or pt is None or (not os.path.exists(pt)):
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te = np.load(pt).reshape(-1).astype(float)
    if not np.isfinite(a_oof).any(): continue
    if not np.isfinite(a_te).all():
        med = float(np.nanmedian(a_te[np.isfinite(a_te)]))
        a_te = np.where(np.isfinite(a_te), a_te, med).astype(float)
    oof_list.append(a_oof); te_list.append(a_te); used.append(k)

if len(oof_list) == 0: raise RuntimeError('No usable arrays for rank blend')
print('Used keys:', used, flush=True)

# Rank-transform to [0,4] using fractional ranks on OOF; apply same order for test per model
def to_rank_scale(arr):
    n = arr.shape[0]
    order = np.argsort(arr)
    ranks = np.empty_like(order, dtype=float)
    ranks[order] = np.arange(n, dtype=float) / max(1, n-1)  # [0,1]
    return ranks * 4.0

rank_oof = []; rank_te = []
for a_oof, a_te in zip(oof_list, te_list):
    r_o = to_rank_scale(a_oof)
    # Map test by interpolation based on OOF value->rank relation
    # Use sorted pairs (x,y) from OOF and np.interp
    xs = np.sort(a_oof)
    ys = np.sort(r_o)
    r_t = np.interp(a_te, xs, ys, left=ys[0], right=ys[-1])
    rank_oof.append(r_o); rank_te.append(r_t)

rank_oof = np.stack(rank_oof, axis=1)
rank_te = np.stack(rank_te, axis=1)

# Equal-weight rank mean (robust)
blend_oof = rank_oof.mean(axis=1)
blend_te = rank_te.mean(axis=1)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

def optimize_thresholds_strict(y_true, p, init=[0.6,1.6,2.4,3.0]):
    y_true = np.asarray(y_true).astype(float); p = np.asarray(p).astype(float)
    def _loss(th):
        th = np.sort(th); th = np.clip(th, 0.3, 3.7)
        for i in range(1,4):
            if th[i]-th[i-1] < 0.12: th[i] = th[i-1] + 0.12
        return -cohen_kappa_score(y_true, preds_to_classes(p, th), weights='quadratic')
    res = minimize(_loss, x0=np.array(init, dtype=float), method='Nelder-Mead', options={'maxiter':1200, 'xatol':1e-3, 'fatol':1e-3})
    th = np.sort(res.x)
    for i in range(1,4):
        if th[i]-th[i-1] < 0.12: th[i] = th[i-1] + 0.12
    th = np.clip(th, 0.3, 3.7)
    return th

mask = np.isfinite(y) & np.isfinite(blend_oof)
th = optimize_thresholds_strict(y[mask], blend_oof[mask])
q = cohen_kappa_score(y[mask], preds_to_classes(blend_oof[mask], th), weights='quadratic')
print('Rank-mean OOF QWK:', f'{q:.5f}', 'th:', th, flush=True)

# Write submission
cls = preds_to_classes(blend_te, th).astype(int)
sub = pd.DataFrame({'id_code': pd.read_csv('test.csv')['id_code'].values, 'diagnosis': cls})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_rank_mean.npy', th)
print('submission.csv written (rank-averaged ensemble).', flush=True)

Running rank-averaged ensemble over up to 8 models...


Used keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640', 'r200d_512_rrcema', 'convnextb_512_rrcema', 'serx50_512_rrcema_s2']


Rank-mean OOF QWK: 0.59607 th: [1.45973336 2.68771838 2.84911448 3.34870908]


submission.csv written (rank-averaged ensemble).


In [9]:
# Per-fold isotonic per model -> NNLS -> 2D (th2, th3) refine + th3 safety nudge; write submission
import os, numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import nnls

print('Running per-fold isotonic per model + NNLS + 2D refine with th3 safety nudge...', flush=True)

# Config: top-5 models
keys = ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
}
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
}

# Targets and folds (assume order aligns with oof_targets.npy i.e., train.csv order)
y = None
for tgt in ['oof_targets.npy', 'oof_targets_b4.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float); break
if y is None: raise RuntimeError('OOF targets not found')
folds_df = pd.read_csv('folds.csv')
fold_arr = folds_df['fold'].values.astype(int)
n_folds = int(fold_arr.max()) + 1
if len(fold_arr) != len(y): raise RuntimeError('folds.csv length mismatch with OOF targets')

# Resolve test paths
paths_te = {}
for k in keys:
    for p in paths_te_opts.get(k, []):
        if os.path.exists(p):
            paths_te[k] = p; break

# Per-model per-fold isotonic calibration (robust to NaNs)
cal_oof_list = []; cal_te_list = []; used_keys = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)) or pt is None or (not os.path.exists(pt)):
        print(f'Skip {k}: missing paths')
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te_base = np.load(pt).reshape(-1).astype(float)
    if not np.isfinite(a_oof).any():
        print(f'Skip {k}: non-finite OOF')
        continue
    if not np.isfinite(a_te_base).all():
        med = float(np.nanmedian(a_te_base[np.isfinite(a_te_base)]))
        a_te_base = np.where(np.isfinite(a_te_base), a_te_base, med).astype(float)
    # Build calibrated OOF with fold-wise transforms and average per-fold calibrated test
    cal_oof = np.zeros_like(a_oof, dtype=float)
    te_stack = []
    finite_oof = np.isfinite(a_oof)
    finite_y = np.isfinite(y)
    for f in range(n_folds):
        tr_mask = (fold_arr != f)
        va_mask = (fold_arr == f)
        tr_fit = tr_mask & finite_oof & finite_y
        va_apply = va_mask & finite_oof
        if tr_fit.sum() < 2 or va_apply.sum() == 0:
            # Fallback: identity mapping for this fold
            cal_oof[va_mask] = a_oof[va_mask]
            te_stack.append(a_te_base.copy())
            continue
        ir = IsotonicRegression(increasing=True, out_of_bounds='clip')
        ir.fit(a_oof[tr_fit], y[tr_fit])
        # Transform only valid values for val; keep original where not finite
        cal_fold = a_oof[va_mask].copy()
        if va_apply.any():
            cal_fold[va_apply[va_mask]] = ir.transform(a_oof[va_apply])
        cal_oof[va_mask] = cal_fold
        te_stack.append(ir.transform(a_te_base))
    cal_te = np.mean(np.stack(te_stack, axis=0), axis=0) if len(te_stack) > 0 else a_te_base.copy()
    # Final cleanup
    cal_oof = np.where(np.isfinite(cal_oof), cal_oof, a_oof)
    cal_oof = np.clip(cal_oof, 0.0, 4.0)
    cal_te = np.clip(cal_te, 0.0, 4.0)
    cal_oof_list.append(cal_oof); cal_te_list.append(cal_te); used_keys.append(k)

if len(cal_oof_list) == 0: raise RuntimeError('No usable models after per-fold isotonic')
print('Used keys:', used_keys, flush=True)

# NNLS weights on calibrated OOF
X = np.stack(cal_oof_list, axis=1)
mask = np.isfinite(y) & np.isfinite(X).all(axis=1)
A = X[mask]; b = y[mask]
w_raw, _ = nnls(A, b)
w = w_raw / (w_raw.sum() if w_raw.sum() > 0 else len(w_raw))
print('NNLS weights:', np.round(w, 4), flush=True)

blend_oof = (X * w.reshape(1, -1)).sum(axis=1)
te_stack = np.stack(cal_te_list, axis=1)
blend_te = (te_stack * w.reshape(1, -1)).sum(axis=1)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

# 2D refine around th2, th3 with min-gap 0.12, then safety nudge th3 +0.015 if within 0.0005 drop
t0 = np.array([0.5918, 1.6050, 2.3743, 2.9295], dtype=float)
try:
    t_prev = np.load('thresholds_permodel_iso_nnls_grid2d.npy')
    if getattr(t_prev, 'shape', None) == (4,): t0 = t_prev.astype(float)
except Exception:
    pass
y_m = y[mask]; p_m = blend_oof[mask]
th2_c, th3_c = float(t0[2]), float(t0[3])
th2_min = max(0.6, th2_c - 0.12); th2_max = min(3.4, th2_c + 0.12)
th3_min = max(th2_c + 0.12, th3_c - 0.12); th3_max = min(3.7, th3_c + 0.12)
g2 = np.arange(th2_min, th2_max + 1e-12, 0.005)
g3 = np.arange(th3_min, th3_max + 1e-12, 0.005)
best_q = -1.0; best_th = t0.copy()
for i, t2 in enumerate(g2):
    for t3 in g3:
        if t3 - t2 < 0.12: continue
        th_try = best_th.copy(); th_try[2] = t2; th_try[3] = t3
        if th_try[1] >= th_try[2]: th_try[1] = max(th_try[1], th_try[2] - 0.12)
        if th_try[0] >= th_try[1]: th_try[0] = max(0.3, th_try[1] - 0.12)
        q = cohen_kappa_score(y_m, preds_to_classes(p_m, th_try), weights='quadratic')
        if q > best_q: best_q = q; best_th = th_try.copy()
    if (i+1) % 10 == 0:
        print(f'grid row {i+1}/{len(g2)} best_q={best_q:.5f}', flush=True)
print('2D refine OOF QWK:', f'{best_q:.5f}', 'best_th:', best_th, flush=True)

# Safety nudge th3 +0.015 if within 0.0005 drop
th_safe = best_th.copy(); th_safe[3] = min(3.7, th_safe[3] + 0.015)
q_safe = cohen_kappa_score(y_m, preds_to_classes(p_m, th_safe), weights='quadratic')
chosen_th = th_safe if (best_q - q_safe) <= 0.0005 else best_th
chosen_q = q_safe if (best_q - q_safe) <= 0.0005 else best_q
print('Chosen OOF QWK:', f'{chosen_q:.5f}', 'chosen_th:', chosen_th, flush=True)

# Apply to test and save submission
cls = preds_to_classes(blend_te, chosen_th).astype(int)
sub = pd.DataFrame({'id_code': pd.read_csv('test.csv')['id_code'].values, 'diagnosis': cls})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_perfold_iso_nnls.npy', chosen_th); np.save('weights_perfold_iso_nnls.npy', w)
print('submission.csv written (per-fold isotonic + NNLS + 2D refine + th3 nudge).', flush=True)

Running per-fold isotonic per model + NNLS + 2D refine with th3 safety nudge...


Used keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']


NNLS weights: [0.3532 0.1979 0.2314 0.1711 0.0464]


grid row 10/49 best_q=0.87851


grid row 20/49 best_q=0.87851


grid row 30/49 best_q=0.87961


grid row 40/49 best_q=0.87961


2D refine OOF QWK: 0.87961 best_th: [0.59176262 1.6049918  2.39433822 2.99947651]


Chosen OOF QWK: 0.87950 chosen_th: [0.59176262 1.6049918  2.39433822 3.01447651]


submission.csv written (per-fold isotonic + NNLS + 2D refine + th3 nudge).


In [11]:
# Top-5 per-model isotonic + NNLS + 2D refine with th3 +0.015 safety nudge; write submission
import os, numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import nnls

print('Running top-5 per-model isotonic + NNLS + 2D refine with th3 safety nudge...', flush=True)

keys = ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
}
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
}

# Targets
y = None
for tgt in ['oof_targets_b4.npy', 'oof_targets.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float)
        break
if y is None:
    raise RuntimeError('OOF targets not found')

# Resolve test paths
paths_te = {}
for k in keys:
    for p in paths_te_opts.get(k, []):
        if os.path.exists(p):
            paths_te[k] = p; break

# Load arrays and fit per-model isotonic (global) for speed
oof_list = []; te_list = []; used_keys = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)) or pt is None or (not os.path.exists(pt)):
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te = np.load(pt).reshape(-1).astype(float)
    if not np.isfinite(a_oof).any():
        continue
    if not np.isfinite(a_te).all():
        med = float(np.nanmedian(a_te[np.isfinite(a_te)]))
        a_te = np.where(np.isfinite(a_te), a_te, med).astype(float)
    mask_fit = np.isfinite(y) & np.isfinite(a_oof)
    ir = IsotonicRegression(increasing=True, out_of_bounds='clip')
    ir.fit(a_oof[mask_fit], y[mask_fit])
    cal_o = a_oof.copy(); cal_o[mask_fit] = ir.transform(a_oof[mask_fit])
    cal_o = np.where(np.isfinite(cal_o), cal_o, a_oof); cal_o = np.clip(cal_o, 0.0, 4.0)
    cal_t = ir.transform(a_te); cal_t = np.clip(cal_t, 0.0, 4.0)
    oof_list.append(cal_o); te_list.append(cal_t); used_keys.append(k)

if len(oof_list) == 0:
    raise RuntimeError('No usable models for per-model isotonic')

print('Using keys:', used_keys, flush=True)

# NNLS weights
X = np.stack(oof_list, axis=1)
mask = np.isfinite(y) & np.isfinite(X).all(axis=1)
A = X[mask]; b = y[mask]
w_raw, _ = nnls(A, b)
w = w_raw / (w_raw.sum() if w_raw.sum() > 0 else len(w_raw))
print('NNLS weights:', np.round(w, 4), flush=True)

blend_oof = (X * w.reshape(1, -1)).sum(axis=1)
te_stack = np.stack(te_list, axis=1)
blend_te = (te_stack * w.reshape(1, -1)).sum(axis=1)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

# Load previous best thresholds if exist; else from our best grid
t0 = np.array([0.5918, 1.6050, 2.3743, 2.9295], dtype=float)
for cand in ['thresholds_permodel_iso_nnls_grid2d.npy', 'thresholds_blend_nm.npy', 'thresholds_blend_isotonic.npy']:
    if os.path.exists(cand):
        tt = np.load(cand)
        if getattr(tt, 'shape', None) == (4,):
            t0 = tt.astype(float); break

# Apply safety nudge +0.015 to th3 (clip), without re-optimizing on OOF
t_nudge = t0.copy()
t_nudge[3] = min(3.7, t_nudge[3] + 0.015)
print('Applying th3 safety nudge: from', np.round(t0, 5), 'to', np.round(t_nudge, 5), flush=True)

# Write submission
cls = preds_to_classes(blend_te, t_nudge).astype(int)
sub = pd.DataFrame({'id_code': pd.read_csv('test.csv')['id_code'].values, 'diagnosis': cls})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_permodel_iso_nnls_nudged.npy', t_nudge); np.save('weights_permodel_iso_nnls_top5.npy', w)
print('submission.csv written (top-5 per-model-iso + NNLS + th3 +0.015 nudge).', flush=True)

Running top-5 per-model isotonic + NNLS + 2D refine with th3 safety nudge...


Using keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']


NNLS weights: [0.3242 0.2596 0.2151 0.1329 0.0682]


Applying th3 safety nudge: from [0.59176 1.60499 2.37434 2.92948] to [0.59176 1.60499 2.37434 2.94448]


submission.csv written (top-5 per-model-iso + NNLS + th3 +0.015 nudge).


In [12]:
# Counts-based thresholding on best calibrated blend (top-5 per-model iso + NNLS); write submission
import os, numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from scipy.optimize import nnls

print('Running counts-based thresholds on calibrated NNLS blend...', flush=True)

keys = ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
}
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
}

y = None
for tgt in ['oof_targets.npy', 'oof_targets_b4.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float); break
if y is None: raise RuntimeError('OOF targets not found')

paths_te = {}
for k in keys:
    for p in paths_te_opts.get(k, []):
        if os.path.exists(p):
            paths_te[k] = p; break

# Per-model global isotonic calibration
oof_list = []; te_list = []; used_keys = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)) or pt is None or (not os.path.exists(pt)):
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te = np.load(pt).reshape(-1).astype(float)
    if not np.isfinite(a_oof).any():
        continue
    if not np.isfinite(a_te).all():
        med = float(np.nanmedian(a_te[np.isfinite(a_te)]))
        a_te = np.where(np.isfinite(a_te), a_te, med).astype(float)
    mask_fit = np.isfinite(y) & np.isfinite(a_oof)
    ir = IsotonicRegression(increasing=True, out_of_bounds='clip')
    ir.fit(a_oof[mask_fit], y[mask_fit])
    cal_o = a_oof.copy(); cal_o[mask_fit] = ir.transform(a_oof[mask_fit])
    cal_o = np.where(np.isfinite(cal_o), cal_o, a_oof); cal_o = np.clip(cal_o, 0.0, 4.0)
    cal_t = ir.transform(a_te); cal_t = np.clip(cal_t, 0.0, 4.0)
    oof_list.append(cal_o); te_list.append(cal_t); used_keys.append(k)

if len(oof_list) == 0: raise RuntimeError('No usable arrays for counts-based thresholding')
print('Using keys:', used_keys, flush=True)

# NNLS blend
X = np.stack(oof_list, axis=1)
mask = np.isfinite(y) & np.isfinite(X).all(axis=1)
A = X[mask]; b = y[mask]
w_raw, _ = nnls(A, b)
w = w_raw / (w_raw.sum() if w_raw.sum() > 0 else len(w_raw))
blend_oof = (X * w.reshape(1, -1)).sum(axis=1)
te_stack = np.stack(te_list, axis=1)
blend_te = (te_stack * w.reshape(1, -1)).sum(axis=1)
print('NNLS weights:', np.round(w, 4), flush=True)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

# Derive base class counts from OOF proportions using best thresholds if available
t_base = None
for cand in ['thresholds_permodel_iso_nnls_grid2d.npy', 'thresholds_permodel_iso_nnls.npy', 'thresholds_blend_nm.npy']:
    if os.path.exists(cand):
        tt = np.load(cand)
        if getattr(tt, 'shape', None) == (4,):
            t_base = tt.astype(float); break
if t_base is None:
    t_base = np.array([0.5918, 1.6050, 2.3743, 2.9295], dtype=float)

# OOF class distribution under t_base
cls_oof = preds_to_classes(blend_oof, t_base)
counts_prop = np.bincount(cls_oof, minlength=5).astype(float) / max(1, len(cls_oof))
n_test = int(pd.read_csv('test.csv').shape[0])
counts_test = np.round(counts_prop * n_test).astype(int)
diff = n_test - counts_test.sum()
if diff != 0:
    # Adjust the largest proportion classes to fix sum
    order = np.argsort(-counts_prop)
    i = 0
    while diff != 0 and i < 5:
        k = order[i]
        counts_test[k] += 1 if diff > 0 else -1
        diff = n_test - counts_test.sum()
        if (diff == 0): break
        i = (i + 1) % 5
print('Target test counts:', counts_test.tolist(), flush=True)

def thresholds_for_counts(scores, counts, min_gap=0.12):
    scores = np.asarray(scores).astype(float)
    s = np.sort(scores)
    c = counts.astype(int)
    idxs = [c[0]-1, c[0]+c[1]-1, c[0]+c[1]+c[2]-1, c[0]+c[1]+c[2]+c[3]-1]
    th = []
    for idx in idxs:
        if idx < 0: th.append(s[0])
        elif idx >= len(s): th.append(s[-1])
        else: th.append(s[idx])
    th = np.array(th, dtype=float)
    # Enforce gaps by forward pass
    th = np.clip(th, 0.3, 3.7)
    for i in range(1,4):
        if th[i] - th[i-1] < min_gap:
            th[i] = min(3.7, th[i-1] + min_gap)
    return th

# Compute thresholds on TEST scores to match desired counts
th_counts = thresholds_for_counts(blend_te, counts_test, min_gap=0.12)
print('Counts-based thresholds:', np.round(th_counts, 5), flush=True)

# Predict and write submission
cls = preds_to_classes(blend_te, th_counts).astype(int)
sub = pd.DataFrame({'id_code': pd.read_csv('test.csv')['id_code'].values, 'diagnosis': cls})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_counts_based.npy', th_counts); np.save('weights_counts_blend.npy', w)
print('submission.csv written (counts-based thresholds).', flush=True)

Running counts-based thresholds on calibrated NNLS blend...


Using keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']


NNLS weights: [0.3242 0.2596 0.2151 0.1329 0.0682]


Target test counts: [107, 20, 63, 19, 158]


Counts-based thresholds: [0.3     0.42    0.83715 0.98371]


submission.csv written (counts-based thresholds).


In [14]:
# Brightness-binned thresholds on top-5 per-model isotonic + NNLS blend; write submission
import os, numpy as np, pandas as pd, cv2, glob, time
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import nnls

t0 = time.time()
print('Running brightness-binned thresholds on calibrated NNLS blend...', flush=True)

keys = ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
}
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
}

# Targets
y = None
for tgt in ['oof_targets.npy', 'oof_targets_b4.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float); break
if y is None: raise RuntimeError('OOF targets not found')

# Resolve test paths
paths_te = {}
for k in keys:
    for p in paths_te_opts.get(k, []):
        if os.path.exists(p):
            paths_te[k] = p; break

# Per-model global isotonic calibration
oof_list = []; te_list = []; used_keys = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)) or pt is None or (not os.path.exists(pt)):
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te = np.load(pt).reshape(-1).astype(float)
    if not np.isfinite(a_oof).any():
        continue
    if not np.isfinite(a_te).all():
        med = float(np.nanmedian(a_te[np.isfinite(a_te)]))
        a_te = np.where(np.isfinite(a_te), a_te, med).astype(float)
    mask_fit = np.isfinite(y) & np.isfinite(a_oof)
    ir = IsotonicRegression(increasing=True, out_of_bounds='clip')
    ir.fit(a_oof[mask_fit], y[mask_fit])
    cal_o = a_oof.copy(); cal_o[mask_fit] = ir.transform(a_oof[mask_fit])
    cal_o = np.where(np.isfinite(cal_o), cal_o, a_oof); cal_o = np.clip(cal_o, 0.0, 4.0)
    cal_t = ir.transform(a_te); cal_t = np.clip(cal_t, 0.0, 4.0)
    oof_list.append(cal_o); te_list.append(cal_t); used_keys.append(k)

if len(oof_list) == 0: raise RuntimeError('No usable arrays for brightness-binned thresholding')
print('Using keys:', used_keys, flush=True)

# NNLS blend
X = np.stack(oof_list, axis=1)
mask = np.isfinite(y) & np.isfinite(X).all(axis=1)
A = X[mask]; b = y[mask]
w_raw, _ = nnls(A, b)
w = w_raw / (w_raw.sum() if w_raw.sum() > 0 else len(w_raw))
blend_oof = (X * w.reshape(1, -1)).sum(axis=1)
te_stack = np.stack(te_list, axis=1)
blend_te = (te_stack * w.reshape(1, -1)).sum(axis=1)
print('NNLS weights:', np.round(w, 4), flush=True)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

# Load or compute brightness for train/test from cached 512px images
def compute_brightness(img_paths):
    vals = []
    for i, p in enumerate(img_paths):
        im = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        if im is None:
            vals.append(np.nan);
        else:
            vals.append(float(np.mean(im)))
        if (i+1) % 500 == 0:
            print(f'.. brightness {i+1}/{len(img_paths)}', flush=True)
    v = np.array(vals, dtype=float)
    # Handle NaNs by median fill
    med = float(np.nanmedian(v)) if np.isfinite(v).any() else 128.0
    v = np.where(np.isfinite(v), v, med)
    return v

# Train file order must match oof_targets.npy -> we assume it's train.csv order
train_df = pd.read_csv('folds.csv')  # contains id_code in same order used for OOF
train_ids = train_df['id_code'].values
train_img_paths = [os.path.join('cache512/train', f'{i}.png') for i in train_ids]
test_ids = pd.read_csv('test.csv')['id_code'].values
test_img_paths = [os.path.join('cache512/test', f'{i}.png') for i in test_ids]

bright_train = compute_brightness(train_img_paths)
bright_test = compute_brightness(test_img_paths)

# Define two bins by median train brightness
thr_b = float(np.median(bright_train))
bin0_tr = bright_train <= thr_b
bin1_tr = ~bin0_tr
bin0_te = bright_test <= thr_b
bin1_te = ~bin0_te
print('Brightness split @', thr_b, 'bin sizes train:', int(bin0_tr.sum()), int(bin1_tr.sum()), 'test:', int(bin0_te.sum()), int(bin1_te.sum()), flush=True)

# Start from previous best thresholds if available
t_base = np.array([0.5918, 1.6050, 2.3743, 2.9295], dtype=float)
for cand in ['thresholds_permodel_iso_nnls_grid2d.npy', 'thresholds_permodel_iso_nnls.npy', 'thresholds_permodel_iso.npy']:
    if os.path.exists(cand):
        tt = np.load(cand)
        if getattr(tt, 'shape', None) == (4,):
            t_base = tt.astype(float); break

def refine_2d(y_true, p, t0):
    y_true = y_true.astype(float); p = p.astype(float)
    m = np.isfinite(y_true) & np.isfinite(p)
    y_m = y_true[m]; p_m = p[m]
    th2_c, th3_c = float(t0[2]), float(t0[3])
    th2_min = max(0.6, th2_c - 0.12); th2_max = min(3.4, th2_c + 0.12)
    th3_min = max(th2_c + 0.12, th3_c - 0.12); th3_max = min(3.7, th3_c + 0.12)
    g2 = np.arange(th2_min, th2_max + 1e-12, 0.005)
    g3 = np.arange(th3_min, th3_max + 1e-12, 0.005)
    best_q = -1.0; best_th = t0.copy()
    for i, t2 in enumerate(g2):
        for t3 in g3:
            if t3 - t2 < 0.12: continue
            th_try = best_th.copy(); th_try[2] = t2; th_try[3] = t3
            if th_try[1] >= th_try[2]: th_try[1] = max(th_try[1], th_try[2] - 0.12)
            if th_try[0] >= th_try[1]: th_try[0] = max(0.3, th_try[1] - 0.12)
            q = cohen_kappa_score(y_m, preds_to_classes(p_m, th_try), weights='quadratic')
            if q > best_q: best_q = q; best_th = th_try.copy()
        if (i+1) % 10 == 0:
            print(f'.. grid row {i+1}/{len(g2)} best_q={best_q:.5f}', flush=True)
    return best_th, best_q

# Refine thresholds separately per brightness bin on OOF, then apply per-bin to test
t0_bin0, q0 = refine_2d(y[bin0_tr], blend_oof[bin0_tr], t_base)
t0_bin1, q1 = refine_2d(y[bin1_tr], blend_oof[bin1_tr], t_base)
print('Bin0 OOF QWK candidate th:', np.round(t0_bin0, 5), 'Bin1:', np.round(t0_bin1, 5), flush=True)

# Predict test per bin
cls = np.zeros_like(blend_te, dtype=int)
cls[bin0_te] = preds_to_classes(blend_te[bin0_te], t0_bin0)
cls[bin1_te] = preds_to_classes(blend_te[bin1_te], t0_bin1)

sub = pd.DataFrame({'id_code': test_ids, 'diagnosis': cls.astype(int)})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_brightness_bin0.npy', t0_bin0); np.save('thresholds_brightness_bin1.npy', t0_bin1); np.save('weights_permodel_iso_nnls_top5.npy', w)
print('submission.csv written (brightness-binned thresholds). Elapsed: %.1fs' % (time.time()-t0), flush=True)

Running brightness-binned thresholds on calibrated NNLS blend...


Using keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']


NNLS weights: [0.3242 0.2596 0.2151 0.1329 0.0682]


.. brightness 500/3295


.. brightness 1000/3295


.. brightness 1500/3295


.. brightness 2000/3295


.. brightness 2500/3295


.. brightness 3000/3295


Brightness split @ 127.6648178100586 bin sizes train: 1648 1647 test: 183 184


.. grid row 10/49 best_q=0.87000


.. grid row 20/49 best_q=0.87213


.. grid row 30/49 best_q=0.87388


.. grid row 40/49 best_q=0.87388


.. grid row 10/49 best_q=0.89318


.. grid row 20/49 best_q=0.89550


.. grid row 30/49 best_q=0.89550


.. grid row 40/49 best_q=0.89550


Bin0 OOF QWK candidate th: [0.59176 1.60499 2.37434 2.92948] Bin1: [0.59176 1.60499 2.32434 2.89948]


submission.csv written (brightness-binned thresholds). Elapsed: 32.8s


In [16]:
# Distribution alignment (quantile/CDF mapping) on best calibrated NNLS blend; write submission
import os, numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import nnls

print('Running distribution alignment on top-5 per-model iso + NNLS blend...', flush=True)

keys = ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
}
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
}

# Targets
y = None
for tgt in ['oof_targets.npy', 'oof_targets_b4.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float); break
if y is None: raise RuntimeError('OOF targets not found')

# Resolve test paths
paths_te = {}
for k in keys:
    for p in paths_te_opts.get(k, []):
        if os.path.exists(p):
            paths_te[k] = p; break

# Per-model isotonic calibration (global), then NNLS blend
oof_list = []; te_list = []; used_keys = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)) or pt is None or (not os.path.exists(pt)):
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te = np.load(pt).reshape(-1).astype(float)
    if not np.isfinite(a_oof).any():
        continue
    if not np.isfinite(a_te).all():
        med = float(np.nanmedian(a_te[np.isfinite(a_te)]))
        a_te = np.where(np.isfinite(a_te), a_te, med).astype(float)
    mask_fit = np.isfinite(y) & np.isfinite(a_oof)
    ir = IsotonicRegression(increasing=True, out_of_bounds='clip')
    ir.fit(a_oof[mask_fit], y[mask_fit])
    cal_o = a_oof.copy(); cal_o[mask_fit] = ir.transform(a_oof[mask_fit])
    cal_o = np.where(np.isfinite(cal_o), cal_o, a_oof); cal_o = np.clip(cal_o, 0.0, 4.0)
    cal_t = ir.transform(a_te); cal_t = np.clip(cal_t, 0.0, 4.0)
    oof_list.append(cal_o); te_list.append(cal_t); used_keys.append(k)

if len(oof_list) == 0: raise RuntimeError('No usable arrays for distribution alignment')
print('Using keys:', used_keys, flush=True)

X = np.stack(oof_list, axis=1)
mask = np.isfinite(y) & np.isfinite(X).all(axis=1)
A = X[mask]; b = y[mask]
w_raw, _ = nnls(A, b)
w = w_raw / (w_raw.sum() if w_raw.sum() > 0 else len(w_raw))
print('NNLS weights:', np.round(w, 4), flush=True)
blend_oof = (X * w.reshape(1, -1)).sum(axis=1)
te_stack = np.stack(te_list, axis=1)
blend_te = (te_stack * w.reshape(1, -1)).sum(axis=1)

# Load best thresholds from 2D grid refine
best_th = None
for cand in ['thresholds_permodel_iso_nnls_grid2d.npy', 'thresholds_permodel_iso_nnls.npy']:
    if os.path.exists(cand):
        tt = np.load(cand)
        if getattr(tt, 'shape', None) == (4,):
            best_th = tt.astype(float); break
if best_th is None:
    best_th = np.array([0.59176262, 1.6049918, 2.37433822, 2.92947651], dtype=float)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

# Evaluate base OOF QWK with best_th
y_m = y[mask]; p_m = blend_oof[mask]
base_q = cohen_kappa_score(y_m, preds_to_classes(p_m, best_th), weights='quadratic')
print('Base OOF QWK (no mapping):', f'{base_q:.5f}', 'th:', np.round(best_th,5), flush=True)

# Fit monotonic mapping (isotonic) between sorted test preds and sorted OOF preds
te_sorted = np.sort(blend_te)
oof_sorted = np.sort(blend_oof[mask])
iso_map = IsotonicRegression(increasing=True, out_of_bounds='clip')
iso_map.fit(te_sorted, np.linspace(oof_sorted[0], oof_sorted[-1], num=len(te_sorted)))
# Alternatively fit directly on sorted pairs (np.sort(blend_te), np.sort(blend_oof))
# iso_map.fit(np.sort(blend_te), np.sort(blend_oof))

blend_te_aligned = iso_map.transform(blend_te)
blend_te_aligned = np.clip(blend_te_aligned, 0.0, 4.0)

# Optional th3 safety nudge +0.015 if OOF drop <= 0.0005
th_nudge = best_th.copy(); th_nudge[3] = min(3.7, th_nudge[3] + 0.015)
q_nudge = cohen_kappa_score(y_m, preds_to_classes(p_m, th_nudge), weights='quadratic')
chosen_th = th_nudge if (base_q - q_nudge) <= 0.0005 else best_th
print('Safety nudge check: base_q=', f'{base_q:.5f}', 'q_nudge=', f'{q_nudge:.5f}', 'use_nudge=', (chosen_th is th_nudge), flush=True)

# Predict classes on aligned test preds
cls = preds_to_classes(blend_te_aligned, chosen_th).astype(int)
sub = pd.DataFrame({'id_code': pd.read_csv('test.csv')['id_code'].values, 'diagnosis': cls})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_dist_align.npy', chosen_th); np.save('weights_permodel_iso_nnLS_top5.npy', w)
print('submission.csv written (distribution-aligned calibrated NNLS blend).', flush=True)

Running distribution alignment on top-5 per-model iso + NNLS blend...


Using keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']


NNLS weights: [0.3242 0.2596 0.2151 0.1329 0.0682]


Base OOF QWK (no mapping): 0.88941 th: [0.59176 1.60499 2.37434 2.92948]


Safety nudge check: base_q= 0.88941 q_nudge= 0.88941 use_nudge= True


submission.csv written (distribution-aligned calibrated NNLS blend).


In [18]:
# Exact quantile mapping (empirical CDF alignment) on best calibrated NNLS blend; write submission
import os, numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import nnls

print('Running exact quantile mapping on top-5 per-model iso + NNLS blend...', flush=True)

keys = ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']
paths_oof = {
    'b4_512': 'oof_preds_b4.npy',
    'b5_512': 'oof_preds.npy',
    'b5_512_rrcema': 'oof_preds_b5_seed2025_rrc_ema.npy',
    'b4_640': 'oof_preds_b4_640_rrc_ema.npy',
    'serx50_512_rrcema': 'oof_preds_serx50_512_rrc_ema.npy',
}
paths_te_opts = {
    'b4_512': ['test_reg_preds_b4_hflip.npy', 'test_reg_preds_b4.npy'],
    'b5_512': ['test_reg_preds_b5_hflip.npy', 'test_reg_preds.npy'],
    'b5_512_rrcema': ['test_reg_preds_b5_seed2025_rrc_ema.npy'],
    'b4_640': ['test_reg_preds_b4_640_rrc_ema.npy'],
    'serx50_512_rrcema': ['test_reg_preds_serx50_512_rrc_ema.npy'],
}

# Targets
y = None
for tgt in ['oof_targets.npy', 'oof_targets_b4.npy']:
    if os.path.exists(tgt):
        y = np.load(tgt).reshape(-1).astype(float); break
if y is None: raise RuntimeError('OOF targets not found')

# Resolve test paths
paths_te = {}
for k in keys:
    for p in paths_te_opts.get(k, []):
        if os.path.exists(p):
            paths_te[k] = p; break

# Per-model global isotonic calibration
oof_list = []; te_list = []; used_keys = []
for k in keys:
    po = paths_oof.get(k, None); pt = paths_te.get(k, None)
    if po is None or (not os.path.exists(po)) or pt is None or (not os.path.exists(pt)):
        continue
    a_oof = np.load(po).reshape(-1).astype(float)
    a_te = np.load(pt).reshape(-1).astype(float)
    if not np.isfinite(a_oof).any():
        continue
    if not np.isfinite(a_te).all():
        med = float(np.nanmedian(a_te[np.isfinite(a_te)]))
        a_te = np.where(np.isfinite(a_te), a_te, med).astype(float)
    mask_fit = np.isfinite(y) & np.isfinite(a_oof)
    ir = IsotonicRegression(increasing=True, out_of_bounds='clip')
    ir.fit(a_oof[mask_fit], y[mask_fit])
    cal_o = a_oof.copy(); cal_o[mask_fit] = ir.transform(a_oof[mask_fit])
    cal_o = np.where(np.isfinite(cal_o), cal_o, a_oof); cal_o = np.clip(cal_o, 0.0, 4.0)
    cal_t = ir.transform(a_te); cal_t = np.clip(cal_t, 0.0, 4.0)
    oof_list.append(cal_o); te_list.append(cal_t); used_keys.append(k)

if len(oof_list) == 0: raise RuntimeError('No usable arrays for quantile mapping')
print('Using keys:', used_keys, flush=True)

# NNLS blend
X = np.stack(oof_list, axis=1)
mask = np.isfinite(y) & np.isfinite(X).all(axis=1)
A = X[mask]; b = y[mask]
w_raw, _ = nnls(A, b)
w = w_raw / (w_raw.sum() if w_raw.sum() > 0 else len(w_raw))
print('NNLS weights:', np.round(w, 4), flush=True)
blend_oof = (X * w.reshape(1, -1)).sum(axis=1)
te_stack = np.stack(te_list, axis=1)
blend_te = (te_stack * w.reshape(1, -1)).sum(axis=1)

# Load best thresholds
best_th = None
for cand in ['thresholds_permodel_iso_nnls_grid2d.npy', 'thresholds_permodel_iso_nnls.npy']:
    if os.path.exists(cand):
        tt = np.load(cand)
        if getattr(tt, 'shape', None) == (4,):
            best_th = tt.astype(float); break
if best_th is None:
    best_th = np.array([0.59176262, 1.6049918, 2.37433822, 2.92947651], dtype=float)

def preds_to_classes(p, th):
    return np.digitize(p, bins=[th[0], th[1], th[2], th[3]])

# Base OOF QWK for reference
y_m = y[mask]; p_m = blend_oof[mask]
base_q = cohen_kappa_score(y_m, preds_to_classes(p_m, best_th), weights='quadratic')
print('Base OOF QWK (no mapping):', f'{base_q:.5f}', 'th:', np.round(best_th,5), flush=True)

# Exact quantile mapping: map test ranks r in [0,1] to OOF empirical CDF
oof_sorted = np.sort(p_m)
n_te = len(blend_te); n_oof = len(oof_sorted)
r = np.argsort(np.argsort(blend_te)).astype(float)
r = r / max(1.0, n_te - 1)  # ranks in [0,1]
q_grid = np.linspace(0.0, 1.0, num=n_oof)
blend_te_aligned = np.interp(r, q_grid, oof_sorted)
blend_te_aligned = np.clip(blend_te_aligned, 0.0, 4.0)

# Safety nudge +0.015 on th3 if OOF drop <= 0.0005
th_nudge = best_th.copy(); th_nudge[3] = min(3.7, th_nudge[3] + 0.015)
q_nudge = cohen_kappa_score(y_m, preds_to_classes(p_m, th_nudge), weights='quadratic')
chosen_th = th_nudge if (base_q - q_nudge) <= 0.0005 else best_th
print('Safety nudge check: base_q=', f'{base_q:.5f}', 'q_nudge=', f'{q_nudge:.5f}', 'use_nudge=', (chosen_th is th_nudge), flush=True)

# Predict and save
cls = preds_to_classes(blend_te_aligned, chosen_th).astype(int)
sub = pd.DataFrame({'id_code': pd.read_csv('test.csv')['id_code'].values, 'diagnosis': cls})
sub.to_csv('submission.csv', index=False)
np.save('thresholds_dist_align_quantile.npy', chosen_th); np.save('weights_permodel_iso_nnls_top5.npy', w)
print('submission.csv written (exact quantile-aligned calibrated NNLS blend).', flush=True)

Running exact quantile mapping on top-5 per-model iso + NNLS blend...


Using keys: ['b5_512_rrcema', 'serx50_512_rrcema', 'b5_512', 'b4_512', 'b4_640']


NNLS weights: [0.3242 0.2596 0.2151 0.1329 0.0682]


Base OOF QWK (no mapping): 0.88941 th: [0.59176 1.60499 2.37434 2.92948]


Safety nudge check: base_q= 0.88941 q_nudge= 0.88941 use_nudge= True


submission.csv written (exact quantile-aligned calibrated NNLS blend).
