In [136]:
# Force linker to use pip-installed CUDA/NCCL libs first, then restart kernel
import os, sys, subprocess, pathlib

SITE = "/app/.pip-target"

# Ensure pip-target is first on sys.path
if SITE not in sys.path:
    sys.path.insert(0, SITE)

# Build LD_LIBRARY_PATH with pip libs FIRST (order matters)
libs = [
    f"{SITE}/torch/lib",
    f"{SITE}/nvidia/nccl/lib",
    f"{SITE}/nvidia/cublas/lib",
    f"{SITE}/nvidia/cuda_runtime/lib",
    f"{SITE}/nvidia/cuda_nvrtc/lib",
    f"{SITE}/nvidia/cuda_cupti/lib",
    f"{SITE}/nvidia/cudnn/lib",
    f"{SITE}/nvidia/cufft/lib",
    f"{SITE}/nvidia/cusparse/lib",
    f"{SITE}/nvidia/cusolver/lib",
    f"{SITE}/nvidia/curand/lib",
    f"{SITE}/nvidia/nvjitlink/lib",
]
ld = ":".join([p for p in libs if os.path.isdir(p)])
os.environ["LD_LIBRARY_PATH"] = ld + ((":" + os.environ["LD_LIBRARY_PATH"]) if "LD_LIBRARY_PATH" in os.environ else "")

# Preload pip NCCL and NVJitLink to avoid /usr/local/nvidia shadowing
nccl = f"{SITE}/nvidia/nccl/lib/libnccl.so.2"
nvjl = f"{SITE}/nvidia/nvjitlink/lib/libnvJitLink.so.12"
pre = ":".join([p for p in (nccl, nvjl) if os.path.exists(p)])
if pre:
    os.environ["LD_PRELOAD"] = pre + ((":" + os.environ["LD_PRELOAD"]) if "LD_PRELOAD" in os.environ else "")

# Optional: show which libnccl libtorch_cuda will use
libtorch_cuda = pathlib.Path(SITE).joinpath("torch", "lib", "libtorch_cuda.so")
if libtorch_cuda.exists():
    print("ldd libtorch_cuda.so:")
    subprocess.run(["ldd", str(libtorch_cuda)], check=False)

# Restart kernel so dynamic linker picks up new paths
import sys as _sys, os as _os
print("Restarting kernel to apply LD_LIBRARY_PATH/LD_PRELOAD...")
_os.execv(_sys.executable, [_sys.executable] + _sys.argv)

In [132]:
# Monotonic LightGBM stacker with fold-safe CE isotonic calibration
# Loads OOF/train features and test counterparts, builds constraints, trains 5-fold CV, outputs OOF and submission.
import time, gc, math, re
import numpy as np
import pandas as pd
from collections import defaultdict
from pathlib import Path
from sklearn.isotonic import IsotonicRegression
from scipy.stats import pearsonr
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge

t0 = time.time()
pd.set_option('display.max_columns', 200)

# ---------- Load base data and folds ----------
train = pd.read_csv('train.csv')  # id, anchor, target, context, score
test = pd.read_csv('test.csv')
folds = pd.read_csv('folds_by_id.csv')  # id, fold
train = train.merge(folds, on='id', how='left', validate='one_to_one')
assert (train['fold']>=0).all(), 'Fold merge by id failed'
NUM_FOLDS = int(train['fold'].max()) + 1
print('Folds:', NUM_FOLDS, flush=True)

# ---------- Utilities to load features ----------
def load_oof(oof_path, id_col='id'):
    df = pd.read_csv(oof_path)
    assert id_col in df.columns, f'Missing id in {oof_path}'
    return df

def load_test(test_path, id_col='id'):
    df = pd.read_csv(test_path)
    assert id_col in df.columns, f'Missing id in {test_path}'
    return df

def add_feats(oof_path, test_path, train_df, test_df, prefix=None):
    oof_df = load_oof(oof_path)
    te_df  = load_test(test_path)
    oof_cols = [c for c in oof_df.columns if c != 'id']
    te_cols  = [c for c in te_df.columns if c != 'id']
    common = [c for c in oof_cols if c in te_cols]
    if len(common) == 0:
        print(f'WARN: no common feature columns between {oof_path} and {test_path}; skipping', flush=True)
        return train_df, test_df, []
    oof_df = oof_df[['id'] + common].copy()
    te_df  = te_df[['id'] + common].copy()
    if prefix:
        rename_map = {c: f'{prefix}_{c}' for c in common}
        oof_df = oof_df.rename(columns=rename_map)
        te_df  = te_df.rename(columns=rename_map)
        feat_cols = [f'{prefix}_{c}' for c in common]
    else:
        feat_cols = common
    train_df = train_df.merge(oof_df, on='id', how='left')
    test_df  = test_df.merge(te_df, on='id', how='left')
    return train_df, test_df, feat_cols

def add_single_column_from_submission(oof_path, submission_path, train_df, test_df, new_name):
    oof_df = pd.read_csv(oof_path)
    oof_cols = [c for c in oof_df.columns if c != 'id']
    assert len(oof_cols) >= 1, f'No feature cols in {oof_path}'
    col = oof_cols[-1]
    oof_df = oof_df[['id', col]].rename(columns={col: new_name})
    sub_df = pd.read_csv(submission_path)
    sub_cols = [c for c in sub_df.columns if c != 'id']
    assert len(sub_cols) >= 1, f'No feature cols in {submission_path}'
    scol = sub_cols[-1]
    sub_df = sub_df[['id', scol]].rename(columns={scol: new_name})
    train_df = train_df.merge(oof_df, on='id', how='left')
    test_df  = test_df.merge(sub_df, on='id', how='left')
    return train_df, test_df, [new_name]

Folds: 5


In [133]:
import time, gc, re
import numpy as np, pandas as pd
from pathlib import Path
from scipy.stats import pearsonr
import lightgbm as lgb

# Start from base frames
trF = train[['id','fold','score']].copy()
teF = test[['id']].copy()
feature_cols = []

def safe_add(oof_p, te_p, prefix=None):
    global trF, teF, feature_cols
    if Path(oof_p).exists() and Path(te_p).exists():
        trF, teF, fc = add_feats(oof_p, te_p, trF, teF, prefix)
        feature_cols.extend(fc)

def safe_add_sub(oof_p, sub_p, new_name):
    global trF, teF, feature_cols
    if Path(oof_p).exists() and Path(sub_p).exists():
        trF, teF, fc = add_single_column_from_submission(oof_p, sub_p, trF, teF, new_name)
        feature_cols.extend(fc)

# Embedding singles (if you made OOF+submission)
safe_add_sub('oof_mpnet_st.csv','submission_mpnet_st.csv','mpnet_st_raw')
safe_add_sub('oof_e5_asym.csv','submission_e5_asym.csv','e5_asym_raw')
safe_add_sub('oof_bge.csv','submission_bge.csv','bge_raw')

# Legacy lexical
safe_add('oof_soft_tfidf.csv','soft_tfidf_test.csv','soft_tfidf')
safe_add('oof_bm25_var.csv','bm25_var_test.csv','bm25')
safe_add('oof_idf_overlap.csv','idf_overlap_test.csv','idf')
safe_add('oof_char_edit.csv','char_edit_test.csv','char')
safe_add('oof_lcs_char_ngrams.csv','lcs_char_ngrams_test.csv','lcs')
safe_add('oof_fuzz.csv','fuzz_test.csv','fuzz')
safe_add('oof_soft_align.csv','soft_align_test.csv','softalign')
safe_add('oof_numeric_units.csv','numeric_units_test.csv','numunit')
safe_add('oof_acronym.csv','acronym_test.csv','acronym')
# New: char3 TF-IDF cosine
safe_add('oof_char3_tfidf_cos.csv','char3_tfidf_cos_test.csv','char3')
# New: char4/5 TF-IDF cosine
safe_add('oof_char45_tfidf_cos.csv','char45_tfidf_cos_test.csv','char45')

# Normalized lexical (keep alongside legacy; prune later)
safe_add('oof_soft_tfidf_norm.csv','soft_tfidf_norm_test.csv','nsoft_tfidf')
safe_add('oof_bm25_var_norm.csv','bm25_var_norm_test.csv','nbm25')
safe_add('oof_idf_overlap_norm.csv','idf_overlap_norm_test.csv','nidf')
safe_add('oof_norm_text.csv','norm_text_test.csv','nlex')

# Monge–Elkan (NEW)
safe_add('oof_monge.csv','monge_test.csv','monge')

# Cross-encoders (ingest OOF + submissions as single columns if present)
safe_add_sub('oof_ce_minilm.csv','submission_ce_minilm.csv','ce_minilm_raw')
safe_add_sub('oof_ce_large.csv','submission_ce_large.csv','ce_large_raw')
safe_add_sub('oof_ce_stsb.csv','submission_ce_stsb.csv','ce_stsb_raw')
safe_add_sub('oof_ce_bge_rerank.csv','submission_ce_bge_rerank.csv','ce_bge_rerank_raw')

# CE transformed feature block (raw, iso, z, rank) from MiniLM CE
safe_add('oof_ce_plain_feats.csv','ce_plain_feats_test.csv','ceplain')

# Embedding transforms (fold-safe iso/z/rank for mpnet/e5/bge)
safe_add('oof_embed_transforms.csv','embed_transforms_test.csv','emb')

# PatentSBERTa transforms (raw/iso/z/rank)
safe_add('oof_patentsberta.csv','patentsberta_test.csv','patberta')

# anferico/bert-for-patents transforms (raw/iso/z/rank)
safe_add('oof_bertpat.csv','bertpat_test.csv','bertpat')

# Length + stopword-stripped overlaps (NEW)
safe_add('oof_len_stop.csv','len_stop_test.csv','lenstop')

# KNN meta from embeddings (NEW)
safe_add('oof_knn_meta.csv','knn_meta_test.csv','knn')

# Target encoding (NEW)
if Path('oof_te.csv').exists() and Path('te_test.csv').exists():
    trF, teF, fc = add_feats('oof_te.csv','te_test.csv', trF, teF, prefix='te')
    feature_cols.extend(fc)

print(f'Loaded features: {len(feature_cols)}', flush=True)

# Build matrices
y = trF['score'].values.astype(np.float32)
X = trF[feature_cols].astype(np.float32)
X_te = teF[feature_cols].astype(np.float32)

# Fill and simple FE
X = X.fillna(0.0)
X_te = X_te.fillna(0.0)
for c in ['soft_tfidf_sim','bm25_okapi_ab','bm25_okapi_ba','ce_minilm_raw','ce_large_raw']:
    if c in X.columns:
        X[f'{c}_sq'] = (X[c]**2).astype(np.float32)
        X_te[f'{c}_sq'] = (X_te[c]**2).astype(np.float32)
# Add squares for CE transformed features
for c in list(X.columns):
    if c.startswith('ceplain_'):
        sq = f'{c}_sq'
        if sq not in X.columns:
            X[sq] = (X[c]**2).astype(np.float32)
            X_te[sq] = (X_te[c]**2).astype(np.float32)
# Add squares for char3/char45 tfidf cosine to increase capacity on these sims
for c in list(X.columns):
    if c.startswith('char3_') or c.startswith('char45_'):
        sq = f'{c}_sq'
        if sq not in X.columns:
            X[sq] = (X[c]**2).astype(np.float32)
            X_te[sq] = (X_te[c]**2).astype(np.float32)

# Cheap CE*lexical interactions (limited) to squeeze extra signal
ce_core = [c for c in X.columns if c in ('ceplain_ce_plain_iso','ceplain_ce_plain_raw')]
sim_targets = []
for c in X.columns:
    name = c.lower()
    if ('char3_tfidf_cos' in name) or ('char4_tfidf_cos' in name) or ('char5_tfidf_cos' in name):
        sim_targets.append(c)
    if ('bm25' in name) and (name.endswith('_okapi_ab') or name.endswith('_okapi_ba')):
        sim_targets.append(c)
# Optional small expansion: keep monge/soft-align OFF
sim_targets = list(dict.fromkeys(sim_targets))
for ce_c in ce_core:
    for s_c in sim_targets:
        ix = f'{ce_c}__x__{s_c}'
        if ix not in X.columns:
            X[ix] = (X[ce_c] * X[s_c]).astype(np.float32)
            X_te[ix] = (X_te[ce_c] * X_te[s_c]).astype(np.float32)

# Interactions: patent encoders (raw/iso) × lexical sims (char3/char45, bm25 okapi ab/ba)
enc_core = []
for c in X.columns:
    if (c.startswith('patberta_') or c.startswith('bertpat_')) and (c.endswith('_raw') or c.endswith('_iso')):
        enc_core.append(c)
enc_core = list(dict.fromkeys(enc_core))
for e_c in enc_core:
    for s_c in sim_targets:
        ix = f'{e_c}__x__{s_c}'
        if ix not in X.columns:
            X[ix] = (X[e_c] * X[s_c]).astype(np.float32)
            X_te[ix] = (X_te[e_c] * X_te[s_c]).astype(np.float32)

# Drop near-constant
std = X.std()
keep = std[std > 1e-4].index.tolist()
X = X[keep]; X_te = X_te[keep]

# Redundancy pruning (|corr|>0.97), prefer CE highest, then TE, then normalized/embeds
from sklearn.preprocessing import StandardScaler
pref = {c:0 for c in X.columns}
for c in X.columns:
    # Highest priority: any CE family (explicit ceplain_* or generic ce_ prefix)
    if c.startswith(('ceplain', 'ce_')):
        pref[c] = 4
    # Next: target encodings
    elif c.startswith('te_'):
        pref[c] = 3
    # Then normalized lexical, embedding transforms, and KNN metas
    elif c.startswith(('nsoft','nbm25','nidf','nlex','emb_','patberta_','bertpat_','knn_')) or ('norm' in c):
        pref[c] = 2
    else:
        pref[c] = pref.get(c, 0)

def protect_key(col: str):
    if col.startswith('ceplain'): return 'ceplain'
    if col.startswith('ce_'): return 'ce'
    if col.startswith('patberta_'): return 'patberta'
    if col.startswith('bertpat_'): return 'bertpat'
    if col.startswith('char3_'): return 'char3'
    return None

corr = X.corr().abs()
upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(bool))
drop = set()
thr = 0.97
for c in X.columns:
    if c in drop: continue
    partners = upper.index[upper[c] > thr].tolist()
    for p in partners:
        if p in drop: continue
        pk_c = protect_key(c); pk_p = protect_key(p)
        # Group-wise keep: don't drop within the same protected family
        if pk_c is not None and pk_c == pk_p:
            continue
        keep_c = c if pref[c] >= pref[p] else p
        drop_c = p if keep_c == c else c
        drop.add(drop_c)
cols = [c for c in X.columns if c not in drop]
trX_f = X[cols].copy(); teX_f = X_te[cols].copy()
n_ce_cols = sum([1 for c in trX_f.columns if c.startswith(('ceplain','ce_'))])
print(f'Final features: {trX_f.shape[1]} (CE cols kept: {n_ce_cols})', flush=True)

# Monotone constraints OFF for this run (more flexible LGBM)
USE_MONO = False
mono = []
inc_pat = re.compile(r'(te_|bm25|tfidf|overlap|fuzz|align|sim|ce_|monge|ceplain|emb_|patberta|bertpat|knn_)', re.I)
for c in trX_f.columns:
    mono.append(1 if inc_pat.search(c) else 0)

# Train LGBM 5-fold with seed bagging (stabilize oof/test)
SEEDS = [42, 2025, 7]
folds_arr = train['fold'].values.astype(int)
oof_pred_seeds = []; test_pred_seeds = []
for sd in SEEDS:
    params = {
        'objective':'regression','metric':'rmse','learning_rate':0.035,
        'num_leaves':127,'min_data_in_leaf':32,'feature_fraction':0.8,
        'bagging_fraction':0.8,'bagging_freq':1,'lambda_l1':0.05,'lambda_l2':1.0,
        'seed':sd,'verbose':-1,'n_jobs':-1,
    }
    oof_pred = np.zeros(len(train), dtype=np.float32)
    test_pred = np.zeros(len(test), dtype=np.float32)
    for f in range(int(folds_arr.max())+1):
        tr_idx = np.where(folds_arr!=f)[0]; va_idx = np.where(folds_arr==f)[0]
        dtr = lgb.Dataset(trX_f.iloc[tr_idx].values, label=y[tr_idx], free_raw_data=False)
        dva = lgb.Dataset(trX_f.iloc[va_idx].values, label=y[va_idx], free_raw_data=False)
        p = params.copy()
        if USE_MONO:
            p['monotone_constraints'] = mono
        booster = lgb.train(p, dtr, num_boost_round=20000, valid_sets=[dva],
                            callbacks=[lgb.early_stopping(300, verbose=False)])
        oof_pred[va_idx] = booster.predict(trX_f.iloc[va_idx].values, num_iteration=booster.best_iteration).astype(np.float32)
        test_pred += booster.predict(teX_f.values, num_iteration=booster.best_iteration).astype(np.float32)
        print(f'[LGBM sd={sd} Fold {f}] r={pearsonr(oof_pred[va_idx], y[va_idx])[0]:.6f}', flush=True)
    test_pred /= (int(folds_arr.max())+1)
    print(f'[LGBM seed {sd}] OOF r={pearsonr(oof_pred, y)[0]:.6f}', flush=True)
    oof_pred_seeds.append(oof_pred); test_pred_seeds.append(test_pred)

# Average across seeds
oof_pred = np.mean(oof_pred_seeds, axis=0).astype(np.float32)
test_pred = np.mean(test_pred_seeds, axis=0).astype(np.float32)
print('LGBM (seed-bagged) OOF r=', pearsonr(oof_pred, y)[0])

# Save + keep vars for downstream cells
pd.DataFrame({'id': train['id'], 'oof': oof_pred}).to_csv('oof_stack_lgbm.csv', index=False)
pd.DataFrame({'id': test['id'], 'score': np.clip(test_pred, 0.0, 1.0)}).to_csv('submission_stack_lgbm.csv', index=False)

Loaded features: 216


Final features: 122 (CE cols kept: 32)


[LGBM sd=42 Fold 0] r=0.782490


[LGBM sd=42 Fold 1] r=0.775062


[LGBM sd=42 Fold 2] r=0.767728


[LGBM sd=42 Fold 3] r=0.777447


[LGBM sd=42 Fold 4] r=0.760847


[LGBM seed 42] OOF r=0.772405


[LGBM sd=2025 Fold 0] r=0.783774


[LGBM sd=2025 Fold 1] r=0.774771


[LGBM sd=2025 Fold 2] r=0.768888


[LGBM sd=2025 Fold 3] r=0.776623


[LGBM sd=2025 Fold 4] r=0.761805


[LGBM seed 2025] OOF r=0.772797


[LGBM sd=7 Fold 0] r=0.783449


[LGBM sd=7 Fold 1] r=0.773661


[LGBM sd=7 Fold 2] r=0.768969


[LGBM sd=7 Fold 3] r=0.777169


[LGBM sd=7 Fold 4] r=0.762570


[LGBM seed 7] OOF r=0.772827


LGBM (seed-bagged) OOF r= 0.7743376726078489


In [126]:
# CatBoost and XGBoost secondary models + blends (with seed bagging)
import sys, subprocess, numpy as np, pandas as pd, time, gc
from scipy.stats import pearsonr

# Ensure deps
subprocess.run([sys.executable, '-m', 'pip', 'install', '-q', 'catboost==1.2.7', 'xgboost==2.1.1'], check=True)
from catboost import CatBoostRegressor, Pool
import xgboost as xgb

assert 'trX_f' in globals() and 'teX_f' in globals() and 'y' in globals(), 'Design matrices not found; run cell 0 first'

X = trX_f.values.astype(np.float32)
X_te = teX_f.values.astype(np.float32)
fold_arr = train['fold'].values.astype(int)
n_te = len(test)

# ---------- CatBoost with seed bagging ----------
seeds = [42, 2025, 7, 101, 303]
cat_oof_seeds, cat_te_seeds = [], []
for sd in seeds:
    cat_oof = np.zeros(len(train), dtype=np.float32)
    cat_te_accum = np.zeros(n_te, dtype=np.float32)
    for f in range(NUM_FOLDS):
        f0 = time.time()
        tr_idx = np.where(fold_arr != f)[0]
        va_idx = np.where(fold_arr == f)[0]
        tr_pool = Pool(X[tr_idx], label=y[tr_idx])
        va_pool = Pool(X[va_idx], label=y[va_idx])
        te_pool = Pool(X_te)
        params = dict(
            loss_function='RMSE',
            depth=7,
            learning_rate=0.05,
            l2_leaf_reg=12.0,
            subsample=0.8,
            rsm=0.8,
            random_seed=sd,
            iterations=20000,
            od_type='Iter',
            od_wait=300,
            verbose=False,
            allow_writing_files=False,
            thread_count=-1,
        )
        model = CatBoostRegressor(**params)
        model.fit(tr_pool, eval_set=va_pool, use_best_model=True, verbose=False)
        cat_oof[va_idx] = model.predict(va_pool).astype(np.float32)
        cat_te_accum += model.predict(te_pool).astype(np.float32)
        r = pearsonr(cat_oof[va_idx], y[va_idx])[0]
        print(f'[CatBoost sd={sd} Fold {f}] r={r:.6f}; elapsed {time.time()-f0:.1f}s', flush=True)
        del model, tr_pool, va_pool; gc.collect()
    cat_te = cat_te_accum / NUM_FOLDS
    print(f'[CatBoost seed {sd}] r={pearsonr(cat_oof, y)[0]:.6f}', flush=True)
    cat_oof_seeds.append(cat_oof); cat_te_seeds.append(cat_te)

cat_oof = np.mean(cat_oof_seeds, axis=0).astype(np.float32)
cat_te  = np.mean(cat_te_seeds, axis=0).astype(np.float32)
pd.DataFrame({'id': train['id'], 'oof_cat': cat_oof}).to_csv('oof_stack_cat.csv', index=False)
pd.DataFrame({'id': test['id'], 'score': np.clip(cat_te, 0.0, 1.0)}).to_csv('submission_stack_cat.csv', index=False)
print('CatBoost OOF r=', round(float(pearsonr(cat_oof, y)[0]), 6), flush=True)

# ---------- XGBoost with seed bagging ----------
xgb_oof_seeds, xgb_te_seeds = [], []
for sd in seeds:
    xgb_oof = np.zeros(len(train), dtype=np.float32)
    xgb_te_accum = np.zeros(n_te, dtype=np.float32)
    for f in range(NUM_FOLDS):
        f0 = time.time()
        tr_idx = np.where(fold_arr != f)[0]
        va_idx = np.where(fold_arr == f)[0]
        dtr = xgb.DMatrix(X[tr_idx], label=y[tr_idx])
        dva = xgb.DMatrix(X[va_idx], label=y[va_idx])
        dte = xgb.DMatrix(X_te)
        params = {
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse',
            'eta': 0.05,
            'max_depth': 7,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'lambda': 2.5,
            'tree_method': 'hist',
            'seed': sd,
            'nthread': -1,
        }
        evallist = [(dva, 'val')]
        booster = xgb.train(params, dtr, num_boost_round=6000, evals=evallist, early_stopping_rounds=300, verbose_eval=100)
        xgb_oof[va_idx] = booster.predict(dva, iteration_range=(0, booster.best_iteration+1)).astype(np.float32)
        xgb_te_accum += booster.predict(dte, iteration_range=(0, booster.best_iteration+1)).astype(np.float32)
        r = pearsonr(xgb_oof[va_idx], y[va_idx])[0]
        print(f'[XGB sd={sd} Fold {f}] r={r:.6f}; elapsed {time.time()-f0:.1f}s', flush=True)
        del booster, dtr, dva; gc.collect()
    xgb_te = xgb_te_accum / NUM_FOLDS
    print(f'[XGB seed {sd}] r={pearsonr(xgb_oof, y)[0]:.6f}', flush=True)
    xgb_oof_seeds.append(xgb_oof); xgb_te_seeds.append(xgb_te)

xgb_oof = np.mean(xgb_oof_seeds, axis=0).astype(np.float32)
xgb_te  = np.mean(xgb_te_seeds, axis=0).astype(np.float32)
pd.DataFrame({'id': train['id'], 'oof_xgb': xgb_oof}).to_csv('oof_stack_xgb.csv', index=False)
pd.DataFrame({'id': test['id'], 'score': np.clip(xgb_te, 0.0, 1.0)}).to_csv('submission_stack_xgb.csv', index=False)
print('XGBoost OOF r=', round(float(pearsonr(xgb_oof, y)[0]), 6), flush=True)

# ---------- Simple blends ----------
artifacts = {}
if 'oof_pred' in globals():
    artifacts['lgbm'] = (oof_pred, test_pred)
artifacts['cat'] = (cat_oof, cat_te)
artifacts['xgb'] = (xgb_oof, xgb_te)
if 'ridge_oof' in globals() and 'ridge_te' in globals():
    artifacts['ridge'] = (ridge_oof, ridge_te)

def try_blend(keys, weights_grid):
    best = (-1.0, None, None)
    for ws in weights_grid:
        assert abs(sum(ws)-1.0) < 1e-6
        oof_b = np.zeros_like(y, dtype=np.float32)
        te_b = np.zeros(n_te, dtype=np.float32)
        for k, w in zip(keys, ws):
            oof_b += w * artifacts[k][0]
            te_b  += w * artifacts[k][1]
        r = pearsonr(oof_b, y)[0]
        if r > best[0]:
            best = (r, ws, te_b.copy())
    return best

# 2-way blends
pairs = [('lgbm','cat'), ('lgbm','xgb'), ('cat','xgb')] if 'lgbm' in artifacts else [('cat','xgb')]
w2 = [(0.5,0.5), (0.6,0.4), (0.7,0.3), (0.4,0.6)]
for a,b in pairs:
    r, ws, te_b = try_blend([a,b], w2)
    print(f'Best 2-way {a}+{b}: r={r:.6f} weights={ws}', flush=True)

# 3-way blend lgbm+cat+xgb
if all(k in artifacts for k in ('lgbm','cat','xgb')):
    w3 = [
        (0.4,0.3,0.3), (0.5,0.3,0.2), (0.5,0.25,0.25),
        (0.33,0.33,0.34), (0.6,0.2,0.2)
    ]
    r3, ws3, te3 = try_blend(['lgbm','cat','xgb'], w3)
    print(f'Best 3-way lgbm+cat+xgb: r={r3:.6f} weights={ws3}', flush=True)

print('Done secondary models.', flush=True)

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torch 2.4.1+cu121 requires nvidia-cublas-cu12==12.1.3.1; platform_system == "Linux" and platform_machine == "x86_64", but you have nvidia-cublas-cu12 12.8.4.1 which is incompatible.
torch 2.4.1+cu121 requires nvidia-cuda-cupti-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64", but you have nvidia-cuda-cupti-cu12 12.8.90 which is incompatible.
torch 2.4.1+cu121 requires nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64", but you have nvidia-cuda-nvrtc-cu12 12.8.93 which is incompatible.
torch 2.4.1+cu121 requires nvidia-cuda-runtime-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64", but you have nvidia-cuda-runtime-cu12 12.8.90 which is incompatible.
torch 2.4.1+cu121 requires nvidia-cudnn-cu12==9.1.0.70; platform_system 

AssertionError: Design matrices not found; run cell 0 first

In [85]:
# ElasticNet on dense feature matrix as an extra calibrated base
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from scipy.stats import pearsonr
import numpy as np

assert 'trX_f' in globals() and 'teX_f' in globals() and 'y' in globals() and 'NUM_FOLDS' in globals(), 'Run cells 0-1 first'

folds_arr = train['fold'].values.astype(int)
X_all = trX_f.values.astype(np.float32); X_te_all = teX_f.values.astype(np.float32)
alphas = [0.003, 0.01]; l1s = [0.3, 0.5]
best = (-1.0, None, None)
for a in alphas:
    for l1 in l1s:
        oof = np.zeros(len(train), dtype=np.float32); te_acc = np.zeros(len(test), dtype=np.float32)
        for f in range(NUM_FOLDS):
            tr = folds_arr!=f; va = folds_arr==f
            ss = StandardScaler(with_mean=True, with_std=True)
            Xtr = ss.fit_transform(X_all[tr]); Xva = ss.transform(X_all[va]); Xte = ss.transform(X_te_all)
            en = ElasticNet(alpha=a, l1_ratio=l1, random_state=42, max_iter=4000)
            en.fit(Xtr, y[tr])
            oof[va] = en.predict(Xva).astype(np.float32)
            te_acc += en.predict(Xte).astype(np.float32)
        r = pearsonr(oof, y)[0]
        print(f'ElasticNet dense a={a} l1={l1} OOF r={r:.6f}', flush=True)
        if r > best[0]:
            best = (r, oof.copy(), (te_acc/NUM_FOLDS).astype(np.float32))
print(f'ElasticNet dense BEST OOF r={best[0]:.6f}', flush=True)
elastic_oof, elastic_te = best[1].astype(np.float32), best[2].astype(np.float32)

ElasticNet dense a=0.003 l1=0.3 OOF r=0.738455


ElasticNet dense a=0.003 l1=0.5 OOF r=0.736260


ElasticNet dense a=0.01 l1=0.3 OOF r=0.731633


ElasticNet dense a=0.01 l1=0.5 OOF r=0.727119


ElasticNet dense BEST OOF r=0.738455


In [135]:
# NNLS meta-blend over model OOFs + rank-average baseline + fold-safe isotonic calibration
import numpy as np, pandas as pd, time
from scipy.optimize import nnls
from scipy.stats import pearsonr
from sklearn.isotonic import IsotonicRegression
from pathlib import Path

assert 'y' in globals(), 'Run cell 0 first to define y'

# Load train/test ids early for file-based fallbacks
train_df_ids = pd.read_csv('train.csv')[['id']]
test_df_ids  = pd.read_csv('test.csv')[['id']]

# Helpers to load bases from disk if globals are missing
def try_load_base_from_files(oof_path, sub_path, oof_col, sub_col='score'):
    if Path(oof_path).exists() and Path(sub_path).exists():
        try:
            oof_df = pd.read_csv(oof_path); sub_df = pd.read_csv(sub_path)
            oof_arr = train_df_ids.merge(oof_df, on='id', how='left')[oof_col].astype(np.float32).values
            te_arr  = test_df_ids.merge(sub_df, on='id', how='left')[sub_col].astype(np.float32).values
            return oof_arr, te_arr
        except Exception as e:
            print(f'WARN: failed loading {oof_path}/{sub_path}:', e, flush=True)
    return None, None

# Attempt to populate missing globals from saved artifacts
if not ('oof_pred' in globals() and 'test_pred' in globals()):
    lgbm_oof_f, lgbm_te_f = try_load_base_from_files('oof_stack_lgbm.csv','submission_stack_lgbm.csv','oof','score')
    if lgbm_oof_f is not None:
        oof_pred = lgbm_oof_f; test_pred = lgbm_te_f
if not ('cat_oof' in globals() and 'cat_te' in globals()):
    cat_oof_f, cat_te_f = try_load_base_from_files('oof_stack_cat.csv','submission_stack_cat.csv','oof_cat','score')
    if cat_oof_f is not None:
        cat_oof = cat_oof_f; cat_te = cat_te_f
if not ('xgb_oof' in globals() and 'xgb_te' in globals()):
    xgb_oof_f, xgb_te_f = try_load_base_from_files('oof_stack_xgb.csv','submission_stack_xgb.csv','oof_xgb','score')
    if xgb_oof_f is not None:
        xgb_oof = xgb_oof_f; xgb_te = xgb_te_f

# Collect available base models
bases = []
if 'oof_pred' in globals() and 'test_pred' in globals():
    bases.append(('lgbm', oof_pred.astype(np.float32), test_pred.astype(np.float32)))
if 'cat_oof' in globals() and 'cat_te' in globals():
    bases.append(('cat', cat_oof.astype(np.float32), cat_te.astype(np.float32)))
if 'xgb_oof' in globals() and 'xgb_te' in globals():
    bases.append(('xgb', xgb_oof.astype(np.float32), xgb_te.astype(np.float32)))
if 'ridge_oof' in globals() and 'ridge_te' in globals():
    bases.append(('ridge', ridge_oof.astype(np.float32), ridge_te.astype(np.float32)))
if 'ce_meta_oof' in globals() and 'ce_meta_te' in globals():
    bases.append(('ce_meta', ce_meta_oof.astype(np.float32), ce_meta_te.astype(np.float32)))
# Optional dense ElasticNet base (if computed in prior cell)
if 'elastic_oof' in globals() and 'elastic_te' in globals():
    bases.append(('elastic', elastic_oof.astype(np.float32), elastic_te.astype(np.float32)))

# Priority 1: Add TF-IDF Ridge as an external calibrated NNLS base if files exist
def load_single_col(path):
    df = pd.read_csv(path)
    cols = [c for c in df.columns if c != 'id']
    assert len(cols) >= 1, f'No non-id column in {path}'
    return df[cols[-1]].values.astype(np.float32)

tfidf_oof = tfidf_te = None
if Path('oof_tfidf_ridge.csv').exists() and Path('submission_tfidf.csv').exists():
    try:
        tfidf_oof = load_single_col('oof_tfidf_ridge.csv')
        tfidf_te  = load_single_col('submission_tfidf.csv')
        bases.append(('tfidf_ridge', tfidf_oof, tfidf_te))
        print('Added TF-IDF Ridge base to NNLS.')
    except Exception as e:
        print('Failed to load TF-IDF Ridge base:', e)

# Keep only strongest bases for NNLS (include tfidf_ridge first, then exclude per expert tweak)
keep = {'lgbm','cat','xgb','tfidf_ridge'}
bases = [b for b in bases if b[0] in keep]
print('Bases kept for NNLS (pre-exclude):', [b[0] for b in bases], flush=True)

# Exclude tfidf_ridge for lean 3-base + ranks
bases = [b for b in bases if b[0] != 'tfidf_ridge']
print('Bases kept for NNLS (final):', [b[0] for b in bases], flush=True)

# Local, fold-safe fold array independent of notebook state
folds_df = pd.read_csv('folds_by_id.csv')  # id, fold
merged_folds = train_df_ids.merge(folds_df, on='id', how='left', validate='one_to_one')
fold_arr = merged_folds['fold'].values.astype(int)
NUM_FOLDS = int(merged_folds['fold'].max()) + 1

def fold_iso_with_te(oof, te, y, folds):
    o2 = np.zeros_like(oof, np.float32); te_list = []
    F = int(folds.max()) + 1
    for f in range(F):
        tr = folds != f; va = folds == f
        iso = IsotonicRegression(increasing=True, out_of_bounds='clip')
        iso.fit(oof[tr], y[tr])
        o2[va] = iso.transform(oof[va]).astype(np.float32)
        te_list.append(iso.transform(te).astype(np.float32))
    te_avg = (np.mean(np.stack(te_list, axis=0), axis=0)).astype(np.float32)
    return o2, te_avg, te_list  # calibrated OOF, calibrated test (avg), per-fold calibrated test list

def fold_rank_from_calibrated(cal_oof, te_folds, folds):
    F = int(folds.max()) + 1
    r_oof = np.zeros_like(cal_oof, np.float32)
    te_acc = np.zeros_like(te_folds[0], np.float64)
    for f in range(F):
        tr = folds != f; va = folds == f
        ref = np.sort(cal_oof[tr].astype(np.float32))
        if ref.size == 0:
            continue
        j_va = np.searchsorted(ref, cal_oof[va], side='right')
        r_oof[va] = (j_va / max(ref.size - 1, 1)).astype(np.float32)
        j_te = np.searchsorted(ref, te_folds[f], side='right')
        te_acc += (j_te / max(ref.size - 1, 1)).astype(np.float64)
    r_te = (te_acc / F).astype(np.float32)
    return r_oof, r_te

# New: fold-safe z-score copies from calibrated
def fold_z_from_calibrated(cal_oof, te_folds, folds):
    F = int(folds.max()) + 1
    z_oof = np.zeros_like(cal_oof, np.float32)
    te_acc = np.zeros_like(te_folds[0], np.float64)
    for f in range(F):
        tr = folds != f; va = folds == f
        m = float(cal_oof[tr].mean()); s = float(cal_oof[tr].std() + 1e-8)
        z_oof[va] = ((cal_oof[va] - m) / s).astype(np.float32)
        te_acc += ((te_folds[f] - m) / s).astype(np.float64)
    z_te = (te_acc / F).astype(np.float32)
    return z_oof, z_te

# Enable rank copies as advised
USE_RANK_COPIES = True

# Build design matrix with raw, calibrated, rank, and z-score copies
names_all = []
blocks_tr = []
blocks_te = []

for name, tr, te in bases:
    # raw copies
    names_all.append(name + '_raw'); blocks_tr.append(tr.reshape(-1,1)); blocks_te.append(te.reshape(-1,1))
    # calibrated copies
    cal_tr, cal_te, cal_te_folds = fold_iso_with_te(tr, te, y.astype(np.float32), fold_arr)
    names_all.append(name); blocks_tr.append(cal_tr.reshape(-1,1)); blocks_te.append(cal_te.reshape(-1,1))
    # rank copies from calibrated
    if USE_RANK_COPIES:
        r_tr, r_te = fold_rank_from_calibrated(cal_tr, cal_te_folds, fold_arr)
        names_all.append(name + '_rank'); blocks_tr.append(r_tr.reshape(-1,1)); blocks_te.append(r_te.reshape(-1,1))
    # z-score copies from calibrated
    z_tr, z_te = fold_z_from_calibrated(cal_tr, cal_te_folds, fold_arr)
    names_all.append(name + '_z'); blocks_tr.append(z_tr.reshape(-1,1)); blocks_te.append(z_te.reshape(-1,1))

P_tr = np.hstack(blocks_tr).astype(np.float64) if blocks_tr else None
P_te = np.hstack(blocks_te).astype(np.float64) if blocks_te else None
y_vec = y.astype(np.float64)

print('NNLS over bases:', names_all, flush=True)

# NNLS weights with tiny L2 (non-negative ridge via augmentation)
def fit_nnls_l2(P_tr, y_vec, alpha: float):
    K = P_tr.shape[1]
    if alpha <= 0:
        w, _ = nnls(P_tr, y_vec)
    else:
        A = np.vstack([P_tr, np.sqrt(alpha) * np.eye(K, dtype=np.float64)])
        b = np.concatenate([y_vec, np.zeros(K, dtype=np.float64)])
        w, _ = nnls(A, b)
    s = w.sum() if w.sum() > 0 else 1.0
    return w / s

alphas = [0.0, 1e-5, 5e-5, 1e-4, 5e-4]
best = (-1.0, None)
for a in alphas:
    w_try = fit_nnls_l2(P_tr, y_vec, a)
    r_try = pearsonr((P_tr @ w_try).astype(np.float32), y)[0]
    print(f'NNLS L2 alpha={a:g} OOF r={r_try:.6f}')
    if r_try > best[0]:
        best = (r_try, w_try)
best_r_nnls, w_norm = best
print('Chosen NNLS (possibly L2) OOF r=', round(float(best_r_nnls),6))
blend_oof = (P_tr @ w_norm).astype(np.float32)
blend_te  = (P_te @ w_norm).astype(np.float32)

# Optional tiny manual 3-way sweep on calibrated lgbm/cat/xgb (no ranks), per expert advice
sweep_best_r = -1.0; sweep_best_te = None; sweep_best_oof = None
name_to_idx = {n:i for i,n in enumerate(names_all)}
have_three = all(k in name_to_idx for k in ('lgbm','cat','xgb'))
if have_three:
    i_l, i_c, i_x = name_to_idx['lgbm'], name_to_idx['cat'], name_to_idx['xgb']
    col_l = P_tr[:, i_l]; col_c = P_tr[:, i_c]; col_x = P_tr[:, i_x]
    te_l = P_te[:, i_l]; te_c = P_te[:, i_c]; te_x = P_te[:, i_x]
    grid_l = [0.18, 0.19, 0.20, 0.21, 0.22]
    grid_c = [0.57, 0.58, 0.59, 0.60, 0.61]
    for wl in grid_l:
        for wc in grid_c:
            wx = 1.0 - wl - wc
            if wx < 0.17 or wx > 0.23:
                continue
            oof_try = (wl*col_l + wc*col_c + wx*col_x).astype(np.float32)
            r_try = pearsonr(oof_try, y)[0]
            if r_try > sweep_best_r:
                sweep_best_r = r_try
                sweep_best_oof = oof_try
                sweep_best_te = (wl*te_l + wc*te_c + wx*te_x).astype(np.float32)
    if sweep_best_r > -1:
        print('Manual sweep best OOF r=', round(float(sweep_best_r),6), flush=True)

# Rank-average baseline (robustness check)
def rank01(a):
    order = a.argsort(kind='mergesort')
    ranks = np.empty_like(order, dtype=np.float64)
    ranks[order] = np.arange(len(a), dtype=np.float64)
    return ranks / max(len(a)-1, 1)

K = P_tr.shape[1]
ranks_tr = [rank01(P_tr[:,i]) for i in range(K)]
ranks_te = [rank01(P_te[:,i]) for i in range(K)]
rank_avg_oof = np.mean(np.vstack(ranks_tr), axis=0).astype(np.float32)
rank_avg_te = np.mean(np.vstack(ranks_te), axis=0).astype(np.float32)
r_rank = pearsonr(rank_avg_oof, y)[0]
print('Rank-average OOF r=', round(float(r_rank), 6), flush=True)

# Choose best between NNLS and (if available) manual sweep
best_oof = blend_oof
best_te = blend_te
best_r = best_r_nnls
if sweep_best_oof is not None and float(sweep_best_r) > float(best_r):
    best_oof = sweep_best_oof
    best_te = sweep_best_te
    best_r = sweep_best_r
    print('Using manual 3-way sweep (beats NNLS). Final OOF r=', round(float(best_r),6), flush=True)
else:
    print('Using NNLS. Final OOF r=', round(float(best_r),6), flush=True)

# Save artifacts
pd.DataFrame({'id': train_df_ids['id'], 'oof': best_oof}).to_csv('oof_stack_nnls_raw.csv', index=False)
pd.DataFrame({'id': test_df_ids['id'], 'score': np.clip(best_te, 0.0, 1.0)}).to_csv('submission_stack_nnls_raw.csv', index=False)
print('Saved calibrated NNLS blend (or manual sweep if better).', flush=True)

Added TF-IDF Ridge base to NNLS.
Bases kept for NNLS (pre-exclude): ['lgbm', 'cat', 'xgb', 'tfidf_ridge']


Bases kept for NNLS (final): ['lgbm', 'cat', 'xgb']


NNLS over bases: ['lgbm_raw', 'lgbm', 'lgbm_rank', 'lgbm_z', 'cat_raw', 'cat', 'cat_rank', 'cat_z', 'xgb_raw', 'xgb', 'xgb_rank', 'xgb_z']


NNLS L2 alpha=0 OOF r=0.776951
NNLS L2 alpha=1e-05 OOF r=0.776951
NNLS L2 alpha=5e-05 OOF r=0.776951
NNLS L2 alpha=0.0001 OOF r=0.776951


NNLS L2 alpha=0.0005 OOF r=0.776951
Chosen NNLS (possibly L2) OOF r= 0.776951
Manual sweep best OOF r= 0.77675


Rank-average OOF r= 0.742897


Using NNLS. Final OOF r= 0.776951


Saved calibrated NNLS blend (or manual sweep if better).


In [87]:
# Fold-safe post-calibration of final NNLS blend: global + per-anchor + per-CPC3 isotonic
import numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from scipy.stats import pearsonr

# Load train/test and raw NNLS outputs
train_df = pd.read_csv('train.csv')
test_df  = pd.read_csv('test.csv')
folds    = pd.read_csv('folds_by_id.csv')
oof_raw  = pd.read_csv('oof_stack_nnls_raw.csv')  # id,oof
sub_raw  = pd.read_csv('submission_stack_nnls_raw.csv')  # id,score

train_df = train_df.merge(folds, on='id', how='left', validate='one_to_one')
train_df['fold'] = train_df['fold'].astype(int)
y = train_df['score'].astype(np.float32).values
pred_tr = train_df[['id']].merge(oof_raw, on='id', how='left')['oof'].astype(np.float32).values
pred_te = test_df[['id']].merge(sub_raw, on='id', how='left')['score'].astype(np.float32).values
anchors_tr = train_df['anchor'].astype(str).values
cpc3_tr    = train_df['context'].astype(str).str[:3].values
anchors_te = test_df['anchor'].astype(str).values
cpc3_te    = test_df['context'].astype(str).str[:3].values
fold_arr   = train_df['fold'].values.astype(int)
F = int(fold_arr.max()) + 1

oof_global = np.zeros_like(pred_tr, dtype=np.float32)
oof_anchor = np.zeros_like(pred_tr, dtype=np.float32)
oof_cpc3   = np.zeros_like(pred_tr, dtype=np.float32)
te_global_acc = np.zeros_like(pred_te, dtype=np.float64)
te_anchor_acc = np.zeros_like(pred_te, dtype=np.float64)
te_cpc3_acc   = np.zeros_like(pred_te, dtype=np.float64)

MIN_GRP = 20  # minimum train instances to fit a group iso; else fallback to global

for f in range(F):
    tr = fold_arr != f; va = fold_arr == f
    # Global isotonic on train-only
    iso_g = IsotonicRegression(increasing=True, out_of_bounds='clip')
    iso_g.fit(pred_tr[tr], y[tr])
    oof_global[va] = iso_g.transform(pred_tr[va]).astype(np.float32)
    te_global_acc += iso_g.transform(pred_te).astype(np.float64)

    # Build group maps from train-only
    # anchors
    anchor_to_idx = {}
    for i in np.where(tr)[0]:
        a = anchors_tr[i]
        if a not in anchor_to_idx: anchor_to_idx[a] = []
        anchor_to_idx[a].append(i)
    # cpc3
    cpc3_to_idx = {}
    for i in np.where(tr)[0]:
        c = cpc3_tr[i]
        if c not in cpc3_to_idx: cpc3_to_idx[c] = []
        cpc3_to_idx[c].append(i)

    # Fit per-anchor iso where enough samples, else use global
    anchor_iso = {}
    for a, idxs in anchor_to_idx.items():
        if len(idxs) >= MIN_GRP:
            iso = IsotonicRegression(increasing=True, out_of_bounds='clip')
            iso.fit(pred_tr[idxs], y[idxs])
            anchor_iso[a] = iso
    # Apply to validation fold
    for i in np.where(va)[0]:
        a = anchors_tr[i]
        if a in anchor_iso:
            oof_anchor[i] = anchor_iso[a].transform([pred_tr[i]]).astype(np.float32)[0]
        else:
            oof_anchor[i] = oof_global[i]
    # Apply to test and accumulate
    tmp = np.zeros(len(test_df), dtype=np.float64)
    for j in range(len(test_df)):
        a = anchors_te[j]
        if a in anchor_iso:
            tmp[j] = float(anchor_iso[a].transform([pred_te[j]])[0])
        else:
            tmp[j] = float(iso_g.transform([pred_te[j]])[0])
    te_anchor_acc += tmp

    # Fit per-CPC3 iso where enough samples
    cpc3_iso = {}
    for c, idxs in cpc3_to_idx.items():
        if len(idxs) >= MIN_GRP:
            iso = IsotonicRegression(increasing=True, out_of_bounds='clip')
            iso.fit(pred_tr[idxs], y[idxs])
            cpc3_iso[c] = iso
    # Apply to validation fold
    for i in np.where(va)[0]:
        c = cpc3_tr[i]
        if c in cpc3_iso:
            oof_cpc3[i] = cpc3_iso[c].transform([pred_tr[i]]).astype(np.float32)[0]
        else:
            oof_cpc3[i] = oof_global[i]
    # Apply to test and accumulate
    tmp2 = np.zeros(len(test_df), dtype=np.float64)
    for j in range(len(test_df)):
        c = cpc3_te[j]
        if c in cpc3_iso:
            tmp2[j] = float(cpc3_iso[c].transform([pred_te[j]])[0])
        else:
            tmp2[j] = float(iso_g.transform([pred_te[j]])[0])
    te_cpc3_acc += tmp2

# Average test across folds
te_global = (te_global_acc / F).astype(np.float32)
te_anchor = (te_anchor_acc / F).astype(np.float32)
te_cpc3   = (te_cpc3_acc / F).astype(np.float32)

# Simple hierarchy at OOF: prefer anchor if differs from global (i.e., had model), else cpc3, else global
use_anchor = (np.abs(oof_anchor - oof_global) > 1e-12)
use_cpc3   = (~use_anchor) & (np.abs(oof_cpc3 - oof_global) > 1e-12)
oof_cal = oof_global.copy()
oof_cal[use_cpc3] = oof_cpc3[use_cpc3]
oof_cal[use_anchor] = oof_anchor[use_anchor]

# Same hierarchy for test based on availability proportions (approximate via train groups):
# If an anchor had a model in at least one fold (captured by te_anchor != te_global), prefer it; else if cpc3 had a model, use it; else global.
use_anchor_te = (np.abs(te_anchor - te_global) > 1e-8)
use_cpc3_te   = (~use_anchor_te) & (np.abs(te_cpc3 - te_global) > 1e-8)
te_cal = te_global.copy()
te_cal[use_cpc3_te] = te_cpc3[use_cpc3_te]
te_cal[use_anchor_te] = te_anchor[use_anchor_te]

# Evaluate and save
r_raw = pearsonr(pred_tr, y)[0]
r_cal = pearsonr(oof_cal, y)[0]
print('Post-calibration OOF r: raw=', round(float(r_raw), 6), 'calibrated=', round(float(r_cal), 6), 'delta=', round(float(r_cal - r_raw), 6), flush=True)

pd.DataFrame({'id': train_df['id'], 'oof': oof_cal.astype(np.float32)}).to_csv('oof_stack_nnls_calibrated.csv', index=False)
pd.DataFrame({'id': test_df['id'], 'score': np.clip(te_cal.astype(np.float32), 0.0, 1.0)}).to_csv('submission_stack_nnls_calibrated.csv', index=False)
print('Saved submission_stack_nnls_calibrated.csv', flush=True)

In [27]:
# CE diagnostics + CE-only ElasticNet meta (fold-safe)
import numpy as np, pandas as pd, time
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from scipy.stats import pearsonr

assert 'train' in globals() and 'test' in globals() and 'trX_f' in globals() and 'teX_f' in globals() and 'y' in globals(), 'Run cell 0 first'

# Identify CE families present
ce_tags = []
for tag in ['ce_large','ce_bge_rerank','ce_l12','ce_stsb']:
    if f'{tag}_raw' in trX_f.columns and f'{tag}_iso' in trX_f.columns:
        ce_tags.append(tag)
print('CE tags found:', ce_tags, flush=True)

# Diagnostics: OOF Pearson for ce_*_raw and ce_*_iso
fold_arr = train['fold'].values.astype(int)
good_ce = []
for tag in ce_tags:
    r_raw = pearsonr(trX_f[f'{tag}_raw'].astype(float).values, y)[0]
    r_iso = pearsonr(trX_f[f'{tag}_iso'].astype(float).values, y)[0]
    print(f'{tag}: raw r={r_raw:.6f}, iso r={r_iso:.6f}', flush=True)
    if (r_raw is not None and r_raw >= 0.65) and (r_iso is not None and r_iso >= 0.60):
        good_ce.append(tag)
print('Good CE tags kept:', good_ce, flush=True)
if not good_ce:
    good_ce = ce_tags  # fallback keep all

# Build CE-only feature matrices
keep_cols = []
for tag in good_ce:
    for c in (f'{tag}_raw', f'{tag}_iso', f'{tag}_raw_sq', f'{tag}_raw_x_iso'):
        if c in trX_f.columns:
            keep_cols.append(c)
# Add small lexical anchors
for c in ['soft_tfidf','bm25_okapi_ab','bm25_okapi_ba','bm25_okapi_ab_sq','bm25_okapi_ba_sq']:
    if c in trX_f.columns:
        keep_cols.append(c)
keep_cols = list(dict.fromkeys(keep_cols))  # de-dup
print('CE-meta features:', len(keep_cols))
X_all = trX_f[keep_cols].values.astype(np.float32)
X_te_all = teX_f[keep_cols].values.astype(np.float32)

# Per-fold ElasticNet with standardization; grid search
alphas = [1e-3, 3e-3, 1e-2, 3e-2, 1e-1]
l1_ratios = [0.1, 0.3, 0.5]
ce_meta_oof = np.zeros(len(train), dtype=np.float32)
ce_meta_te_acc = { (a,l): np.zeros(len(test), dtype=np.float32) for a in alphas for l in l1_ratios }
oof_by_cfg = { (a,l): np.zeros(len(train), dtype=np.float32) for a in alphas for l in l1_ratios }

for f in range(NUM_FOLDS):
    tr_idx = np.where(fold_arr != f)[0]
    va_idx = np.where(fold_arr == f)[0]
    X_tr = X_all[tr_idx]; X_va = X_all[va_idx]
    y_tr = y[tr_idx]
    scaler = StandardScaler(with_mean=True, with_std=True)
    X_tr_s = scaler.fit_transform(X_tr)
    X_va_s = scaler.transform(X_va)
    X_te_s = scaler.transform(X_te_all)
    for a in alphas:
        for l in l1_ratios:
            mdl = ElasticNet(alpha=a, l1_ratio=l, random_state=42, max_iter=2000)
            mdl.fit(X_tr_s, y_tr)
            preds_va = mdl.predict(X_va_s).astype(np.float32)
            oof_by_cfg[(a,l)][va_idx] = preds_va
            ce_meta_te_acc[(a,l)] += mdl.predict(X_te_s).astype(np.float32)

# Pick best cfg by OOF Pearson
best_cfg = None; best_r = -1.0
for a in alphas:
    for l in l1_ratios:
        r = pearsonr(oof_by_cfg[(a,l)], y)[0]
        print(f'ElasticNet a={a} l1={l}: OOF r={r:.6f}', flush=True)
        if r > best_r:
            best_r = r; best_cfg = (a,l)
a,l = best_cfg
print('Chosen CE-ElasticNet:', best_cfg, 'OOF r=', round(float(best_r),6), flush=True)
ce_meta_oof = oof_by_cfg[best_cfg].astype(np.float32)
ce_meta_te = (ce_meta_te_acc[best_cfg] / NUM_FOLDS).astype(np.float32)
pd.DataFrame({'id': train['id'], 'oof_ce_meta': ce_meta_oof}).to_csv('oof_ce_meta.csv', index=False)
pd.DataFrame({'id': test['id'], 'score': np.clip(ce_meta_te, 0.0, 1.0)}).to_csv('submission_ce_meta.csv', index=False)
print('Saved CE meta artifacts.', flush=True)

CE tags found: ['ce_large', 'ce_bge_rerank', 'ce_l12', 'ce_stsb']


ce_large: raw r=0.551378, iso r=0.550467


ce_bge_rerank: raw r=0.445772, iso r=0.443981


ce_l12: raw r=0.445772, iso r=0.443981


ce_stsb: raw r=0.426387, iso r=0.445756


Good CE tags kept: []


CE-meta features: 20


  model = cd_fast.enet_coordinate_descent(


  model = cd_fast.enet_coordinate_descent(


  model = cd_fast.enet_coordinate_descent(


  model = cd_fast.enet_coordinate_descent(


  model = cd_fast.enet_coordinate_descent(


  model = cd_fast.enet_coordinate_descent(


  model = cd_fast.enet_coordinate_descent(


  model = cd_fast.enet_coordinate_descent(


ElasticNet a=0.001 l1=0.1: OOF r=0.619543


ElasticNet a=0.001 l1=0.3: OOF r=0.619184


ElasticNet a=0.001 l1=0.5: OOF r=0.618623


ElasticNet a=0.003 l1=0.1: OOF r=0.618726


ElasticNet a=0.003 l1=0.3: OOF r=0.617381


ElasticNet a=0.003 l1=0.5: OOF r=0.616541


ElasticNet a=0.01 l1=0.1: OOF r=0.616727


ElasticNet a=0.01 l1=0.3: OOF r=0.612233


ElasticNet a=0.01 l1=0.5: OOF r=0.611205


ElasticNet a=0.03 l1=0.1: OOF r=0.611173


ElasticNet a=0.03 l1=0.3: OOF r=0.610777


ElasticNet a=0.03 l1=0.5: OOF r=0.610408


ElasticNet a=0.1 l1=0.1: OOF r=0.609684


ElasticNet a=0.1 l1=0.3: OOF r=0.606673


ElasticNet a=0.1 l1=0.5: OOF r=0.605102


Chosen CE-ElasticNet: (0.001, 0.1) OOF r= 0.619543


Saved CE meta artifacts.


In [28]:
# Fixed NNLS (no standardization), try with/without bias; include CE-ElasticNet if available; no calibration
import numpy as np, pandas as pd
from scipy.optimize import nnls
from scipy.stats import pearsonr

assert 'y' in globals(), 'Run cell 0 first to define y'

# Collect bases
bases = []
names = []
if 'oof_pred' in globals() and 'test_pred' in globals():
    names.append('lgbm'); bases.append((oof_pred.astype(np.float64), test_pred.astype(np.float64)))
if 'cat_oof' in globals() and 'cat_te' in globals():
    names.append('cat'); bases.append((cat_oof.astype(np.float64), cat_te.astype(np.float64)))
if 'xgb_oof' in globals() and 'xgb_te' in globals():
    names.append('xgb'); bases.append((xgb_oof.astype(np.float64), xgb_te.astype(np.float64)))
if 'ridge_oof' in globals() and 'ridge_te' in globals():
    names.append('ridge'); bases.append((ridge_oof.astype(np.float64), ridge_te.astype(np.float64)))
if 'ce_meta_oof' in globals() and 'ce_meta_te' in globals():
    names.append('ce_meta'); bases.append((ce_meta_oof.astype(np.float64), ce_meta_te.astype(np.float64)))

P_tr = np.column_stack([b[0] for b in bases]) if bases else None
P_te = np.column_stack([b[1] for b in bases]) if bases else None
y_vec = y.astype(np.float64)
print('Fixed NNLS over bases:', names, flush=True)

def fit_nnls(P_tr, P_te, add_bias: bool):
    if add_bias:
        ones_tr = np.ones((P_tr.shape[0], 1), dtype=np.float64)
        ones_te = np.ones((P_te.shape[0], 1), dtype=np.float64)
        A_tr = np.hstack([P_tr, ones_tr])
        A_te = np.hstack([P_te, ones_te])
        w, _ = nnls(A_tr, y_vec)
        pred_tr = A_tr @ w
        pred_te = A_te @ w
        return w, pred_tr.astype(np.float32), pred_te.astype(np.float32), True
    else:
        w, _ = nnls(P_tr, y_vec)
        pred_tr = P_tr @ w
        pred_te = P_te @ w
        return w, pred_tr.astype(np.float32), pred_te.astype(np.float32), False

best = (-1.0, None, None, None, None)
for add_bias in (False, True):
    w, trp, tep, ab = fit_nnls(P_tr, P_te, add_bias)
    r = pearsonr(trp, y)[0]
    print(f'NNLS (bias={ab}) OOF r={r:.6f}; weights_dim={len(w)}', flush=True)
    if r > best[0]:
        best = (r, w, trp, tep, ab)

best_r, best_w, best_trp, best_tep, best_bias = best
print('Chosen NNLS variant: bias=', best_bias, 'OOF r=', round(float(best_r), 6), flush=True)
print('Weights:', best_w.round(6), flush=True)

pd.DataFrame({'id': train['id'], 'oof': best_trp}).to_csv('oof_stack_nnls_fixed.csv', index=False)
pd.DataFrame({'id': test['id'], 'score': np.clip(best_tep, 0.0, 1.0)}).to_csv('submission_stack_nnls_fixed.csv', index=False)
print('Saved submission_stack_nnls_fixed.csv', flush=True)

Fixed NNLS over bases: ['lgbm', 'cat', 'xgb', 'ridge', 'ce_meta']


NNLS (bias=False) OOF r=0.745432; weights_dim=5


NNLS (bias=True) OOF r=0.745432; weights_dim=6


Chosen NNLS variant: bias= True OOF r= 0.745432


Weights: [0.       0.624995 0.308901 0.066712 0.       0.00142 ]


Saved submission_stack_nnls_fixed.csv


In [130]:
# Export best (raw vs calibrated) NNLS blend to submission.csv based on OOF Pearson
import pandas as pd, numpy as np
from scipy.stats import pearsonr

train = pd.read_csv('train.csv')[['id','score']]

def oof_r(path: str) -> float | None:
    try:
        o = pd.read_csv(path)
        df = train.merge(o, on='id', how='inner')
        r = pearsonr(df['oof'].astype(float).values, df['score'].astype(float).values)[0]
        return float(r)
    except Exception as e:
        print('Failed to eval', path, e)
        return None

raw_oof_path = 'oof_stack_nnls_raw.csv'
cal_oof_path = 'oof_stack_nnls_calibrated.csv'
raw_sub_path = 'submission_stack_nnls_raw.csv'
cal_sub_path = 'submission_stack_nnls_calibrated.csv'

r_raw = oof_r(raw_oof_path)
r_cal = oof_r(cal_oof_path) if (pd.Series([cal_oof_path]).map(lambda p: pd.io.common.file_exists(p)).iloc[0]) else None
print('OOF raw r=', r_raw, 'cal r=', r_cal)

best_path = raw_sub_path
if r_cal is not None and r_cal > (r_raw if r_raw is not None else -1):
    best_path = cal_sub_path
    print('Choosing calibrated submission')
else:
    print('Choosing raw submission')

sub = pd.read_csv(best_path)
sub.rename(columns={'score':'score'}, inplace=True)
sub.to_csv('submission.csv', index=False)
print('Wrote submission.csv from', best_path, 'shape=', sub.shape, 'min=', float(sub.score.min()), 'max=', float(sub.score.max()), 'mean=', float(sub.score.mean()))

OOF raw r= 0.7770992116688046 cal r= 0.7677005185973185
Choosing raw submission
Wrote submission.csv from submission_stack_nnls_raw.csv shape= (3648, 2) min= 0.006142874 max= 0.99729794 mean= 0.38283369440383763


In [121]:
# Environment fix: install exact cu121 torch stack and verify GPU access
import os, sys, subprocess, shutil, time
from pathlib import Path

def pip(*args):
    print(">", *args, flush=True)
    subprocess.run([sys.executable, "-m", "pip", *args], check=True)

print("[GPU Setup] Uninstalling any existing torch stack...", flush=True)
for pkg in ("torch","torchvision","torchaudio","nvidia-nccl-cu12","nvidia-nvjitlink-cu12"):
    subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y", pkg], check=False)

# Clean stray site dirs that can shadow correct wheels (idempotent)
for d in (
    "/app/.pip-target/torch",
    "/app/.pip-target/torch-2.8.0.dist-info",
    "/app/.pip-target/torch-2.4.1.dist-info",
    "/app/.pip-target/torchvision",
    "/app/.pip-target/torchvision-0.23.0.dist-info",
    "/app/.pip-target/torchvision-0.19.1.dist-info",
    "/app/.pip-target/torchaudio",
    "/app/.pip-target/torchaudio-2.8.0.dist-info",
    "/app/.pip-target/torchaudio-2.4.1.dist-info",
    "/app/.pip-target/torchgen",
    "/app/.pip-target/functorch",
):
    if os.path.exists(d):
        print("Removing", d, flush=True)
        shutil.rmtree(d, ignore_errors=True)

print("[GPU Setup] Installing exact cu121 torch stack...", flush=True)
pip("install",
    "--index-url", "https://download.pytorch.org/whl/cu121",
    "--extra-index-url", "https://pypi.org/simple",
    "torch==2.4.1", "torchvision==0.19.1", "torchaudio==2.4.1")

Path("constraints.txt").write_text("torch==2.4.1\ntorchvision==0.19.1\ntorchaudio==2.4.1\n")

print("[GPU Setup] Reinstalling key deps under constraints without touching torch...", flush=True)
pip("install", "-c", "constraints.txt",
    "transformers==4.44.2", "accelerate==0.34.2",
    "sentencepiece", "scikit-learn",
    "--upgrade-strategy", "only-if-needed")

import torch
print("torch:", torch.__version__, "built CUDA:", getattr(torch.version, "cuda", None), flush=True)
print("CUDA available:", torch.cuda.is_available(), flush=True)
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0), flush=True)
else:
    print("[WARN] CUDA still not available. Check container GPU visibility and any site-package shadowing.", flush=True)

[GPU Setup] Uninstalling any existing torch stack...


Found existing installation: torch 2.8.0


Uninstalling torch-2.8.0:


  Successfully uninstalled torch-2.8.0


Found existing installation: torchvision 0.19.1+cu121
Uninstalling torchvision-0.19.1+cu121:
  Successfully uninstalled torchvision-0.19.1+cu121


Found existing installation: torchaudio 2.4.1+cu121
Uninstalling torchaudio-2.4.1+cu121:
  Successfully uninstalled torchaudio-2.4.1+cu121


Found existing installation: nvidia-nccl-cu12 2.28.3
Uninstalling nvidia-nccl-cu12-2.28.3:
  Successfully uninstalled nvidia-nccl-cu12-2.28.3


Found existing installation: nvidia-nvjitlink-cu12 12.9.86
Uninstalling nvidia-nvjitlink-cu12-12.9.86:
  Successfully uninstalled nvidia-nvjitlink-cu12-12.9.86
Removing /app/.pip-target/torch


Removing /app/.pip-target/torch-2.4.1.dist-info


Removing /app/.pip-target/torchgen


[GPU Setup] Installing exact cu121 torch stack...


> install --index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://pypi.org/simple torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1


Looking in indexes: https://download.pytorch.org/whl/cu121, https://pypi.org/simple


Collecting torch==2.4.1
  Downloading https://download.pytorch.org/whl/cu121/torch-2.4.1%2Bcu121-cp311-cp311-linux_x86_64.whl (799.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 799.0/799.0 MB 439.3 MB/s eta 0:00:00


Collecting torchvision==0.19.1
  Downloading https://download.pytorch.org/whl/cu121/torchvision-0.19.1%2Bcu121-cp311-cp311-linux_x86_64.whl (7.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.1/7.1 MB 508.4 MB/s eta 0:00:00


Collecting torchaudio==2.4.1
  Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.4.1%2Bcu121-cp311-cp311-linux_x86_64.whl (3.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.4/3.4 MB 406.7 MB/s eta 0:00:00
Collecting nvidia-cuda-runtime-cu12==12.1.105
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 823.6/823.6 KB 34.7 MB/s eta 0:00:00
Collecting triton==3.0.0
  Downloading triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (209.4 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 209.4/209.4 MB 261.3 MB/s eta 0:00:00


Collecting nvidia-cusparse-cu12==12.1.0.106
  Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 196.0/196.0 MB 315.9 MB/s eta 0:00:00


Collecting fsspec
  Downloading fsspec-2025.9.0-py3-none-any.whl (199 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 199.3/199.3 KB 518.9 MB/s eta 0:00:00


Collecting nvidia-nccl-cu12==2.20.5
  Downloading nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 176.2/176.2 MB 347.8 MB/s eta 0:00:00


Collecting nvidia-cufft-cu12==11.0.2.54
  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 121.6/121.6 MB 370.4 MB/s eta 0:00:00
Collecting jinja2
  Downloading jinja2-3.1.6-py3-none-any.whl (134 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 134.9/134.9 KB 473.2 MB/s eta 0:00:00


Collecting typing-extensions>=4.8.0
  Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.6/44.6 KB 402.3 MB/s eta 0:00:00
Collecting nvidia-cudnn-cu12==9.1.0.70
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 664.8/664.8 MB 251.8 MB/s eta 0:00:00


Collecting filelock
  Downloading filelock-3.19.1-py3-none-any.whl (15 kB)


Collecting nvidia-cuda-nvrtc-cu12==12.1.105
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.7/23.7 MB 312.7 MB/s eta 0:00:00


Collecting nvidia-nvtx-cu12==12.1.105
  Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 99.1/99.1 KB 420.4 MB/s eta 0:00:00


Collecting nvidia-cublas-cu12==12.1.3.1
  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 410.6/410.6 MB 306.8 MB/s eta 0:00:00


Collecting nvidia-curand-cu12==10.3.2.106
  Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.5/56.5 MB 116.4 MB/s eta 0:00:00
Collecting nvidia-cuda-cupti-cu12==12.1.105
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.1/14.1 MB 262.6 MB/s eta 0:00:00
Collecting networkx
  Downloading networkx-3.5-py3-none-any.whl (2.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 565.4 MB/s eta 0:00:00


Collecting nvidia-cusolver-cu12==11.4.5.107
  Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 124.2/124.2 MB 228.6 MB/s eta 0:00:00


Collecting sympy
  Downloading sympy-1.14.0-py3-none-any.whl (6.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.3/6.3 MB 499.7 MB/s eta 0:00:00


Collecting pillow!=8.3.*,>=5.3.0
  Downloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.6/6.6 MB 322.7 MB/s eta 0:00:00


Collecting numpy
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 443.2 MB/s eta 0:00:00
Collecting nvidia-nvjitlink-cu12
  Downloading nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (39.7 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 39.7/39.7 MB 247.6 MB/s eta 0:00:00


Collecting MarkupSafe>=2.0
  Downloading MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23 kB)
Collecting mpmath<1.4,>=1.1.0
  Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 536.2/536.2 KB 527.6 MB/s eta 0:00:00


Installing collected packages: mpmath, typing-extensions, sympy, pillow, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numpy, networkx, MarkupSafe, fsspec, filelock, triton, nvidia-cusparse-cu12, nvidia-cudnn-cu12, jinja2, nvidia-cusolver-cu12, torch, torchvision, torchaudio


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datasets 2.21.0 requires fsspec[http]<=2024.6.1,>=2023.1.0, but you have fsspec 2025.9.0 which is incompatible.


Successfully installed MarkupSafe-3.0.2 filelock-3.19.1 fsspec-2025.9.0 jinja2-3.1.6 mpmath-1.3.0 networkx-3.5 numpy-1.26.4 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.9.86 nvidia-nvtx-cu12-12.1.105 pillow-11.3.0 sympy-1.14.0 torch-2.4.1+cu121 torchaudio-2.4.1+cu121 torchvision-0.19.1+cu121 triton-3.0.0 typing-extensions-4.15.0




[GPU Setup] Reinstalling key deps under constraints without touching torch...


> install -c constraints.txt transformers==4.44.2 accelerate==0.34.2 sentencepiece scikit-learn --upgrade-strategy only-if-needed


Collecting transformers==4.44.2
  Downloading transformers-4.44.2-py3-none-any.whl (9.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.5/9.5 MB 127.2 MB/s eta 0:00:00
Collecting accelerate==0.34.2
  Downloading accelerate-0.34.2-py3-none-any.whl (324 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 324.4/324.4 KB 115.3 MB/s eta 0:00:00


Collecting sentencepiece
  Downloading sentencepiece-0.2.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (1.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 275.6 MB/s eta 0:00:00
Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.7 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.7/9.7 MB 145.2 MB/s eta 0:00:00


Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl (64 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 64.7/64.7 KB 404.5 MB/s eta 0:00:00
Collecting filelock
  Downloading filelock-3.19.1-py3-none-any.whl (15 kB)
Collecting huggingface-hub<1.0,>=0.23.2
  Downloading huggingface_hub-0.35.1-py3-none-any.whl (563 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 563.3/563.3 KB 524.0 MB/s eta 0:00:00
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (762 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 763.0/763.0 KB 548.1 MB/s eta 0:00:00


Collecting numpy>=1.17
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 191.3 MB/s eta 0:00:00
Collecting packaging>=20.0
  Downloading packaging-25.0-py3-none-any.whl (66 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 66.5/66.5 KB 420.5 MB/s eta 0:00:00


Collecting regex!=2019.12.17
  Downloading regex-2025.9.18-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (798 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 799.0/799.0 KB 526.1 MB/s eta 0:00:00
Collecting safetensors>=0.4.1


  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (485 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 485.8/485.8 KB 422.1 MB/s eta 0:00:00


Collecting tokenizers<0.20,>=0.19
  Downloading tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.6/3.6 MB 81.8 MB/s eta 0:00:00
Collecting tqdm>=4.27
  Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.5/78.5 KB 462.2 MB/s eta 0:00:00


Collecting torch>=1.10.0
  Downloading torch-2.4.1-cp311-cp311-manylinux1_x86_64.whl (797.1 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 797.1/797.1 MB 160.1 MB/s eta 0:00:00


Collecting psutil
  Downloading psutil-7.1.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (291 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 291.2/291.2 KB 479.2 MB/s eta 0:00:00
Collecting threadpoolctl>=3.1.0
  Downloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)


Collecting scipy>=1.8.0
  Downloading scipy-1.16.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (35.9 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 35.9/35.9 MB 479.9 MB/s eta 0:00:00
Collecting joblib>=1.2.0
  Downloading joblib-1.5.2-py3-none-any.whl (308 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 308.4/308.4 KB 60.5 MB/s eta 0:00:00


Collecting hf-xet<2.0.0,>=1.1.3
  Downloading hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.2/3.2 MB 492.7 MB/s eta 0:00:00
Collecting typing-extensions>=3.7.4.3
  Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.6/44.6 KB 370.6 MB/s eta 0:00:00
Collecting fsspec>=2023.5.0
  Downloading fsspec-2025.9.0-py3-none-any.whl (199 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 199.3/199.3 KB 480.6 MB/s eta 0:00:00


Collecting networkx
  Downloading networkx-3.5-py3-none-any.whl (2.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 421.5 MB/s eta 0:00:00
Collecting nvidia-cufft-cu12==11.0.2.54
  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 121.6/121.6 MB 198.5 MB/s eta 0:00:00
Collecting nvidia-nccl-cu12==2.20.5
  Downloading nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 176.2/176.2 MB 109.6 MB/s eta 0:00:00
Collecting nvidia-nvtx-cu12==12.1.105
  Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 99.1/99.1 KB 471.3 MB/s eta 0:00:00
Collecting nvidia-cuda-runtime-cu12==12.1.105
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 823.6/823.6 KB 420.8 MB/s eta 0:00:00
Collecting nvidia-cudnn-cu12==9.1.0.70
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 664.8/664.8 MB 195.1 MB/s eta 0:00:00


Collecting triton==3.0.0
  Downloading triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (209.4 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 209.4/209.4 MB 110.9 MB/s eta 0:00:00
Collecting nvidia-cuda-nvrtc-cu12==12.1.105


  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.7/23.7 MB 159.7 MB/s eta 0:00:00


Collecting nvidia-cusparse-cu12==12.1.0.106
  Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 196.0/196.0 MB 159.2 MB/s eta 0:00:00
Collecting nvidia-curand-cu12==10.3.2.106
  Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.5/56.5 MB 138.3 MB/s eta 0:00:00
Collecting nvidia-cusolver-cu12==11.4.5.107
  Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 124.2/124.2 MB 286.1 MB/s eta 0:00:00
Collecting nvidia-cublas-cu12==12.1.3.1
  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 410.6/410.6 MB 266.1 MB/s eta 0:00:00


Collecting jinja2
  Downloading jinja2-3.1.6-py3-none-any.whl (134 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 134.9/134.9 KB 500.2 MB/s eta 0:00:00
Collecting nvidia-cuda-cupti-cu12==12.1.105
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.1/14.1 MB 196.1 MB/s eta 0:00:00
Collecting sympy
  Downloading sympy-1.14.0-py3-none-any.whl (6.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.3/6.3 MB 353.8 MB/s eta 0:00:00
Collecting nvidia-nvjitlink-cu12
  Downloading nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (39.7 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 39.7/39.7 MB 246.9 MB/s eta 0:00:00
Collecting certifi>=2017.4.17
  Downloading certifi-2025.8.3-py3-none-any.whl (161 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 161.2/161.2 KB 258.3 MB/s eta 0:00:00
Collecting idna<4,>=2.5
  Downloading idna-3.10-py3-none-any.whl (70 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 70.4/70.4 KB 452.6 MB/s eta 0:00:00


Collecting charset_normalizer<4,>=2
  Downloading charset_normalizer-3.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (150 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 150.3/150.3 KB 511.2 MB/s eta 0:00:00
Collecting urllib3<3,>=1.21.1
  Downloading urllib3-2.5.0-py3-none-any.whl (129 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 129.8/129.8 KB 459.0 MB/s eta 0:00:00
Collecting MarkupSafe>=2.0


  Downloading MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23 kB)
Collecting mpmath<1.4,>=1.1.0
  Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 536.2/536.2 KB 177.8 MB/s eta 0:00:00


Installing collected packages: mpmath, urllib3, typing-extensions, tqdm, threadpoolctl, sympy, sentencepiece, safetensors, regex, pyyaml, psutil, packaging, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numpy, networkx, MarkupSafe, joblib, idna, hf-xet, fsspec, filelock, charset_normalizer, certifi, triton, scipy, requests, nvidia-cusparse-cu12, nvidia-cudnn-cu12, jinja2, scikit-learn, nvidia-cusolver-cu12, huggingface-hub, torch, tokenizers, transformers, accelerate


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datasets 2.21.0 requires fsspec[http]<=2024.6.1,>=2023.1.0, but you have fsspec 2025.9.0 which is incompatible.


Successfully installed MarkupSafe-3.0.2 accelerate-0.34.2 certifi-2025.8.3 charset_normalizer-3.4.3 filelock-3.19.1 fsspec-2025.9.0 hf-xet-1.1.10 huggingface-hub-0.35.1 idna-3.10 jinja2-3.1.6 joblib-1.5.2 mpmath-1.3.0 networkx-3.5 numpy-1.26.4 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.9.86 nvidia-nvtx-cu12-12.1.105 packaging-25.0 psutil-7.1.0 pyyaml-6.0.2 regex-2025.9.18 requests-2.32.5 safetensors-0.6.2 scikit-learn-1.7.2 scipy-1.16.2 sentencepiece-0.2.1 sympy-1.14.0 threadpoolctl-3.6.0 tokenizers-0.19.1 torch-2.4.1 tqdm-4.67.1 transformers-4.44.2 triton-3.0.0 typing-extensions-4.15.0 urllib3-2.5.0


ImportError: /app/.pip-target/torch/lib/libtorch_cuda.so: undefined symbol: ncclCommRegister

In [124]:
# Quick 2-base calibrated NNLS (lgbm + cat) trial from saved artifacts; overwrite raw artifacts if better
import numpy as np, pandas as pd
from sklearn.isotonic import IsotonicRegression
from scipy.optimize import nnls
from scipy.stats import pearsonr

# Load targets and per-model OOF/test from disk to avoid kernel state dependency
train_df = pd.read_csv('train.csv')
test_df  = pd.read_csv('test.csv')
y = train_df['score'].astype(np.float32).values

lgbm_oof = pd.read_csv('oof_stack_lgbm.csv').merge(train_df[['id']], on='id', how='right')['oof'].astype(np.float32).values
lgbm_te  = pd.read_csv('submission_stack_lgbm.csv').merge(test_df[['id']], on='id', how='right')['score'].astype(np.float32).values
cat_oof  = pd.read_csv('oof_stack_cat.csv').merge(train_df[['id']], on='id', how='right')['oof_cat'].astype(np.float32).values
cat_te   = pd.read_csv('submission_stack_cat.csv').merge(test_df[['id']], on='id', how='right')['score'].astype(np.float32).values

# Fold array by id (fold-safe)
folds_df = pd.read_csv('folds_by_id.csv')
fold_arr = train_df[['id']].merge(folds_df, on='id', how='left', validate='one_to_one')['fold'].astype(int).values

def fold_iso(oof, te, y, folds):
    F = int(folds.max()) + 1
    o2 = np.zeros_like(oof, np.float32); te_list = []
    for f in range(F):
        tr = folds != f; va = folds == f
        iso = IsotonicRegression(increasing=True, out_of_bounds='clip')
        iso.fit(oof[tr], y[tr])
        o2[va] = iso.transform(oof[va]).astype(np.float32)
        te_list.append(iso.transform(te).astype(np.float32))
    te_avg = (np.mean(np.stack(te_list,0),0)).astype(np.float32)
    return o2, te_avg

# Calibrate each base
l_tr, l_te = fold_iso(lgbm_oof, lgbm_te, y.astype(np.float32), fold_arr)
c_tr, c_te = fold_iso(cat_oof,  cat_te,  y.astype(np.float32), fold_arr)

P_tr = np.column_stack([l_tr, c_tr]).astype(np.float64)
P_te = np.column_stack([l_te, c_te]).astype(np.float64)
w, _ = nnls(P_tr, y.astype(np.float64))
w = w / (w.sum() if w.sum() > 0 else 1.0)
two_oof = (P_tr @ w).astype(np.float32)
two_te  = (P_te @ w).astype(np.float32)
r_two = pearsonr(two_oof, y)[0]
print('2-base (lgbm+cat) NNLS OOF r=', round(float(r_two),6), 'weights=', w.round(6), flush=True)

# Compare to current best (from oof_stack_nnls_raw.csv), overwrite if better
try:
    cur = pd.read_csv('oof_stack_nnls_raw.csv')
    r_cur = pearsonr(train_df[['id']].merge(cur, on='id', how='left')['oof'].astype(float).values, y)[0]
except Exception:
    r_cur = -1.0
print('Current raw NNLS OOF r=', round(float(r_cur),6))
if r_two > r_cur:
    pd.DataFrame({'id': train_df['id'], 'oof': two_oof}).to_csv('oof_stack_nnls_raw.csv', index=False)
    pd.DataFrame({'id': test_df['id'], 'score': np.clip(two_te, 0.0, 1.0)}).to_csv('submission_stack_nnls_raw.csv', index=False)
    print('Overwrote raw NNLS artifacts with 2-base blend.', flush=True)
else:
    print('Kept existing raw NNLS artifacts.', flush=True)

2-base (lgbm+cat) NNLS OOF r= 0.776865 weights= [0.390442 0.609558]


Current raw NNLS OOF r= 0.776908
Kept existing raw NNLS artifacts.
