In [1]:
# Blending and submission builder: base + large
import numpy as np, pandas as pd, time
from scipy.stats import spearmanr, rankdata
from sklearn.linear_model import RidgeCV

# Wall-clock start; the elapsed time is reported once the submissions are written.
t0 = time.time()

# Competition tables, read from the working directory.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
sample_sub = pd.read_csv('sample_submission.csv')

# The sample submission defines the layout: its first column is the row id,
# every other column is a prediction target.
id_col = sample_sub.columns[0]
target_cols = [name for name in sample_sub.columns if not name == id_col]

# Fold labels used below to split training rows for the stacking step.
folds = np.load('folds.npy')

def spearman_cols(y_pred: np.ndarray, y_true: np.ndarray):
    """Score predictions column by column with Spearman rank correlation.

    Column ``j`` of ``y_pred`` is compared with column ``j`` of ``y_true``;
    the number of columns scored is taken from ``y_pred``. A correlation
    that comes back as ``None`` or NaN is counted as ``0.0``.

    Returns:
        A ``(mean, per_column)`` tuple: the mean of the per-column
        correlations as a float, and the list of per-column floats.
    """
    def _column_rho(j):
        rho = spearmanr(y_pred[:, j], y_true[:, j]).correlation
        if rho is None or np.isnan(rho):
            return 0.0
        return float(rho)

    per_column = [_column_rho(j) for j in range(y_pred.shape[1])]
    return float(np.mean(per_column)), per_column

def to_rank01(v: np.ndarray) -> np.ndarray:
    """Map values to their average-tie ranks, rescaled to the range [0, 1].

    The smallest rank (1) maps to 0. The divisor is ``len - 1`` but never
    less than 1, so a single-element input yields 0 instead of dividing
    by zero. The result is returned as float32.
    """
    ranks = rankdata(v, method='average').astype(np.float64)
    gaps = len(ranks) - 1
    # Guard the divisor so very short inputs do not divide by zero.
    divisor = gaps if gaps > 1.0 else 1.0
    scaled = (ranks - 1.0) / divisor
    return scaled.astype(np.float32)

# Load OOF/test predictions from the base and large models
oof_base = np.load('oof_all_targets_deberta_base.npy')
te_base  = np.load('test_all_targets_deberta_base.npy')
oof_large = np.load('oof_all_targets_deberta_large.npy')
te_large  = np.load('test_all_targets_deberta_large.npy')
y_true = train[target_cols].to_numpy(dtype=np.float32)


def _require_shape(label, arr, expected):
    """Raise ValueError unless ``arr.shape`` equals the ``expected`` tuple."""
    if arr.shape != expected:
        raise ValueError(f'{label} has shape {arr.shape}, expected {expected}')


# Sanity shapes. Explicit raises are used instead of `assert` so the checks
# still run under `python -O`. Besides the OOF matrices, the test matrices
# must have one column per target (the blends below combine them
# element-wise and the submission writer reads column i for target i).
_expected_oof_shape = (len(train), len(target_cols))
_expected_te_shape = (len(test), len(target_cols))
_require_shape('oof_base', oof_base, _expected_oof_shape)
_require_shape('oof_large', oof_large, _expected_oof_shape)
_require_shape('te_base', te_base, _expected_te_shape)
_require_shape('te_large', te_large, _expected_te_shape)

# The fold labels index training rows in the stacking step; a shorter vector
# would silently leave some stacked OOF rows at their zero initial value.
if len(folds) != len(train):
    raise ValueError(
        f'folds has {len(folds)} entries, expected one per training row ({len(train)})'
    )

# Individual OOF scores: mean per-target Spearman for each model on its own.
base_oof_score = spearman_cols(oof_base, y_true)[0]
large_oof_score = spearman_cols(oof_large, y_true)[0]
print(f'Base OOF mean-30: {base_oof_score:.5f} | Large OOF mean-30: {large_oof_score:.5f}')

# Raw weighted blend: the same fixed weights are applied to the OOF and the
# test predictions, with the larger weight on the large model.
w_large, w_base = 0.65, 0.35
oof_raw, te_raw = (
    w_large * large_pred + w_base * base_pred
    for large_pred, base_pred in ((oof_large, oof_base), (te_large, te_base))
)
raw_oof_score = spearman_cols(oof_raw, y_true)[0]
print(f'Raw 0.65L/0.35B OOF mean-30: {raw_oof_score:.5f}')

# Rank-based per-target blend: each model's OOF column is replaced by its
# [0, 1]-scaled rank before the weighted average is taken.
oof_base_rank = np.zeros_like(oof_base, dtype=np.float32)
oof_large_rank = np.zeros_like(oof_large, dtype=np.float32)
for raw_scores, ranked in ((oof_base, oof_base_rank), (oof_large, oof_large_rank)):
    for col in range(oof_base.shape[1]):
        ranked[:, col] = to_rank01(raw_scores[:, col])
oof_rankblend = w_large * oof_large_rank + w_base * oof_base_rank
rank_oof_score = spearman_cols(oof_rankblend, y_true)[0]
print(f'Rank 0.65L/0.35B OOF mean-30: {rank_oof_score:.5f}')

# Same treatment for the test predictions: every column is ranked within
# that model's own test outputs, then the ranks are averaged with the
# same weights.
te_base_rank = np.zeros_like(te_base, dtype=np.float32)
te_large_rank = np.zeros_like(te_large, dtype=np.float32)
for raw_scores, ranked in ((te_base, te_base_rank), (te_large, te_large_rank)):
    for col in range(te_base.shape[1]):
        ranked[:, col] = to_rank01(raw_scores[:, col])
te_rankblend = w_large * te_large_rank + w_base * te_base_rank

# Ridge stacking, one meta-model per target. For each fold a RidgeCV is fitted
# on the other folds' OOF predictions and predicts the held-out fold, so every
# stacked OOF row comes from a model that did not see it. Test predictions are
# the mean over the per-fold meta-models.
alphas = [0.1, 0.3, 1.0, 3.0, 10.0]
unique_folds = np.unique(folds)
oof_ridge = np.zeros_like(oof_base, dtype=np.float32)
te_ridge = np.zeros_like(te_base, dtype=np.float32)

# (fit rows, held-out rows) per fold; these depend only on the fold labels,
# so they are built once and reused for every target.
fold_splits = [
    (np.where(folds != fold_id)[0], np.where(folds == fold_id)[0])
    for fold_id in unique_folds
]

for i_col in range(len(target_cols)):
    # Two meta-features per row: the base and the large model's prediction.
    meta_train = np.stack([oof_base[:, i_col], oof_large[:, i_col]], axis=1)
    meta_test = np.stack([te_base[:, i_col], te_large[:, i_col]], axis=1)
    target = y_true[:, i_col]

    per_fold_test = []
    for fit_rows, held_out_rows in fold_splits:
        stacker = RidgeCV(alphas=alphas, fit_intercept=True)
        stacker.fit(meta_train[fit_rows], target[fit_rows])
        oof_ridge[held_out_rows, i_col] = stacker.predict(meta_train[held_out_rows]).astype(np.float32)
        per_fold_test.append(stacker.predict(meta_test).astype(np.float32))

    te_ridge[:, i_col] = np.mean(np.stack(per_fold_test, axis=0), axis=0).astype(np.float32)

ridge_oof_score = spearman_cols(oof_ridge, y_true)[0]
print(f'Ridge stack OOF mean-30: {ridge_oof_score:.5f}')

# Pick best OOF strategy. Each candidate is (name, OOF score, test predictions
# clipped to [0, 1] as float32); the highest OOF score wins, and on a tie the
# earliest-listed candidate is kept.
candidates = [
    (label, oof_score, np.clip(test_pred, 0, 1).astype(np.float32))
    for label, oof_score, test_pred in (
        ('raw', raw_oof_score, te_raw),
        ('rank', rank_oof_score, te_rankblend),
        ('ridge', ridge_oof_score, te_ridge),
    )
]
best_name, best_oof, best_test = max(candidates, key=lambda cand: cand[1])
print('Best strategy:', best_name, 'OOF:', round(best_oof,5))

# Save diagnostics: every blend's OOF and test predictions, clipped to [0, 1]
# and stored as float32 .npy files.
blend_outputs = (
    ('oof_blend_raw.npy', oof_raw),
    ('oof_blend_rank.npy', oof_rankblend),
    ('oof_blend_ridge.npy', oof_ridge),
    ('test_blend_raw.npy', te_raw),
    ('test_blend_rank.npy', te_rankblend),
    ('test_blend_ridge.npy', te_ridge),
)
for out_path, blend_pred in blend_outputs:
    np.save(out_path, np.clip(blend_pred, 0, 1).astype(np.float32))

def write_sub(pred, path):
    """Write ``pred`` to ``path`` as a CSV laid out like the sample submission.

    Works on a copy of the module-level ``sample_sub``: the id column is
    filled from ``test`` and the k-th entry of ``target_cols`` receives
    ``pred[:, k]``. The frame is written without its index.
    """
    frame = sample_sub.copy()
    frame[id_col] = test[id_col].values
    for k, target_name in enumerate(target_cols):
        frame[target_name] = pred[:, k]
    frame.to_csv(path, index=False)

# One submission file per blending strategy (clipped to [0, 1]), plus the
# chosen best strategy written as the main submission.
for blend_test, csv_path in (
    (te_raw, 'submission_blend_raw.csv'),
    (te_rankblend, 'submission_blend_rank.csv'),
    (te_ridge, 'submission_blend_ridge.csv'),
):
    write_sub(np.clip(blend_test, 0, 1), csv_path)
write_sub(best_test, 'submission.csv')

elapsed_s = round(time.time() - t0, 1)
print('Saved submissions. Final chosen:', best_name, '| elapsed', elapsed_s, 's')

print('Done.')

Base OOF mean-30: 0.32984 | Large OOF mean-30: 0.34386
Raw 0.65L/0.35B OOF mean-30: 0.35617
Rank 0.65L/0.35B OOF mean-30: 0.35561


Ridge stack OOF mean-30: 0.35300
Best strategy: raw OOF: 0.35617
Saved submissions. Final chosen: raw | elapsed 1.1 s
Done.
