In [2]:
# RidgeClassifier + Platt scaling and LDA(lsqr, shrinkage) on fixed 6-folds
import numpy as np, pandas as pd, json, time
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import log_loss
from sklearn.linear_model import RidgeClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

SEED = 2025
np.random.seed(SEED)

def clip_norm(P):
    P = np.clip(P, 1e-15, 1-1e-15)
    return P / P.sum(axis=1, keepdims=True)

def save_probs_and_logits(prefix: str, oof: np.ndarray, test_pred: np.ndarray):
    np.save(f'oof_{prefix}.npy', oof.astype(np.float32))
    np.save(f'test_{prefix}.npy', test_pred.astype(np.float32))
    oof_log = np.log(np.clip(oof, 1e-15, 1.0))
    test_log = np.log(np.clip(test_pred, 1e-15, 1.0))
    np.save(f'oof_{prefix}_logits.npy', oof_log.astype(np.float32))
    np.save(f'test_{prefix}_logits.npy', test_log.astype(np.float32))
    print(f'Saved oof_{prefix}.npy, test_{prefix}.npy and *_logits.npy', flush=True)

# Load data and folds
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
id_col = 'id'; target_col = 'species'
feature_cols = [c for c in train.columns if c not in [id_col, target_col]]
X = train[feature_cols].values.astype(np.float64, copy=True)
X_test = test[feature_cols].values.astype(np.float64, copy=True)
le = LabelEncoder()
y = le.fit_transform(train[target_col].values)
K = len(le.classes_)
with open('folds_6.json', 'r') as f:
    folds = [(np.array(a, dtype=np.int64), np.array(b, dtype=np.int64)) for a,b in json.load(f)]
print('Data ready:', X.shape, X_test.shape, 'Classes:', K, 'Folds:', len(folds), flush=True)

def run_ridge_platt(alphas=(0.5,1.0,2.0,5.0)):
    print('Running RidgeClassifier + Platt scaling', flush=True)
    best = (None, 1e9); best_oof=None; best_test=None
    for a in alphas:
        t0 = time.time()
        oof = np.zeros((len(X), K), dtype=np.float64)
        test_pred = np.zeros((len(X_test), K), dtype=np.float64)
        for i, (trn_idx, val_idx) in enumerate(folds, 1):
            sc = StandardScaler(with_mean=True, with_std=True)
            X_tr = sc.fit_transform(X[trn_idx])
            X_va = sc.transform(X[val_idx])
            X_te = sc.transform(X_test)
            clf = RidgeClassifier(alpha=a, tol=1e-6, max_iter=20000)
            fstart = time.time()
            clf.fit(X_tr, y[trn_idx])
            # Calibrate on validation fold
            cal = CalibratedClassifierCV(estimator=clf, method='sigmoid', cv='prefit')
            cal.fit(X_va, y[val_idx])
            P_va = cal.predict_proba(X_va)
            P_te = cal.predict_proba(X_te)
            oof[val_idx] = P_va
            test_pred += P_te / len(folds)
            print(f'  [Ridge fold {i}/{len(folds)}] alpha={a} time={time.time()-fstart:.2f}s', flush=True)
        ll = log_loss(y, clip_norm(oof), labels=list(range(K)))
        print(f'--> Ridge+Platt OOF={ll:.6f} | alpha={a} | time {time.time()-t0:.2f}s', flush=True)
        if ll < best[1]:
            best = (a, ll); best_oof=oof; best_test=test_pred
    print('Best Ridge+Platt:', best, flush=True)
    if best_oof is not None:
        save_probs_and_logits('ridge_platt', best_oof, best_test)
    return best, best_oof, best_test

def run_lda_lsqr(shrinkages=('auto',), use_std=False):
    print('Running LDA (lsqr) with shrinkage', flush=True)
    best = (None, 1e9); best_oof=None; best_test=None
    for sh in shrinkages:
        t0 = time.time()
        oof = np.zeros((len(X), K), dtype=np.float64)
        test_pred = np.zeros((len(X_test), K), dtype=np.float64)
        for i, (trn_idx, val_idx) in enumerate(folds, 1):
            sc = StandardScaler(with_mean=True, with_std=bool(use_std))
            X_tr = sc.fit_transform(X[trn_idx])
            X_va = sc.transform(X[val_idx])
            X_te = sc.transform(X_test)
            clf = LDA(solver='lsqr', shrinkage=sh)
            fstart = time.time()
            clf.fit(X_tr, y[trn_idx])
            P_va = clf.predict_proba(X_va)
            P_te = clf.predict_proba(X_te)
            oof[val_idx] = P_va
            test_pred += P_te / len(folds)
            print(f'  [LDA fold {i}/{len(folds)}] shrinkage={sh} time={time.time()-fstart:.2f}s', flush=True)
        ll = log_loss(y, clip_norm(oof), labels=list(range(K)))
        print(f'--> LDA OOF={ll:.6f} | shrinkage={sh} | time {time.time()-t0:.2f}s', flush=True)
        if ll < best[1]:
            best = (sh, ll); best_oof=oof; best_test=test_pred
    print('Best LDA:', best, flush=True)
    if best_oof is not None:
        save_probs_and_logits('lda_lsqr', best_oof, best_test)
    return best, best_oof, best_test

# Run both models
best_ridge, oof_ridge, test_ridge = run_ridge_platt(alphas=(0.5,1.0,2.0,5.0))
best_lda, oof_lda, test_lda = run_lda_lsqr(shrinkages=('auto',), use_std=False)
print('Done. Best Ridge:', best_ridge, '| Best LDA:', best_lda)

Data ready: (891, 192) (99, 192) Classes: 99 Folds: 6


Running RidgeClassifier + Platt scaling


  [Ridge fold 1/6] alpha=0.5 time=0.22s


  [Ridge fold 2/6] alpha=0.5 time=0.22s


  [Ridge fold 3/6] alpha=0.5 time=0.22s


  [Ridge fold 4/6] alpha=0.5 time=0.21s


  [Ridge fold 5/6] alpha=0.5 time=0.22s


  [Ridge fold 6/6] alpha=0.5 time=0.23s


--> Ridge+Platt OOF=1.080926 | alpha=0.5 | time 1.35s


  [Ridge fold 1/6] alpha=1.0 time=0.21s


  [Ridge fold 2/6] alpha=1.0 time=0.24s


  [Ridge fold 3/6] alpha=1.0 time=0.21s


  [Ridge fold 4/6] alpha=1.0 time=0.21s


  [Ridge fold 5/6] alpha=1.0 time=0.21s


  [Ridge fold 6/6] alpha=1.0 time=0.21s


--> Ridge+Platt OOF=1.069397 | alpha=1.0 | time 1.31s


  [Ridge fold 1/6] alpha=2.0 time=0.22s


  [Ridge fold 2/6] alpha=2.0 time=0.22s


  [Ridge fold 3/6] alpha=2.0 time=0.21s


  [Ridge fold 4/6] alpha=2.0 time=0.20s


  [Ridge fold 5/6] alpha=2.0 time=0.20s


  [Ridge fold 6/6] alpha=2.0 time=0.21s


--> Ridge+Platt OOF=1.058868 | alpha=2.0 | time 1.29s


  [Ridge fold 1/6] alpha=5.0 time=0.20s


  [Ridge fold 2/6] alpha=5.0 time=0.22s


  [Ridge fold 3/6] alpha=5.0 time=0.22s


  [Ridge fold 4/6] alpha=5.0 time=0.22s


  [Ridge fold 5/6] alpha=5.0 time=0.23s


  [Ridge fold 6/6] alpha=5.0 time=0.22s


--> Ridge+Platt OOF=1.046777 | alpha=5.0 | time 1.36s


Best Ridge+Platt: (5.0, 1.0467774535782812)


Saved oof_ridge_platt.npy, test_ridge_platt.npy and *_logits.npy


Running LDA (lsqr) with shrinkage


  [LDA fold 1/6] shrinkage=auto time=0.22s


  [LDA fold 2/6] shrinkage=auto time=0.23s


  [LDA fold 3/6] shrinkage=auto time=0.24s


  [LDA fold 4/6] shrinkage=auto time=0.24s


  [LDA fold 5/6] shrinkage=auto time=0.23s


  [LDA fold 6/6] shrinkage=auto time=0.24s


--> LDA OOF=1.002910 | shrinkage=auto | time 1.45s


Best LDA: ('auto', 1.0029097252880086)


Saved oof_lda_lsqr.npy, test_lda_lsqr.npy and *_logits.npy


Done. Best Ridge: (5.0, 1.0467774535782812) | Best LDA: ('auto', 1.0029097252880086)
