In [1]:
# Elastic-Net Logistic Regression (saga) with StandardScaler on fixed 6-folds
import numpy as np, pandas as pd, json, time
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

# Seed passed to every LogisticRegression fit below.
SEED = 2025

# Column roles: anything that is neither the id nor the target is a feature.
id_col = 'id'; target_col = 'species'

train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Encode the target labels as integer class codes; K is the number of classes.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(train[target_col].values)
K = len(le.classes_)

# Train and test matrices share the same feature column order.
feature_cols = [c for c in train.columns if c != id_col and c != target_col]
X = train[feature_cols].values.astype(np.float64)
X_test = test[feature_cols].values.astype(np.float64)

# Fold definitions are read from disk rather than generated here; they are
# later iterated as (train_indices, validation_indices) pairs.
with open('folds_6.json', 'r') as f:
    folds = json.load(f)

def clip_norm(P):
    """Clip every entry of P into [1e-15, 1 - 1e-15] and renormalize rows.

    Each row of the returned array is divided by its own sum, so rows add up
    to 1 after clipping. P is expected to be 2-D (the sum runs over axis 1).
    """
    eps = 1e-15
    bounded = np.clip(P, eps, 1-eps)
    row_totals = bounded.sum(axis=1, keepdims=True)
    return bounded / row_totals

def _scale_folds():
    """Standardize every CV fold once so the whole hyperparameter grid can reuse it.

    Reads the module-level ``folds``, ``X`` and ``X_test``. For each
    ``(train_indices, validation_indices)`` pair a fresh ``StandardScaler`` is
    fitted on the training rows only and applied to the training rows, the
    validation rows and the full test matrix.

    Returns:
        A list with one ``(trn_idx, val_idx, X_tr, X_va, X_te)`` tuple per
        fold, where the index entries are NumPy arrays.
    """
    scaled = []
    for trn_idx, val_idx in folds:
        trn_idx = np.array(trn_idx); val_idx = np.array(val_idx)
        sc = StandardScaler()
        X_tr = sc.fit_transform(X[trn_idx])
        X_va = sc.transform(X[val_idx])
        X_te = sc.transform(X_test)
        scaled.append((trn_idx, val_idx, X_tr, X_va, X_te))
    return scaled


def run_enet_lr(Cs=(15,20,25,30,35,40,50,60,80), l1_ratios=(0.05,0.1)):
    """Grid-search an elastic-net multinomial logistic regression over fixed folds.

    For every ``(l1_ratio, C)`` combination a model is fitted on each fold's
    scaled training rows; validation probabilities fill an out-of-fold (OOF)
    matrix and test probabilities are averaged over the folds. Combinations
    are ranked by the log loss of the clipped/renormalized OOF matrix.

    Uses the module-level ``X``, ``X_test``, ``y``, ``K``, ``folds`` and
    ``SEED``.

    Args:
        Cs: Values of ``C`` to try.
        l1_ratios: Elastic-net mixing values to try.

    Returns:
        ``(best, best_oof, best_test)`` where ``best`` is
        ``((C, l1_ratio), oof_log_loss)`` for the lowest OOF log loss, and the
        two arrays are the OOF and fold-averaged test probabilities of that
        combination. If either grid is empty, ``best`` is ``(None, 1e9)`` and
        both arrays are ``None``.
    """
    # Scaling depends only on the fold, not on (C, l1_ratio), so it is done
    # once here instead of inside the grid loops. This keeps one scaled copy
    # of the train/validation/test matrices per fold in memory.
    fold_data = _scale_folds()
    n_folds = len(folds)

    best = (None, 1e9); best_oof=None; best_test=None
    for l1r in l1_ratios:
        for C in Cs:
            # Fresh arrays per combination, so the stored best_* arrays are
            # never overwritten by later combinations.
            oof = np.zeros((len(X), K), dtype=np.float64)
            test_pred = np.zeros((len(X_test), K), dtype=np.float64)
            t0 = time.time()
            for i, (trn_idx, val_idx, X_tr, X_va, X_te) in enumerate(fold_data, 1):
                clf = LogisticRegression(multi_class='multinomial', solver='saga', penalty='elasticnet', l1_ratio=l1r, C=C, max_iter=8000, tol=1e-4, random_state=SEED)
                fstart = time.time()
                clf.fit(X_tr, y[trn_idx])
                oof[val_idx] = clf.predict_proba(X_va)
                # Equal-weight average of the per-fold test predictions.
                test_pred += clf.predict_proba(X_te) / n_folds
                print(f'[ENet LR fold {i}/{n_folds}] C={C}, l1r={l1r}, time={time.time()-fstart:.2f}s', flush=True)
            ll = log_loss(y, clip_norm(oof), labels=list(range(K)))
            print(f'ENet LR OOF: {ll:.6f} | C={C}, l1r={l1r} in {time.time()-t0:.2f}s', flush=True)
            # Strict '<' keeps the earliest combination on ties.
            if ll < best[1]:
                best = ((C, l1r), ll); best_oof=oof; best_test=test_pred
    print('Best ENet LR:', best)
    return best, best_oof, best_test

# Run the grid search, then persist the winning OOF and test probability
# matrices as .npy files.
best_enet, oof_enet, test_enet = run_enet_lr()
for _path, _arr in (('oof_enet_lr.npy', oof_enet), ('test_enet_lr.npy', test_enet)):
    np.save(_path, _arr)
print('Saved oof_enet_lr.npy and test_enet_lr.npy')



[ENet LR fold 1/6] C=15, l1r=0.05, time=189.39s




[ENet LR fold 2/6] C=15, l1r=0.05, time=199.44s


