In [None]:
import os
import json
import optuna
import numpy as np
import pandas as pd

from sklearn.metrics import roc_auc_score
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import StratifiedKFold

In [None]:
# Submission template; a copy of it is filled with the stacked predictions
# and written out in the last cell.
sample_submission = pd.read_csv(
    "../input/tabular-playground-series-oct-2021/sample_submission.csv"
)

In [None]:
# Inputs for the meta-model. Judging by the file names these are presumably the
# base models' out-of-fold and test predictions — TODO confirm against the dataset.
df_train = pd.read_csv("../input/tps10meta/oof.csv")
df_test = pd.read_csv("../input/tps10meta/preds.csv")

# Features are every column except "y_valid", which is used as the target.
X = df_train.drop(columns=["y_valid"])
y = df_train["y_valid"]

# Work on a copy: a later cell adds a column to test_data, and a plain alias
# would silently add that column to df_test as well.
test_data = df_test.copy()

In [None]:
def objective(trial):
    """Score one ElasticNet hyperparameter configuration for Optuna.

    Samples ``alpha``, ``l1_ratio`` and ``max_iter`` from the trial, fits an
    ElasticNet on each of 10 stratified folds of the notebook-level ``X``/``y``,
    and collects the held-out predictions into a single out-of-fold vector.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        ROC AUC of the out-of-fold predictions against ``y``.
    """
    param = {
        # suggest_loguniform is deprecated; suggest_float(..., log=True) samples
        # the same log-uniform range.
        "alpha": trial.suggest_float("alpha", 1e-5, 0.8, log=True),
        "l1_ratio": trial.suggest_float("l1_ratio", 1e-5, 1.0),
        "max_iter": trial.suggest_int("max_iter", 1000, 40_000),
    }

    meta_oof_tmp = np.zeros((len(X),))
    # Fixed random_state so every trial is scored on identical splits.
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    for fold, (idx_train, idx_valid) in enumerate(cv.split(X, y)):
        X_train, y_train = X.iloc[idx_train], y.iloc[idx_train]
        X_valid, y_valid = X.iloc[idx_valid], y.iloc[idx_valid]

        model = ElasticNet(**param)
        model.fit(X_train, y_train)
        # Each row is predicted exactly once, by the model that did not see it.
        meta_oof_tmp[idx_valid] = model.predict(X_valid)

    return roc_auc_score(y, meta_oof_tmp)


# Maximise the out-of-fold ROC AUC returned by `objective`.
# The sampler is seeded so the search (and therefore `params`) is reproducible
# on Restart & Run All; TPE is Optuna's default single-objective sampler, so
# only the seed is new here.
# NOTE(review): `objective` never calls trial.report(), so the pruner below has
# no intermediate values to act on and no trial will be pruned.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=1),
    pruner=optuna.pruners.MedianPruner(
        n_startup_trials=5, n_warmup_steps=10, interval_steps=5
    ),
)
study.optimize(objective, n_trials=50)
print("Number of finished trials:", len(study.trials))
print("Best trial:", study.best_trial.params)

# Best hyperparameters, used by the final cross-validated fit in the next cell.
params = study.best_trial.params

# Parallel-coordinate view of the sampled hyperparameters against the objective.
fig = optuna.visualization.plot_parallel_coordinate(study)
fig.show()

In [None]:
%%time
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import ElasticNet
from sklearn.metrics import roc_auc_score

# Accumulators: per-fold AUCs, per-fold test predictions, and the
# out-of-fold prediction vector (one slot per training row).
scores_tmp = []
meta_pred_tmp = []
meta_oof_tmp = np.zeros((len(X),))

# Same splitter settings as the tuning objective (10 stratified folds, seed 1).
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)

for fold, (idx_train, idx_valid) in enumerate(kf.split(X, y)):
    # Rows used for fitting vs. rows held out for this fold.
    X_train, X_valid = X.iloc[idx_train], X.iloc[idx_valid]
    y_train, y_valid = y.iloc[idx_train], y.iloc[idx_valid]

    # ElasticNet with the best hyperparameters found by the study.
    model = ElasticNet(**params).fit(X_train, y_train)

    # Held-out predictions go back into their original row positions,
    # then the fold is scored on exactly those rows.
    meta_oof_tmp[idx_valid] = model.predict(X_valid)
    score = roc_auc_score(y_valid, meta_oof_tmp[idx_valid])
    scores_tmp.append(score)

    print(f"Fold: {fold + 1} Score: {score}")
    print("--" * 20)

    # Keep this fold model's test predictions; they are averaged across folds later.
    y_hat = model.predict(test_data)
    meta_pred_tmp.append(y_hat)

# AUC over the complete out-of-fold vector.
print(f"Overall Validation Score | Meta: {roc_auc_score(y, meta_oof_tmp)}")
print("::" * 20)

In [None]:
# Store the meta-model output as a new "elasticnet0" column: out-of-fold
# predictions for the training rows, fold-averaged predictions for the test rows.
# NOTE: X and test_data are modified in place, so re-running the tuning or CV
# cells after this one would feed "elasticnet0" back in as an input feature.
X["elasticnet0"] = meta_oof_tmp
test_data["elasticnet0"] = np.column_stack(meta_pred_tmp).mean(axis=1)

In [None]:
# Build the submission from a copy of the template, with "target" taken from
# the fold-averaged ElasticNet test predictions (aligned on the row index),
# then write it without the index column.
stacked_submission = sample_submission.assign(target=test_data["elasticnet0"])
stacked_submission.to_csv("./stacked_submission.csv", index=False)