In [1]:
import torch
from pytorch_tabnet.tab_model import TabNetClassifier
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score

import optuna
from optuna import Trial, visualization



# Echo the installed PyTorch version as this cell's output (useful when reproducing results).
torch.__version__

  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'optuna'

In [7]:
!nvidia-smi

Mon Apr 11 21:50:39 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 512.15       Driver Version: 512.15       CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   52C    P8     4W /  N/A |      0MiB /  6144MiB |      1%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [7]:
# Read the pre-computed tabular features and the label table, then attach each
# row's label by joining on the shared 'sequence' column (left join: every
# feature row is kept).
data = pd.read_csv('../Data/train_features_computed_tabular.csv')
labels = pd.read_csv('../Data/train_labels.csv')
data = pd.merge(data, labels, how='left', on='sequence')

# Target vector is the 'state' column; the feature matrix is everything except
# the join key and the target.
y = data['state'].values
X = data.drop(columns=['sequence', 'state']).values

In [6]:
# Standardise the feature matrix in one step (fit the scaler on X, then
# transform X with it).
# NOTE(review): the scaler is fitted on the full matrix before any CV split and
# X is overwritten in place — confirm both are intended.
scaler = StandardScaler()
X = scaler.fit_transform(X)
# `model` is kept as a second name for the fitted scaler, as in the original cell.
model = scaler

In [None]:
def Objective(trial):
    """Optuna objective: mean 3-fold cross-validated ROC AUC of a TabNet classifier.

    Reads the module-level arrays ``X`` (features) and ``y`` (labels).

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyperparameters ``mask_type``, ``n_da``,
        ``n_steps``, ``gamma``, ``n_shared``, ``lambda_sparse``,
        ``patienceScheduler``, ``patience`` and ``epochs``.

    Returns
    -------
    float
        Mean validation ROC AUC across the folds (higher is better).
    """
    # Sample every hyperparameter once, up front, so all folds of a trial share
    # the same configuration and the search space is visible in one place.
    mask_type = trial.suggest_categorical("mask_type", ["entmax", "sparsemax"])
    n_da = trial.suggest_int("n_da", 56, 64, step=4)
    n_steps = trial.suggest_int("n_steps", 1, 3, step=1)
    gamma = trial.suggest_float("gamma", 1., 1.4, step=0.2)
    n_shared = trial.suggest_int("n_shared", 1, 3)
    lambda_sparse = trial.suggest_float("lambda_sparse", 1e-6, 1e-3, log=True)
    # Scheduler patience range (3-10) sits below the early-stopping range (15-30).
    scheduler_patience = trial.suggest_int("patienceScheduler", low=3, high=10)
    patience = trial.suggest_int("patience", low=15, high=30)
    max_epochs = trial.suggest_int("epochs", 1, 100)

    param = dict(
        n_d=n_da, n_a=n_da, n_steps=n_steps, gamma=gamma,
        lambda_sparse=lambda_sparse, optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
        mask_type=mask_type, n_shared=n_shared,
        scheduler_params=dict(
            # The monitored validation metric is AUC, which must be maximised;
            # with mode="min" the learning rate would be cut while the model is
            # still improving.
            # NOTE(review): assumes pytorch-tabnet steps ReduceLROnPlateau on
            # the early-stopping metric — confirm against the installed version.
            mode="max",
            patience=scheduler_patience,
            min_lr=1e-5,
            factor=0.5,
        ),
        scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
        verbose=0,
    )

    kf = KFold(n_splits=3, random_state=42, shuffle=True)
    fold_aucs = []
    for train_index, valid_index in kf.split(X):
        X_train, X_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]

        clf = TabNetClassifier(**param)
        clf.fit(X_train=X_train, y_train=y_train,
                eval_set=[(X_valid, y_valid)],
                patience=patience, max_epochs=max_epochs,
                eval_metric=['auc'])

        # ROC AUC needs a continuous score, not hard class labels; use the
        # probability of the positive class (assumes a binary target, as the
        # 'auc' eval metric already implies).
        valid_proba = clf.predict_proba(X_valid)[:, 1]
        # roc_auc_score's signature is (y_true, y_score); it has no `y_pred` keyword.
        fold_aucs.append(roc_auc_score(y_valid, valid_proba))

    return float(np.mean(fold_aucs))

In [None]:
def ObjectiveXGB(trial):
    """Optuna objective: mean 3-fold cross-validated ROC AUC of an XGBoost classifier.

    Named ``ObjectiveXGB`` so that it does not shadow the TabNet ``Objective``
    defined earlier in the notebook. Reads the module-level arrays ``X``
    (features) and ``y`` (labels).

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the XGBoost hyperparameters and the
        early-stopping patience.

    Returns
    -------
    float
        Mean validation ROC AUC across the folds (higher is better).
    """
    # Imported here because only this objective needs xgboost; the rest of the
    # notebook stays runnable when the package is not installed.
    import xgboost as xgb

    param = {
        'tree_method': 'gpu_hist',  # train on the GPU to speed up fitting
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.3,0.4,0.5,0.6,0.7,0.8,0.9, 1.0]),
        'subsample': trial.suggest_categorical('subsample', [0.4,0.5,0.6,0.7,0.8,1.0]),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.008,0.009,0.01,0.012,0.014,0.016,0.018, 0.02]),
        'n_estimators': 4000,
        'max_depth': trial.suggest_categorical('max_depth', [5,7,9,11,13,15,17,20]),
        'random_state': trial.suggest_categorical('random_state', [24, 48,2020]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
    }
    # Early stopping and the evaluation metric are estimator settings.
    # NOTE(review): passing them to the constructor presumably needs
    # xgboost >= 1.6 — confirm against the installed version.
    param['eval_metric'] = 'auc'
    param['early_stopping_rounds'] = trial.suggest_int("patience", low=15, high=30)

    kf = KFold(n_splits=3, random_state=42, shuffle=True)
    fold_aucs = []
    for train_index, valid_index in kf.split(X):
        X_train, X_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]

        clf = xgb.XGBClassifier(**param)
        # XGBClassifier.fit takes the data positionally (X, y); verbose=False
        # keeps the per-round metric log out of the notebook output.
        clf.fit(X_train, y_train,
                eval_set=[(X_valid, y_valid)],
                verbose=False)

        # Score with the positive-class probability so the objective matches the
        # 'auc' metric used for early stopping (assumes a binary target).
        valid_proba = clf.predict_proba(X_valid)[:, 1]
        fold_aucs.append(roc_auc_score(y_valid, valid_proba))

    return float(np.mean(fold_aucs))

In [None]:
# The objective returns a mean cross-validated AUC, so the study maximises it.
# Optuna only accepts the spellings "minimize" / "maximize" for `direction`.
study = optuna.create_study(direction="maximize", study_name='TabNet optimization')
# `timeout` is expressed in seconds: 6*60 is a 6-minute search budget.
# NOTE(review): an earlier comment said "5 hours" (that would be 5*60*60) — confirm the intended budget.
study.optimize(Objective, timeout=6*60)

best = study.best_params
print('The best parameters are ', best)

In [None]:
# Rebuild the TabNet configuration from the best trial's parameters. The
# optimiser settings and the scheduler's mode / min_lr / factor are fixed
# values; everything else is read from `best`.
best_model_params = {
    "n_d": best["n_da"],
    "n_a": best["n_da"],
    "n_steps": best["n_steps"],
    "gamma": best["gamma"],
    "lambda_sparse": best["lambda_sparse"],
    "optimizer_fn": torch.optim.Adam,
    "optimizer_params": {"lr": 2e-2, "weight_decay": 1e-5},
    "mask_type": best["mask_type"],
    "n_shared": best["n_shared"],
    "scheduler_params": {
        "mode": "min",
        "patience": best["patienceScheduler"],
        "min_lr": 1e-5,
        "factor": 0.5,
    },
    "scheduler_fn": torch.optim.lr_scheduler.ReduceLROnPlateau,
    "verbose": 0,
}
epochs = best["epochs"]

# Train the final classifier on the complete data set.
# NOTE(review): no eval_set is passed here, so `patience` and `eval_metric`
# presumably have nothing to monitor — confirm whether a hold-out set is wanted.
clf = TabNetClassifier(**best_model_params)
clf.fit(
    X_train=X,
    y_train=y,
    patience=best['patience'],
    max_epochs=epochs,
    eval_metric=['auc'],
)
