In [272]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification, make_regression
from sklearn.metrics import accuracy_score, root_mean_squared_error
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
import xgboost as xgb

In [273]:
def compare_classification(X_train, y_train, X_valid, y_valid, X_test, y_test, num_classes):
    """Fit TabNet and XGBoost classifiers on one split and print their test accuracy.

    Both models are trained on the training split, monitored on the
    validation split (TabNet via ``patience``, XGBoost via
    ``early_stopping_rounds``) and scored once on the test split.

    Parameters
    ----------
    X_train, y_train
        Training features and labels.
    X_valid, y_valid
        Validation features and labels, passed to both models as ``eval_set``.
    X_test, y_test
        Held-out features and labels used only for the reported accuracy.
    num_classes : int
        Exactly ``2`` selects the binary setup ("logloss" metric for XGBoost
        and the "Binary Classification" heading). Any other value selects the
        multiclass setup ("mlogloss", "Multiclass Classification").

    Returns
    -------
    None
        The heading and the two accuracies are printed, not returned.

    Notes
    -----
    The inputs are assumed to be NumPy arrays, as produced by the callers in
    this notebook -- TODO confirm before passing other containers.
    """
    # The two tasks differ only in the heading and in XGBoost's eval metric;
    # the TabNet configuration is the same for both.
    is_binary = num_classes == 2
    task_label = "Binary Classification" if is_binary else "Multiclass Classification"
    xgb_eval_metric = "logloss" if is_binary else "mlogloss"

    clf_tabnet = TabNetClassifier(optimizer_params=dict(lr=0.09), verbose=0)
    clf_xgboost = xgb.XGBClassifier(
        n_estimators=1000,
        eval_metric=xgb_eval_metric,
        early_stopping_rounds=30,
        learning_rate=0.05,
    )

    clf_tabnet.fit(
        X_train, y_train,
        eval_set=[(X_valid, y_valid)],
        eval_metric=["accuracy"],
        max_epochs=100,
        patience=30
    )

    clf_xgboost.fit(
        X_train, y_train,
        eval_set=[(X_valid, y_valid)],
        verbose=False
    )

    pred_tabnet = clf_tabnet.predict(X_test)
    pred_xgboost = clf_xgboost.predict(X_test)

    accuracy_tabnet = accuracy_score(y_test, pred_tabnet)
    accuracy_xgboost = accuracy_score(y_test, pred_xgboost)

    print(f"\n === {task_label} ===\n")
    print(f"TabNet Accuracy with max epochs, patience, eval_metric: {accuracy_tabnet}\n")
    print(f"XGBoost Accuracy with max epochs, patience, eval_metric: {accuracy_xgboost}\n")

In [274]:
def compare_regression(X_train, y_train, X_valid, y_valid, X_test, y_test):
    """Fit TabNet and XGBoost regressors on one split and print their test RMSE.

    Both models are trained on the training split, monitored on the
    validation split (TabNet via ``patience``, XGBoost via
    ``early_stopping_rounds``) and scored once on the test split.

    Parameters
    ----------
    X_train, y_train
        Training features and single-target values.
    X_valid, y_valid
        Validation features and targets, passed to both models as ``eval_set``.
    X_test, y_test
        Held-out features and targets used only for the reported RMSE.

    Returns
    -------
    None
        The heading and the two RMSE values are printed, not returned.

    Notes
    -----
    Targets may be any single-target array-like (NumPy array, pandas Series,
    list); they are coerced with ``np.asarray`` rather than relying on the
    input having a ``.reshape`` method. The feature matrices are passed
    through unchanged and are assumed to be NumPy arrays, as produced by the
    callers in this notebook -- TODO confirm before passing other containers.
    """
    reg_tabnet = TabNetRegressor(optimizer_params=dict(lr=0.09), verbose=0)
    reg_xgboost = xgb.XGBRegressor(
        n_estimators=1000,
        eval_metric="rmse",
        early_stopping_rounds=30,
        learning_rate=0.05,
    )

    # Flat views of the targets: used for XGBoost and for scoring.
    y_train_flat = np.asarray(y_train).reshape(-1)
    y_valid_flat = np.asarray(y_valid).reshape(-1)
    y_test_flat = np.asarray(y_test).reshape(-1)

    # TabNetRegressor requires 2-D targets of shape (n_samples, n_targets),
    # so only TabNet receives the column-shaped version.
    y_train_col = y_train_flat.reshape(-1, 1)
    y_valid_col = y_valid_flat.reshape(-1, 1)

    reg_tabnet.fit(
        X_train, y_train_col,
        eval_set=[(X_valid, y_valid_col)],
        eval_metric=["rmse"],
        max_epochs=100,
        patience=30
    )

    reg_xgboost.fit(
        X_train, y_train_flat,
        eval_set=[(X_valid, y_valid_flat)],
        verbose=False
    )

    # Flatten predictions so both are compared to y_test in the same 1-D shape
    # (TabNet predicts a column because it was trained on one).
    pred_tabnet = np.asarray(reg_tabnet.predict(X_test)).reshape(-1)
    pred_xgboost = np.asarray(reg_xgboost.predict(X_test)).reshape(-1)

    rmse_tabnet = root_mean_squared_error(y_test_flat, pred_tabnet)
    rmse_xgboost = root_mean_squared_error(y_test_flat, pred_xgboost)

    print("\n=== Regression ===\n")
    print(f"TabNet RMSE with max epochs, patience, eval_metric: {rmse_tabnet}\n")
    print(f"XGBoost RMSE with max epochs, patience, eval_metric: {rmse_xgboost}\n")


In [275]:
# Synthetic data for BINARY CLASSIFICATION, split 70% train / 15% validation / 15% test
X_binary, y_binary = make_classification(
    n_samples=10000,
    n_features=100,
    n_informative=8,
    n_classes=2,
    random_state=42,
)
# Hold out 30% of the rows, then halve that hold-out into validation and test.
X_train_bin, X_temp_bin, y_train_bin, y_temp_bin = train_test_split(
    X_binary, y_binary, test_size=0.3, random_state=42
)
X_valid_bin, X_test_bin, y_valid_bin, y_test_bin = train_test_split(
    X_temp_bin, y_temp_bin, test_size=0.5, random_state=42
)

In [276]:
# Synthetic data for MULTICLASS CLASSIFICATION (3 classes), split 70% train / 15% validation / 15% test
X_multi, y_multi = make_classification(
    n_samples=10000,
    n_features=100,
    n_informative=8,
    n_classes=3,
    n_clusters_per_class=1,
    random_state=42,
)
# Hold out 30% of the rows, then halve that hold-out into validation and test.
X_train_multi, X_temp_multi, y_train_multi, y_temp_multi = train_test_split(
    X_multi, y_multi, test_size=0.3, random_state=42
)
X_valid_multi, X_test_multi, y_valid_multi, y_test_multi = train_test_split(
    X_temp_multi, y_temp_multi, test_size=0.5, random_state=42
)

In [277]:
# Synthetic data for REGRESSION, split 70% train / 15% validation / 15% test
X_reg, y_reg = make_regression(
    n_samples=10000,
    n_features=100,
    n_informative=8,
    noise=0.1,
    random_state=42,
)
# Hold out 30% of the rows, then halve that hold-out into validation and test.
X_train_reg, X_temp_reg, y_train_reg, y_temp_reg = train_test_split(
    X_reg, y_reg, test_size=0.3, random_state=42
)
X_valid_reg, X_test_reg, y_valid_reg, y_test_reg = train_test_split(
    X_temp_reg, y_temp_reg, test_size=0.5, random_state=42
)

In [278]:
# Binary classification: TabNet vs. XGBoost on the binary split
compare_classification(
    X_train=X_train_bin,
    y_train=y_train_bin,
    X_valid=X_valid_bin,
    y_valid=y_valid_bin,
    X_test=X_test_bin,
    y_test=y_test_bin,
    num_classes=2,
)


Early stopping occurred at epoch 99 with best_epoch = 69 and best_val_0_accuracy = 0.95733





 === Binary Classification ===

TabNet Accuracy with max epochs, patience, eval_metric: 0.9593333333333334

XGBoost Accuracy with max epochs, patience, eval_metric: 0.92



In [279]:
# Multiclass classification: TabNet vs. XGBoost on the 3-class split
compare_classification(
    X_train=X_train_multi,
    y_train=y_train_multi,
    X_valid=X_valid_multi,
    y_valid=y_valid_multi,
    X_test=X_test_multi,
    y_test=y_test_multi,
    num_classes=3,
)


Early stopping occurred at epoch 90 with best_epoch = 60 and best_val_0_accuracy = 0.96933





 === Multiclass Classification ===

TabNet Accuracy with max epochs, patience, eval_metric: 0.9646666666666667

XGBoost Accuracy with max epochs, patience, eval_metric: 0.9386666666666666



In [280]:
# Regression: TabNet vs. XGBoost on the regression split
compare_regression(
    X_train=X_train_reg,
    y_train=y_train_reg,
    X_valid=X_valid_reg,
    y_valid=y_valid_reg,
    X_test=X_test_reg,
    y_test=y_test_reg,
)


Early stopping occurred at epoch 83 with best_epoch = 53 and best_val_0_rmse = 8.46689





=== Regression ===

TabNet RMSE with max epochs, patience, eval_metric: 8.483128545126485

XGBoost RMSE with max epochs, patience, eval_metric: 31.17581323797507

