In [1]:
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from pytorch_tabnet.multitask import TabNetMultiTaskClassifier

import torch
import warnings
import numpy as np
import pandas as pd
import torch.nn.functional as F

warnings.filterwarnings("ignore")

In [2]:
train = pd.read_csv('../Database/train_preprocessed.csv', index_col='ID')
X = train.drop(columns=['대출등급'])
y = train['대출등급']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

In [32]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

model = TabNetMultiTaskClassifier(
    n_d=4,  # Decision 단계의 특성 차원
    n_a=4,  # Attention 단계의 특성 차원
    n_steps=5,  # Attention 단계의 반복 횟수
    gamma=1.5,  # Regularization 강도
    cat_idxs=[i for i in range(6, 10)],  # 범주형 특성의 인덱스
    cat_dims=[2, 11, 4, 12],  # 범주형 특성의 차원
    device_name=device,
    verbose=True,
)

model.fit(
    X_train=X_res.values,
    y_train=y_res.values.reshape(-1, 1),
    eval_set=[(X_test.values, y_test.values.reshape(-1,1))],
    max_epochs=2,
    patience=5,
    loss_fn=F.cross_entropy,
    batch_size=1024,
    virtual_batch_size=512,
)

y_pred = model.predict_proba(X_test.values)

cuda
epoch 0  | loss: 1.79561 | val_0_logloss: 1.60375 |  0:00:09s
epoch 1  | loss: 1.23698 | val_0_logloss: 1.16274 |  0:00:18s
Stop training because you reached max_epochs = 2 with best_epoch = 1 and best_val_0_logloss = 1.16274


In [39]:
y_pred = np.array(y_pred, dtype=np.float64).squeeze()
predictions = np.argmax(y_pred, axis=1)
print("TabNet Classifier Accuracy:", accuracy_score(y_test, predictions))

TabNet Classifier Accuracy: 0.5350745106184122
