In [1]:
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from pytorch_tabnet.multitask import TabNetMultiTaskClassifier

import torch
import warnings
import numpy as np
import pandas as pd
import torch.nn.functional as F

warnings.filterwarnings("ignore")

In [ ]:
# Load the preprocessed training table, using the 'ID' column as the row index.
train = pd.read_csv('../Database/train_preprocessed.csv', index_col='ID')

# Separate the target column (y) from the remaining feature columns (X).
y = train['대출등급']
X = train.drop('대출등급', axis=1)

# Hold out 20% of the rows for evaluation; the seed is fixed so the split
# is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Oversample the training split only, so the held-out rows are left untouched.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

In [ ]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


model = TabNetMultiTaskClassifier(
    n_d=8,  # width of the decision (prediction) layer
    n_a=8,  # width of the attention embedding used for each mask
    n_steps=5,  # number of sequential decision steps
    gamma=1.5,  # relaxation coefficient for feature reuse across masks
    cat_idxs=list(range(9, 13)),  # column indices of the categorical features
    cat_dims=[2, 16, 4, 12],  # number of distinct values per categorical feature
    # pytorch-tabnet documents device_name as a string ("cpu", "cuda", "auto");
    # passing the plain type string also keeps the init params serialisable.
    device_name=device.type,
)

# The multitask classifier requires targets shaped (n_samples, n_tasks), so the
# single label column is reshaped into a one-task 2-D array. Passing the 1-D
# Series directly makes fit() reject the targets.
y_train_2d = y_train.to_numpy().reshape(-1, 1)
y_test_2d = y_test.to_numpy().reshape(-1, 1)

# NOTE(review): this trains on the un-resampled split; the SMOTE output
# (X_res / y_res) computed in the earlier cell is not used here - confirm
# whether that is intended.
# NOTE(review): patience (5) exceeds max_epochs (2), so early stopping can
# never trigger with these settings.
model.fit(
    X_train=X_train.values,
    y_train=y_train_2d,
    eval_set=[(X_test.values, y_test_2d)],
    max_epochs=2,
    patience=5,
    loss_fn=F.cross_entropy,
    batch_size=64,
    virtual_batch_size=32,
)

# predict() returns one prediction array per task; there is a single task
# here, so its array is the first (and only) element.
preds = model.predict(X_test.values)
print("TabNet Classifier Accuracy:", accuracy_score(y_test, preds[0]))