# 1. 필요 라이브러리 다운로드

In [None]:
!pip install pytorch-tabnet
import torch
from pytorch_tabnet.tab_model import TabNetClassifier

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm
from collections import Counter
from datetime import datetime
import matplotlib.pyplot as plt
import random
import time
import copy
from glob import glob

import warnings
warnings.simplefilter(action='ignore', category=UserWarning)

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.5/44.5 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-4.1.0
Mounted at /content/drive


# 1. 데이터 검증

## 1.1 결측치 확인
    * 결측이 존재하는지 여부를 검증한다. 결측의 개수가 0으로 출력되어야 한다.
    

In [None]:
# Any total other than 0 means the data-generation step went wrong.
labels = list(range(9))
folds_lst = list(range(4, 11))

# Accumulate the NaN count of every (fold, label) frame, one fold at a time.
counter = 0
for fold in folds_lst:
    frames_by_label = summarized_data_dict[fold]
    counter += sum(frames_by_label[label].isna().sum().sum() for label in labels)

print(f"결측의 개수는 {counter}개 입니다.")

결측의 개수는 0개 입니다.


# 2. 데이터셋 형성

* sim, hand와 well을 분리

* 테스트 과정에선 well 데이터만 사용해야 하기 때문에 데이터셋을 구분하기 이전에 먼저 Well 데이터와 Sim, Hand 데이터를 분리한다.

In [None]:
# For every fold, pool the per-label frames into two frames: rows whose
# 'id_label' contains 'WELL' and all remaining rows (SIM / HAND).
labels = [0, 1, 2, 3, 4, 5, 6, 7, 8]
folds_lst = [4, 5, 6, 7, 8, 9, 10]

total_fold_df_dict = {}
for fold in folds_lst:
    well_parts = []
    sim_hand_parts = []

    for label in labels:
        label_df = summarized_data_dict[fold][label]
        # Compute the case-sensitive 'WELL' mask once and reuse it for both halves.
        is_well = label_df['id_label'].str.contains('WELL', case=True)
        well_parts.append(label_df[is_well].reset_index(drop=True))
        sim_hand_parts.append(label_df[~is_well].reset_index(drop=True))

    # A single concat per fold avoids re-copying the growing frame on every
    # label and avoids concatenating onto an empty placeholder frame.
    total_well_df = pd.concat(well_parts)
    total_sim_hand_df = pd.concat(sim_hand_parts)

    # The per-fold source frames are dropped once they have been split.
    del summarized_data_dict[fold]
    total_fold_df_dict[fold] = {'well' : total_well_df, 'sim_hand' : total_sim_hand_df}

# 3. X_train, X_test, y_train, y_test 구성

    * 아래 함수의 기능은 다음과 같다.
        1. X_train, X_test, y_train, y_test로 데이터셋을 나눈다. 이 때 소수 라벨이 고르게 포함될 수 있도록 stratify 옵션을 주었다.
        2. augmentation = True를 주면 train 데이터의 불균형을 upsampling으로 해소한다.
        3. classification_mode : multi, binary / 이진분류로만 할 것인지 다중분류할 것인지 결정한다.
        4. labels : 훈련, 테스트 데이터 내에 존재하는 모든 라벨의 집합

    * 인자 설정은 다음과 같이 진행한다.
        test_ratio = 0.2
        augmentation = False
        classification_mode = 'multi'
        folds_lst = [4,5,6,7,8,9,10]
        labels = [0, 1,2,3,4,5,6,7,8]

    * 이 때 train 데이터의 개수는 1494 / test 데이터의 개수는 373이다

In [None]:
def make_train_test_split(total_well_df, total_sim_hand_df, test_ratio, augmentation, classification_mode, labels=(0, 1, 2, 3, 4, 5, 6, 7, 8), random_state=None):
    """Build train/test features and targets; the test set holds WELL rows only.

    The test set is a stratified sample of ``total_well_df``. The training set
    is every row of ``total_sim_hand_df`` plus the WELL rows that were not
    sampled into the test set.

    Args:
        total_well_df: Frame of WELL rows with 'id_label' and 'label' columns.
        total_sim_hand_df: Frame of SIM/HAND rows with the same columns.
        test_ratio: Fraction of the *combined* row count (WELL + SIM/HAND)
            used as the absolute test size; those rows are all drawn from
            ``total_well_df``.
        augmentation: When truthy, each label's training rows are repeated
            ``max_count // label_count`` times to reduce class imbalance.
            Training rows whose label is not in ``labels`` are dropped.
        classification_mode: 'multi' keeps the labels as they are; 'binary'
            maps label 0 to 0 and every other label to 1.
        labels: Labels considered during augmentation.
        random_state: Optional seed forwarded to ``train_test_split`` so the
            split can be reproduced. ``None`` keeps the unseeded behaviour.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` with fresh 0..n-1 indices.

    Raises:
        ValueError: If ``classification_mode`` is neither 'multi' nor 'binary'.
    """
    if classification_mode not in ('multi', 'binary'):
        raise ValueError(
            f"classification_mode must be 'multi' or 'binary', got {classification_mode!r}"
        )

    total_nums = len(total_well_df) + len(total_sim_hand_df)
    test_nums = int(total_nums * test_ratio)
    train_well_data, test_data = train_test_split(
        total_well_df,
        test_size=test_nums,
        stratify=total_well_df['label'],
        shuffle=True,
        random_state=random_state,
    )
    train_data = pd.concat([total_sim_hand_df, train_well_data])

    if augmentation:
        rows_by_label = {label: train_data[train_data['label'] == label] for label in labels}
        count_by_label = {label: len(rows) for label, rows in rows_by_label.items()}
        max_ = max(count_by_label.values(), default=0)

        repeated_parts = []
        for label in labels:
            label_count = count_by_label[label]
            if label_count == 0:
                # A label with no training rows cannot be upsampled; skipping it
                # avoids dividing by zero below.
                continue
            repeated_parts.extend([rows_by_label[label]] * (max_ // label_count))

        train_data = pd.concat(repeated_parts) if repeated_parts else train_data.iloc[0:0]

    X_train = train_data.drop(columns=['id_label', 'label']).reset_index(drop=True)
    X_test = test_data.drop(columns=['id_label', 'label']).reset_index(drop=True)

    y_train = train_data['label'].reset_index(drop=True)
    y_test = test_data['label'].reset_index(drop=True)

    if classification_mode == 'binary':
        # Collapse every non-zero label into class 1.
        y_train = pd.Series((y_train != 0).astype(int).to_numpy())
        y_test = pd.Series((y_test != 0).astype(int).to_numpy())

    return X_train, y_train, X_test, y_test

In [None]:
# Split settings applied to every fold.
test_ratio = 0.2
augmentation = False
classification_mode = 'multi'
folds_lst = [4, 5, 6, 7, 8, 9, 10]
labels = [0, 1, 2, 3, 4, 5, 6, 7, 8]

# fold -> {'X_train', 'y_train', 'X_test', 'y_test'}
split_keys = ('X_train', 'y_train', 'X_test', 'y_test')
fold_xy_dict = {}
for fold in folds_lst:
    fold_frames = total_fold_df_dict[fold]
    total_well_df, total_sim_hand_df = fold_frames['well'], fold_frames['sim_hand']

    X_train, y_train, X_test, y_test = make_train_test_split(
        total_well_df,
        total_sim_hand_df,
        test_ratio,
        augmentation,
        classification_mode,
        labels=labels,
    )
    fold_xy_dict[fold] = dict(zip(split_keys, (X_train, y_train, X_test, y_test)))

# 4. k-fold 데이터를 통한 meta 데이터 생성

    * 탭넷을 활용한 스태킹 기법을 이용할 예정
    * k-fold를 활용한 meta-dataset 구성이 필요
    * fold 수에 대한 실험이 필요하기 때문에 모든 fold 수에 대한 meta-dataset을 만든다.
    * 이 때 4.3에서 확인한 optimal 조건을 반영하여 데이터 셋을 구성한다.

In [None]:
# Flags handed to column_select (defined elsewhere in the notebook).
# NOTE(review): presumably each flag switches one feature group on or off —
# confirm against column_select.
option_dict = {
    'normal_mean' : True,
    'normal_std' : True,
    'diff_mean' : True,
    'diff_std' : True,
    'normal_pca_mean' : False,
    'normal_pca_std' : True,
    'diff_pca_mean' : False,
    'diff_pca_std' : True,
    'total_pca_mean' : False,
    'total_pca_std' : True
}

folds_lst = [4,5,6,7,8,9,10]
k = 5
SEED = 42
kf = KFold(n_splits=k, shuffle=True, random_state=SEED)

optimal_fold_dict = column_select(fold_xy_dict, folds_lst, option_dict)

# fold count -> k-fold index -> {'train', 'val', 'test'} -> {'X', 'y'}
optimal_k_fold_dict = {}
for fold_ in folds_lst:
    fold_data = optimal_fold_dict[fold_]
    X_train, y_train = fold_data['X_train'], fold_data['y_train']
    X_test, y_test = fold_data['X_test'], fold_data['y_test']

    fold_dict = {}
    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        # KFold.split yields positional indices, so the targets are sliced with
        # .iloc exactly like the features. Plain y_train[...] is label-based and
        # only matches when the index happens to be a clean 0..n-1 range.
        fold_dict[fold] = {
            'train': {'X': X_train.iloc[train_index], 'y': y_train.iloc[train_index]},
            'val': {'X': X_train.iloc[val_index], 'y': y_train.iloc[val_index]},
            # Every k-fold split shares the same held-out test set.
            'test': {'X': X_test, 'y': y_test},
        }
    optimal_k_fold_dict[fold_] = fold_dict

In [None]:
SEED = 42

# Settings shared by the stand-alone random forest and the forest that
# AdaBoost uses as its base estimator.
random_forest_params = dict(
    n_estimators=100,  # number of trees
    max_depth=100,  # maximum tree depth
    random_state=SEED,
)

# Base classifiers, keyed by the name used in the result dictionaries.
# Insertion order is the order in which they are trained and reported.
models = {
    'xgbclassifier': XGBClassifier(
        n_estimators=100,  # number of trees
        max_depth=15,  # maximum tree depth
        gamma=0,  # threshold for splitting a leaf further
        reg_lambda=1,  # L2 regularisation strength
        random_state=SEED,
    ),
    'lgbmclassifier': LGBMClassifier(
        boosting_type='gbdt',  # boosting variant
        num_leaves=31,  # maximum leaves per tree
        learning_rate=0.1,
        n_estimators=100,  # number of trees
        max_depth=-1,  # -1 or a positive integer
        reg_alpha=0.0,  # L1 regularisation strength
        reg_lambda=0.0,  # L2 regularisation strength
        subsample=1.0,  # row sampling ratio per tree
        colsample_bytree=1.0,  # feature sampling ratio per tree
        random_state=SEED,
    ),
    'adaclassifier': AdaBoostClassifier(
        estimator=RandomForestClassifier(**random_forest_params),
        n_estimators=50,  # number of boosted estimators
        learning_rate=1.0,  # contribution of each estimator
        random_state=SEED,
    ),
    'randomforestclassifier': RandomForestClassifier(**random_forest_params),
    'svcclassifier': SVC(
        kernel='linear',
        C=0.08,
        probability=True,  # needed so predict_proba is available
        random_state=SEED,
    ),
    'knn_classifier': KNeighborsClassifier(
        n_neighbors=1,  # number of neighbours
        leaf_size=10,
    ),
    'logistic_regression_classifier': LogisticRegression(
        solver='lbfgs',  # optimisation algorithm
        C=0.01,
        random_state=SEED,
        max_iter=100,
    ),
    'decision_tree_classifier': DecisionTreeClassifier(
        max_depth=200,  # maximum tree depth
        random_state=SEED,
        criterion='entropy',
    ),
    'GaussianNB_classifier': GaussianNB(),
}
folds_lst = [4, 5, 6, 7, 8, 9, 10]

# Run every base model on each fold-count dataset and keep the results.
optimal_result_dict = {}
for fold in folds_lst:
    print(f"{fold} fold 실험 중")
    fold_data = optimal_fold_dict[fold]
    X_train, y_train = fold_data['X_train'], fold_data['y_train']
    X_test, y_test = fold_data['X_test'], fold_data['y_test']
    result = classifier(X_train, y_train, X_test, y_test, models)
    optimal_result_dict[fold] = result

4 fold 실험 중
1/9 : xgbclassifier
model_fit 시작
model_predict 시작
Accuracy: 0.9302949061662198
xgbclassifier 걸린 시간: 3.7446506023406982 초
------------------------------


2/9 : lgbmclassifier
model_fit 시작
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 81600
[LightGBM] [Info] Number of data points in the train set: 1494, number of used features: 320
[LightGBM] [Info] Start training from score -1.602102
[LightGBM] [Info] Start training from score -2.472930
[LightGBM] [Info] Start training from score -4.418841
[LightGBM] [Info] Start training from score -2.776613
[LightGBM] [Info] Start training from score -1.996006
[LightGBM] [Info] Start training from score -1.220167
[LightGBM] [Info] Start training from score -1.924717
[LightGBM] [Info] Start training from score -4.744263
[LightGBM] [Info] Start training from score -2.914763
model_predict 시작
Accuracy: 0.935656836461126
lgbmclassifier 걸린 시간: 6.602751016616821 초
------------------------------


3/9 : ad

In [None]:
# Print the classification report ('cr') and confusion matrix ('cf') of every
# model stored in `result`.
model_names = result.keys()

result_df = pd.DataFrame([])
for position, name in enumerate(model_names, start=1):
    model_result = result[name]
    print(f"{position}번째 : {name}")
    print(model_result['cr'])
    print("\n")
    print(model_result['cf'])
    print("\n")
    print("\n", "_"*30)

1번째 : xgbclassifier
              precision    recall  f1-score     support
0              0.949074  0.976190  0.962441  210.000000
1              1.000000  0.500000  0.666667    2.000000
2              0.000000  0.000000  0.000000    1.000000
3              0.692308  0.692308  0.692308   13.000000
4              0.936170  0.936170  0.936170  141.000000
5              0.000000  0.000000  0.000000    1.000000
6              0.000000  0.000000  0.000000    3.000000
7              0.000000  0.000000  0.000000    2.000000
8              0.000000  0.000000  0.000000    0.000000
accuracy       0.930295  0.930295  0.930295    0.930295
macro avg      0.397506  0.344963  0.361954  373.000000
weighted avg   0.917709  0.930295  0.923448  373.000000


     0  1  2  3    4  5  6  7  8
0  205  0  0  0    5  0  0  0  0
1    1  1  0  0    0  0  0  0  0
2    0  0  0  0    0  0  0  0  1
3    0  0  0  9    4  0  0  0  0
4    6  0  0  3  132  0  0  0  0
5    0  0  0  1    0  0  0  0  0
6    2  0  0  0    

In [None]:
def make_meta_data(x_train, y_train, x_test, y_test, models, result, mode, threshold_option):
    """Fit the selected base models and return their class probabilities as meta-features.

    A model from ``models`` is selected when it passes every *enabled*
    criterion in ``threshold_option``. Each selected model is fitted on
    ``x_train`` / ``y_train``, and its ``predict_proba`` output for ``x_test``
    is appended column-wise to the result.

    Args:
        x_train: Training features (DataFrame). Not modified.
        y_train: Training targets.
        x_test: Features to predict probabilities for (DataFrame). Not modified.
        y_test: Targets of ``x_test``; only used when ``mode == 'train'``.
        models: Mapping of model name to an estimator exposing ``fit`` and
            ``predict_proba``. Selected estimators are fitted in place.
        result: Mapping of model name to a dict with a 'cr' classification
            report (indexable as ``cr['f1-score']['macro avg']`` and
            ``cr['f1-score']['accuracy']``) and an 'accuracy' value.
        mode: 'train' appends a 'label' column holding ``y_test``; any other
            value returns the probability columns only.
        threshold_option: ``{'f1-score': {'bool', 'threshold'},
            'accuracy': {'bool', 'threshold'}}``. A criterion whose 'bool' is
            falsy is ignored.

    Returns:
        DataFrame with integer-named probability columns (plus 'label' in
        'train' mode) and a fresh 0..n-1 index.
    """
    f1_option = threshold_option['f1-score']
    accuracy_option = threshold_option['accuracy']

    model_dict = {}
    for model_name, model in models.items():
        report = result[model_name]['cr']
        macro_f1 = report['f1-score']['macro avg']
        accuracy = report['f1-score']['accuracy']

        # A disabled criterion must not reject the model; previously a False
        # 'bool' made the whole condition False and dropped every model.
        if f1_option['bool'] and not (macro_f1 >= f1_option['threshold']):
            continue
        if accuracy_option['bool'] and not (accuracy >= accuracy_option['threshold']):
            continue

        print(f"{model_name} 의 macro f1 score : {macro_f1}", end='\n')
        print(f"{model_name} 의 accuracy : {result[model_name]['accuracy']}")
        model_dict[model_name] = model

    # Models receive positional column names, but on relabelled copies so the
    # caller's frames keep their own column names.
    train_features = x_train.set_axis(range(x_train.shape[1]), axis=1)
    test_features = x_test.set_axis(range(x_test.shape[1]), axis=1)

    proba_frames = []
    for i, (model_name, model) in enumerate(model_dict.items(), start=1):
        print(f"{i}/{len(model_dict)} : {model_name}")
        model.fit(train_features, y_train)
        proba_frames.append(pd.DataFrame(model.predict_proba(test_features)))
        print("-" * 30)
        print("\n")

    result_df = pd.concat(proba_frames, axis=1) if proba_frames else pd.DataFrame([])
    result_df.columns = range(result_df.shape[1])

    if mode == 'train':
        # Attach labels by position; index alignment would yield NaN whenever
        # y_test does not carry a 0..n-1 index.
        result_df['label'] = np.asarray(y_test)

    return result_df.reset_index(drop=True)

In [None]:
folds_lst = [4,5,6,7,8,9,10]
threshold_option = {'f1-score' : {'bool' : True, 'threshold' : 0.3},
                    'accuracy' : {'bool' : True, 'threshold' : 0.88}}


def _collect_meta_frames(fold_count, fold_dict, models, result, threshold_option):
    """Return (meta-train frame, list of per-split meta-test frames) for one fold count.

    For every k-fold split, the base models are fitted on the split's training
    part. Their probabilities on the validation part are stacked row-wise into
    the meta-train frame; their probabilities on the shared test set are
    collected per split so the caller can average them.
    """
    val_frames = []
    test_frames = []
    for fold, data in fold_dict.items():
        print(f"{fold_count} fold 수 : {fold + 1}/{len(fold_dict)}번째 k-fold")
        X_train, y_train = data['train']['X'].reset_index(drop=True), data['train']['y'].reset_index(drop=True)
        X_val, y_val = data['val']['X'].reset_index(drop=True), data['val']['y'].reset_index(drop=True)
        X_test, y_test = data['test']['X'].reset_index(drop=True), data['test']['y'].reset_index(drop=True)

        print("훈련 데이터 만드는 중")
        val_frames.append(make_meta_data(
            x_train = X_train,
            y_train = y_train,
            x_test = X_val,
            y_test = y_val,
            models = models,
            result = result,
            mode = 'train',
            threshold_option = threshold_option
            ))

        print("테스트 데이터 만드는 중")
        test_frames.append(make_meta_data(
            x_train = X_train,
            y_train = y_train,
            x_test = X_test,
            y_test = y_test,
            models = models,
            result = result,
            mode = 'test',
            threshold_option = threshold_option
            ))
        print("-"*30)

    meta_train_df = pd.concat(val_frames, axis=0) if val_frames else pd.DataFrame([])
    return meta_train_df, test_frames


optimal_meta_data_dict = {}
for fold_ in folds_lst:
    fold_dict = optimal_k_fold_dict[fold_]
    result = optimal_result_dict[fold_]

    print(f"{fold_} fold 수 meta data 생성 중")
    try:
        meta_train_df, meta_test_df_lst = _collect_meta_frames(
            fold_, fold_dict, models, result, threshold_option)
    except Exception as err:
        # Retry once without the SVC. Only Exception is caught so that
        # KeyboardInterrupt / SystemExit still stop the run, and the actual
        # error is printed because the failure may not come from the SVC.
        print("SVC에서 오류가 발생하여 해당 fold에서 SVC는 제외하였습니다")
        print(f"error: {err!r}")
        exception_models = copy.deepcopy(models)
        exception_models.pop('svcclassifier', None)
        meta_train_df, meta_test_df_lst = _collect_meta_frames(
            fold_, fold_dict, exception_models, result, threshold_option)

    meta_train_df = meta_train_df.reset_index(drop=True)
    # Average the test-set probabilities over the k-fold splits.
    meta_test_df = pd.DataFrame(np.mean([df.values for df in meta_test_df_lst], axis=0)).reset_index(drop=True)
    # Every split stores the same test targets, so any split can supply them.
    y_test = next(iter(fold_dict.values()))['test']['y'].reset_index(drop=True)
    meta_test_df['label'] = y_test
    optimal_meta_data_dict[fold_] = {'train' : meta_train_df, 'test' : meta_test_df}

Streaming output truncated to the last 5000 lines.
------------------------------


3/6 : adaclassifier
------------------------------


4/6 : randomforestclassifier
------------------------------


5/6 : svcclassifier
------------------------------


6/6 : knn_classifier
------------------------------


------------------------------
10 fold 수 meta data 생성 중
10 fold 수 : 1/5번째 k-fold
훈련 데이터 만드는 중
xgbclassifier 의 macro f1 score : 0.3619539873660341
xgbclassifier 의 accuracy : 0.9302949061662198
lgbmclassifier 의 macro f1 score : 0.5359294115484103
lgbmclassifier 의 accuracy : 0.935656836461126
adaclassifier 의 macro f1 score : 0.5430156775419773
adaclassifier 의 accuracy : 0.9276139410187667
randomforestclassifier 의 macro f1 score : 0.4146076282296479
randomforestclassifier 의 accuracy : 0.9249329758713136
svcclassifier 의 macro f1 score : 0.6858222198692554
svcclassifier 의 accuracy : 0.8981233243967829
knn_classifier 의 macro f1 score : 0.5152090396585713
knn_classifier 의 accurac

# 5. TabNet 활용

In [None]:
# Train a TabNet meta-classifier on the stacked probabilities of each fold
# count and record accuracy, classification report and confusion matrix.
tabnet = TabNetClassifier(
    optimizer_fn=torch.optim.AdamW,
)
previous_state = None
folds_lst = [4,5,6,7,8,9,10]
tabnet_result_dict = {}
for fold in folds_lst:
    meta_train_df, meta_test_df = optimal_meta_data_dict[fold]['train'], optimal_meta_data_dict[fold]['test']
    meta_X_train = meta_train_df.drop(columns=['label']).values
    meta_y_train = meta_train_df['label'].values

    meta_X_test = meta_test_df.drop(columns=['label']).values
    meta_y_test = meta_test_df['label'].values

    max_epochs = 2000

    # NOTE(review): the evaluation set passed to fit() is the test set itself,
    # so the patience-based stopping is presumably driven by test accuracy and
    # the scores below are likely optimistic. Holding out part of the meta-train
    # rows as a validation set would avoid this — confirm before reporting.
    tabnet.fit(
        meta_X_train, meta_y_train,
        eval_set=[(meta_X_test, meta_y_test)],
        max_epochs=max_epochs,
        patience=500,
        )

    preds = tabnet.predict(meta_X_test)
    accuracy = accuracy_score(meta_y_test, preds)

    # Label the confusion-matrix axes with the classes that actually occur.
    # A bare DataFrame numbers rows/columns 0..k-1, which names the wrong class
    # whenever some label is absent from both the targets and the predictions.
    present_labels = np.unique(np.concatenate([meta_y_test, preds]))
    cf = pd.DataFrame(
        confusion_matrix(meta_y_test, preds, labels=present_labels),
        index=present_labels,
        columns=present_labels,
    )
    cr = pd.DataFrame(classification_report(meta_y_test, preds, output_dict=True)).transpose()
    tabnet_result_dict[fold] = {'accuracy': accuracy, 'cr': cr, 'cf': cf}
    print("Accuracy:", accuracy)
    print("-" * 30)
    print("\n")

Streaming output truncated to the last 5000 lines.
epoch 350| loss: 0.04371 | val_0_accuracy: 0.90617 |  0:00:28s
epoch 351| loss: 0.03811 | val_0_accuracy: 0.91421 |  0:00:28s
epoch 352| loss: 0.05967 | val_0_accuracy: 0.91689 |  0:00:28s
epoch 353| loss: 0.05254 | val_0_accuracy: 0.92493 |  0:00:29s
epoch 354| loss: 0.04875 | val_0_accuracy: 0.92761 |  0:00:29s
epoch 355| loss: 0.05896 | val_0_accuracy: 0.92225 |  0:00:29s
epoch 356| loss: 0.05071 | val_0_accuracy: 0.91957 |  0:00:29s
epoch 357| loss: 0.04034 | val_0_accuracy: 0.91421 |  0:00:29s
epoch 358| loss: 0.05807 | val_0_accuracy: 0.91153 |  0:00:29s
epoch 359| loss: 0.06434 | val_0_accuracy: 0.91957 |  0:00:29s
epoch 360| loss: 0.07193 | val_0_accuracy: 0.92225 |  0:00:29s
epoch 361| loss: 0.05624 | val_0_accuracy: 0.93298 |  0:00:29s
epoch 362| loss: 0.04397 | val_0_accuracy: 0.93029 |  0:00:29s
epoch 363| loss: 0.04816 | val_0_accuracy: 0.92761 |  0:00:29s
epoch 364| loss: 0.04053 | val_0_accuracy: 0.93298 | 

In [None]:
# Print the TabNet classification report and confusion matrix per fold count.
folds_lst = [4,5,6,7,8,9,10]

for fold in folds_lst:
    # The header previously contained an escaped backslash ("\\n"), which
    # printed a literal "\n" instead of a blank line.
    print(f"{fold} fold 수 결과 \n\n")
    print(tabnet_result_dict[fold]['cr'])
    print(tabnet_result_dict[fold]['cf'])
    print("-"*30,"\n\n")

4 fold 수 결과 \n

              precision    recall  f1-score     support
0              0.971292  0.966667  0.968974  210.000000
1              0.500000  0.500000  0.500000    2.000000
2              0.000000  0.000000  0.000000    1.000000
3              0.818182  0.692308  0.750000   13.000000
4              0.925170  0.964539  0.944444  141.000000
5              0.000000  0.000000  0.000000    1.000000
6              0.000000  0.000000  0.000000    3.000000
7              0.500000  0.500000  0.500000    2.000000
accuracy       0.938338  0.938338  0.938338    0.938338
macro avg      0.464330  0.452939  0.457927  373.000000
weighted avg   0.930447  0.938338  0.934051  373.000000
     0  1  2  3    4  5  6  7
0  203  0  0  0    6  0  1  0
1    0  1  0  0    0  1  0  0
2    0  0  0  0    1  0  0  0
3    0  0  0  9    4  0  0  0
4    3  0  0  2  136  0  0  0
5    1  0  0  0    0  0  0  0
6    2  0  0  0    0  0  0  1
7    0  1  0  0    0  0  0  1
------------------------------ 


5 fold 수