In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [9]:
train = pd.read_csv('../../datasets/unlisted_filled_train_data.csv')
test = pd.read_csv('../../datasets/unlisted_test_data.csv')

In [17]:
# train_life_cycle = train[['도입기', '성장기', '성숙기', '쇠퇴기']]
# test_life_cycle = test[['도입기', '성장기', '성숙기', '쇠퇴기']]

In [10]:
selected_features = ['CASH FLOW 대 부채비율', '순운전자본비율', '자기자본구성비율', '경영자본순이익률', '총자본영업이익률',
                      '금융비용부담률', '이윤분배율',
                      '도입기', '성장기', '성숙기', '쇠퇴기']

In [11]:
x_train = train[selected_features]
x_test = test[selected_features]

y_train = train['부실판단']
y_test = test['부실판단']

In [12]:
x_test = x_test.fillna(test['영업년수'].median())

In [6]:
# from sklearn.preprocessing import StandardScaler, MinMaxScaler

# labels = x_train.columns

# scaler = StandardScaler()
# x_train = scaler.fit_transform(x_train)
# x_test = scaler.transform(x_test)

# x_train = pd.DataFrame(data=x_train, columns = labels)
# x_test = pd.DataFrame(data=x_test, columns = labels)

In [8]:
# 모델 생성
logit_model = LogisticRegression()

# Cross Validation
cv_accuracy = cross_val_score(logit_model, x_train, y_train, cv=5, scoring='accuracy')
cv_precision = cross_val_score(logit_model, x_train, y_train, cv=5, scoring='precision')
cv_recall = cross_val_score(logit_model, x_train, y_train, cv=5, scoring='recall')
cv_f1 = cross_val_score(logit_model, x_train, y_train, cv=5, scoring='f1')
cv_roc_auc = cross_val_score(logit_model, x_train, y_train, cv=5, scoring='roc_auc')

print("CV_Accuracy_Scores:", cv_accuracy)
print("CV_Precision_Scores:", cv_precision)
print("CV_Recall_Scores:", cv_recall)
print("CV_F1_Scores:", cv_f1)
print("CV_ROC/AUC:", cv_roc_auc)

print('\n=======교차검증 결과=======')
print(f'CV_Accuracy_mean: {cv_accuracy.mean():.3f}')
print(f'CV_Precision_mean: {cv_precision.mean():.3f}')
print(f'CV_Recall_mean: {cv_recall.mean():.3f}')
print(f'CV_F1_스코어_mean: {cv_f1.mean():.3f}')
print(f'CV_ROC_AUC+스코어_mean: {cv_roc_auc.mean():.3f}')

# 모델 학습 및 평가
logit_model.fit(x_train, y_train)
y_pred = logit_model.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

print(f'\n=======Test 데이터 평가======')
print(f'Accuracy: {accuracy:.3f}')
print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')
print(f'F1 스코어: {f1:.3f}')
print(f'ROC AUC 스코어: {roc_auc:.3f}')


CV_Accuracy_Scores: [0.66808914 0.81218876 0.8204885  0.83068532 0.67749585]
CV_Precision_Scores: [0.64017398 0.80350554 0.77425903 0.78621256 0.6294504 ]
CV_Recall_Scores: [0.7676624  0.82637571 0.90464896 0.90848743 0.86344239]
CV_F1_Scores: [0.69814575 0.81478017 0.83439072 0.84293885 0.72810876]
CV_ROC/AUC: [0.72037809 0.88867603 0.89643598 0.90325257 0.72292056]

CV_Accuracy_mean: 0.762
CV_Precision_mean: 0.727
CV_Recall_mean: 0.854
CV_F1_스코어_mean: 0.784
CV_ROC_AUC+스코어_mean: 0.826

Accuracy: 0.723
Precision: 0.611
Recall: 0.679
F1 스코어: 0.643
ROC AUC 스코어: 0.714


In [13]:
# Random Forest 모델 생성 및 학습
rf_model = RandomForestClassifier(random_state=42, n_estimators=29, min_samples_split=7, min_samples_leaf=8, max_depth=7)

# # Cross Validation
# cv_accuracy = cross_val_score(rf_model, x_train, y_train, cv=5, scoring='accuracy')
# cv_precision = cross_val_score(rf_model, x_train, y_train, cv=5, scoring='precision')
# cv_recall = cross_val_score(rf_model, x_train, y_train, cv=5, scoring='recall')
# cv_f1 = cross_val_score(rf_model, x_train, y_train, cv=5, scoring='f1')
# cv_roc_auc = cross_val_score(rf_model, x_train, y_train, cv=5, scoring='roc_auc')

# print("CV_Accuracy_Scores:", cv_accuracy)
# print("CV_Precision_Scores:", cv_precision)
# print("CV_Recall_Scores:", cv_recall)
# print("CV_F1_Scores:", cv_f1)
# print("CV_ROC/AUC:", cv_roc_auc)

# print('\n=======교차검증 결과=======')
# print(f'CV_Accuracy_mean: {cv_accuracy.mean():.3f}')
# print(f'CV_Precision_mean: {cv_precision.mean():.3f}')
# print(f'CV_Recall_mean: {cv_recall.mean():.3f}')
# print(f'CV_F1_스코어_mean: {cv_f1.mean():.3f}')
# print(f'CV_ROC_AUC+스코어_mean: {cv_roc_auc.mean():.3f}')


rf_model.fit(x_train, y_train)
y_pred_rf = rf_model.predict(x_test)

accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf)
roc_auc_rf = roc_auc_score(y_test, y_pred_rf)

print(f'\n=======Test 데이터 평가======')
print(f'Accuracy: {accuracy_rf:.3f}')
print(f'Precision: {precision_rf:.3f}')
print(f'Recall: {recall_rf:.3f}')
print(f'F1 스코어: {f1_rf:.3f}')
print(f'ROC AUC 스코어: {roc_auc_rf:.3f}')


Accuracy: 0.813
Precision: 0.717
Recall: 0.811
F1 스코어: 0.761
ROC AUC 스코어: 0.812


In [10]:
# AdaBoost 모델 생성 및 학습
adaboost_model = AdaBoostClassifier()

# Cross Validation
cv_accuracy = cross_val_score(adaboost_model, x_train, y_train, cv=5, scoring='accuracy')
cv_precision = cross_val_score(adaboost_model, x_train, y_train, cv=5, scoring='precision')
cv_recall = cross_val_score(adaboost_model, x_train, y_train, cv=5, scoring='recall')
cv_f1 = cross_val_score(adaboost_model, x_train, y_train, cv=5, scoring='f1')
cv_roc_auc = cross_val_score(adaboost_model, x_train, y_train, cv=5, scoring='roc_auc')

print('=======교차검증 결과=======')
print("CV_Accuracy_Scores:", cv_accuracy)
print("CV_Precision_Scores:", cv_precision)
print("CV_Recall_Scores:", cv_recall)
print("CV_F1_Scores:", cv_f1)
print("CV_ROC/AUC:", cv_roc_auc)

print('\n=======교차검증 평균값=======')
print(f'CV_Accuracy_mean: {cv_accuracy.mean():.3f}')
print(f'CV_Precision_mean: {cv_precision.mean():.3f}')
print(f'CV_Recall_mean: {cv_recall.mean():.3f}')
print(f'CV_F1_스코어_mean: {cv_f1.mean():.3f}')
print(f'CV_ROC_AUC+스코어_mean: {cv_roc_auc.mean():.3f}')

adaboost_model.fit(x_train, y_train)
y_pred_adaboost = adaboost_model.predict(x_test)

accuracy_adaboost = accuracy_score(y_test, y_pred_adaboost)
precision_adaboost = precision_score(y_test, y_pred_adaboost)
recall_adaboost = recall_score(y_test, y_pred_adaboost)
f1_adaboost = f1_score(y_test, y_pred_adaboost)
roc_auc_adaboost = roc_auc_score(y_test, y_pred_adaboost)

print(f'\n=======Test 데이터 평가======')
print(f'Accuracy: {accuracy_adaboost:.3f}')
print(f'Precision: {precision_adaboost:.3f}')
print(f'Recall: {recall_adaboost:.3f}')
print(f'F1 스코어: {f1_adaboost:.3f}')
print(f'ROC AUC 스코어: {roc_auc_adaboost:.3f}')

CV_Accuracy_Scores: [0.67828355 0.78871235 0.87455537 0.87953521 0.68626986]
CV_Precision_Scores: [0.65027978 0.88007495 0.87328605 0.87670588 0.64534024]
CV_Recall_Scores: [0.77145567 0.66840607 0.87618596 0.88335704 0.82740635]
CV_F1_Scores: [0.70570375 0.75977352 0.8747336  0.88001889 0.72511947]
CV_ROC/AUC: [0.75548076 0.92853716 0.93838382 0.94147811 0.74624002]

CV_Accuracy_mean: 0.781
CV_Precision_mean: 0.785
CV_Recall_mean: 0.805
CV_F1_스코어_mean: 0.789
CV_ROC_AUC+스코어_mean: 0.862

Accuracy: 0.766
Precision: 0.661
Recall: 0.748
F1 스코어: 0.702
ROC AUC 스코어: 0.762


In [11]:
xgboost_model = XGBClassifier()

# Cross Validation
cv_accuracy = cross_val_score(xgboost_model, x_train, y_train, cv=5, scoring='accuracy')
cv_precision = cross_val_score(xgboost_model, x_train, y_train, cv=5, scoring='precision')
cv_recall = cross_val_score(xgboost_model, x_train, y_train, cv=5, scoring='recall')
cv_f1 = cross_val_score(xgboost_model, x_train, y_train, cv=5, scoring='f1')
cv_roc_auc = cross_val_score(xgboost_model, x_train, y_train, cv=5, scoring='roc_auc')

print('=======교차검증 결과=======')
print("CV_Accuracy_Scores:", cv_accuracy)
print("CV_Precision_Scores:", cv_precision)
print("CV_Recall_Scores:", cv_recall)
print("CV_F1_Scores:", cv_f1)
print("CV_ROC/AUC:", cv_roc_auc)

print('\n=======교차검증 평균값=======')
print(f'CV_Accuracy_mean: {cv_accuracy.mean():.3f}')
print(f'CV_Precision_mean: {cv_precision.mean():.3f}')
print(f'CV_Recall_mean: {cv_recall.mean():.3f}')
print(f'CV_F1_스코어_mean: {cv_f1.mean():.3f}')
print(f'CV_ROC_AUC_mean: {cv_roc_auc.mean():.3f}')

xgboost_model.fit(x_train, y_train)
y_pred_xgboost = xgboost_model.predict(x_test)

accuracy_xgboost = accuracy_score(y_test, y_pred_xgboost)
precision_xgboost = precision_score(y_test, y_pred_xgboost)
recall_xgboost = recall_score(y_test, y_pred_xgboost)
f1_xgboost = f1_score(y_test, y_pred_xgboost)
roc_auc_xgboost = roc_auc_score(y_test, y_pred_xgboost)

print(f'\n=======Test 데이터 평가======')
print(f'Accuracy: {accuracy_xgboost:.3f}')
print(f'Precision: {precision_xgboost:.3f}')
print(f'Recall: {recall_xgboost:.3f}')
print(f'F1 스코어: {f1_xgboost:.3f}')
print(f'ROC AUC 스코어: {roc_auc_xgboost:.3f}')

CV_Accuracy_Scores: [0.68610716 0.85155324 0.91795115 0.92387954 0.69409533]
CV_Precision_Scores: [0.66239139 0.84337349 0.89972777 0.89982111 0.65671642]
CV_Recall_Scores: [0.75912755 0.86337761 0.94070209 0.95400664 0.81365576]
CV_F1_Scores: [0.70746796 0.85325832 0.91975881 0.92612198 0.72681067]
CV_ROC/AUC: [0.75807458 0.92837464 0.96831214 0.97135256 0.76047276]

CV_Accuracy_mean: 0.815
CV_Precision_mean: 0.792
CV_Recall_mean: 0.866
CV_F1_스코어_mean: 0.827
CV_ROC_AUC_mean: 0.877

Accuracy: 0.804
Precision: 0.705
Recall: 0.807
F1 스코어: 0.752
ROC AUC 스코어: 0.805


In [12]:
# Bagging 모델 생성 및 학습
bagging_model = BaggingClassifier()

cv_accuracy = cross_val_score(bagging_model, x_train, y_train, cv=5, scoring='accuracy')
cv_precision = cross_val_score(bagging_model, x_train, y_train, cv=5, scoring='precision')
cv_recall = cross_val_score(bagging_model, x_train, y_train, cv=5, scoring='recall')
cv_f1 = cross_val_score(bagging_model, x_train, y_train, cv=5, scoring='f1')
cv_roc_auc = cross_val_score(bagging_model, x_train, y_train, cv=5, scoring='roc_auc')

print('=======교차검증 결과=======')
print("CV_Accuracy_Scores:", cv_accuracy)
print("CV_Precision_Scores:", cv_precision)
print("CV_Recall_Scores:", cv_recall)
print("CV_F1_Scores:", cv_f1)
print("CV_ROC/AUC:", cv_roc_auc)

print('\n=======교차검증 평균값=======')
print(f'CV_Accuracy_mean: {cv_accuracy.mean():.3f}')
print(f'CV_Precision_mean: {cv_precision.mean():.3f}')
print(f'CV_Recall_mean: {cv_recall.mean():.3f}')
print(f'CV_F1_스코어_mean: {cv_f1.mean():.3f}')
print(f'CV_ROC_AUC+스코어_mean: {cv_roc_auc.mean():.3f}')

bagging_model.fit(x_train, y_train)
y_pred_bagging = bagging_model.predict(x_test)

accuracy_bagging = accuracy_score(y_test, y_pred_bagging)
precision_bagging = precision_score(y_test, y_pred_bagging)
recall_bagging = recall_score(y_test, y_pred_bagging)
f1_bagging = f1_score(y_test, y_pred_bagging)
roc_auc_bagging = roc_auc_score(y_test, y_pred_bagging)

print(f'\n=======Test 데이터 평가======')
print(f'Accuracy: {accuracy_bagging:.3f}')
print(f'Precision: {precision_bagging:.3f}')
print(f'Recall: {recall_bagging:.3f}')
print(f'F1 스코어: {f1_bagging:.3f}')
print(f'ROC AUC 스코어: {roc_auc_bagging:.3f}')

CV_Accuracy_Scores: [0.67045993 0.78159829 0.90656865 0.9162912  0.67465023]
CV_Precision_Scores: [0.66268382 0.83363581 0.90286251 0.8981184  0.65772107]
CV_Recall_Scores: [0.69464201 0.71916509 0.91603416 0.92792793 0.70697013]
CV_F1_Scores: [0.68041715 0.77602041 0.90094787 0.92056837 0.68493151]
CV_ROC/AUC: [0.71054745 0.89681635 0.95356633 0.96432363 0.72546095]

CV_Accuracy_mean: 0.790
CV_Precision_mean: 0.791
CV_Recall_mean: 0.793
CV_F1_스코어_mean: 0.793
CV_ROC_AUC+스코어_mean: 0.850

Accuracy: 0.813
Precision: 0.725
Recall: 0.794
F1 스코어: 0.758
ROC AUC 스코어: 0.809


In [13]:
from sklearn.svm import SVC

# SVM 모델 생성 및 학습
svm_model = SVC(kernel='linear')

cv_accuracy = cross_val_score(svm_model, x_train, y_train, cv=5, scoring='accuracy')
cv_precision = cross_val_score(svm_model, x_train, y_train, cv=5, scoring='precision')
cv_recall = cross_val_score(svm_model, x_train, y_train, cv=5, scoring='recall')
cv_f1 = cross_val_score(svm_model, x_train, y_train, cv=5, scoring='f1')
cv_roc_auc = cross_val_score(svm_model, x_train, y_train, cv=5, scoring='roc_auc')

print('=======교차검증 결과=======')
print("CV_Accuracy_Scores:", cv_accuracy)
print("CV_Precision_Scores:", cv_precision)
print("CV_Recall_Scores:", cv_recall)
print("CV_F1_Scores:", cv_f1)
print("CV_ROC/AUC:", cv_roc_auc)

print('\n=======교차검증 평균값=======')
print(f'CV_Accuracy_mean: {cv_accuracy.mean():.3f}')
print(f'CV_Precision_mean: {cv_precision.mean():.3f}')
print(f'CV_Recall_mean: {cv_recall.mean():.3f}')
print(f'CV_F1_스코어_mean: {cv_f1.mean():.3f}')
print(f'CV_ROC_AUC+스코어_mean: {cv_roc_auc.mean():.3f}')

svm_model.fit(x_train, y_train)
y_pred_svm = svm_model.predict(x_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm)
recall_svm = recall_score(y_test, y_pred_svm)
f1_svm = f1_score(y_test, y_pred_svm)
roc_auc_svm = roc_auc_score(y_test, y_pred_svm)

print(f'\n=======Test 데이터 평가======')
print(f'Accuracy: {accuracy_svm:.3f}')
print(f'Precision: {precision_svm:.3f}')
print(f'Recall: {recall_svm:.3f}')
print(f'F1 스코어: {f1_svm:.3f}')
print(f'ROC AUC 스코어: {roc_auc_svm:.3f}')

CV_Accuracy_Scores: [0.67069701 0.82689115 0.8204885  0.83329381 0.67773299]
CV_Precision_Scores: [0.64117647 0.79145516 0.76438356 0.77963405 0.62860082]
CV_Recall_Scores: [0.77524893 0.88757116 0.92647059 0.9293504  0.86913229]
CV_F1_Scores: [0.70186735 0.83676208 0.83765816 0.84793424 0.72955224]
CV_ROC/AUC: [0.71875641 0.89964015 0.90476412 0.91092908 0.7212916 ]

CV_Accuracy_mean: 0.766
CV_Precision_mean: 0.721
CV_Recall_mean: 0.878
CV_F1_스코어_mean: 0.791
CV_ROC_AUC+스코어_mean: 0.831

Accuracy: 0.719
Precision: 0.605
Recall: 0.681
F1 스코어: 0.641
ROC AUC 스코어: 0.711


In [14]:
from sklearn.svm import SVC

# SVM 모델 생성 및 학습
svm_model = SVC(kernel='rbf')

cv_accuracy = cross_val_score(svm_model, x_train, y_train, cv=5, scoring='accuracy')
cv_precision = cross_val_score(svm_model, x_train, y_train, cv=5, scoring='precision')
cv_recall = cross_val_score(svm_model, x_train, y_train, cv=5, scoring='recall')
cv_f1 = cross_val_score(svm_model, x_train, y_train, cv=5, scoring='f1')
cv_roc_auc = cross_val_score(svm_model, x_train, y_train, cv=5, scoring='roc_auc')

print('=======교차검증 결과=======')
print("CV_Accuracy_Scores:", cv_accuracy)
print("CV_Precision_Scores:", cv_precision)
print("CV_Recall_Scores:", cv_recall)
print("CV_F1_Scores:", cv_f1)
print("CV_ROC/AUC:", cv_roc_auc)

print('\n=======교차검증 평균값=======')
print(f'CV_Accuracy_mean: {cv_accuracy.mean():.3f}')
print(f'CV_Precision_mean: {cv_precision.mean():.3f}')
print(f'CV_Recall_mean: {cv_recall.mean():.3f}')
print(f'CV_F1_스코어_mean: {cv_f1.mean():.3f}')
print(f'CV_ROC_AUC+스코어_mean: {cv_roc_auc.mean():.3f}')

svm_model.fit(x_train, y_train)
y_pred_svm = svm_model.predict(x_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm)
recall_svm = recall_score(y_test, y_pred_svm)
f1_svm = f1_score(y_test, y_pred_svm)
roc_auc_svm = roc_auc_score(y_test, y_pred_svm)

print(f'\n=======Test 데이터 평가======')
print(f'Accuracy: {accuracy_svm:.3f}')
print(f'Precision: {precision_svm:.3f}')
print(f'Recall: {recall_svm:.3f}')
print(f'F1 스코어: {f1_svm:.3f}')
print(f'ROC AUC 스코어: {roc_auc_svm:.3f}')


CV_Accuracy_Scores: [0.65457563 0.84870761 0.88617501 0.8928148  0.6718046 ]
CV_Precision_Scores: [0.61760462 0.81276596 0.84579439 0.84913611 0.62007287]
CV_Recall_Scores: [0.81175913 0.90607211 0.94449715 0.95542911 0.88762447]
CV_F1_Scores: [0.7014956  0.8568865  0.89242492 0.89915216 0.7301092 ]
CV_ROC/AUC: [0.71004373 0.91366561 0.93852699 0.94203931 0.73323778]

CV_Accuracy_mean: 0.791
CV_Precision_mean: 0.749
CV_Recall_mean: 0.901
CV_F1_스코어_mean: 0.816
CV_ROC_AUC+스코어_mean: 0.848

Accuracy: 0.764
Precision: 0.657
Recall: 0.752
F1 스코어: 0.702
ROC AUC 스코어: 0.762


In [15]:
from lightgbm import LGBMClassifier

# LGBM 모델 생성 및 학습
lgbm_model = LGBMClassifier()

cv_accuracy = cross_val_score(lgbm_model, x_train, y_train, cv=5, scoring='accuracy')
cv_precision = cross_val_score(lgbm_model, x_train, y_train, cv=5, scoring='precision')
cv_recall = cross_val_score(lgbm_model, x_train, y_train, cv=5, scoring='recall')
cv_f1 = cross_val_score(lgbm_model, x_train, y_train, cv=5, scoring='f1')
cv_roc_auc = cross_val_score(lgbm_model, x_train, y_train, cv=5, scoring='roc_auc')

print('=======교차검증 결과=======')
print("CV_Accuracy_Scores:", cv_accuracy)
print("CV_Precision_Scores:", cv_precision)
print("CV_Recall_Scores:", cv_recall)
print("CV_F1_Scores:", cv_f1)
print("CV_ROC/AUC:", cv_roc_auc)

print('\n=======교차검증 평균값=======')
print(f'CV_Accuracy_mean: {cv_accuracy.mean():.3f}')
print(f'CV_Precision_mean: {cv_precision.mean():.3f}')
print(f'CV_Recall_mean: {cv_recall.mean():.3f}')
print(f'CV_F1_스코어_mean: {cv_f1.mean():.3f}')
print(f'CV_ROC_AUC+스코어_mean: {cv_roc_auc.mean():.3f}')

lgbm_model.fit(x_train, y_train)
y_pred_lgbm = lgbm_model.predict(x_test)

accuracy_lgbm = accuracy_score(y_test, y_pred_lgbm)
precision_lgbm = precision_score(y_test, y_pred_lgbm)
recall_lgbm = recall_score(y_test, y_pred_lgbm)
f1_lgbm = f1_score(y_test, y_pred_lgbm)
roc_auc_lgbm = roc_auc_score(y_test, y_pred_lgbm)

print(f'\n=======Test 데이터 평가======')
print(f'Accuracy: {accuracy_lgbm:.3f}')
print(f'Precision: {precision_lgbm:.3f}')
print(f'Recall: {recall_lgbm:.3f}')
print(f'F1 스코어: {f1_lgbm:.3f}')
print(f'ROC AUC 스코어: {roc_auc_lgbm:.3f}')


[LightGBM] [Info] Number of positive: 8434, number of negative: 8434
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000742 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1794
[LightGBM] [Info] Number of data points in the train set: 16868, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 8435, number of negative: 8434
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000773 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1797
[LightGBM] [Info] Number of data points in the train set: 16869, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500030 -> initscore=0.00011

#### 딥러닝

In [16]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Dropout
# import tensorflow as tf

In [17]:
# model = Sequential()
# model.add(Dense(256, input_dim = 12, activation = 'relu'))
# model.add(Dropout(0.5))
# model.add(Dense(64, activation = 'relu'))
# model.add(Dropout(0.5))
# model.add(Dense(1, activation = 'sigmoid'))

In [18]:
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# history = model.fit(x_train, y_train, epochs=200, batch_size=5)

# print("\n Accuracy: %.4f" % (model.evaluate(x_test, y_test)[1]))

In [19]:
# # 모델을 사용하여 테스트 데이터에 대한 예측 수행
# y_pred = model.predict(x_test)
# y_pred = binarize(y_pred, threshold=0.5)  # 예측값을 0.5 임계값을 기준으로 이진 분류로 변환

# # 정확도(accuracy) 계산
# accuracy = accuracy_score(y_test, y_pred)
# print(f"accuracy: {accuracy:.4f}")

# # F1 점수(f1 score) 계산
# f1 = f1_score(y_test, y_pred)
# print(f"F1-Score: {f1:.4f}")

# # 재현율(recall) 계산
# recall = recall_score(y_test, y_pred)
# print(f"recall: {recall:.4f}")

# # 정밀도(precision) 계산
# precision = precision_score(y_test, y_pred)
# print(f"precision: {precision:.4f}")