In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [2]:
train = pd.read_csv('../datasets/all_data_resampled_train_data.csv')
test = pd.read_csv('../datasets/all_data_test.csv')

In [3]:
train_life_cycle = train[['도입기', '성장기', '성숙기', '쇠퇴기']]
test_life_cycle = test[['도입기', '성장기', '성숙기', '쇠퇴기']]

In [4]:
x_train = train[['CASH FLOW 대 매출액비율', 'CASH FLOW 대 부채비율', 'CASH FLOW 대 차입금비율', 'CASH FLOW 대 총자본비율', '유동자산구성비율',
				 '자기자본구성비율', '차입금의존도', '현금비율', '경영자본순이익률', '금융비용부담률', '매출액순이익률', '매출액영업이익률']]

x_test = test[['CASH FLOW 대 매출액비율', 'CASH FLOW 대 부채비율', 'CASH FLOW 대 차입금비율', 'CASH FLOW 대 총자본비율', '유동자산구성비율',
				 '자기자본구성비율', '차입금의존도', '현금비율', '경영자본순이익률', '금융비용부담률', '매출액순이익률', '매출액영업이익률']]

y_train = train['부실판단']
y_test = test['부실판단']

In [5]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

labels = x_train.columns

scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

x_train = pd.DataFrame(data=x_train, columns = labels)
x_test = pd.DataFrame(data=x_test, columns = labels)

In [6]:
model = LogisticRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)


print(f'Accuracy: {accuracy:.3f}')
print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')
print(f'F1 스코어: {f1:.2f}')
print(f'ROC AUC 스코어: {roc_auc:.2f}')


Accuracy: 0.729
Precision: 0.614
Recall: 0.696
F1 스코어: 0.65
ROC AUC 스코어: 0.72


In [7]:
# Random Forest 모델 생성 및 학습
rf_model = RandomForestClassifier()
rf_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_rf = rf_model.predict(x_test)

accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf)
roc_auc_rf = roc_auc_score(y_test, y_pred_rf)

print("Random Forest 모델 평가 지표")
print(f'Accuracy: {accuracy_rf:.3f}')
print(f'Precision: {precision_rf:.3f}')
print(f'Recall: {recall_rf:.3f}')
print(f'F1 스코어: {f1_rf:.2f}')
print(f'ROC AUC 스코어: {roc_auc_rf:.2f}')


Random Forest 모델 평가 지표
Accuracy: 0.799
Precision: 0.696
Recall: 0.801
F1 스코어: 0.74
ROC AUC 스코어: 0.80


In [8]:
# AdaBoost 모델 생성 및 학습
adaboost_model = AdaBoostClassifier()
adaboost_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_adaboost = adaboost_model.predict(x_test)

accuracy_adaboost = accuracy_score(y_test, y_pred_adaboost)
precision_adaboost = precision_score(y_test, y_pred_adaboost)
recall_adaboost = recall_score(y_test, y_pred_adaboost)
f1_adaboost = f1_score(y_test, y_pred_adaboost)
roc_auc_adaboost = roc_auc_score(y_test, y_pred_adaboost)

print("AdaBoost 모델 평가 지표:")
print(f'Accuracy: {accuracy_adaboost:.3f}')
print(f'Precision: {precision_adaboost:.3f}')
print(f'Recall: {recall_adaboost:.3f}')
print(f'F1 스코어: {f1_adaboost:.2f}')
print(f'ROC AUC 스코어: {roc_auc_adaboost:.2f}')


AdaBoost 모델 평가 지표:
Accuracy: 0.775
Precision: 0.685
Recall: 0.711
F1 스코어: 0.70
ROC AUC 스코어: 0.76


In [9]:
from sklearn.ensemble import BaggingClassifier

# Bagging 모델 생성 및 학습
bagging_model = BaggingClassifier()
bagging_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_bagging = bagging_model.predict(x_test)

accuracy_bagging = accuracy_score(y_test, y_pred_bagging)
precision_bagging = precision_score(y_test, y_pred_bagging)
recall_bagging = recall_score(y_test, y_pred_bagging)
f1_bagging = f1_score(y_test, y_pred_bagging)
roc_auc_bagging = roc_auc_score(y_test, y_pred_bagging)

print("Bagging 모델 평가 지표:")
print(f'Accuracy: {accuracy_bagging:.3f}')
print(f'Precision: {precision_bagging:.3f}')
print(f'Recall: {recall_bagging:.3f}')
print(f'F1 스코어: {f1_bagging:.3f}')
print(f'ROC AUC 스코어: {roc_auc_bagging:.3f}')


Bagging 모델 평가 지표:
Accuracy: 0.774
Precision: 0.694
Recall: 0.686
F1 스코어: 0.690
ROC AUC 스코어: 0.756


In [10]:
from sklearn.svm import SVC

# SVM 모델 생성 및 학습
svm_model = SVC(kernel='linear')
svm_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_svm = svm_model.predict(x_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm)
recall_svm = recall_score(y_test, y_pred_svm)
f1_svm = f1_score(y_test, y_pred_svm)
roc_auc_svm = roc_auc_score(y_test, y_pred_svm)

print("SVM 모델 평가 지표:")
print(f'Accuracy: {accuracy_bagging:.3f}')
print(f'Precision: {precision_bagging:.3f}')
print(f'Recall: {recall_bagging:.3f}')
print(f'F1 스코어: {f1_bagging:.3f}')
print(f'ROC AUC 스코어: {roc_auc_svm:.3f}')


SVM 모델 평가 지표:
Accuracy: 0.774
Precision: 0.694
Recall: 0.686
F1 스코어: 0.690
ROC AUC 스코어: 0.717


In [11]:
from sklearn.svm import SVC

# SVM 모델 생성 및 학습
svm_model = SVC(kernel='rbf')
svm_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_svm = svm_model.predict(x_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm)
recall_svm = recall_score(y_test, y_pred_svm)
f1_svm = f1_score(y_test, y_pred_svm)
roc_auc_svm = roc_auc_score(y_test, y_pred_svm)

print("SVM 모델 평가 지표:")
print(f'Accuracy: {accuracy_bagging:.3f}')
print(f'Precision: {precision_bagging:.3f}')
print(f'Recall: {recall_bagging:.3f}')
print(f'F1 스코어: {f1_bagging:.3f}')
print(f'ROC AUC 스코어: {roc_auc_svm:.3f}')


SVM 모델 평가 지표:
Accuracy: 0.774
Precision: 0.694
Recall: 0.686
F1 스코어: 0.690
ROC AUC 스코어: 0.728


In [12]:
from lightgbm import LGBMClassifier

# LGBM 모델 생성 및 학습
lgbm_model = LGBMClassifier()
lgbm_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_lgbm = lgbm_model.predict(x_test)

accuracy_lgbm = accuracy_score(y_test, y_pred_lgbm)
precision_lgbm = precision_score(y_test, y_pred_lgbm)
recall_lgbm = recall_score(y_test, y_pred_lgbm)
f1_lgbm = f1_score(y_test, y_pred_lgbm)
roc_auc_lgbm = roc_auc_score(y_test, y_pred_lgbm)

print("LightGBM 모델 평가 지표:")
print(f'Accuracy: {accuracy_lgbm:.3f}')
print(f'Precision: {precision_lgbm:.3f}')
print(f'Recall: {recall_lgbm:.3f}')
print(f'F1 스코어: {f1_lgbm:.3f}')
print(f'ROC AUC 스코어: {roc_auc_lgbm:.3f}')


[LightGBM] [Info] Number of positive: 4484, number of negative: 4484
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000672 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3060
[LightGBM] [Info] Number of data points in the train set: 8968, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
LightGBM 모델 평가 지표:
Accuracy: 0.801
Precision: 0.695
Recall: 0.812
F1 스코어: 0.749
ROC AUC 스코어: 0.803
