In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [3]:
train = pd.read_csv('../../datasets/listed_resampled_train_data.csv')
test = pd.read_csv('../../datasets/listed_test_data.csv')

In [4]:
selected_features = ['CASH FLOW 대 부채비율', '당좌비율', '순운전자본비율', '자기자본구성비율','경영자본순이익률',
                     '총자본영업이익률', '매출액영업이익률', '금융비용부담률', '이윤분배율', '유형자산회전율',
                     '상장년수', 'PCR', '쭈피처', '도입기', '성장기', '성숙기', '쇠퇴기']

In [5]:
cycle = test[['도입기', '성장기', '성숙기', '쇠퇴기']]

In [6]:
x_train = train[selected_features]
x_test = test[selected_features]

y_train = train['부실판단']
y_test = test['부실판단']

In [7]:
x_test = x_test.fillna(test['영업년수'].median())

In [7]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

labels = x_train.columns

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

x_train = pd.DataFrame(data=x_train, columns = labels)
x_test = pd.DataFrame(data=x_test, columns = labels)

In [8]:
x_test_copy = x_test.copy()
x_test_copy = x_test_copy.drop(columns=['도입기', '성장기', '성숙기', '쇠퇴기'])

In [9]:
logit_model = LogisticRegression()
logit_model.fit(x_train, y_train)
y_pred_logit = logit_model.predict(x_test)

# 수명주기별 성능평가 비교
cycle_df = pd.concat([x_test_copy, cycle, y_test], axis=1)
cycle_df['예측'] = y_pred_logit

cond1 = (cycle_df['도입기'] == 1)
cond2 = (cycle_df['성장기'] == 1)
cond3 = (cycle_df['성숙기'] == 1)
cond4 = (cycle_df['쇠퇴기'] == 1)

# 도입기 성능평가
intro_acc = accuracy_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_pre = precision_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_rec = recall_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_f1 = f1_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_roc_auc = roc_auc_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])

print("\n도입기 예측 결과")
print(f'Accuracy: {intro_acc:.3f}')
print(f'Precision: {intro_pre:.3f}')
print(f'Recall: {intro_rec:.3f}')
print(f'F1 스코어: {intro_f1:.3f}')
print(f'ROC AUC 스코어: {intro_roc_auc:.3f}')

# 성숙기 성능평가
growth_acc = accuracy_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_pre = precision_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_rec = recall_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_f1 = f1_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_roc_auc = roc_auc_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])

print("\n성장기 예측 결과")
print(f'Accuracy: {growth_acc:.3f}')
print(f'Precision: {growth_pre:.3f}')
print(f'Recall: {growth_rec:.3f}')
print(f'F1 스코어: {growth_f1:.3f}')
print(f'ROC AUC 스코어: {growth_roc_auc:.3f}')

# 성숙기 성능평가
maturity_acc = accuracy_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_pre = precision_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_rec = recall_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_f1 = f1_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_roc_auc = roc_auc_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])

print("\n성숙기 예측 결과")
print(f'Accuracy: {maturity_acc:.3f}')
print(f'Precision: {maturity_pre:.3f}')
print(f'Recall: {maturity_rec:.3f}')
print(f'F1 스코어: {maturity_f1:.3f}')
print(f'ROC AUC 스코어: {maturity_roc_auc:.3f}')

# 쇠퇴기 성능평가
decline_acc = accuracy_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_pre = precision_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_rec = recall_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_f1 = f1_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_roc_auc = roc_auc_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])

print("\n쇠퇴기 예측 결과")
print(f'Accuracy: {decline_acc:.3f}')
print(f'Precision: {decline_pre:.3f}')
print(f'Recall: {decline_rec:.3f}')
print(f'F1 스코어: {decline_f1:.3f}')
print(f'ROC AUC 스코어: {decline_roc_auc:.3f}')


도입기 예측 결과
Accuracy: 0.660
Precision: 0.880
Recall: 0.611
F1 스코어: 0.721
ROC AUC 스코어: 0.698

성장기 예측 결과
Accuracy: 0.767
Precision: 0.692
Recall: 0.900
F1 스코어: 0.783
ROC AUC 스코어: 0.776

성숙기 예측 결과
Accuracy: 0.825
Precision: 0.545
Recall: 0.923
F1 스코어: 0.686
ROC AUC 스코어: 0.862

쇠퇴기 예측 결과
Accuracy: 0.805
Precision: 0.929
Recall: 0.812
F1 스코어: 0.867
ROC AUC 스코어: 0.795


In [10]:
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(x_train, y_train)
y_pred_rf = rf_model.predict(x_test)

# 수명주기별 성능평가 비교
cycle_df = pd.concat([x_test_copy, cycle, y_test], axis=1)
cycle_df['예측'] = y_pred_rf

cond1 = (cycle_df['도입기'] == 1)
cond2 = (cycle_df['성장기'] == 1)
cond3 = (cycle_df['성숙기'] == 1)
cond4 = (cycle_df['쇠퇴기'] == 1)

# 도입기 성능평가
intro_acc = accuracy_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_pre = precision_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_rec = recall_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_f1 = f1_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_roc_auc = roc_auc_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])

print("\n도입기 예측 결과")
print(f'Accuracy: {intro_acc:.3f}')
print(f'Precision: {intro_pre:.3f}')
print(f'Recall: {intro_rec:.3f}')
print(f'F1 스코어: {intro_f1:.3f}')
print(f'ROC AUC 스코어: {intro_roc_auc:.3f}')

# 성숙기 성능평가
growth_acc = accuracy_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_pre = precision_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_rec = recall_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_f1 = f1_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_roc_auc = roc_auc_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])

print("\n성장기 예측 결과")
print(f'Accuracy: {growth_acc:.3f}')
print(f'Precision: {growth_pre:.3f}')
print(f'Recall: {growth_rec:.3f}')
print(f'F1 스코어: {growth_f1:.3f}')
print(f'ROC AUC 스코어: {growth_roc_auc:.3f}')

# 성숙기 성능평가
maturity_acc = accuracy_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_pre = precision_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_rec = recall_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_f1 = f1_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_roc_auc = roc_auc_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])

print("\n성숙기 예측 결과")
print(f'Accuracy: {maturity_acc:.3f}')
print(f'Precision: {maturity_pre:.3f}')
print(f'Recall: {maturity_rec:.3f}')
print(f'F1 스코어: {maturity_f1:.3f}')
print(f'ROC AUC 스코어: {maturity_roc_auc:.3f}')

# 쇠퇴기 성능평가
decline_acc = accuracy_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_pre = precision_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_rec = recall_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_f1 = f1_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_roc_auc = roc_auc_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])

print("\n쇠퇴기 예측 결과")
print(f'Accuracy: {decline_acc:.3f}')
print(f'Precision: {decline_pre:.3f}')
print(f'Recall: {decline_rec:.3f}')
print(f'F1 스코어: {decline_f1:.3f}')
print(f'ROC AUC 스코어: {decline_roc_auc:.3f}')


도입기 예측 결과
Accuracy: 0.800
Precision: 0.861
Recall: 0.861
F1 스코어: 0.861
ROC AUC 스코어: 0.752

성장기 예측 결과
Accuracy: 0.744
Precision: 0.696
Recall: 0.800
F1 스코어: 0.744
ROC AUC 스코어: 0.748

성숙기 예측 결과
Accuracy: 0.889
Precision: 0.714
Recall: 0.769
F1 스코어: 0.741
ROC AUC 스코어: 0.845

쇠퇴기 예측 결과
Accuracy: 0.780
Precision: 0.871
Recall: 0.844
F1 스코어: 0.857
ROC AUC 스코어: 0.700


In [11]:
# AdaBoost 모델 생성 및 학습
adaboost_model = AdaBoostClassifier()
adaboost_model.fit(x_train, y_train)
y_pred_adaboost = adaboost_model.predict(x_test)

# 수명주기별 성능평가 비교
cycle_df = pd.concat([x_test_copy, cycle, y_test], axis=1)
cycle_df['예측'] = y_pred_logit

cond1 = (cycle_df['도입기'] == 1)
cond2 = (cycle_df['성장기'] == 1)
cond3 = (cycle_df['성숙기'] == 1)
cond4 = (cycle_df['쇠퇴기'] == 1)

# 도입기 성능평가
intro_acc = accuracy_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_pre = precision_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_rec = recall_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_f1 = f1_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_roc_auc = roc_auc_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])

print("\n도입기 예측 결과")
print(f'Accuracy: {intro_acc:.3f}')
print(f'Precision: {intro_pre:.3f}')
print(f'Recall: {intro_rec:.3f}')
print(f'F1 스코어: {intro_f1:.3f}')
print(f'ROC AUC 스코어: {intro_roc_auc:.3f}')

# 성숙기 성능평가
growth_acc = accuracy_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_pre = precision_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_rec = recall_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_f1 = f1_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_roc_auc = roc_auc_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])

print("\n성장기 예측 결과")
print(f'Accuracy: {growth_acc:.3f}')
print(f'Precision: {growth_pre:.3f}')
print(f'Recall: {growth_rec:.3f}')
print(f'F1 스코어: {growth_f1:.3f}')
print(f'ROC AUC 스코어: {growth_roc_auc:.3f}')

# 성숙기 성능평가
maturity_acc = accuracy_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_pre = precision_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_rec = recall_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_f1 = f1_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_roc_auc = roc_auc_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])

print("\n성숙기 예측 결과")
print(f'Accuracy: {maturity_acc:.3f}')
print(f'Precision: {maturity_pre:.3f}')
print(f'Recall: {maturity_rec:.3f}')
print(f'F1 스코어: {maturity_f1:.3f}')
print(f'ROC AUC 스코어: {maturity_roc_auc:.3f}')

# 쇠퇴기 성능평가
decline_acc = accuracy_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_pre = precision_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_rec = recall_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_f1 = f1_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_roc_auc = roc_auc_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])

print("\n쇠퇴기 예측 결과")
print(f'Accuracy: {decline_acc:.3f}')
print(f'Precision: {decline_pre:.3f}')
print(f'Recall: {decline_rec:.3f}')
print(f'F1 스코어: {decline_f1:.3f}')
print(f'ROC AUC 스코어: {decline_roc_auc:.3f}')


도입기 예측 결과
Accuracy: 0.660
Precision: 0.880
Recall: 0.611
F1 스코어: 0.721
ROC AUC 스코어: 0.698

성장기 예측 결과
Accuracy: 0.767
Precision: 0.692
Recall: 0.900
F1 스코어: 0.783
ROC AUC 스코어: 0.776

성숙기 예측 결과
Accuracy: 0.825
Precision: 0.545
Recall: 0.923
F1 스코어: 0.686
ROC AUC 스코어: 0.862

쇠퇴기 예측 결과
Accuracy: 0.805
Precision: 0.929
Recall: 0.812
F1 스코어: 0.867
ROC AUC 스코어: 0.795


In [12]:
xgboost_model = XGBClassifier()
xgboost_model.fit(x_train, y_train)
y_pred_xgboost = xgboost_model.predict(x_test)


# 수명주기별 성능평가 비교
cycle_df = pd.concat([x_test_copy, cycle, y_test], axis=1)
cycle_df['예측'] = y_pred_logit

cond1 = (cycle_df['도입기'] == 1)
cond2 = (cycle_df['성장기'] == 1)
cond3 = (cycle_df['성숙기'] == 1)
cond4 = (cycle_df['쇠퇴기'] == 1)

# 도입기 성능평가
intro_acc = accuracy_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_pre = precision_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_rec = recall_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_f1 = f1_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_roc_auc = roc_auc_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])

print("\n도입기 예측 결과")
print(f'Accuracy: {intro_acc:.3f}')
print(f'Precision: {intro_pre:.3f}')
print(f'Recall: {intro_rec:.3f}')
print(f'F1 스코어: {intro_f1:.3f}')
print(f'ROC AUC 스코어: {intro_roc_auc:.3f}')

# 성숙기 성능평가
growth_acc = accuracy_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_pre = precision_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_rec = recall_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_f1 = f1_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_roc_auc = roc_auc_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])

print("\n성장기 예측 결과")
print(f'Accuracy: {growth_acc:.3f}')
print(f'Precision: {growth_pre:.3f}')
print(f'Recall: {growth_rec:.3f}')
print(f'F1 스코어: {growth_f1:.3f}')
print(f'ROC AUC 스코어: {growth_roc_auc:.3f}')

# 성숙기 성능평가
maturity_acc = accuracy_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_pre = precision_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_rec = recall_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_f1 = f1_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_roc_auc = roc_auc_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])

print("\n성숙기 예측 결과")
print(f'Accuracy: {maturity_acc:.3f}')
print(f'Precision: {maturity_pre:.3f}')
print(f'Recall: {maturity_rec:.3f}')
print(f'F1 스코어: {maturity_f1:.3f}')
print(f'ROC AUC 스코어: {maturity_roc_auc:.3f}')

# 쇠퇴기 성능평가
decline_acc = accuracy_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_pre = precision_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_rec = recall_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_f1 = f1_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_roc_auc = roc_auc_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])

print("\n쇠퇴기 예측 결과")
print(f'Accuracy: {decline_acc:.3f}')
print(f'Precision: {decline_pre:.3f}')
print(f'Recall: {decline_rec:.3f}')
print(f'F1 스코어: {decline_f1:.3f}')
print(f'ROC AUC 스코어: {decline_roc_auc:.3f}')


도입기 예측 결과
Accuracy: 0.660
Precision: 0.880
Recall: 0.611
F1 스코어: 0.721
ROC AUC 스코어: 0.698

성장기 예측 결과
Accuracy: 0.767
Precision: 0.692
Recall: 0.900
F1 스코어: 0.783
ROC AUC 스코어: 0.776

성숙기 예측 결과
Accuracy: 0.825
Precision: 0.545
Recall: 0.923
F1 스코어: 0.686
ROC AUC 스코어: 0.862

쇠퇴기 예측 결과
Accuracy: 0.805
Precision: 0.929
Recall: 0.812
F1 스코어: 0.867
ROC AUC 스코어: 0.795


In [13]:
# Bagging 모델 생성 및 학습
bagging_model = BaggingClassifier()
bagging_model.fit(x_train, y_train)
y_pred_bagging = bagging_model.predict(x_test)

# 수명주기별 성능평가 비교
cycle_df = pd.concat([x_test_copy, cycle, y_test], axis=1)
cycle_df['예측'] = y_pred_logit

cond1 = (cycle_df['도입기'] == 1)
cond2 = (cycle_df['성장기'] == 1)
cond3 = (cycle_df['성숙기'] == 1)
cond4 = (cycle_df['쇠퇴기'] == 1)

# 도입기 성능평가
intro_acc = accuracy_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_pre = precision_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_rec = recall_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_f1 = f1_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_roc_auc = roc_auc_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])

print("\n도입기 예측 결과")
print(f'Accuracy: {intro_acc:.3f}')
print(f'Precision: {intro_pre:.3f}')
print(f'Recall: {intro_rec:.3f}')
print(f'F1 스코어: {intro_f1:.3f}')
print(f'ROC AUC 스코어: {intro_roc_auc:.3f}')

# 성숙기 성능평가
growth_acc = accuracy_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_pre = precision_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_rec = recall_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_f1 = f1_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_roc_auc = roc_auc_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])

print("\n성장기 예측 결과")
print(f'Accuracy: {growth_acc:.3f}')
print(f'Precision: {growth_pre:.3f}')
print(f'Recall: {growth_rec:.3f}')
print(f'F1 스코어: {growth_f1:.3f}')
print(f'ROC AUC 스코어: {growth_roc_auc:.3f}')

# 성숙기 성능평가
maturity_acc = accuracy_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_pre = precision_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_rec = recall_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_f1 = f1_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_roc_auc = roc_auc_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])

print("\n성숙기 예측 결과")
print(f'Accuracy: {maturity_acc:.3f}')
print(f'Precision: {maturity_pre:.3f}')
print(f'Recall: {maturity_rec:.3f}')
print(f'F1 스코어: {maturity_f1:.3f}')
print(f'ROC AUC 스코어: {maturity_roc_auc:.3f}')

# 쇠퇴기 성능평가
decline_acc = accuracy_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_pre = precision_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_rec = recall_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_f1 = f1_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_roc_auc = roc_auc_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])

print("\n쇠퇴기 예측 결과")
print(f'Accuracy: {decline_acc:.3f}')
print(f'Precision: {decline_pre:.3f}')
print(f'Recall: {decline_rec:.3f}')
print(f'F1 스코어: {decline_f1:.3f}')
print(f'ROC AUC 스코어: {decline_roc_auc:.3f}')



도입기 예측 결과
Accuracy: 0.660
Precision: 0.880
Recall: 0.611
F1 스코어: 0.721
ROC AUC 스코어: 0.698

성장기 예측 결과
Accuracy: 0.767
Precision: 0.692
Recall: 0.900
F1 스코어: 0.783
ROC AUC 스코어: 0.776

성숙기 예측 결과
Accuracy: 0.825
Precision: 0.545
Recall: 0.923
F1 스코어: 0.686
ROC AUC 스코어: 0.862

쇠퇴기 예측 결과
Accuracy: 0.805
Precision: 0.929
Recall: 0.812
F1 스코어: 0.867
ROC AUC 스코어: 0.795


In [14]:
svm_model = SVC(kernel='linear')
svm_model.fit(x_train, y_train)
y_pred_svm = svm_model.predict(x_test)

# 수명주기별 성능평가 비교
cycle_df = pd.concat([x_test_copy, cycle, y_test], axis=1)
cycle_df['예측'] = y_pred_logit

cond1 = (cycle_df['도입기'] == 1)
cond2 = (cycle_df['성장기'] == 1)
cond3 = (cycle_df['성숙기'] == 1)
cond4 = (cycle_df['쇠퇴기'] == 1)

# 도입기 성능평가
intro_acc = accuracy_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_pre = precision_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_rec = recall_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_f1 = f1_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_roc_auc = roc_auc_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])

print("\n도입기 예측 결과")
print(f'Accuracy: {intro_acc:.3f}')
print(f'Precision: {intro_pre:.3f}')
print(f'Recall: {intro_rec:.3f}')
print(f'F1 스코어: {intro_f1:.3f}')
print(f'ROC AUC 스코어: {intro_roc_auc:.3f}')

# 성숙기 성능평가
growth_acc = accuracy_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_pre = precision_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_rec = recall_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_f1 = f1_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_roc_auc = roc_auc_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])

print("\n성장기 예측 결과")
print(f'Accuracy: {growth_acc:.3f}')
print(f'Precision: {growth_pre:.3f}')
print(f'Recall: {growth_rec:.3f}')
print(f'F1 스코어: {growth_f1:.3f}')
print(f'ROC AUC 스코어: {growth_roc_auc:.3f}')

# 성숙기 성능평가
maturity_acc = accuracy_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_pre = precision_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_rec = recall_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_f1 = f1_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_roc_auc = roc_auc_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])

print("\n성숙기 예측 결과")
print(f'Accuracy: {maturity_acc:.3f}')
print(f'Precision: {maturity_pre:.3f}')
print(f'Recall: {maturity_rec:.3f}')
print(f'F1 스코어: {maturity_f1:.3f}')
print(f'ROC AUC 스코어: {maturity_roc_auc:.3f}')

# 쇠퇴기 성능평가
decline_acc = accuracy_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_pre = precision_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_rec = recall_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_f1 = f1_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_roc_auc = roc_auc_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])

print("\n쇠퇴기 예측 결과")
print(f'Accuracy: {decline_acc:.3f}')
print(f'Precision: {decline_pre:.3f}')
print(f'Recall: {decline_rec:.3f}')
print(f'F1 스코어: {decline_f1:.3f}')
print(f'ROC AUC 스코어: {decline_roc_auc:.3f}')


도입기 예측 결과
Accuracy: 0.660
Precision: 0.880
Recall: 0.611
F1 스코어: 0.721
ROC AUC 스코어: 0.698

성장기 예측 결과
Accuracy: 0.767
Precision: 0.692
Recall: 0.900
F1 스코어: 0.783
ROC AUC 스코어: 0.776

성숙기 예측 결과
Accuracy: 0.825
Precision: 0.545
Recall: 0.923
F1 스코어: 0.686
ROC AUC 스코어: 0.862

쇠퇴기 예측 결과
Accuracy: 0.805
Precision: 0.929
Recall: 0.812
F1 스코어: 0.867
ROC AUC 스코어: 0.795


In [15]:
svm_model = SVC(kernel='rbf')
svm_model.fit(x_train, y_train)
y_pred_svm = svm_model.predict(x_test)

# 수명주기별 성능평가 비교
cycle_df = pd.concat([x_test_copy, cycle, y_test], axis=1)
cycle_df['예측'] = y_pred_logit

cond1 = (cycle_df['도입기'] == 1)
cond2 = (cycle_df['성장기'] == 1)
cond3 = (cycle_df['성숙기'] == 1)
cond4 = (cycle_df['쇠퇴기'] == 1)

# 도입기 성능평가
intro_acc = accuracy_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_pre = precision_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_rec = recall_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_f1 = f1_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_roc_auc = roc_auc_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])

print("\n도입기 예측 결과")
print(f'Accuracy: {intro_acc:.3f}')
print(f'Precision: {intro_pre:.3f}')
print(f'Recall: {intro_rec:.3f}')
print(f'F1 스코어: {intro_f1:.3f}')
print(f'ROC AUC 스코어: {intro_roc_auc:.3f}')

# 성숙기 성능평가
growth_acc = accuracy_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_pre = precision_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_rec = recall_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_f1 = f1_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_roc_auc = roc_auc_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])

print("\n성장기 예측 결과")
print(f'Accuracy: {growth_acc:.3f}')
print(f'Precision: {growth_pre:.3f}')
print(f'Recall: {growth_rec:.3f}')
print(f'F1 스코어: {growth_f1:.3f}')
print(f'ROC AUC 스코어: {growth_roc_auc:.3f}')

# 성숙기 성능평가
maturity_acc = accuracy_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_pre = precision_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_rec = recall_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_f1 = f1_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_roc_auc = roc_auc_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])

print("\n성숙기 예측 결과")
print(f'Accuracy: {maturity_acc:.3f}')
print(f'Precision: {maturity_pre:.3f}')
print(f'Recall: {maturity_rec:.3f}')
print(f'F1 스코어: {maturity_f1:.3f}')
print(f'ROC AUC 스코어: {maturity_roc_auc:.3f}')

# 쇠퇴기 성능평가
decline_acc = accuracy_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_pre = precision_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_rec = recall_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_f1 = f1_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_roc_auc = roc_auc_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])

print("\n쇠퇴기 예측 결과")
print(f'Accuracy: {decline_acc:.3f}')
print(f'Precision: {decline_pre:.3f}')
print(f'Recall: {decline_rec:.3f}')
print(f'F1 스코어: {decline_f1:.3f}')
print(f'ROC AUC 스코어: {decline_roc_auc:.3f}')


도입기 예측 결과
Accuracy: 0.660
Precision: 0.880
Recall: 0.611
F1 스코어: 0.721
ROC AUC 스코어: 0.698

성장기 예측 결과
Accuracy: 0.767
Precision: 0.692
Recall: 0.900
F1 스코어: 0.783
ROC AUC 스코어: 0.776

성숙기 예측 결과
Accuracy: 0.825
Precision: 0.545
Recall: 0.923
F1 스코어: 0.686
ROC AUC 스코어: 0.862

쇠퇴기 예측 결과
Accuracy: 0.805
Precision: 0.929
Recall: 0.812
F1 스코어: 0.867
ROC AUC 스코어: 0.795


In [16]:
lgbm_model = LGBMClassifier()
lgbm_model.fit(x_train, y_train)
y_pred_lgbm = lgbm_model.predict(x_test)

# 수명주기별 성능평가 비교
cycle_df = pd.concat([x_test_copy, cycle, y_test], axis=1)
cycle_df['예측'] = y_pred_logit

cond1 = (cycle_df['도입기'] == 1)
cond2 = (cycle_df['성장기'] == 1)
cond3 = (cycle_df['성숙기'] == 1)
cond4 = (cycle_df['쇠퇴기'] == 1)

# 도입기 성능평가
intro_acc = accuracy_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_pre = precision_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_rec = recall_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_f1 = f1_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])
intro_roc_auc = roc_auc_score(cycle_df[cond1]['부실판단'], cycle_df[cond1]['예측'])

print("\n도입기 예측 결과")
print(f'Accuracy: {intro_acc:.3f}')
print(f'Precision: {intro_pre:.3f}')
print(f'Recall: {intro_rec:.3f}')
print(f'F1 스코어: {intro_f1:.3f}')
print(f'ROC AUC 스코어: {intro_roc_auc:.3f}')

# 성숙기 성능평가
growth_acc = accuracy_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_pre = precision_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_rec = recall_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_f1 = f1_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])
growth_roc_auc = roc_auc_score(cycle_df[cond2]['부실판단'], cycle_df[cond2]['예측'])

print("\n성장기 예측 결과")
print(f'Accuracy: {growth_acc:.3f}')
print(f'Precision: {growth_pre:.3f}')
print(f'Recall: {growth_rec:.3f}')
print(f'F1 스코어: {growth_f1:.3f}')
print(f'ROC AUC 스코어: {growth_roc_auc:.3f}')

# 성숙기 성능평가
maturity_acc = accuracy_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_pre = precision_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_rec = recall_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_f1 = f1_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])
maturity_roc_auc = roc_auc_score(cycle_df[cond3]['부실판단'], cycle_df[cond3]['예측'])

print("\n성숙기 예측 결과")
print(f'Accuracy: {maturity_acc:.3f}')
print(f'Precision: {maturity_pre:.3f}')
print(f'Recall: {maturity_rec:.3f}')
print(f'F1 스코어: {maturity_f1:.3f}')
print(f'ROC AUC 스코어: {maturity_roc_auc:.3f}')

# 쇠퇴기 성능평가
decline_acc = accuracy_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_pre = precision_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_rec = recall_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_f1 = f1_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])
decline_roc_auc = roc_auc_score(cycle_df[cond4]['부실판단'], cycle_df[cond4]['예측'])

print("\n쇠퇴기 예측 결과")
print(f'Accuracy: {decline_acc:.3f}')
print(f'Precision: {decline_pre:.3f}')
print(f'Recall: {decline_rec:.3f}')
print(f'F1 스코어: {decline_f1:.3f}')
print(f'ROC AUC 스코어: {decline_roc_auc:.3f}')

[LightGBM] [Info] Number of positive: 128, number of negative: 128
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000478 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 885
[LightGBM] [Info] Number of data points in the train set: 256, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

도입기 예측 결과
Accuracy: 0.660
Precision: 0.880
Recall: 0.611
F1 스코어: 0.721
ROC AUC 스코어: 0.698

성장기 예측 결과
Accuracy: 0.767
Precision: 0.692
Recall: 0.900
F1 스코어: 0.783
ROC AUC 스코어: 0.776

성숙기 예측 결과
Accuracy: 0.825
Precision: 0.545
Recall: 0.923
F1 스코어: 0.686
ROC AUC 스코어: 0.862

쇠퇴기 예측 결과
Accuracy: 0.805
Precision: 0.929
Recall: 0.812
F1 스코어: 0.867
ROC AUC 스코어: 0.795
