In [3]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from ANN.get import get, prepro


X_trn, y_trn, X_tst, y_tst = get('data/f5.csv')
# 데이터와 타겟을 X, y로 가정합니다.
X_train, X_test, y_train, y_test = X_trn, X_tst, y_trn, y_tst

# 모델을 딕셔너리 형태로 정의합니다.
models = {
    'xgboost': XGBClassifier(),
    'catboost': CatBoostClassifier(verbose=0),
    'random_forest': RandomForestClassifier()
}

# 각 모델의 파라미터를 딕셔너리 형태로 정의합니다.
params = {
    'xgboost': {'model__n_estimators': [50, 100, 200], 'model__learning_rate': [0.01, 0.1, 0.2], 'model__max_depth': [3, 5, 7, 9]},
    'catboost': {'model__iterations': [50, 100, 200], 'model__learning_rate': [0.01, 0.1, 0.2], 'model__depth': [3, 5, 7, 9]},
    'random_forest': {'model__n_estimators': [50, 100, 200], 'model__max_depth': [None, 5, 10], 'model__min_samples_split': [2, 5, 10]}
}

# 성능 지표를 딕셔너리 형태로 정의합니다.
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision': make_scorer(precision_score, average='macro'),
    'recall': make_scorer(recall_score, average='macro'),
    'f1': make_scorer(f1_score, average='macro')
}

for name, model in models.items():
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('model', model)
    ])

    clf = GridSearchCV(pipe, params[name], cv=5, scoring=scoring, refit='f1')
    clf.fit(X_train, y_train)

    print(f"Best parameters for {name}: {clf.best_params_}")
    print(f"Best score for {name}: {clf.best_score_}")
    for scorer in scoring:
        print(f"{scorer}: {clf.cv_results_['mean_test_'+scorer]}")

Best parameters for xgboost: {'model__learning_rate': 0.1, 'model__max_depth': 9, 'model__n_estimators': 200}
Best score for xgboost: 0.7851960954870625
accuracy: [0.70637739 0.72603902 0.74249725 0.73469476 0.75327132 0.77926285
 0.77050423 0.7771185  0.79078598 0.78587693 0.79081172 0.79900193
 0.7820531  0.79564327 0.8028001  0.7984078  0.80398855 0.80695976
 0.80256746 0.80497035 0.80719226 0.80605533 0.80708881 0.80768314
 0.79484233 0.80378186 0.80677888 0.80236091 0.80590051 0.80497041
 0.80553879 0.80571963 0.80525461 0.80677883 0.80652055 0.80437619]
precision: [0.70001267 0.72452683 0.75325036 0.73766945 0.76386061 0.79892674
 0.78739913 0.79494298 0.81649813 0.80916503 0.81477643 0.82711413
 0.80597677 0.83006283 0.83831551 0.82923434 0.8414903  0.8390314
 0.83933706 0.83921153 0.84094502 0.83692779 0.83969454 0.83799855
 0.83219347 0.84698765 0.84312533 0.84085019 0.83080719 0.83308351
 0.8354065  0.83528986 0.83200956 0.83753831 0.83307587 0.8298905 ]
recall: [0.70637243 0

In [4]:
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pandas as pd
X_trn, y_trn, X_tst, y_tst = get('data/f5.csv')
X_train, X_test, y_train, y_test = X_trn, X_tst, y_trn, y_tst
# 모델 생성
xgb_model = XGBClassifier(learning_rate=0.1, max_depth=9, n_estimators=200)
cat_model = CatBoostClassifier(depth=9, iterations=200, learning_rate=0.2)
rf_model = RandomForestClassifier(max_depth=None, min_samples_split=2, n_estimators=200)

# 데이터와 레이블로 분리되어 있는 X_train, y_train, X_test, y_test를 사용한다고 가정합니다.

# 모델 학습
xgb_model.fit(X_train, y_train)
cat_model.fit(X_train, y_train)
rf_model.fit(X_train, y_train)

# 예측
models = [xgb_model, cat_model, rf_model]
model_names = ['XGBoost', 'CatBoost', 'RandomForest']
results = []

for model, name in zip(models, model_names):
    pred = model.predict(X_test)
    
    # 성능 지표 계산
    accuracy = accuracy_score(y_test, pred)
    precision_per_class = precision_score(y_test, pred, average=None)
    recall_per_class = recall_score(y_test, pred, average=None)
    f1_per_class = f1_score(y_test, pred, average=None)
    confusion_matrix1 = multilabel_confusion_matrix(y_test, pred)
    confusion_matrix2 = confusion_matrix(y_test, pred)

    # 결과 DataFrame 생성
    metrics_df = pd.DataFrame({
        'Model': [name],
        'Accuracy': [accuracy],
        'Precision (Class 0)': [precision_per_class[0]],
        'Precision (Class 1)': [precision_per_class[1]],
        'Precision (Class 2)': [precision_per_class[2]],
        'Recall (Class 0)': [recall_per_class[0]],
        'Recall (Class 1)': [recall_per_class[1]],
        'Recall (Class 2)': [recall_per_class[2]],
        'F1 Score (Class 0)': [f1_per_class[0]],
        'F1 Score (Class 1)': [f1_per_class[1]],
        'F1 Score (Class 2)': [f1_per_class[2]],
        'Label Confusion Matrix': [confusion_matrix1],
        'Confusion Matrix': [confusion_matrix2]
    })
    
    # 결과 저장
    results.append(metrics_df)

# 결과를 하나의 DataFrame으로 결합
final_df = pd.concat(results, ignore_index=True)
final_df.head()


0:	learn: 0.9339140	total: 36.3ms	remaining: 7.21s
1:	learn: 0.8158593	total: 68.5ms	remaining: 6.78s
2:	learn: 0.7338509	total: 104ms	remaining: 6.85s
3:	learn: 0.6794822	total: 137ms	remaining: 6.72s
4:	learn: 0.6355502	total: 172ms	remaining: 6.71s
5:	learn: 0.5997385	total: 204ms	remaining: 6.61s
6:	learn: 0.5738618	total: 244ms	remaining: 6.72s
7:	learn: 0.5538043	total: 280ms	remaining: 6.73s
8:	learn: 0.5359052	total: 317ms	remaining: 6.72s
9:	learn: 0.5180003	total: 352ms	remaining: 6.69s
10:	learn: 0.5061613	total: 387ms	remaining: 6.65s
11:	learn: 0.4970668	total: 423ms	remaining: 6.63s
12:	learn: 0.4880698	total: 458ms	remaining: 6.58s
13:	learn: 0.4816580	total: 493ms	remaining: 6.55s
14:	learn: 0.4741605	total: 526ms	remaining: 6.49s
15:	learn: 0.4696764	total: 561ms	remaining: 6.45s
16:	learn: 0.4664391	total: 597ms	remaining: 6.43s
17:	learn: 0.4621011	total: 631ms	remaining: 6.38s
18:	learn: 0.4599703	total: 664ms	remaining: 6.33s
19:	learn: 0.4569246	total: 699ms	remai

Unnamed: 0,Model,Accuracy,Precision (Class 0),Precision (Class 1),Precision (Class 2),Recall (Class 0),Recall (Class 1),Recall (Class 2),F1 Score (Class 0),F1 Score (Class 1),F1 Score (Class 2),Label Confusion Matrix,Confusion Matrix
0,XGBoost,0.74321,0.832669,0.731734,0.360335,0.932515,0.849186,0.118785,0.879768,0.786097,0.17867,"[[[3946, 336], [121, 1672]], [[1884, 995], [48...","[[1672, 106, 15], [268, 2714, 214], [68, 889, ..."
1,CatBoost,0.74749,0.829871,0.736728,0.396122,0.935862,0.851064,0.131676,0.879685,0.789779,0.19765,"[[[3938, 344], [115, 1678]], [[1907, 972], [47...","[[1678, 102, 13], [271, 2720, 205], [73, 870, ..."
2,RandomForest,0.750782,0.823729,0.736041,0.393258,0.948689,0.862015,0.096685,0.881804,0.794063,0.155211,"[[[3918, 364], [92, 1701]], [[1891, 988], [441...","[[1701, 88, 4], [283, 2755, 158], [81, 900, 105]]"
