In [1]:
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score
def fold_K(X, y, model):
    """Run shuffled 5-fold cross-validation and print mean accuracy and recall.

    For each fold, ``model`` is fitted on the training part and then used to
    predict both the training part and the held-out part. Accuracy and recall
    are collected for both and their means over the five folds are printed.

    Parameters
    ----------
    X : NumPy array or pandas DataFrame
        Feature rows; row ``i`` belongs to label ``i`` of ``y``.
    y : NumPy array or pandas Series
        Labels. ``recall_score`` is called with its default arguments.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. The same object is
        re-fitted on every fold, so it is left fitted on the last fold.

    Returns
    -------
    None
        The four averaged scores are printed, not returned.
    """
    def _rows(data, idx):
        # KFold yields *positional* indices. Plain [] on a pandas Series is a
        # label lookup (and a column lookup on a DataFrame), which breaks or
        # silently picks the wrong rows when the index is not 0..n-1.
        return data.iloc[idx] if hasattr(data, 'iloc') else data[idx]

    kf = KFold(n_splits = 5, shuffle = True, random_state = 42)
    acc_test_score = []
    acc_train_score = []
    rec_test_score = []
    rec_train_score = []

    for train_index, test_index in kf.split(X): # one pass per fold (5 in total)
        X_train, X_test = _rows(X, train_index), _rows(X, test_index)
        y_train, y_test = _rows(y, train_index), _rows(y, test_index)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_pred_train = model.predict(X_train)

        acc_train_score.append(accuracy_score(y_train, y_pred_train))
        acc_test_score.append(accuracy_score(y_test, y_pred))

        rec_train_score.append(recall_score(y_train, y_pred_train))
        rec_test_score.append(recall_score(y_test, y_pred))

    # Report the per-metric mean over all folds (train vs. held-out)
    print('정확도 : train score : {}'.format(np.mean(acc_train_score)))
    print('정확도 : test score : {}'.format(np.mean(acc_test_score)))
    print('재현율 : train score : {}'.format(np.mean(rec_train_score)))
    print('재현율 : test score : {}'.format(np.mean(rec_test_score)))


In [3]:
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Read the prepared heart-disease table from disk
df = pd.read_csv('C:/ML_project_predict_heart_disease/data/heart_2020_final.csv')

# Target column is kept as a pandas Series; every other column becomes a NumPy feature matrix
y = df['HeartDisease']
X = df.drop(columns = ['HeartDisease']).values

# Oversample with SMOTE using a fixed seed.
# NOTE(review): resampling is applied to the whole dataset here, before any
# train/test split, so later cross-validation on X_smote/y_smote evaluates on
# data that includes synthetic samples.
smote = SMOTE(random_state = 42)
X_smote, y_smote = smote.fit_resample(X,y)

#### catboost 로드

In [23]:
from catboost import CatBoostClassifier

# CatBoost classifier with fixed hyperparameters; task_type requests GPU training
catboost = CatBoostClassifier(
    task_type = 'GPU',
    random_state = 42,
    learning_rate = 0.1,
    depth = 9,
    l2_leaf_reg = 3,
    bagging_temperature = 0,
)

#### xgb 로드

In [24]:
# XGBoost classifier with fixed hyperparameters.
# `max_leaf_nodes` was dropped: it is a scikit-learn tree parameter, not an
# XGBoost one (XGBoost's equivalent is `max_leaves`), so XGBoost only warned
# about an unused parameter and ignored it. Removing it keeps the model identical.
# NOTE(review): newer XGBoost releases prefer tree_method='hist' with
# device='cuda' over 'gpu_hist' — confirm against the installed version.
xgb = XGBClassifier(random_state = 42,
                    colsample_bytree = 0.8,
                    learning_rate = 0.1,
                    max_depth = 5,
                    min_child_weight = 4,
                    n_estimators = 200,
                    subsample = 0.9,
                    tree_method='gpu_hist')

#### 소프트 보팅 적용

In [21]:
from sklearn.ensemble import VotingClassifier

# Soft-voting ensemble built from the CatBoost and XGBoost estimators
voting_model = VotingClassifier(
    estimators = [('CAT', catboost), ('XGB', xgb)],
    voting = 'soft',
)

In [None]:
# Cross-validate the soft-voting ensemble on the SMOTE-resampled data.
# NOTE(review): X_smote/y_smote were oversampled before the folds are split, so
# the held-out folds contain synthetic samples and the printed test scores may
# be optimistic.
fold_K(X_smote, y_smote, voting_model)

In [None]:
from sklearn.ensemble import VotingClassifier

# Same steps as the two cells above, combined: rebuild the soft-voting
# ensemble, then cross-validate it on the SMOTE-resampled data.
voting_model = VotingClassifier(
    estimators = [('CAT', catboost), ('XGB', xgb)],
    voting = 'soft',
)
fold_K(X_smote, y_smote, voting_model)

In [None]:
# Fit the voting ensemble on the full SMOTE-resampled dataset
voting_model.fit(X_smote, y_smote)

# Persist the fitted ensemble.
# The original called `xgb.save(...)`, but XGBClassifier has no `save` method
# and `xgb` is not the object this cell just fitted. Serialize the voting
# ensemble itself with joblib, which was already imported for this purpose.
import joblib
joblib.dump(voting_model, '/content/last_voting_model.pkl')

In [31]:
# Restore a previously saved XGBoost model from disk.
# This rebinds `xgb`, replacing the configured estimator defined earlier in the notebook.
from xgboost import XGBClassifier
xgb = XGBClassifier()
# NOTE(review): load_model reads XGBoost's own model format. Despite the .pkl
# extension, this file presumably was written by save_model rather than
# pickle/joblib — confirm how xgb3.pkl was produced.
xgb.load_model('C:/ML_project_predict_heart_disease/data/xgb3.pkl')



