In [13]:
```python
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import ExtraTreesClassifier
from lightgbm import LGBMClassifier
import xgboost
from sklearn.neural_network import MLPClassifier
from imblearn.ensemble import EasyEnsembleClassifier, BalancedBaggingClassifier
from imbens.ensemble import BalancedRandomForestClassifier
from imblearn.ensemble import RUSBoostClassifier
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef, confusion_matrix

# Lift pandas' display limits so printed DataFrames show every column and row.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)

# Plot with the SimHei font (presumably so CJK labels render — confirm) and
# turn off the Unicode minus glyph for axis tick labels.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# ---------------------------------------------------------------------------
# Candidate models. Hyper-parameters are fixed here; every estimator that
# accepts a seed is given an explicit random_state.
# ---------------------------------------------------------------------------

# Linear / distance-based / probabilistic baselines.
clf1 = LogisticRegression(
    random_state=100,
    C=50,
    penalty='l2',
    max_iter=1000,
)
clf2 = KNeighborsClassifier(
    n_neighbors=5,
    weights='uniform',
    algorithm='auto',
    leaf_size=10,
    p=1,
)

# Tree-based models and general-purpose ensembles.
clf4 = RandomForestClassifier(
    random_state=100,
    n_estimators=150,
    max_depth=50,
    min_samples_split=10,
    min_samples_leaf=1,
    max_features='sqrt',
)
clf5 = DecisionTreeClassifier(
    random_state=100,
    max_depth=12,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features=None,
)
clf6 = GradientBoostingClassifier(
    random_state=150,
    n_estimators=200,
    max_depth=45,
    min_samples_split=9,
    min_samples_leaf=7,
    max_features='sqrt',
    learning_rate=0.01,
)
clf7 = SGDClassifier(
    random_state=100,
    alpha=0.0001,
    loss='squared_hinge',
    penalty='l2',
    learning_rate='constant',
    eta0=0.01,
)
clf8 = GaussianNB()
clf9 = ExtraTreesClassifier(
    random_state=100,
    n_estimators=150,
    max_depth=45,
    min_samples_split=10,
    min_samples_leaf=1,
    max_features='sqrt',
)
clf10 = AdaBoostClassifier(
    random_state=100,
    n_estimators=200,
    learning_rate=0.06,
)
clf12 = LGBMClassifier(
    random_state=100,
    n_estimators=200,
    max_depth=3,
    learning_rate=0.06,
    subsample=0.2,
    colsample_bytree=0.1,
    reg_alpha=0,
    reg_lambda=1,
)
clf13 = xgboost.XGBClassifier(
    random_state=100,
    n_estimators=200,
    max_depth=12,
    learning_rate=0.025,
)

# Ensembles from the imbalanced-learning packages imported above.
clf17 = EasyEnsembleClassifier(
    random_state=150,
    estimator=AdaBoostClassifier(random_state=100),
)
clf18 = BalancedBaggingClassifier(
    random_state=100,
    estimator=AdaBoostClassifier(random_state=100),
    n_estimators=90,
)
clf19 = BalancedRandomForestClassifier(
    random_state=100,
    n_estimators=30,
    max_depth=13,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
)
clf20 = RUSBoostClassifier(
    random_state=100,
    estimator=LGBMClassifier(random_state=100),
    learning_rate=0.04,
    n_estimators=150,
)
# NOTE(review): SelfPacedEnsembleClassifier is not imported in this cell;
# presumably an earlier notebook cell imported it — confirm.
clf31 = SelfPacedEnsembleClassifier(
    random_state=100,
    k_bins=10,
    n_estimators=100,
    estimator=AdaBoostClassifier(random_state=100),
)

# Display name -> estimator, in the order the models are trained and reported.
# NOTE(review): clf35 ('MLP') and clf22 ('BCascade') are not defined in this
# cell; presumably they come from an earlier notebook cell — confirm.
_named_models = [
    ("LR", clf1),
    ("KNN", clf2),
    ("SGD", clf7),
    ("GNB", clf8),
    ("MLP", clf35),
    ("DTree", clf5),
    ("RF", clf4),
    ("GBDT", clf6),
    ("ExtraTrees", clf9),
    ("AdaBoost", clf10),
    ("XGBClassifier", clf13),
    ("LGBM", clf12),
    ("EasyE", clf17),
    ("BalancedBagging", clf18),
    ("BalancedRandomForest", clf19),
    ("RUSBoostClassifier", clf20),
    ("BCascade", clf22),
    ("spe_ada", clf31),
]
classfiers = dict(_named_models)

# Columns of data_clean.csv that are fed to the models as input features.
feature_columns = [
    'Hdl_Event',
    'Hdl_Key',
    'Hdl_Timer',
    'LDR_Init',
    'LDR_Mem',
    'MF_InjectTotal',
    'MF_InjectUnique',
    'Svc_SharedProc',
    'CB_Total',
]

data = pd.read_csv('./data_clean.csv')

# Hand plain NumPy arrays (not DataFrames/Series) to the estimators.
X = data[feature_columns].values
y = data['Class'].values

# Single 70/30 hold-out split, stratified on the label, with a fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=13,
    stratify=y,
)

# Empty frame that receives one column per metric after the evaluation loop.
result_pd = pd.DataFrame()

# One accumulator list per reported column; each gets one entry per model.
cls_nameList, accuracys, precisions, recalls = [], [], [], []
F1s, AUCs, MMCs = [], [], []
TPRs, FNRs, FPRs, TNRs = [], [], [], []
Mclasscifications = []

# NOTE(review): this splitter is created but not used anywhere in this cell;
# the evaluation loop scores a single hold-out split. Kept in case a later
# cell relies on it.
skf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=13,
)

def _positive_class_scores(model, features, hard_labels):
    """Return continuous positive-class scores suitable for ROC AUC.

    Preference order:
      1. ``predict_proba`` — column 1 is taken as the score for label 1
         (the loop below treats the task as binary with labels 0 and 1).
      2. ``decision_function`` — signed margin, for models that expose no
         probabilities.
      3. ``hard_labels`` — last resort when the model offers neither.

    Parameters:
        model: a fitted classifier.
        features: feature matrix to score.
        hard_labels: output of ``model.predict(features)``, used as fallback.
    """
    if hasattr(model, "predict_proba"):
        return model.predict_proba(features)[:, 1]
    if hasattr(model, "decision_function"):
        return model.decision_function(features)
    return hard_labels


# Fit every candidate on the training split and score it on the test split.
for cls_name, cls in classfiers.items():
    print("start training:", cls_name)
    cls.fit(X_train, Y_train)
    Y_pred = cls.predict(X_test)

    # Fix the label order so ravel() always unpacks as (tn, fp, fn, tp).
    cm = confusion_matrix(Y_test, Y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    accuracy = accuracy_score(Y_test, Y_pred)
    precision = precision_score(Y_test, Y_pred)
    recall = recall_score(Y_test, Y_pred)
    f1 = f1_score(Y_test, Y_pred)
    # ROC AUC is a ranking metric: it must be computed from continuous scores.
    # Passing hard 0/1 predictions would only evaluate one operating point.
    Y_score = _positive_class_scores(cls, X_test, Y_pred)
    auc = roc_auc_score(Y_test, Y_score)
    mmc = matthews_corrcoef(Y_test, Y_pred)

    # Rates derived directly from the confusion-matrix counts.
    TPR = tp / (tp + fn)
    FNR = fn / (tp + fn)
    FPR = fp / (tn + fp)
    TNR = tn / (fp + tn)
    Misclassification = (fn + fp) / (tp + tn + fp + fn)

    cls_nameList.append(cls_name)
    accuracys.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    F1s.append(f1)
    AUCs.append(auc)
    MMCs.append(mmc)
    TPRs.append(TPR)
    FNRs.append(FNR)
    FPRs.append(FPR)
    TNRs.append(TNR)
    Mclasscifications.append(Misclassification)

# Output column name -> per-model values, in the column order of the CSV.
# NOTE(review): the 'avg_' prefix is kept for compatibility with existing
# output files, although the values in this cell come from one hold-out split.
_result_columns = {
    'classfier_name': cls_nameList,
    'avg_accuracy': accuracys,
    'avg_precision': precisions,
    'avg_recall': recalls,
    'avg_F1': F1s,
    'avg_AUC': AUCs,
    'avg_MMC': MMCs,
    'avg_TPR': TPRs,
    'avg_FNR': FNRs,
    'avg_FPR': FPRs,
    'avg_TNR': TNRs,
    'avg_MclasscificationR': Mclasscifications,
}
for _column_name, _column_values in _result_columns.items():
    result_pd[_column_name] = _column_values

print(cls_nameList)
print(accuracys)

# Write the metrics table without the DataFrame index column.
result_pd.to_csv(r'测试集_9F_调参版.csv', index=False)

# Play a completion sound. winsound exists only on Windows, so skip the
# notification elsewhere instead of failing the cell after the CSV is saved.
try:
    import winsound
except ImportError:
    pass
else:
    winsound.PlaySound("SystemHand", winsound.SND_ALIAS)
```

start training: LR
start training: KNN
start training: SGD
start training: GNB
start training: MLP
start training: DTree
start training: RF
start training: GBDT
start training: ExtraTrees
start training: AdaBoost
start training: XGBClassifier
start training: LGBM
[LightGBM] [Info] Number of positive: 6668, number of negative: 20461
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001697 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1230
[LightGBM] [Info] Number of data points in the train set: 27129, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.245789 -> initscore=-1.121201
[LightGBM] [Info] Start training from score -1.121201
start training: EasyE
start training: BalancedBagging
start training: BalancedRandomForest
start training: RUSBoostClassifier
[LightGBM] [Info] Number of positive: 6668, number of negative: 6668
[LightGBM] [Info] Auto-choosing col-wise multi-th