In [2]:
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN, BorderlineSMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTETomek, SMOTEENN
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, f1_score, matthews_corrcoef
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Load the cleaned dataset and pull out the feature matrix and label vector
# as plain NumPy arrays.
data = pd.read_csv('./data_clean.csv')
feature_columns = [
    'Hdl_Event',
    'Hdl_Key',
    'Hdl_Timer',
    'LDR_Init',
    'LDR_Mem',
    'MF_InjectTotal',
    'MF_InjectUnique',
    'Svc_SharedProc',
    'CB_Total',
]
X = data[feature_columns].values
y = data['Class'].values

# Hold out 30% of the rows for testing; stratify on the label so both splits
# keep the same class proportions, and fix the seed for reproducibility.
X_train_ori, X_test, Y_train_ori, Y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=13,
)

# Resampling strategies under comparison. Each entry is keyed by the sampler's
# class name and every sampler is seeded with the same random_state.
sampling_methods = {
    sampler_cls.__name__: sampler_cls(random_state=42)
    for sampler_cls in (
        RandomOverSampler,
        RandomUnderSampler,
        SMOTE,
        ADASYN,
        BorderlineSMOTE,
        SMOTETomek,
        SMOTEENN,
    )
}

# Classifiers to train on each resampled training set.
classifiers = dict(AdaBoost=AdaBoostClassifier(random_state=100))

# Empty table that receives one column per metric after the evaluation loop.
result_pd = pd.DataFrame()

# Per-run accumulators: each list gains one entry for every
# (sampling method, classifier) pair that is evaluated.
cls_nameList, sampling_nameList = [], []
accuracys, precisions, recalls, F1s, AUCs, MMCs = ([] for _ in range(6))
TPRs, FNRs, FPRs, TNRs = ([] for _ in range(4))
Mclasscifications = []

# Evaluate every (sampling method, classifier) pair on the hold-out test set.
for sampling_name, sampler in sampling_methods.items():
    print(f"Applying sampling method: {sampling_name}")
    # Only the training split is resampled; X_test / Y_test are used as-is.
    X_train, Y_train = sampler.fit_resample(X_train_ori, Y_train_ori)

    for cls_name, cls in classifiers.items():
        print(f"Training classifier: {cls_name}")
        cls.fit(X_train, Y_train)
        Y_pred = cls.predict(X_test)
        # Continuous score for the positive class. ROC AUC needs a ranking
        # score; hard 0/1 predictions reduce the curve to a single point.
        # Column 1 is the positive class assuming labels are 0/1, as the
        # confusion-matrix call below also assumes.
        Y_score = cls.predict_proba(X_test)[:, 1]

        # Confusion-matrix counts, with rows/columns ordered as [0, 1].
        cm = confusion_matrix(Y_test, Y_pred, labels=[0, 1])
        tn, fp, fn, tp = cm.ravel()

        # Threshold-based metrics use the hard predictions; AUC uses the score.
        accuracy = accuracy_score(Y_test, Y_pred)
        precision = precision_score(Y_test, Y_pred)
        recall = recall_score(Y_test, Y_pred)
        f1 = f1_score(Y_test, Y_pred)
        auc = roc_auc_score(Y_test, Y_score)
        mmc = matthews_corrcoef(Y_test, Y_pred)
        TPR = tp / (tp + fn)
        FNR = fn / (tp + fn)
        FPR = fp / (tn + fp)
        TNR = tn / (fp + tn)
        Misclassification = (fn + fp) / (tp + tn + fp + fn)

        # Record this run's results.
        cls_nameList.append(cls_name)
        sampling_nameList.append(sampling_name)
        accuracys.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        F1s.append(f1)
        AUCs.append(auc)
        MMCs.append(mmc)
        TPRs.append(TPR)
        FNRs.append(FNR)
        FPRs.append(FPR)
        TNRs.append(TNR)
        Mclasscifications.append(Misclassification)

# Attach the collected metrics to the result table, one column per list.
# Insertion order of this mapping determines the column order in the CSV.
result_columns = {
    'classifier_name': cls_nameList,
    'sampling_method': sampling_nameList,
    'avg_accuracy': accuracys,
    'avg_precision': precisions,
    'avg_recall': recalls,
    'avg_F1': F1s,
    'avg_AUC': AUCs,
    'avg_MMC': MMCs,
    'avg_TPR': TPRs,
    'avg_FNR': FNRs,
    'avg_FPR': FPRs,
    'avg_TNR': TNRs,
    'avg_Misclassification': Mclasscifications,
}
for column_name, column_values in result_columns.items():
    result_pd[column_name] = column_values

# Write the table to a CSV file without the row index.
result_pd.to_csv(r'sampling_methods_results.csv', index=False)

print("All sampling methods tested and results saved.")

# Audible completion cue. winsound exists only on Windows, so the import is
# guarded: on other platforms the chime is skipped instead of raising
# ModuleNotFoundError at the very end of the run.
try:
    import winsound
except ImportError:
    winsound = None

if winsound is not None:
    winsound.PlaySound("SystemHand", winsound.SND_ALIAS)


Applying sampling method: RandomOverSampler
Training classifier: AdaBoost
Applying sampling method: RandomUnderSampler
Training classifier: AdaBoost
Applying sampling method: SMOTE
Training classifier: AdaBoost
Applying sampling method: ADASYN
Training classifier: AdaBoost
Applying sampling method: BorderlineSMOTE
Training classifier: AdaBoost
Applying sampling method: SMOTETomek
Training classifier: AdaBoost
Applying sampling method: SMOTEENN
Training classifier: AdaBoost
All sampling methods tested and results saved.
