In [80]:
import pandas as pd
import joblib
from sklearn.metrics import confusion_matrix, recall_score, precision_score, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler

# 1. Load the evaluation data.
# Columns that will be standardized before prediction.
col_names = ['Amount']
# The validation split is scored here; swap in the line below for the test split.
# data = pd.read_csv('../data/test.csv')
data = pd.read_csv('../data/val.csv')

# 2. 特征缩放
def Standard_Scaler(df, col_names, scaler=None):
    """Standardize the selected columns and return the scaled frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns to scale. It is left untouched; the
        scaled values are written into a copy.
    col_names : list of str
        Names of the columns to standardize.
    scaler : object exposing ``transform``, optional
        An already-fitted scaler (for example the one fitted on the training
        split). When omitted, a new ``StandardScaler`` is fitted on ``df``
        itself, which is what this function did before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` whose ``col_names`` columns hold the scaled values.
    """
    features = df[col_names].values
    if scaler is None:
        # NOTE(review): fitting on the evaluation data gives these columns a
        # different scale than the one seen at training time unless both
        # splits share the same statistics. Prefer passing the training-time
        # scaler via ``scaler`` when it is available.
        scaler = StandardScaler().fit(features)
    # Work on a copy so the caller's frame is not silently modified.
    scaled = df.copy()
    scaled[col_names] = scaler.transform(features)
    return scaled

# Standardize the configured columns of the loaded frame.
data = Standard_Scaler(data, col_names)

# 3. Separate labels from features. The label column is selected by name
#    ('Class'), not by position; adjust the name if the data uses another one.
y_test = data['Class']
# Alternative feature set that also excludes 'Time':
# X_test = data.drop(columns=['Class', 'Time'])
X_test = data.drop(columns='Class')

In [81]:
# 4. Display name -> serialized model file, in the order the models are evaluated.
model_files = dict([
    ('Random Forest', '../models/random_forest_model.pkl'),
    ('Classweights RF', '../models/Classweights_rf_model.pkl'),
    ('Random Oversampling RF', '../models/random_oversampling_rf_model.pkl'),
    ('SMOTE RF', '../models/smote_rf_model.pkl'),
    ('SMOTETomek RF', '../models/SMOTETomek_rf_model.pkl'),
    ('LightGBM', '../models/lgbm_model.pkl'),
    ('LightGBM_GridSearchCV', '../models/grid_lgbm_model.pkl'),
    ('XGBoost', '../models/xgb_model.pkl'),
    ('XGBoost_GridSearchCV', '../models/grid_xgb_model.pkl'),
    ('XGBoost_st', '../models/st_xgb_model.pkl'),
    # Disabled entries:
    # ('best_XGBoost', '../models/best_xgb_model.pkl'),
    # ('Random Forest GridSearchCV', '../models/grid_rfb_model.pkl'),
])

In [82]:
# 5. 评估函数
def evaluate_model(model, X_test, y_test):
    """Predict on ``X_test`` and score the predictions against ``y_test``.

    Parameters
    ----------
    model : object exposing ``predict``
        Fitted estimator to evaluate.
    X_test : feature data passed straight to ``model.predict``.
    y_test : true labels the predictions are compared with.

    Returns
    -------
    tuple
        ``(recall, precision, f1, accuracy, confusion_matrix)``, each computed
        by the scikit-learn function of the same name with default settings.
    """
    predictions = model.predict(X_test)
    # The scorer order fixes the order of the first four returned values.
    scorers = (recall_score, precision_score, f1_score, accuracy_score)
    scores = tuple(scorer(y_test, predictions) for scorer in scorers)
    return (*scores, confusion_matrix(y_test, predictions))

In [83]:
# 6. Load each saved model and score it on the evaluation split.
results, cms = [], {}
for name, path in model_files.items():
    model = joblib.load(path)
    # evaluate_model returns the four scalar scores followed by the confusion matrix.
    *scores, cm = evaluate_model(model, X_test, y_test)
    results.append((name, *scores))
    cms[name] = cm  # kept so individual confusion matrices can be inspected later


In [84]:
# 7. Collect the scores into one table, ranked by recall (highest first).
columns = ['Model', 'Recall', 'Precision', 'F1 Score', 'Accuracy']
results_df = (
    pd.DataFrame(results, columns=columns)
    .sort_values(by='Recall', ascending=False)
    .reset_index(drop=True)
)

In [85]:
# 8. Show the ranked summary table; as the cell's last expression it is
#    rendered through the notebook's rich display.
results_df
# print(results_df)

Unnamed: 0,Model,Recall,Precision,F1 Score,Accuracy
0,Random Oversampling RF,0.838384,0.243402,0.377273,0.99519
1,Classweights RF,0.828283,0.277027,0.41519,0.995945
2,SMOTETomek RF,0.828283,0.198068,0.319688,0.993873
3,SMOTE RF,0.828283,0.266234,0.402948,0.995734
4,XGBoost_st,0.787879,0.83871,0.8125,0.999368
5,XGBoost,0.777778,0.875,0.823529,0.999421
6,XGBoost_GridSearchCV,0.757576,0.961538,0.847458,0.999526
7,Random Forest,0.737374,0.986486,0.843931,0.999526
8,LightGBM_GridSearchCV,0.717172,0.8875,0.793296,0.99935
9,LightGBM,0.424242,0.176471,0.249258,0.995558


In [86]:
# 查看具体某个模型的混淆矩阵，比如
# print("LightGBM_GridSearchCV 混淆矩阵：\n", cms['LightGBM_GridSearchCV'])

In [87]:
# import joblib
# import numpy as np
#
# # 加载模型
# xgb_model = joblib.load('../models/xgb_model.pkl')
# rf_model = joblib.load('../models/grid_xgb_model.pkl')
#
# # 预测
# xgb_pred = xgb_model.predict(X_test)
# rf_pred = rf_model.predict(X_test)
#
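# # Union vote (logical OR) rather than a majority vote: with only two models,
# # a sample is flagged positive as soon as either model predicts 1.
# ensemble_pred = (xgb_pred + rf_pred) >= 1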
# ensemble_pred = ensemble_pred.astype(int)
#
# # 评估
# from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score, confusion_matrix
#
# recall = recall_score(y_test, ensemble_pred)
# precision = precision_score(y_test, ensemble_pred)
# f1 = f1_score(y_test, ensemble_pred)
# accuracy = accuracy_score(y_test, ensemble_pred)
# cm = confusion_matrix(y_test, ensemble_pred)
#
# print('Recall:', recall)
# print('Precision:', precision)
# print('F1 Score:', f1)
# print('Accuracy:', accuracy)
# print('Confusion Matrix:\n', cm)