## 切分数据

In [27]:
import pandas as pd
from sklearn.metrics import confusion_matrix, recall_score, precision_score, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler
import joblib

# Load the original test set.
df = pd.read_csv('../data/test.csv')

# Partition the rows on the label column: one frame for Class == 0,
# one for Class == 1.
df_0 = df.loc[df['Class'].eq(0)]
df_1 = df.loc[df['Class'].eq(1)]

# Write each partition to its own CSV file (row index not written).
df_0.to_csv('../data/test_class_0.csv', index=False)
df_1.to_csv('../data/test_class_1.csv', index=False)

In [28]:
# Columns that will be standardised before scoring.
col_names = ['Amount']

# Evaluation frame: the Class == 1 rows written out by the split step.
data = pd.read_csv('../data/test_class_1.csv')

# 2. 特征缩放
def Standard_Scaler(df, col_names):
    """Return a copy of ``df`` with the ``col_names`` columns standardised.

    A fresh ``StandardScaler`` is fitted on the selected columns of ``df``
    and the transformed values replace those columns in the returned frame.
    The frame passed in is not modified; callers must use the return value.

    NOTE(review): the scaler is fitted on whatever frame is passed in. If the
    model was trained on features scaled with different statistics, the
    scaler fitted at training time should be reused here instead -- confirm.

    Args:
        df: DataFrame containing every column listed in ``col_names``.
        col_names: list of column labels to standardise.

    Returns:
        A new DataFrame with the same columns as ``df`` in which the
        ``col_names`` columns hold the scaled values.
    """
    # Work on a copy so the caller's frame is never mutated as a side effect.
    scaled = df.copy()
    scaled[col_names] = StandardScaler().fit_transform(df[col_names].values)
    return scaled

# Standardise the selected columns; ``data`` is rebound to the returned frame.
# NOTE(review): the scaler is fitted on this evaluation subset itself --
# confirm that this matches how the model's training features were scaled.
data = Standard_Scaler(data, col_names)

# Separate the label from the features. 'Class' is the label column here;
# substitute the actual label column name if it differs.
y_test = data['Class']
X_test = data.drop(columns=['Class'])

In [29]:
def evaluate_model(model, X_test, y_test, labels=None):
    """Score a fitted classifier on a labelled evaluation set.

    Args:
        model: object exposing ``predict(X)``.
        X_test: features handed to ``model.predict``.
        y_test: true labels, compared against the predictions.
        labels: optional list of class labels forwarded to
            ``confusion_matrix``. Passing e.g. ``[0, 1]`` keeps the matrix at
            a fixed 2x2 layout even when the evaluation set contains a single
            class. The default ``None`` leaves scikit-learn's own label
            inference in place, so existing three-argument calls behave as
            before.

    Returns:
        Tuple ``(recall, precision, f1, accuracy, cm)``. The four scores are
        computed with the metric functions' default arguments; ``cm`` is the
        confusion matrix.
    """
    y_pred = model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred, labels=labels)
    recall = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    return recall, precision, f1, accuracy, cm

In [30]:
# Display name -> path of the serialized model to evaluate.
model_files = {'best_XGBoost': '../models/best_xgb_model.pkl'}

cms = {}      # confusion matrix per model name, kept for later inspection
results = []  # one (name, recall, precision, f1, accuracy) tuple per model
for name, path in model_files.items():
    # NOTE(review): joblib.load unpickles the file; only load model
    # artifacts that come from a trusted source.
    model = joblib.load(path)
    recall, precision, f1, accuracy, cm = evaluate_model(model, X_test, y_test)
    cms[name] = cm
    results.append((name, recall, precision, f1, accuracy))

In [31]:
# Tabulate the per-model scores, best recall first.
# NOTE(review): X_test / y_test are built from test_class_1.csv, which holds
# only Class == 1 rows. With no negatives in the ground truth there can be no
# false positives, which is consistent with the displayed Precision of 1.0
# and with Accuracy being equal to Recall.
columns = ['Model', 'Recall', 'Precision', 'F1 Score', 'Accuracy']
results_df = (
    pd.DataFrame(results, columns=columns)
    .sort_values(by='Recall', ascending=False)
    .reset_index(drop=True)
)
results_df

Unnamed: 0,Model,Recall,Precision,F1 Score,Accuracy
0,best_XGBoost,0.755102,1.0,0.860465,0.755102
