In [None]:
import pandas as pd
# Location of the transactions CSV, relative to the notebook's working directory.
path = "datasets/creditcard.csv"
# Read the whole CSV into a DataFrame; later cells use its "Amount", "Time"
# and "Class" columns.
df = pd.read_csv(path)

In [None]:
# Standardize (z-score) the transaction amount and transaction time columns:
# subtract each column's mean and divide by its standard deviation.
# The columns are overwritten in place on `df`.
for column in ("Amount", "Time"):
    centered = df[column] - df[column].mean()
    df[column] = centered / df[column].std()

In [None]:
# Separate the rows by label: Class == 0 are treated as normal transactions,
# Class == 1 as fraudulent ones.
df_normal = df.loc[df["Class"].eq(0)]
df_fraud = df.loc[df["Class"].eq(1)]
# Number of fraudulent rows; later cells hold out this many normal rows for testing.
n_fraud = len(df_fraud)

In [None]:
# Training set: every normal transaction except the last `n_fraud` rows,
# with the label column removed so only features remain.
df_train = df_normal.iloc[:-n_fraud].drop(columns=["Class"])
X_train = df_train.values
# No target labels are kept for training.
y_train = None

In [None]:
# Test set: the last `n_fraud` normal transactions followed by all fraudulent
# transactions, so both classes contribute the same number of rows.
# `pd.concat` is used instead of `DataFrame.append`, which was deprecated in
# pandas 1.4 and removed in pandas 2.0; row order and index are the same as
# the former append call produced.
df_test = pd.concat([df_normal[-n_fraud:], df_fraud])
# Keep the labels aside before dropping them from the feature frame.
df_test_labels = df_test["Class"]
df_test = df_test.drop(["Class"], axis=1)
# Feature matrix and label vector as NumPy arrays, row-aligned with each other.
X_test = df_test.values
y_test = df_test_labels.values

In [None]:
# Build the autoencoder model.
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam

# The input (and output) width is the number of feature columns in X_train.
n_feature = X_train.shape[1]

# Encoder: n_feature -> 20 -> 15, tanh activations.
inputs = Input(shape=(n_feature,))
encoder_hidden = Dense(20, activation='tanh')(inputs)
encoding = Dense(15, activation='tanh')(encoder_hidden)

# Decoder: 15 -> 20 -> n_feature, mirroring the encoder.
decoder_hidden = Dense(20, activation='tanh')(encoding)
# NOTE(review): tanh bounds every output to (-1, 1); if any input feature
# falls outside that range it cannot be reconstructed exactly -- confirm this
# is intended before changing it, since the plotted threshold depends on it.
outputs = Dense(units=n_feature, activation='tanh')(decoder_hidden)

# Tie input and output tensors into a single model and print its layer table.
model = Model(inputs=inputs, outputs=outputs)
model.summary()

In [None]:
# Configure training: Adam optimizer with its default settings, mean squared
# error as the reconstruction loss, and no additional metrics.
model.compile(optimizer=Adam(), loss="mse", metrics=None)

# Train the model to reproduce its own input (X_train is both input and target).
# 20% of the training rows are held out for validation; verbose=2 prints one
# line per epoch.
model.fit(
    x=X_train,
    y=X_train,
    batch_size=32,
    epochs=10,
    shuffle=True,
    validation_split=0.2,
    verbose=2,
)

In [None]:
import numpy as np

# Run the test rows through the autoencoder to get their reconstructions.
X_predict = model.predict(X_test)
# Per-row reconstruction error: Euclidean (L2) norm of the difference between
# each test row and its reconstruction.
residual = X_test - X_predict
reconstruction_error = np.linalg.norm(residual, ord=2, axis=1)

In [None]:
import matplotlib.pyplot as plt

# Select the SimHei font (presumably so the Chinese labels below render
# correctly) and enlarge all text in the figure.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    "font.size": 30,
})

# Pair each test row's reconstruction error with its true label.
df_error = pd.DataFrame({"Reconstruction_Error": reconstruction_error,
                         "True_Class": y_test})
# Group the rows by label so each class is drawn as its own series.
groups = df_error.groupby('True_Class')

fig, ax = plt.subplots(figsize=(20, 10))
for class_value, class_rows in groups:
    # Class 1 is labelled as fraudulent transactions, anything else as normal.
    if class_value == 1:
        series_label = "异常交易数据"
    else:
        series_label = "正常交易数据"
    # Markers only (no connecting line): x is the row position, y the error.
    ax.plot(class_rows.index,
            class_rows["Reconstruction_Error"],
            linestyle='',
            marker='o',
            markersize=8,
            label=series_label)

# Reconstruction-error threshold separating normal from fraudulent transactions.
threshold = 4
# Draw the threshold as a horizontal line across the current x-range,
# on top of the scatter points (high zorder).
x_left, x_right = ax.get_xlim()
ax.hlines(threshold, x_left, x_right,
          colors='g',
          zorder=100,
          label='阈值')

ax.legend()
ax.set_title("使用自编码模型识别异常交易")
ax.set_ylabel("重构误差")
ax.set_xlabel("测试集数据")
plt.show()

In [None]:
# Write the figure created in the plotting cell to a PNG file in the current
# working directory.
fig.savefig("fraud_detection.png")