In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.ensemble import IsolationForest
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
K = keras.backend

# =========================
# 1. Load & Split Data
# =========================
df = pd.read_csv("creditcard.csv")
features = df.drop(columns=["Time", "Class"])
labels   = df["Class"].values

# Train = 60%, Val = 20%, Test = 20%
X_temp, X_test, y_temp, y_test = train_test_split(features, labels, test_size=0.2, shuffle=False)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, shuffle=False)

print("Train:", X_train.shape, "Val:", X_val.shape, "Test:", X_test.shape)

# Balanced validation & test sets
fraud_val  = X_val[y_val == 1]
normal_val = X_val[y_val == 0].sample(n=len(fraud_val), random_state=42)
X_val_bal  = pd.concat([fraud_val, normal_val])
y_val_bal  = np.array([1]*len(fraud_val) + [0]*len(normal_val))

fraud_test  = X_test[y_test == 1]
normal_test = X_test[y_test == 0].sample(n=len(fraud_test), random_state=42)
X_test_bal  = pd.concat([fraud_test, normal_test])
y_test_bal  = np.array([1]*len(fraud_test) + [0]*len(normal_test))

# Convert numpy for DL
X_train_np = X_train.to_numpy().astype("float32")
X_val_bal_np  = X_val_bal.to_numpy().astype("float32")
X_test_np     = X_test.to_numpy().astype("float32")
X_test_bal_np = X_test_bal.to_numpy().astype("float32")

# =========================
# 2. Isolation Forest
# =========================
iso = IsolationForest(n_estimators=200, random_state=42, contamination="auto")
iso.fit(X_train[y_train == 0])  # chỉ train trên normal

val_scores  = -iso.score_samples(X_val_bal)
test_scores = -iso.score_samples(X_test)
thr = np.percentile(val_scores, 95)  # hoặc chọn theo F1
y_pred_test = (test_scores >= thr).astype(int)

print("\n=== Isolation Forest (Test Real) ===")
print(classification_report(y_test, y_pred_test, digits=4))
print(confusion_matrix(y_test, y_pred_test))
print("ROC-AUC:", roc_auc_score(y_test, test_scores))

# =========================
# 3. Variational Autoencoder (VAE)
# =========================
input_dim = X_train_np.shape[1]
latent_dim = 8

# Encoder
inputs = keras.Input(shape=(input_dim,))
h = layers.Dense(64, activation="relu")(inputs)
z_mean = layers.Dense(latent_dim)(h)
z_log_var = layers.Dense(latent_dim)(h)
def sampling(args):
    z_mean, z_log_var = args
    eps = K.random_normal(shape=K.shape(z_mean))
    return z_mean + K.exp(0.5 * z_log_var) * eps
z = layers.Lambda(sampling)([z_mean, z_log_var])
encoder = keras.Model(inputs, [z_mean, z_log_var, z])

# Decoder
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(64, activation="relu")(latent_inputs)
outputs = layers.Dense(input_dim, activation="linear")(x)
decoder = keras.Model(latent_inputs, outputs)

# VAE model
recons = decoder(encoder(inputs)[2])
vae = keras.Model(inputs, recons)

# Loss
recon_loss = tf.reduce_mean(tf.square(inputs - recons), axis=-1)
kl_loss = -0.5 * tf.reduce_sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
vae.add_loss(K.mean(recon_loss + kl_loss))
vae.compile(optimizer="adam")
vae.fit(X_train_np[y_train == 0], X_train_np[y_train == 0], epochs=20, batch_size=256, verbose=2)

# Anomaly detection
recon_val = vae.predict(X_val_bal_np, verbose=0)
mse_val = np.mean(np.square(X_val_bal_np - recon_val), axis=1)
thr_vae = np.percentile(mse_val, 95)

recon_test = vae.predict(X_test_np, verbose=0)
mse_test = np.mean(np.square(X_test_np - recon_test), axis=1)
y_pred_vae = (mse_test > thr_vae).astype(int)

print("\n=== VAE (Test Real) ===")
print(classification_report(y_test, y_pred_vae, digits=4))
print(confusion_matrix(y_test, y_pred_vae))
print("ROC-AUC:", roc_auc_score(y_test, mse_test))

# =========================
# 4. GAN (Simple AnoGAN style)
# =========================
latent_dim = 16
generator = keras.Sequential([
    layers.Dense(32, activation="relu", input_dim=latent_dim),
    layers.Dense(input_dim, activation="linear")
])
discriminator = keras.Sequential([
    layers.Dense(32, activation="relu", input_dim=input_dim),
    layers.Dense(1, activation="sigmoid")
])
discriminator.compile(optimizer="adam", loss="binary_crossentropy")

discriminator.trainable = False
z = keras.Input(shape=(latent_dim,))
gen_out = generator(z)
gan_out = discriminator(gen_out)
gan = keras.Model(z, gan_out)
gan.compile(optimizer="adam", loss="binary_crossentropy")

X_train_norm = X_train_np[y_train == 0]

batch_size = 128
epochs = 20
for epoch in range(epochs):
    idx = np.random.randint(0, X_train_norm.shape[0], batch_size)
    real = X_train_norm[idx]

    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake = generator.predict(noise, verbose=0)

    d_loss_real = discriminator.train_on_batch(real, np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch(fake, np.zeros((batch_size, 1)))
    g_loss = gan.train_on_batch(noise, np.ones((batch_size, 1)))

    if epoch % 5 == 0:
        print(f"Epoch {epoch}, D Loss: {(d_loss_real+d_loss_fake)/2}, G Loss: {g_loss}")

# Anomaly score = 1 - discriminator confidence
scores = 1 - discriminator.predict(X_test_np, verbose=0).reshape(-1)
thr_gan = np.percentile(scores, 95)
y_pred_gan = (scores > thr_gan).astype(int)

print("\n=== GAN (Test Real) ===")
print(classification_report(y_test, y_pred_gan, digits=4))
print(confusion_matrix(y_test, y_pred_gan))
print("ROC-AUC:", roc_auc_score(y_test, scores))


Train: (170883, 29) Val: (56962, 29) Test: (56962, 29)

=== Isolation Forest (Test Real) ===
              precision    recall  f1-score   support

           0     0.9987    0.9970    0.9979     56887
           1     0.0000    0.0000    0.0000        75

    accuracy                         0.9957     56962
   macro avg     0.4993    0.4985    0.4989     56962
weighted avg     0.9974    0.9957    0.9965     56962

[[56719   168]
 [   75     0]]
ROC-AUC: 0.9492516743720006


ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.ops`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```
