# Skenario 1 : Dataset 1, Tanpa KFold

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Input, Concatenate
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, mean_absolute_error

# Read the raw CSV and report how many target values are unusable.
data = pd.read_csv('/content/dataset1.csv')

class_column = data['Class']
print(class_column.isnull().sum())   # number of missing labels
print(np.isinf(class_column).sum())  # number of infinite labels

# Rows without a label cannot be used for training or evaluation.
data = data.dropna(subset=['Class'])

0
0


In [None]:
# Separate the feature matrix from the integer class label.
X = data.drop('Class', axis=1)
y = data['Class'].astype('int64')

# Min-max scale every feature column to [0, 1].
# A constant column has max == min; dividing by that zero range would fill the
# column with NaN and propagate NaN into every loss value, so zero ranges are
# replaced by 1 (the constant column then scales to all zeros).
# NOTE(review): min/max are taken over the full dataset, before the train/test
# split below, so test rows influence the scaling.
feature_min = X.min()
feature_range = (X.max() - feature_min).replace(0, 1)
X = (X - feature_min) / feature_range

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Length of the noise vector fed to the generator.
latent_dim = 100

# Generator for cGAN
def build_generator_cgan(latent_dim, n_features):
    """Build the conditional generator.

    The model takes a noise vector and a class label, concatenates them and
    maps the result to one synthetic feature row.

    Args:
        latent_dim: Length of the noise vector.
        n_features: Number of feature columns to generate.

    Returns:
        A ``tf.keras.Model`` with inputs ``[noise, label]`` and one output of
        ``n_features`` values per row, each in (0, 1).
    """
    noise = Input(shape=(latent_dim,))
    label = Input(shape=(1,))

    # Condition the generator by appending the label to the noise vector.
    x = Concatenate()([noise, label])

    model = Sequential([
        # An explicit Input replaces the deprecated `input_dim` argument,
        # which makes Keras emit a UserWarning when the layer is created.
        Input(shape=(latent_dim + 1,)),
        Dense(128),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        Dense(256),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        # The real features in this file are min-max scaled to [0, 1], so the
        # output uses sigmoid; tanh would also emit values in [-1, 0) that
        # never occur in real rows.
        Dense(n_features, activation='sigmoid')
    ])
    generated_data = model(x)

    return tf.keras.Model([noise, label], generated_data)

# Discriminator for cGAN
def build_discriminator_cgan(n_features):
    """Build the conditional discriminator.

    The model takes a feature row and a class label, concatenates them and
    outputs a single sigmoid validity score.

    Args:
        n_features: Number of feature columns in one row.

    Returns:
        A ``tf.keras.Model`` with inputs ``[row, label]`` and one output in
        (0, 1) per row.
    """
    # Named `features` rather than `data` so the module-level DataFrame is not
    # shadowed inside this function.
    features = Input(shape=(n_features,))
    label = Input(shape=(1,))

    # Condition the discriminator by appending the label to the feature row.
    x = Concatenate()([features, label])

    model = Sequential([
        # An explicit Input replaces the deprecated `input_dim` argument,
        # which makes Keras emit a UserWarning when the layer is created.
        Input(shape=(n_features + 1,)),
        Dense(256),
        LeakyReLU(alpha=0.2),
        Dense(128),
        LeakyReLU(alpha=0.2),
        Dense(1, activation='sigmoid')
    ])
    validity = model(x)

    return tf.keras.Model([features, label], validity)

# Instantiate both networks; each is sized by the number of feature columns.
generator_cgan = build_generator_cgan(latent_dim=latent_dim, n_features=X.shape[1])
discriminator_cgan = build_discriminator_cgan(n_features=X.shape[1])

# One Adam optimizer per network, with identical hyper-parameters.
optimizer_generator = tf.keras.optimizers.Adam(
    learning_rate=0.0002,
    beta_1=0.5,
)
optimizer_discriminator = tf.keras.optimizers.Adam(
    learning_rate=0.0002,
    beta_1=0.5,
)

# The discriminator is compiled on its own so it can be trained directly.
discriminator_cgan.compile(
    loss='binary_crossentropy',
    optimizer=optimizer_discriminator,
    metrics=['accuracy'],
)

# Mark the discriminator as non-trainable before stacking it behind the
# generator in the combined model.
# NOTE(review): whether this flag also affects the already-compiled
# discriminator depends on the Keras version - confirm that the discriminator
# still updates during training.
discriminator_cgan.trainable = False

noise = Input(shape=(latent_dim,))
label = Input(shape=(1,))
generated_data = generator_cgan([noise, label])
validity = discriminator_cgan([generated_data, label])

# Stacked model: (noise, label) -> generator -> discriminator -> validity.
cgan = tf.keras.Model(inputs=[noise, label], outputs=validity)
cgan.compile(loss='binary_crossentropy', optimizer=optimizer_generator)

# Training cGAN
def train_cgan(cgan, generator, discriminator, data, labels, epochs=1000, batch_size=128, latent_dim=100):
    """Alternate discriminator and generator updates for a conditional GAN.

    Every iteration trains the discriminator twice (each time on half a batch
    of real rows and half a batch of generated rows) and then trains the
    generator once through the stacked ``cgan`` model.

    Args:
        cgan: Stacked generator + discriminator model used for generator updates.
        generator: Model mapping ``[noise, label]`` to generated rows.
        discriminator: Compiled model mapping ``[rows, label]`` to a validity
            score; the log line reads index 1 of its ``train_on_batch`` result
            as accuracy.
        data: Array of real feature rows, indexed by row position.
        labels: Array of class labels aligned with ``data``. Generated rows are
            conditioned on labels drawn from {0, 1}.
        epochs: Number of training iterations (one sampled batch each, not
            full passes over ``data``).
        batch_size: Rows per generator batch; the discriminator uses half.
        latent_dim: Length of the noise vector.
    """
    half_batch = batch_size // 2
    for epoch in range(epochs):
        for _ in range(2):
            # Train Discriminator
            idx = np.random.randint(0, data.shape[0], half_batch)
            real_data = data[idx]
            # Shape the labels as a column so they match the (batch, 1) label
            # input instead of relying on implicit 1-D expansion.
            real_labels = np.asarray(labels[idx]).reshape(-1, 1)

            noise = np.random.normal(0, 1, (half_batch, latent_dim))
            fake_labels = np.random.randint(0, 2, half_batch).reshape(-1, 1)
            # verbose=0: predict() otherwise prints a progress bar on every
            # call, which floods the output over thousands of iterations.
            fake_data = generator.predict([noise, fake_labels], verbose=0)

            d_loss_real = discriminator.train_on_batch([real_data, real_labels], np.ones((half_batch, 1)))
            d_loss_fake = discriminator.train_on_batch([fake_data, fake_labels], np.zeros((half_batch, 1)))
            # Only the values from the second discriminator pass are logged.
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train Generator: it is rewarded when the discriminator labels its
        # output as real (target 1).
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        random_labels = np.random.randint(0, 2, batch_size).reshape(-1, 1)
        g_loss = cgan.train_on_batch([noise, random_labels], np.ones((batch_size, 1)))

        if epoch % 100 == 0:
            print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}%] [G loss: {g_loss}]")

# Fit the cGAN on the training split for 1000 iterations.
train_cgan(
    cgan,
    generator_cgan,
    discriminator_cgan,
    X_train.values,
    y_train.values,
    epochs=1000,
)

# Evaluating GAN
def evaluate_mae(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``."""
    return mean_absolute_error(y_true, y_pred)

def evaluate_gan(generator, discriminator, X_test, y_test, latent_dim=100):
    """Score the discriminator's output on real test rows against their labels.

    Prints accuracy, precision, recall, a classification report and the MAE.

    NOTE(review): in this file the discriminator is trained with target 1 for
    real rows and 0 for generated rows, yet its thresholded output is compared
    with the ``Class`` labels here. Confirm that this is the intended
    evaluation.

    Args:
        generator: Unused; kept so existing calls keep working.
        discriminator: Model mapping ``[rows, label]`` to a score per row.
        X_test: Array of test feature rows.
        y_test: pandas Series of test labels (its ``.values`` are read).
        latent_dim: Unused; kept so existing calls keep working.
    """
    label_column = y_test.values.reshape(-1, 1)

    # The earlier predictions on generated rows were never used, so they are
    # no longer computed.
    scores = discriminator.predict([X_test, label_column], verbose=0)
    # Flatten the (n, 1) boolean column into a 1-D 0/1 vector, the shape
    # scikit-learn's metric functions expect.
    y_pred_real = (scores > 0.5).astype(int).ravel()

    # Evaluation metrics on real data. zero_division=0 keeps the reported
    # value (0.0) when a class is never predicted, without the warning.
    accuracy = accuracy_score(y_test, y_pred_real)
    precision = precision_score(y_test, y_pred_real, zero_division=0)
    recall = recall_score(y_test, y_pred_real, zero_division=0)
    report_GAN = classification_report(y_test, y_pred_real, zero_division=0)

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("Laporan Klasifikasi: ")
    print(report_GAN)

    # Evaluate using MAE
    mae = evaluate_mae(y_test, y_pred_real)
    print("Evaluation Metrics:")
    print("Mean Absolute Error (MAE):", mae)

# Print the classification metrics for the trained cGAN on the held-out split.
evaluate_gan(
    generator=generator_cgan,
    discriminator=discriminator_cgan,
    X_test=X_test.values,
    y_test=y_test,
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step  




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 




0 [D loss: 0.6571526527404785, acc.: 92.05728769302368%] [G loss: [array(0.6573147, dtype=float32), array(0.6573147, dtype=float32), array(0.9140625, dtype=float32)]]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3m

# Skenario 2 : Dataset 1, Dengan KFold

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Input, Concatenate
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, mean_absolute_error

# Read the raw CSV and report how many target values are unusable.
data = pd.read_csv('/content/dataset1.csv')

class_column = data['Class']
print(class_column.isnull().sum())   # number of missing labels
print(np.isinf(class_column).sum())  # number of infinite labels

# Rows without a label cannot be used for training or evaluation.
data = data.dropna(subset=['Class'])

In [None]:
from sklearn.model_selection import KFold

# Separate the feature matrix from the integer class label.
X = data.drop('Class', axis=1)
y = data['Class'].astype('int64')

# Min-max scale every feature column to [0, 1].
# A constant column has max == min; dividing by that zero range would fill the
# column with NaN and propagate NaN into every loss value, so zero ranges are
# replaced by 1 (the constant column then scales to all zeros).
# NOTE(review): min/max are taken over the full dataset, before the folds are
# formed, so validation rows influence the scaling.
feature_min = X.min()
feature_range = (X.max() - feature_min).replace(0, 1)
X = (X - feature_min) / feature_range

# Length of the noise vector fed to the generator.
latent_dim = 100

# Generator for cGAN
def build_generator_cgan(latent_dim, n_features):
    """Build the conditional generator.

    The model takes a noise vector and a class label, concatenates them and
    maps the result to one synthetic feature row.

    Args:
        latent_dim: Length of the noise vector.
        n_features: Number of feature columns to generate.

    Returns:
        A ``tf.keras.Model`` with inputs ``[noise, label]`` and one output of
        ``n_features`` values per row, each in (0, 1).
    """
    noise = Input(shape=(latent_dim,))
    label = Input(shape=(1,))

    # Condition the generator by appending the label to the noise vector.
    x = Concatenate()([noise, label])

    model = Sequential([
        # An explicit Input replaces the deprecated `input_dim` argument,
        # which makes Keras emit a UserWarning when the layer is created.
        Input(shape=(latent_dim + 1,)),
        Dense(128),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        Dense(256),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        # The real features in this file are min-max scaled to [0, 1], so the
        # output uses sigmoid; tanh would also emit values in [-1, 0) that
        # never occur in real rows.
        Dense(n_features, activation='sigmoid')
    ])
    generated_data = model(x)

    return tf.keras.Model([noise, label], generated_data)

# Discriminator for cGAN
def build_discriminator_cgan(n_features):
    """Build the conditional discriminator.

    The model takes a feature row and a class label, concatenates them and
    outputs a single sigmoid validity score.

    Args:
        n_features: Number of feature columns in one row.

    Returns:
        A ``tf.keras.Model`` with inputs ``[row, label]`` and one output in
        (0, 1) per row.
    """
    # Named `features` rather than `data` so the module-level DataFrame is not
    # shadowed inside this function.
    features = Input(shape=(n_features,))
    label = Input(shape=(1,))

    # Condition the discriminator by appending the label to the feature row.
    x = Concatenate()([features, label])

    model = Sequential([
        # An explicit Input replaces the deprecated `input_dim` argument,
        # which makes Keras emit a UserWarning when the layer is created.
        Input(shape=(n_features + 1,)),
        Dense(256),
        LeakyReLU(alpha=0.2),
        Dense(128),
        LeakyReLU(alpha=0.2),
        Dense(1, activation='sigmoid')
    ])
    validity = model(x)

    return tf.keras.Model([features, label], validity)

# Instantiate both networks; each is sized by the number of feature columns.
generator_cgan = build_generator_cgan(latent_dim=latent_dim, n_features=X.shape[1])
discriminator_cgan = build_discriminator_cgan(n_features=X.shape[1])

# One Adam optimizer per network, with identical hyper-parameters.
optimizer_generator = tf.keras.optimizers.Adam(
    learning_rate=0.0002,
    beta_1=0.5,
)
optimizer_discriminator = tf.keras.optimizers.Adam(
    learning_rate=0.0002,
    beta_1=0.5,
)

# The discriminator is compiled on its own so it can be trained directly.
discriminator_cgan.compile(
    loss='binary_crossentropy',
    optimizer=optimizer_discriminator,
    metrics=['accuracy'],
)

# Mark the discriminator as non-trainable before stacking it behind the
# generator in the combined model.
# NOTE(review): whether this flag also affects the already-compiled
# discriminator depends on the Keras version - confirm that the discriminator
# still updates during training.
discriminator_cgan.trainable = False

noise = Input(shape=(latent_dim,))
label = Input(shape=(1,))
generated_data = generator_cgan([noise, label])
validity = discriminator_cgan([generated_data, label])

# Stacked model: (noise, label) -> generator -> discriminator -> validity.
cgan = tf.keras.Model(inputs=[noise, label], outputs=validity)
cgan.compile(loss='binary_crossentropy', optimizer=optimizer_generator)

# Training cGAN
def train_cgan(cgan, generator, discriminator, data, labels, epochs=1000, batch_size=128, latent_dim=100):
    """Alternate discriminator and generator updates for a conditional GAN.

    Every iteration trains the discriminator twice (each time on half a batch
    of real rows and half a batch of generated rows) and then trains the
    generator once through the stacked ``cgan`` model.

    Args:
        cgan: Stacked generator + discriminator model used for generator updates.
        generator: Model mapping ``[noise, label]`` to generated rows.
        discriminator: Compiled model mapping ``[rows, label]`` to a validity
            score; the log line reads index 1 of its ``train_on_batch`` result
            as accuracy.
        data: Array of real feature rows, indexed by row position.
        labels: Array of class labels aligned with ``data``. Generated rows are
            conditioned on labels drawn from {0, 1}.
        epochs: Number of training iterations (one sampled batch each, not
            full passes over ``data``).
        batch_size: Rows per generator batch; the discriminator uses half.
        latent_dim: Length of the noise vector.
    """
    half_batch = batch_size // 2
    for epoch in range(epochs):
        for _ in range(2):
            # Train Discriminator
            idx = np.random.randint(0, data.shape[0], half_batch)
            real_data = data[idx]
            # Shape the labels as a column so they match the (batch, 1) label
            # input instead of relying on implicit 1-D expansion.
            real_labels = np.asarray(labels[idx]).reshape(-1, 1)

            noise = np.random.normal(0, 1, (half_batch, latent_dim))
            fake_labels = np.random.randint(0, 2, half_batch).reshape(-1, 1)
            # verbose=0: predict() otherwise prints a progress bar on every
            # call, which floods the output over thousands of iterations.
            fake_data = generator.predict([noise, fake_labels], verbose=0)

            d_loss_real = discriminator.train_on_batch([real_data, real_labels], np.ones((half_batch, 1)))
            d_loss_fake = discriminator.train_on_batch([fake_data, fake_labels], np.zeros((half_batch, 1)))
            # Only the values from the second discriminator pass are logged.
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train Generator: it is rewarded when the discriminator labels its
        # output as real (target 1).
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        random_labels = np.random.randint(0, 2, batch_size).reshape(-1, 1)
        g_loss = cgan.train_on_batch([noise, random_labels], np.ones((batch_size, 1)))

        if epoch % 100 == 0:
            print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}%] [G loss: {g_loss}]")

def _build_fresh_cgan(latent_dim, n_features):
    """Create and compile a new, untrained (cgan, generator, discriminator) triple."""
    generator = build_generator_cgan(latent_dim, n_features)
    discriminator = build_discriminator_cgan(n_features)
    discriminator.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # Mark the discriminator as non-trainable before stacking it behind the
    # generator, mirroring the module-level model assembly.
    discriminator.trainable = False
    noise_in = Input(shape=(latent_dim,))
    label_in = Input(shape=(1,))
    validity_out = discriminator([generator([noise_in, label_in]), label_in])

    stacked = tf.keras.Model([noise_in, label_in], validity_out)
    stacked.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
        loss='binary_crossentropy',
    )
    return stacked, generator, discriminator


# Cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Store results for cross-validation
accuracies = []
precisions = []
recalls = []
maes = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X), 1):
    print(f"Fold {fold}")

    # Split data into train and validation sets
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Start every fold from untrained weights. Reusing one model across folds
    # lets it train on rows that later serve as validation data, which
    # inflates the scores of every fold after the first.
    cgan, generator_cgan, discriminator_cgan = _build_fresh_cgan(latent_dim, X.shape[1])

    # Train the cGAN model on the training set
    train_cgan(cgan, generator_cgan, discriminator_cgan, X_train.values, y_train.values, epochs=1000)

    # Score the real validation rows. The earlier predictions on generated
    # rows were never used, so they are no longer computed.
    # NOTE(review): the discriminator is trained with target 1 for real rows
    # and 0 for generated rows, yet its output is compared with the `Class`
    # labels below. Confirm that this is the intended evaluation.
    val_labels = y_val.values.reshape(-1, 1)
    scores = discriminator_cgan.predict([X_val.values, val_labels], verbose=0)
    # Flatten the (n, 1) boolean column into a 1-D 0/1 vector for scikit-learn.
    y_pred_real = (scores > 0.5).astype(int).ravel()

    # Evaluation metrics on real data. zero_division=0 keeps the reported
    # value (0.0) when a class is never predicted, without the warning.
    accuracy = accuracy_score(y_val, y_pred_real)
    precision = precision_score(y_val, y_pred_real, zero_division=0)
    recall = recall_score(y_val, y_pred_real, zero_division=0)
    report_GAN = classification_report(y_val, y_pred_real, zero_division=0)

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("Laporan Klasifikasi: ")
    print(report_GAN)

    # Evaluate using MAE
    mae = mean_absolute_error(y_val, y_pred_real)
    print("Evaluation Metrics:")
    print("Mean Absolute Error (MAE):", mae)

    # Store the results for this fold
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    maes.append(mae)

# Print average metrics over all folds
print("\nAverage Results Across 5 Folds:")
print("Average Accuracy:", np.mean(accuracies))
print("Average Precision:", np.mean(precisions))
print("Average Recall:", np.mean(recalls))
print("Average MAE:", np.mean(maes))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Fold 1
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
0 [D loss: 0.6593785285949707, acc.: 71.02864980697632%] [G loss: [array(0.6671255, dtype=float32), array(0.6671255, dtype=float32), array(0.66015625, dtype=float32)]]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
0 [D loss: 0.8148339986801147, acc.: 50.69001913070679%] [G loss: [array(0.814876, dtype=float32), array(0.814876, dtype=float32), array(0.5068369, dtype=float32)]]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

# Skenario 3 : Dataset 2, Tanpa KFold

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Input, Concatenate
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, mean_absolute_error

# Read the raw CSV and report how many target values are unusable.
data = pd.read_csv('/content/dataset2.csv')

class_column = data['Class']
print(class_column.isnull().sum())   # number of missing labels
print(np.isinf(class_column).sum())  # number of infinite labels

# Rows without a label cannot be used for training or evaluation.
data = data.dropna(subset=['Class'])

1
0


In [None]:
# Separate the feature matrix from the integer class label.
X = data.drop('Class', axis=1)
y = data['Class'].astype('int64')

# Min-max scale every feature column to [0, 1].
# A constant column has max == min; dividing by that zero range would fill the
# column with NaN and propagate NaN into every loss value, so zero ranges are
# replaced by 1 (the constant column then scales to all zeros).
# NOTE(review): min/max are taken over the full dataset, before the train/test
# split below, so test rows influence the scaling.
feature_min = X.min()
feature_range = (X.max() - feature_min).replace(0, 1)
X = (X - feature_min) / feature_range

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Length of the noise vector fed to the generator.
latent_dim = 100

# Generator for cGAN
def build_generator_cgan(latent_dim, n_features):
    """Build the conditional generator.

    The model takes a noise vector and a class label, concatenates them and
    maps the result to one synthetic feature row.

    Args:
        latent_dim: Length of the noise vector.
        n_features: Number of feature columns to generate.

    Returns:
        A ``tf.keras.Model`` with inputs ``[noise, label]`` and one output of
        ``n_features`` values per row, each in (0, 1).
    """
    noise = Input(shape=(latent_dim,))
    label = Input(shape=(1,))

    # Condition the generator by appending the label to the noise vector.
    x = Concatenate()([noise, label])

    model = Sequential([
        # An explicit Input replaces the deprecated `input_dim` argument,
        # which makes Keras emit a UserWarning when the layer is created.
        Input(shape=(latent_dim + 1,)),
        Dense(128),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        Dense(256),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        # The real features in this file are min-max scaled to [0, 1], so the
        # output uses sigmoid; tanh would also emit values in [-1, 0) that
        # never occur in real rows.
        Dense(n_features, activation='sigmoid')
    ])
    generated_data = model(x)

    return tf.keras.Model([noise, label], generated_data)

# Discriminator for cGAN
def build_discriminator_cgan(n_features):
    """Build the conditional discriminator.

    The model takes a feature row and a class label, concatenates them and
    outputs a single sigmoid validity score.

    Args:
        n_features: Number of feature columns in one row.

    Returns:
        A ``tf.keras.Model`` with inputs ``[row, label]`` and one output in
        (0, 1) per row.
    """
    # Named `features` rather than `data` so the module-level DataFrame is not
    # shadowed inside this function.
    features = Input(shape=(n_features,))
    label = Input(shape=(1,))

    # Condition the discriminator by appending the label to the feature row.
    x = Concatenate()([features, label])

    model = Sequential([
        # An explicit Input replaces the deprecated `input_dim` argument,
        # which makes Keras emit a UserWarning when the layer is created.
        Input(shape=(n_features + 1,)),
        Dense(256),
        LeakyReLU(alpha=0.2),
        Dense(128),
        LeakyReLU(alpha=0.2),
        Dense(1, activation='sigmoid')
    ])
    validity = model(x)

    return tf.keras.Model([features, label], validity)

# Instantiate both networks; each is sized by the number of feature columns.
generator_cgan = build_generator_cgan(latent_dim=latent_dim, n_features=X.shape[1])
discriminator_cgan = build_discriminator_cgan(n_features=X.shape[1])

# One Adam optimizer per network, with identical hyper-parameters.
optimizer_generator = tf.keras.optimizers.Adam(
    learning_rate=0.0002,
    beta_1=0.5,
)
optimizer_discriminator = tf.keras.optimizers.Adam(
    learning_rate=0.0002,
    beta_1=0.5,
)

# The discriminator is compiled on its own so it can be trained directly.
discriminator_cgan.compile(
    loss='binary_crossentropy',
    optimizer=optimizer_discriminator,
    metrics=['accuracy'],
)

# Mark the discriminator as non-trainable before stacking it behind the
# generator in the combined model.
# NOTE(review): whether this flag also affects the already-compiled
# discriminator depends on the Keras version - confirm that the discriminator
# still updates during training.
discriminator_cgan.trainable = False

noise = Input(shape=(latent_dim,))
label = Input(shape=(1,))
generated_data = generator_cgan([noise, label])
validity = discriminator_cgan([generated_data, label])

# Stacked model: (noise, label) -> generator -> discriminator -> validity.
cgan = tf.keras.Model(inputs=[noise, label], outputs=validity)
cgan.compile(loss='binary_crossentropy', optimizer=optimizer_generator)

# Training cGAN
def train_cgan(cgan, generator, discriminator, data, labels, epochs=1000, batch_size=128, latent_dim=100):
    """Alternate discriminator and generator updates for a conditional GAN.

    Every iteration trains the discriminator twice (each time on half a batch
    of real rows and half a batch of generated rows) and then trains the
    generator once through the stacked ``cgan`` model.

    Args:
        cgan: Stacked generator + discriminator model used for generator updates.
        generator: Model mapping ``[noise, label]`` to generated rows.
        discriminator: Compiled model mapping ``[rows, label]`` to a validity
            score; the log line reads index 1 of its ``train_on_batch`` result
            as accuracy.
        data: Array of real feature rows, indexed by row position.
        labels: Array of class labels aligned with ``data``. Generated rows are
            conditioned on labels drawn from {0, 1}.
        epochs: Number of training iterations (one sampled batch each, not
            full passes over ``data``).
        batch_size: Rows per generator batch; the discriminator uses half.
        latent_dim: Length of the noise vector.
    """
    half_batch = batch_size // 2
    for epoch in range(epochs):
        for _ in range(2):
            # Train Discriminator
            idx = np.random.randint(0, data.shape[0], half_batch)
            real_data = data[idx]
            # Shape the labels as a column so they match the (batch, 1) label
            # input instead of relying on implicit 1-D expansion.
            real_labels = np.asarray(labels[idx]).reshape(-1, 1)

            noise = np.random.normal(0, 1, (half_batch, latent_dim))
            fake_labels = np.random.randint(0, 2, half_batch).reshape(-1, 1)
            # verbose=0: predict() otherwise prints a progress bar on every
            # call, which floods the output over thousands of iterations.
            fake_data = generator.predict([noise, fake_labels], verbose=0)

            d_loss_real = discriminator.train_on_batch([real_data, real_labels], np.ones((half_batch, 1)))
            d_loss_fake = discriminator.train_on_batch([fake_data, fake_labels], np.zeros((half_batch, 1)))
            # Only the values from the second discriminator pass are logged.
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train Generator: it is rewarded when the discriminator labels its
        # output as real (target 1).
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        random_labels = np.random.randint(0, 2, batch_size).reshape(-1, 1)
        g_loss = cgan.train_on_batch([noise, random_labels], np.ones((batch_size, 1)))

        if epoch % 100 == 0:
            print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}%] [G loss: {g_loss}]")

# Fit the cGAN on the training split for 1000 iterations.
train_cgan(
    cgan,
    generator_cgan,
    discriminator_cgan,
    X_train.values,
    y_train.values,
    epochs=1000,
)

# Evaluating GAN
def evaluate_mae(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``."""
    return mean_absolute_error(y_true, y_pred)

def evaluate_gan(generator, discriminator, X_test, y_test, latent_dim=100):
    """Score the discriminator's output on real test rows against their labels.

    Prints accuracy, precision, recall, a classification report and the MAE.

    NOTE(review): in this file the discriminator is trained with target 1 for
    real rows and 0 for generated rows, yet its thresholded output is compared
    with the ``Class`` labels here. Confirm that this is the intended
    evaluation.

    Args:
        generator: Unused; kept so existing calls keep working.
        discriminator: Model mapping ``[rows, label]`` to a score per row.
        X_test: Array of test feature rows.
        y_test: pandas Series of test labels (its ``.values`` are read).
        latent_dim: Unused; kept so existing calls keep working.
    """
    label_column = y_test.values.reshape(-1, 1)

    # The earlier predictions on generated rows were never used, so they are
    # no longer computed.
    scores = discriminator.predict([X_test, label_column], verbose=0)
    # Flatten the (n, 1) boolean column into a 1-D 0/1 vector, the shape
    # scikit-learn's metric functions expect.
    y_pred_real = (scores > 0.5).astype(int).ravel()

    # Evaluation metrics on real data. zero_division=0 keeps the reported
    # value (0.0) when a class is never predicted, without the warning.
    accuracy = accuracy_score(y_test, y_pred_real)
    precision = precision_score(y_test, y_pred_real, zero_division=0)
    recall = recall_score(y_test, y_pred_real, zero_division=0)
    report_GAN = classification_report(y_test, y_pred_real, zero_division=0)

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("Laporan Klasifikasi: ")
    print(report_GAN)

    # Evaluate using MAE
    mae = evaluate_mae(y_test, y_pred_real)
    print("Evaluation Metrics:")
    print("Mean Absolute Error (MAE):", mae)

# Print the classification metrics for the trained cGAN on the held-out split.
evaluate_gan(
    generator=generator_cgan,
    discriminator=discriminator_cgan,
    X_test=X_test.values,
    y_test=y_test,
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step  




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 




0 [D loss: 0.7220228314399719, acc.: 5.078125%] [G loss: [array(0.7225936, dtype=float32), array(0.7225936, dtype=float32), array(0.0703125, dtype=float32)]]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3m

# Skenario 4 : Dataset 2, Dengan KFold

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Input, Concatenate
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, mean_absolute_error

# Read the raw CSV and report how many target values are unusable.
data = pd.read_csv('/content/dataset2.csv')

class_column = data['Class']
print(class_column.isnull().sum())   # number of missing labels
print(np.isinf(class_column).sum())  # number of infinite labels

# Rows without a label cannot be used for training or evaluation.
data = data.dropna(subset=['Class'])

0
0


In [None]:
from sklearn.model_selection import KFold

# Separate the feature matrix from the integer class label.
X = data.drop('Class', axis=1)
y = data['Class'].astype('int64')

# Min-max scale every feature column to [0, 1].
# A constant column has max == min; dividing by that zero range would fill the
# column with NaN and propagate NaN into every loss value, so zero ranges are
# replaced by 1 (the constant column then scales to all zeros).
# NOTE(review): min/max are taken over the full dataset, before the folds are
# formed, so validation rows influence the scaling.
feature_min = X.min()
feature_range = (X.max() - feature_min).replace(0, 1)
X = (X - feature_min) / feature_range

# Length of the noise vector fed to the generator.
latent_dim = 100

# Generator for cGAN
def build_generator_cgan(latent_dim, n_features):
    """Build the conditional generator.

    The model takes a noise vector and a class label, concatenates them and
    maps the result to one synthetic feature row.

    Args:
        latent_dim: Length of the noise vector.
        n_features: Number of feature columns to generate.

    Returns:
        A ``tf.keras.Model`` with inputs ``[noise, label]`` and one output of
        ``n_features`` values per row, each in (0, 1).
    """
    noise = Input(shape=(latent_dim,))
    label = Input(shape=(1,))

    # Condition the generator by appending the label to the noise vector.
    x = Concatenate()([noise, label])

    model = Sequential([
        # An explicit Input replaces the deprecated `input_dim` argument,
        # which makes Keras emit a UserWarning when the layer is created.
        Input(shape=(latent_dim + 1,)),
        Dense(128),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        Dense(256),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        # The real features in this file are min-max scaled to [0, 1], so the
        # output uses sigmoid; tanh would also emit values in [-1, 0) that
        # never occur in real rows.
        Dense(n_features, activation='sigmoid')
    ])
    generated_data = model(x)

    return tf.keras.Model([noise, label], generated_data)

# Discriminator for cGAN
def build_discriminator_cgan(n_features):
    """Build the conditional discriminator.

    The model takes a feature row and a class label, concatenates them and
    outputs a single sigmoid validity score.

    Args:
        n_features: Number of feature columns in one row.

    Returns:
        A ``tf.keras.Model`` with inputs ``[row, label]`` and one output in
        (0, 1) per row.
    """
    # Named `features` rather than `data` so the module-level DataFrame is not
    # shadowed inside this function.
    features = Input(shape=(n_features,))
    label = Input(shape=(1,))

    # Condition the discriminator by appending the label to the feature row.
    x = Concatenate()([features, label])

    model = Sequential([
        # An explicit Input replaces the deprecated `input_dim` argument,
        # which makes Keras emit a UserWarning when the layer is created.
        Input(shape=(n_features + 1,)),
        Dense(256),
        LeakyReLU(alpha=0.2),
        Dense(128),
        LeakyReLU(alpha=0.2),
        Dense(1, activation='sigmoid')
    ])
    validity = model(x)

    return tf.keras.Model([features, label], validity)

# Instantiate both networks; each is sized by the number of feature columns.
generator_cgan = build_generator_cgan(latent_dim=latent_dim, n_features=X.shape[1])
discriminator_cgan = build_discriminator_cgan(n_features=X.shape[1])

# One Adam optimizer per network, with identical hyper-parameters.
optimizer_generator = tf.keras.optimizers.Adam(
    learning_rate=0.0002,
    beta_1=0.5,
)
optimizer_discriminator = tf.keras.optimizers.Adam(
    learning_rate=0.0002,
    beta_1=0.5,
)

# The discriminator is compiled on its own so it can be trained directly.
discriminator_cgan.compile(
    loss='binary_crossentropy',
    optimizer=optimizer_discriminator,
    metrics=['accuracy'],
)

# Mark the discriminator as non-trainable before stacking it behind the
# generator in the combined model.
# NOTE(review): whether this flag also affects the already-compiled
# discriminator depends on the Keras version - confirm that the discriminator
# still updates during training.
discriminator_cgan.trainable = False

noise = Input(shape=(latent_dim,))
label = Input(shape=(1,))
generated_data = generator_cgan([noise, label])
validity = discriminator_cgan([generated_data, label])

# Stacked model: (noise, label) -> generator -> discriminator -> validity.
cgan = tf.keras.Model(inputs=[noise, label], outputs=validity)
cgan.compile(loss='binary_crossentropy', optimizer=optimizer_generator)

# Training cGAN
def train_cgan(cgan, generator, discriminator, data, labels, epochs=1000, batch_size=128, latent_dim=100):
    """Alternate discriminator and generator updates for a conditional GAN.

    Every iteration trains the discriminator twice (each time on half a batch
    of real rows and half a batch of generated rows) and then trains the
    generator once through the stacked ``cgan`` model.

    Args:
        cgan: Stacked generator + discriminator model used for generator updates.
        generator: Model mapping ``[noise, label]`` to generated rows.
        discriminator: Compiled model mapping ``[rows, label]`` to a validity
            score; the log line reads index 1 of its ``train_on_batch`` result
            as accuracy.
        data: Array of real feature rows, indexed by row position.
        labels: Array of class labels aligned with ``data``. Generated rows are
            conditioned on labels drawn from {0, 1}.
        epochs: Number of training iterations (one sampled batch each, not
            full passes over ``data``).
        batch_size: Rows per generator batch; the discriminator uses half.
        latent_dim: Length of the noise vector.
    """
    half_batch = batch_size // 2
    for epoch in range(epochs):
        for _ in range(2):
            # Train Discriminator
            idx = np.random.randint(0, data.shape[0], half_batch)
            real_data = data[idx]
            # Shape the labels as a column so they match the (batch, 1) label
            # input instead of relying on implicit 1-D expansion.
            real_labels = np.asarray(labels[idx]).reshape(-1, 1)

            noise = np.random.normal(0, 1, (half_batch, latent_dim))
            fake_labels = np.random.randint(0, 2, half_batch).reshape(-1, 1)
            # verbose=0: predict() otherwise prints a progress bar on every
            # call, which floods the output over thousands of iterations.
            fake_data = generator.predict([noise, fake_labels], verbose=0)

            d_loss_real = discriminator.train_on_batch([real_data, real_labels], np.ones((half_batch, 1)))
            d_loss_fake = discriminator.train_on_batch([fake_data, fake_labels], np.zeros((half_batch, 1)))
            # Only the values from the second discriminator pass are logged.
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train Generator: it is rewarded when the discriminator labels its
        # output as real (target 1).
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        random_labels = np.random.randint(0, 2, batch_size).reshape(-1, 1)
        g_loss = cgan.train_on_batch([noise, random_labels], np.ones((batch_size, 1)))

        if epoch % 100 == 0:
            print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}%] [G loss: {g_loss}]")

def _build_fresh_cgan(latent_dim, n_features):
    """Create and compile a new, untrained (cgan, generator, discriminator) triple."""
    generator = build_generator_cgan(latent_dim, n_features)
    discriminator = build_discriminator_cgan(n_features)
    discriminator.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # Mark the discriminator as non-trainable before stacking it behind the
    # generator, mirroring the module-level model assembly.
    discriminator.trainable = False
    noise_in = Input(shape=(latent_dim,))
    label_in = Input(shape=(1,))
    validity_out = discriminator([generator([noise_in, label_in]), label_in])

    stacked = tf.keras.Model([noise_in, label_in], validity_out)
    stacked.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
        loss='binary_crossentropy',
    )
    return stacked, generator, discriminator


# Cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Store results for cross-validation
accuracies = []
precisions = []
recalls = []
maes = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X), 1):
    print(f"Fold {fold}")

    # Split data into train and validation sets
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Start every fold from untrained weights. Reusing one model across folds
    # lets it train on rows that later serve as validation data, which
    # inflates the scores of every fold after the first.
    cgan, generator_cgan, discriminator_cgan = _build_fresh_cgan(latent_dim, X.shape[1])

    # Train the cGAN model on the training set
    train_cgan(cgan, generator_cgan, discriminator_cgan, X_train.values, y_train.values, epochs=1000)

    # Score the real validation rows. The earlier predictions on generated
    # rows were never used, so they are no longer computed.
    # NOTE(review): the discriminator is trained with target 1 for real rows
    # and 0 for generated rows, yet its output is compared with the `Class`
    # labels below. Confirm that this is the intended evaluation.
    val_labels = y_val.values.reshape(-1, 1)
    scores = discriminator_cgan.predict([X_val.values, val_labels], verbose=0)
    # Flatten the (n, 1) boolean column into a 1-D 0/1 vector for scikit-learn.
    y_pred_real = (scores > 0.5).astype(int).ravel()

    # Evaluation metrics on real data. zero_division=0 keeps the reported
    # value (0.0) when a class is never predicted, without the warning.
    accuracy = accuracy_score(y_val, y_pred_real)
    precision = precision_score(y_val, y_pred_real, zero_division=0)
    recall = recall_score(y_val, y_pred_real, zero_division=0)
    report_GAN = classification_report(y_val, y_pred_real, zero_division=0)

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("Laporan Klasifikasi: ")
    print(report_GAN)

    # Evaluate using MAE
    mae = mean_absolute_error(y_val, y_pred_real)
    print("Evaluation Metrics:")
    print("Mean Absolute Error (MAE):", mae)

    # Store the results for this fold
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    maes.append(mae)

# Print average metrics over all folds
print("\nAverage Results Across 5 Folds:")
print("Average Accuracy:", np.mean(accuracies))
print("Average Precision:", np.mean(precisions))
print("Average Recall:", np.mean(recalls))
print("Average MAE:", np.mean(maes))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Fold 1
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step  




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 




0 [D loss: 0.6899368762969971, acc.: 56.77083730697632%] [G loss: [array(0.6920772, dtype=float32), array(0.6920772, dtype=float32), array(0.546875, dtype=float32)]]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 