In [None]:
import pandas as pd


# Location of the processed fraud dataset (path suggests a mounted Google Drive
# in Colab -- the mount step is not part of this notebook).
file_path = "/content/drive/MyDrive/Colab Notebooks/processed_fraud_data.csv"

# Full dataset; later cells filter it on the `Fraud_Label` column.
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Keep only the rows labelled as fraud. The label is constant within this
# subset, so it is dropped and only the feature columns remain.
is_fraud = df['Fraud_Label'] == 1
fraud_data = df.loc[is_fraud].drop(columns=['Fraud_Label'])

In [None]:
# Features that get min-max scaled; every other column of `fraud_data` is
# passed through untouched as "categorical".
continuous_columns = [
    'Transaction_Amount', 'Account_Balance', 'Avg_Transaction_Amount_7d',
    'Transaction_Distance', 'Daily_Transaction_Count',
    'Year', 'Month', 'Day', 'Hour', 'Minute', 'Second'
]

# Scale to [-1, 1]; the fitted scaler is kept so generated values can be
# mapped back to original units later.
scaler = MinMaxScaler(feature_range=(-1, 1))

fraud_data_cat = fraud_data.drop(columns=continuous_columns)
fraud_data_cont = fraud_data[continuous_columns]
fraud_data_cont_scaled = scaler.fit_transform(fraud_data_cont)

# NOTE(review): the categorical columns are NOT scaled here, yet the generator
# ends in tanh and can only emit values in [-1, 1]. Any categorical code
# outside that interval is unreachable for the generator -- confirm whether
# these columns should be scaled or one-hot encoded as well.
fraud_data_scaled = pd.concat(
    [
        pd.DataFrame(fraud_data_cont_scaled, columns=continuous_columns),
        # Positional index so the rows line up with the freshly built frame.
        fraud_data_cat.reset_index(drop=True),
    ],
    axis=1,
)

# Plain NumPy matrix used to sample real batches during training.
fraud_data_array = fraud_data_scaled.to_numpy()

In [None]:
def build_generator(noise_dim, output_dim):
    """Build the generator network.

    The model is a fully connected stack:
    noise_dim -> 256 (ReLU) -> 512 (ReLU) -> output_dim (tanh).

    Args:
        noise_dim: Length of the latent noise vector fed to the model.
        output_dim: Number of values produced per generated sample.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model. Because of the final
        tanh activation every output value lies in [-1, 1].
    """
    model = tf.keras.Sequential()
    # Declare the input shape with an explicit Input layer; passing
    # `input_dim=` to the first Dense layer makes Keras emit a warning.
    model.add(layers.Input(shape=(noise_dim,)))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(output_dim, activation='tanh'))
    return model

# Length of the latent vector sampled for every generated row.
noise_dim = 100
# One generator output per column of the prepared fraud matrix; the critic
# consumes vectors of the same width.
input_dim = fraud_data_array.shape[1]
generator = build_generator(noise_dim=noise_dim, output_dim=input_dim)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
def build_critic(input_dim):
    """Build the critic network.

    The model is a fully connected stack:
    input_dim -> 512 (ReLU) -> 256 (ReLU) -> 1 (linear).

    Args:
        input_dim: Number of features in each real or generated sample.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model that outputs one
        unbounded score per sample (no activation on the last layer).
    """
    model = tf.keras.Sequential()
    # Declare the input shape with an explicit Input layer; passing
    # `input_dim=` to the first Dense layer makes Keras emit a warning.
    model.add(layers.Input(shape=(input_dim,)))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(1))
    return model

# The critic scores vectors with the same width as the generator's output.
critic = build_critic(input_dim=input_dim)

In [None]:
# Generator and critic each get their own RMSprop instance with the same
# small step size.
WGAN_LEARNING_RATE = 5e-5
optimizer_gen = tf.keras.optimizers.RMSprop(learning_rate=WGAN_LEARNING_RATE)
optimizer_critic = tf.keras.optimizers.RMSprop(learning_rate=WGAN_LEARNING_RATE)

In [None]:
# WGAN training with weight clipping.
batch_size = 256      # rows per real / generated batch
epochs = 5000         # number of generator updates
clip_value = 0.01     # critic weights are clamped to [-clip_value, clip_value]
critic_updates = 5    # critic steps performed before each generator step

for epoch in range(epochs):
    # --- Critic phase: several updates per generator update ---
    for _ in range(critic_updates):
        # Sample a real batch (indices drawn with replacement).
        idx = np.random.randint(0, fraud_data_array.shape[0], batch_size)
        real_samples = fraud_data_array[idx]

        noise = np.random.normal(0, 1, (batch_size, noise_dim))
        # Call the model directly instead of `predict()`: this runs once per
        # critic step on a single small batch, where `predict()` adds
        # needless per-call overhead. The call sits outside the tape, so the
        # generator is not part of the critic's gradient computation.
        generated_samples = generator(noise, training=False)

        with tf.GradientTape() as tape:
            real_pred = critic(real_samples)
            fake_pred = critic(generated_samples)
            # Minimising this raises the critic's mean score on real samples
            # relative to generated ones.
            loss_critic = -(tf.reduce_mean(real_pred) - tf.reduce_mean(fake_pred))

        grads = tape.gradient(loss_critic, critic.trainable_variables)
        optimizer_critic.apply_gradients(zip(grads, critic.trainable_variables))

        # Clamp every critic weight in place rather than round-tripping all
        # weights through NumPy with get_weights()/set_weights() each step.
        for weight in critic.weights:
            weight.assign(tf.clip_by_value(weight, -clip_value, clip_value))

    # --- Generator phase: one update against the freshly trained critic ---
    noise = np.random.normal(0, 1, (batch_size, noise_dim))
    with tf.GradientTape() as tape:
        generated_samples = generator(noise, training=True)
        fake_pred = critic(generated_samples)
        # The generator is rewarded when the critic scores its samples highly.
        loss_gen = -tf.reduce_mean(fake_pred)

    grads = tape.gradient(loss_gen, generator.trainable_variables)
    optimizer_gen.apply_gradients(zip(grads, generator.trainable_variables))

    if epoch % 500 == 0:
        print(f"Epoch {epoch} completed")

Epoch 0 completed
Epoch 500 completed
Epoch 1000 completed
Epoch 1500 completed
Epoch 2000 completed
Epoch 2500 completed
Epoch 3000 completed
Epoch 3500 completed
Epoch 4000 completed
Epoch 4500 completed


In [None]:
# Draw one latent vector per synthetic row and push them through the trained
# generator in a single batched prediction.
num_synthetic = 17000
noise = np.random.normal(loc=0, scale=1, size=(num_synthetic, noise_dim))
generated_samples = generator.predict(noise, verbose=0)

In [None]:
# Attach the training feature names to the raw generator output; values are
# still on the network's output scale, not in original units.
generated_samples_df = pd.DataFrame(
    data=generated_samples,
    columns=fraud_data_scaled.columns,
)

# Preview the first five synthetic rows.
generated_samples_df.head(5)

Unnamed: 0,Transaction_Amount,Account_Balance,Avg_Transaction_Amount_7d,Transaction_Distance,Daily_Transaction_Count,Year,Month,Day,Hour,Minute,Second,Transaction_Type,Device_Type,IP_Address_Flag,Previous_Fraudulent_Activity,Failed_Transaction_Count_7d,Card_Type,Authentication_Method,Is_Weekend
0,-0.915336,-0.103862,-0.535502,-0.076594,-0.128896,-0.851492,-0.138235,0.081686,0.243008,0.028346,-0.947073,0.999996,0.934248,0.65312,0.705276,0.999998,0.999992,0.999999,0.561675
1,-0.885426,-0.044091,-0.346142,0.125152,0.263714,-0.894981,-0.013943,-0.227179,-0.248944,0.478384,-0.958187,0.999987,0.985405,0.266731,-0.28592,0.999995,0.999986,0.999993,0.077974
2,-0.567258,-0.075059,-0.185482,-0.368736,-0.333844,-0.713119,0.223762,0.463397,0.153253,0.370367,-0.677918,0.999953,0.830057,0.014997,-0.301738,0.999931,0.999945,0.99994,0.58409
3,-0.911197,-0.060886,0.120452,0.138476,0.269871,-0.971599,-0.512396,0.442811,0.240928,-0.49017,-0.91531,0.999962,0.972576,0.200269,0.221732,0.999967,0.999945,0.999966,0.856017
4,-0.871496,0.355046,0.083189,0.291527,-0.44105,-0.857925,-0.080708,0.277216,0.308595,0.484547,-0.895405,0.999975,0.958383,-0.455974,-0.474394,0.999992,0.999994,0.999976,-0.088213


In [None]:
# Label the raw generator output with the training feature names.
generated_samples_df = pd.DataFrame(generated_samples, columns=fraud_data_scaled.columns)

# Separate continuous & categorical parts
gen_cont = generated_samples_df[continuous_columns].values
gen_cat = generated_samples_df.drop(columns=continuous_columns).reset_index(drop=True)

# Inverse transform continuous columns back to their original units
gen_cont_rescaled = scaler.inverse_transform(gen_cont)
gen_cont_df = pd.DataFrame(gen_cont_rescaled, columns=continuous_columns)

# Combine continuous + categorical
synthetic_fraud_wgan = pd.concat([gen_cont_df, gen_cat], axis=1)

# The generator emits real numbers, but several features are integer-typed in
# the real data (category codes, flags, counts). Round those columns, keep them
# inside the range observed in the real fraud rows, and restore the original
# dtype so the synthetic rows do not carry fractional or out-of-range codes
# into the augmented dataset.
# NOTE(review): this only makes the values valid. The generator's tanh output
# is limited to [-1, 1] and the categorical columns were not scaled before
# training, so codes above 1 still cannot be generated -- confirm and address
# in the preprocessing step.
for column in synthetic_fraud_wgan.columns:
    real_values = fraud_data[column]
    if pd.api.types.is_integer_dtype(real_values.dtype):
        synthetic_fraud_wgan[column] = (
            synthetic_fraud_wgan[column]
            .round()
            .clip(lower=real_values.min(), upper=real_values.max())
            .astype(real_values.dtype)
        )

# Every generated row represents a fraudulent transaction.
synthetic_fraud_wgan['Fraud_Label'] = 1

In [None]:
# Append the synthetic fraud rows to the original dataset and persist the result.
augmented_wgan_data = pd.concat([df, synthetic_fraud_wgan], ignore_index=True)
augmented_wgan_data.to_csv('/content/balanced_WGAN.csv', index=False)
# Report the actual number of synthetic rows instead of a hard-coded figure so
# the message stays correct if the sample count changes.
print(f"✅ {len(synthetic_fraud_wgan):,} synthetic fraud samples generated using WGAN and saved as balanced_WGAN.csv")

✅ 17,000 synthetic fraud samples generated using WGAN and saved as balanced_WGAN.csv


In [None]:
# Colab-only helper: triggers a browser download of the saved CSV.
from google.colab import files

balanced_csv_path = '/content/balanced_WGAN.csv'
files.download(balanced_csv_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Summarise the original (non-augmented) dataset: column names, non-null
# counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 20 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Transaction_Amount            50000 non-null  float64
 1   Transaction_Type              50000 non-null  int64  
 2   Account_Balance               50000 non-null  float64
 3   Device_Type                   50000 non-null  int64  
 4   IP_Address_Flag               50000 non-null  int64  
 5   Previous_Fraudulent_Activity  50000 non-null  int64  
 6   Daily_Transaction_Count       50000 non-null  int64  
 7   Avg_Transaction_Amount_7d     50000 non-null  float64
 8   Failed_Transaction_Count_7d   50000 non-null  int64  
 9   Card_Type                     50000 non-null  int64  
 10  Transaction_Distance          50000 non-null  float64
 11  Authentication_Method         50000 non-null  int64  
 12  Is_Weekend                    50000 non-null  int64  
 13  F

In [None]:
# Class distribution of the original (non-augmented) dataset.
fraud_label_counts = df['Fraud_Label'].value_counts()
print(fraud_label_counts)


Fraud_Label
0    33933
1    16067
Name: count, dtype: int64
