In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from keras import layers, models
import matplotlib.pyplot as plt
import os

In [None]:

# Load and preprocess the dataset
def load_data(file_path):
    """Load a CSV file and convert it into a scaled 3-D array.

    Processing steps, in order:
      1. read the CSV with ``pd.read_csv``;
      2. keep numeric columns only (labels, timestamps, etc. are dropped);
      3. turn +/-inf into NaN and drop every row that contains a NaN;
      4. min-max scale each remaining column to the range [-1, 1];
      5. append a trailing axis of size 1.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pd.read_csv``.

    Returns:
        ``numpy.ndarray`` of shape ``(rows, numeric_columns, 1)``.

    Raises:
        ValueError: If the file has no numeric columns, or if no row is left
            once rows containing NaN/inf values have been removed.
    """
    raw = pd.read_csv(file_path)

    # Drop non-numeric columns if any (e.g., labels, timestamps)
    numeric = raw.select_dtypes(include=[np.number])
    if numeric.shape[1] == 0:
        raise ValueError(f"No numeric columns found in {file_path!r}.")

    # Chained, non-mutating cleanup: infinities become NaN so that a single
    # dropna() removes both kinds of unusable rows.
    cleaned = numeric.replace([np.inf, -np.inf], np.nan).dropna()
    if cleaned.empty:
        # Fail here with a message naming the file instead of letting the
        # scaler complain about an array with zero samples.
        raise ValueError(
            f"No rows left in {file_path!r} after removing NaN/inf values."
        )

    # Normalize the data to the range [-1, 1]
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled = scaler.fit_transform(cleaned)

    # Add a trailing channel dimension: (rows, features) -> (rows, features, 1)
    return np.expand_dims(scaled, axis=-1)

In [None]:

# Define the generator model
def build_generator(input_dim=100, output_shape=(78, 1)):
    """Build the generator network as a Keras ``Sequential`` model.

    Architecture: three hidden stages of Dense -> LeakyReLU(0.2) ->
    BatchNormalization(momentum=0.8) with 256, 512 and 1024 units, followed
    by a tanh Dense layer with ``prod(output_shape)`` units and a Reshape to
    ``output_shape``.

    Args:
        input_dim: Size of the input vector fed to the first Dense layer.
        output_shape: Shape of one generated sample (without batch axis).

    Returns:
        The uncompiled ``Sequential`` generator model.
    """
    generator = models.Sequential()

    hidden_widths = (256, 512, 1024)
    for depth, width in enumerate(hidden_widths):
        # Only the first Dense layer declares the input size.
        if depth == 0:
            dense = layers.Dense(width, input_dim=input_dim)
        else:
            dense = layers.Dense(width)
        generator.add(dense)
        generator.add(layers.LeakyReLU(alpha=0.2))
        generator.add(layers.BatchNormalization(momentum=0.8))

    # One tanh unit per output element, then fold back into output_shape.
    flat_units = np.prod(output_shape)
    generator.add(layers.Dense(flat_units, activation='tanh'))
    generator.add(layers.Reshape(output_shape))
    return generator

In [None]:
# Define the discriminator model
def build_discriminator(input_shape=(78, 1)):
    """Build the discriminator network as a Keras ``Sequential`` model.

    Architecture: Flatten -> Dense(512) -> LeakyReLU(0.2) -> Dense(256) ->
    LeakyReLU(0.2) -> Dense(1, sigmoid).

    Args:
        input_shape: Shape of one input sample (without batch axis).

    Returns:
        The uncompiled ``Sequential`` discriminator model, whose output is a
        single sigmoid unit per sample.
    """
    critic = models.Sequential()
    critic.add(layers.Flatten(input_shape=input_shape))

    for width in (512, 256):
        critic.add(layers.Dense(width))
        critic.add(layers.LeakyReLU(alpha=0.2))

    critic.add(layers.Dense(1, activation='sigmoid'))
    return critic

In [None]:
# Compile the models
def compile_models(generator, discriminator, latent_dim=None):
    """Compile the discriminator and build/compile the stacked GAN model.

    The discriminator is compiled on its own (binary cross-entropy, Adam with
    learning rate 0.0002 and beta_1 0.5, accuracy metric). Its ``trainable``
    flag is then set to False and a combined model ``noise -> generator ->
    discriminator`` is built and compiled with the same loss and optimizer
    settings.

    Args:
        generator: Model mapping a noise vector to a generated sample.
        discriminator: Model mapping a sample to a single probability.
            It is compiled in place and its ``trainable`` flag is modified.
        latent_dim: Length of the noise vector. When None it is read from
            ``generator.input_shape``; if that is unavailable, 100 is used.

    Returns:
        The compiled combined model.
    """
    if latent_dim is None:
        # Keep the combined model's input in sync with the generator instead
        # of assuming a noise size of 100.
        try:
            inferred = generator.input_shape[-1]
        except (AttributeError, TypeError, IndexError, ValueError):
            inferred = None
        latent_dim = inferred if isinstance(inferred, int) else 100

    discriminator.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
        metrics=['accuracy'],
    )

    # Freeze the discriminator before wiring it into the combined model so
    # that training the combined model is meant to update the generator only.
    # NOTE(review): how a `trainable` change made after compile() affects the
    # already-compiled standalone discriminator depends on the Keras version
    # in use - confirm the discriminator still learns during training.
    discriminator.trainable = False

    gan_input = layers.Input(shape=(latent_dim,))
    generated_image = generator(gan_input)
    gan_output = discriminator(generated_image)
    gan = models.Model(gan_input, gan_output)
    gan.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
    )
    return gan

In [None]:
# Save generated samples
def save_samples(generator, epoch, output_dir='gan_samples', examples=5, latent_dim=None):
    """Generate samples from random noise and save one line plot per sample.

    Each plot shows the values of one generated sample against the feature
    index and is written to
    ``{output_dir}/gan_generated_sample_epoch_{epoch}_sample_{k}.png``.

    Args:
        generator: Model with a ``predict`` method mapping noise of shape
            ``(examples, latent_dim)`` to samples indexable as
            ``[i, :, 0]``.
        epoch: Value used in the plot title and file name.
        output_dir: Directory for the PNG files; created if missing.
        examples: Number of samples to generate and plot.
        latent_dim: Length of the noise vector. When None it is read from
            ``generator.input_shape``; if that is unavailable, 100 is used.
    """
    if latent_dim is None:
        # Follow the generator's own input size rather than assuming 100.
        try:
            inferred = generator.input_shape[-1]
        except (AttributeError, TypeError, IndexError, ValueError):
            inferred = None
        latent_dim = inferred if isinstance(inferred, int) else 100

    noise = np.random.normal(0, 1, (examples, latent_dim))
    # verbose=0 keeps the per-call progress bar out of the training log.
    generated_samples = generator.predict(noise, verbose=0)
    # Affine rescale 0.5 * x + 0.5, i.e. [-1, 1] -> [0, 1] for plotting.
    # Assumes the generator output lies in [-1, 1] - TODO confirm.
    generated_samples = 0.5 * generated_samples + 0.5

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(output_dir, exist_ok=True)

    for i in range(examples):
        fig = plt.figure(figsize=(10, 2))
        try:
            plt.plot(generated_samples[i, :, 0])
            plt.title(f'Sample {i + 1} at Epoch {epoch}')
            plt.xlabel('Feature Index')
            plt.ylabel('Value')
            plt.savefig(f"{output_dir}/gan_generated_sample_epoch_{epoch}_sample_{i + 1}.png")
        finally:
            # Always release the figure, even if saving fails.
            plt.close(fig)

In [None]:
# Plot training losses
def plot_losses(d_losses, g_losses, output_dir='gan_samples'):
    """Plot discriminator and generator loss curves and save them as a PNG.

    The figure is written to ``{output_dir}/gan_losses.png``.

    Args:
        d_losses: Sequence of discriminator loss values, one per iteration.
        g_losses: Sequence of generator loss values, one per iteration.
        output_dir: Directory for the PNG file; created if missing.
    """
    # Ensure the target directory exists so this function also works when it
    # is called on its own or with a directory nothing else has created yet.
    os.makedirs(output_dir, exist_ok=True)

    fig = plt.figure(figsize=(10, 5))
    try:
        plt.plot(d_losses, label='Discriminator Loss')
        plt.plot(g_losses, label='Generator Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.savefig(f"{output_dir}/gan_losses.png")
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)

In [None]:
# Train the GAN
def train_gan(generator, discriminator, gan, data, epochs=10000, batch_size=64, save_interval=1000):
    """Run the alternating discriminator/generator training loop.

    Each iteration:
      1. draws ``batch_size // 2`` random real rows from ``data`` and the same
         number of generated samples, and trains the discriminator on each
         half (targets 1 for real, 0 for generated);
      2. trains the combined model on ``batch_size`` noise vectors with
         target 1.

    Progress is printed every 100 iterations, sample plots are saved every
    ``save_interval`` iterations via ``save_samples``, and the loss curves
    are saved at the end via ``plot_losses``.

    Args:
        generator: Model mapping noise to generated samples.
        discriminator: Compiled model; ``train_on_batch`` is expected to
            return ``[loss, accuracy]``.
        gan: Compiled combined model (noise -> discriminator output).
        data: Array of real samples, indexed along axis 0.
        epochs: Number of training iterations (one batch each).
        batch_size: Batch size for the generator step; the discriminator
            uses half of it (at least 1) for each of its two updates.
        save_interval: Save sample plots whenever the iteration index is a
            multiple of this value.
    """
    # Never let the discriminator half-batch collapse to zero samples.
    half_batch = max(1, batch_size // 2)

    # Match the noise size to the generator instead of assuming 100.
    try:
        inferred = generator.input_shape[-1]
    except (AttributeError, TypeError, IndexError, ValueError):
        inferred = None
    latent_dim = inferred if isinstance(inferred, int) else 100

    # Target arrays are constant, so build them once. All of them use a
    # trailing axis of size 1, the same shape the discriminator targets
    # already had.
    real_labels = np.ones((half_batch, 1))
    fake_labels = np.zeros((half_batch, 1))
    generator_targets = np.ones((batch_size, 1))

    d_losses = []
    g_losses = []

    for epoch in range(epochs):
        # Train discriminator
        idx = np.random.randint(0, data.shape[0], half_batch)
        real_samples = data[idx]
        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        # verbose=0: no progress bar for every single iteration.
        fake_samples = generator.predict(noise, verbose=0)
        d_loss_real = discriminator.train_on_batch(real_samples, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_samples, fake_labels)
        # Element-wise mean of the [loss, accuracy] pairs of both updates.
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train generator (through the combined model, all targets = 1)
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        g_loss = gan.train_on_batch(noise, generator_targets)

        # Save losses for plotting
        d_losses.append(d_loss[0])
        g_losses.append(g_loss)

        # Print progress
        if epoch % 100 == 0:
            print(f"{epoch} [D loss: {d_loss[0]} | D accuracy: {100*d_loss[1]}] [G loss: {g_loss}]")

        # Save generated samples at save_interval
        if epoch % save_interval == 0:
            save_samples(generator, epoch)

    # Plot the losses
    plot_losses(d_losses, g_losses)


In [None]:
# Main function to run the GAN
def main(file_path='/home/cse/Documents/base-folder/DatasetToCheck/CICIDS2017/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv'):
    """Run the full pipeline: load data, build and compile models, train.

    Args:
        file_path: CSV file to train on. The default is the original
            machine-specific location; pass another path to run the
            notebook elsewhere without editing this function.
    """
    data = load_data(file_path)
    generator = build_generator()
    discriminator = build_discriminator()
    gan = compile_models(generator, discriminator)
    train_gan(generator, discriminator, gan, data)

if __name__ == "__main__":
    main()