In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Step 1: Load the CSV data
def _features_as_tensor(frame):
    """Return every column of ``frame`` except the last one as a float32 tensor."""
    return torch.tensor(frame.to_numpy()[:, :-1], dtype=torch.float32)


normal_frame = pd.read_csv("normal.csv")
adv_frame = pd.read_csv("adjusted_adversarial_examples_CWinf.csv")
# The adversarial file carries one extra leading column, which is discarded here.
adv_frame = adv_frame.drop(columns=adv_frame.columns[0])

# Step 2: Preprocess the data
# Strip the last column of each table and convert the remainder to PyTorch tensors.
# No scaling/normalisation is applied in this cell: values are used exactly as stored.
normal_data = _features_as_tensor(normal_frame)
adv_data = _features_as_tensor(adv_frame)




  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Step 3: Define the Autoencoder model
class Autoencoder(nn.Module):
    """Single-hidden-layer autoencoder.

    The encoder is ``Linear(input_dim -> latent_dim)`` followed by ReLU, so codes
    are non-negative. The decoder is ``Linear(latent_dim -> input_dim)`` followed
    by Sigmoid, so reconstructions lie in [0, 1].

    Args:
        input_dim: Number of features in each input row.
        latent_dim: Width of the encoded representation.
    """

    def __init__(self, input_dim, latent_dim):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(input_dim, latent_dim), nn.ReLU())
        self.decoder = nn.Sequential(nn.Linear(latent_dim, input_dim), nn.Sigmoid())

    def forward(self, x):
        """Encode ``x`` and return its reconstruction (same trailing width as ``x``)."""
        return self.decoder(self.encoder(x))

# Step 4: Train the Autoencoder on normal data
input_dim = normal_data.shape[1]
latent_dim = 100  # Adjust this
autoencoder = Autoencoder(input_dim, latent_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=1e-3)

# Adjust this. (The original inline note read "5BIM" -- presumably 5 epochs were
# used for a BIM run; TODO confirm.)
epochs = 50
for epoch in range(epochs):
    # Full-batch step: the whole normal_data tensor is reconstructed at once and
    # the mean squared reconstruction error is minimised.
    outputs = autoencoder(normal_data)
    loss = criterion(outputs, normal_data)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Step 5: Extract the latent spaces
# Pure inference: without no_grad() each call records an autograd graph spanning
# the entire dataset and keeps it alive for as long as the latent tensors exist.
with torch.no_grad():
    normal_latent = autoencoder.encoder(normal_data)
    adv_latent = autoencoder.encoder(adv_data)

# Step 6: Create the GAN model
class Generator(nn.Module):
    """Fully connected generator: ``latent_dim -> 128 -> 256 -> output_dim``.

    ReLU follows each hidden layer; the final layer is linear with no output
    activation.

    Args:
        latent_dim: Width of the input vectors fed to the generator.
        output_dim: Width of the generated rows.
    """

    def __init__(self, latent_dim, output_dim):
        super().__init__()
        self.latent_dim = latent_dim
        self.output_dim = output_dim

        # Build the stack layer by layer; creation order matches the layer order.
        layers = []
        in_features = latent_dim
        for width in (128, 256):
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        layers.append(nn.Linear(in_features, output_dim))
        self.model = nn.Sequential(*layers)

    def forward(self, z):
        """Map a batch ``z`` of width ``latent_dim`` to rows of width ``output_dim``."""
        return self.model(z)

class Discriminator(nn.Module):
    """Fully connected discriminator: ``input_dim -> 256 -> 128 -> 1``.

    ReLU follows each hidden layer and a Sigmoid squashes the single output into
    [0, 1], so the result can be fed directly to ``nn.BCELoss``.

    Args:
        input_dim: Width of the rows being judged.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.input_dim = input_dim

        # Build the stack layer by layer; creation order matches the layer order.
        layers = []
        in_features = input_dim
        for width in (256, 128):
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        layers.append(nn.Linear(in_features, 1))
        layers.append(nn.Sigmoid())
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return one score in [0, 1] per input row, shaped ``(batch, 1)``."""
        return self.model(x)



In [3]:
# Hyperparameters for the GAN stage.
latent_dim = 100  # Adjust this
output_dim = input_dim  # Adjust this
batch_size = 32
# Adjust this. (The original inline note suggests 50 was also tried and that 5 was
# used for "FSFM"/"BIM" runs -- TODO confirm what those notes refer to.)
num_epochs = 5

# The loop below feeds raw adversarial rows straight into the generator, whose
# first layer expects `latent_dim` inputs. Fail early with a readable message
# instead of an opaque matmul shape error in the first batch.
# NOTE(review): the latents computed in Step 5 are not used here; if the GAN was
# meant to operate in latent space, this cell needs reworking -- confirm intent.
if adv_data.shape[1] != latent_dim:
    raise ValueError(
        f"Generator expects {latent_dim} input features but adv_data has "
        f"{adv_data.shape[1]}; set latent_dim to match or feed encoded data."
    )

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the models directly on the target device.
generator = Generator(latent_dim, output_dim).to(device)
discriminator = Discriminator(input_dim).to(device)

# Step 7: Train the GAN model
optimizer_G = optim.Adam(generator.parameters(), lr=0.0002)
optimizer_D = optim.Adam(discriminator.parameters(), lr=0.0002)

adversarial_loss = nn.BCELoss()

# Real and adversarial rows are paired by position, so only the common prefix of
# the two tensors can be used; iterating past the shorter one would produce
# mismatched batch/label sizes.
n_paired = min(len(normal_data), len(adv_data))

# Training loop
for epoch in range(num_epochs):
    for i in range(0, n_paired, batch_size):
        stop = min(i + batch_size, n_paired)
        real_data = normal_data[i:stop].to(device)
        adv_samples = adv_data[i:stop].to(device)

        # Adversarial ground truths, sized to the (possibly short, final) batch.
        batch_len = real_data.size(0)
        valid = torch.ones((batch_len, 1), device=device)
        fake = torch.zeros((batch_len, 1), device=device)

        # ---------------------
        #  Train Discriminator
        # ---------------------

        optimizer_D.zero_grad()

        # Generate a batch of fake samples from adversarial samples
        fake_data = generator(adv_samples)

        # Measure discriminator's ability to classify real from generated samples.
        # detach() keeps this step from back-propagating into the generator.
        real_loss = adversarial_loss(discriminator(real_data), valid)
        fake_loss = adversarial_loss(discriminator(fake_data.detach()), fake)
        discriminator_loss = (real_loss + fake_loss) / 2

        discriminator_loss.backward()
        optimizer_D.step()

        # -----------------
        #  Train Generator
        # -----------------

        optimizer_G.zero_grad()

        # The generator's weights have not changed since `fake_data` was produced
        # and its graph has not been consumed (the discriminator step used a
        # detached copy), so the same forward pass is reused here.
        generator_loss = adversarial_loss(discriminator(fake_data), valid)

        generator_loss.backward()
        optimizer_G.step()

        # `i` advances in steps of batch_size, so this fires only when `i` is a
        # multiple of both 1000 and batch_size (every 4000 rows for batch_size=32).
        if i % 1000 == 0:
            print(
                f"[Epoch {epoch+1}/{num_epochs}] [Batch {i}/{n_paired}] "
                f"Discriminator Loss: {discriminator_loss.item():.4f} "
                f"Generator Loss: {generator_loss.item():.4f}"
            )



[Epoch 1/5] [Batch 0/109128] Discriminator Loss: 0.4416 Generator Loss: 0.6937
[Epoch 1/5] [Batch 4000/109128] Discriminator Loss: 0.3426 Generator Loss: 0.7227
[Epoch 1/5] [Batch 8000/109128] Discriminator Loss: 0.2546 Generator Loss: 0.9683
[Epoch 1/5] [Batch 12000/109128] Discriminator Loss: 0.2765 Generator Loss: 0.9663
[Epoch 1/5] [Batch 16000/109128] Discriminator Loss: 0.4169 Generator Loss: 0.7641
[Epoch 1/5] [Batch 20000/109128] Discriminator Loss: 0.4500 Generator Loss: 0.5272
[Epoch 1/5] [Batch 24000/109128] Discriminator Loss: 0.2292 Generator Loss: 1.2992
[Epoch 1/5] [Batch 28000/109128] Discriminator Loss: 0.0735 Generator Loss: 2.4040
[Epoch 1/5] [Batch 32000/109128] Discriminator Loss: 0.4064 Generator Loss: 0.7442
[Epoch 1/5] [Batch 36000/109128] Discriminator Loss: 0.1053 Generator Loss: 1.8298
[Epoch 1/5] [Batch 40000/109128] Discriminator Loss: 4.8083 Generator Loss: 1.8239
[Epoch 1/5] [Batch 44000/109128] Discriminator Loss: 0.1072 Generator Loss: 2.2301
[Epoch 1/5

[Epoch 4/5] [Batch 60000/109128] Discriminator Loss: 0.0000 Generator Loss: 13.9437
[Epoch 4/5] [Batch 64000/109128] Discriminator Loss: 4.6901 Generator Loss: 7.2283
[Epoch 4/5] [Batch 68000/109128] Discriminator Loss: 0.0296 Generator Loss: 8.3184
[Epoch 4/5] [Batch 72000/109128] Discriminator Loss: 0.0057 Generator Loss: 10.0365
[Epoch 4/5] [Batch 76000/109128] Discriminator Loss: 1.5625 Generator Loss: 13.2009
[Epoch 4/5] [Batch 80000/109128] Discriminator Loss: 9.6243 Generator Loss: 5.4003
[Epoch 4/5] [Batch 84000/109128] Discriminator Loss: 1.5625 Generator Loss: 22.3098
[Epoch 4/5] [Batch 88000/109128] Discriminator Loss: 3.1250 Generator Loss: 45.5087
[Epoch 4/5] [Batch 92000/109128] Discriminator Loss: 0.0000 Generator Loss: 18.1833
[Epoch 4/5] [Batch 96000/109128] Discriminator Loss: 4.6875 Generator Loss: 17.8668
[Epoch 4/5] [Batch 100000/109128] Discriminator Loss: 3.4832 Generator Loss: 6.7911
[Epoch 4/5] [Batch 104000/109128] Discriminator Loss: 0.0379 Generator Loss: 6.

In [4]:
# Step 8: Generate new samples with the GAN
num_samples = len(adv_data)

# Inference only: no_grad() avoids recording an autograd graph (and holding every
# intermediate activation) for the full set of generated rows.
with torch.no_grad():
    # NOTE(review): during training the generator was fed adversarial rows, but
    # here it is driven with standard-normal noise -- confirm this is intended.
    z = torch.randn((num_samples, latent_dim)).to(device)
    generated_samples = generator(z).cpu()

    # Step 9: Decode the generated samples using the Autoencoder
    # NOTE(review): the decoder's Linear layer takes `latent_dim` inputs while the
    # generator emits `output_dim` columns, so this only runs when the two are
    # equal -- confirm the generator is meant to produce latent codes.
    decoded_samples = autoencoder.decoder(generated_samples).cpu()

# Step 10: Save the decoded samples as CSV file
output_data = pd.DataFrame(decoded_samples.numpy(), columns=[f"Feature_{i}" for i in range(decoded_samples.shape[1])])

# Every generated row gets the label 1.0.
output_data['Label'] = np.ones(len(output_data))

output_data.to_csv("modified_adv_samplesADvNormalCWinf.csv", index=False)