## Imports

In [None]:
import pandas as pd
import numpy as np
from scipy.io.arff import loadarff
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import os
from collections import defaultdict
import matplotlib.pyplot as plt

## Preprocessing

In [None]:
class DataPreprocessor:
    """Turn an ARFF file with the Adult-census columns into a numeric split.

    Continuous columns are min-max scaled, categorical columns are one-hot
    encoded and the target column is label-encoded. The fitted transformers
    are kept on the instance so callers can inspect them afterwards.
    """

    def __init__(self):
        self.continuous_features = ['age', 'fnlwgt', 'education-num', 'capital-gain',
                                  'capital-loss', 'hours-per-week']
        self.categorical_features = ['workclass', 'education', 'marital-status',
                                   'occupation', 'relationship', 'race', 'sex',
                                   'native-country']
        self.target = 'income'

        # feature name -> fitted MinMaxScaler
        self.scalers = {}
        # feature name (and the target name) -> fitted encoder
        self.encoders = {}
        # feature name -> number of columns it occupies after preprocessing
        self.feature_dims = {}

    def load_and_preprocess(self, arff_path, random_seed=42):
        """Load ``arff_path`` and return ``(X_train, X_test, y_train, y_test)``.

        Args:
            arff_path: Path of the ARFF file to read.
            random_seed: Seed forwarded to ``train_test_split``.

        Returns:
            An 80/20 split, stratified on the encoded target. The feature
            frames hold the scaled continuous columns followed by the one-hot
            columns named ``"<feature>_<category>"``.

        NOTE(review): scalers and encoders are fitted on the whole dataset
        before the split, so test rows influence the fitted ranges and
        categories. Confirm this is acceptable for the intended evaluation.
        """
        data, _meta = loadarff(arff_path)
        df = pd.DataFrame(data)

        # loadarff returns nominal attributes as bytes; decode them to str.
        for col in df.select_dtypes(include=['object']):
            df[col] = df[col].str.decode('utf-8')

        # Continuous features: one scaler per column.
        for feat in self.continuous_features:
            scaler = MinMaxScaler()
            df[feat] = scaler.fit_transform(df[[feat]]).ravel()
            self.scalers[feat] = scaler
            self.feature_dims[feat] = 1

        # Categorical features: encode each one into its own frame and attach
        # all of them with a single concat. Inserting the one-hot columns one
        # at a time fragments the DataFrame and makes pandas emit a
        # PerformanceWarning for every added column.
        encoded_frames = []
        for feat in self.categorical_features:
            encoder = OneHotEncoder(sparse_output=False)
            encoded = encoder.fit_transform(df[[feat]])
            categories = encoder.categories_[0]

            encoded_frames.append(pd.DataFrame(
                encoded,
                columns=[f"{feat}_{val}" for val in categories],
                index=df.index,
            ))

            self.encoders[feat] = encoder
            self.feature_dims[feat] = len(categories)

        # Same final column order as dropping/appending feature by feature:
        # the untouched columns first, then each feature's one-hot columns.
        df = pd.concat(
            [df.drop(columns=self.categorical_features), *encoded_frames],
            axis=1,
        )

        # Encode target
        target_encoder = LabelEncoder()
        df[self.target] = target_encoder.fit_transform(df[self.target])
        self.encoders[self.target] = target_encoder

        X = df.drop(columns=[self.target])
        y = df[self.target]

        # Stratify so both splits keep the label ratio of the full data.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=random_seed, stratify=y
        )

        return X_train, X_test, y_train, y_test

    @property
    def input_dim(self):
        """Total number of feature columns recorded in ``feature_dims``."""
        return sum(self.feature_dims.values())

## Generator Classes

In [None]:
class BaseGenerator(nn.Module):
    """MLP generator: three widening hidden stages and a sigmoid output.

    Every hidden stage is Linear -> LeakyReLU(0.2) -> BatchNorm1d; the final
    Linear projects to ``output_dim`` and the Sigmoid bounds outputs to (0, 1).
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        stages = []
        fan_in = input_dim
        for fan_out in (256, 512, 1024):
            stages.extend((
                nn.Linear(fan_in, fan_out),
                nn.LeakyReLU(0.2),
                nn.BatchNorm1d(fan_out),
            ))
            fan_in = fan_out
        stages.extend((nn.Linear(fan_in, output_dim), nn.Sigmoid()))
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.model(x)

In [None]:
class Generator(BaseGenerator):
    """Unconditional generator: maps a noise vector to one synthetic row."""

    def __init__(self, noise_dim, output_dim):
        super().__init__(noise_dim, output_dim)
        # BaseGAN sizes the noise it samples from `generator.noise_dim` and
        # falls back to 100 when the attribute is missing, so it must be set
        # here for any non-default noise size to work.
        self.noise_dim = noise_dim

In [None]:
class ConditionalGenerator(BaseGenerator):
    """Generator conditioned on a label vector appended to the noise."""

    def __init__(self, noise_dim, label_dim, output_dim):
        super().__init__(noise_dim + label_dim, output_dim)
        # BaseGAN sizes the noise it samples from `generator.noise_dim` and
        # falls back to 100 when the attribute is missing, so it must be set
        # here for any non-default noise size to work.
        self.noise_dim = noise_dim
        self.label_dim = label_dim

    def forward(self, noise, labels):
        """Concatenate ``noise`` and ``labels`` along dim 1 and generate."""
        input_data = torch.cat((noise, labels), dim=1)
        return super().forward(input_data)

## Discriminator Classes

In [None]:
class BaseDiscriminator(nn.Module):
    """MLP discriminator: three narrowing hidden stages and a sigmoid score.

    Every hidden stage is Linear -> LeakyReLU(0.2) -> Dropout(0.3); the final
    Linear produces one value per row, squashed by a Sigmoid.
    """

    def __init__(self, input_dim):
        super().__init__()
        stages = []
        fan_in = input_dim
        for fan_out in (512, 256, 128):
            stages.extend((
                nn.Linear(fan_in, fan_out),
                nn.LeakyReLU(0.2),
                nn.Dropout(0.3),
            ))
            fan_in = fan_out
        stages.extend((nn.Linear(fan_in, 1), nn.Sigmoid()))
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.model(x)

In [None]:
class Discriminator(BaseDiscriminator):
    """Unconditional discriminator: scores a data row on its own."""

    def __init__(self, input_dim):
        super().__init__(input_dim=input_dim)

In [None]:
class ConditionalDiscriminator(BaseDiscriminator):
    """Discriminator that scores a data row together with its label vector."""

    def __init__(self, input_dim, label_dim):
        # The network sees the data columns and the label columns side by side.
        joint_width = input_dim + label_dim
        super().__init__(joint_width)

    def forward(self, data, labels):
        """Concatenate ``data`` and ``labels`` along dim 1 and score them."""
        conditioned = torch.cat([data, labels], dim=1)
        return super().forward(conditioned)

## GAN Classes

In [None]:
class BaseGAN:
    """Adversarial training loop shared by the plain and conditional GANs.

    Holds a generator/discriminator pair, one Adam optimizer per network and
    a binary cross-entropy criterion. When labels are supplied they are passed
    to both networks as a second positional argument.
    """

    def __init__(self, generator, discriminator, lr=0.0002, noise_dim=None):
        """
        Args:
            generator: Module mapping noise (and optionally labels) to samples.
            discriminator: Module mapping samples (and optionally labels) to a
                probability of shape ``(batch, 1)``.
            lr: Learning rate used by both Adam optimizers.
            noise_dim: Width of the sampled noise. When omitted it is read
                from ``generator.noise_dim``, falling back to 100.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.generator = generator.to(self.device)
        self.discriminator = discriminator.to(self.device)
        if noise_dim is None:
            noise_dim = getattr(self.generator, 'noise_dim', 100)
        self.noise_dim = noise_dim

        self.g_optimizer = optim.Adam(self.generator.parameters(), lr=lr, betas=(0.5, 0.999))
        self.d_optimizer = optim.Adam(self.discriminator.parameters(), lr=lr, betas=(0.5, 0.999))
        self.criterion = nn.BCELoss()

    def _train_discriminator(self, real_data, real_labels=None):
        """Take one discriminator step on a real batch and a fresh fake batch.

        Returns:
            ``(d_loss, fake_data)``. ``fake_data`` is still attached to the
            generator's graph so the generator step can reuse it.
        """
        batch_size = real_data.size(0)
        label_real = torch.ones(batch_size, 1, device=self.device)
        label_fake = torch.zeros(batch_size, 1, device=self.device)
        # Extra positional argument for the conditional variant, empty otherwise.
        condition = () if real_labels is None else (real_labels,)

        self.d_optimizer.zero_grad()

        d_output_real = self.discriminator(real_data, *condition)
        noise = torch.randn(batch_size, self.noise_dim, device=self.device)
        fake_data = self.generator(noise, *condition)
        # detach(): this step must not push gradients into the generator.
        d_output_fake = self.discriminator(fake_data.detach(), *condition)

        d_loss_real = self.criterion(d_output_real, label_real)
        d_loss_fake = self.criterion(d_output_fake, label_fake)
        d_loss = d_loss_real + d_loss_fake

        d_loss.backward()
        self.d_optimizer.step()

        return d_loss, fake_data

    def _train_generator(self, fake_data, real_labels=None):
        """Take one generator step: push the discriminator's score towards 'real'."""
        batch_size = fake_data.size(0)
        label_real = torch.ones(batch_size, 1, device=self.device)
        condition = () if real_labels is None else (real_labels,)

        self.g_optimizer.zero_grad()

        fake_output = self.discriminator(fake_data, *condition)
        g_loss = self.criterion(fake_output, label_real)
        g_loss.backward()
        self.g_optimizer.step()

        return g_loss

    def train(self, train_data, train_labels=None, epochs=20, batch_size=64):
        """Train both networks and return the per-epoch mean losses.

        Args:
            train_data: Indexable collection of training rows (a tensor in
                this file).
            train_labels: Optional labels aligned with ``train_data``; when
                given, the conditional code path is used.
            epochs: Number of passes over the data.
            batch_size: Mini-batch size.

        Returns:
            A ``defaultdict(list)`` with keys ``'d_losses'`` and
            ``'g_losses'``, one mean value per epoch.
        """
        conditional = train_labels is not None
        if not conditional:
            dataset = train_data
        elif torch.is_tensor(train_data) and torch.is_tensor(train_labels):
            # Indexes the tensors directly instead of materialising a Python
            # list holding one tuple per row.
            dataset = torch.utils.data.TensorDataset(train_data, train_labels)
        else:
            dataset = list(zip(train_data, train_labels))

        train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
        history = defaultdict(list)

        # generate_samples() switches the generator to eval mode; make sure
        # both networks are back in training mode before optimising.
        self.generator.train()
        self.discriminator.train()

        for epoch in range(epochs):
            d_losses, g_losses = [], []

            pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}')
            for batch in pbar:
                if conditional:
                    real_data, real_labels = (part.to(self.device) for part in batch)
                else:
                    real_data, real_labels = batch.to(self.device), None

                if real_data.size(0) < 2:
                    # The generators in this file use BatchNorm1d, which
                    # raises on a single-sample batch in training mode; skip
                    # a trailing batch of one instead of aborting the run.
                    continue

                d_loss, fake_data = self._train_discriminator(real_data, real_labels)
                g_loss = self._train_generator(fake_data, real_labels)

                d_losses.append(d_loss.item())
                g_losses.append(g_loss.item())

                pbar.set_postfix({
                    'D_loss': f'{np.mean(d_losses):.4f}',
                    'G_loss': f'{np.mean(g_losses):.4f}'
                })

            # Keep one entry per epoch even if every batch was skipped.
            history['d_losses'].append(np.mean(d_losses) if d_losses else float('nan'))
            history['g_losses'].append(np.mean(g_losses) if g_losses else float('nan'))

        return history

In [None]:
class GAN(BaseGAN):
    """Unconditional GAN built from ``Generator`` and ``Discriminator``."""

    def __init__(self, input_dim, noise_dim=100, lr=0.0002):
        generator = Generator(noise_dim, input_dim)
        discriminator = Discriminator(input_dim)
        super().__init__(generator, discriminator, lr)
        # Record the requested width explicitly so noise sampling never
        # depends on the generator exposing a `noise_dim` attribute.
        self.noise_dim = noise_dim

    def generate_samples(self, num_samples):
        """Return ``num_samples`` generated rows as a NumPy array.

        The generator runs in eval mode without gradient tracking and is put
        back into whatever mode it was in before the call.
        """
        was_training = self.generator.training
        self.generator.eval()
        try:
            with torch.no_grad():
                noise = torch.randn(num_samples, self.noise_dim).to(self.device)
                fake_data = self.generator(noise)
        finally:
            # Without this the generator stays in eval mode for any later
            # training on the same instance.
            self.generator.train(was_training)
        return fake_data.cpu().numpy()

In [None]:
class ConditionalGAN(BaseGAN):
    """Label-conditioned GAN built from the conditional generator/discriminator."""

    def __init__(self, input_dim, label_dim, noise_dim=100, lr=0.0002):
        generator = ConditionalGenerator(noise_dim, label_dim, input_dim)
        discriminator = ConditionalDiscriminator(input_dim, label_dim)
        super().__init__(generator, discriminator, lr)
        # Record the requested width explicitly so noise sampling never
        # depends on the generator exposing a `noise_dim` attribute.
        self.noise_dim = noise_dim

    def generate_samples(self, num_samples, labels):
        """Return ``num_samples`` rows generated for ``labels`` as a NumPy array.

        ``labels`` is concatenated with the noise along dim 1, so it needs
        ``num_samples`` rows. The generator runs in eval mode without gradient
        tracking and is put back into whatever mode it was in before the call.
        """
        was_training = self.generator.training
        self.generator.eval()
        try:
            with torch.no_grad():
                noise = torch.randn(num_samples, self.noise_dim).to(self.device)
                labels = labels.to(self.device)
                fake_data = self.generator(noise, labels)
        finally:
            # Without this the generator stays in eval mode for any later
            # training on the same instance.
            self.generator.train(was_training)
        return fake_data.cpu().numpy()

## Training GANs

In [None]:
def run_experiment_GAN(arff_path, random_seed, epochs=100, batch_size=64):
    """Train an unconditional GAN on the preprocessed data and sample from it.

    Args:
        arff_path: Path of the ARFF file to load.
        random_seed: Seed for the train/test split and for torch's RNG.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used for training.

    Returns:
        ``(history, synthetic_samples, (X_train, X_test, y_train, y_test))``
        where ``synthetic_samples`` has one generated row per training row.
    """
    # Seed torch as well: weight initialisation, batch shuffling and noise
    # sampling all draw from its RNG, so seeding only the split leaves runs
    # with the same `random_seed` non-reproducible.
    torch.manual_seed(random_seed)

    preprocessor = DataPreprocessor()
    X_train, X_test, y_train, y_test = preprocessor.load_and_preprocess(
        arff_path, random_seed=random_seed
    )

    train_data = torch.tensor(X_train.to_numpy(dtype=np.float32))

    # Size the networks from the matrix they will actually be fed.
    gan = GAN(input_dim=train_data.shape[1])
    history = gan.train(train_data, epochs=epochs, batch_size=batch_size)

    synthetic_samples = gan.generate_samples(len(X_train))

    return history, synthetic_samples, (X_train, X_test, y_train, y_test)

In [None]:
def run_experiment_CGAN(arff_path, random_seed, epochs=100, batch_size=64):
    """Train a conditional GAN on the preprocessed data and sample from it.

    Args:
        arff_path: Path of the ARFF file to load.
        random_seed: Seed for the train/test split and for torch's RNG.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used for training.

    Returns:
        ``(history, synthetic_samples, (X_train, X_test, y_train, y_test))``.
        Row ``i`` of ``synthetic_samples`` is generated for the label of
        training row ``i``.
    """
    # Seed torch as well: weight initialisation, batch shuffling and noise
    # sampling all draw from its RNG, so seeding only the split leaves runs
    # with the same `random_seed` non-reproducible.
    torch.manual_seed(random_seed)

    preprocessor = DataPreprocessor()
    X_train, X_test, y_train, y_test = preprocessor.load_and_preprocess(
        arff_path, random_seed=random_seed
    )

    train_data = torch.tensor(X_train.to_numpy(dtype=np.float32))
    # One-hot labels, cast explicitly: get_dummies may return a non-float
    # dtype depending on the pandas version.
    train_labels = torch.tensor(pd.get_dummies(y_train).to_numpy(dtype=np.float32))

    # Size the networks from the matrices they will actually be fed.
    cgan = ConditionalGAN(input_dim=train_data.shape[1], label_dim=train_labels.size(1))
    history = cgan.train(train_data, train_labels, epochs=epochs, batch_size=batch_size)

    # Condition generation on the training labels, in training order.
    synthetic_samples = cgan.generate_samples(len(X_train), train_labels)

    return history, synthetic_samples, (X_train, X_test, y_train, y_test)

## Evaluation metrics

In [None]:
def detection_metric(real_data, synthetic_data, n_splits=4, random_state=42):
    """Measure how well a random forest tells real rows from synthetic ones.

    Real rows are labelled 1 and synthetic rows 0; the classifier is scored
    with stratified cross-validation so every fold keeps the same mix.

    Args:
        real_data: 2-D array of real rows.
        synthetic_data: 2-D array of synthetic rows with the same columns.
        n_splits: Number of cross-validation folds.
        random_state: Seed for the fold shuffling and the random forest.

    Returns:
        Mean ROC AUC over the folds. 0.5 means the classifier cannot separate
        the two sources; 1.0 means it separates them perfectly.
    """
    combined_X = np.vstack((real_data, synthetic_data))
    combined_y = np.hstack((
        np.ones(len(real_data)),       # real -> 1
        np.zeros(len(synthetic_data))  # synthetic -> 0
    ))

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    auc_scores = []

    for train_idx, test_idx in skf.split(combined_X, combined_y):
        model = RandomForestClassifier(random_state=random_state)
        model.fit(combined_X[train_idx], combined_y[train_idx])

        # Probability of the "real" class (column 1) on the held-out fold.
        y_pred = model.predict_proba(combined_X[test_idx])[:, 1]
        auc_scores.append(roc_auc_score(combined_y[test_idx], y_pred))

    return np.mean(auc_scores)


def _forest_test_auc(train_X, train_y, test_X, test_y, random_state):
    """Fit a random forest on the training pair and return its ROC AUC on the test pair."""
    forest = RandomForestClassifier(random_state=random_state)
    forest.fit(train_X, train_y)
    return roc_auc_score(test_y, forest.predict_proba(test_X)[:, 1])


def efficacy_metric(real_train, real_test, real_labels, synthetic_train, synthetic_labels,
                    test_labels, random_state=42):
    """Compare a model trained on synthetic data with one trained on real data.

    Both models are random forests evaluated on the same real test set.

    Args:
        real_train: Real training features.
        real_test: Real test features.
        real_labels: Labels for ``real_train``.
        synthetic_train: Synthetic training features.
        synthetic_labels: Labels for ``synthetic_train``.
        test_labels: Labels for ``real_test``.
        random_state: Seed used for both random forests.

    Returns:
        ``(real_auc, synthetic_auc, efficacy_ratio)`` where the ratio is
        ``synthetic_auc / real_auc`` (NaN if ``real_auc`` is zero).
    """
    real_auc = _forest_test_auc(real_train, real_labels, real_test, test_labels, random_state)
    synthetic_auc = _forest_test_auc(
        synthetic_train, synthetic_labels, real_test, test_labels, random_state
    )

    # Guard the degenerate baseline so the ratio never divides by zero.
    efficacy_ratio = synthetic_auc / real_auc if real_auc else float('nan')
    return real_auc, synthetic_auc, efficacy_ratio

## Main

In [None]:
# Example usage for run_experiment_GAN
# For every seed: train the GAN, save the loss curves to a PNG, then report
# the detection and efficacy metrics; a per-seed summary is printed at the end.
if __name__ == "__main__":
    arff_path = r"/home/nicoleka/DLL-Ass4/adult.arff"
    random_seeds = [42, 123, 456]

    detection_results = []
    efficacy_results = []

    for seed in random_seeds:
        print(f"\nRunning experiment with seed {seed}")
        history, synthetic_samples, (X_train, X_test, y_train, y_test) = run_experiment_GAN(
            arff_path, seed
        )

        # Plot training progress
        plt.figure(figsize=(10, 5))
        plt.plot(history['d_losses'], label='Discriminator')
        plt.plot(history['g_losses'], label='Generator')
        plt.title(f'Training Progress (Seed {seed})')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.savefig(f'training_progress_seed_{seed}.png')
        plt.close()

        # Detection Metric
        detection_auc = detection_metric(X_train.values, synthetic_samples)
        print(f"Detection AUC (Seed {seed}): {detection_auc:.4f}")
        detection_results.append({
            'seed': seed,
            'detection_auc': detection_auc
        })

        # Efficacy Metric
        # NOTE(review): the unconditional GAN is trained on the feature matrix
        # only and samples from noise alone, so synthetic row i has no relation
        # to y_train[i]. Pairing them trains the "synthetic" classifier on
        # unrelated labels; the synthetic AUC and the efficacy ratio below
        # therefore say little about sample quality. A meaningful figure needs
        # the label to be generated together with the features.
        real_auc, synthetic_auc, efficacy_ratio = efficacy_metric(
            X_train.values, X_test.values, y_train.values,
            synthetic_samples, y_train.values, y_test.values
        )
        print(f"Efficacy AUC (Real, Seed {seed}): {real_auc:.4f}")
        print(f"Efficacy AUC (Synthetic, Seed {seed}): {synthetic_auc:.4f}")
        print(f"Efficacy Ratio (Seed {seed}): {efficacy_ratio:.4f}")
        efficacy_results.append({
            'seed': seed,
            'real_auc': real_auc,
            'synthetic_auc': synthetic_auc,
            'efficacy_ratio': efficacy_ratio
        })

    # Summarize results
    print("\nSummary of Detection Results:")
    for result in detection_results:
        print(f"Seed {result['seed']}: Detection AUC = {result['detection_auc']:.4f}")

    print("\nSummary of Efficacy Results:")
    for result in efficacy_results:
        print(f"Seed {result['seed']}: Real AUC = {result['real_auc']:.4f}, Synthetic AUC = {result['synthetic_auc']:.4f}, Ratio = {result['efficacy_ratio']:.4f}")


Running experiment with seed 42


  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
Epoch 1/100: 100%|██████████| 407/407 [00:10<00:00, 38.77it/s, D_loss=0.5194, G_loss=7.9320] 
Epoch 2/100: 100%|██████████| 407/407 [00:02<00:00, 195.31it/s, D_loss=0.3799, G_loss=7.2238]
Epoch 3/100: 100%|██████████| 407/407 [00:02<00:00, 195.58it/s, D_loss=0.2986, G_loss=5.8498]
Epoch 4/100: 100%|██████████| 407/407 [00:02<00:00, 198.10it/s, D_loss=0.3218, G_loss=4.4395]
Epoch 5/100: 100%|██████████| 407/407 [00:02<00:00, 195.26it/s, D_loss=0.3330, G_loss=3.0974]
Epoch 6/100: 100%|██████████| 407/407 [00:02<00:00, 194.41it/s, D_loss=0.3002, G_loss=3.0449]
Epoch 7/100: 100%|██████████| 407/407 [00:02<00:00, 196.82it/s, D_loss=0.3251, G_loss=2.9515]
Epoch 8/100: 100%|██████████| 407/407 [00:02<00:00, 199.88it/s, D_loss=0.3475, G_loss=2.7794]
Epoch

Detection AUC (Seed 42): 1.0000
Efficacy AUC (Real, Seed 42): 0.9071
Efficacy AUC (Synthetic, Seed 42): 0.5543
Efficacy Ratio (Seed 42): 0.6111

Running experiment with seed 123


  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
Epoch 1/100: 100%|██████████| 407/407 [00:02<00:00, 199.89it/s, D_loss=0.5465, G_loss=7.9886]
Epoch 2/100: 100%|██████████| 407/407 [00:02<00:00, 197.37it/s, D_loss=0.2852, G_loss=7.8907]
Epoch 3/100: 100%|██████████| 407/407 [00:02<00:00, 198.82it/s, D_loss=0.2441, G_loss=6.5698]
Epoch 4/100: 100%|██████████| 407/407 [00:02<00:00, 194.44it/s, D_loss=0.2422, G_loss=5.4420]
Epoch 5/100: 100%|██████████| 407/407 [00:02<00:00, 197.49it/s, D_loss=0.2015, G_loss=3.9642]
Epoch 6/100: 100%|██████████| 407/407 [00:02<00:00, 198.63it/s, D_loss=0.1841, G_loss=4.2398]
Epoch 7/100: 100%|██████████| 407/407 [00:02<00:00, 198.24it/s, D_loss=0.2324, G_loss=3.4247]
Epoch 8/100: 100%|██████████| 407/407 [00:02<00:00, 192.80it/s, D_loss=0.2186, G_loss=3.3685]
Epoch

Detection AUC (Seed 123): 1.0000
Efficacy AUC (Real, Seed 123): 0.9001
Efficacy AUC (Synthetic, Seed 123): 0.5218
Efficacy Ratio (Seed 123): 0.5798

Running experiment with seed 456


  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
Epoch 1/100: 100%|██████████| 407/407 [00:02<00:00, 198.83it/s, D_loss=0.5479, G_loss=7.9773]
Epoch 2/100: 100%|██████████| 407/407 [00:02<00:00, 199.19it/s, D_loss=0.3274, G_loss=7.5778]
Epoch 3/100: 100%|██████████| 407/407 [00:02<00:00, 199.23it/s, D_loss=0.3093, G_loss=6.2232]
Epoch 4/100: 100%|██████████| 407/407 [00:02<00:00, 197.04it/s, D_loss=0.3177, G_loss=3.9740]
Epoch 5/100: 100%|██████████| 407/407 [00:02<00:00, 196.80it/s, D_loss=0.3388, G_loss=3.0156]
Epoch 6/100: 100%|██████████| 407/407 [00:02<00:00, 199.68it/s, D_loss=0.3180, G_loss=2.9810]
Epoch 7/100: 100%|██████████| 407/407 [00:02<00:00, 199.27it/s, D_loss=0.3302, G_loss=2.8271]
Epoch 8/100: 100%|██████████| 407/407 [00:02<00:00, 196.73it/s, D_loss=0.2963, G_loss=2.8495]
Epoch

Detection AUC (Seed 456): 1.0000
Efficacy AUC (Real, Seed 456): 0.9032
Efficacy AUC (Synthetic, Seed 456): 0.5702
Efficacy Ratio (Seed 456): 0.6313

Summary of Detection Results:
Seed 42: Detection AUC = 1.0000
Seed 123: Detection AUC = 1.0000
Seed 456: Detection AUC = 1.0000

Summary of Efficacy Results:
Seed 42: Real AUC = 0.9071, Synthetic AUC = 0.5543, Ratio = 0.6111
Seed 123: Real AUC = 0.9001, Synthetic AUC = 0.5218, Ratio = 0.5798
Seed 456: Real AUC = 0.9032, Synthetic AUC = 0.5702, Ratio = 0.6313


In [None]:
# Example usage for run_experiment_CGAN
# For every seed: train the cGAN, save the loss curves to a PNG, then report
# the detection and efficacy metrics; a per-seed summary is printed at the end.
if __name__ == "__main__":
    arff_path = "adult.arff"
    random_seeds = [42, 123, 456]  # Add more seeds if needed

    detection_results, efficacy_results = [], []

    for seed in random_seeds:
        print(f"\nRunning cGAN experiment with seed {seed}")
        history, synthetic_samples, (X_train, X_test, y_train, y_test) = run_experiment_CGAN(
            arff_path, seed
        )

        # Loss curves for this seed, written to disk rather than shown.
        plt.figure(figsize=(10, 5))
        plt.plot(history['d_losses'], label='Discriminator')
        plt.plot(history['g_losses'], label='Generator')
        plt.title(f'Training Progress (cGAN, Seed {seed})')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.savefig(f'cgan_training_progress_seed_{seed}.png')
        plt.close()

        # Detection: can a classifier tell real rows from generated ones?
        detection_auc = detection_metric(X_train.values, synthetic_samples)
        print(f"Detection AUC (Seed {seed}): {detection_auc:.4f}")
        detection_results.append(dict(seed=seed, detection_auc=detection_auc))

        # Efficacy: train on synthetic vs. real rows, test on real rows.
        # The samples were generated for y_train, so y_train labels them.
        real_auc, synthetic_auc, efficacy_ratio = efficacy_metric(
            X_train.values,
            X_test.values,
            y_train.values,
            synthetic_samples,
            y_train.values,
            y_test.values,
        )
        print(f"Efficacy AUC (Real, Seed {seed}): {real_auc:.4f}")
        print(f"Efficacy AUC (Synthetic, Seed {seed}): {synthetic_auc:.4f}")
        print(f"Efficacy Ratio (Seed {seed}): {efficacy_ratio:.4f}")
        efficacy_results.append(dict(
            seed=seed,
            real_auc=real_auc,
            synthetic_auc=synthetic_auc,
            efficacy_ratio=efficacy_ratio,
        ))

    # Per-seed recap of both metrics.
    print("\nSummary of Detection Results:")
    for result in detection_results:
        print(f"Seed {result['seed']}: Detection AUC = {result['detection_auc']:.4f}")

    print("\nSummary of Efficacy Results:")
    for result in efficacy_results:
        print(f"Seed {result['seed']}: Real AUC = {result['real_auc']:.4f}, Synthetic AUC = {result['synthetic_auc']:.4f}, Ratio = {result['efficacy_ratio']:.4f}")


Running cGAN experiment with seed 42


  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
Epoch 1/100: 100%|██████████| 407/407 [00:02<00:00, 180.58it/s, D_loss=0.5833, G_loss=7.7545]
Epoch 2/100: 100%|██████████| 407/407 [00:02<00:00, 190.82it/s, D_loss=0.3644, G_loss=7.0087]
Epoch 3/100: 100%|██████████| 407/407 [00:02<00:00, 191.92it/s, D_loss=0.3290, G_loss=5.5325]
Epoch 4/100: 100%|██████████| 407/407 [00:02<00:00, 193.20it/s, D_loss=0.3373, G_loss=4.3142]
Epoch 5/100: 100%|██████████| 407/407 [00:02<00:00, 191.82it/s, D_loss=0.3830, G_loss=3.0155]
Epoch 6/100: 100%|██████████| 407/407 [00:02<00:00, 194.29it/s, D_loss=0.3814, G_loss=2.8175]
Epoch 7/100: 100%|██████████| 407/407 [00:02<00:00, 192.10it/s, D_loss=0.3947, G_loss=2.7003]
Epoch 8/100: 100%|██████████| 407/407 [00:02<00:00, 190.62it/s, D_loss=0.4130, G_loss=2.6046]
Epoch

Detection AUC (Seed 42): 1.0000
Efficacy AUC (Real, Seed 42): 0.9071
Efficacy AUC (Synthetic, Seed 42): 0.7127
Efficacy Ratio (Seed 42): 0.7857

Running cGAN experiment with seed 123


  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
Epoch 1/100: 100%|██████████| 407/407 [00:02<00:00, 193.58it/s, D_loss=0.5757, G_loss=8.0034]
Epoch 2/100: 100%|██████████| 407/407 [00:02<00:00, 193.15it/s, D_loss=0.3364, G_loss=6.9197]
Epoch 3/100: 100%|██████████| 407/407 [00:02<00:00, 191.95it/s, D_loss=0.2884, G_loss=6.1373]
Epoch 4/100: 100%|██████████| 407/407 [00:02<00:00, 193.99it/s, D_loss=0.3303, G_loss=4.5479]
Epoch 5/100: 100%|██████████| 407/407 [00:02<00:00, 191.25it/s, D_loss=0.3519, G_loss=3.0594]
Epoch 6/100: 100%|██████████| 407/407 [00:02<00:00, 192.53it/s, D_loss=0.3445, G_loss=2.9179]
Epoch 7/100: 100%|██████████| 407/407 [00:02<00:00, 192.50it/s, D_loss=0.3517, G_loss=2.8586]
Epoch 8/100: 100%|██████████| 407/407 [00:02<00:00, 192.08it/s, D_loss=0.3685, G_loss=2.6938]
Epoch

Detection AUC (Seed 123): 1.0000
Efficacy AUC (Real, Seed 123): 0.9001
Efficacy AUC (Synthetic, Seed 123): 0.7495
Efficacy Ratio (Seed 123): 0.8328

Running cGAN experiment with seed 456


  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
  df[name] = encoded[:, i]
Epoch 1/100: 100%|██████████| 407/407 [00:02<00:00, 194.10it/s, D_loss=0.5908, G_loss=7.6282]
Epoch 2/100: 100%|██████████| 407/407 [00:02<00:00, 193.15it/s, D_loss=0.3774, G_loss=6.5744]
Epoch 3/100: 100%|██████████| 407/407 [00:02<00:00, 191.80it/s, D_loss=0.3227, G_loss=5.2913]
Epoch 4/100: 100%|██████████| 407/407 [00:02<00:00, 192.80it/s, D_loss=0.3048, G_loss=4.1117]
Epoch 5/100: 100%|██████████| 407/407 [00:02<00:00, 193.46it/s, D_loss=0.3158, G_loss=3.2742]
Epoch 6/100: 100%|██████████| 407/407 [00:02<00:00, 190.44it/s, D_loss=0.3682, G_loss=2.9305]
Epoch 7/100: 100%|██████████| 407/407 [00:02<00:00, 193.66it/s, D_loss=0.4157, G_loss=2.6622]
Epoch 8/100: 100%|██████████| 407/407 [00:02<00:00, 190.87it/s, D_loss=0.3824, G_loss=2.7254]
Epoch

Detection AUC (Seed 456): 1.0000
Efficacy AUC (Real, Seed 456): 0.9032
Efficacy AUC (Synthetic, Seed 456): 0.8048
Efficacy Ratio (Seed 456): 0.8911

Summary of Detection Results:
Seed 42: Detection AUC = 1.0000
Seed 123: Detection AUC = 1.0000
Seed 456: Detection AUC = 1.0000

Summary of Efficacy Results:
Seed 42: Real AUC = 0.9071, Synthetic AUC = 0.7127, Ratio = 0.7857
Seed 123: Real AUC = 0.9001, Synthetic AUC = 0.7495, Ratio = 0.8328
Seed 456: Real AUC = 0.9032, Synthetic AUC = 0.8048, Ratio = 0.8911
