In [1]:
pip install --upgrade tensorflow keras


Collecting tensorflow
  Using cached tensorflow-2.18.0-cp311-cp311-win_amd64.whl.metadata (3.3 kB)
Using cached tensorflow-2.18.0-cp311-cp311-win_amd64.whl (7.5 kB)
Installing collected packages: tensorflow
  Attempting uninstall: tensorflow
    Found existing installation: tensorflow 2.16.1
    Uninstalling tensorflow-2.16.1:
      Successfully uninstalled tensorflow-2.16.1
Successfully installed tensorflow-2.18.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, Concatenate, LeakyReLU
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler

# Hyper-parameters shared by the model-building and optimizer cells.
# Note: `epochs` and `batch_size` are assigned again at the top of the
# training cell, so the values used for training are the ones set there.
latent_dim = 100       # length of the generator's noise vector
epochs = 10_000        # number of training iterations
batch_size = 32        # samples drawn per iteration
learning_rate = 2e-4   # Adam step size
beta_1 = 0.5           # Adam first-moment decay rate


In [4]:
# Load the dataset
data = pd.read_csv('final_dataset.csv')

# Separate features and labels
X = data.drop(columns='Type')  # Replace 'Type' with the actual label column name if different

# Labels are cast to integers because they are fed to Embedding layers.
# NOTE(review): the Embedding layers are sized with `num_classes`, so labels
# are presumably expected to be 0 .. num_classes-1 -- confirm against the CSV.
Y = data['Type'].astype(int)

# Scale features to [-1, 1]. The generator's output layer uses tanh, whose
# range is (-1, 1); scaling the real data to the same interval keeps real and
# generated samples comparable for the critic and keeps
# `scaler.inverse_transform` of generated samples inside the observed range.
scaler = MinMaxScaler(feature_range=(-1, 1))
X_scaled = scaler.fit_transform(X)

# Define GAN parameters based on the dataset
num_classes = Y.nunique()         # Number of unique labels in the dataset
input_dim = X_scaled.shape[1]     # Number of features in the dataset

print(f"Data shape: {X_scaled.shape}, Number of classes: {num_classes}")


Data shape: (78105, 3), Number of classes: 2


In [5]:
def build_generator(latent_dim, num_classes, input_dim):
    """Build the conditional generator network.

    The model takes two inputs, a noise vector of length ``latent_dim`` and a
    single class label, and produces a vector of length ``input_dim``.

    Args:
        latent_dim: Length of the noise vector; also used as the width of the
            label embedding.
        num_classes: Number of rows in the label embedding table.
        input_dim: Number of features in each generated sample.

    Returns:
        A Keras ``Model`` mapping ``[noise, label]`` to a generated sample
        whose values come from a ``tanh`` output layer.
    """
    z_in = Input(shape=(latent_dim,))
    class_in = Input(shape=(1,))

    # Look up a latent_dim-wide vector for the label and drop the length-1 axis.
    class_vec = Embedding(num_classes, latent_dim)(class_in)
    class_vec = Flatten()(class_vec)

    # Condition the noise on the label by concatenating both vectors.
    hidden = Concatenate()([z_in, class_vec])

    # Two fully connected stages (128 then 256 units), each followed by LeakyReLU.
    for units in (128, 256):
        hidden = Dense(units)(hidden)
        hidden = LeakyReLU(0.2)(hidden)

    sample_out = Dense(input_dim, activation='tanh')(hidden)

    return Model([z_in, class_in], sample_out)

# Instantiate the generator with the sizes defined in the earlier cells
# and print its layer-by-layer summary.
generator = build_generator(latent_dim, num_classes, input_dim)
generator.summary()


In [10]:
def build_critic(input_dim, num_classes):
    """Build the conditional critic network.

    The model takes a data sample of length ``input_dim`` together with a
    single class label and returns one unbounded score per sample.

    Args:
        input_dim: Number of features in each sample; also used as the width
            of the label embedding.
        num_classes: Number of rows in the label embedding table.

    Returns:
        A Keras ``Model`` mapping ``[sample, label]`` to a single linear
        (no activation) output.
    """
    sample_in = Input(shape=(input_dim,))
    class_in = Input(shape=(1,))

    # Look up an input_dim-wide vector for the label and drop the length-1 axis.
    class_vec = Embedding(num_classes, input_dim)(class_in)
    class_vec = Flatten()(class_vec)

    # Score the sample jointly with its label.
    hidden = Concatenate()([sample_in, class_vec])

    # Two fully connected stages (256 then 128 units), each followed by LeakyReLU.
    for units in (256, 128):
        hidden = Dense(units)(hidden)
        hidden = LeakyReLU(0.2)(hidden)

    # No activation: the Wasserstein loss needs an unbounded score.
    score = Dense(1)(hidden)

    return Model([sample_in, class_in], score)

# Instantiate the critic with the sizes defined in the earlier cells
# and print its layer-by-layer summary.
critic = build_critic(input_dim, num_classes)
critic.summary()


In [11]:
# Wasserstein loss
def wasserstein_loss(y_true, y_pred):
    """Return the mean of the element-wise product ``y_true * y_pred``.

    ``y_true`` acts as a sign/weight tensor (callers pass +1 or -1 tensors)
    and ``y_pred`` holds the critic scores.
    """
    signed_scores = y_true * y_pred
    return tf.reduce_mean(signed_scores)

# Optimizers
# One Adam instance per network so the critic and the generator are updated
# by separate optimizer objects; both are configured with the module-level
# `learning_rate` and `beta_1` values.
critic_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)
generator_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)


In [None]:
epochs = 50000  # Set this to a larger number to increase training time
batch_size = 64  # Adjust batch size as needed
n_critic = 5       # Critic updates per generator update (standard WGAN setting)
clip_value = 0.01  # Critic weights are clipped to [-clip_value, clip_value]

# Convert the training arrays once instead of on every iteration.
real_features_all = X_scaled.astype('float32')
real_labels_all = Y.to_numpy().reshape(-1, 1).astype('float32')
num_rows = real_features_all.shape[0]


# Sign convention used below (consistent between both updates):
#   critic minimises     mean(score(real)) - mean(score(fake))
#   generator minimises  mean(score(fake))
# i.e. the critic assigns LOW scores to real samples and HIGH scores to fakes,
# and the generator tries to lower the score of its samples.
for epoch in range(epochs):
    # ---------------- Critic updates ----------------
    for _ in range(n_critic):
        # Select a random batch of real data
        idx = np.random.randint(0, num_rows, batch_size)
        real_data = tf.convert_to_tensor(real_features_all[idx])
        real_label = tf.convert_to_tensor(real_labels_all[idx])

        # Random noise and labels for the fake batch
        noise = tf.random.normal((batch_size, latent_dim))
        random_labels = tf.convert_to_tensor(
            np.random.randint(0, num_classes, (batch_size, 1)).astype('float32')
        )

        # The generator is not updated in this step, so its forward pass does
        # not need to be recorded on the critic's tape.
        generated_data = generator([noise, random_labels], training=True)

        with tf.GradientTape() as tape_critic:
            # Critic predictions on real and fake data
            real_validity = critic([real_data, real_label], training=True)
            fake_validity = critic([generated_data, random_labels], training=True)

            # Wasserstein loss for critic
            c_loss = (
                wasserstein_loss(tf.ones_like(real_validity), real_validity)
                + wasserstein_loss(-tf.ones_like(fake_validity), fake_validity)
            )

        # Update critic
        grads_critic = tape_critic.gradient(c_loss, critic.trainable_variables)
        critic_optimizer.apply_gradients(zip(grads_critic, critic.trainable_variables))

        # Enforce the Lipschitz constraint required by the Wasserstein loss.
        # Without it the critic's scores (and hence the loss) are unbounded
        # and training diverges.
        for weight in critic.trainable_variables:
            weight.assign(tf.clip_by_value(weight, -clip_value, clip_value))

    # ---------------- Generator update ----------------
    noise = tf.random.normal((batch_size, latent_dim))
    random_labels = tf.convert_to_tensor(
        np.random.randint(0, num_classes, (batch_size, 1)).astype('float32')
    )

    with tf.GradientTape() as tape_generator:
        generated_data = generator([noise, random_labels], training=True)
        fake_validity = critic([generated_data, random_labels], training=True)

        # Generator minimises the critic's score on its samples (see the sign
        # convention above), pushing them towards the scores of real data.
        g_loss = wasserstein_loss(tf.ones_like(fake_validity), fake_validity)

    # Update generator
    grads_generator = tape_generator.gradient(g_loss, generator.trainable_variables)
    generator_optimizer.apply_gradients(zip(grads_generator, generator.trainable_variables))

    # Print progress every 1000 epochs
    if epoch % 1000 == 0:
        print(f"Epoch: {epoch} | Critic Loss: {c_loss.numpy():.4f} | Generator Loss: {g_loss.numpy():.4f}")



In [8]:
import pandas as pd
import numpy as np

# Sampling plan: the same number of synthetic rows for every class.
# Relies on `num_classes`, `latent_dim`, `generator`, `scaler` and `X`
# from the earlier cells.
num_samples_per_class = 1000  # Adjust as needed

# One standard-normal noise vector per requested row.
noise = np.random.normal(
    0, 1, (num_samples_per_class * num_classes, latent_dim)
)

# Class ids laid out as 0,0,...,1,1,... and shaped as a column vector.
labels = np.repeat(np.arange(num_classes), num_samples_per_class)
labels = labels.reshape(-1, 1)

# Run the trained generator on the (noise, label) pairs.
synthetic_data = generator.predict([noise, labels])

# Map the generated values back through the scaler fitted on the real data.
synthetic_data_original_scale = scaler.inverse_transform(synthetic_data)

# Assemble a table with the original feature names plus the class id.
synthetic_df = pd.DataFrame(
    data=synthetic_data_original_scale,
    columns=X.columns,
)
synthetic_df['label'] = labels

# Write the table to disk without the index column.
synthetic_df.to_csv('synthetic_data.csv', index=False)
print("Synthetic data saved as 'synthetic_data.csv'")


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Synthetic data saved as 'synthetic_data.csv'




In [None]:
# Ask the Keras backend to clear its session state.
# NOTE(review): Python variables such as `generator` and `critic` still
# reference their model objects after this call -- confirm whether they are
# meant to be rebuilt afterwards.
from tensorflow.keras import backend as K
K.clear_session()


In [9]:
!pip install tensorflow==2.16.1 keras==3.0.0

Collecting tensorflow==2.16.1
  Using cached tensorflow-2.16.1-cp311-cp311-win_amd64.whl.metadata (3.5 kB)
Collecting keras==3.0.0
  Using cached keras-3.0.0-py3-none-any.whl.metadata (5.3 kB)
Collecting tensorflow-intel==2.16.1 (from tensorflow==2.16.1)
  Using cached tensorflow_intel-2.16.1-cp311-cp311-win_amd64.whl.metadata (5.0 kB)
Collecting ml-dtypes~=0.3.1 (from tensorflow-intel==2.16.1->tensorflow==2.16.1)
  Using cached ml_dtypes-0.3.2-cp311-cp311-win_amd64.whl.metadata (20 kB)
Collecting tensorboard<2.17,>=2.16 (from tensorflow-intel==2.16.1->tensorflow==2.16.1)
  Using cached tensorboard-2.16.2-py3-none-any.whl.metadata (1.6 kB)
Using cached tensorflow-2.16.1-cp311-cp311-win_amd64.whl (2.1 kB)
Using cached keras-3.0.0-py3-none-any.whl (997 kB)
Using cached tensorflow_intel-2.16.1-cp311-cp311-win_amd64.whl (377.0 MB)
Using cached ml_dtypes-0.3.2-cp311-cp311-win_amd64.whl (127 kB)
Using cached tensorboard-2.16.2-py3-none-any.whl (5.5 MB)
Installing collected packages: ml-dtype

  You can safely remove it manually.
  You can safely remove it manually.

[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip
