In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.optimizers import Adam

# Placeholder for the real minority-class data: a 2-D array with one row per sample
# and one column per embedding feature. Here it is filled with uniform random values.
minority_class_embeddings = np.random.random_sample((300, 10000))
# Width of a single embedding; the generator output and discriminator input use it.
embedding_dim = minority_class_embeddings.shape[-1]

# Define the Generator
def build_generator(noise_dim, embedding_dim):
    """Create the (uncompiled) generator network.

    The network is a fully connected stack mapping a noise vector of length
    ``noise_dim`` through hidden layers of 256 and 512 units (each followed by
    a LeakyReLU with slope 0.2) to a linear output of length ``embedding_dim``.

    Args:
        noise_dim: Length of the input noise vector.
        embedding_dim: Length of the generated embedding vector.

    Returns:
        A ``Sequential`` model; it is not compiled here.
    """
    generator_net = Sequential()

    # First hidden layer also declares the input width.
    generator_net.add(Dense(256, input_dim=noise_dim))
    generator_net.add(LeakyReLU(alpha=0.2))

    generator_net.add(Dense(512))
    generator_net.add(LeakyReLU(alpha=0.2))

    # Linear output: embeddings are left unbounded.
    generator_net.add(Dense(embedding_dim, activation='linear'))

    return generator_net

# Define the Discriminator
def build_discriminator(embedding_dim):
    """Create the (uncompiled) discriminator network.

    The network is a fully connected stack taking an embedding of length
    ``embedding_dim`` through hidden layers of 512 and 256 units (each followed
    by a LeakyReLU with slope 0.2) to a single sigmoid unit.

    Args:
        embedding_dim: Length of the embedding vectors to classify.

    Returns:
        A ``Sequential`` model with one sigmoid output; it is not compiled here.
    """
    critic_net = Sequential()

    # First hidden layer also declares the input width.
    critic_net.add(Dense(512, input_dim=embedding_dim))
    critic_net.add(LeakyReLU(alpha=0.2))

    critic_net.add(Dense(256))
    critic_net.add(LeakyReLU(alpha=0.2))

    # Single sigmoid unit, paired with binary cross-entropy by the caller.
    critic_net.add(Dense(1, activation='sigmoid'))

    return critic_net

# Hyperparameters
noise_dim = 100  # Size of the random noise vector fed to the generator
batch_size = 64  # Number of real (and of fake) samples drawn per training step
epochs = 100  # Number of outer training-loop iterations
learning_rate = 0.0002  # Passed to both Adam optimizers below

# Build and compile the discriminator.
# It is compiled on its own (with an accuracy metric) so it can be trained directly
# on real/fake batches via discriminator.train_on_batch.
discriminator = build_discriminator(embedding_dim)
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate), metrics=['accuracy'])

# Build the generator (not compiled on its own; it is only trained through `gan`)
generator = build_generator(noise_dim, embedding_dim)

# Build and compile the GAN: noise -> generator -> discriminator -> real/fake probability.
# The ordering here is deliberate: the discriminator was compiled above, *before* its
# `trainable` flag is cleared, and the stacked model is compiled *after*.
# NOTE(review): this relies on Keras honouring the `trainable` value seen at compile
# time for each model separately — confirm this holds for the installed Keras version.
discriminator.trainable = False  # Freeze the discriminator during generator training
gan_input = tf.keras.Input(shape=(noise_dim,))
gan_output = discriminator(generator(gan_input))
gan = tf.keras.Model(gan_input, gan_output)
gan.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate))

# Training the GAN
# Every epoch performs `steps_per_epoch` alternating updates: one discriminator
# update on a mixed real/fake batch, then one generator update through `gan`.
#
# At least one step is always taken. Without the clamp, a dataset with fewer rows
# than `batch_size` would skip the inner loop entirely and the logging statement
# below would fail with a NameError because `d_loss`/`g_loss` were never assigned.
steps_per_epoch = max(1, len(minority_class_embeddings) // batch_size)

for epoch in range(epochs):
    for _ in range(steps_per_epoch):
        # Train Discriminator
        # Real half of the batch: row indices drawn independently, so repeats are possible.
        idx = np.random.randint(0, minority_class_embeddings.shape[0], batch_size)
        real_embeddings = minority_class_embeddings[idx]
        real_labels = np.ones((batch_size, 1))  # Label 1 for real data

        # Fake half of the batch: generator output for fresh Gaussian noise.
        noise = np.random.normal(0, 1, (batch_size, noise_dim))
        # Call the model directly instead of `predict()`: `predict()` is meant for
        # large batched inference and adds per-call overhead (and progress output)
        # when invoked on one small batch inside a loop.
        fake_embeddings = np.asarray(generator(noise, training=False))
        fake_labels = np.zeros((batch_size, 1))  # Label 0 for fake data

        # Combine real and fake data (real rows first, matching the label order)
        combined_embeddings = np.vstack([real_embeddings, fake_embeddings])
        combined_labels = np.vstack([real_labels, fake_labels])

        # Train the discriminator; the result is indexed as [loss, accuracy] below
        d_loss = discriminator.train_on_batch(combined_embeddings, combined_labels)

        # Train Generator
        # Fresh noise is labelled as "real" so the loss rewards outputs that the
        # discriminator classifies as real.
        noise = np.random.normal(0, 1, (batch_size, noise_dim))
        valid_labels = np.ones((batch_size, 1))  # Label 1 for fooling the discriminator
        g_loss = gan.train_on_batch(noise, valid_labels)

    # Logging: report the losses of the last step of every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch + 1}/{epochs} | D Loss: {d_loss[0]:.4f}, Acc: {d_loss[1]*100:.2f}% | G Loss: {g_loss:.4f}")

# Generate synthetic embeddings
def generate_synthetic_data(generator, num_samples, noise_dim):
    """Produce synthetic samples by feeding Gaussian noise to ``generator``.

    Args:
        generator: Object exposing ``predict(array)``; called once with the noise.
        num_samples: Number of noise vectors (rows) to draw.
        noise_dim: Length of each noise vector.

    Returns:
        Whatever ``generator.predict`` returns for the
        ``(num_samples, noise_dim)`` standard-normal noise matrix.
    """
    latent_shape = (num_samples, noise_dim)
    latent_vectors = np.random.normal(loc=0, scale=1, size=latent_shape)
    return generator.predict(latent_vectors)

# Sample new minority-class embeddings from the generator
num_new_samples = 500  # How many synthetic rows to request
synthetic_embeddings = generate_synthetic_data(
    generator=generator,
    num_samples=num_new_samples,
    noise_dim=noise_dim,
)

# Report the shape of the generated array as a quick sanity check
print("Synthetic embeddings generated with shape:", synthetic_embeddings.shape)
