In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# Load the original dataset
data = pd.read_csv('train.csv')

# Preprocess the data
# Normalize the features
norm_data = (data - data.mean()) / data.std()
# Split the data into training and testing sets
train_data = norm_data.sample(frac=0.8, random_state=42)
test_data = norm_data.drop(train_data.index)

# Define the GAN architecture
def make_generator_model():
    model = Sequential()
    model.add(Dense(64, input_dim=6))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(32))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(6, activation='tanh'))
    return model

def make_discriminator_model():
    model = Sequential()
    model.add(Dense(64, input_dim=6))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(32))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(6, activation='sigmoid'))
    return model

# Initialize the GAN
generator = make_generator_model()
discriminator = make_discriminator_model()

# Define the loss functions
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

# Define the optimizers
generator_optimizer = Adam(1e-4)
discriminator_optimizer = Adam(1e-4)

# Define the training loop
@tf.function
def train_step(real_data, batch_size, noise_dim):
    # Generate noise
    noise = tf.random.normal([batch_size, noise_dim])

    # Train the discriminator
    with tf.GradientTape() as tape:
        generated_data = generator(noise, training=True)

        real_output = discriminator(real_data, training=True)
        fake_output = discriminator(generated_data, training=True)

        real_loss = cross_entropy(tf.ones_like(real_output), real_output)
        fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
        total_loss = real_loss + fake_loss

    grads = tape.gradient(total_loss, discriminator.trainable_variables)
    discriminator_optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))

    # Train the generator
    with tf.GradientTape() as tape:
        generated_data = generator(noise, training=True)
        fake_output = discriminator(generated_data, training=True)
        gen_loss = cross_entropy(tf.ones_like(fake_output), fake_output)

    grads = tape.gradient(gen_loss, generator.trainable_variables)
    generator_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

# Train the GAN
def train_gan(generator, discriminator, train_data, epochs, batch_size, noise_dim):
    for epoch in range(epochs):
        for i in range(len(train_data) // batch_size):
            real_data = train_data[i * batch_size:(i + 1) * batch_size]
            train_step(real_data, batch_size, noise_dim)

# Generate synthetic data
def generate_synthetic_data(generator, noise_dim, num_samples):
    noise = tf.random.normal([num_samples, noise_dim])
    generated_data = generator(noise, training=False)
    return generated_data

# # Train the GAN on the training data
# epochs = 1000
# batch_size = 32
# noise_dim = 32

# train_gan(generator, discriminator, train_data)

In [11]:
# Train the GAN on the training data
epochs = 1000
batch_size = 32
noise_dim = 32

train_gan(generator, discriminator, train_data, epochs, batch_size, noise_dim)

# Generate synthetic data
num_samples = 200
synthetic_data = generate_synthetic_data(generator, noise_dim, num_samples)

# Inverse the normalization
synthetic_data = (synthetic_data * data.std()) + data.mean()

# Add the synthetic data to the original data
augmented_data = pd.concat([data, synthetic_data], ignore_index=True)

# Split the augmented data into training and testing sets
aug_train_data = augmented_data.sample(frac=0.8, random_state=42)
aug_test_data = augmented_data.drop(aug_train_data.index)


ValueError: in user code:

    File "C:\Users\Slmss\AppData\Local\Temp\ipykernel_3248\4150616828.py", line 25, in train_step  *
        real_output = discriminator(real_data, training=True)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_11" is incompatible with the layer: expected shape=(None, 6), found shape=(32, 8)
