<a href="https://colab.research.google.com/github/judebebo32/ML_CLG/blob/main/Copy_of_gan_new_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow pandas


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, BatchNormalization, LeakyReLU, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the spreadsheet and keep only the rows in which every value is >= 0
file_path = "/content/Neworiginal.xlsx"
data = pd.read_excel(file_path)
non_negative_rows = (data >= 0).all(axis=1)
data = data[non_negative_rows]

# Rescale every column with MinMaxScaler (fitted on all remaining rows), then
# hold back 20% of the rows; only the training part is used below.
scaler = MinMaxScaler()
scaler.fit(data)
data_scaled = scaler.transform(data)
data_train, _ = train_test_split(
    data_scaled,
    test_size=0.2,
    random_state=42,
)

# GAN architecture
def create_generator(latent_dim=11, output_dim=11):
    """Build the generator network that maps noise vectors to synthetic rows.

    Args:
        latent_dim: Length of the input noise vector. Defaults to 11, the
            value that used to be hard-coded here.
        output_dim: Number of features produced per generated row. Defaults
            to 11, the value that used to be hard-coded here.

    Returns:
        An uncompiled Keras ``Model`` made of two
        Dense -> BatchNormalization -> LeakyReLU stages (128 then 256 units)
        followed by a sigmoid output layer of ``output_dim`` units.
    """
    input_layer = Input(shape=(latent_dim,))
    x = input_layer
    for units in (128, 256):
        x = Dense(units)(x)
        x = BatchNormalization()(x)
        x = LeakyReLU()(x)
    # The real rows are scaled with MinMaxScaler() using its default feature
    # range of [0, 1]. A tanh output spans [-1, 1], so half of the generator's
    # output range can never match real data and turns into negative values
    # after scaler.inverse_transform(). Sigmoid keeps the output in (0, 1).
    x = Dense(output_dim, activation="sigmoid")(x)
    return Model(inputs=input_layer, outputs=x)

def create_discriminator():
    """Build the discriminator network that scores 11-feature rows.

    Returns:
        An uncompiled Keras ``Model`` taking inputs of shape ``(11,)`` and
        producing a single sigmoid output per row.
    """
    features = Input(shape=(11,))
    hidden = features
    # Two stages of decreasing width, each Dense -> LeakyReLU -> Dropout(0.5).
    for units in (256, 128):
        hidden = Dense(units)(hidden)
        hidden = LeakyReLU()(hidden)
        hidden = Dropout(0.5)(hidden)
    score = Dense(1, activation="sigmoid")(hidden)
    return Model(inputs=features, outputs=score)

# GAN training
def train_gan(epochs, batch_size):
    """Train a generator/discriminator pair on the module-level ``data_train``.

    Each "epoch" here is a single training step: one random batch of real
    rows and one batch of generated rows are shown to the discriminator, then
    the generator is updated once through the combined model.

    Args:
        epochs: Number of training steps to run.
        batch_size: Number of real rows (and of noise vectors) per step.

    Returns:
        The trained generator ``Model``.
    """
    generator = create_generator()
    discriminator = create_discriminator()

    # One optimizer per compiled model, so the discriminator and the generator
    # each keep their own step counter and Adam moment estimates instead of
    # sharing a single optimizer instance.
    d_optimizer = tf.keras.optimizers.legacy.Adam(0.0002, 0.5)
    g_optimizer = tf.keras.optimizers.legacy.Adam(0.0002, 0.5)
    discriminator.compile(loss="binary_crossentropy", optimizer=d_optimizer)

    # Take the noise size from the generator rather than repeating a literal.
    latent_dim = generator.input_shape[-1]

    # Combined model: noise -> generator -> discriminator. The discriminator is
    # marked non-trainable AFTER its own compile() so that only the generator
    # is meant to be updated through `combined`.
    # NOTE(review): this relies on Keras capturing `trainable` at compile time
    # - confirm for the installed TensorFlow/Keras version.
    noise_input = Input(shape=(latent_dim,))
    discriminator.trainable = False
    validity = discriminator(generator(noise_input))
    combined = Model(inputs=noise_input, outputs=validity)
    combined.compile(loss="binary_crossentropy", optimizer=g_optimizer)

    # The label arrays never change, so build them once outside the loop.
    real_labels = np.ones((batch_size, 1))
    fake_labels = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        idx = np.random.randint(0, data_train.shape[0], batch_size)
        real_data = data_train[idx]
        noise = np.random.normal(0, 1, (batch_size, latent_dim)).astype("float32")

        # Call the model directly instead of generator.predict(): predict() is
        # intended for large inputs, adds per-call overhead, and prints a
        # progress bar on every step, which floods the notebook output.
        fake_data = generator(noise, training=False)

        d_loss_real = discriminator.train_on_batch(real_data, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # The generator is trained to make the discriminator output "real".
        g_loss = combined.train_on_batch(noise, real_labels)

        if epoch % 1000 == 0:
            print(f"Epoch {epoch}, Discriminator loss: {d_loss}, Generator loss: {g_loss}")

    return generator




In [None]:
# Generate synthetic data
def generate_synthetic_data(generator, num_datapoints):
    """Sample rows from the generator and return them in original units.

    Args:
        generator: Model whose ``predict`` accepts noise of shape
            ``(num_datapoints, 11)``.
        num_datapoints: Number of noise vectors to sample. The returned frame
            can hold fewer rows because of the filtering below.

    Returns:
        A DataFrame with the columns of the module-level ``data``, holding
        the generated rows that survive both filters.
    """
    latent_samples = np.random.normal(0, 1, (num_datapoints, 11))
    scaled_rows = generator.predict(latent_samples)
    # Undo the module-level scaler so values are back in the original units.
    restored_rows = scaler.inverse_transform(scaled_rows)
    frame = pd.DataFrame(restored_rows, columns=data.columns)

    # First filter: drop every row that contains a negative value.
    all_non_negative = (frame >= 0).all(axis=1)
    frame = frame[all_non_negative]

    # Second filter: drop rows in which any column lies more than three
    # standard deviations from that column's mean (computed on the rows left
    # after the first filter).
    deviation = (frame - frame.mean()).abs()
    is_outlier = (deviation > 3 * frame.std()).any(axis=1)
    frame = frame[~is_outlier]

    return frame

# Training configuration: number of training steps and rows per step
epochs = 20000
batch_size = 64

# Fit the GAN and keep the trained generator
generator = train_gan(epochs=epochs, batch_size=batch_size)

# Sample synthetic rows (num_datapoints is the number of noise vectors drawn;
# the filters inside generate_synthetic_data may return fewer rows)
num_datapoints = 500
synthetic_data = generate_synthetic_data(
    generator=generator,
    num_datapoints=num_datapoints,
)

# Write the result to an Excel file without the index column
synthetic_data.to_excel("synthetic_data.xlsx", index=False)


Streaming output truncated to the last 5000 lines.
Epoch 16000, Discriminator loss: 0.6772544980049133, Generator loss: 0.7558311820030212
Epoch 17000, Discriminator loss: 0.6485884189605713, Generator loss: 0.7859036922454834
Epoch 18000, Discriminator loss: 0.6688077747821808, Generator loss: 0.8249547481536865
Epoch 19000, Discriminator loss: 0.6529475450515747, Generator loss: 0.862553060054779
