In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd

In [2]:
# Read the EEG training table; the path is relative to the notebook's working directory.
train_df = pd.read_csv("data/train.csv")

# Keep only the columns whose names begin with "AB" or "COH" as model features.
feature_columns = [name for name in train_df.columns if name.startswith(("AB", "COH"))]

# Feature matrix (rows = samples, columns = selected features) and the per-row target.
data = train_df.loc[:, feature_columns].values
labels = train_df["main.disorder"].values

# Width of the feature matrix; used downstream to size the network input/output layers.
input_dim = data.shape[-1]

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/train.csv'

In [1]:
### VARIATIONAL AUTOENCODER (VAE) ###
latent_dim = 128  # Size of the latent code produced by the encoder

# Encoder: a stack of ReLU layers (512 -> 256) applied to the raw feature vector.
inputs = keras.Input(shape=(input_dim,))
x = inputs
for units in (512, 256):
    x = layers.Dense(units, activation="relu")(x)

# Two linear heads on top of the shared trunk: the posterior mean and log-variance.
z_mean = layers.Dense(latent_dim)(x)
z_log_var = layers.Dense(latent_dim)(x)

def sampling(args):
    """Reparameterisation trick: draw z = mean + sigma * epsilon.

    Args:
        args: A pair ``(z_mean, z_log_var)`` of tensors with identical shape,
            holding the posterior mean and log-variance.

    Returns:
        A tensor shaped like ``z_mean`` containing one sample per row.
    """
    z_mean, z_log_var = args
    # Use the core TensorFlow op instead of the legacy `tf.keras.backend`
    # helper, and draw noise with the same shape and dtype as `z_mean`.
    epsilon = tf.random.normal(shape=tf.shape(z_mean), dtype=z_mean.dtype)
    # exp(0.5 * log_var) converts log-variance to a standard deviation.
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

# Draw the latent code z from (z_mean, z_log_var) through the `sampling` helper.
z = layers.Lambda(sampling)([z_mean, z_log_var])

# The encoder model returns all three tensors: mean, log-variance and the sampled code.
encoder = keras.Model(inputs, [z_mean, z_log_var, z], name="encoder")

# Decoder: mirrors the encoder trunk (256 -> 512) and maps a latent code back
# to a full feature vector.
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(256, activation="relu")(latent_inputs)
x = layers.Dense(512, activation="relu")(x)
# Linear output head. The model is trained to reconstruct `data`, which is
# taken straight from the CSV columns without any scaling; a sigmoid head can
# only emit values in (0, 1) and therefore cannot reproduce features outside
# that interval.
# NOTE(review): if the CSV is in fact already normalised to [0, 1], a sigmoid
# head would also be valid - confirm the feature range.
outputs = layers.Dense(input_dim, activation="linear")(x)

decoder = keras.Model(latent_inputs, outputs, name="decoder")

In [2]:
# VAE Model
class VAE(keras.Model):
    """Variational autoencoder built from an encoder/decoder pair.

    The forward pass returns the reconstruction and registers the
    KL-divergence between the encoder posterior N(z_mean, exp(z_log_var)) and
    the standard-normal prior as an extra loss. Without that term the model is
    only trained on reconstruction error, and nothing pushes the latent codes
    towards the N(0, I) distribution that is later sampled to generate data.

    Args:
        encoder: Model mapping inputs to ``(z_mean, z_log_var, z)``.
        decoder: Model mapping a latent code ``z`` to a reconstruction.
        kl_weight: Multiplier applied to the KL term (1.0 gives the standard
            objective; smaller values favour reconstruction quality).
        **kwargs: Forwarded to ``keras.Model``.
    """

    def __init__(self, encoder, decoder, kl_weight=1.0, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.kl_weight = kl_weight

    def call(self, inputs):
        """Encode, sample, decode; returns the reconstructed inputs."""
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstructed = self.decoder(z)

        # Closed-form KL(N(mu, sigma^2) || N(0, 1)), summed over latent dims.
        kl_per_sample = -0.5 * tf.reduce_sum(
            1.0 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=-1
        )
        # A compiled 'mse' loss averages over the feature axis, i.e. it is the
        # summed squared error divided by the number of features. Divide the
        # KL term by the same factor so the two terms keep their relative
        # weighting from the summed objective.
        n_features = tf.cast(tf.shape(inputs)[-1], kl_per_sample.dtype)
        self.add_loss(self.kl_weight * tf.reduce_mean(kl_per_sample) / n_features)

        return reconstructed

# Wrap the encoder/decoder pair and train it to reproduce its own input
# (the feature matrix is passed as both input and target).
vae = VAE(encoder=encoder, decoder=decoder)
vae.compile(loss='mse', optimizer=keras.optimizers.Adam())
vae.fit(x=data, y=data, batch_size=32, epochs=50)

# Generate synthetic data: decode one standard-normal latent draw per real row.
latent_samples = np.random.normal(size=(data.shape[0], latent_dim))
synthetic_data_vae = decoder.predict(latent_samples)

# Save synthetic VAE data with labels.
# NOTE(review): the rows are decoded from random latent draws, so the real
# `labels` attached here are not tied to the generated rows - confirm this
# pairing is intended.
vae_df = pd.DataFrame(synthetic_data_vae, columns=feature_columns).assign(
    **{'main.disorder': labels}
)
vae_df.to_csv("synthetic_data_vae.csv", index=False)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50

KeyboardInterrupt: 

In [2]:
### GENERATIVE ADVERSARIAL NETWORK (GAN) ###
latent_dim = 128

# Generator: maps a latent noise vector to a synthetic feature vector.
generator = keras.Sequential([
    layers.Dense(512, activation="relu", input_shape=(latent_dim,)),
    layers.Dense(1024, activation="relu"),
    # Linear output head. The discriminator is shown real rows of `data`,
    # which come straight from the CSV without scaling; a sigmoid head would
    # confine generated values to (0, 1) and could never match features
    # outside that interval.
    # NOTE(review): if the CSV is already normalised to [0, 1], a sigmoid head
    # would also be valid - confirm the feature range.
    layers.Dense(input_dim, activation="linear")
])

# Discriminator: scores a feature vector with a single sigmoid output,
# trained against targets of 1 for real rows and 0 for generated rows.
discriminator = keras.Sequential([
    layers.Dense(1024, activation="relu", input_shape=(input_dim,)),
    layers.Dense(512, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])

# Compiled on its own so it can be trained directly with train_on_batch.
discriminator.compile(optimizer=keras.optimizers.Adam(), loss="binary_crossentropy", metrics=["accuracy"])

# Freeze the discriminator before wiring it into the stacked model so that
# generator updates do not also move the discriminator weights.
# NOTE(review): this relies on the standalone discriminator keeping the
# trainable state it had when it was compiled above - confirm for the
# installed Keras version.
discriminator.trainable = False

# Stacked graph: noise -> generator -> discriminator score.
gan_input = keras.Input(shape=(latent_dim,))
synthetic_output = generator(gan_input)
validity = discriminator(synthetic_output)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [3]:
# GAN Model: the stacked noise -> score graph, used to update the generator.
gan = keras.Model(inputs=gan_input, outputs=validity)
gan.compile(loss="binary_crossentropy", optimizer=keras.optimizers.Adam())

# Training
batch_size = 32
epochs = 5000  # each iteration below is one mini-batch update, not a full pass over `data`

# Target vectors are identical on every iteration, so build them once.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

for epoch in range(epochs):
    # Train Discriminator on one batch of real rows and one batch of generated rows.
    real_samples = data[np.random.randint(0, data.shape[0], batch_size)]
    noise = np.random.normal(0, 1, (batch_size, latent_dim)).astype("float32")
    # Call the model directly instead of `predict`: `predict` is meant for
    # large inputs and adds per-call overhead (and progress output) that is
    # wasteful for a single small batch inside a 5000-iteration loop.
    fake_samples = generator(noise, training=False).numpy()

    d_loss_real = discriminator.train_on_batch(real_samples, real_targets)
    d_loss_fake = discriminator.train_on_batch(fake_samples, fake_targets)
    # Each result is [loss, accuracy]; average the two element-wise.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train Generator: fresh noise, targets of 1 so the generator is rewarded
    # when the discriminator scores its output as real.
    noise = np.random.normal(0, 1, (batch_size, latent_dim)).astype("float32")
    g_loss = gan.train_on_batch(noise, real_targets)

    if epoch % 500 == 0:
        print(f"Epoch {epoch}: D Loss: {d_loss[0]}, G Loss: {g_loss}")

# Generate synthetic data using GAN: one standard-normal noise vector per real row.
latent_samples_gan = np.random.normal(size=(data.shape[0], latent_dim))
synthetic_data_gan = generator.predict(latent_samples_gan)

# Save synthetic GAN data with labels.
# NOTE(review): the generator is fed unconditioned random noise, so the real
# `labels` attached here are not tied to the generated rows - confirm this
# pairing is intended.
gan_df = pd.DataFrame(synthetic_data_gan, columns=feature_columns).assign(
    **{'main.disorder': labels}
)
gan_df.to_csv("synthetic_data_gan.csv", index=False)


Epoch 0: D Loss: 4.989405632019043, G Loss: 0.018551619723439217
Epoch 500: D Loss: 0.8444039821624756, G Loss: 4.3874592781066895
Epoch 1000: D Loss: 0.4192866310477257, G Loss: 2.5579638481140137
Epoch 1500: D Loss: 0.48330119252204895, G Loss: 1.1079009771347046
Epoch 2000: D Loss: 0.601720005273819, G Loss: 1.2054874897003174
Epoch 2500: D Loss: 0.5369901359081268, G Loss: 1.2358264923095703
Epoch 3000: D Loss: 0.7034110426902771, G Loss: 0.8562377691268921
Epoch 3500: D Loss: 0.791479229927063, G Loss: 0.6483489274978638
Epoch 4000: D Loss: 0.3079170733690262, G Loss: 1.4173649549484253
Epoch 4500: D Loss: 0.7473418116569519, G Loss: 0.680052638053894
