In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

2024-03-11 06:14:59.972391: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-11 06:14:59.972470: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-11 06:15:00.138060: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Define the generator model
def build_generator(latent_dim, output_dim):
    inputs = Input(shape=(latent_dim,))
    x = Dense(128, activation='relu')(inputs)
    x = Dense(256, activation='relu')(x)
    outputs = Dense(output_dim, activation='tanh')(x)
    generator = Model(inputs, outputs)
    return generator

In [3]:
# Define the discriminator model
def build_discriminator(input_dim):
    inputs = Input(shape=(input_dim,))
    x = Dense(256, activation='relu')(inputs)
    x = Dense(128, activation='relu')(x)
    outputs = Dense(1, activation='sigmoid')(x)
    discriminator = Model(inputs, outputs)
    discriminator.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
    return discriminator

In [4]:
# Define the GAN model
def build_gan(generator, discriminator):
    discriminator.trainable = True
    gan_input = Input(shape=(latent_dim,))
    gan_output = discriminator(generator(gan_input))
    gan = Model(gan_input, gan_output)
    gan.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
    return gan

# Load keypoint data as pandas DataFrame
def load_keypoint_data(df):
    return df.values

# Preprocess the keypoint data
def preprocess_data(keypoint_data):
    # Normalize the data to range [-1, 1]
    keypoint_data = (keypoint_data - 0.5) / 0.5
    return keypoint_data

# Generate random latent vectors
def generate_latent_vectors(latent_dim, n_samples):
    return np.random.normal(0, 1, (n_samples, latent_dim))

# Train the GAN
def train_gan(generator, discriminator, gan, X_train, latent_dim, epochs, batch_size, unfreeze_discriminator_epoch):
    for epoch in range(epochs):
        for _ in range(X_train.shape[0] // batch_size):
            # Train discriminator
            noise = generate_latent_vectors(latent_dim, batch_size)
            fake_data = generator.predict(noise)
            real_data = X_train[np.random.randint(0, X_train.shape[0], batch_size)]
            X = np.concatenate([real_data, fake_data])
            y_dis = np.zeros(2 * batch_size)
            y_dis[:batch_size] = 0.9
            discriminator.trainable = True
            d_loss = discriminator.train_on_batch(X, y_dis)

            # Train generator
            noise = generate_latent_vectors(latent_dim, batch_size)
            y_gen = np.ones(batch_size)
            discriminator.trainable = False
            g_loss = gan.train_on_batch(noise, y_gen)

        # Print progress
        print(f'Epoch {epoch + 1}, Discriminator Loss: {d_loss[0]}, Generator Loss: {g_loss}')
        
        # Unfreeze discriminator after specified epoch
        if epoch == unfreeze_discriminator_epoch:
            discriminator.trainable = True
            discriminator.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
            print("Discriminator unfrozen and recompiled.")

In [None]:
# Define parameters
latent_dim = 100
input_dim = 52  # Adjust this based on the number of features in your skeletal data
output_dim = input_dim
epochs = 20
batch_size = 32
unfreeze_discriminator_epoch = 7


# Load keypoint data
# Assuming df is your pandas DataFrame containing keypoint data

df = pd.read_csv('/kaggle/input/abc-2024-dataset/keypoints/Copy of N01T1_keypoint.csv')
keypoint_data = load_keypoint_data(df)

# Preprocess data
keypoint_data = preprocess_data(keypoint_data)

# Split data into train and test sets
X_train, X_test = train_test_split(keypoint_data, test_size=0.2, random_state=42)

# Build and compile models
generator = build_generator(latent_dim, output_dim)
discriminator = build_discriminator(input_dim)
gan = build_gan(generator, discriminator)

# Train GAN
train_gan(generator, discriminator, gan, X_train, latent_dim, epochs, batch_size, unfreeze_discriminator_epoch)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step


I0000 00:00:1710137713.684848     107 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17

In [None]:
# Generate new samples using the trained generator
def generate_samples(generator, latent_dim, n_samples):
    noise = np.random.normal(0, 1, (n_samples, latent_dim))
    generated_data = generator.predict(noise)
    return generated_data

# Denormalize generated data
def denormalize_data(generated_data):
    generated_data = generated_data * 0.5 + 0.5  # Denormalize to original range
    return generated_data

# Generate new samples
n_samples = 7000  # Number of samples to generate
generated_data = generate_samples(generator, latent_dim, n_samples)

# Denormalize generated data if needed
generated_data = denormalize_data(generated_data)

# Convert generated data to DataFrame if needed
generated_df = pd.DataFrame(generated_data, columns=df.columns)  # Assuming df is your original DataFrame

generated_df.to_csv('/kaggle/working/generated_samples.csv')

In [None]:
#import os
#directory ='/kaggle/working/'
#file_list = os.listdir(directory)

# Iterate over each file and delete it
#for file_name in file_list:
#   file_path = os.path.join(directory, file_name)
#   if os.path.isfile(file_path):
#       os.remove(file_path)
#       print(f"Deleted file: {file_path}")
#   else:
#       print(f"Not a file: {file_path}")