## Import Libraries
This section imports the libraries needed to build and train the GAN and to process the data: PyTorch for deep learning, NumPy for numerical operations, and `os` for file and directory operations.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

## Load and Preprocess Data
This section defines the `load_data` function which loads and preprocesses the IMU data from specified directories for each type of figure skating jump. It applies necessary scaling to accelerometer and gyroscopic data, preparing it for use in both the GAN and the LSTM classifier.

In [None]:
def load_data(directory):
    """Load and scale the raw IMU recordings of every jump category.

    Each regular file under ``directory/<category>/`` is read as a flat
    stream of int16 values laid out in repeating groups of six: three
    accelerometer values followed by three gyroscope values. Accelerometer
    values are divided by 2048.0 and gyroscope values by 16.4.

    Args:
        directory: Root folder containing one sub-folder per category
            ('axel', 'flip', 'loop', 'lutz', 'salchow', 'toe').

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays. ``data[k]`` is the scaled
        flat float64 recording of the k-th file and ``labels[k]`` is the
        index of that file's category in the list above.

    Raises:
        FileNotFoundError: If a category sub-folder does not exist.
    """
    data = []
    labels = []
    categories = ['axel', 'flip', 'loop', 'lutz', 'salchow', 'toe']
    for idx, category in enumerate(categories):
        path = os.path.join(directory, category)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order reproducible between runs and machines.
        for file in sorted(os.listdir(path)):
            file_path = os.path.join(path, file)
            # Skip sub-directories (e.g. notebook checkpoint folders), which
            # cannot be read as a recording.
            if not os.path.isfile(file_path):
                continue

            # Load raw data without reshaping
            raw_data = np.fromfile(file_path, dtype=np.int16).astype(np.float64)

            # Position of every value inside its group of 6: 0-2 are
            # accelerometer axes, 3-5 are gyroscope axes. The mask form also
            # covers a trailing incomplete group.
            position = np.arange(raw_data.size) % 6
            is_accel = position < 3
            raw_data[is_accel] /= 2048.0
            raw_data[~is_accel] /= 16.4

            data.append(raw_data)  # Append the scaled flat data
            labels.append(idx)
    return np.array(data), np.array(labels)

# Load every labelled recording once; the DataLoader cell further down
# builds its tensors from `data` and `labels`.
labeled_data_dir = '../data/labeled_data'
data, labels = load_data(labeled_data_dir)


## Define Directory Paths
Set up the paths for the processed data and the directory where the generated data will be stored. Ensure that directories for storing generated data are created if they do not exist.

In [None]:
# Jump categories; each one gets its own sub-folder for generated samples.
jump_types = ['axel', 'flip', 'loop', 'lutz', 'salchow', 'toe']
base_dir = '../data'
processed_data_dir = os.path.join(base_dir, 'processed_data')
generated_data_dir = os.path.join(base_dir, 'generated_data')

# exist_ok=True makes the creation idempotent and removes the window between
# "check that the folder is missing" and "create it" that a separate
# os.path.exists() test leaves open.
for jump_type in jump_types:
    dir_path = os.path.join(generated_data_dir, jump_type)
    os.makedirs(dir_path, exist_ok=True)

## Generator and Discriminator Architecture
Define the structures for the generator and discriminator networks. The generator aims to produce new, synthetic IMU data mimicking real jump data, while the discriminator tries to distinguish between real and generated data.

In [None]:
# Generator definition
class Generator(nn.Module):
    """Fully connected generator mapping a latent vector to a flat sample.

    Args:
        input_dim: Width of the latent vector fed to the first linear layer.
        output_dim: Width of the generated sample (the notebook uses 4500).
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_units = 1024
        layers = [
            nn.Linear(input_dim, hidden_units),
            nn.ReLU(),
            # The output width has to equal the discriminator's input width.
            nn.Linear(hidden_units, output_dim),
            # Tanh keeps every generated value inside [-1, 1].
            # NOTE(review): load_data divides int16 readings by 2048.0 / 16.4,
            # so real samples can exceed this range - confirm normalization.
            nn.Tanh(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, z):
        """Return the batch of samples generated from latent input ``z``."""
        return self.model(z)

# Discriminator definition
class Discriminator(nn.Module):
    """Fully connected discriminator scoring a flat sample as real or fake.

    Args:
        input_dim: Width of the flat input vector. Defaults to 4500, the
            sample width used throughout this notebook, so existing
            ``Discriminator()`` calls keep working.
    """

    def __init__(self, input_dim=4500):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 1024),  # First layer accepts a vector of size input_dim
            nn.LeakyReLU(0.2),
            nn.Linear(1024, 1),
            # Sigmoid squashes the score into (0, 1), as required by BCELoss.
            nn.Sigmoid()
        )

    def forward(self, imu_data):
        """Return one score per row of ``imu_data`` with shape (batch, 1)."""
        return self.model(imu_data)

## Setup DataLoader
Create a DataLoader for the loaded data. During GAN training, this DataLoader supplies the shuffled mini-batches of real samples that the discriminator compares against the generator's output.

In [None]:
# Convert the NumPy arrays to tensors: float32 features and int64 labels,
# the same dtypes torch.Tensor / torch.LongTensor produce.
tensor_data = torch.tensor(data, dtype=torch.float32)
tensor_labels = torch.tensor(labels, dtype=torch.long)

# Pair every sample with its label and serve shuffled mini-batches of 16.
dataset = TensorDataset(tensor_data, tensor_labels)
data_loader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)

## GAN Training Setup
Prepare the environment for training the GAN, including the initialization of the generator and discriminator, defining the loss function, and setting up the optimizers. This setup is crucial for effectively training the GAN to generate realistic data.

In [None]:
# Networks: 100-dimensional latent input, 4500-value samples.
generator = Generator(input_dim=100, output_dim=4500)
discriminator = Discriminator()

# Binary cross-entropy applied to the discriminator's sigmoid output.
adversarial_loss = nn.BCELoss()

# Both optimizers share the same Adam hyper-parameters.
adam_settings = {'lr': 0.0002, 'betas': (0.5, 0.999)}
optimizer_G = optim.Adam(generator.parameters(), **adam_settings)
optimizer_D = optim.Adam(discriminator.parameters(), **adam_settings)

## GAN Training Loop
Define the training loop for the GAN, where the generator and discriminator are trained alternately. The generator tries to create data that the discriminator will classify as real, while the discriminator learns to distinguish between real and generated data.


In [None]:
num_epochs = 100
latent_dim = 100

# Define the device as the first visible cuda device if GPU is available, otherwise use CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Put both networks on the training device. The targets below are created on
# `device`, so leaving the models and inputs on the CPU would raise a
# device-mismatch error as soon as CUDA is available. Module.to() moves the
# existing parameters in place, so the optimizers created earlier still
# reference them.
generator.to(device)
discriminator.to(device)

# A later cell switches the generator to eval mode; restore training mode in
# case this cell is re-run afterwards.
generator.train()
discriminator.train()

for epoch in range(num_epochs):
    for real_data, _ in data_loader:  # labels are not used by this GAN
        # Ensure real_data is in the correct shape (batch_size, 4500) and on
        # the same device as the networks
        real_data = real_data.view(-1, 4500).to(device)
        batch_size = real_data.size(0)

        # Generate fake data from latent noise sampled directly on the device
        z = torch.randn(batch_size, latent_dim, device=device)
        generated_data = generator(z)
        generated_data = generated_data.view(-1, 4500)  # Ensure generated data is correctly reshaped

        # Define targets for real and fake data
        valid = torch.ones(batch_size, 1, device=device)
        fake = torch.zeros(batch_size, 1, device=device)

        # Train Generator: it is rewarded when the discriminator labels its
        # output as real
        optimizer_G.zero_grad()
        g_loss = adversarial_loss(discriminator(generated_data), valid)
        g_loss.backward()
        optimizer_G.step()

        # Train Discriminator: zero_grad also clears the gradients the
        # generator step left on the discriminator; detach() stops the fake
        # loss from back-propagating into the generator
        optimizer_D.zero_grad()
        real_loss = adversarial_loss(discriminator(real_data), valid)
        fake_loss = adversarial_loss(discriminator(generated_data.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2
        d_loss.backward()
        optimizer_D.step()

        print(f'Epoch {epoch + 1}/{num_epochs}, Discriminator Loss: {d_loss.item()}, Generator Loss: {g_loss.item()}')


## GAN Training and Data Generation
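Handle the different types of jumps by using the trained generator to produce data for each jump type and saving it into specific folders structured by jump type. The generated values are scaled back to raw sensor units and stored as int16 binary files. Note that the generator receives only random noise, so the samples written to each folder are not conditioned on the jump type.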

In [None]:
num_samples_to_generate = 50

generator.eval()

# Sample on whatever device the generator's weights live on, so the latent
# input can never end up on a different device than the model.
generator_device = next(generator.parameters()).device

with torch.no_grad():
    # NOTE(review): the generator only receives random noise, so the samples
    # written to each jump folder are not conditioned on jump_type.
    for jump_type in jump_types:
        z = torch.randn(num_samples_to_generate, latent_dim, device=generator_device)
        generated_data = generator(z)
        generated_data = generated_data.view(num_samples_to_generate, -1)  # Ensuring it's flat if not already

        # Reverse scaling for accelerometer and gyroscope data. Values are
        # laid out in groups of 6: positions 0-2 are accelerometer axes
        # (x 2048.0) and positions 3-5 are gyroscope axes (x 16.4).
        position = torch.arange(generated_data.shape[1], device=generator_device) % 6
        scale = torch.full(
            (generated_data.shape[1],), 16.4,
            dtype=generated_data.dtype, device=generator_device,
        )
        scale[position < 3] = 2048.0
        generated_data = generated_data * scale

        # Convert to numpy and adjust datatype to int16 (astype truncates
        # the fractional part)
        generated_data = generated_data.cpu().numpy().astype(np.int16)

        # Save data; create the folder if it is missing
        output_dir = os.path.join(generated_data_dir, jump_type)
        os.makedirs(output_dir, exist_ok=True)
        # The loop variable is deliberately not called `data`, which would
        # overwrite the notebook-level array of loaded recordings.
        for i, sample in enumerate(generated_data):
            file_path = os.path.join(output_dir, f'jump_{i}.bin')
            sample.tofile(file_path)
