In [29]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from TimeGAN.timegan import timegan



In [30]:
class TimeGAN:
    """Small LSTM-based GAN for fixed-length multivariate sequences.

    NOTE: despite the name, this class is a plain generator/discriminator
    pair; it has no embedder, recovery or supervisor network. ``gamma`` and
    ``beta`` are stored for interface compatibility but no loss in this class
    uses them.

    Attributes:
        seq_len: Number of time steps in every sequence.
        dim: Number of features per time step.
        hidden_dim: Units in each LSTM layer.
        num_layers: Number of stacked LSTM layers in each network (at least
            one layer is always built).
        gamma, beta: Stored, currently unused.
        generator: Keras model mapping noise of shape (seq_len, dim) to a
            sequence of the same shape.
        discriminator: Keras model mapping a sequence to a sigmoid score.
        rnn_cell, rnn: Standalone LSTM cell / RNN layer; not wired into
            either model.
        history: One ``{"gen_loss", "disc_loss"}`` dict per epoch of the most
            recent ``train`` call.
    """

    def __init__(self, seq_len, dim, hidden_dim, num_layers, gamma, beta):
        self.seq_len = seq_len
        self.dim = dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.gamma = gamma
        self.beta = beta
        self.history = []

        self.generator = self.build_generator()
        self.discriminator = self.build_discriminator()
        # Kept so existing code that reads these attributes keeps working.
        self.rnn_cell = tf.keras.layers.LSTMCell(self.hidden_dim)
        self.rnn = tf.keras.layers.RNN(self.rnn_cell, return_sequences=True)

    def _lstm_depth(self):
        """Return how many LSTM layers to stack; never fewer than one."""
        return max(1, int(self.num_layers))

    def _as_windows(self, data):
        """Turn ``data`` into a float32 array of shape (n, seq_len, dim).

        Args:
            data: Either a 2-D table (rows, dim), which is cut into
                overlapping stride-1 windows of ``seq_len`` rows, or a 3-D
                array that is already (n, seq_len, dim).

        Raises:
            ValueError: If the feature count or window length does not match
                the model, or a 2-D table has fewer than ``seq_len`` rows.
        """
        arr = np.asarray(data, dtype=np.float32)
        if arr.ndim == 3:
            if arr.shape[1:] != (self.seq_len, self.dim):
                raise ValueError(
                    f"expected windows of shape ({self.seq_len}, {self.dim}), "
                    f"got {arr.shape[1:]}"
                )
            return arr
        if arr.ndim != 2 or arr.shape[1] != self.dim:
            raise ValueError(
                f"expected data of shape (rows, {self.dim}), got {arr.shape}"
            )
        n_windows = arr.shape[0] - self.seq_len + 1
        if n_windows < 1:
            raise ValueError(
                f"need at least seq_len={self.seq_len} rows, got {arr.shape[0]}"
            )
        # Row indices of every window: window i covers rows i .. i+seq_len-1.
        row_idx = np.arange(n_windows)[:, None] + np.arange(self.seq_len)[None, :]
        return arr[row_idx]

    def build_generator(self):
        """Build the noise -> sequence network (stacked LSTMs + per-step Dense)."""
        model = tf.keras.Sequential()
        model.add(tf.keras.Input(shape=(self.seq_len, self.dim)))
        for _ in range(self._lstm_depth()):
            model.add(tf.keras.layers.LSTM(self.hidden_dim, return_sequences=True))
        model.add(tf.keras.layers.Dropout(0.2))
        model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(self.dim)))
        return model

    def build_discriminator(self):
        """Build the sequence -> sigmoid score network (stacked LSTMs + Dense)."""
        model = tf.keras.Sequential()
        model.add(tf.keras.Input(shape=(self.seq_len, self.dim)))
        depth = self._lstm_depth()
        for layer_idx in range(depth):
            # Only the last LSTM collapses the time axis for the Dense head.
            keep_sequence = layer_idx < depth - 1
            model.add(
                tf.keras.layers.LSTM(self.hidden_dim, return_sequences=keep_sequence)
            )
        model.add(tf.keras.layers.Dropout(0.2))
        model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
        return model

    def train(self, train_data, val_data, epochs=100, batch_size=64):
        """Adversarially train the generator against the discriminator.

        Args:
            train_data: 2-D table (rows, dim) or 3-D array (n, seq_len, dim)
                of real samples; see ``_as_windows``.
            val_data: Accepted for interface compatibility; not used.
            epochs: Number of passes over the training windows.
            batch_size: Windows per optimisation step.

        Raises:
            ValueError: If ``train_data`` cannot be windowed or
                ``batch_size`` is smaller than one.

        Side effects:
            Compiles both models and fills ``self.history`` with the mean
            generator and discriminator loss of every epoch.
        """
        # Validate the inputs first so bad data fails before any model work.
        real_windows = self._as_windows(train_data)
        batch_size = int(batch_size)
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        # Each network gets its own optimizer: Adam keeps per-variable state,
        # so one instance must not be shared between two models.
        gen_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        disc_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        self.discriminator.compile(
            optimizer=disc_optimizer, loss='binary_crossentropy', metrics=['accuracy']
        )
        # No 'accuracy' metric here: it is meaningless for a regression loss.
        self.generator.compile(optimizer=gen_optimizer, loss='mean_squared_error')

        bce = tf.keras.losses.BinaryCrossentropy()
        generator = self.generator
        discriminator = self.discriminator

        @tf.function
        def train_step(real_batch, noise):
            # One simultaneous update of both networks.
            with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                fake_batch = generator(noise, training=True)
                real_score = discriminator(real_batch, training=True)
                fake_score = discriminator(fake_batch, training=True)
                # Generator wants fakes scored as real (label 1).
                gen_loss = bce(tf.ones_like(fake_score), fake_score)
                # Discriminator wants real -> 1 and fake -> 0.
                disc_loss = bce(tf.ones_like(real_score), real_score) + bce(
                    tf.zeros_like(fake_score), fake_score
                )
            gen_grads = gen_tape.gradient(gen_loss, generator.trainable_variables)
            disc_grads = disc_tape.gradient(
                disc_loss, discriminator.trainable_variables
            )
            gen_optimizer.apply_gradients(
                zip(gen_grads, generator.trainable_variables)
            )
            disc_optimizer.apply_gradients(
                zip(disc_grads, discriminator.trainable_variables)
            )
            return gen_loss, disc_loss

        self.history = []
        n_windows = len(real_windows)
        for epoch in range(epochs):
            order = np.random.permutation(n_windows)
            gen_losses, disc_losses = [], []
            for start in range(0, n_windows, batch_size):
                real_batch = real_windows[order[start:start + batch_size]]
                # Same uniform [0, 1) noise distribution that generate() uses.
                noise = np.random.rand(
                    len(real_batch), self.seq_len, self.dim
                ).astype(np.float32)
                gen_loss, disc_loss = train_step(
                    tf.convert_to_tensor(real_batch), tf.convert_to_tensor(noise)
                )
                gen_losses.append(float(gen_loss))
                disc_losses.append(float(disc_loss))
            self.history.append(
                {
                    "gen_loss": float(np.mean(gen_losses)),
                    "disc_loss": float(np.mean(disc_losses)),
                }
            )

    def generate(self, n_samples, seed=None):
        """Sample ``n_samples`` synthetic sequences from the generator.

        Args:
            n_samples: Number of sequences to produce (non-negative).
            seed: Optional seed for the input noise. When ``None`` the global
                NumPy random state is used, as before.

        Returns:
            Array of shape (n_samples, seq_len, dim).

        Raises:
            ValueError: If ``n_samples`` is negative.
        """
        n_samples = int(n_samples)
        if n_samples < 0:
            raise ValueError("n_samples must be non-negative")
        shape = (n_samples, self.seq_len, self.dim)
        if n_samples == 0:
            # Nothing to predict; skip the model call on an empty batch.
            return np.empty(shape, dtype=np.float32)
        if seed is None:
            random_data = np.random.rand(*shape)
        else:
            random_data = np.random.default_rng(seed).random(shape)
        synthetic_data = self.generator.predict(random_data)
        return synthetic_data


In [31]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from TimeGAN.timegan import timegan # Assuming you have a TimeGAN implementation

def clean_data(df):
    """Return an all-numeric copy of ``df`` with gaps filled by zero.

    Every column is passed through ``pd.to_numeric`` with ``errors='coerce'``,
    so values that cannot be parsed as numbers become NaN; all NaN values
    (pre-existing or produced by the coercion) are then replaced with 0.
    The input frame itself is not modified.
    """
    return df.apply(
        lambda column: pd.to_numeric(column, errors='coerce')
    ).fillna(0)

if __name__ == "__main__":
    # Run settings gathered in one place so they are easy to find and tune.
    SEQ_LEN = 30
    HIDDEN_DIM = 24
    NUM_LAYERS = 3
    EPOCHS = 100
    BATCH_SIZE = 64
    N_SAMPLES = 1000
    OUTPUT_CSV = 'synthetic_data_timegan.csv'

    # Load the three labelled splits.
    train_data = pd.read_csv('training_set _labelled.csv')
    val_data = pd.read_csv('validation_set _labelled.csv')
    test_data = pd.read_csv('testing_set _labelled.csv')

    # Coerce everything to numbers and fill gaps with 0 (see clean_data).
    # test_data is prepared here but not used further in this cell.
    train_data = clean_data(train_data)
    val_data = clean_data(val_data)
    test_data = clean_data(test_data)

    # Fit the scaler on the training split only, then reuse it for validation.
    scaler = MinMaxScaler()
    train_data_scaled = scaler.fit_transform(train_data.values)
    val_data_scaled = scaler.transform(val_data.values)

    # Build the model with one feature per training column.
    timegan_model = TimeGAN(
        seq_len=SEQ_LEN,
        dim=train_data.shape[1],
        hidden_dim=HIDDEN_DIM,
        num_layers=NUM_LAYERS,
        gamma=1,
        beta=1,
    )

    timegan_model.train(
        train_data_scaled, val_data_scaled, epochs=EPOCHS, batch_size=BATCH_SIZE
    )

    # Sample synthetic sequences; replace any NaN the network produced with 0.
    synthetic_data = timegan_model.generate(n_samples=N_SAMPLES)
    synthetic_data = np.nan_to_num(synthetic_data, nan=0)

    original_data_shape = train_data.shape
    print("Original data shape:", original_data_shape)
    print("Synthetic data shape before reshaping:", synthetic_data.shape)

    # The scaler works on 2-D (rows, features) input, so merge the sample and
    # time axes before undoing the scaling.
    synthetic_data_flattened = synthetic_data.reshape(-1, synthetic_data.shape[-1])
    print("Synthetic data shape after flattening:", synthetic_data_flattened.shape)

    synthetic_data_scaled = scaler.inverse_transform(synthetic_data_flattened)

    # 3-D view of the un-scaled data, kept for anything that needs sequences.
    synthetic_data_scaled_reshaped = synthetic_data_scaled.reshape(synthetic_data.shape)
    print("Synthetic data shape after inverse transformation and reshaping:", synthetic_data_scaled_reshaped.shape)

    # Save one row per time step. The 2-D un-scaled array is written directly
    # (no 3-D round trip), and the training column names are reused so the CSV
    # has the same schema as the real data instead of anonymous 0..N-1 headers.
    synthetic_data_df = pd.DataFrame(synthetic_data_scaled, columns=train_data.columns)
    synthetic_data_df.to_csv(OUTPUT_CSV, index=False)
    print("Synthetic data saved to synthetic_data_timegan.csv")


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
Original data shape: (700, 13)
Synthetic data shape before reshaping: (1000, 30, 13)
Synthetic data shape after flattening: (30000, 13)
Synthetic data shape after inverse transformation and reshaping: (1000, 30, 13)
Synthetic data saved to synthetic_data_timegan.csv
