In [31]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.compat.v2 as tf
import tensorflow_probability as tfp
from tensorflow.keras.layers import LSTM
import matplotlib.pyplot as plt
from ast import literal_eval
import os

import sys
sys.path.append('C:/Users/z004uyxr/Documents/CVAE-Siemens/vae_probability')

from src.data_generator.sequence_with_categories import generate_data
from src.vae_model.multi_categorical_vae import build_vae_submodels
from src.vae_model.common_functions import build_vae_from_models #changed to from src.vae_model.multi_categorical_vae import tfk 
                                                                 #(from vae_model.multivariate_sequence_vae import tfk)

tf.enable_v2_behavior()

tfk = tf.keras
tfkl = tf.keras.layers
tfpl = tfp.layers
tfd = tfp.distributions

In [30]:
def prepare_sequence_data(file='input_data/prepared_data_182627.csv'):
    def read_data_from_csv(file):
        df = pd.read_csv(file)
        df['Sequences'] = df['Sequences'].apply(literal_eval)
        return df
    
    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in locals() else os.getcwd()
    
    # Create the full path to the CSV file
    file_path = os.path.join(script_dir, '..', 'input_data', 'prepared_data_182627.csv')
    print("File path:", file_path)

    # Load data from CSV file
    df = read_data_from_csv(file_path)

    # Extract sequences
    sequences = np.array(df['Sequences'])

    return sequences


# Usage
sequences = prepare_sequence_data()


File path: C:\Users\z004uyxr\Documents\CVAE-Siemens\vae_probability\model\..\input_data\prepared_data_182627.csv


In [4]:
# Print GPU information
if tf.test.gpu_device_name() != '/device:GPU:0':
    print('WARNING: GPU device not found.')
else:
    print('SUCCESS: Found GPU: {}'.format(tf.test.gpu_device_name()))



In [5]:
# Input shape and VAE parameters
input_shape = (25, 66)
encoded_size = 16
base_depth = 32

In [6]:
# Prior distribution
prior = tfd.Independent(tfd.Normal(loc=tf.zeros(encoded_size), scale=1),
                        reinterpreted_batch_ndims=1)

In [7]:
encoder = tfk.Sequential([
    tfkl.InputLayer(input_shape=(25, 66)),  # Adjust input shape based on your data
    tfkl.Lambda(lambda x: tf.cast(x, tf.float32) - 0.5),
    tfkl.LSTM(2 * base_depth, return_sequences=True),
    tfkl.LSTM(2 * base_depth, return_sequences=True),
    tfkl.LSTM(2 * base_depth, return_sequences=True),
    tfkl.LSTM(base_depth, return_sequences=True),
    tfkl.LSTM(base_depth, return_sequences=True),
    tfkl.LSTM(base_depth, return_sequences=True),
    tfkl.Flatten(),
    tfkl.Dense(tfpl.MultivariateNormalTriL.params_size(encoded_size),
               activation=None),
    tfpl.MultivariateNormalTriL(
        encoded_size,
        activity_regularizer=tfpl.KLDivergenceRegularizer(prior)),
])

# Print shapes of tensors
for layer in encoder.layers:
    print(layer.name, layer.output_shape)



lambda (None, 25, 66)
lstm (None, 25, 64)
lstm_1 (None, 25, 64)
lstm_2 (None, 25, 64)
lstm_3 (None, 25, 32)
lstm_4 (None, 25, 32)
lstm_5 (None, 25, 32)
flatten (None, 800)
dense (None, 152)
multivariate_normal_tri_l ((None, 16), (None, 16))


In [138]:
d = input_shape[1]  # Number of categories, adjust based on your data
k = input_shape[0]  # Number of mixture components, adjust based on your requirement

# Decoder model with LSTMs and CategoricalMixtureOfOneHotCategorical
decoder = tfk.Sequential([
    tfkl.InputLayer(input_shape=[encoded_size]),
    tfkl.Reshape([1, encoded_size]),
    LSTM(2 * base_depth, return_sequences=True),
    LSTM(2 * base_depth, return_sequences=True),
    LSTM(2 * base_depth, return_sequences=True),
    LSTM(base_depth, return_sequences=True),
    LSTM(base_depth, return_sequences=True),
    LSTM(base_depth, return_sequences=True),
    tfkl.Flatten(),
    tfkl.Dense(d*k, activation=None),
    tfpl.CategoricalMixtureOfOneHotCategorical(d, k),  # Output layer with CategoricalMixtureOfOneHotCategorical
])

In [139]:
decoder.summary()

Model: "sequential_27"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_16 (Reshape)        (None, 1, 16)             0         
                                                                 
 lstm_183 (LSTM)             (None, 1, 64)             20736     
                                                                 
 lstm_184 (LSTM)             (None, 1, 64)             33024     
                                                                 
 lstm_185 (LSTM)             (None, 1, 64)             33024     
                                                                 
 lstm_186 (LSTM)             (None, 1, 32)             12416     
                                                                 
 lstm_187 (LSTM)             (None, 1, 32)             8320      
                                                                 
 lstm_188 (LSTM)             (None, 1, 32)           

In [141]:
# VAE model
np.array(sequences[0]) == tfk.Model(inputs=encoder.inputs, outputs=decoder(encoder.outputs[0]))

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [142]:
# Compile the VAE model
negloglik = lambda x, rv_x: -rv_x.log_prob(x)
vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss=negloglik)

In [143]:
# Example from Seq Array
print(np.array(sequences[0]).shape)

# Example Data
example = np.array(sequences[0])
output = vae(np.array([example]))

(25, 66)


In [144]:
output.sample()

<tf.Tensor: shape=(1, 66), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.]], dtype=float32)>

In [145]:
# Convert sequences to NumPy array
sequences_array = np.array(sequences)

In [146]:
# Train the VAE model with only sequences
history = vae.fit(sequences_array, sequences_array, epochs=15, batch_size=256, validation_split=0.1)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).

In [147]:
# Plot training loss and validation loss over epochs
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

NameError: name 'history' is not defined

In [148]:
# Generate sequences
generated_sequences = vae.predict(sequences)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).

In [149]:
# Display the randomly generated samples
print('Randomly Generated Samples:')
# Adjust the display function based on the nature of the output
# For text-based output, you can use print or other suitable methods
print(generated_sequences)

Randomly Generated Samples:
[[[0.5000012 ]]

 [[0.5000057 ]]

 [[0.50000155]]

 [[0.50000155]]

 [[0.49999455]]

 [[0.4999967 ]]

 [[0.49999925]]

 [[0.5000085 ]]

 [[0.49999574]]

 [[0.49999544]]

 [[0.49999836]]

 [[0.49999976]]

 [[0.49999693]]

 [[0.49999753]]

 [[0.4999977 ]]

 [[0.49998787]]

 [[0.5000007 ]]

 [[0.5000026 ]]

 [[0.49999413]]

 [[0.49999943]]

 [[0.500002  ]]

 [[0.49999908]]

 [[0.50000143]]

 [[0.50000215]]

 [[0.49999997]]

 [[0.49999988]]

 [[0.50000596]]

 [[0.50000155]]

 [[0.49999717]]

 [[0.50000453]]

 [[0.49999723]]

 [[0.49999997]]

 [[0.49999556]]

 [[0.5000061 ]]

 [[0.5000032 ]]

 [[0.500004  ]]

 [[0.49999562]]

 [[0.50000215]]

 [[0.49999392]]

 [[0.49999782]]

 [[0.49999395]]

 [[0.50000054]]

 [[0.5000056 ]]

 [[0.49999577]]

 [[0.5000032 ]]

 [[0.49999285]]

 [[0.5000042 ]]

 [[0.50000167]]

 [[0.50000155]]

 [[0.50000083]]

 [[0.5000038 ]]

 [[0.50000143]]

 [[0.4999956 ]]

 [[0.50000226]]

 [[0.4999983 ]]

 [[0.49999782]]

 [[0.49999577]]

 [[