In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error




In [2]:
# Load the already encoded data
file_path = "../data/encoded_176398_HEAD.csv"
df = pd.read_csv(file_path)
print(f"Data loaded successfully from {file_path}.\n")
print("Columns in DataFrame:", df.columns.tolist())

Data loaded successfully from ../data/encoded_176398_HEAD.csv.

Columns in DataFrame: ['datetime', 'sourceID', 'timediff', 'ZAxisInPossible', 'ZAxisOutPossible', 'YAxisDownPossible', 'YAxisUpPossible', 'PTAB', 'BC', 'S1', 'S10', 'S11', 'S12', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'BO1', 'BO2', 'BO3', 'B1', 'B2', 'B3', 'B4', 'B5', 'HE2', 'HE4', 'NE2', 'HE1', 'HE3', 'NE1', 'SHA', 'HW1', 'HW2', 'HW3', '18K', 'FA', 'TO', 'BAL', 'BAR', 'BCL', 'BCR', 'SN', 'HC2', 'HC4', 'HC6', 'HC7', 'NC2', 'HC1', 'HC3', 'HC5', 'NC1', 'Na', 'UFL', 'PA1', 'PA2', 'PA3', 'PA4', 'PA5', 'PA6', 'SP1', 'SP2', 'SP3', 'SP4', 'SP5', 'SP6', 'SP7', 'SP8', 'BL8', 'BR8', 'UFS', 'HEA', 'HEP', 'SC', 'PeH', 'PeN', 'FS', 'FL', 'BY1', 'BY2', 'BY3', 'BL', 'BR', 'HE', 'BL4', 'BR4', 'BL1', 'BR1', 'BL2', 'BR2', 'L7', 'L4', 'H2L', 'N2L', 'H1U', 'N1U', 'He1', 'He2', 'TR1', 'TR2', 'TR3', 'TR4', 'TR5', 'TR6', 'MR', 'ML', 'BL5', 'BR5', 'C24', 'EN', 'SHL', 'SHS', 'BodyPart_from', 'BodyPart_to', 'PatientID_from', 'PatientID_to'

In [3]:
# Drop irrelevant columns
df = df.drop(columns=['datetime','PatientID_from', 'PatientID_to', 'SN', 'BodyPart_from', 'BodyPart_to'])
print("Dropped 'datetime', 'PatientID' and 'BodyPart' columns")

Dropped 'datetime', 'PatientID' and 'BodyPart' columns


In [4]:
# Print the shape of the dataframe after dropping columns
print("DataFrame shape after dropping columns:", df.shape)

DataFrame shape after dropping columns: (4501, 116)


In [5]:
# Separate features and target columns
X = df.drop(columns=['BodyGroup_from', 'BodyGroup_to', 'sourceID'])
y_bodygroup_from = df['BodyGroup_from']
y_bodygroup_to = df['BodyGroup_to']
y_sourceid = df['sourceID']
print("Separated features and targets")

Separated features and targets


In [6]:
# Limit sourceID to the first 30 unique values
unique_source_ids = y_sourceid.unique()[:30]
y_sourceid = y_sourceid[y_sourceid.isin(unique_source_ids)].reset_index(drop=True)

In [7]:
# Normalize the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
print("Scaled features shape:", X_scaled.shape)

Scaled features shape: (4501, 113)


  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


In [8]:
# Define the VAE architecture
input_dim = X_scaled.shape[1]
latent_dim = 2  # Latent space dimension

In [9]:
# Encoder
inputs = Input(shape=(input_dim,))
h = Dense(64, activation='relu')(inputs)
h = Dense(32, activation='relu')(h)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)




In [10]:
# Reparameterization trick
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

z = Lambda(sampling)([z_mean, z_log_var])

In [11]:
# Decoder
decoder_h = Dense(32, activation='relu')
decoder_mean = Dense(64, activation='relu')
outputs = Dense(input_dim, activation='sigmoid')(decoder_mean(decoder_h(z)))

In [12]:
# VAE model
vae = Model(inputs, outputs)

In [13]:
# Loss function
def vae_loss(y_true, y_pred):
    reconstruction_loss = K.mean(K.square(y_true - y_pred))
    kl_loss = -0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var))
    return reconstruction_loss + kl_loss

In [14]:
# Compile the model
vae.compile(optimizer='adam', loss=vae_loss)




In [15]:
# Fit the model
vae.fit(X_scaled, X_scaled, epochs=100, batch_size=32)
print("VAE model training completed.")

Epoch 1/100



TypeError: in user code:

    File "C:\Users\z004uyxr\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\z004uyxr\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\z004uyxr\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\z004uyxr\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1151, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\z004uyxr\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1209, in compute_loss
        return self.compiled_loss(
    File "C:\Users\z004uyxr\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\compile_utils.py", line 329, in __call__
        self._total_loss_mean.update_state(
    File "C:\Users\z004uyxr\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\metrics_utils.py", line 77, in decorated
        result = update_state_fn(*args, **kwargs)
    File "C:\Users\z004uyxr\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\metrics\base_metric.py", line 140, in update_state_fn
        return ag_update_state(*args, **kwargs)
    File "C:\Users\z004uyxr\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\metrics\base_metric.py", line 509, in update_state  **
        sample_weight = tf.__internal__.ops.broadcast_weights(
    File "C:\Users\z004uyxr\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\keras_tensor.py", line 285, in __array__
        raise TypeError(

    TypeError: You are passing KerasTensor(type_spec=TensorSpec(shape=(), dtype=tf.float32, name=None), name='Placeholder:0', description="created by layer 'tf.cast_2'"), an intermediate Keras symbolic input/output, to a TF API that does not allow registering custom dispatchers, such as `tf.cond`, `tf.function`, gradient tapes, or `tf.map_fn`. Keras Functional model construction only supports TF API calls that *do* support dispatching, such as `tf.math.add` or `tf.reshape`. Other APIs cannot be called directly on symbolic Kerasinputs/outputs. You can work around this limitation by putting the operation in a custom Keras layer `call` and calling that layer on this symbolic input/output.


In [16]:
# Encode the data using the trained VAE
encoded_data = vae.predict(X_scaled)

# Print the encoded data shape
print("Encoded data shape:", encoded_data.shape)

Encoded data shape: (4501, 113)


In [17]:
# Combine the encoded data with original labels
encoded_df = pd.DataFrame(encoded_data, columns=[f'encoded_{i}' for i in range(encoded_data.shape[1])])
encoded_df['BodyGroup_from'] = y_bodygroup_from['BodyGroup_from'].reset_index(drop=True)
encoded_df['BodyGroup_to'] = y_bodygroup_from['BodyGroup_to'].reset_index(drop=True)
encoded_df['sourceID'] = y_sourceid.reset_index(drop=True)

KeyError: 'BodyGroup_from'

In [18]:
# Output the predicted data
output_file = "../data/predicted_output.csv"
encoded_df.to_csv(output_file, index=False)
print(f"Predicted data saved to {output_file}.")

Predicted data saved to ../data/predicted_output.csv.
