In [7]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import sys
print(sys.executable)
sys.path.insert(1, '../src/')
from config import raw_data_path, univariate_data_path, processed_data_path, models_path
from tensorflow.keras import layers, models
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args
import tensorflow as tf
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args
import matplotlib.pyplot as plt

/bin/python3.11


In [8]:
data_file = os.path.join(univariate_data_path, 'merged_univariate.npy')
data = np.load(data_file, allow_pickle=True)
print(data.shape)

# Check for NaN or Inf values in signals
for sample in data:
    if np.isnan(sample['signal']).any() or np.isinf(sample['signal']).any():
        print(f"NaN or Inf detected in {sample['record_name']}")

(666,)


In [9]:
def create_windows(sequence, window_size, step_size):
    windows = [sequence[i:i+window_size] for i in range(0, len(sequence) - window_size + 1, step_size)]
    return np.array(windows)

# Set windowing parameters
window_size = 500  
step_size = 250    

# Process each record and apply windowing
all_windows = [create_windows(record['signal'], window_size, step_size) for record in data]
all_windows = np.concatenate(all_windows, axis=0)

KeyboardInterrupt: 

In [4]:
# Function to mask a percentage of data in each window
def mask_data(windows, mask_percentage=0.2):
    masked_windows = windows.copy()
    num_masked = int(mask_percentage * windows.shape[1])
    for i in range(windows.shape[0]):
        mask_indices = np.random.choice(windows.shape[1], num_masked, replace=False)
        masked_windows[i, mask_indices] = np.nan  
    return np.nan_to_num(masked_windows, nan=0.0)



In [5]:
# Code for final autoencoder
# import tensorflow as tf
# from tensorflow.keras import layers, models
# import matplotlib.pyplot as plt

# class MaskedAutoencoder(tf.keras.Model):
#     def __init__(self, input_dim):
#         super(MaskedAutoencoder, self).__init__()

#         # Encoder
#         self.encoder = models.Sequential([
#             layers.Dense(128),
#             layers.ReLU(),
#             layers.Dense(64),
#             layers.ReLU(),
#         ])

#         # Decoder
#         self.decoder = models.Sequential([
#             layers.Dense(128),
#             layers.ReLU(),
#             layers.Dense(input_dim, activation='linear')  # Reconstruct original input
#         ])

#     def call(self, inputs):
#         encoded = self.encoder(inputs)
#         decoded = self.decoder(encoded)
#         return decoded

# # Set input shape
# input_dim = 500  # Each window has 500 elements
# autoencoder = MaskedAutoencoder(input_dim)

# # Use Adam optimizer with lower learning rate and gradient clipping
# optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4, clipnorm=1.0)

# autoencoder.compile(optimizer=optimizer, loss='mse')

# # Train the model and store history
# history = autoencoder.fit(masked_windows, all_windows, epochs=20, batch_size=64, validation_split=0.1)

# # Plot loss per epoch
# plt.figure(figsize=(8, 5))
# plt.plot(history.history['loss'], label='Training Loss')
# plt.plot(history.history['val_loss'], label='Validation Loss', linestyle='dashed')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.title('Autoencoder Training Loss per Epoch')
# plt.legend()
# plt.grid()
# plt.show()
# # Save the encoder model


In [6]:
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args

# Hyperparameter search space (excluding num_layers)
search_space = [
    Real(0.1, 0.75, name='masking_ratio'),  # Masking ratio (10% - 75%)
    Real(1e-5, 1e-2, "log-uniform", name='learning_rate'),  # Learning rate
    Categorical([16, 32, 64], name='batch_size')  # Only powers of 2
]

class MaskedAutoencoder(tf.keras.Model):
    def __init__(self, input_dim):
        super(MaskedAutoencoder, self).__init__()

        # Encoder
        self.encoder = models.Sequential([
            layers.Dense(128),
            layers.ReLU(),
            layers.Dense(64),
            layers.ReLU(),
        ])

        # Decoder
        self.decoder = models.Sequential([
            layers.Dense(128),
            layers.ReLU(),
            layers.Dense(input_dim, activation='linear')  # Reconstruct original input
        ])

    def call(self, inputs):
        encoded = self.encoder(inputs)
        decoded = self.decoder(encoded)
        return decoded

# Optimization function
@use_named_args(search_space)
def objective(masking_ratio, learning_rate, batch_size):
    print("\nTesting Hyperparameter Combination:")
    print(f" - Masking Ratio: {masking_ratio:.2f}")
    print(f" - Learning Rate: {learning_rate:.5f}")
    print(f" - Batch Size: {batch_size}")

    # Apply masking
    masked_windows = mask_data(all_windows, mask_percentage=masking_ratio)

    # Build and compile model
    autoencoder = MaskedAutoencoder(input_dim=window_size)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, clipnorm=1.0)
    autoencoder.compile(optimizer=optimizer, loss='mse')

    # Train the model
    history = autoencoder.fit(masked_windows, all_windows, epochs=10, batch_size=batch_size, validation_split=0.1, verbose=0)

    final_loss = history.history['val_loss'][-1]
    print(f" - Validation Loss: {final_loss:.5f}")

    return final_loss

# Run Bayesian Optimization
result = gp_minimize(objective, search_space, n_calls=30, random_state=42)

# Print best hyperparameters
print("\nBest Hyperparameters:")
print(f"Masking Ratio: {result.x[0]:.2f}")
print(f"Learning Rate: {result.x[1]:.5f}")
print(f"Batch Size: {result.x[2]}")

# Plot optimization results
plt.plot(result.func_vals)
plt.xlabel("Iteration")
plt.ylabel("Validation Loss (MSE)")
plt.title("Bayesian Optimization Progress")
plt.show()



Testing Hyperparameter Combination:
 - Masking Ratio: 0.62
 - Learning Rate: 0.00004
 - Batch Size: 64


2025-03-14 15:20:32.843365: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


 - Validation Loss: 0.51119

Testing Hyperparameter Combination:
 - Masking Ratio: 0.49
 - Learning Rate: 0.00022
 - Batch Size: 16
 - Validation Loss: 0.10445

Testing Hyperparameter Combination:
 - Masking Ratio: 0.40
 - Learning Rate: 0.00010
 - Batch Size: 16
 - Validation Loss: 0.08992

Testing Hyperparameter Combination:
 - Masking Ratio: 0.52
 - Learning Rate: 0.00001
 - Batch Size: 64
 - Validation Loss: 0.95752

Testing Hyperparameter Combination:
 - Masking Ratio: 0.71
 - Learning Rate: 0.00001
 - Batch Size: 64
 - Validation Loss: 1.79948

Testing Hyperparameter Combination:
 - Masking Ratio: 0.50
 - Learning Rate: 0.00068
 - Batch Size: 16
 - Validation Loss: 0.16797

Testing Hyperparameter Combination:
 - Masking Ratio: 0.11
 - Learning Rate: 0.00038
 - Batch Size: 32
 - Validation Loss: 0.03292

Testing Hyperparameter Combination:
 - Masking Ratio: 0.13
 - Learning Rate: 0.00834
 - Batch Size: 16
 - Validation Loss: 1.86019

Testing Hyperparameter Combination:
 - Masking 

KeyboardInterrupt: 

In [26]:
path = os.path.join(models_path, 'encoder_model.keras')
autoencoder.encoder.save(path)  # Saves in Keras format
print('Saved encoder!')

NameError: name 'autoencoder' is not defined