In [None]:
import os
import numpy as np
import sys
print(sys.executable)
import tensorflow as tf
from tensorflow.keras import layers, models
import import_ipynb
import sys
sys.path.insert(1, '../src/')
from config import raw_data_path, univariate_data_path, processed_data_path


In [None]:
def create_windows(data, window_size=500, step=250):
    """Slice every labelled record in ``data`` into fixed-length windows.

    Each sample is indexed with the keys ``"signal"``, ``"preterm"`` (used as
    the label) and ``"record_name"``. Windows start at offsets
    ``0, step, 2 * step, ...`` and only full windows are kept, so a signal
    shorter than ``window_size`` contributes nothing.

    Args:
        data: Iterable of samples supporting ``sample["signal"]``,
            ``sample["preterm"]`` and ``sample["record_name"]``.
        window_size: Number of consecutive signal elements per window.
        step: Offset between the starts of two consecutive windows.

    Returns:
        A tuple ``(windows, labels, record_names)`` where ``windows`` is a
        ``float32`` array with one row per window, ``labels`` is an ``int32``
        array holding the label of the record each window came from, and
        ``record_names`` is a list with the originating record name per window.

    Raises:
        ValueError: If ``window_size`` or ``step`` is not a positive number.
    """
    if window_size <= 0 or step <= 0:
        raise ValueError(
            f"window_size and step must be positive, got window_size={window_size}, step={step}"
        )

    all_windows, labels, record_names = [], [], []

    for sample in data:
        signal, label, record_name = sample["signal"], sample["preterm"], sample["record_name"]

        # Unlabelled records cannot be used for supervised training; skip them.
        if label is None:
            continue

        # The "+ 1" keeps the last window that still fits entirely in the signal.
        for start in range(0, len(signal) - window_size + 1, step):
            all_windows.append(signal[start : start + window_size])
            labels.append(label)
            record_names.append(record_name)

    if all_windows:
        windows = np.array(all_windows, dtype=np.float32)
    else:
        # Keep a 2-D result even when nothing was extracted so downstream
        # reshaping/indexing does not break on a shape-(0,) array.
        # NOTE(review): assumes 1-D signals; for multi-channel signals the
        # trailing dimensions are unknown here — confirm.
        windows = np.empty((0, window_size), dtype=np.float32)

    return windows, np.array(labels, dtype=np.int32), record_names

# Example usage: window every record using the default window_size and step.
# NOTE(review): `target_data` is not defined in any cell visible here — it must
# be created before this cell for Restart & Run All to succeed; confirm where
# it is loaded.
x_train, y_train, record_names = create_windows(target_data)


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

class LSTMClassifier:
    """Sequence classifier built from a frozen encoder, two LSTM layers and a dense head.

    The supplied ``encoder`` is wrapped in ``TimeDistributed`` so it is applied
    to every step along the time axis of the input. The encoded sequence is
    then passed through two stacked LSTM layers and a small fully connected
    head that ends in a softmax over ``num_classes``.

    Note:
        Building the model sets ``encoder.trainable = False`` on the object
        that was passed in, so the caller's encoder instance is modified.
    """

    def __init__(self, encoder, input_shape, num_classes, lstm_units=64, learning_rate=1e-4):
        """Store the encoder and build the compiled Keras model.

        Args:
            encoder: Keras layer/model applied to each time step. Its weights
                are frozen when the classifier is built.
            input_shape: Shape passed to ``layers.Input`` (without the batch
                axis). Presumably ``(time_steps, *encoder_input_shape)`` —
                confirm against the encoder being used.
            num_classes: Number of units of the final softmax layer.
            lstm_units: Number of units in each of the two LSTM layers.
            learning_rate: Learning rate for the Adam optimizer.
        """
        self.encoder = encoder
        self.model = self._build_classifier(input_shape, num_classes, lstm_units, learning_rate)

    def _build_classifier(self, input_shape, num_classes, lstm_units, learning_rate=1e-4):
        """Assemble and compile the encoder -> LSTM -> dense classification model.

        Returns:
            The compiled Keras model (Adam optimizer, sparse categorical
            cross-entropy loss, accuracy metric).
        """
        inputs = layers.Input(shape=input_shape)

        # Freeze encoder weights so only the LSTM/dense layers are trained.
        self.encoder.trainable = False

        # Pass each time step through the encoder separately.
        encoded_sequence = layers.TimeDistributed(self.encoder)(inputs)

        # First LSTM keeps the full sequence for the second one, which only
        # returns its final output.
        x = layers.LSTM(lstm_units, return_sequences=True)(encoded_sequence)
        x = layers.LSTM(lstm_units, return_sequences=False)(x)

        # Fully connected head.
        for units in (32, 16):
            x = layers.Dense(units, activation="relu")(x)
        outputs = layers.Dense(num_classes, activation="softmax")(x)

        model = models.Model(inputs, outputs)
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
            # Sparse loss: targets are expected as integer class indices.
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"],
        )
        return model

    def train(self, x_train, y_train, batch_size=64, epochs=50, validation_split=0.1):
        """Fit the model and return the object produced by ``model.fit``.

        Args:
            x_train: Training inputs forwarded to ``model.fit``.
            y_train: Training targets forwarded to ``model.fit``.
            batch_size: Batch size forwarded to ``model.fit``.
            epochs: Number of epochs forwarded to ``model.fit``.
            validation_split: Validation fraction forwarded to ``model.fit``.
        """
        history = self.model.fit(
            x_train,
            y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
        )
        return history

    def evaluate(self, x_test, y_test):
        """Return the result of ``model.evaluate`` on the given data."""
        return self.model.evaluate(x_test, y_test)

    def predict(self, x):
        """Return the result of ``model.predict`` for ``x``."""
        return self.model.predict(x)


In [None]:
from collections import defaultdict

# Group windows by record name and remember one label per record.
# create_windows attaches the record's label to every window it produces, so
# any window's label can serve as the record label.
# NOTE(review): assumes a record name never appears with two different labels.
grouped_windows = defaultdict(list)
label_by_record = {}
for i, record_name in enumerate(record_names):
    grouped_windows[record_name].append(x_train[i])
    label_by_record[record_name] = y_train[i]

# Concatenate the windows of each record into one long sequence and build the
# matching record-level label list (one label per sequence, not per window).
concatenated_windows = []
record_labels = []
for record_name, windows in grouped_windows.items():
    # Concatenate windows of the same record
    concatenated_windows.append(np.concatenate(windows, axis=0))  # Shape: (num_windows * window_size, )
    record_labels.append(label_by_record[record_name])

if not concatenated_windows:
    raise ValueError("No windows available: cannot build record-level training sequences.")

# Records with different numbers of windows would give a ragged array that
# cannot be stacked into a single batch tensor; fail early with a clear message.
sequence_shapes = {sequence.shape for sequence in concatenated_windows}
if len(sequence_shapes) > 1:
    raise ValueError(
        "Records yield sequences of different shapes "
        f"({sorted(sequence_shapes)}); pad or truncate them to a common length before training."
    )

# Convert to numpy arrays for use in the model
concatenated_windows = np.array(concatenated_windows)
record_labels = np.asarray(record_labels)
# Reshape data for LSTM (add feature dimension if needed)
concatenated_windows = concatenated_windows.reshape(concatenated_windows.shape[0], -1, 1)  # (batch_size, sequence_length, features)

# Now use the LSTM classifier to train on these record-level sequences.
# NOTE(review): `encoder` and `num_classes` are not defined in any cell visible
# here — confirm they exist before this cell runs.
# NOTE(review): with input_shape (sequence_length, 1) the TimeDistributed
# encoder receives one value per time step; confirm this matches the
# encoder's expected input shape.
classifier = LSTMClassifier(encoder, input_shape=(concatenated_windows.shape[1], 1), num_classes=num_classes)
classifier.train(concatenated_windows, record_labels)

