In [None]:
# Mount Google Drive at /content/drive (Colab only) so that the dataset and
# the saved model folders referenced below are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


All the code and functions (to load the data and to load the model) assume the following organization of the files on my Google Drive:
  

```
  Drive:
      - Group Project:
                  - Cross:
                        - train
                        - test1...
                  - best_model_2 - where the model is saved
```


So the path to the train set is '/content/drive/My Drive/Group Project/Cross/train' and the path to the model folder is '/content/drive/My Drive/Group Project/best_model_2'.




## LIB & FUNCTIONS

In [None]:
###### ----- FUNCTION ----- ######
import json
import os
import h5py
import numpy as np
import re
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
import numpy as np
import tensorflow as tf
from sklearn.model_selection import ParameterGrid
from tensorflow.keras.layers import Input, Conv1D, Conv2D, MaxPooling2D, Flatten, Dense, ReLU, BatchNormalization, Reshape, Softmax, Lambda, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
import gc
from tensorflow.keras import backend as K
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
import sys

def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable string with binary prefixes.

    Adapted from Fred Cirera, https://stackoverflow.com/a/1094933/1870254.

    Args:
        num: The quantity to format (e.g. a size in bytes).
        suffix: Unit suffix appended after the binary prefix.

    Returns:
        A string such as "1.5 KiB"; values of 1024**8 and above use "Yi".
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    position = 0
    while position < len(prefixes):
        # Stop at the first prefix for which the magnitude drops below 1024.
        if abs(value) < 1024.0:
            return "%3.1f %s%s" % (value, prefixes[position], suffix)
        value /= 1024.0
        position += 1
    # Anything still >= 1024 after 'Zi' is reported in 'Yi'.
    return "%.1f %s%s" % (value, 'Yi', suffix)

def get_label_from_filename(filename):
    """Derive the class label from a recording's file name.

    The label is the file name with its trailing '_<digits>_<digits>.h5'
    part removed. Names that do not end with that pattern are returned
    unchanged.

    Args:
        filename: File name (without directory) of an .h5 recording.

    Returns:
        The label string.
    """
    # Anchored at the end of the string, so only the final suffix is stripped.
    return re.sub(r'(_\d+_\d+\.h5)$', '', filename)

def downsample_tensor(meg_tensor, downsampling_factor):
    """Keep every `downsampling_factor`-th column of each matrix in a batch.

    No scaling is applied; see downsample_and_scale_tensor for the variant
    that also standardizes the data.

    Args:
        meg_tensor: Iterable of arrays whose last axis has length 1
            (np.squeeze(axis=-1) raises otherwise) and whose squeezed form
            is indexable as [rows, columns].
        downsampling_factor: Step used when slicing the second axis.

    Returns:
        A NumPy array stacking the downsampled matrices, each with the
        trailing singleton axis restored.
    """
    def _decimate(sample):
        # Drop the singleton channel axis, subsample axis 1, then restore it.
        flat = np.squeeze(sample, axis=-1)
        return np.expand_dims(flat[:, ::downsampling_factor], axis=-1)

    return np.array([_decimate(sample) for sample in meg_tensor])

def downsample_and_scale_tensor(meg_tensor, downsampling_factor):
    """Downsample each matrix in a batch and standardize it row by row.

    Args:
        meg_tensor: Iterable of arrays whose last axis has length 1 and
            whose squeezed form is indexable as [rows, columns].
            Presumably rows are sensors and columns are time samples;
            confirm against the data files.
        downsampling_factor: Step used when slicing the second axis.

    Returns:
        A NumPy array stacking the processed matrices, each with the
        trailing singleton axis restored.
    """
    standardized = []
    for sample in meg_tensor:
        # Remove the singleton channel axis and keep every n-th column.
        decimated = np.squeeze(sample, axis=-1)[:, ::downsampling_factor]

        # StandardScaler works per column, so transposing in and out
        # standardizes every row of `decimated` across its columns.
        # A fresh scaler is fitted for each matrix independently.
        scaled = StandardScaler().fit_transform(decimated.T).T

        # Restore the trailing singleton axis.
        standardized.append(np.expand_dims(scaled, axis=-1))

    return np.array(standardized)


def _load_h5_directory(directory, downsampling_factor=10):
    """Load every dataset of every '.h5' file in a folder as one tensor.

    Each dataset gets a trailing singleton axis, the datasets are stacked,
    and the stack is passed through downsample_and_scale_tensor.

    Args:
        directory: Folder containing the .h5 files.
        downsampling_factor: Factor forwarded to downsample_and_scale_tensor.

    Returns:
        (data, labels): the processed NumPy tensor and a NumPy array with
        one label string per dataset, derived from the file names.

    Raises:
        ValueError: If the folder contains no .h5 datasets.
    """
    all_data = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting makes the
    # sample order reproducible across runs and machines.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".h5"):
            continue
        with h5py.File(os.path.join(directory, filename), 'r') as f:
            for dataset_name in f.keys():
                data = f[dataset_name][()]
                # Add a new axis to make it a 3D tensor.
                all_data.append(data[:, :, np.newaxis])
                labels.append(get_label_from_filename(filename))

    if not all_data:
        # np.stack would fail with an opaque message; name the folder instead.
        raise ValueError("No .h5 datasets found in %r" % (directory,))

    data = downsample_and_scale_tensor(np.stack(all_data), downsampling_factor)
    return data, np.array(labels)


def load_tests(base_dir='/content/drive/My Drive/Group Project/Cross'):
    """Load the three test sets (test1, test2, test3) individually.

    Every set is downsampled by a factor of 10 and standardized. Labels
    are returned as strings, not one-hot encoded.

    Args:
        base_dir: Folder containing the 'test1', 'test2' and 'test3'
            sub-folders. Defaults to the Drive layout used by this notebook.

    Returns:
        A 6-tuple (test_data_1, test_label_1, test_data_2, test_label_2,
        test_data_3, test_label_3).

    Raises:
        ValueError: If one of the folders contains no .h5 datasets.
    """
    results = []
    for folder in ('test1', 'test2', 'test3'):
        data, labels = _load_h5_directory(os.path.join(base_dir, folder))
        results.extend((data, labels))
    return tuple(results)

def get_one_hot_label(labels, classes=None):
    """One-hot encode an array of string labels.

    Column indices follow the sorted order of the distinct class names
    (np.unique sorts its output).

    Args:
        labels: Sequence of label strings to encode.
        classes: Optional sequence with the full set of class names. When
            omitted, the classes are taken from `labels` itself, so two
            calls on label sets containing different classes produce
            incompatible encodings. Pass `classes` to pin the mapping, for
            example when a split may lack one of the classes.

    Returns:
        The array produced by to_categorical, one row per label and one
        column per class.

    Raises:
        KeyError: If `classes` is given and a label is not among them.
    """
    # Sorted distinct class names define the column order.
    unique_labels = np.unique(labels if classes is None else classes)
    num_classes = len(unique_labels)

    # Mapping from label string to integer class index.
    label_to_int = {label: i for i, label in enumerate(unique_labels)}
    labels_int = np.array([label_to_int[label] for label in labels])

    # Convert the integer labels to one-hot encoding.
    return to_categorical(labels_int, num_classes=num_classes)

def load_test():
    """Load the three test sets merged into one, with one-hot labels.

    Returns:
        (test_data, test_label_cat): the concatenation of the three test
        tensors along axis 0, and the matching one-hot labels, both as
        TensorFlow tensors.
    """
    test_data_1, test_label_1, test_data_2, test_label_2, test_data_3, test_label_3 = load_tests()
    test_data = tf.concat([test_data_1, test_data_2, test_data_3], axis=0)

    # Encode all labels in a single call so the three sets share one
    # label -> column mapping. Encoding each set separately gives mismatched
    # shapes or silently different columns whenever a set lacks a class.
    all_labels = np.concatenate([test_label_1, test_label_2, test_label_3])
    test_label_cat = tf.convert_to_tensor(get_one_hot_label(all_labels))

    return test_data, test_label_cat

def load_datasets(train_dir='/content/drive/My Drive/Group Project/Cross/train'):
    """Load the training set and the merged test set as TensorFlow tensors.

    The training data is downsampled by a factor of 10 and standardized;
    its labels are one-hot encoded. The test data comes from load_test().
    Shapes and in-memory sizes are printed along the way.

    Args:
        train_dir: Folder containing the training .h5 files. Defaults to
            the Drive layout used by this notebook.

    Returns:
        (train_data, train_label_cat, test_data, test_label_cat)
    """
    ## -----  TRAIN DATASET  ----- ##
    all_data = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting makes the
    # sample order reproducible across runs.
    for filename in sorted(os.listdir(train_dir)):
        if filename.endswith(".h5"):
            with h5py.File(os.path.join(train_dir, filename), 'r') as f:
                for dataset_name in f.keys():
                    data = f[dataset_name][()]
                    # Add a new axis to make it a 3D tensor.
                    all_data.append(data[:, :, np.newaxis])
                    labels.append(get_label_from_filename(filename))

    train_data = downsample_and_scale_tensor(np.stack(all_data), 10)
    train_label = np.array(labels)
    train_label_cat = get_one_hot_label(train_label)

    print("Data Train shape:", train_data.shape, " - size: ", sizeof_fmt(sys.getsizeof(train_data)))
    print("Labels Train one hot shape:", train_label_cat.shape)

    test_data, test_label_cat = load_test()
    # The NumPy copy is only needed to report the shape and size. The tensor
    # returned by load_test() is kept as is, avoiding a round-trip conversion.
    test_data_np = test_data.numpy()

    print("Data Test shape:", test_data_np.shape, " - size: ", sizeof_fmt(sys.getsizeof(test_data_np)))
    print("Labels Test one hot shape:", test_label_cat.shape)
    del test_data_np

    train_data = tf.convert_to_tensor(train_data)
    train_label_cat = tf.convert_to_tensor(train_label_cat)

    return train_data, train_label_cat, test_data, test_label_cat



In [None]:
def load_model(path):
    """Load a saved Keras model and the hyperparameters stored next to it.

    Prints the hyperparameters and the model summary.

    Args:
        path: Folder of the saved model (str or os.PathLike). It must also
            contain a 'hyperparameters.json' file.

    Returns:
        (best_model, best_params): the loaded model and the dict parsed
        from hyperparameters.json.
    """
    # NOTE(review): the model saved by this notebook contains a Lambda layer,
    # whose function is stored as serialized bytecode and run on load.
    # Only load model folders from a trusted source.
    best_model = tf.keras.models.load_model(path)

    # os.path.join also accepts PathLike objects and avoids a doubled
    # separator when `path` already ends with '/'.
    params_file = os.path.join(path, 'hyperparameters.json')
    with open(params_file, 'r', encoding='utf-8') as file:
        best_params = json.load(file)

    print("Hyperparameters:", best_params)
    best_model.summary()
    return best_model, best_params

def accuracy_on_tests(best_model, scale):
    """Evaluate a model on each of the three test sets and print the results.

    The test sets are loaded inside this function (already downsampled by
    10 and standardized by load_tests), then downsampled further by `scale`
    so that they match the model's input size.

    Args:
        best_model: Compiled Keras model whose evaluate() returns
            (loss, accuracy).
        scale: Additional downsampling factor applied to the loaded data.

    Returns:
        None. Loss and accuracy are printed for each test set.
    """
    test_data_1, test_label_1, test_data_2, test_label_2, test_data_3, test_label_3 = load_tests()
    data_sets = [test_data_1, test_data_2, test_data_3]
    label_sets = [test_label_1, test_label_2, test_label_3]

    # Encode all labels in a single call so the three sets share one
    # label -> column mapping even if one of them lacks a class, then split
    # the result back into per-set blocks.
    one_hot_all = np.asarray(get_one_hot_label(np.concatenate(label_sets)))
    boundaries = np.cumsum([len(labels) for labels in label_sets])[:-1]
    one_hot_sets = np.split(one_hot_all, boundaries)

    for index, (data, labels_cat) in enumerate(zip(data_sets, one_hot_sets), start=1):
        data = downsample_tensor(data, scale)
        print("Model validation on Cross/test%d: " % index)
        # Evaluate the model on this test set.
        val_loss, val_accuracy = best_model.evaluate(data, labels_cat)
        print("Validation Loss:", val_loss)
        print("Validation Accuracy:", val_accuracy)

        print("------------------------------------------")

def accuracy_on_test(best_model, scale):
    """Evaluate a model on the merged test set (test1 + test2 + test3).

    The data is loaded inside this function through load_test() and then
    downsampled further by `scale` so that it matches the model input size.

    Args:
        best_model: Compiled Keras model whose evaluate() returns
            (loss, accuracy).
        scale: Additional downsampling factor applied to the loaded data.

    Returns:
        None. Loss and accuracy are printed.
    """
    merged_data, merged_labels = load_test()
    merged_data = downsample_tensor(merged_data, scale)

    print("Model validation on Cross/test1+2+3: ")
    # Evaluate the model on the merged test set.
    metrics = best_model.evaluate(merged_data, merged_labels)
    loss, accuracy = metrics
    print("Validation Loss:", loss)
    print("Validation Accuracy:", accuracy)

    print("------------------------------------------")




## DATA LOAD

##### There is no need to run these cells - go directly to Model Load and Model Test



In [None]:
# Load the train and test datasets with one-hot encoded labels.
# The test dataset is the concatenation of the 3 test sets in the Cross folder.
# load_datasets() already downsamples the data by a factor of 10 and standardizes it.
train_data, train_label_cat, test_data, test_label_cat = load_datasets()

KeyboardInterrupt: 

In [None]:
# load_tests() loads the 3 test sets individually.
# The test sets are downsampled by a factor of 10 (and standardized); the labels
# are plain strings, not one-hot encoded.
test_data_1, test_label_1, test_data_2, test_label_2, test_data_3, test_label_3 = load_tests()

In [None]:
# Same as above, but loads the merged test set (test1 + test2 + test3) with one-hot encoded labels.
test_data, test_label_cat = load_test()

 ## MODEL LOAD


In [26]:
# Folder holding the saved model together with its hyperparameters.json file.
best_model_path = '/content/drive/My Drive/Group Project/best_model_2'
# load_model() also prints the hyperparameters and the model summary.
best_model, best_params = load_model(best_model_path)

Hyperparameters: {'k': 3, 'l': 6, 'p': 1, 's': 1, 'dropout_rate': 0.8, 'l2_lambda': 0.001}
Model: "model_1408"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1409 (InputLayer)     [(None, 248, 594, 1)]     0         
                                                                 
 conv2d_1408 (Conv2D)        (None, 1, 589, 3)         4467      
                                                                 
 lambda_1408 (Lambda)        (None, 3, 589, 1)         0         
                                                                 
 max_pooling2d_1408 (MaxPoo  (None, 3, 196, 1)         0         
 ling2D)                                                         
                                                                 
 flatten_1408 (Flatten)      (None, 588)               0         
                                                                 
 dropout_1408 (Dropout)      (N

## MODEL TEST


In [None]:
# The second argument is the additional downsampling factor.
# The loading functions already downsample the data by a factor of 10, giving samples of size 248x3563x1.
# So if the model has input size 248x594x1, a further downsampling by a factor of 6 is needed to match it.
# The datasets are loaded inside accuracy_on_tests(), so this cell may take a while to run.
accuracy_on_tests(best_model, 6)

Model validation on Cross/test1: 
Validation Loss: 0.8621693253517151
Validation Accuracy: 0.6875
------------------------------------------
Model validation on Cross/test2: 
Validation Loss: 1.015397548675537
Validation Accuracy: 0.6875
------------------------------------------
Model validation on Cross/test3: 
Validation Loss: 0.8193597197532654
Validation Accuracy: 0.75
------------------------------------------


In [None]:
# Evaluate on the merged test set (test1 + test2 + test3) with the same extra downsampling factor of 6.
accuracy_on_test(best_model, 6)

Model validation on Cross/test1+2+3: 
Validation Loss: 0.8989755511283875
Validation Accuracy: 0.7083333134651184
------------------------------------------


In [None]:
# Inspect the configuration of the third layer (the Lambda layer in the summary above).
best_model.layers[2].get_config()

{'name': 'lambda_1408',
 'trainable': True,
 'dtype': 'float32',
 'function': ('4wEAAAAAAAAAAAAAAAUAAAATAAAA8zwAAACXAHQAAAAAAAAAAAAAAKABAAAAAAAAAAAAAAAAAAAA\nAAAAAAB8AGcAZAGiAaYCAACrAgAAAAAAAAAAUwApAk4pBOkAAAAA6QMAAADpAgAAAOkBAAAAKQLa\nAnRm2gl0cmFuc3Bvc2UpAdoBeHMBAAAAIPo/QzovVXNlcnMvbmljY28vQXBwRGF0YS9Mb2NhbC9U\nZW1wL2lweWtlcm5lbF8xNDI5Ni8zMjk1NDc4MDE4LnB5+gg8bGFtYmRhPvohYnVpbGRfY25uX21v\nZGVsLjxsb2NhbHM+LjxsYW1iZGE+CgAAAHMYAAAAgAClEqccohyoYbAcsBywHNEhPtQhPoAA8wAA\nAAA=\n',
  None,
  None),
 'function_type': 'lambda',
 'module': '__main__',
 'output_shape': None,
 'output_shape_type': 'raw',
 'output_shape_module': None,
 'arguments': {}}