In [2]:
import numpy as np
import os
from utilities import *
import h5py

# Use os.path.join() to create the correct file path
filepath = get_filepath()
print(f"Base filepath: {filepath}")

# Path to the specific h5 file
h5_filepath = os.path.join(filepath, "Intra", "train", "rest_105923_1.h5")
print(f"H5 file path: {h5_filepath}")

# Load the data
data = read_h5py_file(h5_filepath)

# Check the data properties
print(f"Data min: {np.min(data)}, max: {np.max(data)}, mean: {np.mean(data)}")

# Z-score normalization (time-wise normalization as suggested in the document)
# This normalizes each sensor's time series independently
def z_score_normalize(data):
    """
    Perform z-score normalization on the data time-wise (for each sensor)
    
    Parameters:
    data (numpy.ndarray): Data with shape (n_sensors, n_timepoints)
    
    Returns:
    numpy.ndarray: Normalized data with the same shape
    """
    # Calculate mean and std for each sensor (row)
    mean = np.mean(data, axis=1, keepdims=True)
    std = np.std(data, axis=1, keepdims=True)
    
    # Replace zero std with 1 to avoid division by zero
    std[std == 0] = 1.0
    
    # Z-score normalization: (x - mean) / std
    normalized_data = (data - mean) / std
    
    return normalized_data

# Apply z-score normalization
normalized_data = z_score_normalize(data)

# Check the normalized data properties
print(f"Normalized data min: {np.min(normalized_data)}, max: {np.max(normalized_data)}, mean: {np.mean(normalized_data)}")

# Verify that the normalization worked as expected
# After z-score normalization, each row should have mean ≈ 0 and std ≈ 1
means = np.mean(normalized_data, axis=1)
stds = np.std(normalized_data, axis=1)
print(f"Mean of means: {np.mean(means)}, should be close to 0")
print(f"Mean of stds: {np.mean(stds)}, should be close to 1")



Base filepath: /Users/jesseh/Library/Mobile Documents/com~apple~CloudDocs/AA Master AI/Deep Learning
H5 file path: /Users/jesseh/Library/Mobile Documents/com~apple~CloudDocs/AA Master AI/Deep Learning/Intra/train/rest_105923_1.h5
Data min: -2.788253829211218e-11, max: 1.3240070498299339e-11, mean: 4.094354904508632e-14
Normalized data min: -6.443676015954523, max: 6.593805371269012, mean: 2.03831359330788e-17
Mean of means: 2.3337982895599773e-17, should be close to 0
Mean of stds: 1.0, should be close to 1


  modified_path = localpath.replace('\DL_2', '')


In [None]:
def load_and_normalize_files(directory_path, max_files=None, downsample_factor=None):
    """
    Load and normalize all h5 files in the specified directory
    
    Parameters:
    directory_path (str): Path to the directory containing h5 files
    max_files (int, optional): Maximum number of files to load
    downsample_factor (int, optional): Factor by which to downsample the data
    
    Returns:
    tuple: (data, labels) where data is a list of normalized matrices and labels are the corresponding task types
    """
    data_list = []
    labels = []
    
    # Get all h5 files in the directory
    h5_files = [f for f in os.listdir(directory_path) if f.endswith('.h5')]
    
    # Limit the number of files if specified
    if max_files is not None:
        h5_files = h5_files[:max_files]
    
    for file in h5_files:
       # Extract the task type from the filename
        if file.startswith("task_"):
            parts = file.split('_')
            task_type = '_'.join(parts[:2])  # e.g., task_motor or task_working
        else:
            task_type = file.split('_')[0]  # e.g., rest

        # Load the data
        file_path = os.path.join(directory_path, file)
        matrix = read_h5py_file(file_path)
        
        # Downsample if specified
        if downsample_factor is not None:
            matrix = matrix[:, ::downsample_factor]
        
        # Normalize the data
        normalized_matrix = z_score_normalize(matrix)
        
        # Add to lists
        data_list.append(normalized_matrix)
        labels.append(task_type)
    
    return data_list, labels

# Example usage for Intra-subject classification
intra_train_path = os.path.join(filepath, "Intra", "train")
intra_test_path = os.path.join(filepath, "Intra", "test")

# Load a small subset of files to test the function
# Downsample factor is set to 16 to speed up the process, CHANGE LATER!
train_data, train_labels = load_and_normalize_files(intra_train_path, downsample_factor=16)
test_data, test_labels = load_and_normalize_files(intra_test_path, downsample_factor=16)

# Print summary
print(f"\nLoaded {len(train_data)} training files and {len(test_data)} test files")
print(f"Training labels: {train_labels}")
print(f"Test labels: {test_labels}")
print(f"Shape of first training sample after downsampling: {train_data[0].shape}")


Loaded 32 training files and 8 test files
Training labels: ['rest', 'task_story', 'task_motor', 'task_story', 'task_story', 'task_working', 'task_motor', 'rest', 'rest', 'task_working', 'task_motor', 'rest', 'task_motor', 'task_working', 'task_motor', 'rest', 'rest', 'task_motor', 'task_working', 'task_story', 'task_motor', 'task_working', 'task_motor', 'task_working', 'rest', 'rest', 'task_working', 'task_story', 'task_story', 'task_story', 'task_working', 'task_story']
Test labels: ['task_motor', 'rest', 'task_story', 'task_working', 'task_motor', 'task_story', 'task_working', 'rest']
Shape of first training sample after downsampling: (248, 2227)
