In [1]:
# FEATURE EXTRACTION (for the example file)

In [2]:
import os

import h5py
import numpy as np
import torch

# Input file and segmentation settings
hdf5_filename = "data/preprocessed/preprocessed_data.h5"
sampling_rate_hz = 30  # samples per second; converts durations into sample counts
segment_duration = 600  # length of one segment in seconds (10 minutes)
time_steps = segment_duration * sampling_rate_hz  # samples per segment (600 s x 30 Hz = 18000)

def load_hdf5_data(file_path, segment_length=None):
    """Load x/y/z signals from an HDF5 file and cut them into fixed-length segments.

    Every top-level group of the file is read as one recording with the
    datasets "x", "y" and "z". The three axes are stacked into a
    (3, total_timesteps) array, which is then split into consecutive,
    non-overlapping segments. Trailing samples that do not fill a whole
    segment are dropped.

    Args:
        file_path: Path of the HDF5 file to read.
        segment_length: Number of samples per segment. When omitted, the
            module-level ``time_steps`` is used.

    Returns:
        numpy array of shape (num_segments, 3, segment_length). It has zero
        segments when no recording is long enough to fill one.

    Raises:
        ValueError: If ``segment_length`` is not positive.
    """
    if segment_length is None:
        segment_length = time_steps
    if segment_length <= 0:
        raise ValueError(f"segment_length must be positive, got {segment_length}")

    all_segments = []

    with h5py.File(file_path, "r") as hf:
        for night in hf.keys():  # Iterate through nights
            x = hf[night]["x"][:]
            y = hf[night]["y"][:]
            z = hf[night]["z"][:]

            # Stack to form (3, T) format
            data = np.stack([x, y, z], axis=0)  # Shape: (3, total_timesteps)

            # range() excludes its stop value, so "+ 1" is needed to keep the
            # final segment when the recording length is an exact multiple
            # of segment_length.
            last_start = data.shape[1] - segment_length
            for start in range(0, last_start + 1, segment_length):
                all_segments.append(data[:, start:start + segment_length])  # Shape: (3, T)

    if not all_segments:
        # Keep the documented 3-D layout even when nothing was loaded.
        return np.empty((0, 3, segment_length))

    return np.array(all_segments)  # Shape: (num_segments, 3, T)

# Load and preprocess data
data_segments = load_hdf5_data(hdf5_filename)  # Shape: (num_segments, 3, time_steps)

# Convert to 10-second windows before passing through model
window_size = 300  # 10 sec × 30Hz
if time_steps % window_size != 0:
    raise ValueError(
        f"Segment length {time_steps} is not a multiple of window size {window_size}"
    )
num_windows = time_steps // window_size  # Each 10-minute segment contains 60 windows

# Break 10-minute segments into 10-second windows.
# A direct reshape(-1, 3, window_size) on the C-ordered (N, 3, T) array would
# fill the three rows of every window with consecutive chunks of a single
# axis. Instead: split the time axis into (num_windows, window_size), move the
# window axis in front of the channel axis, then merge segments and windows.
reshaped_data = (
    data_segments
    .reshape(-1, 3, num_windows, window_size)  # (N, 3, num_windows, window_size)
    .transpose(0, 2, 1, 3)                     # (N, num_windows, 3, window_size)
    .reshape(-1, 3, window_size)               # (N * num_windows, 3, window_size)
)

print(f"Reshaped input shape for model: {reshaped_data.shape}")  # Should be (num_samples * 60, 3, 300)

# Convert to PyTorch tensor
data_tensor = torch.tensor(reshaped_data, dtype=torch.float32)

print(f"Data shape ready for SSL-Wearables ResNet model: {data_tensor.shape}")  # Should be (num_samples * 60, 3, 300)

Reshaped input shape for model: (19740, 3, 300)
Data shape ready for SSL-Wearables ResNet model: torch.Size([19740, 3, 300])


In [3]:
# Load pretrained model from SSL-Wearables
repo = "OxWearables/ssl-wearables"
har_model = torch.hub.load(repo, "harnet10", class_num=5, pretrained=True)  # Load model
resnet_model = har_model.feature_extractor  # Keep only the feature extractor part
resnet_model.eval()  # Set to evaluation mode

# Debugging
print(f"Input shape: {data_tensor.shape}")  # Should be (batch_size, 3, 300)

# Extract features in mini-batches: one forward pass per `batch_size` windows
# instead of one forward pass per window.
batch_size = 256
extracted_features = []

with torch.no_grad():  # Disable gradient tracking
    for start in range(0, data_tensor.shape[0], batch_size):
        batch = data_tensor[start:start + batch_size]  # Shape: (<=batch_size, 3, 300)
        batch_features = resnet_model(batch).squeeze(-1)  # Drop trailing singleton dim
        extracted_features.append(batch_features.numpy())  # Convert to NumPy

# Convert extracted features into a NumPy array
extracted_features = np.vstack(extracted_features)  # Shape: (total_samples, feature_dim)

print(f"Extracted feature shape: {extracted_features.shape}")

Using cache found in /home/sara/.cache/torch/hub/OxWearables_ssl-wearables_main


131 Weights loaded
Input shape: torch.Size([19740, 3, 300])
Extracted feature shape: (19740, 1024)


In [4]:
# Save features for later classification
features_path = "data/features/extracted_features.npy"

# np.save does not create missing parent directories, so make sure it exists.
os.makedirs(os.path.dirname(features_path), exist_ok=True)
np.save(features_path, extracted_features)

print(f"Features saved to '{features_path}'")

Features saved to 'data/features/extracted_features.npy'
