<a href="https://colab.research.google.com/github/petervinhchau/public/blob/main/nndl_p1_wk1_ver1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Darren's Code: Environment Setup
# Install necessary libraries quietly
!pip install torch torchvision torchaudio numpy pandas matplotlib seaborn h5py --quiet

# Check PyTorch version and GPU availability
import torch

print(f"PyTorch version: {torch.__version__}")

if torch.cuda.is_available():
    print(f"✅ GPU detected: {torch.cuda.get_device_name(0)}")
else:
    print("⚠️ GPU not detected. Using CPU only.")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m121.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m90.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m54.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m39.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [42]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [53]:
# Alejandro's Code Block: Inspect Dataset Structure Clearly
import h5py
import os

# Set path to your EMG data files
DATA_DIR = "/content/drive/MyDrive/emg_data/subject_89335547"

# Find all .hdf5 files in directory
data_files = [os.path.join(DATA_DIR, file) for file in os.listdir(DATA_DIR) if file.endswith('.hdf5')]

# Clearly print dataset structure from the first file
with h5py.File(data_files[0], 'r') as f:
    print("📂 HDF5 File Structure:")
    def print_structure(name, obj):
        if isinstance(obj, h5py.Dataset):
            print(f" - Dataset: {name}, Fields: {obj.dtype.names}")
        elif isinstance(obj, h5py.Group):
            print(f" - Group: {name}")
    f.visititems(print_structure)


📂 HDF5 File Structure:
 - Group: emg2qwerty
 - Dataset: emg2qwerty/timeseries, Fields: ('emg_right', 'time', 'emg_left')


In [54]:
# Alejandro's Final Corrected Data Loading Code
import h5py
import numpy as np
import os

DATA_DIR = "/content/drive/MyDrive/emg_data/subject_89335547"
data_files = [os.path.join(DATA_DIR, file) for file in os.listdir(DATA_DIR) if file.endswith('.hdf5')]

print(f"✅ Found {len(data_files)} data files.")

emg_right_list, emg_left_list, timestamps_list = [], [], []

for file in data_files:
    with h5py.File(file, 'r') as f:
        dataset = f['emg2qwerty/timeseries']
        emg_right_list.append(np.array(dataset['emg_right']))
        emg_left_list.append(np.array(dataset['emg_left']))
        timestamps_list.append(np.array(dataset['time']))
    print(f"✔️ Loaded file: {os.path.basename(file)}")

# Concatenate EMG data from right and left wrists separately
emg_right = np.concatenate(emg_right_list, axis=0)
emg_left = np.concatenate(emg_left_list, axis=0)
timestamps = np.concatenate(timestamps_list, axis=0)

# Final data shape confirmation
print("\n✅ All EMG sessions loaded and concatenated successfully.")
print(f" - EMG Right data shape: {emg_right.shape}")
print(f" - EMG Left data shape: {emg_left.shape}")
print(f" - Timestamps shape: {timestamps.shape}")


✅ Found 18 data files.
✔️ Loaded file: 2021-07-22-1627001995-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f.hdf5
✔️ Loaded file: 2021-06-03-1622766673-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f.hdf5
✔️ Loaded file: 2021-07-21-1626915176-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f.hdf5
✔️ Loaded file: 2021-06-04-1622862148-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f.hdf5
✔️ Loaded file: 2021-07-22-1627004019-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f.hdf5
✔️ Loaded file: 2021-06-04-1622863166-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f.hdf5
✔️ Loaded file: 2021-06-04-1622861066-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f.hdf5
✔️ Loaded file: 2021-06-03-1622764398-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f.hdf5
✔️ Loaded file: 2021-06-03-1622765527-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f.hdf5
✔️ Loaded file: 2021-07-21-1626916256-keystrokes-d

✅ **Summary of Initial Observations**

- All 18 EMG dataset files loaded successfully without issues.
- **EMG Right Wrist Data Shape:** `(34,689,251 samples, 16 electrodes)`
- **EMG Left Wrist Data Shape:** `(34,689,251 samples, 16 electrodes)`
- **Timestamps Shape:** `(34,689,251 samples,)`
- Data structure and dimensions precisely match the project's guidelines.
- Data integrity is confirmed, with no immediate anomalies detected.

**Next Step (Week 1):** Proceed to data preprocessing (Sriharsha) and model architecture selection (Peter).

