In [2]:
# === Import thư viện ===
import os
import pandas as pd
import mne
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal

In [3]:
# === Config ===
# Input/output locations, relative to the notebook's working directory.
raw_dir = "../data/raw"
out_dir = "../data/processed_png"
participants_file = os.path.join(raw_dir, "participants.tsv")

os.makedirs(out_dir, exist_ok=True)

# Segmentation parameters
segment_length_sec = 5  # window duration, in seconds
sampling_rate = 500  # sampling rate assumed for the recordings, in Hz
segment_length = segment_length_sec * sampling_rate  # window length, in samples

overlap_ratio = 0.5  # fraction of a window shared with the next window
if not 0 <= overlap_ratio < 1:
    # A ratio of 1 (or more) would give a non-positive step, and a sliding
    # window that advances by such a step never terminates.
    raise ValueError(f"overlap_ratio must be in [0, 1), got {overlap_ratio}")

# round() rather than int(): int() truncates floating-point error, e.g.
# 2500 * (1 - 0.9) evaluates to 249.99999999999994 and would become 249.
# max(1, ...) guarantees the window always advances by at least one sample.
overlap_step = max(1, round(segment_length * (1 - overlap_ratio)))

In [4]:
# === Load participants metadata ===
# Read the tab-separated participants table and keep only the two columns
# used downstream: the subject identifier and its group label.
participants_df = (
    pd.read_csv(participants_file, sep="\t")
    .loc[:, ["participant_id", "Group"]]
)

# Lookup table: participant_id -> Group
group_mapping = (
    participants_df
    .set_index("participant_id")
    .loc[:, "Group"]
    .to_dict()
)

participants_df.head()


Unnamed: 0,participant_id,Group
0,sub-001,A
1,sub-002,A
2,sub-003,A
3,sub-004,A
4,sub-005,A


In [5]:
# === Helper: render one signal as a spectrogram image ===
def save_spectrogram(signal_1d, fs, out_file):
    """Compute the spectrogram of a 1-D signal and save it as a borderless image.

    Parameters
    ----------
    signal_1d : array-like
        One-dimensional time series (e.g. one channel of one segment).
    fs : float
        Sampling frequency of ``signal_1d`` in Hz.
    out_file : str or path-like
        Destination handed to ``Figure.savefig``; the file extension selects
        the image format.

    Returns
    -------
    None
        The only effect is the image written to ``out_file``.

    Notes
    -----
    Power is drawn in dB. The log is protected by a floor set 120 dB below
    the spectrogram's own peak instead of a fixed additive offset, so the
    rendering does not depend on the amplitude units of the input.
    """
    f, t, Sxx = signal.spectrogram(signal_1d, fs=fs, nperseg=256, noverlap=128)

    # Keep only the 0.5–45 Hz band; everything outside is treated as noise.
    f_mask = (f >= 0.5) & (f <= 45)
    f, Sxx = f[f_mask], Sxx[f_mask, :]

    # A fixed "+ 1e-8" is not scale-invariant: when the power values are far
    # below 1e-8 (as for signals expressed in volts) the offset dominates and
    # log10 degenerates into a nearly linear map. Clamp relative to the peak
    # instead; `tiny` keeps the floor strictly positive for an all-zero input.
    tiny = np.finfo(Sxx.dtype).tiny
    peak = Sxx.max(initial=0.0)
    floor = max(peak * 1e-12, tiny)
    Sxx_db = 10 * np.log10(np.maximum(Sxx, floor))

    fig, ax = plt.subplots(figsize=(3, 3))
    try:
        ax.pcolormesh(t, f, Sxx_db, shading="gouraud", cmap="jet")
        ax.axis("off")
        fig.tight_layout(pad=0)
        fig.savefig(out_file, bbox_inches="tight", pad_inches=0)
    finally:
        # Always release the figure, even if drawing or saving fails;
        # this function is called once per channel per segment.
        plt.close(fig)


In [12]:
# ==== Process one batch of subjects, starting at `first_subject` ====
import gc  # used below to release each recording's memory between subjects

batch_size = 10
first_subject = "sub-080"
total_subjects = len(participants_df)

# Positional index of the first subject of the batch. Looked up by position
# (not by index label) because it is used with .iloc below.
subject_ids = participants_df["participant_id"].to_numpy()
matches = np.flatnonzero(subject_ids == first_subject)
if matches.size == 0:
    raise ValueError(f"{first_subject} not found in participants table")
start_idx = int(matches[0])
end_idx = min(start_idx + batch_size, total_subjects)

batch_subjects = participants_df["participant_id"].iloc[start_idx:end_idx]

for subj in batch_subjects:
    subj_dir = os.path.join(raw_dir, subj, "eeg")
    eeg_file = os.path.join(subj_dir, f"{subj}_task-photomark_eeg.set")

    if not os.path.exists(eeg_file):
        print(f"File EEG không tồn tại: {subj}")
        continue

    print(f"🔹 Loading {subj} ...")
    raw = mne.io.read_raw_eeglab(eeg_file, preload=True)
    data = raw.get_data()  # (n_channels, n_samples)
    ch_names = list(raw.ch_names)
    n_channels, n_samples = data.shape

    # Use the recording's own sampling rate for both the window length and
    # the spectrogram frequency axis; the configured value is only a default.
    fs = float(raw.info["sfreq"])
    if fs != sampling_rate:
        print(f"{subj}: sampling rate {fs} Hz differs from configured "
              f"{sampling_rate} Hz; using {fs} Hz")
    win_len = int(round(segment_length_sec * fs))
    # max(1, ...) guarantees the window advances, whatever overlap_ratio is.
    hop_len = max(1, int(round(win_len * (1 - overlap_ratio))))

    # Output folder depends only on the subject's group: create it once.
    label = group_mapping.get(subj, "Unknown")
    out_subdir = os.path.join(out_dir, label)
    os.makedirs(out_subdir, exist_ok=True)

    # ==== Sliding window segmentation ====
    # Start offsets of every window that fits entirely inside the recording.
    seg_starts = range(0, n_samples - win_len + 1, hop_len)
    for seg_id, start in enumerate(seg_starts):
        end = start + win_len

        # One spectrogram image per channel
        for ch_idx, ch_name in enumerate(ch_names):
            out_file = os.path.join(out_subdir, f"{subj}_segment{seg_id:04d}_{ch_name}.png")
            save_spectrogram(data[ch_idx, start:end], fs, out_file)
    n_segments = len(seg_starts)

    # Free memory: drop the array as well as the Raw object, otherwise the
    # samples stay alive until the next subject overwrites `data`.
    del raw, data
    gc.collect()

    print(f"{subj}: {n_segments} segments x {n_channels} channels saved.")

print(f"Batch done: subjects {batch_subjects.iloc[0]} → {batch_subjects.iloc[-1]}")


🔹 Loading sub-080 ...


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)


sub-080: 28 segments x 19 channels saved.
🔹 Loading sub-081 ...


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)
  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)


sub-081: 34 segments x 19 channels saved.
🔹 Loading sub-082 ...


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)


sub-082: 27 segments x 19 channels saved.
🔹 Loading sub-083 ...


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)


sub-083: 26 segments x 19 channels saved.
🔹 Loading sub-084 ...


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)


sub-084: 27 segments x 19 channels saved.
🔹 Loading sub-085 ...


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)


sub-085: 37 segments x 19 channels saved.
🔹 Loading sub-086 ...


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)
  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)


sub-086: 30 segments x 19 channels saved.
🔹 Loading sub-087 ...


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)
  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)


sub-087: 28 segments x 19 channels saved.
🔹 Loading sub-088 ...


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True)


sub-088: 49 segments x 19 channels saved.
Batch done: subjects sub-080 → sub-088
