In [None]:
import mne
import numpy as np
import pickle
from pathlib import Path

def preprocess_and_pickle(subject_id: str,
                          gdf_dir: Path,
                          out_dir: Path,
                          l_freq=7.0,
                          h_freq=30.0,
                          tmin=0.0,
                          tmax=4.0):
    """
    Preprocess one subject's T/E GDF recordings and save them as a pickle.

    Reads ``{subject_id}T.gdf`` (training session) and ``{subject_id}E.gdf``
    (evaluation session) from ``gdf_dir``. Each recording is band-pass
    filtered, cut into cue-locked epochs, z-scored per channel and reshaped
    to ``(n_epochs, 1, n_channels, n_times)``. The pipeline is intended to
    follow the preprocessing of the EEGCCT paper.

    Parameters
    ----------
    subject_id : str
        Subject prefix of the GDF files, e.g. ``"A01"``.
    gdf_dir : Path
        Directory containing ``{subject_id}T.gdf`` and ``{subject_id}E.gdf``.
    out_dir : Path
        Directory that receives ``{subject_id}.pkl`` (created if missing).
    l_freq, h_freq : float
        Band-pass edges passed to ``raw.filter``.
    tmin, tmax : float
        Epoch window in seconds relative to each cue annotation.

    Output
    ------
    A dict with keys ``X_train``, ``y_train``, ``X_test``, ``y_test`` is
    pickled. ``X_*`` are float32, ``y_*`` are int64. Labels are
    769 -> 0, 770 -> 1, 771 -> 2, 772 -> 3; epochs locked to the
    "unknown cue" annotation (783) get the placeholder label -1.

    Raises
    ------
    ValueError
        If a recording contains none of the cue annotations above.
    """
    # Cue-onset annotation codes mapped to zero-based class indices.
    cue_to_label = {769: 0, 770: 1, 771: 2, 772: 3}
    unknown_cue = 783      # cue whose class is not stored in the recording
    unknown_label = -1     # placeholder written for such epochs

    def load_and_epoch(fname):
        raw = mne.io.read_raw_gdf(fname, preload=True)
        # NOTE(review): the reader log shows EOG-left/central/right being
        # typed as EEG, so they remain in the output channels — confirm
        # whether the model expects them.

        # Band-pass filter.
        raw.filter(l_freq, h_freq, fir_design='firwin', verbose=False)

        # Keep only cue annotations. Epoching every annotation would mix
        # trial-start / artifact / run-boundary markers into the labels and
        # produces co-located events ("Event time samples were not unique").
        present = set(raw.annotations.description)
        cue_event_id = {
            str(code): code
            for code in (*cue_to_label, unknown_cue)
            if str(code) in present
        }
        if not cue_event_id:
            raise ValueError(f"No cue annotations found in {fname}")
        events, event_id = mne.events_from_annotations(
            raw, event_id=cue_event_id, verbose=False)

        # Epoching.
        epochs = mne.Epochs(raw,
                            events=events,
                            event_id=event_id,
                            tmin=tmin,
                            tmax=tmax,
                            baseline=None,
                            preload=True,
                            event_repeated='drop',
                            verbose=False)

        # Z-score per channel over all epochs and time samples.
        data = epochs.get_data()  # shape: (n_epochs, n_channels, n_times)
        mean = data.mean(axis=(0, 2), keepdims=True)
        std = data.std(axis=(0, 2), keepdims=True)
        # Guard only against exactly-flat channels. A fixed additive epsilon
        # such as 1e-6 is not scale-invariant: MNE stores EEG in volts, so
        # it would be comparable to the signal's own standard deviation.
        std[std == 0] = 1.0
        data = (data - mean) / std

        # Add the singleton "image channel" axis expected by the model.
        X = data[:, None, :, :]  # -> (n_epochs, 1, n_channels, n_times)
        # Translate the annotation codes of the kept epochs into labels.
        y = np.array(
            [cue_to_label.get(int(code), unknown_label)
             for code in epochs.events[:, 2]],
            dtype=np.int64,
        )
        return X, y

    # "T" is the training session and "E" the evaluation session.
    X_train, y_train = load_and_epoch(gdf_dir / f"{subject_id}T.gdf")
    X_test,  y_test  = load_and_epoch(gdf_dir / f"{subject_id}E.gdf")

    if np.any(y_test == unknown_label):
        # NOTE(review): the evaluation recording appears to carry only the
        # "unknown cue" code; the true labels are presumably distributed
        # separately and must be merged downstream — confirm.
        print(f"[!] {subject_id}: test set contains unlabeled epochs "
              f"(label {unknown_label})")

    # Write the pickle.
    out = {
        "X_train": X_train.astype(np.float32),
        "y_train": y_train.astype(np.int64),
        "X_test":  X_test.astype(np.float32),
        "y_test":  y_test.astype(np.int64),
    }
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(out_dir / f"{subject_id}.pkl", "wb") as f:
        pickle.dump(out, f)

    print(f"[+] Saved {subject_id}.pkl: "
          f" train {X_train.shape}/{y_train.shape}, "
          f" test {X_test.shape}/{y_test.shape}")


if __name__ == "__main__":
    # Input GDF recordings and destination for the per-subject pickles.
    gdf_dir = Path("/workspace-cloud/seiya.narukawa/EEGCCT/data/BCICIV_2a")
    out_dir = Path("/workspace-cloud/seiya.narukawa/EEGCCT/dataset/BCICIV_2a")

    # Subject identifiers A01 .. A09.
    subjects = [f"A{i:02d}" for i in range(1, 10)]

    # Convert every subject with the default filter band and epoch window.
    for sid in subjects:
        preprocess_and_pickle(sid, gdf_dir=gdf_dir, out_dir=out_dir)


Extracting EDF parameters from /workspace-cloud/seiya.narukawa/EEGCCT/data/BCICIV_2a/A01E.gdf...
GDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
EEG-Fz, EEG, EEG, EEG, EEG, EEG, EEG, EEG-C3, EEG, EEG-Cz, EEG, EEG-C4, EEG, EEG, EEG, EEG, EEG, EEG, EEG, EEG-Pz, EEG, EEG, EOG-left, EOG-central, EOG-right
Creating raw.info structure...
Reading 0 ... 686999  =      0.000 ...  2747.996 secs...


  next(self.gen)


RuntimeError: Event time samples were not unique. Consider setting the `event_repeated` parameter.