In [None]:
import mne
import numpy as np
import pickle
from pathlib import Path

def preprocess_and_pickle(subject_id: str,
                          gdf_dir: Path,
                          out_dir: Path,
                          l_freq=7.0,
                          h_freq=30.0,
                          tmin=0.0,
                          tmax=4.0):
    """
    subject_id（"A01" など）の E/T gdf を読み込み，
    EEGCCT 論文準拠の前処理を行って pickle に保存する。
    """
    def load_and_epoch(fname):
        raw = mne.io.read_raw_gdf(fname, preload=True)
        # バンドパスフィルタ
        raw.filter(l_freq, h_freq, fir_design='firwin', verbose=False)
        # イベント取得
        events, event_id = mne.events_from_annotations(raw, verbose=False)
        # エポック化
        epochs = mne.Epochs(raw,
                            events=events,
                            event_id=event_id,
                            tmin=tmin,
                            tmax=tmax,
                            baseline=None,
                            preload=True,
                            event_repeated='drop', 
                            verbose=False)
        # Zスコア正規化（チャネルごとに全エポック・時間軸で）
        data = epochs.get_data()  # shape: (n_epochs, n_channels, n_times)
        mean = data.mean(axis=(0,2), keepdims=True)
        std  = data.std(axis=(0,2),  keepdims=True)
        data = (data - mean) / (std + 1e-6)
        # モデル入力形状に調整
        X = data[:, None, :, :]  # → (n_epochs, 1, n_channels, n_times)
        y = epochs.events[:, 2]  # 数値ラベル
        return X, y

    # 訓練／テストそれぞれ処理
    X_train, y_train = load_and_epoch(gdf_dir / f"{subject_id}E.gdf")
    X_test,  y_test  = load_and_epoch(gdf_dir / f"{subject_id}T.gdf")

    # pickle 出力
    out = {
        "X_train": X_train.astype(np.float32),
        "y_train": y_train.astype(np.int64),
        "X_test":  X_test.astype(np.float32),
        "y_test":  y_test.astype(np.int64),
    }
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(out_dir / f"{subject_id}.pkl", "wb") as f:
        pickle.dump(out, f)

    print(f"[+] Saved {subject_id}.pkl: "
          f" train {X_train.shape}/{y_train.shape}, "
          f" test {X_test.shape}/{y_test.shape}")


if __name__ == "__main__":
    gdf_dir = Path("/workspace-cloud/seiya.narukawa/EEGCCT/data/BCICIV_2a")
    out_dir = Path("/workspace-cloud/seiya.narukawa/EEGCCT/dataset/BCICIV_2a")
    subjects = [f"A{str(i).zfill(2)}" for i in range(1, 10)]

    for sid in subjects:
        preprocess_and_pickle(sid, gdf_dir, out_dir)


Extracting EDF parameters from /workspace-cloud/seiya.narukawa/EEGCCT/data/BCICIV_2a/A01E.gdf...
GDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
EEG-Fz, EEG, EEG, EEG, EEG, EEG, EEG, EEG-C3, EEG, EEG-Cz, EEG, EEG-C4, EEG, EEG, EEG, EEG, EEG, EEG, EEG, EEG-Pz, EEG, EEG, EOG-left, EOG-central, EOG-right
Creating raw.info structure...
Reading 0 ... 686999  =      0.000 ...  2747.996 secs...


  next(self.gen)


RuntimeError: Event time samples were not unique. Consider setting the `event_repeated` parameter."

In [1]:
import pickle
import numpy as np
import pandas as pd

pkl_path = '/workspace-cloud/seiya.narukawa/EEGCCT/dataset/BCICIV_2a_dataset.pkl'
with open(pkl_path, 'rb') as f:
    data = pickle.load(f)

n_sessions = len(data)
n_subjects = n_sessions // 2

records = []
for subj in range(n_subjects):
    for sess_label, idx in [('Eval', 2*subj), ('Train', 2*subj+1)]:
        payload = data[idx]
        raw      = payload['raw']
        events   = payload['events']
        event_id = payload['event_id']
        records.append({
            'Subject': subj+1,
            'Session': sess_label,
            'Index': idx,
            'Channels': raw.info['nchan'],
            'Samples': raw.n_times,
            'Events': events.shape[0],
            'Event Codes': list(np.unique(events[:,2])),
            'Event Keys': list(event_id.keys())
        })

df = pd.DataFrame(records)
df


Unnamed: 0,Subject,Session,Index,Channels,Samples,Events,Event Codes,Event Keys
0,1,Eval,0,22,687000,595,"[1, 2, 3, 4, 5, 6, 7]","[1023, 1072, 276, 277, 32766, 768, 783]"
1,1,Train,1,22,672528,603,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]","[1023, 1072, 276, 277, 32766, 768, 769, 770, 7..."
2,2,Eval,2,22,662666,593,"[1, 2, 3, 4, 5, 6, 7]","[1023, 1072, 276, 277, 32766, 768, 783]"
3,2,Train,3,22,677169,606,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]","[1023, 1072, 276, 277, 32766, 768, 769, 770, 7..."
4,3,Eval,4,22,648775,603,"[1, 2, 3, 4, 5, 6, 7]","[1023, 1072, 276, 277, 32766, 768, 783]"
5,3,Train,5,22,660530,606,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]","[1023, 1072, 276, 277, 32766, 768, 769, 770, 7..."
6,4,Eval,6,22,660047,648,"[1, 2, 3, 4, 5, 6, 7]","[1023, 1072, 276, 277, 32766, 768, 783]"
7,4,Train,7,22,600915,610,"[1, 2, 3, 4, 5, 6, 7, 8]","[1023, 1072, 32766, 768, 769, 770, 771, 772]"
8,5,Eval,8,22,679863,600,"[1, 2, 3, 4, 5, 6, 7]","[1023, 1072, 276, 277, 32766, 768, 783]"
9,5,Train,9,22,686120,614,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]","[1023, 1072, 276, 277, 32766, 768, 769, 770, 7..."


In [None]:
datasets = [
    '/workspace-cloud/seiya.narukawa/EEGCCT/dataset/BCICIV_2a_dataset.pkl',]

import pickle
import numpy as np
import pandas as pd

# 1) pickle 読み込み
with open(datasets[0], 'rb') as f:
    data = pickle.load(f)

n_subjects = len(data) // 2

# 2) 左手(769)／右手(770) のラベルIDを取得
#    どのキーに対応しているかは、最初の被験者の Train セッションで確認
first_train_evmap = data[1]['event_id']
left_id  = first_train_evmap['769']
right_id = first_train_evmap['770']

# 3) 各被験者の Train／Eval で左・右の件数をカウント
records = []
for subj in range(n_subjects):
    for sess, idx in [('Eval', 2*subj), ('Train', 2*subj+1)]:
        evmap = data[idx]['event_id']
        evs   = data[idx]['events'][:,2]  # イベントコード列
        
        # 左手／右手それぞれのサンプル数
        # → Eval セッションには769/770 は含まれないので0になるはず
        n_left  = np.sum(evs == left_id)
        n_right = np.sum(evs == right_id)
        
        records.append({
            'Subject': subj+1,
            'Session': sess,
            'Index': idx,
            'Left Trials': int(n_left),
            'Right Trials': int(n_right),
            'Total MI Trials': int(n_left + n_right)
        })

df_counts = pd.DataFrame(records)
df_counts


NameError: name 'datasets' is not defined