In [15]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
import torch

In [16]:
# Dataset selector: "processed" points BASE_DIR at the preprocessed dataset,
# any other value points it at the raw competition data.
DATA_MODE = "raw"
BASE_DIR = (
    Path("/kaggle/input/hms-preprocessed-dataset")
    if DATA_MODE == "processed"
    else Path("/kaggle/input/hms-harmful-brain-activity-classification")
)

# Geometry of one EEG window.
EEG_SAMPLING_TIME = 50   # seconds
EEG_SAMPLING_RATE = 200  # Hz
EEG_DURATION = EEG_SAMPLING_TIME * EEG_SAMPLING_RATE  # number of samples (rows) per window

N_CLASS = 6
CHANNEL = 20

In [22]:
# Pick the loading strategy from the KAGGLE_KERNEL_RUN_TYPE environment
# variable (treated as an empty string when it is not set).
_run_type = os.environ.get('KAGGLE_KERNEL_RUN_TYPE','')
if _run_type == 'Interactive':
    # Interactive sessions only load the first 250 rows of the table.
    print("Running on Interactive Notebook")
    # NOTE(review): BATCH_SIZE is bound only in this branch — confirm that
    # Batch/local runs define it elsewhere before using it.
    BATCH_SIZE = 2
    train_df = pd.read_csv(BASE_DIR/"train.csv", nrows=250)
else:
    # Batch and local runs load the full table; only the banner differs.
    if _run_type == 'Batch':
        print("Running on Background Notebook")
    else:
        print("Running on Local Notebook")
    train_df = pd.read_csv(BASE_DIR/"train.csv")

Running on Interactive Notebook


In [23]:
def set_seed(seed=42):
    """Seed the random number generators used by this notebook.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    PyTorch (CPU plus every CUDA device), switches cuDNN to deterministic
    mode, and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    # Local import: `random` is not part of the notebook's import cell.
    import random

    random.seed(seed)                 # Python standard-library RNG
    np.random.seed(seed)              # NumPy global RNG
    torch.manual_seed(seed)           # PyTorch CPU RNG
    # Seed every visible GPU, not only the current one; this call is
    # silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)

    # Force cuDNN to pick deterministic kernels and disable the autotuner,
    # which could otherwise select different algorithms between runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # The interpreter reads PYTHONHASHSEED at start-up, so this does not
    # change hashing in the running kernel; it is inherited by any
    # subprocess started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)


# Seed everything with the default seed value.
set_seed()

In [27]:
def get_eeg(q:dict) -> pd.DataFrame:
    """Load the EEG window that belongs to one row of the training table.

    Args:
        q: Record providing ``eeg_id`` (used to build the parquet file name)
            and ``eeg_label_offset_seconds`` (start of the window in seconds).

    Returns:
        The slice of the recording starting at the offset converted to a row
        index and spanning ``EEG_DURATION`` rows (fewer if the file ends
        sooner).
    """
    # In "processed" mode the parquet files sit one directory level deeper.
    parquet_path = (
        BASE_DIR/f"train_eegs/train_eegs/{q['eeg_id']}.parquet"
        if DATA_MODE == "processed"
        else BASE_DIR/f"train_eegs/{q['eeg_id']}.parquet"
    )
    recording = pd.read_parquet(parquet_path)

    # Convert the offset from seconds to a row index using the sampling rate.
    first_row = int(EEG_SAMPLING_RATE * q["eeg_label_offset_seconds"])
    last_row = first_row + EEG_DURATION
    return recording.iloc[first_row:last_row]

In [28]:
# Reload the complete training table (this replaces any row-limited subset
# loaded earlier). The path is built from BASE_DIR instead of being
# hard-coded so it stays consistent with the DATA_MODE selection.
train_df = pd.read_csv(BASE_DIR/"train.csv")
train_df

Unnamed: 0,eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,1628180742,0,0.0,353733,0,0.0,127492639,42516,Seizure,3,0,0,0,0,0
1,1628180742,1,6.0,353733,1,6.0,3887563113,42516,Seizure,3,0,0,0,0,0
2,1628180742,2,8.0,353733,2,8.0,1142670488,42516,Seizure,3,0,0,0,0,0
3,1628180742,3,18.0,353733,3,18.0,2718991173,42516,Seizure,3,0,0,0,0,0
4,1628180742,4,24.0,353733,4,24.0,3080632009,42516,Seizure,3,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106795,351917269,6,12.0,2147388374,6,12.0,4195677307,10351,LRDA,0,0,0,3,0,0
106796,351917269,7,14.0,2147388374,7,14.0,290896675,10351,LRDA,0,0,0,3,0,0
106797,351917269,8,16.0,2147388374,8,16.0,461435451,10351,LRDA,0,0,0,3,0,0
106798,351917269,9,18.0,2147388374,9,18.0,3786213131,10351,LRDA,0,0,0,3,0,0


In [76]:
# Collect, for every row of the training table, the per-column mean and
# standard deviation of its EEG window, grouped by expert consensus label.
label_unique = train_df['expert_consensus'].unique()

# One plain list of pandas Series per label (previously built through
# `list(pd.Series())`, which only produced an empty list the long way and
# warns about the missing dtype on pandas 1.x).
label_means = {name: [] for name in label_unique}
label_stds = {name: [] for name in label_unique}

for _idx, q in train_df.iterrows():
    eeg = get_eeg(q)
    label = q['expert_consensus']
    # axis=0 reduces over rows (time samples), leaving one value per column.
    label_means[label].append(eeg.mean(axis=0))
    label_stds[label].append(eeg.std(axis=0))

In [77]:
# Print, per label, how the per-window statistics are distributed.
for label in label_unique:
    # Place the collected Series side by side: one column per window,
    # one row per EEG column. Each table is built once and reused.
    means_table = pd.concat(label_means[label], axis=1)
    stds_table = pd.concat(label_stds[label], axis=1)

    # axis=1 aggregates across windows, leaving one value per EEG column.
    label_mean_mean = means_table.mean(axis=1)
    label_mean_std = means_table.std(axis=1)
    label_std_mean = stds_table.mean(axis=1)
    label_std_std = stds_table.std(axis=1)

    print("Label:", label)
    sections = (
        ("Mean of Means:", label_mean_mean),
        ("Standard Deviation of Means:", label_mean_std),
        ("Mean of Standard Deviations:", label_std_mean),
        ("Standard Deviation of Standard Deviations:", label_std_std),
    )
    for heading, values in sections:
        print(heading)
        print(values)
    print("-"*50)

Label: Seizure
Mean of Means:
Fp1     50.955864
F3      26.456486
C3      28.537029
P3      25.183979
F7      26.924831
T3      26.665384
T5      25.636976
O1      57.573227
Fz      25.919598
Cz      25.685980
Pz      25.052387
Fp2     35.397579
F4      26.829067
C4      32.213108
P4      31.913286
F8      21.598335
T4      24.979172
T6      27.421057
O2      23.954506
EKG    180.313843
dtype: float32
Standard Deviation of Means:
Fp1     575.878601
F3      328.716370
C3      364.685242
P3      322.887848
F7      340.312683
T3      361.525238
T5      314.337006
O1      826.234436
Fz      335.496521
Cz      321.757111
Pz      323.078186
Fp2     440.361603
F4      367.875519
C4      374.227081
P4      454.789246
F8      330.026245
T4      345.016815
T6      359.675262
O2      343.556183
EKG    1919.404663
dtype: float32
Mean of Standard Deviations:
Fp1    384.966644
F3     298.896301
C3     292.065704
P3     291.984161
F7     292.064514
T3     285.609863
T5     282.450226
O1     310.60644