In [1]:
import numpy as np 
import pandas as pd 
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, GroupKFold
import torch
from torch.utils.data import Dataset
import torch
import torch.nn as nn
import math
import pickle

In [2]:
EEG_SAMPLING_RATE = 200
EEG_LENGTH = 50
AMOUNT_OF_CHANNELS = 20

In [3]:
data = pd.read_csv('/kaggle/input/hms-harmful-brain-activity-classification/train.csv')

In [4]:
TARGETS = ["seizure_vote", "lpd_vote", "gpd_vote", "lrda_vote", "grda_vote", "other_vote"]
y_data = data[TARGETS].values
y_data = y_data / y_data.sum(axis=1,keepdims=True)
data[TARGETS] = y_data

In [5]:
# https://www.kaggle.com/code/kimbyungchun/preprocess-with-mne-for-human
# @cache
BASE_PATH = '/kaggle/input/hms-harmful-brain-activity-classification'
def load_eeg(eeg_id, data_type='train'):
    eeg_df = pd.read_parquet(f'{BASE_PATH}/{data_type}_eegs/{eeg_id}.parquet')
    return eeg_df

In [6]:
eeg_dict = {}
def getEEG(row, data_type):
    if not row.eeg_id in eeg_dict:
        df = load_eeg(row.eeg_id, data_type)  
        eeg_dict[row.eeg_id] = df
    eeg = eeg_dict[row.eeg_id]
    
    start_eeg = int(row.eeg_label_offset_seconds * EEG_SAMPLING_RATE)
    end_eeg = start_eeg + (EEG_LENGTH * EEG_SAMPLING_RATE)

    row_eeg = eeg[start_eeg:end_eeg]
    return row_eeg


In [7]:
TIME_BINS_PER_SECOND = 10
BIN_LENGTH = int((1/TIME_BINS_PER_SECOND)*EEG_SAMPLING_RATE)
features_dict = {}
def extract_time_bins(row_eeg, key):
    middle_10 = row_eeg.iloc[20*EEG_SAMPLING_RATE:30*EEG_SAMPLING_RATE]
    
#     if middle_10.isnull().values.any():
#         return False
    
    new_fts = []
    for chan in range(middle_10.shape[1]):
        col_eeg = middle_10.iloc[:, chan]
        col_eeg = col_eeg.reset_index(drop=True)
        bin_mns = []
        for bin_index in range(100):
            start_index = bin_index * BIN_LENGTH
            end_index = start_index + BIN_LENGTH
            
            bin_eeg = col_eeg[start_index:end_index]
#             bin_eeg = bin_eeg.dropna()

            mn = np.nanmean(bin_eeg)
            
            
            if math.isnan(mn):
                bin_mns.append(0)
#                 print("NAN!", bin_eeg, start_index, key)
            else:
                bin_mns.append(mn)
    
    
        new_fts.append(bin_mns)

    return new_fts

In [8]:
def getAndExtractEEG(row, data_type):
    key = str(row.eeg_id) + "-" + str(row.eeg_sub_id)
    if not key in features_dict:
        print("N", end = " ")
        row_eeg = getEEG(row, data_type)
        features = extract_time_bins(row_eeg, key) #         features = extract_features(row_eeg, key)
        if not features:
            return False
        features_dict[key] = features
        return features
    print("m", end=" ")
    return features_dict[key]
    


In [9]:
class MyDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


In [10]:
with open('/kaggle/input/eeg-bins-2/my_dict.pkl', 'rb') as file:
    features_dict = pickle.load(file)

In [11]:
# Example usage
input_dim_freq = (52 + 4) * AMOUNT_OF_CHANNELS
input_dim = (20*TIME_BINS_PER_SECOND*10) #channels * bins * middle 10 seconds
hidden_dim = 800
output_dim = 6

In [12]:
class CNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 62, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(62, 128, kernel_size=5, padding=1)
        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 128, kernel_size=6, padding=2)
        self.conv5 = nn.Conv2d(128, 62, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(62, 2, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(2 * 4 * 24, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)  # 10 output classes (digits 0-9)

    def forward(self, x):
#         print("xsp", x.shape)
        x = F.relu(self.conv1(x), 2)
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = F.relu(self.conv3(x), 2)
        x = F.relu(F.max_pool2d(self.conv4(x), 2))
        x = F.relu(self.conv5(x), 2)
        x = F.relu(self.conv6(x), 2)
#         print(x.shape)
        x = x.view(-1, 2 * 4 * 24)
        x = F.sigmoid(self.fc1(x))
        x = F.dropout(x, 0.3)
        x = self.fc2(x)
        x = F.softmax(x, dim=1)
        return x


In [13]:
# This piece of code is required to make use of the GPU instead of CPU for faster processing
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [14]:
# from sklearn.pipeline import Pipeline
import torch.nn.functional as F

training_samples = 30000
batch_size = 32
num_epochs = 24

X_train = data.iloc[:training_samples]
y_train = data.iloc[:training_samples][TARGETS]
X_train_proc = []
for index, row in X_train.iterrows():
    fts = getAndExtractEEG(row, "train")
    if not fts:
        y_train = y_train.drop(index, axis=0)
        continue
    X_train_proc.append(fts)
        
X_train_proc = torch.FloatTensor(X_train_proc)
X_train_proc = X_train_proc.unsqueeze(1)

y_train = torch.tensor(np.asarray(y_train), dtype=torch.float32)
train_dataset = MyDataset(X_train_proc, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
model = CNNModel(input_dim, hidden_dim, output_dim)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = torch.nn.DataParallel(model, device_ids = [0,1]).to(device)
    
# Define loss function and optimizer (replace with your choices if needed)
criterion = nn.KLDivLoss(reduction="batchmean")
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    
seen_ys = [0,0,0,0,0,0]
    
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device)
        seen_ys = seen_ys+ torch.sum(batch_y, dim=0).numpy()
        batch_y = batch_y.to(device)
        optimizer.zero_grad()
        prediction = model(batch_X)
        # Calculate loss
        loss = criterion(prediction, batch_y)
        # Compute gradients
        loss.backward()
        optimizer.step()

m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m N m m N m m m m m m m m m m m m m m m m m m m m m N 

  mn = np.nanmean(bin_eeg)


N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m 

  mn = np.nanmean(bin_eeg)


m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m N N m m m m m m m m m m m m m m m N N N m m m m N m m m m m m m m m m m m m m m m N m m m m m m m m m m N m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m N m m N m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m N m m m m m m m m N m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m 

  mn = np.nanmean(bin_eeg)


m m m m m N N m m m N m m m m m m m m m N m m m m m m m m m m N m m m m m m m m m m m m m m m N m m m m m m N m m m m m m m m m m m m m m N N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N 

  mn = np.nanmean(bin_eeg)


m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m N m m m N m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m N m m m m m m m m N m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m N m m m N m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m N m N m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m N m m m m m m m m m N m m m m N m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m N m m m m N N m m m m m m m m m N m m m N m m m m m m N N m m m m N m m m N m m m m m m m N m m m m m m m m m m m N m m N m m m m m m m m m m m m m m m m m m m m N m m m m m m 

  mn = np.nanmean(bin_eeg)


m m m m m m m m m m N m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m N m m m m m m m m m m m N N m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m N m m m m m m N m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m N m N m m m m m m m N 

  mn = np.nanmean(bin_eeg)


m m m m m m m m m N m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m N m m m m m m m m m m m m m m m m m m m m m m N N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m N m m m N m m m m m m m m m m m m N m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m 

  mn = np.nanmean(bin_eeg)


m m N m m m m m m m m m m m m m m m m m m N m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m N m m m m m m N m m m m m m m m m m m m m m N m m m m m m m m m m m m N m m m m m m N m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m N m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m N N m m m N m m m N m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m N m m m m m m N m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m N m m m m m m m m m m m m m 

  mn = np.nanmean(bin_eeg)


m m m m m m m m m m m m m m m m m m m m m m m N m m N m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m N m m m m N m m m m N m m m m m m m m m m m m m m m N m m N m m N m m m m m m N m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m N m m m m N m m m m m m m m m m m N m m m m N m N N m m m m m m m m m m m m m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m N m m m m m m m m m m m N m m m m m m m m N m N m m m m m m N m m m m m m m m m N m m m m m N m m N m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m m N m m m N m m m m m m m m m m m m m m m m m m m m m m N m m m m N m m N m m m m 

In [15]:
test = pd.read_csv('/kaggle/input/hms-harmful-brain-activity-classification/test.csv')
# test.loc[1] = test.loc[0]
test = test.assign(eeg_sub_id='h') 
test = test.assign(eeg_label_offset_seconds=0) 

test_feats = []
for index, row in test.iterrows():
    fts = getAndExtractEEG(row, "test")
    test_feats.append(fts)

# test_feats = test.apply(lambda row: getAndExtractEEG(row, data_type="test"), axis=1)
test_feats = torch.FloatTensor(test_feats)
test_feats = test_feats.unsqueeze(1)

with torch.no_grad():
    model.eval()
    preds = model(test_feats)

m 

In [16]:
sub = pd.DataFrame({'eeg_id':test.eeg_id.values})
sub[TARGETS] = preds.cpu()
sub.to_csv('submission.csv',index=False)
# sub.head()