In [None]:
import os
os.environ["KERAS_BACKEND"] = "torch" # you can also use tensorflow or torch

import keras_cv
import keras
from keras import ops
import tensorflow as tf

import cv2
import pandas as pd
import numpy as np
from glob import glob
from tqdm.notebook import tqdm
import joblib

import matplotlib.pyplot as plt
import gc

print("TensorFlow:", tf.__version__)
print("Keras:", keras.__version__)
print("KerasCV:", keras_cv.__version__)

In [None]:
# --- Configuration for the tabular (CatBoost) feature pipeline ---
REMOVE_DUPLICATE_EEG_IDS = True # if True, each row in training corresponds to a unique eeg_id
MAX_ROWS = None # use only the first MAX_ROWS rows of the training frame; None keeps all rows
USE_CACHE = True # keep every parquet file that has been read (eeg and spectrogram) in memory
DATA_DIR = 'data' # root folder of the competition data, e.g. /kaggle/input/hms-harmful-brain-activity-classification/

EEG_N_WINDOWS_ONE_SIDE = 2 # number of eeg 2s windows either side of the centre window to include for features
SPG_N_WINDOWS_ONE_SIDE = 4 # number of spectrogram 10s windows either side of the centre window to include for features

In [None]:
def read_parquet_cache(path):
    """Create a reader for the parquet files stored under ``path``.

    The returned function takes an id and loads ``{path}/{id}.parquet``.
    When the module-level ``USE_CACHE`` flag is truthy, every frame that has
    been loaded is kept in a dict private to this reader and handed back on
    later requests for the same id (the same object, not a copy).

    Args:
        path: Folder that contains the ``<id>.parquet`` files.

    Returns:
        A function ``read_parquet(id_)`` returning a pandas DataFrame.
    """
    loaded = {}

    def read_parquet(id_):
        try:
            return loaded[id_]
        except KeyError:
            pass  # not cached yet, fall through to the disk read

        frame = pd.read_parquet(f'{path}/{id_}.parquet')
        if USE_CACHE:
            loaded[id_] = frame
        return frame

    return read_parquet

# One reader per parquet folder; each call to read_parquet_cache creates its own cache dict.
read_eeg = read_parquet_cache(path=f'{DATA_DIR}/train_eegs')
read_eeg_test = read_parquet_cache(path=f'{DATA_DIR}/test_eegs')
read_spg = read_parquet_cache(path=f'{DATA_DIR}/train_spectrograms')
read_spg_test = read_parquet_cache(path=f'{DATA_DIR}/test_spectrograms')

def eeg_window(row, train=True):
    """Return the EEG rows that belong to one metadata row.

    Args:
        row: Record with an ``eeg_id`` attribute and, for training rows,
            ``eeg_label_offset_seconds``.
        train: If True, read from the training folder and cut the 50-second
            span (200 rows per second) that starts at the label offset.
            If False, read from the test folder and return the whole file.

    Returns:
        A pandas DataFrame with the selected EEG rows.
    """
    if not train:
        return read_eeg_test(row.eeg_id)

    recording = read_eeg(row.eeg_id)
    start_sec = int(row.eeg_label_offset_seconds)
    first_row = 200 * start_sec
    last_row = 200 * (start_sec + 50)  # exclusive
    return recording.iloc[first_row:last_row]

def spg_window(row, train=True):
    """Return the spectrogram rows that belong to one metadata row.

    Args:
        row: Record with a ``spectrogram_id`` attribute and, for training
            rows, ``spectrogram_label_offset_seconds``.
        train: If True, read from the training folder and keep only rows whose
            ``time`` lies in ``[offset, offset + 600)``. If False, read from
            the test folder and keep every row.

    Returns:
        A pandas DataFrame without the ``time`` column.
    """
    spg_data = read_spg(row.spectrogram_id) if train else read_spg_test(row.spectrogram_id)
    if train:
        spg_offset = int(row.spectrogram_label_offset_seconds)
        in_window = (spg_data.time >= spg_offset) & (spg_data.time < spg_offset + 600)
        spg_data = spg_data.loc[in_window]
    # Drop 'time' on BOTH paths. Previously only the training path dropped it,
    # so test features carried extra time_mean_*/time_std_* columns that the
    # model was never trained on. drop() returns a new frame, so the cached
    # DataFrame is left untouched.
    return spg_data.drop(columns=['time'])

In [None]:
# EEG window geometry: a 50 s clip sampled at 200 rows per second, cut into 2 s windows.
N_SECS_PER_DF = 50
N_SECS_PER_WINDOW = 2
N_ROWS_PER_SEC = 200

N_WINDOWS = N_SECS_PER_DF / N_SECS_PER_WINDOW  # 25.0 windows per clip
I_CENTRE_WINDOW = N_WINDOWS / 2 - 0.5  # 12.0, zero-based index of the middle window
I_ROW_LEFT = int(N_ROWS_PER_SEC * N_SECS_PER_WINDOW * I_CENTRE_WINDOW)  # 4800, first row of the centre window
N_ROWS_PER_WINDOW = N_ROWS_PER_SEC * N_SECS_PER_WINDOW  # 400 rows per window
I_ROW_RIGHT = I_ROW_LEFT + N_ROWS_PER_WINDOW  # 5200, exclusive end of the centre window

# NOTE(review): the spectrogram feature cell further down assigns N_SECS_PER_DF,
# N_SECS_PER_WINDOW, N_WINDOWS, I_CENTRE_WINDOW, I_ROW_LEFT, N_ROWS_PER_WINDOW and
# I_ROW_RIGHT again, so once that cell has run these names hold the spectrogram values.
print(I_ROW_LEFT, I_ROW_RIGHT, N_ROWS_PER_WINDOW)

def eeg_features(eeg_df, w=EEG_N_WINDOWS_ONE_SIDE,
                 i_row_left=I_ROW_LEFT, n_rows_per_window=N_ROWS_PER_WINDOW):
    """Per-window mean and standard deviation of every EEG column.

    The window geometry is captured as default arguments, i.e. evaluated when
    this ``def`` runs. The previous version looked up the module-level
    ``I_ROW_LEFT`` / ``I_ROW_RIGHT`` / ``N_ROWS_PER_WINDOW`` at call time, but
    the spectrogram cell re-assigns those names before this function is first
    called, so EEG features were silently cut with spectrogram geometry.

    Args:
        eeg_df: DataFrame holding one EEG clip, one row per sample.
        w: Number of windows taken on each side of the centre window.
        i_row_left: Row index where the centre window starts.
        n_rows_per_window: Number of rows in one window.

    Returns:
        A pandas Series indexed by ``{column}_mean_{i}`` and
        ``{column}_std_{i}`` for ``i`` in ``-w .. w``.
    """
    features = []
    for i in range(-w, w + 1):
        start = i_row_left + n_rows_per_window * i
        # Slice once and reuse it for both statistics.
        window = eeg_df.iloc[start:start + n_rows_per_window]

        mean = window.mean(axis=0)
        mean.index = [f'{label}_mean_{i}' for label in mean.index]
        features.append(mean)

        std = window.std(axis=0)
        std.index = [f'{label}_std_{i}' for label in std.index]
        features.append(std)
    return pd.concat(features, axis=0)

In [None]:
# Spectrogram window geometry: a 600 s clip with one row per 2 s, cut into 10 s windows.
# These assignments replace the values that the EEG cell gave to the same names.
N_SECS_PER_DF = 600
N_SECS_PER_WINDOW = 10
N_SECS_PER_ROW = 2

N_WINDOWS = N_SECS_PER_DF / N_SECS_PER_WINDOW  # 60.0 windows per clip
I_CENTRE_WINDOW = N_WINDOWS / 2 - 0.5  # 29.5, the clip centre falls between two windows
I_ROW_LEFT = int(N_SECS_PER_WINDOW * I_CENTRE_WINDOW / N_SECS_PER_ROW)  # int(147.5) == 147
N_ROWS_PER_WINDOW = int(N_SECS_PER_WINDOW / N_SECS_PER_ROW)  # 5 rows per window
I_ROW_RIGHT = I_ROW_LEFT + N_ROWS_PER_WINDOW  # 152, exclusive end of the centre window

print(I_ROW_LEFT, I_ROW_RIGHT, N_ROWS_PER_WINDOW)

def spg_features(spg_df, w=SPG_N_WINDOWS_ONE_SIDE,
                 i_row_left=I_ROW_LEFT, n_rows_per_window=N_ROWS_PER_WINDOW):
    """Per-window mean and standard deviation of every spectrogram column.

    The clip is divided into equally sized windows counted from the centre
    window; with the constants defined in this cell the centre window covers
    rows 147 to 152 (exclusive). The geometry is captured as default
    arguments when this ``def`` runs, so later re-assignment of the
    module-level constants (they share names with the EEG constants) cannot
    change what this function computes.

    Args:
        spg_df: DataFrame holding one spectrogram clip, one row per time step.
        w: Number of windows taken on each side of the centre window.
        i_row_left: Row index where the centre window starts.
        n_rows_per_window: Number of rows in one window.

    Returns:
        A pandas Series indexed by ``{column}_mean_{i}`` and
        ``{column}_std_{i}`` for ``i`` in ``-w .. w``.
    """
    features = []
    for i in range(-w, w + 1):
        start = i_row_left + n_rows_per_window * i
        # Slice once and reuse it for both statistics.
        window = spg_df.iloc[start:start + n_rows_per_window]

        mean = window.mean(axis=0)
        mean.index = [f'{label}_mean_{i}' for label in mean.index]
        features.append(mean)

        std = window.std(axis=0)
        std.index = [f'{label}_std_{i}' for label in std.index]
        features.append(std)
    return pd.concat(features, axis=0)

In [None]:
# Load the training metadata and preview its first row.
train_csv = pd.read_csv(f'{DATA_DIR}/train.csv')
train_csv.head(1)

In [None]:
# remove duplicate eeg_ids (keeping the median one only) if needed

def actual_median(s):
    """Return the element of ``s`` that lies closest to the median of ``s``.

    Unlike ``s.median()``, the result is always a value that occurs in ``s``.

    Args:
        s: Numeric pandas Series.

    Returns:
        The entry of ``s`` with the smallest absolute distance to the median.
    """
    distance_to_median = (s - s.median()).abs()
    nearest_position = distance_to_median.argsort().iloc[0]
    return s.iloc[nearest_position]

# Without de-duplication the training frame is train_csv itself (same object, not a copy).
df_train = train_csv

if REMOVE_DUPLICATE_EEG_IDS:
    # For every eeg_id pick the label offset that is closest to that id's median offset.
    df_unique_eeg = train_csv.groupby('eeg_id')[['eeg_label_offset_seconds']].agg(actual_median)
    # Join back to recover the remaining columns of the chosen (eeg_id, offset) rows.
    df_train = pd.merge(df_unique_eeg, train_csv, on=['eeg_id', 'eeg_label_offset_seconds'], how='left')

# Optional row cap; a slice end of None keeps every row.
df_train = df_train[:MAX_ROWS]

df_train.head(1)

In [None]:
# One row of EEG window statistics per training row (row-wise apply, reads the EEG file for each row).
features_eeg = df_train.apply(lambda row: eeg_features(eeg_window(row)), axis=1)
features_eeg.head(1)

In [None]:
# One row of spectrogram window statistics per training row.
features_spg = df_train.apply(lambda row: spg_features(spg_window(row)), axis=1)
# Preview a single row, like the EEG feature cell, instead of rendering the whole frame.
features_spg.head(1)

In [None]:
# Work on a copy: the vote columns are overwritten below, and a plain alias
# would write through to df_train (and to train_csv when duplicates are kept).
data_processed = df_train.copy()

col_features = list(features_eeg.columns) + list(features_spg.columns)
# The six vote columns are expected to be the last six columns of the frame.
col_targets = list(df_train.columns[-6:])

# Turn raw vote counts into per-row proportions (each row sums to 1).
y = data_processed[col_targets]
y = y.div(y.sum(axis=1), axis=0)

data_processed[col_targets] = y

# Append the engineered features column-wise; rows are aligned on the index.
data_processed = pd.concat([data_processed, features_eeg, features_spg], axis=1)
data_processed = data_processed.fillna(0)  # fillna(0) / dropna()
data_processed = data_processed.reset_index()

# Down-weight rows so that every eeg_id contributes a total weight of 1.
data_processed['sample_weight'] = 1.0 / data_processed.groupby('eeg_id')['eeg_sub_id'].transform('count')

data_processed.head(1)

In [None]:
import sys
# Make the competition metric module importable.
sys.path.append('/kaggle/input/kaggle-kl-div')

from kaggle_kl_div import score
import catboost as cat
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import GroupKFold

# Integer class ids for the expert_consensus labels.
class_ids = {'Seizure':0, 'LPD':1, 'GPD':2, 'LRDA':3, 'GRDA':4, 'Other':5}

model = CatBoostClassifier(task_type='GPU', loss_function='MultiClass')

# Features, hard labels derived from expert_consensus, and per-row weights.
train_pool = Pool(
        data=data_processed.loc[:, col_features],
        label=data_processed.loc[:, 'expert_consensus'].map(class_ids),
        weight=data_processed.loc[:, 'sample_weight']
)

# Fit on all processed rows (no hold-out split in this cell) and persist the model.
model.fit(train_pool, verbose=100)
model.save_model('model_full.cat')

In [None]:
# In-sample check: score the model on the same pool it was trained on.
# Take an explicit copy so that adding the 'id' column below never writes
# into (or warns about) a view of data_processed.
solution_df = data_processed.loc[:, col_targets].copy()
submission_df = pd.DataFrame(model.predict_proba(train_pool), columns=solution_df.columns)

# The metric joins solution and submission on a shared row-id column.
solution_df['id'] = np.arange(len(solution_df))
submission_df['id'] = np.arange(len(submission_df))

score(solution=solution_df,
      submission=submission_df,
      row_id_column_name='id')

In [None]:
test_csv = pd.read_csv(f'{DATA_DIR}/test.csv')

# Separate names for the test features so the training feature frames
# (features_eeg / features_spg) are not overwritten.
features_eeg_test = test_csv.apply(lambda row: eeg_features(eeg_window(row, train=False)), axis=1)
features_spg_test = test_csv.apply(lambda row: spg_features(spg_window(row, train=False)), axis=1)

# Present exactly the columns the model was trained on, in the same order,
# and fill missing values with 0 as was done for the training features.
# Selecting by col_features raises a KeyError if a trained feature is missing.
features_test = pd.concat([features_eeg_test, features_spg_test], axis=1)
features_test = features_test.loc[:, col_features].fillna(0)

# Reload the persisted model rather than relying on the in-memory one.
model = CatBoostClassifier(task_type='GPU')
model.load_model('model_full.cat')

test_pool = Pool(data=features_test)

pred = model.predict_proba(test_pool)

# Class probabilities come back in class-id order, which matches col_targets.
submission_1 = pd.DataFrame({'eeg_id': test_csv.eeg_id.values})
submission_1[col_targets] = pred
submission_1

In [None]:
# Scratch example: use the 'Name' column as the index of a small DataFrame.
# The frame gets its own name so it does not occupy `df`, which later cells
# use for the training metadata; set_index returns a new frame (no inplace).
demo_df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston']
}).set_index('Name')

demo_df

In [None]:
class CFG:
    """Fixed settings and label maps for the spectrogram image classifier."""

    # --- run control ---
    verbose = 1  # verbosity level passed to training
    seed = 42  # random seed

    # --- model / input ---
    preset = "efficientnetv2_b2_imagenet"  # pretrained classifier preset
    image_size = [400, 300]  # input image size

    # --- training schedule ---
    epochs = 13  # number of training epochs
    batch_size = 64  # samples per batch
    lr_mode = "cos"  # LR scheduler mode, one of "cos", "step", "exp"
    drop_remainder = True  # drop incomplete batches

    # --- data ---
    num_classes = 6  # number of target classes
    fold = 0  # fold index held out for validation
    class_names = ['Seizure', 'LPD', 'GPD', 'LRDA','GRDA', 'Other']

    # Both directions of the class-name <-> integer-label mapping.
    label2name = {idx: name for idx, name in enumerate(class_names)}
    name2label = {name: idx for idx, name in enumerate(class_names)}

# Seed Keras' random number generation with the configured seed.
keras.utils.set_random_seed(CFG.seed)

In [None]:
# Root of the competition data for the image-model half of the notebook.
# NOTE(review): the feature pipeline above reads from DATA_DIR instead; confirm both point at the same data.
BASE_PATH = "/kaggle/input/hms-harmful-brain-activity-classification"

# Scratch folder that receives the spectrograms converted to .npy files.
SPEC_DIR = "/tmp/dataset/hms-hbac"
os.makedirs(SPEC_DIR+'/train_spectrograms', exist_ok=True)
os.makedirs(SPEC_DIR+'/test_spectrograms', exist_ok=True)

In [None]:
# Train + Valid: attach file paths and integer class labels to the metadata.
df = pd.read_csv(f'{BASE_PATH}/train.csv')
df['eeg_path'] = f'{BASE_PATH}/train_eegs/'+df['eeg_id'].astype(str)+'.parquet'
df['spec_path'] = f'{BASE_PATH}/train_spectrograms/'+df['spectrogram_id'].astype(str)+'.parquet'
# spec2_path points at the converted .npy copy under SPEC_DIR.
df['spec2_path'] = f'{SPEC_DIR}/train_spectrograms/'+df['spectrogram_id'].astype(str)+'.npy'
df['class_name'] = df.expert_consensus.copy()
df['class_label'] = df.expert_consensus.map(CFG.name2label)
display(df.head(2))

# Test: same path columns, no labels.
test_df = pd.read_csv(f'{BASE_PATH}/test.csv')
test_df['eeg_path'] = f'{BASE_PATH}/test_eegs/'+test_df['eeg_id'].astype(str)+'.parquet'
test_df['spec_path'] = f'{BASE_PATH}/test_spectrograms/'+test_df['spectrogram_id'].astype(str)+'.parquet'
test_df['spec2_path'] = f'{SPEC_DIR}/test_spectrograms/'+test_df['spectrogram_id'].astype(str)+'.npy'
display(test_df.head(2))

In [None]:
# Convert a single spectrogram parquet file into a float32 .npy array
def process_spec(spec_id, split="train"):
    """Read one spectrogram parquet file and save it as ``.npy``.

    Args:
        spec_id: Spectrogram id; the file read is
            ``{BASE_PATH}/{split}_spectrograms/{spec_id}.parquet``.
        split: ``"train"`` or ``"test"``; selects input and output folder.

    Side effects:
        Writes ``{SPEC_DIR}/{split}_spectrograms/{spec_id}.npy``.
    """
    spec_path = f"{BASE_PATH}/{split}_spectrograms/{spec_id}.parquet"
    spec = pd.read_parquet(spec_path)
    # NaN -> 0, drop the first column (presumably `time`), then transpose
    # (Time, Freq) -> (Freq, Time).
    spec = spec.fillna(0).values[:, 1:].T
    # The dataset decoders read the saved file as raw bytes and reshape it
    # row-major, so the array must be written in C order. np.save keeps
    # Fortran order for F-contiguous input; make the layout explicit here
    # instead of relying on how pandas happens to lay out `.values`.
    spec = np.ascontiguousarray(spec, dtype="float32")
    np.save(f"{SPEC_DIR}/{split}_spectrograms/{spec_id}.npy", spec)

# Get unique spec_ids of train and valid data
spec_ids = df["spectrogram_id"].unique()

# Convert the training spectrograms in parallel with joblib (n_jobs=-1, loky backend);
# the returned list is discarded because process_spec only writes files.
_ = joblib.Parallel(n_jobs=-1, backend="loky")(
    joblib.delayed(process_spec)(spec_id, "train")
    for spec_id in tqdm(spec_ids, total=len(spec_ids))
)

# Get unique spec_ids of test data
test_spec_ids = test_df["spectrogram_id"].unique()

# Same conversion for the test spectrograms
_ = joblib.Parallel(n_jobs=-1, backend="loky")(
    joblib.delayed(process_spec)(spec_id, "test")
    for spec_id in tqdm(test_spec_ids, total=len(test_spec_ids))
)

In [None]:
from sklearn.model_selection import StratifiedGroupKFold

# 5 folds, stratified on class_label and grouped by patient_id, so one patient's
# rows never end up on both sides of a split.
sgkf = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=CFG.seed)

df["fold"] = -1  # placeholder until a fold is assigned
# Positional indices from split() are used as labels in .loc below, so the index must be 0..n-1.
df.reset_index(drop=True, inplace=True)
for fold, (train_idx, valid_idx) in enumerate(sgkf.split(df, y=df["class_label"], groups=df["patient_id"])):
    df.loc[valid_idx, "fold"] = fold
# Row counts per fold and class.
df.groupby(["fold", "class_name"])[["eeg_id"]].count().T

In [None]:
def build_augmenter(dim=CFG.image_size):
    """Build a batch-level augmentation function.

    Args:
        dim: Not used by the body.

    Returns:
        ``augment(img, label)``, which applies each augmenter independently
        with probability 0.5 and returns ``(images, labels)``.
    """
    augmenters = [
        keras_cv.layers.MixUp(alpha=2.0),
        keras_cv.layers.RandomCutout(height_factor=(1.0, 1.0),
                                     width_factor=(0.06, 0.1)), # freq-masking
        keras_cv.layers.RandomCutout(height_factor=(0.06, 0.1),
                                     width_factor=(1.0, 1.0)), # time-masking
    ]
    
    def augment(img, label):
        # The KerasCV layers take and return a dict with "images" and "labels".
        data = {"images":img, "labels":label}
        for augmenter in augmenters:
            # One random draw per augmenter per call.
            if tf.random.uniform([]) < 0.5:
                data = augmenter(data, training=True)
        return data["images"], data["labels"]
    
    return augment


def build_decoder(with_labels=True, target_size=CFG.image_size, dtype=32):
    """Build the per-sample decoding function for .npy spectrogram files.

    Args:
        with_labels: If True, return a decoder that also one-hot encodes the label.
        target_size: Not used by the body.
        dtype: Divisor for the header skip; ``1024 // dtype`` leading float32
            values are dropped (32 values for the default).

    Returns:
        ``decode_with_labels(path, offset, label)`` or ``decode_signal(path, offset)``.
    """
    def decode_signal(path, offset=None):
        # Read .npy files and process the signal
        file_bytes = tf.io.read_file(path)
        # The whole file, header included, is interpreted as float32 values.
        sig = tf.io.decode_raw(file_bytes, tf.float32)
        sig = sig[1024//dtype:]  # Remove header tag
        sig = tf.reshape(sig, [400, -1])
        
        # Extract labeled subsample from full spectrogram using "offset"
        if offset is not None: 
            # Convert the offset to a column index (presumably seconds -> 2 s columns).
            offset = offset // 2  # Only odd values are given
            sig = sig[:, offset:offset+300]
            
            # Pad spectrogram to ensure the same input shape of [400, 300]
            pad_size = tf.math.maximum(0, 300 - tf.shape(sig)[1])
            sig = tf.pad(sig, [[0, 0], [0, pad_size]])
            sig = tf.reshape(sig, [400, 300])
        
        # Log spectrogram 
        sig = tf.clip_by_value(sig, tf.math.exp(-4.0), tf.math.exp(8.0)) # avoid 0 in log
        sig = tf.math.log(sig)
        
        # Normalize spectrogram
        sig -= tf.math.reduce_mean(sig)
        sig /= tf.math.reduce_std(sig) + 1e-6
        
        # Mono channel to 3 channels to use "ImageNet" weights
        sig = tf.tile(sig[..., None], [1, 1, 3])
        return sig
    
    def decode_label(label):
        # Integer class id -> float32 one-hot vector of length CFG.num_classes.
        label = tf.one_hot(label, CFG.num_classes)
        label = tf.cast(label, tf.float32)
        label = tf.reshape(label, [CFG.num_classes])
        return label
    
    def decode_with_labels(path, offset=None, label=None):
        sig = decode_signal(path, offset)
        label = decode_label(label)
        return (sig, label)
    
    return decode_with_labels if with_labels else decode_signal


# NOTE: a later cell defines another function named build_dataset; once that
# cell has run, it replaces this definition.
def build_dataset(paths, offsets=None, labels=None, batch_size=32, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=False, repeat=True, shuffle=1024, 
                  cache_dir="", drop_remainder=False):
    """Assemble a tf.data pipeline: decode, cache, repeat, shuffle, batch, augment, prefetch.

    Args:
        paths: Paths of the .npy spectrogram files.
        offsets: Per-sample label offsets, or None.
        labels: Integer class labels, or None for unlabeled data.
        batch_size: Number of samples per batch.
        cache: Whether to cache the decoded samples.
        decode_fn: Per-sample decoder; built with build_decoder when None.
        augment_fn: Batch augmentation function; built with build_augmenter when None.
        augment: Whether to apply augment_fn to each batch.
        repeat: Whether to repeat the dataset indefinitely.
        shuffle: Shuffle buffer size; a falsy value disables shuffling.
        cache_dir: Cache location passed to Dataset.cache; created if non-empty and cache is True.
        drop_remainder: Whether to drop the last incomplete batch.

    Returns:
        The configured tf.data.Dataset.
    """
    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)
    
    if decode_fn is None:
        decode_fn = build_decoder(labels is not None)
    
    if augment_fn is None:
        augment_fn = build_augmenter()
    
    AUTO = tf.data.experimental.AUTOTUNE
    slices = (paths, offsets) if labels is None else (paths, offsets, labels)
    
    ds = tf.data.Dataset.from_tensor_slices(slices)
    ds = ds.map(decode_fn, num_parallel_calls=AUTO)
    ds = ds.cache(cache_dir) if cache else ds
    ds = ds.repeat() if repeat else ds
    if shuffle: 
        ds = ds.shuffle(shuffle, seed=CFG.seed)
        # Allow elements to be produced out of order.
        opt = tf.data.Options()
        opt.experimental_deterministic = False
        ds = ds.with_options(opt)
    ds = ds.batch(batch_size, drop_remainder=drop_remainder)
    # Augmentation runs on whole batches.
    ds = ds.map(augment_fn, num_parallel_calls=AUTO) if augment else ds
    ds = ds.prefetch(AUTO)
    return ds

print(build_dataset)

In [None]:
def preprocess_spg(npy_path, offset=None):
    """Load one .npy spectrogram and turn it into a normalised 3-channel image.

    Steps: read the file as raw float32 values, skip the header, reshape to
    400 rows, cut 300 columns starting at ``offset // 2``, right-pad to 300
    columns, clip to ``[e^-4, e^8]``, take the log, standardise, and repeat
    the single channel three times.

    Args:
        npy_path: Path of the .npy file.
        offset: Label offset; ``offset // 2`` is the first column kept.
            ``None`` means start at column 0.

    Returns:
        A float32 tensor of shape ``[400, 300, 3]``.
    """
    file_bytes = tf.io.read_file(npy_path)
    spg = tf.io.decode_raw(file_bytes, tf.float32)
    # Skip the first 32 float32 values (128 bytes); assumes the .npy header is exactly that long.
    spg = spg[32:]
    spg = tf.reshape(spg, [400, -1])

    if offset is None:
        offset = 0
    # Slice exactly once, with the column index. The earlier version first
    # sliced with the raw offset and then again with offset // 2, which
    # selected the wrong region and shortened the window.
    col_offset = offset // 2
    spg = spg[:, col_offset:(col_offset + 300)]

    # Right-pad short clips with zeros and pin the static shape so that
    # batching and the model see a fixed [400, 300] input.
    right_padding = tf.math.maximum(0, 300 - tf.shape(spg)[1])
    spg = tf.pad(spg, [[0, 0], [0, right_padding]])
    spg = tf.reshape(spg, [400, 300])

    # Clip before the log so zeros (including the padding) do not produce -inf.
    spg = tf.clip_by_value(spg, tf.math.exp(-4.), tf.math.exp(8.))
    spg = tf.math.log(spg)

    # Standardise per image; the epsilon guards against a zero std.
    spg -= tf.math.reduce_mean(spg)
    spg /= tf.math.reduce_std(spg) + 1e-6

    # Single channel -> 3 identical channels.
    spg = tf.tile(spg[..., None], (1, 1, 3))

    return spg


def build_preprocess_fn(with_labels=True):
    """Pick the per-sample preprocessing function for a dataset.

    Args:
        with_labels: If True, the returned function also takes an integer
            label and one-hot encodes it to ``CFG.num_classes`` entries.

    Returns:
        ``preprocess_with_label(path, offset, label)`` giving ``(image, one_hot)``,
        or ``preprocess(path, offset)`` giving the image only.
    """
    def preprocess(path, offset=None):
        return preprocess_spg(npy_path=path, offset=offset)

    def preprocess_with_label(path, offset=None, label=None):
        image = preprocess_spg(npy_path=path, offset=offset)
        one_hot = tf.one_hot(label, CFG.num_classes)
        return (image, one_hot)

    if with_labels:
        return preprocess_with_label
    return preprocess


def build_augment_fn(dim=CFG.image_size):
    """Build a batch-level augmentation function.

    Args:
        dim: Not used by the body.

    Returns:
        ``augment(img, label)``, which applies each augmenter independently
        with probability 0.5 and returns ``(images, labels)``.
    """
    augmenters = [
        keras_cv.layers.MixUp(alpha=2.0),
        keras_cv.layers.RandomCutout(height_factor=(1.0, 1.0),
                                     width_factor=(0.06, 0.1)), # freq-masking
        keras_cv.layers.RandomCutout(height_factor=(0.06, 0.1),
                                     width_factor=(1.0, 1.0)), # time-masking
    ]
    
    def augment(img, label):
        # The KerasCV layers take and return a dict with "images" and "labels".
        data = {"images":img, "labels":label}
        for augmenter in augmenters:
            # One random draw per augmenter per call.
            if tf.random.uniform([]) < 0.5:
                data = augmenter(data, training=True)
        return data["images"], data["labels"]
    
    return augment


def build_dataset(paths, offsets=None, labels=None, batch_size=32,
                  augment=False, augment_fn=None, preprocess_fn=None, repeat=True, shuffle=1024):
    """Assemble a tf.data pipeline: preprocess, repeat, shuffle, batch, augment, prefetch.

    Args:
        paths: Paths of the .npy spectrogram files.
        offsets: Per-sample label offsets, or None.
        labels: Integer class labels, or None for unlabeled data.
        batch_size: Number of samples per batch.
        augment: Whether to apply ``augment_fn`` to each batch.
        augment_fn: Batch augmentation function; built with
            ``build_augment_fn`` when ``augment`` is True and this is None.
        preprocess_fn: Per-sample preprocessing function; built with
            ``build_preprocess_fn`` when None.
        repeat: Whether to repeat the dataset indefinitely.
        shuffle: Shuffle buffer size. ``True`` selects the default buffer of
            1024; ``False``, ``0`` or ``None`` disables shuffling.

    Returns:
        The configured tf.data.Dataset.
    """
    if augment and augment_fn is None:
        augment_fn = build_augment_fn()

    if preprocess_fn is None:
        preprocess_fn = build_preprocess_fn(labels is not None)

    # Callers pass shuffle=True as an on/off switch. Forwarding a bare True to
    # Dataset.shuffle as the buffer size means, at best, a one-element buffer
    # (no real shuffling), so map it to the documented default instead.
    if shuffle is True:
        shuffle = 1024

    auto = tf.data.experimental.AUTOTUNE
    slices = (paths, offsets) if labels is None else (paths, offsets, labels)

    ds = tf.data.Dataset.from_tensor_slices(slices)

    # Element order is preserved with parallel calls (deterministic by default).
    ds = ds.map(preprocess_fn, num_parallel_calls=auto)

    ds = ds.repeat() if repeat else ds
    if shuffle:
        ds = ds.shuffle(shuffle, seed=CFG.seed)
    ds = ds.batch(batch_size)
    # Augmentation runs on whole batches.
    ds = ds.map(augment_fn) if augment else ds
    # Overlap input preparation with model execution.
    ds = ds.prefetch(auto)
    return ds

print(build_dataset)

In [None]:
# Sample from full data: keep the first row of every spectrogram_id
sample_df = df.groupby("spectrogram_id").head(1).reset_index(drop=True)
# CFG.fold is held out for validation; all other folds are used for training.
train_df = sample_df[sample_df.fold != CFG.fold]
valid_df = sample_df[sample_df.fold == CFG.fold]
print(f"# Num Train: {len(train_df)} | Num Valid: {len(valid_df)}")

# Train: repeated, shuffled and augmented
train_paths = train_df.spec2_path.values
train_offsets = train_df.spectrogram_label_offset_seconds.values.astype(int)
train_labels = train_df.class_label.values
train_ds = build_dataset(train_paths, train_offsets, train_labels, batch_size=CFG.batch_size,
                         repeat=True, shuffle=True, augment=True)

# Valid: single pass, fixed order, no augmentation
valid_paths = valid_df.spec2_path.values
valid_offsets = valid_df.spectrogram_label_offset_seconds.values.astype(int)
valid_labels = valid_df.class_label.values
valid_ds = build_dataset(valid_paths, valid_offsets, valid_labels, batch_size=CFG.batch_size,
                         repeat=False, shuffle=False, augment=False)

In [None]:
# Pull one training batch and plot its first images as a sanity check.
imgs, tars = next(iter(train_ds))

num_imgs = 8
plt.figure(figsize=(4*4, num_imgs//4*5))
for i in range(num_imgs):
    plt.subplot(num_imgs//4, 4, i + 1)
    img = imgs[i].numpy()[...,0]  # Adjust as per your image data format
    # Rescale to roughly [0, 1] for display.
    img -= img.min()
    img /= img.max() + 1e-4
    # The title shows the class with the largest target value.
    tar = CFG.label2name[np.argmax(tars[i].numpy())]
    plt.imshow(img)
    plt.title(f"Target: {tar}")
    plt.axis('off')
    # Prints the pixel sum of each displayed image.
    print(img.sum())
    
plt.tight_layout()
plt.show()

In [None]:
# Build the classifier from the pretrained preset with a CFG.num_classes-way output
model = keras_cv.models.ImageClassifier.from_preset(
    CFG.preset, num_classes=CFG.num_classes
)

# Compile the model with Adam and a KL-divergence loss
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-4),
              loss=keras.losses.KLDivergence())

# Model summary
model.summary()

In [None]:
import math

def get_lr_callback(batch_size=8, mode='cos', epochs=10, plot=False):
    """Create a per-epoch learning-rate scheduler callback.

    The schedule ramps linearly from ``lr_start`` to ``lr_max`` over the first
    3 epochs and then decays according to ``mode``.

    Args:
        batch_size: Scales the peak learning rate (``6e-6 * batch_size``).
        mode: Decay shape after the ramp: ``'cos'``, ``'step'`` or ``'exp'``.
        epochs: Number of training epochs; sets the cosine period and the
            range of the optional plot.
        plot: If True, plot the schedule for ``epochs`` epochs.

    Returns:
        A ``keras.callbacks.LearningRateScheduler``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    # Fail at construction time. Previously an unknown mode only surfaced as
    # an UnboundLocalError once training reached the end of the ramp.
    if mode not in ('cos', 'step', 'exp'):
        raise ValueError(f"mode must be one of 'cos', 'step', 'exp'; got {mode!r}")

    lr_start, lr_max, lr_min = 5e-5, 6e-6 * batch_size, 1e-5
    lr_ramp_ep, lr_sus_ep, lr_decay = 3, 0, 0.75

    def lrfn(epoch):  # Learning rate update function
        if epoch < lr_ramp_ep:  # linear warm-up
            return (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start
        if epoch < lr_ramp_ep + lr_sus_ep:  # hold at the peak
            return lr_max

        decay_epoch_index = epoch - lr_ramp_ep - lr_sus_ep
        if mode == 'exp':
            return (lr_max - lr_min) * lr_decay**decay_epoch_index + lr_min
        if mode == 'step':
            return lr_max * lr_decay**(decay_epoch_index // 2)

        # mode == 'cos'; the extra 3 epochs in the period keep the last
        # training epoch above lr_min.
        decay_total_epochs = epochs - lr_ramp_ep - lr_sus_ep + 3
        phase = math.pi * decay_epoch_index / decay_total_epochs
        return (lr_max - lr_min) * 0.5 * (1 + math.cos(phase)) + lr_min

    if plot:  # Plot lr curve if plot is True
        plt.figure(figsize=(10, 5))
        plt.plot(np.arange(epochs), [lrfn(epoch) for epoch in np.arange(epochs)], marker='o')
        plt.xlabel('epoch'); plt.ylabel('lr')
        plt.title('LR Scheduler')
        plt.show()

    return keras.callbacks.LearningRateScheduler(lrfn, verbose=False)  # Create lr callback


# Pass the real epoch count: without it the schedule (and its plot) is built
# for the default of 10 epochs while training runs for CFG.epochs.
lr_cb = get_lr_callback(CFG.batch_size, mode=CFG.lr_mode, epochs=CFG.epochs, plot=True)

In [None]:
# Save the full model whenever val_loss reaches a new minimum.
ckpt_cb = keras.callbacks.ModelCheckpoint("best_model.keras",
                                         monitor='val_loss',
                                         save_best_only=True,
                                         save_weights_only=False,
                                         mode='min')

# train_ds repeats indefinitely, so steps_per_epoch defines the length of one epoch.
history = model.fit(
    train_ds, 
    epochs=CFG.epochs,
    callbacks=[lr_cb, ckpt_cb], 
    steps_per_epoch=len(train_df)//CFG.batch_size,
    validation_data=valid_ds, 
    verbose=CFG.verbose
)

In [None]:
# Restore the checkpoint written by ckpt_cb (the epoch with the lowest val_loss).
model.load_weights("best_model.keras")

In [None]:
# Test pipeline: no offsets or labels, single pass, fixed order, no augmentation.
test_paths = test_df.spec2_path.values
# The batch size is capped at the number of test rows.
test_ds = build_dataset(test_paths, batch_size=min(CFG.batch_size, len(test_df)),
                         repeat=False, shuffle=False, augment=False)

In [None]:
# Image-model predictions for the test set, one row per test sample.
preds = model.predict(test_ds)

In [None]:
# Attach the predictions to their eeg_id; column names are '<class>_vote' in CFG.class_names order.
pred_df = test_df[["eeg_id"]].copy()
target_cols = [x.lower()+'_vote' for x in CFG.class_names]
pred_df[target_cols] = preds.tolist()

# Left-join onto the test ids to form the image-model submission.
submission_2 = test_df[["eeg_id"]].copy()
submission_2 = submission_2.merge(pred_df, on="eeg_id", how="left")
submission_2

In [None]:
# Blend the two models with equal weights.
# The earlier version started with `submission_2 = submission_1`, which threw
# away the image-model predictions and made the "average" equal to
# submission_1. It also re-bound submission_1 to its indexed form, so the
# cell could not be run twice. Separate names keep both inputs intact.
blend_1 = submission_1.set_index('eeg_id')
blend_2 = submission_2.set_index('eeg_id')

# add() aligns on eeg_id and on column name before summing.
sub_df = 0.5 * blend_1.add(blend_2)
# Keep the column order of the first submission.
sub_df = sub_df[blend_1.columns]

sub_df = sub_df.reset_index()

sub_df

In [None]:
# Write the blended predictions without the index column and preview them.
sub_df.to_csv("submission.csv", index=False)
sub_df.head()