In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import gc

import scipy.stats
from scipy.signal import welch
from scipy.stats import skew, kurtosis

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings('ignore')

In [26]:
train = pd.read_csv('./Cleaned_Train.csv')
train.head()

Unnamed: 0,eeg_id,spec_id,min,max,patient_id,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote,target
0,568657,789577333,0.0,16.0,20654,0.0,0.0,0.25,0.0,0.166667,0.583333,Other
1,582999,1552638400,0.0,38.0,20230,0.0,0.857143,0.0,0.071429,0.0,0.071429,LPD
2,642382,14960202,1008.0,1032.0,5955,0.0,0.0,0.0,0.0,0.0,1.0,Other
3,751790,618728447,908.0,908.0,38549,0.0,0.0,1.0,0.0,0.0,0.0,GPD
4,778705,52296320,0.0,0.0,40955,0.0,0.0,0.0,0.0,0.0,1.0,Other


In [27]:
# Function to load EEG data
def load_eeg_data(file_path):
    return pd.read_csv(file_path)

# Function to extract time domain features for PCA components
def extract_pca_time_domain_features(pca_data):
    return {
        'eeg_mean': np.mean(pca_data, axis=0).item(),
        'eeg_std_dev': np.std(pca_data, axis=0).item(),
        'eeg_skewness': skew(pca_data, axis=0).item(),
        'eeg_kurtosis': kurtosis(pca_data, axis=0).item()
    }

In [28]:
PATH = '../hms-harmful-brain-activity-classification/'
EEG_PATH = PATH + 'train_eegs/'

All_Spec_df = []
for i in range(len(train)):
    df_eeg = pd.read_parquet(EEG_PATH + str(train['eeg_id'][i]) + '.parquet')
    print(train['eeg_id'][i])
    df_eeg.fillna(df_eeg.mean(), inplace=True)
    scaler = StandardScaler()
    eeg_data_scaled = scaler.fit_transform(df_eeg.drop(['Unnamed: 0'], axis = 1, errors='ignore'))  # Drop non-EEG columns
    pca = PCA(n_components = 1)  # Retain 1 principal component for simplicity
    eeg_data_pca = pca.fit_transform(eeg_data_scaled)

    # Extract time-domain features from PCA components
    ok = extract_pca_time_domain_features(eeg_data_pca)
    new_item = {"eeg_id": train['eeg_id'][i]}
    ok = {**new_item, **ok}

    All_Spec_df.append(ok)
    

568657
582999
642382
751790
778705
1629671
1895581
2061593
2078097
2366870
2482631
2521897
2918824
3108700
3625731
3851658
3907459
4431217
4454049
4559645
4904504
4969792
5168624
5423338
5424875
5485650
5586700
5710954
5993793
6259482
7278051
7585449
8027281
8071080
8922554
9240926
9418744
9859330
9980241
10249311
10343849
10386542
10466156
10617205
10687514
10859011
11127485
12197246
12298343
12422438
12784157
12830684
14548664
14609267
14623517
15073201
15238928
15271495
15981664
16002110
16114818
16178958
16982159
17295749
17311157
17662377
17714496
17875433
17987739
18169507
18263267
18802033
19239792
19529277
19740075
19780434
20299905
20697410
20915334
21054661
21379701
21498048
21557190
21746311
22065654
22429107
22825259
23656323
23760589
24140037
24527714
24536592
24814051
24824117
25239347
25339322
25408618
25655754
26080893
26207239
26441495
26443377
26560372
26797072
26950912
27286272
27374736
27424302
27498570
27705009
27969470
28486516
29150456
29194495
29265960
29351894


KeyboardInterrupt: 

In [None]:
ok = pd.DataFrame(All_Spec_df)
ok.head()

Unnamed: 0,eeg_id,eeg_mean,eeg_std_dev,eeg_skewness,eeg_kurtosis
0,568657,1.84955e-08,3.27767,0.266662,2.634665
1,582999,4.855069e-08,3.418199,-0.09562,0.70742
2,642382,3.299198e-08,2.695353,-0.128804,0.071505


In [None]:
# ok.to_csv('ALL_EEG_DF.csv')