In [None]:
import sys
sys.path.append('src')
from pathlib import Path
import pandas as pd
import tarfile
import urllib

def load_metadata(file_path="data/train.csv"):
  """Read the training metadata CSV into a pandas DataFrame.

  Args:
    file_path: Location of the CSV file (str or path-like). Defaults to
      "data/train.csv", which is resolved against the current working
      directory.

  Returns:
    pandas.DataFrame holding the parsed contents of the CSV.

  Raises:
    FileNotFoundError: If no file exists at `file_path`.
  """
  return pd.read_csv(Path(file_path))
  
# Load the metadata table once at cell level; later statements reuse `metadata`.
metadata = load_metadata()

def extract_eeg():
  """Download the EEG archive if needed and unpack it so data/eeg is available.

  The tarball is fetched to data/eeg.tar.gz only when that file is missing.
  Extraction runs whenever the data/eeg directory is missing -- not only
  right after a fresh download -- so a run interrupted between download and
  extraction recovers on the next call.

  Members are extracted relative to the current working directory.
  NOTE(review): this assumes the archive's member paths start with
  "data/eeg/" -- confirm against the tarball contents.

  Returns:
    None. The function only has side effects: it may write data/eeg.tar.gz
    and the files extracted from it.

  Raises:
    urllib.error.URLError: If the download fails.
    tarfile.TarError: If the archive cannot be read.
  """
  # `import urllib` alone does not guarantee that the `request` submodule is
  # loaded; import it explicitly so the call below cannot hit AttributeError.
  import urllib.request

  eeg_dir = Path("data/eeg")
  tarball_path = Path("data/eeg.tar.gz")

  if not tarball_path.is_file():
    url = 'https://dl.dropboxusercontent.com/scl/fi/5sina48c4naaxv6uze0fv/eeg.tar.gz?rlkey=r7ec191extynfcm8fy0tsiws5&dl=0'
    tarball_path.parent.mkdir(parents=True, exist_ok=True)
    # Download under a temporary name and rename on success, so an interrupted
    # transfer never leaves a truncated file that looks like a finished tarball.
    partial_path = tarball_path.with_name(tarball_path.name + ".part")
    urllib.request.urlretrieve(url, partial_path)
    partial_path.replace(tarball_path)

  if not eeg_dir.is_dir():
    with tarfile.open(tarball_path) as eeg_tarball:
      # NOTE(review): the archive comes from the network; on Python versions
      # that support extraction filters, consider `extractall(filter="data")`.
      eeg_tarball.extractall()
    
# Make sure the EEG archive has been fetched/unpacked before later cells read from it.
extract_eeg()

# Collapse rows that share an eeg_id, keeping the first occurrence of each.
metadata = metadata.drop_duplicates(subset=['eeg_id'], keep='first')
metadata


In [None]:
# Load each recording's EEG channels into a single Dask DataFrame
import dask.dataframe as dd
import numpy as np
import glob

# Channels to keep, listed in the order the columns are selected below.
channel_order = ['Fp1', 'Fp2',
            'F7', 'F3', 'Fz', 'F4', 'F8', 
            'T3', 'C3', 'Cz', 'C4', 'T4', 
            'T5', 'P3', 'Pz', 'P4', 'T6', 
            'O1', 'O2',
            ]
# NOTE(review): presumably the sampling frequency in Hz; it is not used in this cell -- confirm.
sfreq = 200
eeg_ids = metadata['eeg_id'].to_list()

# Build one lazy Dask frame per recording, each indexed by the string form of
# its eeg_id, then stack them into a single frame.
ddf_list = []
for eeg_id in eeg_ids:
  f_name = f'data/eeg/{eeg_id}.parquet'
  # Selecting `channel_order` already excludes every other column (including
  # 'EKG'), so no separate drop is needed; this also avoids a KeyError on a
  # file that has no 'EKG' column.
  temp_ddf = dd.read_parquet(f_name)[channel_order]
  temp_ddf['eeg_id'] = str(eeg_id)
  temp_ddf = temp_ddf.set_index('eeg_id')
  ddf_list.append(temp_ddf)

ddf = dd.concat(ddf_list)
ddf



In [None]:
# Execute the lazy Dask graph and keep the materialised result in `df`.
# NOTE(review): this loads every recording in `ddf` into memory at once -- confirm it fits in RAM.
df = ddf.compute()
df



In [None]:
# Channel Selection Based on Variance
from feature_extraction.channel_selection import calculate_all_samples

# Run the project's channel-selection helper, passing the materialised EEG frame and the list of EEG ids.
# NOTE(review): the meaning of the third argument (1000) is not visible in this notebook --
# confirm against feature_extraction.channel_selection and consider a named constant.
top_channel_df = calculate_all_samples(df, eeg_ids, 1000)
top_channel_df

In [None]:
# Feature Extraction for 11 features per Channel - Averaged Values
from feature_extraction.extracted_features import extract_features_all_samples

# Run the project's feature extractor on the EEG frame together with the channel-selection result from the previous cell.
# NOTE(review): the shape and columns of the returned frame are defined in feature_extraction.extracted_features -- confirm there.
features_df = extract_features_all_samples(df, top_channel_df)
features_df


In [None]:
# Visualization
from visualize import VisualizeEEG
# Materialise a single partition of the lazy frame to use as the plotting sample.
partition_index = 2
raw_df = ddf.partitions[partition_index].compute()

vis_eeg = VisualizeEEG(raw_df)

# Both plots cover the same window.
# NOTE(review): units of start/duration/end are defined by VisualizeEEG -- confirm.
window_start = 38
window_duration = 7

# Plot signal channels
vis_eeg.plot_signal(start=window_start, duration=window_duration)
# Plot topographic map
vis_eeg.plot_topomap(start=window_start, end=window_start + window_duration, delta=1)



In [None]:
#print(np.var(df['Fp1'], axis=0)) #variance for one col/channel
#fpl = df['Pz'].fillna(0).to_numpy() #converting to numpy array for easier computation
#print(np.var(fpl, axis=0))
# variance for one channel(Fp1) in one signal(4144388963)
#np.var(sig1['Fp1'].to_numpy())
# one sample and their channels
#sig1 = df.loc[['1618328341']]
#sig1
# df.index = df.index.map(str)
# # Or, convert to integer
# df.index = df.index.map(int)