In [4]:
import sys
from pathlib import Path
import pandas as pd
import tarfile
import urllib

def load_metadata():
  file_path = Path("data/train.csv")
  return pd.read_csv(file_path)
  
metadata = load_metadata()

def extract_eeg():
  eeg_dir = Path("../data/eeg")
  tarball_path = Path("data/eeg.tar.gz")
  if not tarball_path.is_file():
    url = 'https://dl.dropboxusercontent.com/scl/fi/5sina48c4naaxv6uze0fv/eeg.tar.gz?rlkey=r7ec191extynfcm8fy0tsiws5&dl=0'
    urllib.request.urlretrieve(url, tarball_path)
    with tarfile.open(tarball_path) as eeg_tarball:
      eeg_tarball.extractall()
    
extract_eeg()

metadata

Unnamed: 0,eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,4144388963,140,604.0,1156825996,140,604.0,1451266906,59489,GRDA,0,0,0,0,3,0
1,2353475448,30,64.0,1002394133,30,64.0,4000072340,5339,LRDA,0,0,0,3,0,0
2,1618328341,9,52.0,900482955,9,52.0,4140697659,20198,GRDA,0,0,0,0,3,0
3,979865826,7,90.0,1626043434,7,90.0,919550440,1069,Other,1,1,4,1,4,5
4,521108392,0,0.0,827447277,0,0.0,1717414556,13134,Other,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2509824693,10,68.0,1005228554,15,330.0,219919562,14386,LPD,0,11,0,1,1,2
996,2882719839,14,50.0,2035369578,14,50.0,4193559045,2641,GPD,5,0,11,0,0,0
997,1322226281,2,18.0,1740512896,2,18.0,1697286566,49448,Other,0,0,0,0,0,3
998,628369060,15,98.0,13143748,17,292.0,1650460145,34998,GPD,0,3,7,0,2,4


In [5]:
import dask.dataframe as dd
from src.utils import compute_signal_hash

channel_order = ['Fp1', 'Fp2',
            'F7', 'F3', 'Fz', 'F4', 'F8', 
            'T3', 'C3', 'Cz', 'C4', 'T4', 
            'T5', 'P3', 'Pz', 'P4', 'T6', 
            'O1', 'O2',
          ]

def load_signals(metadata):
  rows = len(metadata)
  eeg_list = []

  for row in range(0,rows):
    sample = metadata.iloc[row]
    f_name = f'data/eeg/{sample.eeg_id}.parquet'
    eeg = pd.read_parquet(f_name)[channel_order]
    eeg_offset = int(sample.eeg_label_offset_seconds)

    eeg['id'] = str(compute_signal_hash(sample))
    eeg = eeg.set_index('id')

    eeg = eeg.iloc[eeg_offset*200:(eeg_offset+50)*200]
    eeg_list.append(eeg)

  return dd.concat(eeg_list)

ddf = load_signals(metadata)
ddf

Unnamed: 0_level_0,Fp1,Fp2,F7,F3,Fz,F4,F8,T3,C3,Cz,C4,T4,T5,P3,Pz,P4,T6,O1,O2
npartitions=1000,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [6]:
df = ddf.compute()
df

sig_ids = list(set(df.index.to_list()))
sig_ids

['d128c9ebd',
 '8179f84ad',
 '5358bcc07',
 'b5ad85b3c',
 '65a7f2bdc',
 '2bd1c20f8',
 'd6e682913',
 '978b91f8e',
 'b2b0bba3d',
 'e0586c69f',
 'ebfe8cc9f',
 'e7f45d52b',
 'f92975586',
 '332440550',
 'c7c93b5d1',
 'c267c8f9f',
 '0d05b0b4b',
 '6bcd5b7c3',
 'c4745e235',
 '00f772986',
 '02a1b0aa4',
 'd35697e9f',
 '6fde86346',
 'f73a7e258',
 'ebff36d84',
 '095379882',
 '7ac4030e6',
 '9ed82fcc9',
 '4f2d7b60b',
 '0b378777d',
 '85275a06a',
 'e3d0d7ed6',
 '610d5859d',
 'c3022be1e',
 'bfa0cb2ff',
 '08fcc4d1e',
 '3aa295001',
 '0a5bd8674',
 '1bb700115',
 '08c7153a9',
 'd596e48c5',
 '1ce330e5b',
 '20ad68874',
 '4db0fb857',
 '482150b6a',
 '67271fc3c',
 '6e0540f92',
 'f599e6794',
 '6aca0e775',
 'df20a2bdd',
 'c9eac1abb',
 '5cf57ef7a',
 '058d5660f',
 'a5399830b',
 '1330f549a',
 '6f0f01997',
 'edf588e87',
 '9fac867a0',
 '013dbbafe',
 '7cad267f5',
 'a6f07eec7',
 'e93097da2',
 'fea510306',
 'd1970c837',
 '6f9d253c0',
 '42fde4e24',
 '185b54070',
 '26ef10bb2',
 '84beb452a',
 '813267d82',
 '4732cbfb4',
 '0737

In [7]:
sig1 = ddf.loc['ec2a8e98d'].compute()
sig1

Unnamed: 0_level_0,Fp1,Fp2,F7,F3,Fz,F4,F8,T3,C3,Cz,C4,T4,T5,P3,Pz,P4,T6,O1,O2
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
ec2a8e98d,12.390000,9.080000,-5.650000,33.340000,16.690001,22.990000,-5.870000,45.020000,91.080002,29.030001,6.580000,-47.340000,50.389999,26.320000,3.310000,10.680000,-12.160000,45.570000,35.75
ec2a8e98d,-22.879999,-4.470000,-6.090000,24.020000,13.990000,17.610001,0.810000,41.869999,87.900002,26.340000,6.180000,-42.810001,48.700001,25.680000,7.300000,11.690000,-11.200000,46.400002,37.57
ec2a8e98d,-35.770000,-16.549999,-0.580000,11.020000,2.820000,6.530000,-9.130000,34.119999,75.760002,11.650000,-6.260000,-53.369999,39.240002,14.600000,1.530000,0.870000,-20.049999,35.490002,29.23
ec2a8e98d,-13.310000,18.969999,11.560000,16.100000,0.090000,12.810000,6.050000,37.639999,78.099998,12.090000,-5.640000,-46.189999,42.529999,17.490000,-5.360000,5.150000,-12.950000,37.820000,31.60
ec2a8e98d,-32.540001,19.940001,12.040000,23.740000,4.320000,17.740000,10.620000,45.840000,83.510002,14.980000,-0.820000,-36.680000,46.590000,20.370001,-2.270000,9.790000,-7.170000,39.680000,37.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ec2a8e98d,-121.769997,55.470001,-37.790001,-110.839996,-78.500000,-20.500000,-30.280001,-128.529999,-47.049999,-88.160004,-36.700001,-60.750000,-94.870003,-116.330002,54.700001,-44.250000,-18.790001,-35.560001,7.47
ec2a8e98d,-140.589996,44.259998,-66.930000,-131.259995,-106.120003,-44.279999,-32.360001,-127.150002,-59.369999,-103.029999,-48.520000,-64.040001,-94.620003,-119.120003,54.730000,-46.639999,-7.720000,-33.770000,13.01
ec2a8e98d,-177.429993,14.850000,-61.500000,-150.789993,-135.970001,-71.669998,-42.900002,-131.759995,-68.650002,-116.900002,-62.180000,-68.139999,-94.160004,-119.699997,42.139999,-51.709999,-20.660000,-33.400002,8.13
ec2a8e98d,-180.509995,-15.750000,-67.669998,-171.380005,-161.699997,-103.080002,-59.869999,-141.059998,-73.000000,-125.699997,-75.230003,-76.470001,-94.110001,-120.589996,40.270000,-53.930000,-22.670000,-32.340000,9.00


In [8]:
## MNE setup
import mne
import numpy as np

mne_info = mne.create_info(ch_names=sig1.columns.tolist(), sfreq=200, ch_types='eeg')
mne_info.set_montage('standard_1020')
    
sig1_data = np.array(sig1.transpose())
sig1_data = np.nan_to_num(sig1_data)
    
raw = mne.io.RawArray(sig1_data, mne_info)
raw.apply_function(lambda x: x / 20e6, picks='eeg')

Creating RawArray with float64 data, n_channels=19, n_times=10000
    Range : 0 ... 9999 =      0.000 ...    49.995 secs
Ready.


0,1
Measurement date,Unknown
Experimenter,Unknown
Participant,Unknown

0,1
Digitized points,22 points
Good channels,19 EEG
Bad channels,
EOG channels,Not available
ECG channels,Not available

0,1
Sampling frequency,200.00 Hz
Highpass,0.00 Hz
Lowpass,100.00 Hz
Duration,00:00:50 (HH:MM:SS)


In [9]:
# Apply filters
from src.preprocessing import notch_filter, bp_filter, standardize

l_freq = 1.0
h_freq = 70.0

df = notch_filter(df, 60)
df = bp_filter(df, 1.0, 70)
df = standardize(df)
df

Creating RawArray with float64 data, n_channels=19, n_times=10000000
    Range : 0 ... 9999999 =      0.000 ... 49999.995 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1321 samples (6.605 s)



[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    3.2s


Creating RawArray with float64 data, n_channels=19, n_times=10000000
    Range : 0 ... 9999999 =      0.000 ... 49999.995 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 70 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 70.00 Hz
- Upper transition bandwidth: 17.50 Hz (-6 dB cutoff frequency: 78.75 Hz)
- Filter length: 661 samples (3.305 s)



[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    4.7s


Unnamed: 0_level_0,Fp1,Fp2,F7,F3,Fz,F4,F8,T3,C3,Cz,C4,T4,T5,P3,Pz,P4,T6,O1,O2
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
db76e9927,-0.358759,1.578162,0.622663,-0.660394,-1.209209,-0.394399,0.784306,0.806612,1.145476,0.513598,-1.380129,-1.178880,-0.084640,0.384806,1.579776,0.572196,-1.950236,-0.987145,0.216196
db76e9927,0.093783,1.296192,0.574898,-1.681821,-1.295806,-0.217123,0.315114,1.365653,0.815609,-1.133318,-1.382836,-0.411701,0.128509,0.903011,1.789717,0.642637,-1.265056,-0.613207,0.075747
db76e9927,0.061048,1.455427,0.728026,-1.826660,-1.062345,-0.202941,0.055641,1.928031,1.149419,-1.164770,-1.153550,-0.763313,0.105338,1.234197,0.805129,-0.104989,-0.889405,-0.668347,0.314066
db76e9927,0.521842,1.111046,0.385727,-1.577889,-0.797725,0.117748,0.152667,1.476937,0.519251,-1.688054,-1.140100,-0.191842,0.689475,1.791900,0.499601,-1.385861,-1.095527,-0.082454,0.693257
db76e9927,1.315258,1.196277,0.167834,-0.857804,-0.711531,0.131882,0.214946,0.627445,0.377539,-1.709879,-1.466293,0.017657,0.357584,1.835375,0.874360,-1.818273,-1.007563,-0.095704,0.550890
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
bbdf2f28e,1.452691,-0.210558,-0.492525,1.014448,0.617263,0.309325,0.126559,-0.480378,1.228044,1.413593,-0.133942,-0.160932,-0.331164,0.101733,0.637515,0.202091,-0.825493,-2.419308,-2.048960
bbdf2f28e,0.516668,-1.086366,-0.980298,0.100393,0.140696,0.641374,1.289093,0.017183,-1.416835,-0.685799,0.518389,1.573385,1.760938,-0.203149,-1.688528,-0.201256,0.111158,-1.456036,1.048988
bbdf2f28e,2.826115,0.790033,0.444021,-0.741464,-0.915052,1.056363,-0.561535,-0.736579,1.206857,-0.420858,-1.176683,0.040314,0.361949,0.344895,0.167977,0.207614,-0.712273,-1.639740,-0.541954
bbdf2f28e,-0.087751,-1.047592,-0.870010,0.011761,1.863578,2.515086,0.000094,-1.271951,-0.370206,-0.070828,0.043683,0.199790,1.064105,0.854764,-0.283924,0.413710,-0.754051,-1.558746,-0.651513


In [10]:
# Extracting top 3 channels based on variance for all samples
# 1000 samples computation duration = approx. 4 minutes for M1
from src.feature_extraction import calculate_all_samples

top_channels_df = calculate_all_samples(df, sig_ids, 1000) # 10 samples for testing
top_channels_df # NEED TO EXTRACT FEATURES FROM CHANNELS IN THIS DATA STRUCTURE

Unnamed: 0,0,1,2
d128c9ebd,Fp1,O2,Fp2
8179f84ad,O2,Fp1,Fp2
5358bcc07,Fp1,O2,Fp2
b5ad85b3c,O2,Fp1,O1
65a7f2bdc,O2,Fp1,O1
...,...,...,...
9c8ce7904,Fp1,O2,Fp2
742f5c6f8,Fp1,O2,Fp2
0da9d7ec3,Fp1,O2,O1
ace97a8d1,Fp1,O2,Fp2


In [11]:
from src.feature_extraction import extract_features_all_samples

feature_extracted = extract_features_all_samples(df, top_channels_df)
feature_extracted

Unnamed: 0,std,mean,max,min,var,med,skew,kurt,ent,mom,pow
d128c9ebd,1.116403,-0.00327,3.946,-3.859536,1.24771,-0.007252,0.009464,2.619701,4.948989,4.111222,124783445.587462
8179f84ad,1.103616,0.001938,3.934382,-3.691718,1.219182,-0.012947,0.03822,2.657286,4.976544,3.979107,121920809.554022
5358bcc07,1.110514,-0.001585,4.01099,-3.828518,1.234969,-0.010395,0.037063,2.66685,4.974836,4.105648,123502169.892182
b5ad85b3c,1.107383,-0.004989,3.78403,-3.912202,1.22713,-0.00474,0.029857,2.655467,4.958057,4.024142,122716858.89894
65a7f2bdc,1.10915,-0.005324,3.748586,-3.793229,1.231415,-0.015225,0.033488,2.634658,4.953907,4.026446,123152037.239885
...,...,...,...,...,...,...,...,...,...,...,...
9c8ce7904,1.110681,0.014479,3.828644,-3.940075,1.235215,0.013197,0.006068,2.647969,4.957339,4.075306,123542865.952174
742f5c6f8,1.102224,-0.004985,3.739732,-3.879643,1.216188,-0.015381,0.041106,2.692113,4.966284,4.011041,121621361.099973
0da9d7ec3,1.106036,-0.006022,3.879316,-3.730682,1.224694,-0.014269,0.037671,2.678809,4.965405,4.050822,122477485.360054
ace97a8d1,1.108349,-0.005593,3.829017,-3.830608,1.229224,-0.008106,0.021914,2.635667,4.965421,4.007093,122927287.179291


In [12]:
df_model = pd.DataFrame()
for idx, row in top_channels_df.iterrows():
  temp = df.loc[idx]
  selected_columns = temp[["Fp1", "O2", "Fp2", "O1"]]
  df_model = pd.concat([df_model, selected_columns])
df_model

Unnamed: 0_level_0,Fp1,O2,Fp2,O1
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d128c9ebd,-0.923302,0.766875,0.028176,-0.455157
d128c9ebd,0.867578,-2.510445,-0.089251,-0.455618
d128c9ebd,-1.647884,-0.257054,0.493182,-1.007211
d128c9ebd,0.701803,-0.440238,-0.273099,1.046773
d128c9ebd,-0.404222,0.856649,-0.493801,0.988441
...,...,...,...,...
fb2bfb3db,-1.275576,0.872572,-1.560284,1.430474
fb2bfb3db,-0.439766,0.729472,-1.678198,-0.623653
fb2bfb3db,0.882909,0.944848,-0.501423,-0.705514
fb2bfb3db,3.086802,-1.108595,1.013364,-1.598515


In [13]:
from sklearn.model_selection import train_test_split

# Drop the column with NaN values
# Transform the DataFrame into a 3D array
n_features = 4  # Number of EEG channels used
time_steps = 10000  # Rows per id
n_sequences = len(df_model) // time_steps  # Total number of sequences

# Initialize an empty array for the transformed data
X = np.empty((n_sequences, time_steps, n_features))

# Fill the array with data from the DataFrame
for i, (idx, group) in enumerate(df_model.groupby('id')):
    X[i] = group[['Fp1', 'O2', 'Fp2', 'O1']].values

y = metadata.expert_consensus
y = y.apply(lambda x: 1 if x == 'Seizure' else 0)

# Split the data into training and test sets
X_train = X[:400]
y_train = y[:400]
X_test = X[800:]
y_test = y[800:]
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle = False)


In [14]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim=1, num_layers=1):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        # LSTM Layer
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return torch.sigmoid(out)

# Model initialization
input_dim = 4  # Number of channels
hidden_dim = 128  # Number of hidden units
num_layers = 2  # Number of LSTM layers

model = LSTMModel(input_dim, hidden_dim, output_dim=1, num_layers=num_layers)


  from .autonotebook import tqdm as notebook_tqdm


In [15]:
from torch.utils.data import TensorDataset, DataLoader

# Ensure X_train, X_test, y_train, y_test are all numpy arrays before proceeding
X_train = X_train.values if isinstance(X_train, pd.Series) else X_train
X_test = X_test.values if isinstance(X_test, pd.Series) else X_test
y_train = y_train.values if isinstance(y_train, pd.Series) else y_train
y_test = y_test.values if isinstance(y_test, pd.Series) else y_test

# Now, convert them into tensors
train_data = TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float())
test_data = TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float())

# DataLoader
batch_size = 64
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)


In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = LSTMModel(input_dim=4, hidden_dim=128, output_dim=1, num_layers=2).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [17]:
num_epochs = 10
criterion = nn.BCEWithLogitsLoss()
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        
        # Backward and optimize
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')


Epoch [1/10], Loss: 0.8613
Epoch [2/10], Loss: 0.7645
Epoch [3/10], Loss: 0.6966
Epoch [4/10], Loss: 0.6940
Epoch [5/10], Loss: 0.6936
Epoch [6/10], Loss: 0.6935
Epoch [7/10], Loss: 0.6934
Epoch [8/10], Loss: 0.6934
Epoch [9/10], Loss: 0.6933
Epoch [10/10], Loss: 0.6933


In [18]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        outputs = model(inputs)
        predicted = (outputs.squeeze() > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy: {100 * correct / total}')

Accuracy: 81.0


In [1]:
# SVM model - training taking way too long need to be fixed
n_features_SVM = 11  

# Initialize an empty array for the transformed data
X = feature_extracted


# Assuming `labels` is your mapping from id to seizure label
y = metadata.expert_consensus
y = y.apply(lambda x: 1 if x == 'Seizure' else 0)

# Split the data into training and test sets
X_train = X[:800]
y_train = y[:800]
X_test = X[800:]
y_test = y[800:]


NameError: name 'feature_extracted' is not defined

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Create the SVM model
svm_model = SVC(kernel='linear')  # You can experiment with other kernels like 'rbf'

# Train the model
svm_model.fit(X_train, y_train)



In [None]:
# Make predictions
predictions = svm_model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, predictions))
print("\nClassification Report:\n", classification_report(y_test, predictions))
