In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob

In [17]:
from sktime.transformations.panel.rocket import MiniRocket
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import LeaveOneOut

In [3]:
file_paths = glob.glob('eeg_label/*_eeg_label.csv')

data_list = []
labels_list = []
patient_ids = []

for file_path in file_paths:
    eeg_data = pd.read_csv(file_path)
    patient_id = file_path.split('/')[-1]
    
    # Segment the data into non-overlapping 30-second windows
    segment_size = 128 * 30
    num_segments = len(eeg_data) // segment_size
    
    for i in range(num_segments):
        start_idx = i * segment_size
        end_idx = start_idx + segment_size
        segment = eeg_data['EEG'].iloc[start_idx:end_idx].values
        label = eeg_data['Label'].iloc[start_idx:end_idx].mode()[0]
        
        data_list.append(segment)
        labels_list.append(label)
        patient_ids.append(patient_id)  # Repeat patient ID for each segment

# Convert lists to numpy arrays
X = np.array(data_list)
y = np.array(labels_list)
patient_ids = np.array(patient_ids)

In [4]:
X_reshaped = X.reshape(X.shape[0], 1, X.shape[1])

In [5]:
X_reshaped

array([[[  3.8479987 ,  -0.05189488,   8.28753628, ...,  -7.48395381,
          -3.98232519,  -9.09177062]],

       [[-10.36212763,  -7.54309084,   2.06587013, ...,   8.56947209,
          -8.95266975,   2.31421482]],

       [[ -9.41317832,  -6.71875766,   5.51237151, ...,  -5.21229156,
          15.67001155,  18.34295955]],

       ...,

       [[  3.69158411,  11.39198934,  -6.79861131, ...,   5.54533639,
          -2.30382724,  -9.14454236]],

       [[  2.92013452,  -0.35889896,   2.22516951, ...,  -8.10374495,
          -0.07323493,   2.78687532]],

       [[ -2.76418176,   5.35863693,  -9.3733417 , ...,  -1.09258537,
           8.91095906,   9.99089462]]])

In [6]:
X_reshaped.shape

(1674, 1, 3840)

In [7]:
# Initialize the MiniRocket transformer
minirocket = MiniRocket(random_state=123)

# Transform the entire dataset using MiniRocket
X_transformed = minirocket.fit_transform(X_reshaped)

In [8]:
X_transformed.shape

(1674, 9996)

In [9]:
# Scale the transformed data
scaler = StandardScaler(with_mean=False)
X_scaled = scaler.fit_transform(X_transformed)

In [10]:
X_scaled.shape

(1674, 9996)

In [12]:
# Randomly select 50% of the data for quick testing
subset_indices = np.random.choice(len(X_scaled), size=int(len(X_scaled) * 0.8), replace=False)

X_subset = X_scaled[subset_indices]
y_subset = y[subset_indices]
patient_ids_subset = patient_ids[subset_indices]

# Use the subset for LOOCV
loo = LeaveOneOut()
all_preds = []
all_labels = []

for train_index, test_index in loo.split(X_subset, groups=patient_ids_subset):
    X_train, X_test = X_subset[train_index], X_subset[test_index]
    y_train, y_test = y_subset[train_index], y_subset[test_index]
    
    classifier = LogisticRegression(random_state=0, solver='liblinear')
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    
    all_preds.extend(y_pred)
    all_labels.extend(y_test)

KeyboardInterrupt: 

In [18]:
# Evaluate the overall performance of model fit with 50% of the data
report = classification_report(all_labels, all_preds, output_dict=True)
accuracy = report['accuracy']
print(f"LOOCV Accuracy: {accuracy}")

LOOCV Accuracy: 0.5686977299880526


In [14]:
all_preds = []
all_labels = []
all_patient_ids = np.unique(patient_ids)

# Loop over each patient (Leave-One-Patient-Out)
for patient in all_patient_ids:
    # Split the data based on patient IDs
    X_train = X_scaled[patient_ids != patient]
    X_test = X_scaled[patient_ids == patient]
    y_train = y[patient_ids != patient]
    y_test = y[patient_ids == patient]
    
    # Train the classifier
    classifier = LogisticRegression(random_state=0, solver='liblinear', max_iter=1000)
    classifier.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = classifier.predict(X_test)
    
    # Store the results
    all_preds.extend(y_pred)
    all_labels.extend(y_test)

In [18]:
# After looping through all patients, evaluate the overall performance
report = classification_report(all_labels, all_preds, labels=['AW', 'LA', 'MA', 'DA', 'RE'])
accuracy = accuracy_score(all_labels, all_preds)

print(report)
print(f'Overall Accuracy: {accuracy:.2f}')

              precision    recall  f1-score   support

          AW       0.34      0.21      0.26       155
          LA       0.37      0.25      0.30       229
          MA       0.21      0.16      0.18       327
          DA       0.59      0.69      0.63       777
          RE       0.30      0.42      0.35       186

    accuracy                           0.45      1674
   macro avg       0.36      0.35      0.34      1674
weighted avg       0.43      0.45      0.43      1674

Overall Accuracy: 0.45
