In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
from tqdm import tqdm
import pickle
import os

# Data Preprocessing

In [8]:
# Import the dataset
def get_data():
    """
    Load SEED data from .npz file.
    
    :return: train data, test data, train labels, test labels.
    """
    feature_1s_dir = './EEG-DE-feature/eeg_used_1s'
    file_1s_list = os.listdir(feature_1s_dir)
    file_1s_list.sort()
    
    all_train_data = {key: [] for key in ['delta', 'theta', 'alpha', 'beta', 'gamma']}
    all_test_data = {key: [] for key in ['delta', 'theta', 'alpha', 'beta', 'gamma']}
    all_train_labels = []
    all_test_labels = []
    
    for item in tqdm(file_1s_list, desc="Loading data"):
        npz_data = np.load(os.path.join(feature_1s_dir, item))
        
        # Extract train and test data
        train_data = pickle.loads(npz_data['train_data'])
        test_data = pickle.loads(npz_data['test_data'])
        
        # Append data from this file to the overall dictionary
        for key in all_train_data.keys():
            all_train_data[key].append(train_data[key])
            all_test_data[key].append(test_data[key])
        
        all_train_labels.append(npz_data['train_label'])
        all_test_labels.append(npz_data['test_label'])
        
    # Combine train and test data across all files
    train_data = {key: np.concatenate(all_train_data[key], axis=0) for key in all_train_data.keys()}
    test_data = {key: np.concatenate(all_test_data[key], axis=0) for key in all_test_data.keys()}
    train_labels = np.concatenate(all_train_labels, axis=0)
    test_labels = np.concatenate(all_test_labels, axis=0)
    
    # Combine features from all bands into a single feature vector
    trainX = np.hstack([train_data[key] for key in train_data.keys()])
    testX = np.hstack([test_data[key] for key in test_data.keys()])
    
    # Debug information
    # print("Combined Train Data Shape:", trainX.shape)
    # print("Combined Test Data Shape:", testX.shape)
    # print("Combined Train Labels Shape:", train_labels.shape)
    # print("Combined Test Labels Shape:", test_labels.shape)
    
    return trainX, testX, train_labels, test_labels

# Decision Tree

In [16]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = get_data()

#import DecisionTreeClassifier and start the training
from sklearn.tree import DecisionTreeClassifier
estimator = DecisionTreeClassifier(random_state=0)
estimator.fit(X_train, y_train)

#apply the trained estimator on the test dataset
mean_accuracy=estimator.score(X_test, y_test)
y_pred = estimator.predict(X_test)
print('----------------Decision Tree----------------')
print('mean_accuracy: ', mean_accuracy)
# import relevant metrics and print the confusion matrix and classification report
from sklearn.metrics import classification_report, confusion_matrix
print('confusion matrix:\n', confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Loading data: 100%|██████████| 36/36 [00:00<00:00, 52.75it/s]


----------------Decision Tree----------------
mean_accuracy:  0.6069163455362877
confusion matrix
 [[ 6716  5610  3478]
 [ 4759  9865  2296]
 [ 2141  1301 13658]]
              precision    recall  f1-score   support

         0.0       0.49      0.42      0.46     15804
         1.0       0.59      0.58      0.59     16920
         2.0       0.70      0.80      0.75     17100

    accuracy                           0.61     49824
   macro avg       0.59      0.60      0.60     49824
weighted avg       0.60      0.61      0.60     49824



# Gaussian Naive Bayes

In [10]:

#import GaussianNB and perform Gaussian Naive Bayes on the training dataset
from sklearn.naive_bayes import GaussianNB
estimator = GaussianNB()
estimator.fit(X_train, y_train)

#apply the trained estimator on the test dataset
estimator.score(X_test, y_test)
y_pred = estimator.predict(X_test)

print('----------------Gaussian Naive Bayes----------------')
print('mean_accuracy: ', mean_accuracy)
from sklearn.metrics import classification_report, confusion_matrix
print('confusion matrix:\n', confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

----------------Gaussian Naive Bayes----------------
[[4173 6570 5061]
 [1978 9552 5390]
 [3678 4490 8932]]
              precision    recall  f1-score   support

         0.0       0.42      0.26      0.33     15804
         1.0       0.46      0.56      0.51     16920
         2.0       0.46      0.52      0.49     17100

    accuracy                           0.45     49824
   macro avg       0.45      0.45      0.44     49824
weighted avg       0.45      0.45      0.44     49824



# KNN

In [11]:
#import KNeighborsClassifier and KNN on the training dataset
from sklearn.neighbors import KNeighborsClassifier
estimator = KNeighborsClassifier(n_neighbors=3)
estimator.fit(X_train, y_train)

print('----------------KNN----------------')
#apply the trained estimator on the test dataset
mean_accuracy=estimator.score(X_test, y_test)
y_pred = estimator.predict(X_test)
print('mean_accuracy: ', mean_accuracy)
print('confusion matrix:\n', confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

----------------KNN----------------
mean_accuracy:  0.6903299614643545


# Random Forest

In [12]:
#import RandomForestClassifier and RFC on the training dataset
from sklearn.ensemble import RandomForestClassifier
estimator = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
estimator.fit(X_train, y_train)

print('----------------Random Forest----------------')
#apply the trained estimator on the test dataset
mean_accuracy=estimator.score(X_test, y_test)
y_pred = estimator.predict(X_test)
print('mean_accuracy: ', mean_accuracy)
# print(estimator.feature_importances_)
print('confusion matrix:\n', confusion_matrix(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

----------------Random Forest----------------
mean_accuracy:  0.5798410404624278
[[ 6913  4242  4649]
 [ 4916  8299  3705]
 [ 1831  1591 13678]]
              precision    recall  f1-score   support

         0.0       0.51      0.44      0.47     15804
         1.0       0.59      0.49      0.53     16920
         2.0       0.62      0.80      0.70     17100

    accuracy                           0.58     49824
   macro avg       0.57      0.58      0.57     49824
weighted avg       0.57      0.58      0.57     49824



# SVM


## Linear SVM

In [17]:
#import SVC and perform linear SVM on the training dataset
from sklearn.svm import SVC
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, y_train)

#apply the trained SVM on the test dataset
y_pred = svclassifier.predict(X_test)
# import relevant metrics and print the confusion matrix and classification report
print('----------------Linear SVM----------------')
print('mean_accuracy: ', mean_accuracy)
print('confusion matrix:\n', confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

----------------Linear SVM----------------
mean_accuracy:  0.6069163455362877
confusion matrix:
 [[ 8761  5591  1452]
 [ 2827 13123   970]
 [ 1615  1200 14285]]
              precision    recall  f1-score   support

         0.0       0.66      0.55      0.60     15804
         1.0       0.66      0.78      0.71     16920
         2.0       0.86      0.84      0.85     17100

    accuracy                           0.73     49824
   macro avg       0.73      0.72      0.72     49824
weighted avg       0.73      0.73      0.72     49824



## Polynomial SVM

In [18]:
#import SVC and perform Polynomial SVM on the training dataset
from sklearn.svm import SVC
svclassifier = SVC(kernel='poly', C=1, gamma=0.1, degree=3)
svclassifier.fit(X_train, y_train)

#apply the trained SVM on the test dataset
y_pred = svclassifier.predict(X_test)
print('----------------Polynomial SVM----------------')
print('mean_accuracy: ', mean_accuracy)
print('confusion matrix:\n', confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

----------------Polynomial SVM----------------
mean_accuracy:  0.6069163455362877
confusion matrix:
 [[ 9022  5290  1492]
 [ 2554 13396   970]
 [ 1897   801 14402]]
              precision    recall  f1-score   support

         0.0       0.67      0.57      0.62     15804
         1.0       0.69      0.79      0.74     16920
         2.0       0.85      0.84      0.85     17100

    accuracy                           0.74     49824
   macro avg       0.74      0.73      0.73     49824
weighted avg       0.74      0.74      0.74     49824



## RBF SVM

In [19]:
svclassifier = SVC(kernel='rbf', C=1, gamma=0.1)
svclassifier.fit(X_train, y_train)
y_pred = svclassifier.predict(X_test)
print('----------------RBF SVM----------------')
print('mean_accuracy: ', mean_accuracy)
print('confusion matrix:\n', confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

## Sigmoid SVM

In [None]:
svclassifier = SVC(kernel='sigmoid', C=1, gamma=0.1)
svclassifier.fit(X_train, y_train)
y_pred = svclassifier.predict(X_test)
print('----------------Sigmoid SVM----------------')
print('mean_accuracy: ', mean_accuracy)
print('confusion matrix:\n', confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))