In [1]:
#read the normalized data 
import pandas as pd
normalized_data = pd.read_csv("normalized_data.csv")
normalized_data.head(5)




Unnamed: 0,eeg_id,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote,target
0,568657,0.0,0.0,0.25,0.0,0.166667,0.583333,Other
1,582999,0.0,0.857143,0.0,0.071429,0.0,0.071429,LPD
2,642382,0.0,0.0,0.0,0.0,0.0,1.0,Other
3,751790,0.0,0.0,1.0,0.0,0.0,0.0,GPD
4,778705,0.0,0.0,0.0,0.0,0.0,1.0,Other


In [28]:
from sklearn.utils import resample

# Assuming you have your data in X and y variables, where X is the feature matrix and y is the target variable
X = normalized_data
y = normalized_data['target']

# Count the number of samples in each class
class_counts = y.value_counts()

# Find the minimum number of samples among all classes
min_samples = class_counts.min()

# Resample each class to have the same number of samples
resampled_X = pd.DataFrame()
resampled_y = pd.Series()
for class_label in class_counts.index:
    # Get the samples belonging to the current class
    class_samples = X[y == class_label]
    
    # Resample the class to have the same number of samples as the minimum
    resampled_class_samples = resample(class_samples, n_samples=min_samples, replace=False)
    
    # Append the resampled samples to the final resampled data
    resampled_X = pd.concat([resampled_X, resampled_class_samples])
    resampled_y = pd.concat([resampled_y, pd.Series([class_label] * min_samples)])

# Now you have the resampled data in resampled_X and resampled_y

In [29]:
resampled_X.shape, resampled_y.shape

((5496, 8), (5496,))

In [31]:
resampled_X.head(5)
#count the number of samples in each class
resampled_y.value_counts()

Other      916
Seizure    916
LPD        916
GRDA       916
GPD        916
LRDA       916
Name: count, dtype: int64

In [33]:
#rewrite the resampled data to a csv file
resampled_X.to_csv("resampled_data.csv", index=False)

In [34]:
EEG_PATH = 'train_eegs/'
train_path = 'resampled_data.csv'

In [35]:
labels = resampled_y

In [36]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import glob
from scipy.signal import butter, sosfilt
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
import time

import pandas as pd

from EGGDataset import EEGDataset
dataset = EEGDataset(train_path, EEG_PATH)

from sklearn.model_selection import train_test_split

train_dataset, test_dataset, train_label, test_label = train_test_split(dataset, labels, test_size=0.2, random_state=42)


train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [131]:
import torch.nn.functional as F

class CNNLSTM(nn.Module):

    def __init__(self, in_channels=8, num_classes=6):
        super(CNNLSTM, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(32)
        self.relu1 = nn.ReLU(inplace=True)
        self.dropout1 = nn.Dropout(p=0.3)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.dropout2 = nn.Dropout(p=0.3)
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)

        self.flatten1 = nn.Flatten()
        self.linear1 = nn.Linear(160000, 6)
        self.bn3 = nn.BatchNorm1d(6)

        self.softmax = nn.Softmax(dim=1)
        # self.lstm1 = nn.LSTM(input_size=64, hidden_size=128, num_layers=1, batch_first=True, bidirectional=True)
        # self.lstm2 = nn.LSTM(input_size=256, hidden_size=128, num_layers=1, batch_first=True, bidirectional=True)

        # self.attention = nn.Sequential(
        #     nn.Linear(128 * 2, 64),
        #     nn.Tanh(),
        #     nn.Linear(64, 1)
        # )

        # self.fc = nn.Linear(128 * 2, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.pool2(x)

        x = self.flatten1(x)
        x = self.linear1(x)
        x = self.bn3(x)

        x = self.softmax(x)


        return x

In [132]:
# Create a list of class names
class_names = ['Seizure', 'LPD', 'GDP', 'LRDA', 'GRDA', 'Other']

# Create a dictionary to map class names to indices
class_to_idx = {class_name: idx for idx, class_name in enumerate(class_names)}

# Create a dictionary to map indices to class names
idx_to_class = {idx: class_name for idx, class_name in enumerate(class_names)}

In [134]:
input_channels = 8
num_classes = 6  


model = CNNLSTM(in_channels=input_channels, num_classes=num_classes)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

s = 0
num_epochs = 1
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_dataloader):
        print(f"Dataloader: {i+1}/{len(train_dataloader)} - {round(time.time()-s,2)}s")
        s = time.time()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_dataloader)}")

Dataloader: 1/138 - 1715890066.53s
Dataloader: 2/138 - 1.1s
Dataloader: 3/138 - 1.16s
Dataloader: 4/138 - 1.16s
Dataloader: 5/138 - 1.16s
Dataloader: 6/138 - 1.14s
Dataloader: 7/138 - 1.28s
Dataloader: 8/138 - 1.18s
Dataloader: 9/138 - 1.16s
Dataloader: 10/138 - 1.22s
Dataloader: 11/138 - 1.2s
Dataloader: 12/138 - 1.14s
Dataloader: 13/138 - 1.24s
Dataloader: 14/138 - 1.3s
Dataloader: 15/138 - 1.23s
Dataloader: 16/138 - 1.21s
Dataloader: 17/138 - 1.33s
Dataloader: 18/138 - 1.19s
Dataloader: 19/138 - 1.36s
Dataloader: 20/138 - 1.18s
Dataloader: 21/138 - 1.17s
Dataloader: 22/138 - 1.17s
Dataloader: 23/138 - 1.18s
Dataloader: 24/138 - 1.15s
Dataloader: 25/138 - 1.18s
Dataloader: 26/138 - 1.2s
Dataloader: 27/138 - 1.17s
Dataloader: 28/138 - 1.17s
Dataloader: 29/138 - 1.17s
Dataloader: 30/138 - 1.14s
Dataloader: 31/138 - 1.18s
Dataloader: 32/138 - 1.16s
Dataloader: 33/138 - 1.14s
Dataloader: 34/138 - 1.17s
Dataloader: 35/138 - 1.16s
Dataloader: 36/138 - 1.14s
Dataloader: 37/138 - 1.16s
Datal

In [157]:
#train model more
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_dataloader):
        s = time.time()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_dataloader)}")
    torch.save(model.state_dict(), f'model_epoch_{num_epochs}.pth')


Epoch 1, Loss: 1.4258839617604795
Epoch 2, Loss: 1.4231592788212541
Epoch 3, Loss: 1.4227589515672214
Epoch 4, Loss: 1.4155353083126787
Epoch 5, Loss: 1.4155676045279573


In [161]:
torch.save(model.state_dict(), f'cnn_model_35_epoch.pth')

In [176]:
model = CNNLSTM(in_channels=input_channels, num_classes=num_classes)
model.load_state_dict(torch.load('cnn_model_20_epoch.pth'))

<All keys matched successfully>

In [177]:
# Set the model to evaluation mode
model.eval()

# Initialize list to store predictions
predictions = []

# No need to track gradients for evaluation
with torch.no_grad():
    for data, _ in test_dataloader:
        # Forward pass
        output = model(data)

        # Store predictions
        predictions.extend(output.numpy())

# Convert list to numpy array
predictions = np.array(predictions)

In [178]:
predictions_class = np.argmax(predictions, axis=1)
#count how many different classes
unique, counts = np.unique(predictions_class, return_counts=True)
print(unique, counts)

[0 1 2 3 4 5] [ 86 253  13 169 109 470]


In [179]:
#fit the model to the test data
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Create a list of class names
class_names = ['Seizure', 'LPD', 'GPD', 'LRDA', 'GRDA', 'Other']

# Create a dictionary to map class names to indices
class_to_idx = {class_name: idx for idx, class_name in enumerate(class_names)}

# Create a dictionary to map indices to class names
idx_to_class = {idx: class_name for idx, class_name in enumerate(class_names)}

# Convert test labels to indices
test_label_idx = test_label.map(class_to_idx)

print(f"Test label indices: {test_label_idx}")

Test label indices: 27     1
691    5
26     1
185    0
429    3
      ..
177    4
85     0
654    1
649    2
138    4
Length: 1100, dtype: int64


In [181]:
test_label.value_counts()

Other      203
LPD        183
LRDA       183
GPD        181
Seizure    177
GRDA       173
Name: count, dtype: int64

In [182]:
# Calculate precision, recall, and F1 score of 20_epoch model
accuracy = accuracy_score(test_label_idx, predictions_class)
precision = precision_score(test_label_idx, predictions_class, average='macro')
recall = recall_score(test_label_idx, predictions_class, average='macro')
f1 = f1_score(test_label_idx, predictions_class, average='macro')
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")



Accuracy: 0.17636363636363636
Precision: 0.1789872619193212
Recall: 0.1697718865728338
F1 Score: 0.146084254436242


In [183]:
model2 = CNNLSTM(in_channels=input_channels, num_classes=num_classes)
model2.load_state_dict(torch.load('cnn_model_35_epoch.pth'))


<All keys matched successfully>

In [185]:
# Set the model to evaluation mode
model2.eval()

# Initialize list to store predictions
predictions = []

# No need to track gradients for evaluation
with torch.no_grad():
    for data, _ in test_dataloader:
        # Forward pass
        output = model2(data)

        # Store predictions
        predictions.extend(output.numpy())

# Convert list to numpy array
predictions = np.array(predictions)

In [186]:
predictions_class = np.argmax(predictions, axis=1)
#count how many different classes
unique, counts = np.unique(predictions_class, return_counts=True)
print(unique, counts)

[0 1 2 3 4 5] [  8 188 136   2 343 423]


In [187]:
# Calculate precision, recall, and F1 score of 20_epoch model
accuracy = accuracy_score(test_label_idx, predictions_class)
precision = precision_score(test_label_idx, predictions_class, average='macro')
recall = recall_score(test_label_idx, predictions_class, average='macro')
f1 = f1_score(test_label_idx, predictions_class, average='macro')
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")


Accuracy: 0.1590909090909091
Precision: 0.1051069786239386
Recall: 0.15520983950485587
F1 Score: 0.12014654112914391
