In [1]:
#basic import 
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch 
import torch.nn as nn
import torch.nn.functional as F
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import classification_report
device = torch.device("mps")

In [2]:
df = pd.read_csv('Epileptic Seizure Recognition.csv',index_col=0)

In [3]:
df.head()

Unnamed: 0_level_0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X170,X171,X172,X173,X174,X175,X176,X177,X178,y
Unnamed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
X21.V1.791,135,190,229,223,192,125,55,-9,-33,-38,...,-17,-15,-31,-77,-103,-127,-116,-83,-51,4
X15.V1.924,386,382,356,331,320,315,307,272,244,232,...,164,150,146,152,157,156,154,143,129,1
X8.V1.1,-32,-39,-47,-37,-32,-36,-57,-73,-85,-94,...,57,64,48,19,-12,-30,-35,-35,-36,5
X16.V1.60,-105,-101,-96,-92,-89,-95,-102,-100,-87,-79,...,-82,-81,-80,-77,-85,-77,-72,-69,-65,5
X20.V1.54,-9,-65,-98,-102,-78,-48,-16,0,-21,-59,...,4,2,-12,-32,-41,-65,-83,-89,-73,5


In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11500 entries, X21.V1.791 to X16.V1.210
Columns: 179 entries, X1 to y
dtypes: int64(179)
memory usage: 15.8+ MB


In [5]:
bin_map = {1 : 1 , 2:0,3:0,4:0,5:0}
df['y'] = df['y'].replace(bin_map)

In [6]:
from torch.utils.data import DataLoader,Dataset

In [7]:
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [8]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9200 entries, X12.V1.764 to X15.V1.871
Columns: 179 entries, X1 to y
dtypes: int64(179)
memory usage: 12.6+ MB


In [9]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2300 entries, X6.V1.83 to X9.V1.3
Columns: 179 entries, X1 to y
dtypes: int64(179)
memory usage: 3.2+ MB


In [10]:
class EEG_Dataset(Dataset):
    
    def __init__(self,df):
        self.df = df
        self.features = df.drop('y',axis=1).values
        self.labels = df['y'].values
    def __len__(self):
        return len(self.df)
    def __getitem__(self,idx):
        feature = torch.Tensor(self.features[idx])
        label = torch.tensor(self.labels[idx])
        return feature,label

In [11]:
train_dataset = EEG_Dataset(train_df)
test_dataset = EEG_Dataset(test_df)

In [12]:
def pad_collate(batch):
    data = [item[0] for item in batch]
    labels = [item[1] for item in batch]
    max_size = max([item.size(0) for item in data])
    padded_data = [F.pad(item, (0, max_size - item.size(0))) for item in data]
    return torch.stack(padded_data), torch.tensor(labels)

In [13]:
train_batch_size = 250
test_batch_size = 250
train_loader = DataLoader(train_dataset,batch_size=train_batch_size,shuffle=True,collate_fn=pad_collate)
test_loader = DataLoader(test_dataset,batch_size=test_batch_size,shuffle=False,collate_fn=pad_collate)

In [14]:
class LSTM(nn.Module):
    def __init__(self,in_size,hidden_size=32,out_size=1,num_layers=2,bidirectional=True,p=0.4):
        super(LSTM,self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_dir = 2 if bidirectional else 1
        
        self.lstm = nn.LSTM(in_size,hidden_size,num_layers,bidirectional=bidirectional,batch_first=True)
        self.fc = nn.Linear(self.num_dir*hidden_size,out_size)
        self.dropout = nn.Dropout(p)
        
    def forward(self,x,hidden):
        lstm_out,hidden = self.lstm(x,hidden)
        lstm_out = self.dropout(lstm_out)
        pred = self.fc(lstm_out[:,-1,:])
        return pred,hidden
    
    def init_hidden(self,batch_size):
        hidden = (torch.zeros(self.num_dir*self.num_layers,batch_size,self.hidden_size).to(device),
                 torch.zeros(self.num_dir*self.num_layers,batch_size,self.hidden_size).to(device))
        return hidden

In [15]:
model = LSTM(train_df.shape[1]-1,hidden_size=256,out_size=2,num_layers=1,bidirectional=False).to(device)

In [16]:
model

LSTM(
  (lstm): LSTM(178, 256, batch_first=True)
  (fc): Linear(in_features=256, out_features=2, bias=True)
  (dropout): Dropout(p=0.4, inplace=False)
)

In [17]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
num_epochs = 500

In [18]:
for epoch in range(num_epochs):
    for feature,label in train_loader:
        feature,label = feature.to(device),label.to(device)
        feature = feature.unsqueeze(1)
        hidden = model.init_hidden(feature.size(0))
        y_pred,hidden = model(feature,hidden)
        loss = criterion(y_pred,label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if (epoch+1)%50==0:
        print(f"epoch : {epoch+1} loss : {loss.item()}")

epoch : 50 loss : 0.16676928102970123
epoch : 100 loss : 0.16159594058990479
epoch : 150 loss : 0.14939987659454346
epoch : 200 loss : 0.17595107853412628
epoch : 250 loss : 0.11360174417495728
epoch : 300 loss : 0.16056251525878906
epoch : 350 loss : 0.14092794060707092
epoch : 400 loss : 0.09706249088048935
epoch : 450 loss : 0.04435652121901512
epoch : 500 loss : 0.05099885165691376


In [21]:
model.eval()  # Set the model to evaluation mode
total_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
    for feature, label in test_loader:
        feature, label = feature.to(device), label.to(device)
        feature = feature.unsqueeze(1)
        hidden = model.init_hidden(feature.size(0))
        outputs, _ = model(feature, hidden)
        outputs = outputs.view(label.size(0), -1)
        loss = criterion(outputs, label)
        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()
    avg_loss = total_loss / len(test_loader)
    accuracy = 100 * correct / total

print(f'Average Loss on the test set: {avg_loss:.4f}')
print(f'Accuracy of the model on the test set: {accuracy:.2f}%')

   

Average Loss on the test set: 0.3183
Accuracy of the model on the test set: 89.04%
