In [38]:
import torch
import h5py
import numpy as np
from torch.utils.data import Dataset,DataLoader
from torch import nn
import dask.array as da
from time import time
import random
import torch.nn.functional as F
from tqdm.notebook import tqdm


In [39]:

h5file = "D:\RnnMachineLearning\meditationData.hdf5"
f = h5py.File(h5file, 'r')
f.keys()


<KeysViewHDF5 ['sub-001', 'sub-002', 'sub-003', 'sub-004', 'sub-005', 'sub-006', 'sub-007', 'sub-008', 'sub-009', 'sub-010', 'sub-011', 'sub-012', 'sub-013', 'sub-014', 'sub-015', 'sub-016', 'sub-017', 'sub-018', 'sub-019', 'sub-020', 'sub-021', 'sub-022', 'sub-023', 'sub-024', 'sub-025', 'sub-026', 'sub-027', 'sub-028', 'sub-029', 'sub-030', 'sub-031', 'sub-032', 'sub-033', 'sub-034', 'sub-035', 'sub-036', 'sub-037', 'sub-038', 'sub-039', 'sub-040', 'sub-041', 'sub-042', 'sub-043', 'sub-044', 'sub-045', 'sub-046', 'sub-047', 'sub-048', 'sub-049', 'sub-050', 'sub-051', 'sub-052', 'sub-053', 'sub-054', 'sub-055', 'sub-056', 'sub-057', 'sub-058', 'sub-059', 'sub-060', 'sub-061', 'sub-062', 'sub-063', 'sub-064', 'sub-065', 'sub-066', 'sub-067', 'sub-068', 'sub-069', 'sub-070', 'sub-071', 'sub-072', 'sub-073', 'sub-074', 'sub-075', 'sub-076', 'sub-077', 'sub-078', 'sub-079', 'sub-080', 'sub-081', 'sub-082', 'sub-083', 'sub-084', 'sub-085', 'sub-086', 'sub-087', 'sub-088', 'sub-089', 'sub-0

In [40]:
def databykeys(hdfFile,hdfkeys):
    DataList = [da.from_array(hdfFile[i][j])for i in hdfkeys for j in hdfFile[i].keys() if "data" in j]
    data = da.concatenate(DataList, axis=0)

    LabelsList = [da.from_array(hdfFile[i][j])for i in hdfkeys for j in hdfFile[i].keys() if "labels" in j]
    labels = da.concatenate(LabelsList, axis=0)

    IdsList = [da.from_array(hdfFile[i][j])for i in hdfkeys for j in hdfFile[i].keys() if "ids" in j]
    ids = da.concatenate(IdsList, axis=0)

    return data,labels,ids

def train_test_split(hdfFile,dataSample=1,testratio=0.2,random_seed=True,random_seed_value =42):
    numPts = int(98*dataSample)
    if random_seed:
        np.random.seed(random_seed_value)
        random.seed(random_seed_value)

    myset = set(np.random.choice(range(1,99),numPts,replace=False))

    testIds = random.sample(myset, int(testratio*98))
    trainIds = myset -set(testIds)

    trainKeys = [i for i in f.keys() if int(i[-3:]) in trainIds]
    testKeys = [i for i in f.keys() if int(i[-3:]) in testIds]

    trainData = databykeys(hdfFile,trainKeys)
    testData = databykeys(hdfFile,testKeys)

    return trainData,testData


In [41]:
class EegDataSet(Dataset):

	def __init__(self,dataArray):
		#data loading
		self.x = torch.from_numpy(np.stack(dataArray[:,1]))
		self.labels = torch.from_numpy(dataArray[:,2].astype("float32"))

	def __len__(self):
		# len(dataset)
		return len(self.labels)

	def __getitem__(self, idx):
		return self.x[idx], self.labels[idx]

In [42]:
class EegDataSetDask(Dataset):
    def __init__(self,dataArray):
        #data loading
        self.x = dataArray[0]
        self.labels = dataArray[1]
        self.ids = dataArray[2]

    def __len__(self):
        # len(dataset)
        return self.x.shape[0]

    def __getitem__(self, idx):
        return torch.from_numpy(self.x[idx].compute()), torch.from_numpy(self.labels[idx].compute())

In [43]:
class Cnn_eeg(nn.Module):
    def __init__(self,batch_size):
        super().__init__()
        self.batch_size=batch_size
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(self.linear_input_neurons(), 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        # print("0:",x.shape)
        x = torch.reshape(x,(x.shape[0],1,x.shape[1],x.shape[2]))
        # print("1:",x.shape)
        x = self.pool(F.relu(self.conv1(x)))
        # print("2:",x.shape)
        x = self.pool(F.relu(self.conv2(x)))
        # print("3:",x.shape)
        x = x.view(x.shape[0], -1) # flatten all dimensions except batch
        # print("4:",x.shape)
        x = F.relu(self.fc1(x))
        # print("5:",x.shape)
        x = F.relu(self.fc2(x))
        # print("6:",x.shape)
        x = self.fc3(x)
        # print("7:",x.shape)
        return x
        # here we apply convolution operations before linear layer, and it returns the 4-dimensional size tensor.
    def size_after_relu(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        return x.size()[1:]


    # after obtaining the size in above method, we call it and multiply all elements of the returned size.
    def linear_input_neurons(self):
        size = self.size_after_relu(torch.rand(self.batch_size, 1, 72, 10240))
        m = 1
        for i in size:
            m *= i

        return int(m)

In [44]:
# NETWORK

# Vanilla RNN
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first = True)
        # x -> (batch_size, sequence_length, input_size)

        # or:
        #self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        #self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # print(x.shape)
        x = torch.reshape(x,(x.shape[0],x.shape[2],x.shape[1]))
        # print(x.shape)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        # print(x.shape)
        # print(h0.shape)
        out, _ = self.rnn(x, h0)
        # out -> (batch_size, sequence_length, hidden_size)
        # out -> (N, 129, 128) ->> NOTE: CHANGE SEQUENCE_LENGTH AS 750 LATER (TRANPOSE THE TENSOR)
        out = out[:, -1, :]
        # out -> (N, 128)
        out = self.fc(out)
        return out


In [45]:
class GRUmodel(nn.Module):
    def __init__(self, input_dim, hidden_dim, n_layers,num_classes):
        super(GRUmodel, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        x = torch.reshape(x,(x.shape[0],x.shape[2],x.shape[1]))
        out, h = self.gru(x, h)
        out = self.fc(self.relu(out[:,-1]))
        return out, h

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device)
        return hidden


In [46]:
class LSTMNet(nn.Module):
    def __init__(self, input_dim, hidden_dim,n_layers,num_classes):
        super(LSTMNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        x = torch.reshape(x,(x.shape[0],x.shape[2],x.shape[1]))
        out, h = self.lstm(x, h)
        out = self.fc(self.relu(out[:,-1]))
        return out, h

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device))
        return hidden

In [47]:
trainData,testData = train_test_split(f,dataSample=0.25)
torch.cuda.is_available()
# torch.version.cuda()


False

CNN testing

In [48]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


num_epochs = 1
batch_size = 32
learning_rate = 0.001

dataArrTrain = EegDataSetDask(trainData)
dataArrTest = EegDataSetDask(testData)

dlDataArrTrain = DataLoader(dataArrTrain, batch_size=batch_size, shuffle=True,num_workers=0,pin_memory=False)
dlDataArrtest = DataLoader(dataArrTest, batch_size=batch_size, shuffle=True,num_workers=0,pin_memory=False)

In [49]:
net = Cnn_eeg(batch_size)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(net.parameters(), lr=learning_rate)

In [50]:
# Train the model CNN
n_total_steps = len(dlDataArrTrain)
scaler = torch.cuda.amp.GradScaler()
for epoch in range(3):  # loop over the dataset multiple times


    for i, data_b in tqdm(enumerate(dlDataArrTrain, 0)):
        start = time()
        inputs, labels = data_b[0].to(torch.float32),data_b[1].to(torch.int64)

        # Forward pass
        # Runs the forward pass with autocasting.
        with torch.cuda.amp.autocast():
            outputs = net(inputs)
            loss = criterion(outputs, labels)

        # Backward and optimize
        for param in net.parameters():
            param.grad = None
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # loss.backward()
        # optimizer.step()
        end=time()
        print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}, Time : {end-start}')



print('Finished Training')

0it [00:00, ?it/s]

Epoch [1/1], Step [1/111], Loss: 0.6980, Time : 7.66520357131958
Epoch [1/1], Step [2/111], Loss: 2.4366, Time : 8.322979927062988
Epoch [1/1], Step [3/111], Loss: 0.9207, Time : 8.125818967819214
Epoch [1/1], Step [4/111], Loss: 0.8140, Time : 8.398743391036987
Epoch [1/1], Step [5/111], Loss: 0.7930, Time : 7.0264177322387695
Epoch [1/1], Step [6/111], Loss: 0.7144, Time : 7.162515878677368
Epoch [1/1], Step [7/111], Loss: 0.7406, Time : 7.730671167373657
Epoch [1/1], Step [8/111], Loss: 0.7193, Time : 8.62544298171997
Epoch [1/1], Step [9/111], Loss: 0.7261, Time : 8.230851173400879


KeyboardInterrupt: 

In [51]:
torch.save(net, 'cnn.pt')

In [None]:
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for data_b in dlDataArrtest:
        inputs, labels = data_b[0].to(torch.float32),data_b[1].to(torch.int64)
        outputs = net(inputs)
        # max returns (value ,index)

        _, predicted = torch.max(outputs.data, 1)

        #print(predicted, labels)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc} %')

GRU testing

In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

input_size = 72
sequence_length = 10240
num_classes = 2
# HYPERPARAMETERS
hidden_size = 128
num_layers = 2

num_epochs = 1
batch_size = 32
learning_rate = 0.001


dataArrTrain = EegDataSetDask(trainData)
dataArrTest = EegDataSetDask(testData)

dlDataArrTrain = DataLoader(dataArrTrain, batch_size=batch_size, shuffle=True,num_workers=0,pin_memory=False)
dlDataArrtest = DataLoader(dataArrTest, batch_size=batch_size, shuffle=True,num_workers=0,pin_memory=False)

In [11]:
model = GRUmodel(input_size, hidden_size, num_layers, num_classes).to(device)

In [12]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

In [13]:
# Train the model
n_total_steps = len(dlDataArrTrain)
scaler = torch.cuda.amp.GradScaler()
h = model.init_hidden(batch_size)
for epoch in range(num_epochs):

    for i, data_b in enumerate(dlDataArrTrain):
        start = time()
        inputs, labels = data_b[0].to(torch.float32),data_b[1].to(torch.int64)
        h = h.data
        # Forward pass
        # Runs the forward pass with autocasting.
        with torch.cuda.amp.autocast():
            outputs,h = model(inputs,h)
            loss = criterion(outputs, labels)

        # Backward and optimize
        for param in model.parameters():
            param.grad = None
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # loss.backward()
        # optimizer.step()
        end=time()
        print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}, Time : {end-start}')




Epoch [1/1], Step [1/596], Loss: 0.6919, Time : 40.33398199081421


KeyboardInterrupt: 

In [None]:
torch.save(net, 'gru.pt')

In [39]:
# Test the model GRU
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for data_b in dlDataArrtest:
        inputs, labels = data_b[0].to(torch.float32),data_b[1].to(torch.int64)
        outputs = model(inputs)
        # max returns (value ,index)

        _, predicted = torch.max(outputs.data, 1)

        #print(predicted, labels)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc} %')

TypeError: 'collections.OrderedDict' object is not callable

LSTM section

In [26]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

input_size = 72
sequence_length = 10240
num_classes = 2
# HYPERPARAMETERS
hidden_size = 128
num_layers = 2

num_epochs = 1
batch_size = 32
learning_rate = 0.001


dataArrTrain = EegDataSetDask(trainData)
dataArrTest = EegDataSetDask(testData)

dlDataArrTrain = DataLoader(dataArrTrain, batch_size=batch_size, shuffle=True,num_workers=0,pin_memory=False)
dlDataArrtest = DataLoader(dataArrTest, batch_size=batch_size, shuffle=True,num_workers=0,pin_memory=False)

model = LSTMNet(input_size, hidden_size, num_layers, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

In [27]:
# Train the model LSTM
n_total_steps = len(dlDataArrTrain)
scaler = torch.cuda.amp.GradScaler()
h = model.init_hidden(batch_size)
for epoch in range(num_epochs):

    for i, data_b in enumerate(dlDataArrTrain):
        start = time()
        inputs, labels = data_b[0].to(torch.float32),data_b[1].to(torch.int64)
        h = tuple([e.data for e in h])
        # Forward pass
        # Runs the forward pass with autocasting.
        with torch.cuda.amp.autocast():
            outputs,h = model(inputs,h)
            loss = criterion(outputs, labels)

        # Backward and optimize
        for param in model.parameters():
            param.grad = None
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # loss.backward()
        # optimizer.step()
        end=time()
        print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}, Time : {end-start}')


Epoch [1/1], Step [1/222], Loss: 0.6939, Time : 59.77163600921631
Epoch [1/1], Step [2/222], Loss: 0.6989, Time : 64.41405653953552
Epoch [1/1], Step [3/222], Loss: 0.6927, Time : 66.36458945274353


KeyboardInterrupt: 

In [None]:
torch.save(net, 'lstm.pt')

In [None]:
# Test the model STM
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for data_b in dlDataArrtest:
        inputs, labels = data_b[0].to(torch.float32),data_b[1].to(torch.int64)
        outputs = model(inputs)
        # max returns (value ,index)

        _, predicted = torch.max(outputs.data, 1)

        #print(predicted, labels)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc} %')