## Create Custom Dataset for Dog 1 Files

In [39]:
import os
import numpy as np
from scipy.io import loadmat

Create the annotation file from the label embedded in each filename: a file whose name contains `interictal` is labelled -1, and a file whose name contains `preictal` is labelled 1. Any file labelled `test` is ignored.

In [40]:
# Alphabetically sorted names of everything in the Dog_1 segment directory.
dog_one_files = os.listdir('data_copy/Dog_1/Dog_1')
dog_one_files.sort()

# Drop every entry whose name contains 'test'.
dog_one_ffiles = [name for name in dog_one_files if 'test' not in name]
dog_one_ffiles

['Dog_1_interictal_segment_0001.mat',
 'Dog_1_interictal_segment_0002.mat',
 'Dog_1_interictal_segment_0003.mat',
 'Dog_1_interictal_segment_0004.mat',
 'Dog_1_interictal_segment_0005.mat',
 'Dog_1_interictal_segment_0006.mat',
 'Dog_1_interictal_segment_0007.mat',
 'Dog_1_interictal_segment_0008.mat',
 'Dog_1_interictal_segment_0009.mat',
 'Dog_1_interictal_segment_0010.mat',
 'Dog_1_interictal_segment_0011.mat',
 'Dog_1_interictal_segment_0012.mat',
 'Dog_1_interictal_segment_0013.mat',
 'Dog_1_interictal_segment_0014.mat',
 'Dog_1_interictal_segment_0015.mat',
 'Dog_1_interictal_segment_0016.mat',
 'Dog_1_interictal_segment_0017.mat',
 'Dog_1_interictal_segment_0018.mat',
 'Dog_1_interictal_segment_0019.mat',
 'Dog_1_interictal_segment_0020.mat',
 'Dog_1_interictal_segment_0021.mat',
 'Dog_1_interictal_segment_0022.mat',
 'Dog_1_interictal_segment_0023.mat',
 'Dog_1_interictal_segment_0024.mat',
 'Dog_1_interictal_segment_0025.mat',
 'Dog_1_interictal_segment_0026.mat',
 'Dog_1_inte

In [41]:
def map_label(label):
    """Map a segment-type token taken from a file name to its numeric class.

    Args:
        label: the segment type, either 'interictal' or 'preictal'.

    Returns:
        -1 for 'interictal', 1 for 'preictal'.

    Raises:
        ValueError: if ``label`` is neither of the two known segment types.
            Failing loudly prevents an empty label from silently ending up
            in the annotation file.
    """
    if label == 'interictal':
        return -1
    if label == 'preictal':
        return 1
    raise ValueError(f"unknown segment label: {label!r}")

# File name -> numeric label. The segment type is the third '_'-separated
# field of the name once everything from the first '.' onwards is removed.
annotations_dict = {
    fname: map_label(fname.partition('.')[0].split('_')[2])
    for fname in dog_one_ffiles
}
annotations_dict

{'Dog_1_interictal_segment_0001.mat': -1,
 'Dog_1_interictal_segment_0002.mat': -1,
 'Dog_1_interictal_segment_0003.mat': -1,
 'Dog_1_interictal_segment_0004.mat': -1,
 'Dog_1_interictal_segment_0005.mat': -1,
 'Dog_1_interictal_segment_0006.mat': -1,
 'Dog_1_interictal_segment_0007.mat': -1,
 'Dog_1_interictal_segment_0008.mat': -1,
 'Dog_1_interictal_segment_0009.mat': -1,
 'Dog_1_interictal_segment_0010.mat': -1,
 'Dog_1_interictal_segment_0011.mat': -1,
 'Dog_1_interictal_segment_0012.mat': -1,
 'Dog_1_interictal_segment_0013.mat': -1,
 'Dog_1_interictal_segment_0014.mat': -1,
 'Dog_1_interictal_segment_0015.mat': -1,
 'Dog_1_interictal_segment_0016.mat': -1,
 'Dog_1_interictal_segment_0017.mat': -1,
 'Dog_1_interictal_segment_0018.mat': -1,
 'Dog_1_interictal_segment_0019.mat': -1,
 'Dog_1_interictal_segment_0020.mat': -1,
 'Dog_1_interictal_segment_0021.mat': -1,
 'Dog_1_interictal_segment_0022.mat': -1,
 'Dog_1_interictal_segment_0023.mat': -1,
 'Dog_1_interictal_segment_0024.ma

In [42]:
import csv

# Write one "filename,label" row per segment; no header row is written.
# The context manager closes the file even if a write fails, and newline=''
# hands line-ending control to the csv module, as its documentation requires.
with open('data_copy/Dog_1/annotations.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(annotations_dict.items())

In [43]:
!ls data_copy/Dog_1

Dog_1  annotations.csv


In [44]:
!tail data_copy/Dog_1/annotations.csv

Dog_1_preictal_segment_0015.mat,1
Dog_1_preictal_segment_0016.mat,1
Dog_1_preictal_segment_0017.mat,1
Dog_1_preictal_segment_0018.mat,1
Dog_1_preictal_segment_0019.mat,1
Dog_1_preictal_segment_0020.mat,1
Dog_1_preictal_segment_0021.mat,1
Dog_1_preictal_segment_0022.mat,1
Dog_1_preictal_segment_0023.mat,1
Dog_1_preictal_segment_0024.mat,1


In [45]:
from torch.utils.data import Dataset, DataLoader
import pandas as pd

In [83]:
class EEGTrainDataset(Dataset):
    """Labelled EEG segments stored as MATLAB ``.mat`` files.

    Segments are read from disk lazily, one file per ``__getitem__`` call.

    Args:
        annotations_file: path to a header-less CSV whose first column is a
            segment file name and whose second column is that segment's label.
        segment_dir: directory containing the files named in the CSV.
        transform: optional callable applied to the signal array.
        target_transform: optional callable applied to the label.
    """

    def __init__(self, annotations_file, segment_dir, transform=None, target_transform=None):
        # header=None: the annotation CSV is written without a header row, so
        # every line is a sample. Without it pandas would consume the first
        # sample as column names and the dataset would be one item short.
        self.segment_labels = pd.read_csv(annotations_file, header=None)
        self.segment_dir = segment_dir
        self.transform = transform
        self.target_transform = target_transform

    @staticmethod
    def _load_segment(segment_path):
        """Return the segment struct stored in the ``.mat`` file at ``segment_path``."""
        mat = loadmat(segment_path)
        # loadmat adds '__header__', '__version__' and '__globals__' entries;
        # skip them explicitly instead of relying on dictionary ordering.
        variable_names = [key for key in mat if not key.startswith('__')]
        if not variable_names:
            raise ValueError(f"no segment variable found in {segment_path!r}")
        return mat[variable_names[-1]]

    def __len__(self):
        """Number of annotated segments."""
        return len(self.segment_labels)

    def __getitem__(self, idx):
        """Load the ``idx``-th annotated segment.

        Returns:
            A ``(signal, fs, label)`` tuple: the segment's ``data`` field, its
            ``sampling_frequency`` field as a scalar, and the label from the CSV
            (each after the corresponding optional transform).
        """
        segment_path = os.path.join(self.segment_dir, self.segment_labels.iloc[idx, 0])
        segment = self._load_segment(segment_path)
        # Struct fields come back wrapped in 1x1 object arrays; unwrap them.
        signal = segment['data'][0][0]
        fs = segment['sampling_frequency'][0][0][0][0]
        label = self.segment_labels.iloc[idx, 1]
        if self.transform:
            signal = self.transform(signal)
        if self.target_transform:
            label = self.target_transform(label)
        return signal, fs, label

In [90]:
class EEGTestDataset(Dataset):
    """Unlabelled EEG test segments stored as MATLAB ``.mat`` files.

    The dataset consists of every entry in ``segment_dir`` whose name contains
    'test', in sorted order. Segments are read lazily in ``__getitem__``.

    Args:
        segment_dir: directory containing the segment files.
        transform: optional callable applied to the signal array.
    """

    def _getfiles(self):
        """Return the sorted names in ``segment_dir`` that contain 'test'."""
        return sorted(f for f in os.listdir(self.segment_dir) if 'test' in f)

    def __init__(self, segment_dir, transform=None):
        self.segment_dir = segment_dir
        self.transform = transform
        # List the directory once. Re-listing on every __len__/__getitem__
        # call costs a directory scan plus sort per sample and lets the
        # index -> file mapping shift if the directory changes mid-iteration.
        self._files = self._getfiles()

    def __len__(self):
        """Number of test segments found when the dataset was created."""
        return len(self._files)

    def __getitem__(self, idx):
        """Load the ``idx``-th test segment.

        Returns:
            A ``(signal, fs)`` tuple: the segment's ``data`` field (after the
            optional transform) and its ``sampling_frequency`` field as a scalar.
        """
        segment_path = os.path.join(self.segment_dir, self._files[idx])
        mat = loadmat(segment_path)
        # loadmat adds '__header__', '__version__' and '__globals__' entries;
        # skip them explicitly instead of relying on dictionary ordering.
        variable_names = [key for key in mat if not key.startswith('__')]
        if not variable_names:
            raise ValueError(f"no segment variable found in {segment_path!r}")
        segment = mat[variable_names[-1]]
        # Struct fields come back wrapped in 1x1 object arrays; unwrap them.
        signal = segment['data'][0][0]
        fs = segment['sampling_frequency'][0][0][0][0]
        if self.transform:
            signal = self.transform(signal)
        return signal, fs

## Data preparation with DataLoaders

In [84]:
# Labelled training set: annotation CSV first, then the directory of segments.
training_data = EEGTrainDataset(
    'data_copy/Dog_1/annotations.csv',
    'data_copy/Dog_1/Dog_1',
)

# Serve the training set in shuffled mini-batches of 64 segments.
train_dataloader = DataLoader(dataset=training_data, shuffle=True, batch_size=64)

In [91]:
# Unlabelled test set built from the same segment directory.
test_data = EEGTestDataset(
    segment_dir='data_copy/Dog_1/Dog_1'
)

# shuffle=False: the dataset does not return file names, so a deterministic
# order is the only way to match each output back to its test segment.
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=False)

## Iterate through DataLoader

In [85]:
# Draw one batch from the training loader and report the shape of each part.
train_features, train_fss, train_labels = next(iter(train_dataloader))
print(
    f"Feature batch shape: {train_features.size()}",
    f"Sampling frequency batch shape: {train_fss.size()}",
    f"Labels batch shape: {train_labels.size()}",
    sep="\n",
)

# Keep the first example of the batch for inspection.
signal, fs, label = train_features[0].squeeze(), train_fss[0], train_labels[0]

Feature batch shape: torch.Size([64, 16, 239766])
Sampling frequency batch shape: torch.Size([64])
Labels batch shape: torch.Size([64])


In [87]:
# Sampling frequency, signal shape, signal values and label of the kept training example.
print(fs, signal.shape, signal, label, sep="\n")

tensor(399.6098, dtype=torch.float64)
torch.Size([16, 239766])
tensor([[-75, -61, -47,  ...,  24,  19,  14],
        [-27, -34, -31,  ..., -16, -12,   0],
        [ 44,  34,  30,  ...,   5,   8,  19],
        ...,
        [-22, -16, -19,  ...,  -2,   1,   0],
        [ -4,  -2,   1,  ...,  -7, -10, -10],
        [-33, -37, -32,  ...,   5,   8,   3]], dtype=torch.int16)
tensor(-1)


In [93]:
# Draw one batch from the test loader and report the shape of each part.
test_features, test_fss = next(iter(test_dataloader))
print(
    f"Feature batch shape: {test_features.size()}",
    f"Sampling frequency batch shape: {test_fss.size()}",
    sep="\n",
)

# Keep the first example of the batch for inspection.
signal, fs = test_features[0].squeeze(), test_fss[0]

Feature batch shape: torch.Size([64, 16, 239766])
Sampling frequency batch shape: torch.Size([64])


In [94]:
# Sampling frequency, signal shape and signal values of the kept test example.
print(fs, signal.shape, signal, sep="\n")

tensor(399.6098, dtype=torch.float64)
torch.Size([16, 239766])
tensor([[ 55,  58,  59,  ..., -57, -50, -42],
        [ 54,  46,  28,  ...,  19,  20,  21],
        [ 18,  31,  36,  ...,  68,  71,  76],
        ...,
        [-32, -45, -45,  ..., -33, -35, -32],
        [-49, -47, -40,  ..., -12, -18, -18],
        [-54, -57, -55,  ...,  11,  -1, -10]], dtype=torch.int16)
