In [1]:
import preprocess
import numpy as np
from tqdm.notebook import tqdm
from scipy.io import loadmat

In [2]:
# Root directory of the raw recordings. File names returned by the preprocess
# helpers are appended to this string directly, so the trailing slash matters.
rootdir = '../../data.nosync/'
# Number of interictal files as reported by preprocess.count_interictal_files.
interictal_count = preprocess.count_interictal_files(rootdir)
interictal_count

480

In [34]:
# Select the recordings to process.
# NOTE(review): the second argument (24) presumably limits how many interictal
# files are returned so that both classes end up the same size -- confirm
# against preprocess.get_interictal_files.
interictal_files = preprocess.get_interictal_files(rootdir, 24)
preictal_files = preprocess.get_preictal_files(rootdir)

# Report how many files were selected for each class.
print(f'Interictal file count: {len(interictal_files)}')
print(f'Preictal file count: {len(preictal_files)}')

Interictal file count: 24
Preictal file count: 24


# Preprocessing

In [57]:
# Pre-allocate one array for all samples: one row per file, interictal and
# preictal combined. np.empty does NOT initialise its contents, so every row
# must be written by the STFT loops below before X is used.
# NOTE(review): the trailing dimensions (16, 401, 601) are hard-coded and must
# match what preprocess.stft returns -- presumably (channels, frequency bins,
# time frames); confirm.
X = np.empty((len(interictal_files) + len(preictal_files), 16, 401, 601))
X.shape

(48, 16, 401, 601)

In [58]:
# Convert every interictal recording into STFT spectrograms and store it in X.
# Interictal samples occupy rows 0 .. len(interictal_files) - 1.
for i, fname in enumerate(tqdm(interictal_files)):
    filepath = rootdir + fname
    mat = loadmat(filepath)
    # The second argument (2) is forwarded to preprocess.stft; its meaning is
    # not visible from this notebook.
    Zxxs = preprocess.stft(mat, 2)
    # Only the real part of the STFT output is stored.
    # NOTE(review): confirm that discarding the imaginary part is intended.
    X[i] = Zxxs.real

  0%|          | 0/24 [00:00<?, ?it/s]

In [59]:
# Transform preictal raw EEG to STFT spectrograms.
# Preictal samples are stored AFTER the interictal ones, i.e. in rows
# len(interictal_files) .. len(X) - 1. This matches the label vector, which is
# built as interictal labels followed by preictal labels. Writing to X[i] here
# would overwrite the interictal rows and leave the second half of the
# np.empty-allocated array uninitialised.
n_interictal = len(interictal_files)
for i in tqdm(range(len(preictal_files))):
    filepath = rootdir + preictal_files[i]
    mat = loadmat(filepath)
    # The second argument (2) is forwarded to preprocess.stft; its meaning is
    # not visible from this notebook.
    Zxxs = preprocess.stft(mat, 2)
    # Only the real part of the STFT output is stored.
    X[n_interictal + i, :] = Zxxs.real

  0%|          | 0/24 [00:00<?, ?it/s]

We define the target label for interictal as -1 and for preictal as 1.

In [60]:
# One label per interictal file: float array filled with -1.
interictal_y = np.full(len(interictal_files), -1.0)
interictal_y.shape

(24,)

In [61]:
# One label per preictal file: float array filled with +1.
preictal_y = np.full(len(preictal_files), 1.0)
preictal_y.shape

(24,)

In [62]:
%%time
# make one input array by joining interictal and preictal STFTs along axis 0
# (disabled: X is pre-allocated and filled in place by the STFT loops above)
# X = np.concatenate((interictal_Zxxs, preictal_Zxxs), axis=0)
# X.shape

CPU times: user 5 µs, sys: 1e+03 ns, total: 6 µs
Wall time: 10 µs


In [63]:
# Full label vector: interictal labels first, followed by preictal labels.
y = np.concatenate([interictal_y, preictal_y])

In [42]:
from sklearn.model_selection import train_test_split

In [43]:
%%time
# Split row indices of X (not X itself) together with the labels; the
# resulting index lists identify which rows belong to each subset.
x_ids = [*range(len(X))]
# 33% of the samples go to the test set; random_state pins the shuffle so the
# split is reproducible.
x_train_ids, x_test_ids, y_train, y_test = train_test_split(
    x_ids,
    y,
    test_size=0.33,
    random_state=42,
)

CPU times: user 2.74 ms, sys: 11.9 ms, total: 14.6 ms
Wall time: 26.4 ms


In [64]:
# Sanity check: shape of a single sample (the first row of X).
X[0].shape

(16, 401, 601)

## File I/O

We now save the STFT array to disk as an HDF5 file.

In [65]:
import h5py

In [66]:
# Path of the HDF5 file used for both writing and reading below; it is relative
# to the notebook's working directory.
# NOTE(review): the 'data/' directory presumably has to exist already -- confirm.
filename = 'data/Dog_1_interictal_preictal_STFT.h5'

In [67]:
# Save the STFT array to disk as a single HDF5 dataset.
# Mode 'w' creates the file, truncating it if it already exists. The context
# manager guarantees the file is closed even if create_dataset raises, so a
# failed write cannot leave a dangling open handle.
with h5py.File(filename, 'w') as f:
    f.create_dataset("Dog_1_interictal_preictal_STFT", data=X)

In [29]:
# read from disk
# NOTE(review): this read-only handle is kept open for the following cell and
# is never closed in the visible notebook; call f.close() once the data has
# been loaded.
f = h5py.File(filename, 'r')

In [32]:
# load as numpy array
# Indexing the dataset with [()] reads its entire contents into memory.
# NOTE(review): f.get returns None when the key is missing, which would surface
# here as a TypeError on the indexing rather than as a KeyError.
data = f.get('Dog_1_interictal_preictal_STFT')[()]
data.shape

(28, 16, 401, 601)

In [33]:
# Confirm that the loaded dataset is an in-memory NumPy array.
type(data)

numpy.ndarray