## Data transforming for applying EEGlearn on them
Input: several files .mat with 3D matrix (time x channels x trials) 
Output: one or several files .mat with 2D matrix (trials x features)
The difference between this code and that of DataTransTimeWin.ipynb is that 
there is also data truncation processed here (smaller time interval).


Here is a preparation of data to use EEGlearn.


In [1]:
from __future__ import print_function
import scipy.io
import scipy.fftpack as sfp
import numpy as np
import os

In [5]:
T = 0.1 # Period of one time window (seconds)
elNum = 19 # Number of electrodes
win = 4 # Number of time windows
data = []
subNums = []
trial_subNums = []
path_to_data = 'write_your_path'
features = np.array([]).reshape(0, elNum * 3 * win + 1) # Array for features, (3 frequency bands*windows) + 1 for label

In [6]:
for subdir in os.listdir(path_to_data):
    for file in os.listdir(os.path.join(path_to_data,subdir)):
        subNums.append(int(subdir))
        data.append(scipy.io.loadmat(os.path.join(path_to_data,subdir, file)))
        print('{0}/{1}/{2} is loaded'.format(path_to_data, subdir, file))

/home/moskaleona/alenadir/data/rawData/38/eegNT.mat is loaded
/home/moskaleona/alenadir/data/rawData/38/eegT.mat is loaded
/home/moskaleona/alenadir/data/rawData/36/eegNT.mat is loaded
/home/moskaleona/alenadir/data/rawData/36/eegT.mat is loaded
/home/moskaleona/alenadir/data/rawData/28/eegNT.mat is loaded
/home/moskaleona/alenadir/data/rawData/28/eegT.mat is loaded
/home/moskaleona/alenadir/data/rawData/34/eegNT.mat is loaded
/home/moskaleona/alenadir/data/rawData/34/eegT.mat is loaded
/home/moskaleona/alenadir/data/rawData/32/eegNT.mat is loaded
/home/moskaleona/alenadir/data/rawData/32/eegT.mat is loaded
/home/moskaleona/alenadir/data/rawData/27/eegNT.mat is loaded
/home/moskaleona/alenadir/data/rawData/27/eegT.mat is loaded
/home/moskaleona/alenadir/data/rawData/37/eegNT.mat is loaded
/home/moskaleona/alenadir/data/rawData/37/eegT.mat is loaded
/home/moskaleona/alenadir/data/rawData/35/eegNT.mat is loaded
/home/moskaleona/alenadir/data/rawData/35/eegT.mat is loaded
/home/moskaleona

In [7]:
def trunc(data, freq=500, time_old=(-0.5,1), time_new=(0.1,0.5)):
    return data[int(freq*(time_new[0]-time_old[0])):int(freq*(time_new[1]-time_old[0]))]

In [8]:
data[1].get('eegT').shape

(750, 19, 346)

In [6]:
# Making one 2D array from data
for i in range(len(data)):
    dic = data[i]
    label = 1 if 'eegT' in dic.keys() else 0
    matrix = dic.get('eegT') if label == 1 else dic.get('eegNT') 
    matrix = trunc(matrix)
    N = matrix.shape[0]//win # Number of points for one time window
    if elNum != matrix.shape[1]:
        raise elNumException('Wrong number of electrodes!')
    line = np.zeros((1,elNum*3*win + 1), dtype = 'float64') # (3 frequency bands (theta, alpha, beta)*windows) plus 1 
                                                            # for label
    
    for _ in range(matrix.shape[2]):
        trial_subNums.append(subNums[i]) # subject numbers associated with each trial (for leave-subject-out
                                         #  cross validation).
        
    print('!',end='')
    for trialNum in range(matrix.shape[2]):
        for el in range(elNum):
            trial = matrix[:, el, trialNum]
            for w in range(win):
                timewin = trial[w * N : (w+1) * N]
                # trial -= np.mean(trial) to get rid of zero frequency in fft (e.g. for visualization of fft)
                A = abs(sfp.fft(timewin))/N # Real amplitudes of sinusoids
                Hz = np.array([k/T for k in range(N)]) # Frequences in hertz
                # Computing the sum of squared absolute values within each of the three
                # frequency bands of theta (4-7Hz), alpha (8-13Hz), and beta(13-30Hz)
                theta = ((A[[np.all([Hz[k] < 7, Hz[k] >= 4]) for k in range(N)]])**2).sum()
                alpha = ((A[[np.all([Hz[k] < 13, Hz[k] >= 7]) for k in range(N)]])**2).sum()
                beta = ((A[[np.all([Hz[k] <= 30, Hz[k] >= 13]) for k in range(N)]])**2).sum()
                line[:,elNum*3*w + el], line[:,elNum*(3*w + 1)+ el], line[:,elNum*(3*w + 2)+ el] = theta, alpha, beta
        line[:,-1] = label
        features = np.concatenate((features, line))
        print('.',end='')
    print()
print('Array is made!')

!.............................................................................................................................................................................................................................................................................................
!..........................................................................................................................................................................................................................................................................................................................................................
!.......................................................................................................................................................................................................................
!...................................................................................................................................................

In [7]:
# Saving FeatureMat_timeWin.mat and Saving trial_subNums.mat
path_to_new_data = 'write_your_path'
featureMatrix = np.array(features, ndmin =2)
featureDict = dict([('features', featureMatrix), ])
scipy.io.savemat(os.path.join(path_to_new_data,'FeatureMat_timeWin.mat'), featureDict)

trial_subMatrix = np.array(trial_subNums, ndmin =2)
trial_subDict = dict([('subjectNum', trial_subMatrix), ])
scipy.io.savemat(os.path.join(path_to_new_data, 'trials_subNums.mat'), trial_subDict)

In [8]:
# Use this for making separate files for different subjects
path_to_separated_data = 'write_your_path'
alldata = scipy.io.loadmat(os.path.join(path_to_new_data,'FeatureMat_timeWin.mat'))['features']
trialsubs = scipy.io.loadmat(os.path.join(path_to_new_data, 'trials_subNums.mat'))['subjectNum']

data_per_sub = {}
for sub_num in np.unique(np.ravel(trialsubs)):
    data_per_sub.update({str(sub_num) : alldata[np.ravel(trialsubs)==sub_num]})
    
for k in data_per_sub.keys():
    oneSubDict = dict([('features', data_per_sub[k]), ])
    print('Saving FeatureMat_timeWin' + k + '.mat')
    scipy.io.savemat(os.path.join(path_to_separated_data, 'FeatureMat_timeWin' + k + '.mat'), oneSubDict)
    
    trials_sub = np.array([int(k)] * data_per_sub[k].shape[0]).reshape(1,-1)
    trialsSubDict = dict([('subjectNum',trials_sub), ])
    print('Saving trials_subNums' + k + '.mat')
    scipy.io.savemat(os.path.join(path_to_separated_data, 'trials_subNums' + k + '.mat'), trialsSubDict)
    

Saving FeatureMat_timeWin25.mat
Saving trials_subNums25.mat
Saving FeatureMat_timeWin26.mat
Saving trials_subNums26.mat
Saving FeatureMat_timeWin27.mat
Saving trials_subNums27.mat
Saving FeatureMat_timeWin38.mat
Saving trials_subNums38.mat
Saving FeatureMat_timeWin33.mat
Saving trials_subNums33.mat
Saving FeatureMat_timeWin32.mat
Saving trials_subNums32.mat
Saving FeatureMat_timeWin30.mat
Saving trials_subNums30.mat
Saving FeatureMat_timeWin28.mat
Saving trials_subNums28.mat
Saving FeatureMat_timeWin29.mat
Saving trials_subNums29.mat
Saving FeatureMat_timeWin35.mat
Saving trials_subNums35.mat
Saving FeatureMat_timeWin34.mat
Saving trials_subNums34.mat
Saving FeatureMat_timeWin36.mat
Saving trials_subNums36.mat
Saving FeatureMat_timeWin37.mat
Saving trials_subNums37.mat
