In [13]:
# Import the Python libraries used throughout the notebook
import os, time, pickle, warnings, itertools, copy, sys, shutil
from mne.filter import filter_data as bandpass_filter
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import signal
from scipy.signal import welch, butter, lfilter, sosfilt, sosfreqz, freqz
from scipy.integrate import simps
from scipy.stats import f_oneway
from tqdm import tqdm
from sklearn.decomposition import FastICA
warnings.filterwarnings('ignore')

In [14]:
def eye_movement_artifact(input_data, random_state=None):
    """Decompose one trial into independent components with FastICA.

    Despite its name, this function does not remove anything by itself: it
    only returns the estimated ICA sources, which the caller then uses.

    Parameters
    ----------
    input_data : 2D array-like, shape (n_channels, n_samples)
        One trial, e.g. 32 channels x 7680 samples.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``FastICA`` so a run can be made reproducible. The
        default (None) keeps the previous, unseeded behaviour.

    Returns
    -------
    ndarray, shape (n_components, n_samples)
        The estimated sources, one component per row.
    """
    # FastICA expects (n_samples, n_features), so channels must become columns.
    samples_by_channel = np.asarray(input_data).transpose()
    ica = FastICA(random_state=random_state)
    sources = ica.fit_transform(samples_by_channel)
    # Back to "one row per component" to mirror the input layout.
    return sources.transpose()

def signal_pro(input_data):
    """Band-pass filter every trial and apply an ICA-derived amplitude offset.

    Processing steps:
      1. Every channel of every trial is band-pass filtered (4-48 Hz, assuming
         a 128 Hz sampling rate). The filtered signal is written back into
         ``input_data``, i.e. the argument is modified in place; pass a copy
         if the unfiltered data is still needed.
      2. Each filtered trial is decomposed with ``eye_movement_artifact`` and
         the mean of every returned component row is taken.
      3. Every sample is shifted by the absolute mean of the component with
         the same row index: strictly positive samples are decreased by it,
         all other samples are increased by it.

    Parameters
    ----------
    input_data : ndarray, shape (n_trials, n_channels, n_samples)
        The dimensions are read from the array rather than fixed to
        40 x 32 x 7680. The ICA step must return exactly ``n_channels``
        components per trial.

    Returns
    -------
    ndarray
        New array with the same shape and dtype as ``input_data``.
    """
    n_trials, n_channels = input_data.shape[:2]

    # Step 1: band-pass filter, stored back into the caller's array.
    for trial in range(n_trials):
        for channel in range(n_channels):
            input_data[trial][channel] = bandpass_filter(
                input_data[trial][channel],
                sfreq=128, l_freq=4, h_freq=48, verbose=False,
            )

    # Step 2: one absolute component mean per (trial, row).
    # NOTE(review): FastICA centres its input, so these means are presumably
    # very close to zero -- confirm the correction has the intended effect.
    offsets = np.empty((n_trials, n_channels))
    for trial in range(n_trials):
        components = eye_movement_artifact(input_data[trial].copy())
        offsets[trial] = np.abs(components.mean(axis=1))

    # Step 3: vectorised form of the former per-sample loop.
    offsets = offsets[:, :, np.newaxis]
    corrected = np.where(input_data > 0.0, input_data - offsets, input_data + offsets)
    return corrected.astype(input_data.dtype, copy=False)

In [15]:
def bandpower(input_data, band, sf=128):
    """Return the mean Welch power spectral density inside a frequency band.

    Note that this is the *mean* PSD over the bins that fall inside the band,
    not the integrated (area-under-curve) band power.

    Parameters
    ----------
    input_data : 1D array-like
        Time series of a single channel.
    band : tuple (low, high)
        Band limits in the same unit as ``sf``; both limits are inclusive.
    sf : int or float, default 128
        Sampling frequency of ``input_data``.

    Returns
    -------
    float
        Mean PSD over the frequency bins with ``low <= f <= high``
        (NaN if no bin falls inside the band).

    Raises
    ------
    ValueError
        If ``low`` is not strictly positive, because the window length is
        derived from ``2 / low``.
    """
    low, high = np.asarray(band)
    if low <= 0:
        raise ValueError('band lower limit must be > 0, got {}'.format(low))
    # Window spanning two cycles of the lowest frequency of interest. welch()
    # documents nperseg as an int, so truncate explicitly instead of handing
    # it a float.
    nperseg = int((2 / low) * sf)
    # Compute the modified periodogram (Welch)
    freqs, psd = welch(input_data, sf, nperseg=nperseg)
    # Boolean mask of the frequency bins that lie inside the band
    in_band = np.logical_and(freqs >= low, freqs <= high)
    return np.mean(psd[in_band])

In [16]:
# EEG frequency bands as (low, high) tuples in Hz; each tuple is passed to
# bandpower() as its `band` argument. The trailing notes list the mental
# states commonly associated with each band.
theta_band_range = (4, 8)   # drowsiness, emotional connection, intuition, creativity
alpha_band_range = (8, 12)  # reflection, relaxation
beta_band_range = (12, 30)  # concentration, problem solving, memory
gamma_band_range = (30, 48) # cognition, perception, learning, multi-tasking

In [17]:
def get_csv_file(subject, filter_data, labels):
    """Compute per-channel band features for one subject and write them as CSV.

    For every trial and channel the mean PSD (see ``bandpower``) is computed
    in the theta, alpha, beta and gamma bands. The following files are then
    written below ``newpath + subject``:

      rawfiles/<subject>_<band>.csv   one file per band, no labels
      rawfiles/<subject>.csv          all four bands side by side, no labels
      <subject>_valence.csv           all bands + 'valence' label column
      <subject>_arousal.csv           all bands + 'arousal' label column
      <subject>_all.csv               all bands + 'all' (4-class) label column

    WARNING: an already existing ``newpath + subject`` directory is deleted
    recursively before the new files are written.

    Relies on module-level names defined elsewhere in the notebook:
    ``newpath``, ``eeg_channels``, the four ``*_band_range`` tuples,
    ``bandpower`` and ``emotion_label``.

    Parameters
    ----------
    subject : str
        Subject identifier, used for the directory and file names.
    filter_data : ndarray, shape (n_trials, n_channels, n_samples)
        Pre-processed EEG; ``n_channels`` must equal ``len(eeg_channels)``.
    labels : ndarray
        Per-trial ratings, forwarded unchanged to ``emotion_label``.
    """
    # Insertion order fixes the column order: theta, alpha, beta, gamma.
    band_ranges = {
        'theta': theta_band_range,
        'alpha': alpha_band_range,
        'beta': beta_band_range,
        'gamma': gamma_band_range,
    }
    n_trials, n_channels = len(filter_data), len(filter_data[0])

    band_frames = {}
    for band_name, band_range in band_ranges.items():
        # (n_trials, n_channels) matrix of band features
        powers = np.array([
            [bandpower(filter_data[trial, channel], band_range)
             for channel in range(n_channels)]
            for trial in range(n_trials)
        ])
        columns = [str(channel) + '_' + band_name for channel in eeg_channels]
        band_frames[band_name] = pd.DataFrame(powers, columns=columns)

    # Start from a clean output directory. Only an existing directory is
    # removed; any other filesystem error now surfaces instead of being
    # swallowed by a bare except.
    new_path = newpath + subject
    raw_dir = new_path + '/rawfiles'
    if os.path.isdir(new_path):
        shutil.rmtree(new_path)
    os.makedirs(raw_dir)

    csv_options = dict(index=False, encoding='utf-8-sig')
    for band_name, frame in band_frames.items():
        frame.to_csv(raw_dir + '/' + subject + '_' + band_name + '.csv', **csv_options)

    #===========================   ALL Bands   ==================================
    # Join the four band frames column-wise; the rows (trials) stay aligned.
    all_bands = pd.concat(list(band_frames.values()), axis=1)
    all_bands.to_csv(raw_dir + '/' + subject + '.csv', **csv_options)

    # One labelled copy per classification target.
    for target in ('valence', 'arousal', 'all'):
        labelled = all_bands.copy()
        labelled[target] = emotion_label(labels, target)
        labelled.to_csv(new_path + '/' + subject + '_' + target + '.csv', **csv_options)

In [18]:
# Participant identifiers 's01' ... 's32'; each one is the stem of a '<id>.dat' file.
subject_names = ['s{:02d}'.format(index) for index in range(1, 33)]
# Channel names, in the order of the channel axis of the data arrays.
eeg_channels = np.array(['Fp1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 
                         'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 
                         'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2'])
# Directory that contains the DEAP '.dat' files. Set the DEAP_DATASET_PATH
# environment variable to override the default. os.path.join(..., '') ensures
# a trailing separator, because file names are appended by plain concatenation.
deap_dataset_path = os.path.join(
    os.environ.get('DEAP_DATASET_PATH',
                   '/Users/shyammarjit/Desktop/Brain Computer Interface/Deap Dataset/'),
    '')
# Directory below which the CSV files are saved. Set the DEAP_CSV_OUTPUT_PATH
# environment variable to override the default.
save_csv_path = os.environ.get(
    'DEAP_CSV_OUTPUT_PATH',
    '/Users/shyammarjit/Desktop/Brain Computer Interface/Hybrid Sequential Forward channel selection (HSFCS)/Subject Dependent')

In [19]:
def emotion_label(labels, class_label):
    """Convert per-trial ratings into discrete class labels.

    A rating strictly greater than 5 counts as "high", anything else as "low".
    Column 0 of ``labels`` is read as valence and column 1 as arousal.

    Parameters
    ----------
    labels : 2D array-like, shape (n_trials, >= 2)
        Ratings per trial.
    class_label : {'valence', 'arousal', 'all'}
        'valence' -> 1 for high valence, 0 for low valence
        'arousal' -> 1 for high arousal, 0 for low arousal
        'all'     -> 1 = HVHA, 0 = HVLA, 2 = LVHA, 3 = LVLA

    Returns
    -------
    list of int
        One label per trial.

    Raises
    ------
    ValueError
        If ``class_label`` is not one of the supported targets (previously
        this case silently returned None).
    """
    if class_label not in ('valence', 'arousal', 'all'):
        raise ValueError(
            "class_label must be 'valence', 'arousal' or 'all', got {!r}".format(class_label))

    ratings = np.asarray(labels)
    if ratings.shape[0] == 0:
        return []

    high_valence = ratings[:, 0] > 5
    high_arousal = ratings[:, 1] > 5

    if class_label == 'valence':
        return high_valence.astype(int).tolist()
    if class_label == 'arousal':
        return high_arousal.astype(int).tolist()
    # Four-class quadrant encoding; the codes are kept exactly as before.
    quadrant = np.where(high_valence,
                        np.where(high_arousal, 1, 0),   # HVHA / HVLA
                        np.where(high_arousal, 2, 3))   # LVHA / LVLA
    return quadrant.tolist()

In [20]:
# Output root shared by all subjects. get_csv_file() reads this module-level
# name, so it must be defined before the loop runs. exist_ok=True makes the
# cell safe to re-run without hiding unrelated filesystem errors.
newpath = save_csv_path + '/datafiles/'
os.makedirs(newpath, exist_ok=True)

for subject in subject_names[0:32]:
    # load the dataset
    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # '.dat' files that come from a trusted source.
    with open(deap_dataset_path + subject + '.dat', 'rb') as f:
        raw_data = pickle.load(f, encoding = 'latin1')
    # the 'data' and 'labels' entries of the unpickled object are used
    data = raw_data['data']
    labels = raw_data['labels']
    # keep the first 40 trials and the first 32 channels, and drop the 3 s
    # pre-trial baseline, i.e. the first 3*128 = 384 samples of each series
    reduced_eeg_data  = data[0:40, 0:32, 384:8064]
    # signal_pro() filters its argument in place, hence the copy
    filter_data = signal_pro(reduced_eeg_data.copy())
    get_csv_file(subject, filter_data, labels)
    print('Done: ', subject)

Done:  s01
Done:  s02
Done:  s03
Done:  s04
Done:  s05
Done:  s06
Done:  s07
Done:  s08
Done:  s09
Done:  s10
Done:  s11
Done:  s12
Done:  s13
Done:  s14
Done:  s15
Done:  s16
Done:  s17
Done:  s18
Done:  s19
Done:  s20
Done:  s21
Done:  s22
Done:  s23
Done:  s24
Done:  s25
Done:  s26
Done:  s27
Done:  s28
Done:  s29
Done:  s30
Done:  s31
Done:  s32
