## Load Modules

In [8]:
# Signal Preprocessing Methods
from preprocessModule import *

# Transform preprocessed signal into an input format
from transformModule import *

# Plot methods for various input data
from plotModule import *

# 1. DEAP Dataset

### DEAP Dataset Description
* __32 participants__
* __40 clips__ for each participant
* Evaluation : __Valence, Arousal, Dominance, Like__ (1 to 9)

### Setting & Initialization

In [2]:
# DEAP electrode names in channel order: the position of a name in this tuple
# is the integer channel index (0-31) it is mapped to below.
_DEAP_CHANNEL_ORDER = (
    "FP1", "AF3", "F3", "F7", "FC5", "FC1", "C3", "T7",
    "CP5", "CP1", "P3", "P7", "PO3", "O1", "OZ", "PZ",
    "FP2", "AF4", "FZ", "F4", "F8", "FC6", "FC2", "CZ",
    "C4", "T8", "CP6", "CP2", "P4", "P8", "PO4", "O2",
)

# Electrode name -> channel index.
DEAP_channels = {name: index for index, name in enumerate(_DEAP_CHANNEL_ORDER)}

# Parallel lists of all names / all indices, in channel order.
DEAP_all_channel_names = [*DEAP_channels]
DEAP_all_channel_values = [*DEAP_channels.values()]

In [5]:
# ---- Accumulators (filled while traversing the dataset) ----
label_list = []        # one (V, A, D, L) tuple per chunk

fftMap_list = []       # Fast Fourier Transform map of every chunk
base_fftMap_list = []  # FFT map of every trial's 3-second base signal

fd_list = []           # FD pattern of every chunk
base_fd_list = []      # FD pattern of every trial's base signal

# ---- Parameters ----
sf = 128               # samples per second
chunk_size = sf * 10   # sliding-window length: 10 seconds worth of samples
overlap = sf * 5       # consecutive windows share 5 seconds worth of samples

freqs = list(range(4, 46))  # integer frequencies 4, 5, ..., 45

# Use every DEAP channel by default.
chosen_channels = DEAP_all_channel_values
# Alternative: openBCI default channels
# chosen_channels = [0, 16, 6, 24, 11, 20, 13, 31]  # ["FP1", "FP2", "C3", "C4", "P7", "P8", "O1", "O2"]

### Traverse Dataset

In [313]:
import time
import pickle

eeg_path = "EEG dataset/DEAP/physiological_recordings/data_preprocessed_python/data_preprocessed_python/"

# Walks every participant file, splits each trial into a 3-second base segment
# and the remaining signal, cuts the latter into overlapping windows, and
# appends the FFT map / FD pattern / label of every window to the global lists.

# [ Loop 1 ] 32 Participants
for participant_id in range(1, 33):
    # Files are named s01.dat ... s32.dat (two-digit, zero-padded id).
    file_name = "s%02d.dat" % participant_id

    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only run this on dataset files obtained from a trusted source.
    # The context manager guarantees the file handle is closed after loading.
    with open(eeg_path + file_name, 'rb') as dat_file:
        s = pickle.load(dat_file, encoding='latin1')

    labels = s['labels']
    data = s['data']

    # [ Loop 2 ] 40 Trials
    for trial_id in range(0, 40):
        # label
        V, A, D, L = labels[trial_id]

        base_signal = data[trial_id][:, :sf * 3] # 3sec
        total_signal = data[trial_id][:, sf * 3:] # 60sec

        # Sliding windows of chunk_size samples; consecutive window starts are
        # (chunk_size - overlap) samples apart, covering the 60-second signal.
        all_chunks = [total_signal[:, start:start+chunk_size]
                     for start in range(0, sf*60-chunk_size+1, chunk_size-overlap)]

        # processing base signal (one entry per trial) ===========
        base_fftMap = computefftMap(base_signal, chosen_channels, freqs, sf)
        base_fftMap_list.append(base_fftMap)
        base_fd = computeFD(base_signal, chosen_channels)
        base_fd_list.append(base_fd)
        # ========================================================

        # [ Loop 3 ] Sliding window
        for chunk in all_chunks:
            # fftMap (2d array) ======================
            fftMap = computefftMap(chunk, chosen_channels, freqs, sf)
            fftMap_list.append(fftMap)
            # ========================================
            # FD pattern (1d array)====================
            fd = computeFD(chunk, chosen_channels)
            fd_list.append(fd)
            # =========================================
            # Every chunk of a trial carries that trial's label.
            label_list.append((V,A,D,L))
    print("Participant %d Ended"%(participant_id))
    print(time.strftime('%c', time.localtime(time.time())))

1 분석 끝
Mon Apr 13 02:51:22 2020
2 분석 끝
Mon Apr 13 02:51:29 2020
3 분석 끝
Mon Apr 13 02:51:37 2020
4 분석 끝
Mon Apr 13 02:51:43 2020
5 분석 끝
Mon Apr 13 02:51:50 2020
6 분석 끝
Mon Apr 13 02:51:57 2020
7 분석 끝
Mon Apr 13 02:52:03 2020
8 분석 끝
Mon Apr 13 02:52:10 2020
9 분석 끝
Mon Apr 13 02:52:18 2020
10 분석 끝
Mon Apr 13 02:52:27 2020
11 분석 끝
Mon Apr 13 02:52:35 2020
12 분석 끝
Mon Apr 13 02:52:42 2020
13 분석 끝
Mon Apr 13 02:52:50 2020
14 분석 끝
Mon Apr 13 02:52:58 2020
15 분석 끝
Mon Apr 13 02:53:05 2020
16 분석 끝
Mon Apr 13 02:53:13 2020
17 분석 끝
Mon Apr 13 02:53:21 2020
18 분석 끝
Mon Apr 13 02:53:28 2020
19 분석 끝
Mon Apr 13 02:53:35 2020
20 분석 끝
Mon Apr 13 02:53:42 2020
21 분석 끝
Mon Apr 13 02:53:49 2020
22 분석 끝
Mon Apr 13 02:53:57 2020
23 분석 끝
Mon Apr 13 02:54:04 2020
24 분석 끝
Mon Apr 13 02:54:11 2020
25 분석 끝
Mon Apr 13 02:54:19 2020
26 분석 끝
Mon Apr 13 02:54:26 2020
27 분석 끝
Mon Apr 13 02:54:33 2020
28 분석 끝
Mon Apr 13 02:54:40 2020
29 분석 끝
Mon Apr 13 02:54:48 2020
30 분석 끝
Mon Apr 13 02:54:56 2020
31 분석 끝
Mon Apr 13 

In [364]:
# num of chunks per trial: total chunk count divided by the number of trials
# (32 participants x 40 trials = 1280).
noc = len(fftMap_list) // (32 * 40)
# Sanity check, expected to display True: one label per FFT map.
len(fftMap_list) == len(label_list)

True

### Save

In [266]:
def get_data_index(p_id, t_id, c_id, noc):
    """Return the flat list index of one chunk.

    The layout assumed is participant-major, then trial, then chunk, with
    40 trials per participant and `noc` chunks per trial.

    p_id: participant index (0 maps to the start of the list)
    t_id: trial index within the participant
    c_id: chunk index within the trial
    noc:  number of chunks per trial
    """
    chunks_per_participant = 40 * noc
    participant_offset = chunks_per_participant * p_id
    trial_offset = t_id * noc
    return participant_offset + trial_offset + c_id

def get_base_index(p_id, t_id):
    """Return the flat index of a trial (40 trials per participant).

    p_id: participant index (0 maps to the start of the list)
    t_id: trial index within the participant
    """
    trials_per_participant = 40
    return t_id + trials_per_participant * p_id

# get p_id, t_id, c_id
def from_data_index(index, noc):
    """Decode a flat chunk index into (p_id, t_id, c_id).

    index: flat position of the chunk (40 trials per participant,
           `noc` chunks per trial)
    noc:   number of chunks per trial
    """
    p_id, within_participant = divmod(index, 40 * noc)
    t_id, c_id = divmod(within_participant, noc)
    return p_id, t_id, c_id

In [337]:
# Pair every chunk's input with its label.
#   "origin": the chunk's FFT map as-is
#   "diff":   scaled chunk FD pattern minus the scaled FD pattern of the
#             trial's 3-second base signal
data_list = []
save_option = "origin" # "diff"

for i in range(len(fftMap_list)):
    # activated signals (60sec)
    if save_option == "origin":
        data_list.append([fftMap_list[i], label_list[i]]) # **

    # 60sec(activate) - 3sec(base)
    elif save_option == "diff":
        # from_data_index needs the chunks-per-trial count (noc) to decode
        # the flat index; calling it with the index alone raises TypeError.
        p_id, t_id, c_id = from_data_index(i, noc)
        base_index = get_base_index(p_id, t_id)

        # NOTE(review): this branch uses the FD lists while "origin" uses the
        # FFT maps -- confirm which input the "diff" option is meant to save.
        # `np` and `scale` are presumably provided by the star imports at the
        # top of the notebook -- verify.
        diff = np.array(scale(fd_list[i])) - np.array(scale(base_fd_list[base_index]))
        data_list.append([diff, label_list[i]])

    else:
        # Fail loudly instead of silently producing an empty data_list.
        raise ValueError("unknown save_option: %r" % (save_option,))

In [338]:
# Serialize the [input, label] pairs with the highest available pickle protocol.
with open('DEAP_fftMap_10sec_5over.pickle', 'wb') as out_file:
    pickle.dump(data_list, out_file, protocol=pickle.HIGHEST_PROTOCOL)

# 2. SEED dataset

### SEED dataset Description

* 15 Participants
* 3 experiments for each participant
* 15 Trial clips for each experiment
    * Trial clips (total 15)
        * Sad(0) : Aftershock(2), Back to 1942(3)
        * Neutral(1) : World Heritage in China(5)
        * Happy(2) : Lost in Thailand(2), Flirting Scholar, Just Another Pandora's Box(2) 
    * Trial selection criteria
        1. not too long (about 4 min)
        2. should elicit a single desired target emotion
        3. should be understood without explanation
* Experiment process
    * 5 sec base => 4 min trial => 45 sec self-assessment => 15 sec rest
* Preprocessing
    * downsampled to 200Hz
    * 0 ~ 75Hz bandpass filtering
* Total 45(=3 experiments x 15 participants) .mat files
    * one .mat file for each experiment
    * 16 arrays
        * 15 arrays : preprocessed EEG data of 15 trials
            * shape = (channel x data)
        * 1 array : Labels
            * -1 : negative
            * 0 : neutral
            * 1 : positive
    

### Load meta data

In [6]:
from scipy import io # load .mat file
from openpyxl import load_workbook

channel_path = "EEG dataset/SEED/Channel Order.xlsx"
ch_file = load_workbook(channel_path, data_only=True)

# Rows 1..62 of "Sheet1": the first cell of each row is used as the channel
# name, mapped to a zero-based channel index (row number - 1).
channel_sheet = ch_file["Sheet1"]
SEED_channels = {
    channel_sheet[row_number][0].value: row_number - 1
    for row_number in range(1, 63)
}

# Parallel lists of all names / all indices, in sheet order.
SEED_all_channel_names = [*SEED_channels]
SEED_all_channel_values = [*SEED_channels.values()]

In [9]:
import os
from collections import defaultdict

eeg_path = "EEG dataset/SEED/Preprocessed_EEG/"

# participant id (int) -> list of that participant's "<id>_<date>.mat" files
filename_dict = defaultdict(list)

# os.listdir returns entries in arbitrary order; sorting makes each
# participant's file list deterministic. With the 8-digit date suffix seen in
# the file names, element 0 is then the earliest-dated file.
for file in sorted(os.listdir(eeg_path)):
    participant_prefix = file.split('_')[0]
    # Skip anything that is not "<participantID>_....mat" (other files in the
    # directory would otherwise crash the int() conversion below).
    if not (file.endswith(".mat") and participant_prefix.isdecimal()):
        continue
    filename_dict[int(participant_prefix)].append(file)

In [10]:
filename_dict

defaultdict(list,
            {10: ['10_20131130.mat', '10_20131204.mat', '10_20131211.mat'],
             11: ['11_20140618.mat', '11_20140625.mat', '11_20140630.mat'],
             12: ['12_20131127.mat', '12_20131201.mat', '12_20131207.mat'],
             13: ['13_20140527.mat', '13_20140603.mat', '13_20140610.mat'],
             14: ['14_20140601.mat', '14_20140615.mat', '14_20140627.mat'],
             15: ['15_20130709.mat', '15_20131016.mat', '15_20131105.mat'],
             1: ['1_20131027.mat', '1_20131030.mat', '1_20131107.mat'],
             2: ['2_20140404.mat', '2_20140413.mat', '2_20140419.mat'],
             3: ['3_20140603.mat', '3_20140611.mat', '3_20140629.mat'],
             4: ['4_20140621.mat', '4_20140702.mat', '4_20140705.mat'],
             5: ['5_20140411.mat', '5_20140418.mat', '5_20140506.mat'],
             6: ['6_20130712.mat', '6_20131016.mat', '6_20131113.mat'],
             7: ['7_20131027.mat', '7_20131030.mat', '7_20131106.mat'],
             8: ['8_20

In [11]:
# Label of each of the 15 trials, looked up by trial index during traversal
# (-1 : negative, 0 : neutral, 1 : positive).
trial_labels = [
    1, 0, -1, -1, 0,
    1, -1, 0, 1, 1,
    0, -1, 0, 1, -1,
]

### Initialization

In [12]:
# ---- Accumulators (filled while traversing the dataset) ----
label_list = []        # one label (-1, 0 or 1) per chunk

fftMap_list = []       # list of 2d-arrays: FFT map of every chunk
base_fftMap_list = []  # FFT map of every trial's 5-second base signal

fd_list = []           # FD pattern of every chunk
base_fd_list = []      # FD pattern of every trial's base signal

# ---- Parameters ----
sf = 200               # samples per second
chunk_size = sf * 10   # sliding-window length: 10 seconds worth of samples
overlap = sf * 5       # consecutive windows share 5 seconds worth of samples

freqs = list(range(4, 46))  # integer frequencies 4, 5, ..., 45

# Use every SEED channel by default.
chosen_channels = SEED_all_channel_values
# Alternative 1: fixed 8-channel subset
# chosen_channels = [0, 16, 6, 24, 11, 20, 13, 31] # ["FP1", "FP2", "C3", "C4", "P7", "P8", "O1", "O2"]
# Alternative 2: SEED indices of the channels that also exist in DEAP
# chosen_channels = []
# for ch in DEAP_all_channel_names:
#     chosen_channels.append(SEED_channels[ch])

In [15]:
import time

# Walks every participant's first listed experiment file, splits each trial
# into a 5-second base segment and the remaining signal, cuts the latter into
# overlapping windows, and appends the FFT map / FD pattern / label of every
# window to the global lists. Trials differ in length, so the chunk count per
# trial varies; the two lookup structures below record where each chunk lands.

# index data
ptc_to_idx = {} # key : "participantID-trialID-chunkID", value : index
idx_to_ptc = [] # index => (participantID, trialID, chunkID)

idx = 0

# [ loop 1 ] : Participants
for p_id in range(1, 16): # 1 to 15
    # Only element 0 of the participant's file list is processed.
    experiment = io.loadmat(eeg_path + filename_dict[p_id][0])

    # [ loop 2 ] : Trials
    # loadmat adds "__header__" / "__version__" / "__globals__" metadata
    # entries; every other key is treated as a trial array, in file order.
    keys = [key for key in experiment if not key.startswith("__")]
    for t_id in range(0, 15):
        signal = experiment[keys[t_id]]
        # ==============================================
        # 2. split signal
        base_signal = signal[:, :sf * 5]
        total_signal = signal[:, sf * 5:]

        # Whole seconds available after the base segment.
        sig_len = total_signal.shape[1]//sf

        # Sliding windows of chunk_size samples; consecutive window starts are
        # (chunk_size - overlap) samples apart.
        all_chunks = [total_signal[:, start:start+chunk_size]
                     for start in range(0, sf*sig_len - chunk_size + 1, chunk_size-overlap)]

        label = trial_labels[t_id]
        # ==============================================

        # processing base signal (one entry per trial) ===
        # Without this the base_* lists stay empty and the "diff" save option
        # has nothing to subtract.
        base_fftMap = computefftMap(base_signal, chosen_channels, freqs, sf)
        base_fftMap_list.append(base_fftMap)
        base_fd = computeFD(base_signal, chosen_channels)
        base_fd_list.append(base_fd)
        # ================================================

        # [ loop 3 ] : Chunks
        for c_id, chunk in enumerate(all_chunks):
            ptc_to_idx[str(p_id)+"-"+str(t_id)+"-"+str(c_id)] = idx
            idx_to_ptc.append((p_id,t_id,c_id))
            idx += 1

            # Compute FFT Map =========================
            fftMap = computefftMap(chunk, chosen_channels, freqs, sf)
            fftMap_list.append(fftMap)
            # ========================================

            # Compute FD pattern (1d array)============
            fd = computeFD(chunk, chosen_channels)
            fd_list.append(fd)
            # =========================================
            # Every chunk of a trial carries that trial's label.
            label_list.append(label)
    print("Pariticipant %d Ended"%(p_id))
    print(time.strftime('%c', time.localtime(time.time())))

In [None]:
# Sanity check: there must be exactly one label per FD pattern.
len(label_list) == len(fd_list)

### Save

In [16]:
def get_data_index(p_id, t_id, c_id):
    """Look up the flat list index of one chunk in `ptc_to_idx`.

    p_id: participant index; 1 is added before the lookup, so callers pass
          a zero-based value while the stored keys use p_id + 1
    t_id: trial index
    c_id: chunk index within the trial

    Raises KeyError if the "participant-trial-chunk" key is not registered.
    """
    key_parts = (p_id + 1, t_id, c_id)
    lookup_key = "-".join(str(part) for part in key_parts)
    return ptc_to_idx[lookup_key]
def get_base_index(p_id, t_id):
    """Return the flat index of a trial (15 trials per participant).

    p_id: participant index (0 maps to the start of the list)
    t_id: trial index within the participant
    """
    trials_per_participant = 15
    return t_id + trials_per_participant * p_id
# get p_id, t_id, c_id
def from_data_index(index):
    """Return the entry stored at position `index` of `idx_to_ptc`.

    Entries are the (p_id, t_id, c_id) tuples recorded while chunking.
    """
    ptc_entry = idx_to_ptc[index]
    return ptc_entry

In [None]:
# Pair every chunk's input with its label.
#   "origin": the chunk's FFT map as-is
#   "diff":   scaled chunk FFT map minus the scaled FFT map of the trial's
#             base signal
data_list = []
save_option = "origin" # "diff"

for i in range(len(label_list)):
    # activated signals
    if save_option == "origin":
        data_list.append([fftMap_list[i], label_list[i]])
    # difference between activated and base signal
    elif save_option == "diff":
        # The SEED from_data_index takes only the flat index; passing a second
        # argument raises TypeError.
        p_id, t_id, c_id = from_data_index(i)
        # idx_to_ptc stores the 1-based participant id (1..15), while
        # get_base_index computes 15 * p_id + t_id, i.e. expects a 0-based one.
        base_index = get_base_index(p_id - 1, t_id)

        # NOTE(review): requires base_fftMap_list to hold one entry per
        # (participant, trial) in traversal order -- confirm it is filled.
        # `np` and `scale` are presumably provided by the star imports at the
        # top of the notebook -- verify.
        diff = np.array(scale(fftMap_list[i])) - np.array(scale(base_fftMap_list[base_index]))
        data_list.append([diff, label_list[i]])
    else:
        # Fail loudly instead of silently producing an empty data_list.
        raise ValueError("unknown save_option: %r" % (save_option,))

In [None]:
# Persist the [input, label] pairs and both index lookup structures, each to
# its own file, using the highest available pickle protocol.
for out_path, payload in (
    ('SEED_fftMap_10sec_5over.pickle', data_list),
    ("SEED_10sec_5over_ptc_to_idx", ptc_to_idx),
    ("SEED_10sec_5over_idx_to_ptc", idx_to_ptc),
):
    with open(out_path, 'wb') as out_file:
        pickle.dump(payload, out_file, pickle.HIGHEST_PROTOCOL)