In [175]:
from __future__ import division

import sys
import time
import logging

import numpy as np
from scipy.io import loadmat
from os import path

import wyrm.processing as proc
from wyrm import io
from wyrm.types import Data, BlockBuffer, RingBuffer
from pandas import read_csv      

#Classifiers
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn import metrics
import matplotlib
import matplotlib.pyplot as plt
import pickle

from sklearn.externals import joblib
from os import listdir
from os.path import isfile, join

In [3]:
# Set to True to replay the experiment in real time.
REALTIME = False

# Matlab recordings of the two subjects (training and test sessions).
TRAIN_DATA_A = '../../BCI_Comp_III_Wads_2004/data/Subject_A_Train.mat'
TEST_DATA_A = '../../BCI_Comp_III_Wads_2004/data/Subject_A_Test.mat'
TRAIN_DATA_B = '../../BCI_Comp_III_Wads_2004/data/Subject_B_Train.mat'
TEST_DATA_B = '../../BCI_Comp_III_Wads_2004/data/Subject_B_Test.mat'

# Electrode location file that sits next to the recordings.
CHANNEL_DATA = '../../BCI_Comp_III_Wads_2004/data/eloc64.txt'

# True characters of a test session, one character per epoch.
TRUE_LABELS_TEST = "WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU"

# Stimulus code -> characters lit up by that intensification.
# Codes 1..6 are the matrix columns (left to right), codes 7..12 the rows
# (top to bottom).
STIMULUS_CODE = dict(enumerate([
    # cols from left to right
    "agmsy5",
    "bhntz6",
    "ciou17",
    "djpv28",
    "ekqw39",
    "flrx4_",
    # rows from top to bottom
    "abcdef",
    "ghijkl",
    "mnopqr",
    "stuvwx",
    "yz1234",
    "56789_",
], start=1))

# Marker definitions: every class name maps to the list of marker labels
# that belong to it.
MARKER_DEF_TRAIN = dict((name, [name]) for name in ('target', 'nontarget'))
MARKER_DEF_TEST = dict((group, [group]) for group in STIMULUS_CODE.values())

# Segmentation interval [start, stop] relative to a marker.
SEG_IVAL = [0, 800]

In [104]:
def load_bci_data(filename, ch_ival=(None, None)):
    """Load a range of character epochs from BCI Competition III Data Set 2.

    The Matlab file is converted into Wyrm's continuous ``Data`` format.
    The directory that contains ``filename`` must also contain the
    ``eloc64.txt`` file with the channel names. Everything is read from
    ``filename`` itself; no notebook-level variable is consulted.

    .. note::
        If you need the true labels of the test sets, you'll have to
        download them separately from
        http://bbci.de/competition/iii/results/index.html#labels

    Parameters
    ----------
    filename : str
        The path to the matlab file to load
    ch_ival : sequence of two ints or None, optional
        ``[start, stop)`` range of character epochs to keep (slice
        semantics). The default keeps every epoch in the file.

    Returns
    -------
    dat : continuous `Data` object
        ``dat.fs`` is the sampling rate, ``dat.stimulus_code`` holds the
        numeric stimulus code of every intensification onset and
        ``dat.markers`` is a sorted list of ``[time_ms, label]`` pairs.
        Each onset yields a ``'flashing'`` marker and a marker with the
        intensified characters; when the file has a ``StimulusType``
        field it also yields a ``'target'`` or ``'nontarget'`` marker.

    Examples
    --------
    >>> dat = load_bci_data('/home/foo/data/Subject_A_Train.mat', ch_ival=[0, 42])
    """
    STIMULUS_CODE = {
        0 : "blankMatrix",
        # cols from left to right
        1 : "agmsy5",
        2 : "bhntz6",
        3 : "ciou17",
        4 : "djpv28",
        5 : "ekqw39",
        6 : "flrx4_",
        # rows from top to bottom
        7 : "abcdef",
        8 : "ghijkl",
        9 : "mnopqr",
        10: "stuvwx",
        11: "yz1234",
        12: "56789_"
        }
    first, last = ch_ival[0], ch_ival[1]

    # load the matlab data
    data_mat = loadmat(filename)

    # load the channel names (the same for all datasets); path.join keeps a
    # bare file name relative instead of turning it into '/eloc64.txt'
    eloc_file = path.join(path.dirname(filename), 'eloc64.txt')
    with open(eloc_file) as fh:
        channels = [line.split()[-1].replace('.', '')
                    for line in fh.read().splitlines() if line.strip()]
    # fix the channel names, some letters have the wrong capitalization
    channels = [name.upper().replace('Z', 'z').replace('FP', 'Fp')
                for name in channels]

    # The signal is stored as Character Epoch x Samples x Channels
    data = data_mat['Signal'][first:last, :, :].astype('double')

    # For each sample: 1 if a row/column was flashed, 0 otherwise. Only the
    # first sample of every run of consecutive 1s counts as an onset.
    flashing = data_mat['Flashing'][first:last, :].reshape(-1)
    is_on = (flashing == 1)
    onset = is_on.copy()
    onset[1:] = is_on[1:] & ~is_on[:-1]

    # For each sample: 0 when no row/column was intensified,
    # 1..6 for intensified columns, 7..12 for intensified rows
    stimulus_code = data_mat['StimulusCode'][first:last, :].reshape(-1)[onset]

    fs = 240
    data = data.reshape(-1, 64)
    # relies on true division (the notebook imports it from __future__)
    timeaxis = np.linspace(0, data.shape[0] / fs * 1000, data.shape[0], endpoint=False)
    dat = Data(data=data, axes=[timeaxis, channels], names=['time', 'channel'], units=['ms', '#'])
    dat.fs = fs
    dat.stimulus_code = stimulus_code

    # preparing the markers
    onset_times = timeaxis[onset]
    markers = [[t, 'flashing'] for t in onset_times]
    # StimulusType (1 = the intensification contained the target character)
    # is absent from files without labels; such files get no target markers.
    if 'StimulusType' in data_mat:
        stimulus_type = data_mat['StimulusType'][first:last, :].reshape(-1)
        if len(stimulus_type) > 0:
            markers.extend([t, 'target'] for t in timeaxis[onset & (stimulus_type == 1)])
            markers.extend([t, 'nontarget'] for t in timeaxis[onset & (stimulus_type == 0)])
    markers.extend([t, STIMULUS_CODE[int(code)]]
                   for t, code in zip(onset_times, stimulus_code))
    markers.sort()
    dat.markers = markers

    return dat


In [7]:
# Select which subject the generic TRAIN_DATA / TEST_DATA names point to.
subject = 'B'
if subject == 'A':
    TRAIN_DATA = TRAIN_DATA_A
    TEST_DATA = TEST_DATA_A
elif subject == 'B':
    TRAIN_DATA = TRAIN_DATA_B
    TEST_DATA = TEST_DATA_B
else:
    # Fail here with a clear message instead of a NameError on TRAIN_DATA below.
    raise ValueError("unknown subject %r, expected 'A' or 'B'" % (subject,))

# Raw Matlab dict of the selected training file.
data_m = loadmat(TRAIN_DATA)
# Target characters of the training file from index 60 onwards.
target_chars = data_m.get('TargetChar')[0][60:]
TRUE_LABELS = target_chars

In [17]:
# Each subject's training file is loaded twice: character epochs 0..41 and
# epochs 42..83.
TRAIN_EPOCHS_FIRST = [0, 42]
TRAIN_EPOCHS_SECOND = [42, 84]

train_A = load_bci_data(TRAIN_DATA_A, ch_ival=TRAIN_EPOCHS_FIRST)
train_B = load_bci_data(TRAIN_DATA_B, ch_ival=TRAIN_EPOCHS_FIRST)
train_Abis = load_bci_data(TRAIN_DATA_A, ch_ival=TRAIN_EPOCHS_SECOND)
train_Bbis = load_bci_data(TRAIN_DATA_B, ch_ival=TRAIN_EPOCHS_SECOND)


In [105]:
# Test data: each subject's test file is loaded twice, character epochs
# 0..30 and epochs 31..61.
TEST_EPOCHS_FIRST = [0, 31]
TEST_EPOCHS_SECOND = [31, 62]

test_A = load_bci_data(TEST_DATA_A, ch_ival=TEST_EPOCHS_FIRST)
test_B = load_bci_data(TEST_DATA_B, ch_ival=TEST_EPOCHS_FIRST)
test_Abis = load_bci_data(TEST_DATA_A, ch_ival=TEST_EPOCHS_SECOND)
test_Bbis = load_bci_data(TEST_DATA_B, ch_ival=TEST_EPOCHS_SECOND)

In [184]:
# Labels: attach the first and second block of 31 true characters to the
# loaded data sets.
# NOTE(review): these slices come from TRUE_LABELS_TEST and their bounds match
# the ch_ival=[0,31] / [31,62] splits used for the test_* objects, yet they
# are attached to the train_* objects (loaded with [0,42] / [42,84]) -
# confirm whether test_A / test_B / test_Abis / test_Bbis were intended.
# NOTE(review): the same string is used for both subjects - verify that it is
# valid for subject B as well.
labels = TRUE_LABELS_TEST[:31]
labels_bis = TRUE_LABELS_TEST[31:62]
train_A.labels = train_B.labels = labels
train_Abis.labels = train_Bbis.labels = labels_bis

## Subject C

In [167]:
def load_bci_data2(filenames, ch_ival=(None, None), train=True):
    """Load several run files as one continuous Wyrm ``Data`` object.

    The runs in ``filenames`` are concatenated in the given order, the
    concatenated recording is cut into ``n_ch`` equally long character
    epochs (42 when ``train`` is true, 31 otherwise) and the epochs selected
    by ``ch_ival`` are returned. Trailing samples that do not fill a whole
    epoch are dropped.

    The directory of ``filenames[0]`` must contain the ``eloc64.txt`` file
    with the channel names.

    Parameters
    ----------
    filenames : list of str
        Paths of the matlab run files, in recording order. Each file must
        provide ``signal`` (samples x 64 channels), ``Flashing`` and
        ``StimulusCode``; ``StimulusType`` is optional.
    ch_ival : sequence of two ints or None, optional
        ``[start, stop)`` range of character epochs to keep (slice
        semantics). The default keeps every epoch.
    train : bool, optional
        Selects the number of character epochs the recording is divided
        into: 42 if true, 31 if false.

    Returns
    -------
    dat : continuous `Data` object
        ``dat.fs`` is the sampling rate, ``dat.stimulus_code`` holds the
        numeric stimulus code of every intensification onset and
        ``dat.markers`` is a sorted list of ``[time_ms, label]`` pairs.
        Each onset yields a ``'flashing'`` marker and a marker with the
        intensified characters; when ``StimulusType`` is available it also
        yields a ``'target'`` or ``'nontarget'`` marker.

    Raises
    ------
    ValueError
        If the files contain fewer samples than character epochs.
    """
    STIMULUS_CODE = {
            0 : "blankMatrix",
            # cols from left to right
            1 : "agmsy5",
            2 : "bhntz6",
            3 : "ciou17",
            4 : "djpv28",
            5 : "ekqw39",
            6 : "flrx4_",
            # rows from top to bottom
            7 : "abcdef",
            8 : "ghijkl",
            9 : "mnopqr",
            10: "stuvwx",
            11: "yz1234",
            12: "56789_"
            }
    # number of character epochs the concatenated recording is divided into
    n_ch = 42 if train else 31
    first, last = ch_ival[0], ch_ival[1]

    # load the channel names (the same for all datasets); path.join keeps a
    # bare file name relative instead of turning it into '/eloc64.txt'
    eloc_file = path.join(path.dirname(filenames[0]), 'eloc64.txt')
    with open(eloc_file) as fh:
        channels = [line.split()[-1].replace('.', '')
                    for line in fh.read().splitlines() if line.strip()]
    # fix the channel names, some letters have the wrong capitalization
    channels = [name.upper().replace('Z', 'z').replace('FP', 'Fp')
                for name in channels]

    signal_parts, onset_parts, code_parts, type_parts = [], [], [], []
    for filename in filenames:
        data_mat = loadmat(filename)
        signal_parts.append(np.asarray(data_mat['signal']).reshape(-1, 64))

        # For each sample: 1 if a row/column was flashed, 0 otherwise. Only
        # the first sample of every run of consecutive 1s counts as an
        # onset. This is done per file, so it also happens when a single
        # file is given.
        is_on = (data_mat['Flashing'].reshape(-1) == 1)
        onset = is_on.copy()
        onset[1:] = is_on[1:] & ~is_on[:-1]
        onset_parts.append(onset)

        # For each sample: 0 when no row/column was intensified,
        # 1..6 for intensified columns, 7..12 for intensified rows
        code_parts.append(data_mat['StimulusCode'].reshape(-1))

        # 0 if no row/col was intensified or the intensified one did not
        # contain the target character, 1 otherwise; may be missing
        type_parts.append(data_mat.get('StimulusType', np.array([])).reshape(-1))

    data = np.concatenate(signal_parts, axis=0).astype('double')
    onset = np.concatenate(onset_parts)
    stimulus_code = np.concatenate(code_parts)
    stimulus_type = np.concatenate(type_parts)

    # Drop the trailing samples that do not fill a whole epoch so that the
    # vectors can be reshaped to (n_ch, -1).
    n_usable = (onset.size // n_ch) * n_ch
    if n_usable == 0:
        raise ValueError('need at least %d samples to form %d character epochs, got %d'
                         % (n_ch, n_ch, onset.size))

    def _select_epochs(flat):
        # keep the requested character epochs of a per-sample vector
        return flat[:n_usable].reshape(n_ch, -1)[first:last, :].reshape(-1)

    onset = _select_epochs(onset)
    stimulus_code = _select_epochs(stimulus_code)[onset]
    data = data[:n_usable].reshape(n_ch, -1, 64)[first:last, :, :].reshape(-1, 64)

    fs = 240
    # relies on true division (the notebook imports it from __future__)
    timeaxis = np.linspace(0, data.shape[0] / fs * 1000, data.shape[0], endpoint=False)
    dat = Data(data=data, axes=[timeaxis, channels], names=['time', 'channel'], units=['ms', '#'])
    dat.fs = fs
    dat.stimulus_code = stimulus_code

    # preparing the markers
    onset_times = timeaxis[onset]
    markers = [[t, 'flashing'] for t in onset_times]
    if stimulus_type.size > 0:
        stimulus_type = _select_epochs(stimulus_type)
        markers.extend([t, 'target'] for t in timeaxis[onset & (stimulus_type == 1)])
        markers.extend([t, 'nontarget'] for t in timeaxis[onset & (stimulus_type == 0)])
    markers.extend([t, STIMULUS_CODE[int(code)]]
                   for t, code in zip(onset_times, stimulus_code))
    markers.sort()
    dat.markers = markers
    return dat


In [112]:
# Directory with the run files of the second data set.
path_ = '../../BCI_Comp_II_2003/data/'
# listdir returns names in arbitrary order; sort them so that the runs are
# always concatenated in the same (file name) order.
datafiles = [join(path_, f) for f in sorted(listdir(path_)) if isfile(join(path_, f))]
# Files whose name contains '12' form the test set, all others the training
# set; the channel file eloc64 is excluded from both. Only the file name is
# inspected so that a '12' in the directory path cannot change the split.
training_set = [f for f in datafiles
                if '12' not in path.basename(f) and 'eloc64' not in path.basename(f)]
testing_set = [f for f in datafiles
               if '12' in path.basename(f) and 'eloc64' not in path.basename(f)]

In [174]:
# Subject C: the training runs are cut into 42 character epochs and the test
# runs into 31; all epochs of each set are kept.
train_C = load_bci_data2(training_set, [0, 42])
test_C = load_bci_data2(testing_set, [0, 31], train=False)

In [185]:
# Labels of the subject C training set, one character per epoch.
train_C.labels = labels = 'CATDOGFISHWATERBOWLHATHATGLOVESHOESFISHRAT'

## Saving the data variables

In [188]:
# Store every loaded data set in a single pickle. The order of the list is
# what a reader gets back from pickle.load.
subject_datasets = [
    train_A, train_Abis, train_B, train_Bbis, train_C,
    test_A, test_Abis, test_B, test_Bbis, test_C,
]
with open('data_subjs.pkl', 'wb') as f:  # binary mode is required for pickle
    pickle.dump(subject_datasets, f)
