In [1]:
import scipy.io as spio

from __future__ import division

import numpy as np
import scipy as sp
from matplotlib import pyplot as plt
from matplotlib import ticker
import matplotlib as mpl
from os import path

from scipy.io import loadmat

from wyrm.types import Data

from wyrm import plot
#plot.beautify()
from wyrm.types import Data
from wyrm import processing as proc

DEBUG:matplotlib.backends:backend module://ipykernel.pylab.backend_inline version unknown


In [67]:
import pandas as pd
from sklearn.preprocessing import normalize

In [2]:
TRAIN_A = '../BCI_Comp_III_Wads_2004/data/Subject_A_Train.mat'
TRAIN_B = '../BCI_Comp_III_Wads_2004/data/Subject_B_Train.mat'

TEST_A = '../BCI_Comp_III_Wads_2004/data/Subject_A_Test.mat'
TEST_B = '../BCI_Comp_III_Wads_2004/data/Subject_B_Test.mat'

TRUE_LABELS_A = 'WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU'
TRUE_LABELS_B = 'MERMIROOMUHJPXJOHUVLEORZP3GLOO7AUFDKEFTWEOOALZOP9ROCGZET1Y19EWX65QUYU7NAK_4YCJDVDNGQXODBEV2B5EFDIDNR'

MATRIX = ['abcdef',
          'ghijkl',
          'mnopqr',
          'stuvwx',
          'yz1234',
          '56789_']

MARKER_DEF_TRAIN = {'target': ['target'], 'nontarget': ['nontarget']}
MARKER_DEF_TEST = {'flashing': ['flashing']}

SEG_IVAL = [0, 700]

In [3]:
def load_bci_data(filename):
    """Load the BCI Competition III Data Set 2.
    This method loads the data set and converts it into Wyrm's ``Data``
    format. Before you use it, you have to download the data set in
    Matlab format and unpack it. The directory with the extracted files
    must contain the ``Subject_*.mat``- and the ``eloc64.txt`` files.
    .. note::
        If you need the true labels of the test sets, you'll have to
        download them separately from
        http://bbci.de/competition/iii/results/index.html#labels
    Parameters
    ----------
    filename : str
        The path to the matlab file to load
    Returns
    -------
    cnt : continuous `Data` object
    Examples
    --------
    >>> dat = load_bcicomp3_ds2('/home/foo/data/Subject_A_Train.mat')
    """
    STIMULUS_CODE = {
        0 : "blankMatrix",
        # cols from left to right
        1 : "agmsy5",
        2 : "bhntz6",
        3 : "ciou17",
        4 : "djpv28",
        5 : "ekqw39",
        6 : "flrx4_",
        # rows from top to bottom
        7 : "abcdef",
        8 : "ghijkl",
        9 : "mnopqr",
        10: "stuvwx",
        11: "yz1234",
        12: "56789_"
        }

    # load the matlab data
    data_mat = loadmat(filename)
    # load the channel names (the same for all datasets
    eloc_file = path.sep.join([path.dirname(filename), 'eloc64.txt'])
    with open(eloc_file) as fh:
        data = fh.read()
    channels = []
    for line in data.splitlines():
        if line:
            chan = line.split()[-1]
            chan = chan.replace('.', '')
            channels.append(chan)
    # fix the channel names, some letters have the wrong capitalization
    for i, s in enumerate(channels):
        s2 = s.upper()
        s2 = s2.replace('Z', 'z')
        s2 = s2.replace('FP', 'Fp')
        channels[i] = s2
    # The signal is recorded with 64 channels, bandpass filtered
    # 0.1-60Hz and digitized at 240Hz. The format is Character Epoch x
    # Samples x Channels
    data = data_mat['Signal']
    data = data.astype('double')
    # For each sample: 1 if a row/colum was flashed, 0 otherwise
    flashing = data_mat['Flashing'].reshape(-1)
    #flashing = np.flatnonzero((np.diff(a) == 1)) + 1
    ##Creates an array where only the initial intensifications of each series appear
    tmp = []
    for i, _ in enumerate(flashing):
        if i == 0:
            tmp.append(flashing[i])
            continue
        if flashing[i] == flashing[i-1] == 1:
            tmp.append(0)
            continue
        tmp.append(flashing[i])
    flashing = np.array(tmp)
    # For each sample: 0 when no row/colum was intensified,
    # 1..6 for intensified columns, 7..12 for intensified rows
    stimulus_code = data_mat['StimulusCode'].reshape(-1)
    stimulus_code = stimulus_code[flashing == 1]
    # 0 if no row/col was intensified or the intensified did not contain
    # the target character, 1 otherwise
    stimulus_type = data_mat.get('StimulusType', np.array([])).reshape(-1)
    # The target characters
    target_chars = data_mat.get('TargetChar', np.array([])).reshape(-1)
    fs = 240
    data = data.reshape(-1, 64)
    timeaxis = np.linspace(0, data.shape[0] / fs * 1000, data.shape[0], endpoint=False)
    dat = Data(data=data, axes=[timeaxis, channels], names=['time', 'channel'], units=['ms', '#'])
    dat.fs = fs
    # preparing the markers
    target_mask = np.logical_and((flashing == 1), (stimulus_type == 1)) if len(stimulus_type) > 0 else []
    nontarget_mask = np.logical_and((flashing == 1), (stimulus_type == 0)) if len(stimulus_type) > 0 else []
    flashing = (flashing == 1)
    flashing = [[i, 'flashing'] for i in timeaxis[flashing]]
    targets = [[i, 'target'] for i in timeaxis[target_mask]]
    nontargets = [[i, 'nontarget'] for i in timeaxis[nontarget_mask]]
    dat.stimulus_code = stimulus_code[:]
    stim = []
    for i,_ in enumerate(flashing):
        stim.append([flashing[i][0], STIMULUS_CODE[stimulus_code[i]]])
    stimulus_code = stim
    #stimulus_code = zip([t for t, _ in flashing], [STIMULUS_CODE[i] for i in stimulus_code])
    #Raises error "TypeError: '<' not supported between instances of 'tuple' and 'list'" when calling sort() 
    #stimulus_code =[[t for t,_ in flashing], [STIMULUS_CODE[i] for i in stimulus_code]]
    #print(type(stimulus_code), type(flashing), type(targets), type(nontargets))
    markers = flashing[:]
    markers.extend(targets)
    markers.extend(nontargets)
    markers.extend(stimulus_code)
    markers.sort()
    dat.markers = markers[:]
    return dat


In [4]:
def preprocessing_simple(dat, MRK_DEF, *args, **kwargs):
    """Simple preprocessing that reaches 97% accuracy.
    """
    fs_n = dat.fs / 2
    b, a = proc.signal.butter(5, [10 / fs_n], btype='low')
    dat = proc.filtfilt(dat, b, a)
   
    dat = proc.subsample(dat, 20)
    epo = proc.segment_dat(dat, MRK_DEF, SEG_IVAL)
    fv = proc.create_feature_vectors(epo)
    return fv, epo

In [5]:
dat_train = load_bci_data(TRAIN_A)
dat_test = load_bci_data(TEST_A)

In [6]:
fv_train, epo_train = preprocessing_simple(dat_train, MARKER_DEF_TRAIN, SEG_IVAL)
fv_test, _ = preprocessing_simple(dat_test, MARKER_DEF_TEST, SEG_IVAL)



In [7]:
def predict_character(pred, n_characters, labels):
    pred_target = pred
    #unscramble_idx = fv_test.stimulus_code.reshape(100, 15, 12).argsort()
    unscramble_idx = fv_test.stimulus_code.reshape(n_characters, -1, 12).argsort()
    static_idx = np.indices(unscramble_idx.shape)
    #lda_out_prob = pred.reshape(100, 15, 12)
    lda_out_prob = pred.reshape(n_characters, -1, 12)
    lda_out_prob = lda_out_prob[static_idx[0], static_idx[1], unscramble_idx]

    # destil the result of the 15 runs
    lda_out_prob = lda_out_prob.sum(axis=1)
    lda_out_prob = lda_out_prob.argsort()


    cols = lda_out_prob[lda_out_prob <= 5].reshape(n_characters, -1)
    rows = lda_out_prob[lda_out_prob > 5].reshape(n_characters, -1)
    text = ''
    for i in range(n_characters):
        row = rows[i][-1]-6
        col = cols[i][-1]
        letter = MATRIX[row][col]
        text += letter
    print()
    print('Constructed labels: %s' % text.upper())
    print('True labels       : %s' % labels)
    a = np.array(list(text.upper()))
    b = np.array(list(TRUE_LABELS_A))
    accuracy = np.count_nonzero(a == b) / len(a)
    print('Accuracy: %.1f%%' % (accuracy * 100))
    return accuracy

In [8]:
from sklearn.neural_network import MLPClassifier

#### Rectified linear unit function

In [55]:
x = fv_train.data
y = fv_train.axes[0]
x_test = fv_test.data

In [23]:
clf = MLPClassifier(hidden_layer_sizes=(100, ), activation='relu', solver='adam')
clf.fit(x, y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [19]:
pred = clf.predict_proba(x_test)
predict_character(pred[:,1], 100, TRUE_LABELS_A)


Constructed labels: WEX8LZCUMRWC97YFNDEZ1DQI9NN2GX8DJCO2RMEUO1OJTPUFYPOOHJ7LDAYKGOA5VHNEBBTXOO3TEOILUEE5MFGCEXAX7KFR3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 73.0%


#### Sigma function

In [26]:
clf = MLPClassifier(hidden_layer_sizes=(100, ), activation='logistic', solver='adam')
clf.fit(x, y)
pred = clf.predict_proba(x_test)

In [27]:
predict_character(pred[:,1], 100, TRUE_LABELS_A)


Constructed labels: WQWPLZCIMRWOE7YFZDEZ1DPI9NNVGR9DJCOVRMEUOOOJBPUFYPOO6J7LDGYEGOG5VHNEGBUCOO1TDOILUEE5BFAEAXAEOK9RYMRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 83.0%


#### Hyperbolic tan function

In [28]:
clf = MLPClassifier(hidden_layer_sizes=(100, ), activation='tanh', solver='adam')
clf.fit(x, y)
pred = clf.predict_proba(x_test)

In [29]:
predict_character(pred[:,1], 100, TRUE_LABELS_A)


Constructed labels: WK_PLZCIM4WOK7YFMDEZ1DP79NRVGR9DICUJRMEUOUOJGPUFYPOOH87LDG3EGOG52ZNEUBS_OO1TDOILUEE5BFAEDXMWRKFR3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 72.0%


#### Identity function

In [36]:
from sklearn.preprocessing import normalize

In [42]:
clf = MLPClassifier(hidden_layer_sizes=(100, ), activation='identity', solver='adam')
import time
t_start = time.clock()
clf.fit(normalize(x, axis = 1), y)
t_end = time.clock()
pred = clf.predict_proba(normalize(x_test, axis = 1))

print('time: {} seconds'.format(t_end - t_start))

time: 23.970288614646282 seconds


In [43]:
predict_character(pred[:,1], 100, TRUE_LABELS_A)


Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5HFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 96.0%


In [57]:
list_fcns = ['identity', 'tanh', 'logistic', 'relu']

In [82]:
def MLP_test(X_train, Y_train, X_test, fcns, hidden_layer_sizes = (100,) verbose = True):
    predictions = {}
    models_perf = {}
    for i, fcn in enumerate(fcns):
        t_start = time.clock()
        classifier = MLPClassifier(activation = fcn, hidden_layer_sizes)
        classifier.fit(X_train, Y_train)
        t_end = time.clock()
        
        t_diff = t_end - t_start
       # test_score = classifier.score(X_test, Y_test)
        pred = classifier.predict_proba(X_test)
        acc = predict_character(pred[:,1], 100, TRUE_LABELS_A)
        
        #dict_models[classifier_name] = {'model': fcn, 'test_score': test_score, 'train_time': t_diff}
        models_perf[fcn] = {'time': t_diff, 'accuracy': acc}
        if verbose:
            print("trained {fn} in {f:.2f} s".format(fn = fcn,f = t_diff))
    return pred, models_perf

SyntaxError: invalid syntax (<ipython-input-82-39ad0d720968>, line 1)

In [72]:
pred, models = MLP_test(x, y, x_test, list_fcns)


Constructed labels: WQWPLZCCM_WO97YFZDDZ1DPI9NNVGR9DJCUVRMEUO1OJD8UFYPOO6H7LDGYEGOA5VTNEHBTXOO1TDOILUEE5AFAFEXATOK4R9MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 85.0%
trained identity in 7.16 s

Constructed labels: WQ_PLNCOMRWO_7YFYDEZ1DPJ9NHVGQJDJUOVLMEUOOOJA4UFYPOO6H7LDAYEGOG5VHNE_BJXOOZTDOILUED5AFAFAXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 76.0%
trained tanh in 17.44 s

Constructed labels: WPXPLNCIMRWO97YFZDEZ1DPR9MNVGR8DJUUV_MEUO7CJD2UFYPOO6H7RDGYEGOA5VTNEEDUXOO1TDOILUEEABFAKEXAWOKER3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 79.0%
trained logistic in 25.89 s

Constructed labels: WQFPLHOIMXEOQ7YFZDEZ1DQL8NQVGRPDHCMV4REUO1OJBPTFYP7OH87X2GYQGOA5V2NEHBUXOOZTDQIKUEE5HASEDXAW_K3X1MRU
True labels       : WQ

In [63]:
import pandas as pd

In [74]:
df = pd.DataFrame(models)
df

Unnamed: 0,identity,tanh,logistic,relu
accuracy,0.85,0.76,0.79,0.62
time,7.158911,17.436235,25.894318,10.276268


### Normalized data

In [75]:
x_norm = normalize(x, axis = 1)
x_test_norm = normalize(x_test, axis = 1)
pred, models = MLP_test(x_norm, y, x_test_norm, list_fcns)


Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5NFAEEXAWRK4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 95.0%
trained identity in 13.68 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5HFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 96.0%
trained tanh in 14.54 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5HFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 96.0%
trained logistic in 50.03 s

Constructed labels: WQSPLZCIMRKO97YFZDEZ1DPA9MN2GRQDJUUJRMEUOUOJAPUFYPOO6KOLDGYEGOG5VHNEHBTXOO1TCOILUEEANFAFEXAW7K4R3MXU
True labels       : W

In [76]:
df = pd.DataFrame(models)
df

Unnamed: 0,identity,tanh,logistic,relu
accuracy,0.95,0.96,0.96,0.81
time,13.676181,14.544991,50.02758,74.283147


### Hidden layers size

In [84]:
x_norm = normalize(x, axis = 1)
x_test_norm = normalize(x_test, axis = 1)
pred, models = MLP_test(x_norm, y, x_test_norm, list_fcns, (200,))


Constructed labels: WQXPLZCOMRWO97YFZDEZ1DPI9NNVGRPDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5NFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 96.0%
trained identity in 16.45 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEKBTXOO1TDOILUEE5HFAEEXAWRK4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 94.0%
trained tanh in 20.22 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5HFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 96.0%
trained logistic in 52.40 s

Constructed labels: WWXPLZFIMREO97YFZDEZ1DPC9NN2GRQDJUOJ4MEUOUOJDPUFYPOO6K7LDAYEGOA5VHNEZBTXO1ZBDOILUEEAHF5FEXAWUK4R3MXU
True labels       : W

In [85]:
df = pd.DataFrame(models)
df

Unnamed: 0,identity,tanh,logistic,relu
accuracy,0.96,0.94,0.96,0.76
time,16.448045,20.220809,52.403316,92.07403


In [86]:
x_norm = normalize(x, axis = 1)
x_test_norm = normalize(x_test, axis = 1)
pred, models = MLP_test(x_norm, y, x_test_norm, list_fcns, (50,))


Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VTNEHBTXOO1TDOILUEE5HFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 95.0%
trained identity in 9.45 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRPDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VTNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 95.0%
trained tanh in 21.66 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5NFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 96.0%
trained logistic in 51.22 s

Constructed labels: WQXPLZCIM4ECW7YIZDEZ1DQL9NNVGR8DJUIVRMEUOUOJA2UFYPOO687LDGYEGOA5VHNEHATXOO1TDOILUEE5BFAFEXAE_KFR3MXU
True labels       : WQ

In [87]:
df = pd.DataFrame(models)
df

Unnamed: 0,identity,tanh,logistic,relu
accuracy,0.95,0.95,0.96,0.81
time,9.451202,21.663958,51.218713,64.421723


In [68]:
x_norm = normalize(x, axis = 1)
x_test_norm = normalize(x_test, axis = 1)

In [88]:
pred, models = MLP_test(x_norm, y, x_test_norm, list_fcns, (20,))


Constructed labels: WQXPLZCOMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5NFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 97.0%
trained identity in 15.43 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VTNEHBTXOO1TDOILUEE5NFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 95.0%
trained tanh in 15.06 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5HFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 96.0%
trained logistic in 34.34 s

Constructed labels: WQXPLZELMREO57YFYDEZ1DPL9NNVGR8DHUUVRMEUOOOJJPUFYPOO6J7LDAYE5OA5VHBE6BTXOO1TDOILUEEAMFAEEXAW_K4R3MXU
True labels       : W

In [89]:
x_norm = normalize(x, axis = 1)
x_test_norm = normalize(x_test, axis = 1)
pred, models = MLP_test(x_norm, y, x_test_norm, list_fcns, (10,))


Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 97.0%
trained identity in 19.51 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5HFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 96.0%
trained tanh in 16.00 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5HFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 96.0%
trained logistic in 43.60 s

Constructed labels: WPYPLZDOMREO97YFZ2EZ1DPC9NN2GRPDHUIV_MEUOCOJDPUFYPOO9J7RDGYEGOG5VLNEHBTXOO1TEOHLUEEABF5FEXAW_K4R3MRU
True labels       : W

In [90]:
df = pd.DataFrame(models)
df

Unnamed: 0,identity,tanh,logistic,relu
accuracy,0.97,0.96,0.96,0.77
time,19.509023,16.000125,43.601013,86.100817


In [91]:
x_norm = normalize(x, axis = 1)
x_test_norm = normalize(x_test, axis = 1)
pred, models = MLP_test(x_norm, y, x_test_norm, list_fcns, (5,))


Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VTNEHBTXOO1TDOILUEE5NFAEEXAWRK4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 94.0%
trained identity in 12.43 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRPDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5NFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 95.0%
trained tanh in 17.66 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDAYEGOA5VHNEHBTXOO1TDOILUEE5HFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 96.0%
trained logistic in 37.09 s

Constructed labels: WPXPLZCIM4EO97YDZDEZ1DPC9NNVGR8DJC5VLMEUOUOJDPUFYPOO6J7LDAYEGOA5VHNEBBTXUO1TDOHLUEE5BFAEEXEW7K4R3MRU
True labels       : W

### Tomar una parte del training set para validar

Training set: tomo 70% para entrenar y 30% para validar.

In [52]:
x_train = fv_train.data
y_train = fv_train.axes[0]

In [65]:
import time
def MLP_test(X_train, Y_train, X_test, fcns, hsizes = (100,), verbose = True, valid = False):
    predictions = {}
    models_perf = {}
    for i, fcn in enumerate(fcns):
        classifier = MLPClassifier(activation = fcn, hidden_layer_sizes = hsizes)
        if valid:
            # For validation split training data into
            # 70% training and 30% validation
            nt = round(X_train.shape[0] * 0.7)
            X_valid = X_train[nt:,:]
            X_train = X_train[:nt,:]
            Y_valid = Y_train[nt:]
            Y_train = Y_train[:nt]
            
        t_start = time.clock()
        classifier.fit(X_train, Y_train)
        t_end = time.clock()        
        t_diff = t_end - t_start
        pred = classifier.predict_proba(X_test)
        train_score = classifier.score(X_train, Y_train)
        acc = predict_character(pred[:,1], 100, TRUE_LABELS_A)
        if valid:
            test_score = classifier.score(X_valid, Y_valid)
            models_perf[fcn] = {'Time [s]': t_diff, 'Train score': train_score, 
                            'Test score': test_score, 'Accuracy [%]': acc*100}
        else: 
            models_perf[fcn] = {'Time [s]': t_diff, 'Train score': train_score, 
                                'Accuracy [%]': acc*100}
        if verbose:
            print("trained {fn} in {f:.2f} s".format(fn = fcn,f = t_diff))

    return pred, models_perf

In [61]:
pred, models = MLP_test(x, y, x_test, list_fcns, valid = True)


Constructed labels: WPXPLZCCM43597YFZDEZ1CP79NQ2G4PDKBOVLMEUOOOJB2UBYPOO681R2AYEAOG5VHNWKBW7OO1TDOILUEE5MFAREXAW_K4RYMFU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 68.0%
trained identity in 2.29 s

Constructed labels: WPXQLZFLMRHOA7Y_1EEH1DPB9QN2GRQDJCUVRMEUOUOJDPUF1POO687RJGYEG7G8VNNELEEXOOZTDOILUEE8YFCCDXAW7KER3MHU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 65.0%
trained tanh in 4.68 s

Constructed labels: WQXVLHCIM_WOK7YDZDEZ1DPN9NNVGRQDJCULRMEUOUOJDPUFYPOO897LDAYEGOA5VZNELBUXOO1TDOILUEE5MFAEEXAW7K3R9MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 79.0%
trained logistic in 3.66 s

Constructed labels: WK_QL1ELM4FOS7YFWDDZ1DP29NL2GREDIBVGNMEWPCCJD4ULYP1U5J1LDGY_GOB8VSNEBHVXNI9TDQCBUEW7MFD9FXETUKAQ9ZQU
True labels       : WQXP

In [64]:
df = pd.DataFrame(models)
df

Unnamed: 0,identity,tanh,logistic,relu
Accuracy [%],68.0,65.0,79.0,43.0
Test score,0.791721,0.79334,0.813695,0.754765
Time [s],2.286131,4.683003,3.664879,1.882201
Train score,0.820915,0.912498,0.944931,0.965161


In [69]:
pred, models = MLP_test(x_norm, y, x_test_norm, list_fcns, valid = True)


Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5NFAEEXAW_K4R3MRU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 97.0%
trained identity in 9.40 s

Constructed labels: WQXPLZCIMRWO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOUOJD2UFYPOO6J7LDAYEGOA5VZNEHBTXOO1TDOILUEE5NFAEEXAWOK4R3MXU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 92.0%
trained tanh in 9.08 s

Constructed labels: WQXPLZCIMRQO97YFZDEZ1DPI9NNVGRQDJCUJRMEUOOOJD2UFYPOO6H7LDGYEGOA5VZNEHBTXOO1TDOILUEE5NFAEEXAWOK4R3MXU
True labels       : WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW_K4R3MRU
Accuracy: 92.0%
trained logistic in 17.84 s

Constructed labels: WWXPL9CIM4NCK7YDYDEZ1DPZ9NN3GRQDHBUHRMEUOOOJD2UFYP7O6K1GDAYEGOB5VZNEBBUIOO3TDOHLUEE5BF5RDXAB1KFRYMXU
True labels       : WQX

In [70]:
df = pd.DataFrame(models)
df

Unnamed: 0,identity,tanh,logistic,relu
Accuracy [%],97.0,92.0,92.0,66.0
Test score,0.839434,0.841892,0.835038,0.824651
Time [s],9.396146,9.082205,17.837402,13.983796
Train score,0.869748,0.878351,0.873666,0.997006
