# EA-CSP-TCA

In [46]:
import sys

# Raw strings are required for these Windows paths: in a normal string literal
# the "\b" of "\bci-research" is a backspace and the "\f" of "\func" is a form
# feed, which silently corrupts both entries.
sys.path.append(r'D:\Google_Drive\JupyterNotebookProjects\bci-research\EA+CSP+TCA\csp')
sys.path.append(r'D:\Google_Drive\JupyterNotebookProjects\bci-research\EA+CSP+TCA\func')
sys.path.append('..')

In [47]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import moabb.datasets
import moabb.paradigms

from copy import deepcopy
from csp.utils import subject_counter
from csp.preprocess import fir_bandpass, apply_bandpass, fetch_left_right_EEG
from csp.preprocess import split_EEG_one_class, process_s_data
from csp.feat_extraction import compute_Z, feat_vector, true_label
from csp.csp import CSP

# Print floats with three decimals and a leading space/sign slot
np.set_printoptions(formatter={'float': lambda value: format(value, ' 0.3f')})

# Short alias used when building result tables
dframe = pd.DataFrame

In [48]:
# Set up the containers that the loading cell fills in

# Upper bound for subject iteration: there are 9 subjects, so n + 1 = 10
ns = 10

# ori_data keeps the data exactly as loaded (never modified afterwards);
# mod_data holds the working copies that later cells transform
ori_data = {}
mod_data = {}

## Load Dataset

In [52]:
import os

# Directory holding the A0xT.npz files. A raw string keeps the backslashes
# literal instead of relying on invalid escape sequences ("\.", "\d", "\B"),
# which newer Python versions warn about.
path = r'..\..\..\datasets\BCICIV2a'

# Load the training file of every subject (1..9) and keep working copies
for subj in range(1, 10):
    file = 'A{:02d}T.npz'.format(subj)
    data_path = os.path.join(path, file)

    # ori_data keeps the loaded archive untouched; mod_data receives copies of
    # 's' (EEG data), 'etyp' and 'epos' that later cells are free to modify
    ori_data[subj] = np.load(data_path)
    mod_data[subj] = {}
    mod_data[subj]['s'] = deepcopy(ori_data[subj]['s'])
    mod_data[subj]['etyp'] = deepcopy(ori_data[subj]['etyp'])
    mod_data[subj]['epos'] = deepcopy(ori_data[subj]['epos'])

    # Keep only the first 22 columns (the remaining ones are the EOG electrodes)
    mod_data[subj]['s'] = np.delete(mod_data[subj]['s'], np.s_[22:], 1)

    # Transpose so that 's' is n_electrodes x n_samples; the bandpass cell
    # asserts shape[0] < shape[1] on this array
    mod_data[subj]['s'] = mod_data[subj]['s'].T

In [53]:
mod_data[1].keys()

dict_keys(['s', 'etyp', 'epos'])

In [55]:
# Raw strings keep the Windows backslashes literal (no invalid escape sequences)
test_file = np.load(r'..\..\..\datasets\BCICIV2a\A01T.npz')
eval_file = np.load(r'..\..\..\datasets\BCICIV2a\A01E.npz')

print('Inside test file:', test_file.files)
print('Eval test file  :', eval_file.files)

print('')
print('Unique value inside test file:', np.unique(test_file['etyp']))
print('Unique value inside eval file:', np.unique(eval_file['etyp']))

# Count how often each event code of interest occurs in 'etyp'
test_etyp = test_file['etyp'].ravel()
eval_etyp = eval_file['etyp'].ravel()

print('')
for code in (768, 769, 770, 771, 772):
    print('Number of {} in test file:'.format(code), (test_etyp == code).sum())
print('===============================')
for code in (768, 783):
    print('Number of {} in eval file:'.format(code), (eval_etyp == code).sum())

Inside test file: ['s', 'etyp', 'epos', 'edur', 'artifacts']
Eval test file  : ['s', 'etyp', 'epos', 'edur', 'artifacts']

Unique value inside test file: [  276   277   768   769   770   771   772  1023  1072 32766]
Unique value inside eval file: [  276   277   768   783  1023  1072 32766]

Number of 768 in test file: 288
Number of 769 in test file: 72
Number of 770 in test file: 72
Number of 771 in test file: 72
Number of 772 in test file: 72
Number of 768 in eval file: 288
Number of 783 in eval file: 288


## Preprocessing

### Bandpass Filter

In [56]:
from scipy.signal import firwin, freqs, lfilter

In [57]:
# Sampling frequency of the recordings (Hz)
fs = 250

## === Bandpass filter 8-30 Hz ====
# Filter coefficients for the 8-30 Hz pass band
# (first argument 51 is presumably the filter length - confirm in csp.preprocess)
b = fir_bandpass(51, low=8, high=30, fs=fs)

# Filter every subject's recording and store the result next to the input
for subj_data in mod_data.values():
    unfiltered = subj_data['s']

    # Expect fewer rows than columns (electrodes x samples), not the transpose
    n_rows, n_cols = unfiltered.shape[0], unfiltered.shape[1]
    assert n_rows < n_cols

    subj_data['s_filt'] = apply_bandpass(unfiltered, b)

In [58]:
mod_data[1]['s_filt'].shape

(22, 672528)

### Convert n_el x n_samp -> n_trials x n_el x n_samp

In [59]:
# Cut each subject's filtered recording into trials and collect the labels
for subj, subj_data in mod_data.items():
    print('Processing for subject', subj)
    subj_data['epochs'], subj_data['y'], _ = process_s_data(
        data=subj_data, eeg_key='s_filt', start_t=0.5, end_t=3.5, fs=250
    )

    # Expected layout: 144 trials x 22 electrodes x (more than 144) samples
    epochs_shape = subj_data['epochs'].shape
    assert epochs_shape[0] == 144
    assert epochs_shape[1] == 22
    assert epochs_shape[2] > 144

Processing for subject 1
Processing for subject 2
Processing for subject 3
Processing for subject 4
Processing for subject 5
Processing for subject 6
Processing for subject 7
Processing for subject 8
Processing for subject 9


## Apply Data Alignment on raw EEG

In [82]:
from func.EA import apply_EA
from func.csp_matlab import csp_feat_ver1

In [83]:
mod_data[1].keys()

dict_keys(['s', 'etyp', 'epos', 's_filt', 'epochs', 'y', 'RefEA', 'R_inv', 'epochs_EA', 'all_feats', 'all_feats_EA'])

In [84]:
# Align the trials of every subject with EA
# Keys of the per-subject dict whose EEG data should be processed
process_key = ['epochs']

for subj, subj_data in mod_data.items():
    print('Processing subject ', subj)
    aligned_epochs = apply_EA(subj_data, process_key)
    subj_data['epochs_EA'] = aligned_epochs
    print('')

Processing subject  1
Found 1 key(s) in which EEG data is stored
Computing reference matrix RefEA
Add RefEA as a new key in data

Processing subject  2
Found 1 key(s) in which EEG data is stored
Computing reference matrix RefEA
Add RefEA as a new key in data

Processing subject  3
Found 1 key(s) in which EEG data is stored
Computing reference matrix RefEA
Add RefEA as a new key in data

Processing subject  4
Found 1 key(s) in which EEG data is stored
Computing reference matrix RefEA
Add RefEA as a new key in data

Processing subject  5
Found 1 key(s) in which EEG data is stored
Computing reference matrix RefEA
Add RefEA as a new key in data

Processing subject  6
Found 1 key(s) in which EEG data is stored
Computing reference matrix RefEA
Add RefEA as a new key in data

Processing subject  7
Found 1 key(s) in which EEG data is stored
Computing reference matrix RefEA
Add RefEA as a new key in data

Processing subject  8
Found 1 key(s) in which EEG data is stored
Computing reference matri

In [85]:
mod_data[1].keys()

dict_keys(['s', 'etyp', 'epos', 's_filt', 'epochs', 'y', 'RefEA', 'R_inv', 'epochs_EA', 'all_feats', 'all_feats_EA'])

In [86]:
# Sanity check: after EA the trial-averaged covariance of each subject should
# have a diagonal of ones (22 electrodes -> rounded diagonal sums to 22)
for subj, subj_data in mod_data.items():
    aligned_trials = subj_data['epochs_EA']

    # Mean of the per-trial covariance matrices
    cov_total = sum(np.cov(trial, rowvar=True, ddof=1) for trial in aligned_trials)
    mean_cov = cov_total / len(aligned_trials)

    assert np.round(np.diag(mean_cov)).sum() == 22
    print('Subject %d Aligned!' %subj)

Subject 1 Aligned!
Subject 2 Aligned!
Subject 3 Aligned!
Subject 4 Aligned!
Subject 5 Aligned!
Subject 6 Aligned!
Subject 7 Aligned!
Subject 8 Aligned!
Subject 9 Aligned!


# Visualizing using T-SNE
Skip visualization

In [87]:
from sklearn.manifold import TSNE
from func.csp_matlab import csp_feat_no_test
from func.csp_matlab import csp_feat_no_test_2

In [88]:
mod_data[1].keys()

dict_keys(['s', 'etyp', 'epos', 's_filt', 'epochs', 'y', 'RefEA', 'R_inv', 'epochs_EA', 'all_feats', 'all_feats_EA'])

In [67]:
data = mod_data

# For every subject, compute CSP features from the plain and the EA-aligned epochs
feature_sources = (('all_feats', 'epochs'), ('all_feats_EA', 'epochs_EA'))

for subj_data in data.values():
    for feat_key, eeg_key in feature_sources:
        subj_data[feat_key] = csp_feat_no_test(subj_data, eeg_key=eeg_key)

    # t-SNE representation (skipped)
    # data[subj]['sne'] = {}
    # data[subj]['sne']['no_EA'] = TSNE(perplexity=50, n_iter=2000, random_state=42).fit_transform(data[subj]['all_feats'])
    # data[subj]['sne']['EA'] = TSNE(perplexity=50, n_iter=2000, random_state=42).fit_transform(data[subj]['all_feats_EA'])

In [68]:
# Every feature matrix must hold one row per trial (144 trials per subject)
for subj_data in mod_data.values():
    for feat_key in ('all_feats', 'all_feats_EA'):
        assert subj_data[feat_key].shape[0] == 144

# Transfer Learning Case, CSP + TCA

In [69]:
# Rebuild TL_data from scratch: every subject is stored twice, once as a
# potential target ('tgt') and once as a potential source ('src'), each with
# its own independent copy of the arrays
copied_keys = ('epochs', 'epochs_EA', 'all_feats', 'all_feats_EA', 'y')

TL_data = {'tgt': {}, 'src': {}}

for subj, subj_data in mod_data.items():
    for role in ('tgt', 'src'):
        TL_data[role][subj] = {k: deepcopy(subj_data[k]) for k in copied_keys}

In [70]:
TL_data['src'].keys()

dict_keys([1, 2, 3, 4, 5, 6, 7, 8, 9])

## MMD

In [71]:
# Compute MMD (maximum mean discrepancy) using numpy and scikit-learn.

import numpy as np
from sklearn import metrics


def mmd_linear(X, Y):
    """Linear-kernel MMD (k(x, y) = <x, y>) between two sample sets.

    Uses the closed form: the squared Euclidean norm of the difference between
    the two column-wise sample means. This is the reformulated, faster version of

        XX = np.dot(X, X.T)
        YY = np.dot(Y, Y.T)
        XY = np.dot(X, Y.T)
        XX.mean() + YY.mean() - 2 * XY.mean()

    Arguments:
        X {[n_sample1, dim]} -- [X matrix]
        Y {[n_sample2, dim]} -- [Y matrix]
    Returns:
        [scalar] -- [MMD value]
    """
    mean_gap = X.mean(axis=0) - Y.mean(axis=0)
    return np.dot(mean_gap, mean_gap.T)


def mmd_rbf(X, Y, gamma=1.0):
    """MMD using rbf (gaussian) kernel.

    The kernel is evaluated with sklearn's ``rbf_kernel``, i.e.
    k(x, y) = exp(-gamma * ||x - y||^2) (there is no extra factor 1/2).

    Arguments:
        X {[n_sample1, dim]} -- [X matrix]
        Y {[n_sample2, dim]} -- [Y matrix]
    Keyword Arguments:
        gamma {float} -- [kernel parameter] (default: {1.0})
    Returns:
        [scalar] -- [MMD value]
    """
    # gamma is passed by keyword so the call does not depend on argument order
    XX = metrics.pairwise.rbf_kernel(X, X, gamma=gamma)
    YY = metrics.pairwise.rbf_kernel(Y, Y, gamma=gamma)
    XY = metrics.pairwise.rbf_kernel(X, Y, gamma=gamma)
    return XX.mean() + YY.mean() - 2 * XY.mean()


def mmd_poly(X, Y, degree=2, gamma=1, coef0=0):
    """MMD using polynomial kernel (i.e., k(x,y) = (gamma <X, Y> + coef0)^degree)

    Arguments:
        X {[n_sample1, dim]} -- [X matrix]
        Y {[n_sample2, dim]} -- [Y matrix]
    Keyword Arguments:
        degree {int} -- [degree] (default: {2})
        gamma {int} -- [gamma] (default: {1})
        coef0 {int} -- [constant item] (default: {0})
    Returns:
        [scalar] -- [MMD value]
    """
    def _mean_kernel(A, B):
        # Average of all pairwise polynomial-kernel values between A and B
        return metrics.pairwise.polynomial_kernel(A, B, degree, gamma, coef0).mean()

    within_x = _mean_kernel(X, X)
    within_y = _mean_kernel(Y, Y)
    across = _mean_kernel(X, Y)
    return within_x + within_y - 2 * across

## Define `TCA`

In [72]:
import scipy.linalg
import sklearn.metrics
from sklearn.neighbors import KNeighborsClassifier

def kernel(ker, X1, X2, gamma):
    '''
    Build the kernel matrix used by TCA.
    :param ker: kernel name; a falsy value or 'primal' returns X1 unchanged,
                'linear' and 'rbf' use the matching sklearn pairwise kernel
    :param X1: n_feature * n1 matrix (samples are columns)
    :param X2: n_feature * n2 matrix or None; None means X1 against itself
    :param gamma: kernel bandwidth, only used by the rbf kernel
    :return: X1 itself for the primal case, otherwise the pairwise kernel matrix
    :raises ValueError: if ker is not one of the supported kernel names
    '''
    if not ker or ker == 'primal':
        return X1

    # Fail here instead of returning None and crashing later in the caller
    if ker not in ('linear', 'rbf'):
        raise ValueError(
            f"Unsupported kernel type {ker!r}; expected 'primal', 'linear' or 'rbf'"
        )

    print(f'=== {ker} kernel ===')

    # sklearn expects samples as rows, while X1/X2 hold them as columns
    rows1 = np.asarray(X1).T
    rows2 = None if X2 is None else np.asarray(X2).T

    if ker == 'linear':
        return sklearn.metrics.pairwise.linear_kernel(rows1, rows2)
    return sklearn.metrics.pairwise.rbf_kernel(rows1, rows2, gamma=gamma)


class TCA:
    '''
    Transfer Component Analysis: projects source and target features into a
    shared low-dimensional space obtained from a generalized eigenproblem.
    '''

    def __init__(self, kernel_type='primal', dim=30, lamb=1, gamma=1):
        '''
        Init func
        :param kernel_type: kernel, values: 'primal' | 'linear' | 'rbf'
        :param dim: dimension after transfer
        :param lamb: lambda value in equation
        :param gamma: kernel bandwidth for rbf kernel
        '''
        self.kernel_type = kernel_type
        self.dim = dim
        self.lamb = lamb
        self.gamma = gamma

    def fit(self, Xs, Xt):
        '''
        Transform Xs and Xt
        :param Xs: ns * n_feature, source feature
        :param Xt: nt * n_feature, target feature
        :return: Xs_new and Xt_new after TCA (ns * dim and nt * dim); the values
                 come from scipy.linalg.eig and may be complex
        '''
        # Samples become columns: X is n_feature * (ns + nt)
        X = np.hstack((Xs.T, Xt.T))
        # In-place division below needs a floating dtype (integer features would fail)
        if not np.issubdtype(X.dtype, np.inexact):
            X = X.astype(float)
        # Scale every sample (column) to unit length
        X /= np.linalg.norm(X, axis=0)
        n = X.shape[1]
        ns, nt = len(Xs), len(Xt)

        # MMD matrix M and centering matrix H
        e = np.vstack((1 / ns * np.ones((ns, 1)), -1 / nt * np.ones((nt, 1))))
        M = e * e.T
        M = M / np.linalg.norm(M, 'fro')
        H = np.eye(n) - 1 / n * np.ones((n, n))

        K = kernel(self.kernel_type, X, None, gamma=self.gamma)

        # Size the regulariser from K itself: K M K^T is K.shape[0] square for
        # every kernel type, including a falsy kernel_type that kernel() treats
        # as 'primal'
        n_eye = K.shape[0]
        a = np.linalg.multi_dot([K, M, K.T]) + self.lamb * np.eye(n_eye)
        b = np.linalg.multi_dot([K, H, K.T])

        # Generalized eigenproblem; keep the eigenvectors of the dim smallest eigenvalues
        w, V = scipy.linalg.eig(a, b)
        ind = np.argsort(w)
        A = V[:, ind[:self.dim]]

        Z = np.dot(A.T, K)
        Z /= np.linalg.norm(Z, axis=0)
        Xs_new, Xt_new = Z[:, :ns].T, Z[:, ns:].T
        return Xs_new, Xt_new

    def fit_predict(self, Xs, Ys, Xt, Yt):
        '''
        Transform Xs and Xt, then make predictions on target using 1NN
        :param Xs: ns * n_feature, source feature
        :param Ys: ns * 1, source label
        :param Xt: nt * n_feature, target feature
        :param Yt: nt * 1, target label
        :return: Accuracy and predicted_labels on the target domain
        '''
        Xs_new, Xt_new = self.fit(Xs, Xt)
        clf = KNeighborsClassifier(n_neighbors=1)
        clf.fit(Xs_new, Ys.ravel())
        y_pred = clf.predict(Xt_new)
        acc = sklearn.metrics.accuracy_score(Yt, y_pred)
        return acc, y_pred

## `evaluate-all` function 1
This is my initial trial, comparing CSP-SVM and CSP-TCA-SVM. In the TCA step, TCA is executed twice (once with the target training data and once with the target test data).  
Pseudocode:
- Iterate n_iter times:
    - X_tgt_tr = random n_samp
    - X_tgt_te = all - n_samp
    - MMD to find most similar subject
    - CSP on X_tgt_train and X_tgt_test, this means X_tgt_test is filtered with the CSP filters obtained from X_tgt_train data
    - csp_src = csp of most similar subject, this is pre-computed
    - tca_src_tgt_train, tca_tgt_train = TCA(csp_src, csp_tgt_train)
    - tca_src_tgt_test, tca_tgt_test = TCA(csp_src, csp_tgt_test)

This function consists of 3 modes:
- mode A:      
    tca_stack = stack(tca_src_tgt_train, tca_tgt_train)  
    clf.fit(tca_stack)  
    clf.score(tca_tgt_test)
- mode B:  
    clf.fit(tca_src_tgt_train)  
    clf.score(tca_tgt_test)
- mode C:  
    clf.fit(tca_src_tgt_test, y_src)  
    clf.score(tca_tgt_test)

In [73]:
import random
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [74]:
def evaluate_all_ver1(data, tgt_subj=1, kertyp='primal', mode='A', n_iter=30, model='CSP-SVM', n_samp=10):
    '''
    Repeatedly evaluate one model on a target subject, using a small
    class-balanced set of target trials for training.

    For each of the n_iter iterations:
      - draw n_samp random target trials as training set, re-drawing until exactly
        n_samp // 2 of them have label 1; all remaining trials form the test set
      - csp_tgt_train, csp_tgt_test = csp_feat_ver1(train trials, test trials, train labels)
      - model == 'CSP-SVM':
          fit an SVC on csp_tgt_train and score it on csp_tgt_test
      - any other model (CSP-TCA-SVM, EA-CSP-TCA-SVM):
          * pick the source subject whose precomputed CSP features have the
            smallest polynomial-kernel MMD to csp_tgt_train
          * tca_src_tgt_train, tca_tgt_train = TCA(csp_src, csp_tgt_train)
          * tca_src_tgt_test,  tca_tgt_test  = TCA(csp_src, csp_tgt_test)
          * mode 'A'  : fit on stack(tca_src_tgt_train, tca_tgt_train)
            mode 'B'  : fit on tca_src_tgt_train with the source labels
            otherwise : fit on tca_src_tgt_test with the source labels (mode C)
          * score on tca_tgt_test

    :param data: dict with 'tgt' and 'src' entries mapping subject -> dict; the
                 target entry is read for its epochs and 'y', the source entries
                 for their precomputed CSP features and 'y'
    :param tgt_subj: key of the target subject
    :param kertyp: kernel type handed to TCA
    :param mode: 'A', 'B' or anything else (treated as 'C'), see above
    :param n_iter: number of random train/test splits
    :param model: 'CSP-SVM', 'CSP-TCA-SVM' or 'EA-CSP-TCA-SVM'; only the last one
                  reads the EA-aligned epochs and features
    :param n_samp: number of target training trials per iteration
    :return: (mean, std) of the test accuracy over all iterations
    :raises ValueError: if a balanced training set of size n_samp cannot be drawn,
                        or a TCA model is requested without any source subject
    '''
    print(f'Processing target subject: {tgt_subj}')
    print(f'Model: {model}')

    # =========== PREPROCESSING  ============== #
    # Only the EA model reads the aligned epochs and their precomputed features
    model_EA = ['EA-CSP-TCA-SVM']
    if model in model_EA:
        key, csp_feat = 'epochs_EA', 'all_feats_EA'
    else:
        key, csp_feat = 'epochs', 'all_feats'
    print('Fetching target data stored in {}'.format(key))
    print('Fetching precomputed csp stored in {}'.format(csp_feat))

    # Candidate source subjects: everyone except the target
    sources = [s for s in data['src'].keys() if s != tgt_subj]
    if model != 'CSP-SVM' and not sources:
        raise ValueError('No source subject other than the target is available')

    # Fetch raw data of target
    X_tgt = deepcopy(data['tgt'][tgt_subj][key])
    y_tgt = deepcopy(data['tgt'][tgt_subj]['y'])

    # The rejection sampling below never terminates if the requested balance
    # cannot be reached, so check feasibility up front
    n_total = len(X_tgt)
    n_pos_needed = n_samp // 2
    n_other_needed = n_samp - n_pos_needed
    n_pos_avail = int((y_tgt[:n_total] == 1).sum())
    n_other_avail = n_total - n_pos_avail
    if n_pos_needed > n_pos_avail or n_other_needed > n_other_avail:
        raise ValueError(
            f'Cannot draw {n_samp} training trials with {n_pos_needed} of label 1: '
            f'only {n_pos_avail} trials with label 1 and {n_other_avail} others available'
        )

    # Object to store all result
    all_sc = []

    # ===== EVALUATION ====== #
    print('Iteration: ', end='')
    for i in range(n_iter):
        print(i+1, end=' ')

        # Randomly pick n_samp unique trials as target training trials,
        # re-drawing until the classes are balanced
        ids_train = random.sample(range(n_total), n_samp)
        while (y_tgt[ids_train] == 1).sum() != n_pos_needed:
            ids_train = random.sample(range(n_total), n_samp)

        # Every trial not used for training is a test trial
        ids_test = np.delete(np.arange(n_total), ids_train)

        # Raw data for train, test and corresponding y
        XtrRaw = X_tgt[ids_train]
        XteRaw = X_tgt[ids_test]
        ytr = y_tgt[ids_train]
        yte = y_tgt[ids_test]

        # Compute target feat_train, feat_test
        csp_tgt_train, csp_tgt_test = csp_feat_ver1(XtrRaw, XteRaw, ytr, n_filter=3)

        # ===== MODEL FITTING ===== #
        if model == 'CSP-SVM':
            print(f'Evaluating {model}')
            print(f'train size {csp_tgt_train.shape}')
            print(f'test size {csp_tgt_test.shape}')

            model_svm = SVC()
            model_svm.fit(csp_tgt_train, ytr)
            all_sc.append(model_svm.score(csp_tgt_test, yte))

        else:
            print(f'Evaluating {model}')
            # ===== Find one most similar source subject ====== #
            # MMD from current csp_tgt_train to every source subject; the target
            # is excluded by construction, so the minimum is the best match
            mmd = [mmd_poly(data['src'][src_subj][csp_feat], csp_tgt_train)
                   for src_subj in sources]
            sim_subj = sources[int(np.argmin(mmd))]

            # ====== TCA ====== #
            tca = TCA(kernel_type=kertyp, dim=3, lamb=1, gamma=1)

            csp_src = data['src'][sim_subj][csp_feat]
            y_src   = data['src'][sim_subj]['y']

            print('Find new TCA feature using following data')
            print('CSP src all     : ', csp_src.shape)
            print('CSP target train: ', csp_tgt_train.shape)
            print('CSP target test : ', csp_tgt_test.shape)

            # NOTE(review): TCA is fitted twice, so the train-side and test-side
            # features come from two separate projections; modes A and B train
            # on the first and score on the second - confirm this is intended
            tca_src_tgt_train, tca_tgt_train = tca.fit(csp_src, csp_tgt_train)
            tca_src_tgt_test, tca_tgt_test  = tca.fit(csp_src, csp_tgt_test)

            # Stack tca features and labels of most similar and target
            tca_both = np.vstack([tca_src_tgt_train, tca_tgt_train])
            y_both = np.concatenate([y_src, ytr])

            print('')
            print('TCA src      : ', tca_src_tgt_train.shape)
            print('TCA tgt train: ', tca_tgt_train.shape)
            print('TCA both     : ', tca_both.shape)
            print('TCA tgt_test : ', tca_tgt_test.shape)

            # --- Training ----- #
            model_svm = SVC()

            # TCA features may be complex (eigen-decomposition); only the real
            # part is used for fitting and scoring
            if mode == 'A':
                model_svm.fit(tca_both.real, y_both)
            elif mode == 'B':
                print('mode B')
                model_svm.fit(tca_src_tgt_train.real, y_src)
            else:
                print('mode C')
                model_svm.fit(tca_src_tgt_test.real, y_src)

            all_sc.append(model_svm.score(tca_tgt_test.real, yte))

        print('')

    scores = np.array(all_sc)
    mean_sc = scores.mean()
    std_sc = scores.std()

    return mean_sc, std_sc

In [75]:
def conv(sc, std):
    """Format a score and its standard deviation as '<score> +/- <std>', both rounded to 2 decimals."""
    return f'{np.round(sc, 2)} +/- {np.round(std, 2)}'
# print(conv(80.356, 0.5678))

In [76]:
%%capture
# Variable to store all score
all_sc1 = {}

# Models
models = ['CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM']

for m in models:
    all_sc1[m] = {}
    temp_sc = []
    
    for s in TL_data['tgt'].keys():
        #all_sc1[s][m] = evaluate_all_ver1(TL_data, tgt_subj=s, model=m, kertyp='primal', mode='C', n_iter=30, n_samp=30)
        avg, std = evaluate_all_ver1(TL_data, tgt_subj=s, model=m, kertyp='linear', mode='B', n_iter=30, n_samp=20)
        temp_sc.append(avg)
        
        all_sc1[m][s] = conv(avg, std)
    
    # Average over all subjects
    all_sc1[m]['avg'] = conv(np.array(temp_sc).mean(), np.array(temp_sc).std())

In [77]:
df_sc1 = dframe(all_sc1)
# df_sc1.loc['mean'] = df_sc1.mean()
df_sc1

Unnamed: 0,CSP-SVM,CSP-TCA-SVM,EA-CSP-TCA-SVM
1,0.85 +/- 0.03,0.5 +/- 0.0,0.51 +/- 0.09
2,0.55 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.0
3,0.93 +/- 0.03,0.5 +/- 0.0,0.46 +/- 0.17
4,0.59 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.0
5,0.53 +/- 0.04,0.5 +/- 0.0,0.5 +/- 0.0
6,0.58 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.01
7,0.64 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.0
8,0.96 +/- 0.02,0.5 +/- 0.0,0.51 +/- 0.06
9,0.81 +/- 0.03,0.5 +/- 0.0,0.49 +/- 0.06
avg,0.72 +/- 0.16,0.5 +/- 0.0,0.5 +/- 0.01


In [29]:
df_sc1 = dframe(all_sc1)
# df_sc1.loc['mean'] = df_sc1.mean()
df_sc1

Unnamed: 0,CSP-SVM,CSP-TCA-SVM,EA-CSP-TCA-SVM
1,0.84 +/- 0.04,0.5 +/- 0.0,0.51 +/- 0.07
2,0.56 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.0
3,0.94 +/- 0.03,0.5 +/- 0.0,0.49 +/- 0.06
4,0.6 +/- 0.04,0.5 +/- 0.0,0.5 +/- 0.0
5,0.53 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.0
6,0.59 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.0
7,0.64 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.0
8,0.96 +/- 0.02,0.5 +/- 0.0,0.5 +/- 0.0
9,0.82 +/- 0.03,0.5 +/- 0.0,0.48 +/- 0.08
avg,0.72 +/- 0.16,0.5 +/- 0.0,0.5 +/- 0.01


In [30]:
df_sc1 = dframe(all_sc1)
df_sc1

Unnamed: 0,CSP-SVM,CSP-TCA-SVM,EA-CSP-TCA-SVM
1,0.84 +/- 0.04,0.5 +/- 0.0,0.51 +/- 0.07
2,0.56 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.0
3,0.94 +/- 0.03,0.5 +/- 0.0,0.49 +/- 0.06
4,0.6 +/- 0.04,0.5 +/- 0.0,0.5 +/- 0.0
5,0.53 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.0
6,0.59 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.0
7,0.64 +/- 0.05,0.5 +/- 0.0,0.5 +/- 0.0
8,0.96 +/- 0.02,0.5 +/- 0.0,0.5 +/- 0.0
9,0.82 +/- 0.03,0.5 +/- 0.0,0.48 +/- 0.08
avg,0.72 +/- 0.16,0.5 +/- 0.0,0.5 +/- 0.01


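Try modifying the following parameters of the `evaluate_all_ver1` function above:
- kertyp = 'rbf', 'linear', 'primal'
- mode   = 'A', 'B', 'C' (mode 'A' trains on the stacked source + target-train TCA features; the function has no `stack_tca` argument)  
These results also clearly show that this approach does not give a good score.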

## `evaluate-all` function 2 
**Main idea**  
Follow the algorithm in the TCA reference code.

All CSP features of target and source are precomputed, without test data.  
TCA is fitted on all source and all target data; the SVM is trained on the source data and predicts the target data. There is no iteration.

How does this algorithm work?
- CSP-SVM    : Train on 144 source data, predict 144 target data, use CSP features
- CSP-TCA-SVM: Train on 144 source data, predict 144 target data, use TCA-transformed CSP features
- EA-CSP-TCA-SVM: Train on 144 source data, predict 144 target data, use TCA-transformed EA-CSP features

In [31]:
from sklearn.model_selection import train_test_split, cross_val_score

In [32]:
def evaluate_all_ver2(data, tgt_subj=1, model='CSP-SVM', samples_tr=10):
    '''
    Train on the most similar source subject and score on the whole target subject.

    This function assumes data contains pre-computed CSP features for every
    target and source subject. Steps:
      - pick the source subject whose CSP features have the smallest
        polynomial-kernel MMD to the target's CSP features
      - model == 'CSP-SVM': fit an SVC on the source CSP features and score it
        on the target CSP features
      - any other model: fit TCA on (source, target) CSP features, fit an SVC on
        the TCA source features and score it on the TCA target features

    :param data: dict with 'tgt' and 'src' entries mapping subject -> dict that
                 holds 'all_feats', 'all_feats_EA' and 'y'
    :param tgt_subj: key of the target subject
    :param model: 'CSP-SVM', 'CSP-TCA-SVM' or 'EA-CSP-TCA-SVM'; only the last one
                  reads the EA features ('all_feats_EA')
    :param samples_tr: unused; kept so existing calls keep working
    :return: accuracy of the classifier on the target subject
    :raises ValueError: if there is no source subject other than the target
    '''
    print(f'Processing target subject: {tgt_subj}')
    print(f'Model: {model}')

    # =========== PREPROCESSING  ============== #
    # Only the EA model reads the EA-aligned keys
    model_EA = ['EA-CSP-TCA-SVM']
    if model in model_EA:
        key, csp_feat = 'epochs_EA', 'all_feats_EA'
    else:
        key, csp_feat = 'epochs', 'all_feats'
    print('Fetching raw data stored in {}'.format(key))
    print('Fetching feat data stored in {}'.format(csp_feat))

    # ======= EVALUATION ========= #
    # === Looking for csp target feature ==== #
    csp_tgt = deepcopy(data['tgt'][tgt_subj][csp_feat])
    y_tgt   = deepcopy(data['tgt'][tgt_subj]['y'])

    # ===== Find one most similar source subject ====== #
    # MMD from the target to every other subject; the target itself is left
    # out, so the smallest value identifies the best match directly
    sources = [s for s in data['src'].keys() if s != tgt_subj]
    if not sources:
        raise ValueError('No source subject other than the target is available')

    mmd = [mmd_poly(data['src'][src_subj][csp_feat], csp_tgt) for src_subj in sources]
    order = np.argsort(np.array(mmd))

    # Source subjects ordered from most to least similar
    mmd_sim = np.array(sources)[order]
    sim_subj = sources[int(order[0])]

    print(f'mmd result: {mmd_sim}')
    print(f'sim subj: {sim_subj}')

    csp_src = data['src'][sim_subj][csp_feat]
    y_src   = data['src'][sim_subj]['y']

    if model == 'CSP-SVM':
        print(f'Evaluating {model}')

        print(f'Similar subject: {sim_subj}')
        print(f'train size {csp_src.shape}')
        print(f'test size {csp_tgt.shape}')

        # Train on the source CSP features, score on the target CSP features
        model_svm = SVC()
        model_svm.fit(csp_src, y_src)
        sc = model_svm.score(csp_tgt, y_tgt)

    else:
        # ====== TCA ====== #
        # Compute TCA feature of current target and most similar subject
        tca = TCA(kernel_type='primal', dim=6, lamb=1, gamma=1)

        print('Find new TCA feature using following data')
        print('CSP src all     : ', csp_src.shape)
        print('CSP tgt all     : ', csp_tgt.shape)

        tca_src, tca_tgt = tca.fit(csp_src, csp_tgt)

        print('')
        print('TCA src      : ', tca_src.shape)
        print('TCA tgt      : ', tca_tgt.shape)

        # --- Training ----- #
        # TCA features may be complex; only the real part is used
        model_svm = SVC()
        model_svm.fit(tca_src.real, y_src)
        sc = model_svm.score(tca_tgt.real, y_tgt)
        print('')

    return sc

In [33]:
%%capture
# This version doesn't have iteration
# Variable to store all score
all_sc2 = {}

# Models
models = ['CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM']

for m in models:
    all_sc2[m] = {}
    temp_sc = []
    
    for s in TL_data['tgt'].keys():
        score = evaluate_all_ver2(TL_data, tgt_subj=s, model=m, samples_tr=30)
        all_sc2[m][s] = np.round(score,3)
        temp_sc.append(score)
    
    all_sc2[m]['avg'] = conv(np.array(temp_sc).mean(), np.array(temp_sc).std())

In [34]:
all_sc2['CSP-SVM'].keys()

dict_keys([1, 2, 3, 4, 5, 6, 7, 8, 9, 'avg'])

In [35]:
# Build the result table: one column per model, one row per subject plus 'avg'
df_sc2 = dframe(all_sc2)
# df_sc2.loc['mean'] = df_sc2.mean()
# NOTE(review): the displayed values keep 3 decimals, so .round(2) appears to
# have no effect here - presumably because the string in the 'avg' row makes
# the columns non-numeric; confirm before relying on the rounding.
df_sc2.round(2)

Unnamed: 0,CSP-SVM,CSP-TCA-SVM,EA-CSP-TCA-SVM
1,0.951,0.951,0.931
2,0.549,0.708,0.792
3,0.674,0.806,0.972
4,0.812,0.819,0.812
5,0.507,0.514,0.736
6,0.722,0.778,0.792
7,0.75,0.819,0.875
8,0.806,0.986,0.986
9,0.514,0.868,0.868
avg,0.7 +/- 0.14,0.81 +/- 0.13,0.86 +/- 0.08


## Check MMD vs Accuracy
Total subject m
Pseudocode:
- Current target subject : x
- Source subject: m - x
- Compute MMD target to each source subject
- For each source subject:
    - clf.fit(source data)
    - clf.score(target data)
- Compare MMD and score

In [36]:
def MMDacc(data, tgt_subj, raw_keys=('epochs', 'epochs_EA', 'y'), csp_keys=('all_feats', 'all_feats_EA'), model='CSP-SVM'):
    '''
    Compare MMD similarity with cross-subject accuracy for one target subject.

    The target CSP features are computed with `csp_feat_no_test`. Every entry
    of data['src'] (including the target itself, if it is present there) is
    then used in turn as the only training set of a fresh SVC that is scored
    on the target. The MMD between the target and each source is printed for
    comparison; it does not influence the returned scores.

    Parameters
    ----------
    data : dict
        data['tgt'][tgt_subj] is handed to `csp_feat_no_test` and must hold the
        labels under raw_keys[2]. data['src'][subj] must hold pre-computed CSP
        features under csp_keys[0] and labels under raw_keys[2].
    tgt_subj : hashable
        Key of the target subject in data['tgt'].
    raw_keys : sequence of str
        (raw epochs key, EA epochs key, label key). Only the first and the
        third entries are used here.
    csp_keys : sequence of str
        (CSP feature key, EA CSP feature key). Only the first entry is used.
    model : str
        Only echoed in the log; the classifier is always an SVC on CSP features.

    Returns
    -------
    list
        Value of `SVC.score` on the target for each source subject, in the
        iteration order of data['src'].
    '''
    print(f'Processing target subject: {tgt_subj}')
    print(f'Model: {model}')    
    
    # =========== PREPROCESSING  ============== #
    # Only the non-EA raw/feature keys are evaluated by this function
    key      = raw_keys[0]
    y        = raw_keys[2]
    csp_feat = csp_keys[0]
    print('Fetching raw data stored in {}'.format(key))
    print('Fetching csp data stored in {}'.format(csp_feat))
    
    # ===== EVALUATION ====== #
    # === Compute csp_target ==== #
    csp_tgt = csp_feat_no_test(data['tgt'][tgt_subj], eeg_key=key)
    y_tgt   = deepcopy(data['tgt'][tgt_subj][y]) 
    
    # Freeze the source order once so MMD values, ranking and scores line up
    src_subjects = list(data['src'].keys())
    
    # ===== MMD from the target to every source subject ====== #
    mmd_vals = [mmd_poly(csp_tgt, data['src'][src_subj][csp_feat])
                for src_subj in src_subjects]
    
    # Rank sources from most to least similar. The ranking is expressed with
    # the real subject keys instead of "position + 1", which was only correct
    # when the keys happened to be 1..n in order.
    ranked_subj = np.array(src_subjects)[np.argsort(np.array(mmd_vals))]
        
    print(f'mmd values result   : {np.round(mmd_vals,3)}')
    print(f'most similar subject: {ranked_subj}')
    
    print(f'Evaluating {model}')

    # List to store all score
    all_sc = []
    
    # Train on each source subject separately, always test on the target
    for src_subj in src_subjects:
        print(f'Evaluating subject {src_subj} as source')
        csp_src = data['src'][src_subj][csp_feat]
        y_src   = data['src'][src_subj][y]
        
        # A fresh classifier per source so nothing carries over
        model_svm = SVC()
        model_svm.fit(csp_src, y_src)

        # Add score to list
        all_sc.append(model_svm.score(csp_tgt, y_tgt))

    return all_sc

In [37]:
# Target subject to inspect; `t` and `sc` are reused by the next cell
t = 9
# One score per source subject, in the order of TL_data['src']
sc = MMDacc(TL_data, tgt_subj=t)

Processing target subject: 9
Model: CSP-SVM
Fetching raw data stored in epochs
Fetching csp data stored in all_feats
mmd values result   : [ 6.714  5.334  5.051  4.067  5.969  3.287  5.099  9.595  0.000]
most similar subject: [9 6 4 3 7 2 5 1 8]
Evaluating CSP-SVM
Evaluating subject 1 as source
Evaluating subject 2 as source
Evaluating subject 3 as source
Evaluating subject 4 as source
Evaluating subject 5 as source
Evaluating subject 6 as source
Evaluating subject 7 as source
Evaluating subject 8 as source
Evaluating subject 9 as source


In [38]:
# Scores in source order, rounded for display
print(f'Accuracy, subj {t} as target    : {np.round(np.array(sc),2)}')
# Sources ranked from highest to lowest score; the +1 turns 0-based positions
# into subject numbers (assumes the sources are subjects 1..n in order - TODO confirm)
print('Subject with highest acc      :', np.argsort(sc)[::-1]+1)

Accuracy, subj 9 as target    : [ 0.820  0.740  0.760  0.860  0.510  0.510  0.540  0.780  0.870]
Subject with highest acc      : [9 4 1 8 3 2 7 6 5]


## `evaluate-all` function 3
**Main idea**  
In each iteration, draw a random subset of n pre-computed CSP target features and build a new classifier for that draw.

New classifier each iteration
This code tests the viability of non-inductive transfer learning(?) for:
- CSP-SVM         
- CSP-TCA-SVM
- EA-CSP-TCA-SVM


Pseudocode:
- All CSP of target and source are precomputed, without test data  
- Iterate n times  
- Grab csp_target
- Each iteration:  
    - idx = sample.random(all_csp_target_feat, n_sample)  
    - csp_tgt_test = csp_target[idx]
    - With MMD pick best similar source subject to csp_tgt_test, grab its csp_source
    - Now we have csp_source, csp_target_test
    - if model is CSP-SVM:
        - train on csp_source
        - evaluate on csp_tgt_test
    - if model EA, CSP-TCA-SVM 
        - tca_source, tca_tgt_test = TCA(csp_source, csp_tgt_test)  
        - clf.fit(tca_source)
        - clf.score(tca_tgt_test)

In [39]:
def evaluate_all_ver3(data, tgt_subj=1, model='CSP-SVM', n_samp=10, n_iter=20):
    '''
    Repeatedly score one pipeline on random subsets of pre-computed target features.

    Each iteration draws `n_samp` target trials (exactly n_samp//2 of them with
    label 1), picks the source subject whose pre-computed CSP features have the
    smallest `mmd_poly` distance to that draw, trains a fresh SVC on the source
    and scores it on the drawn target trials. For the TCA models the source and
    target features are first mapped with TCA.

    Parameters
    ----------
    data : dict
        data['tgt'][tgt_subj] and data['src'][subj] must hold CSP features
        under 'all_feats' (or 'all_feats_EA' for the EA model) and labels
        under 'y'. Features and labels are indexed with a list of positions,
        so they are assumed to be numpy arrays.
    tgt_subj : hashable
        Key of the target subject. It is never used as its own source.
    model : {'CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM'}
        Pipeline to evaluate.
    n_samp : int
        Number of target trials drawn per iteration.
    n_iter : int
        Number of random draws.

    Returns
    -------
    (mean_sc, std_sc)
        Mean and standard deviation of the per-iteration scores.

    Raises
    ------
    ValueError
        If `model` is not supported or no source other than the target exists.

    Notes
    -----
    The balanced draw uses rejection sampling and does not terminate when a
    balanced draw is impossible for the given labels.
    '''
    supported_models = ('CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM')
    if model not in supported_models:
        # An unknown model used to fall through both branches and silently
        # return NaN statistics from an empty score list.
        raise ValueError(f'Unsupported model {model!r}; expected one of {supported_models}')

    print(f'Processing target subject: {tgt_subj}')
    print(f'Model: {model}')    
    
    # Key under which labels are stored
    y = 'y'
    
    # =========== PREPROCESSING  ============== #
    # Only the EA model reads the EA-aligned entries. `key` is only logged
    # here, because this version works on pre-computed features.
    if model == 'EA-CSP-TCA-SVM':
        key, csp_feat = 'epochs_EA', 'all_feats_EA'
    else:
        key, csp_feat = 'epochs', 'all_feats'
    print('Fetching raw data stored in {}'.format(key))
    print('Fetching feat data stored in {}'.format(csp_feat))
    
    # === Find csp_target ==== #
    csp_tgt = deepcopy(data['tgt'][tgt_subj][csp_feat])
    y_tgt   = deepcopy(data['tgt'][tgt_subj][y])
    
    # Freeze the source order so MMD values map back to the right subject key
    src_subjects = list(data['src'].keys())
    
    # Object to store all result
    all_sc = []
    
    for i in range(n_iter):
        print('Iteration', i)
        
        # ===== Selecting n random target sample ==== #
        # Redraw until exactly n_samp//2 trials carry label 1
        idx = random.sample(range(len(csp_tgt)), n_samp)
        while (y_tgt[idx] == 1).sum() != n_samp//2:
            idx = random.sample(range(len(csp_tgt)), n_samp)
        
        csp_tgt_test = csp_tgt[idx]
        y_tgt_te     = y_tgt[idx]
        
        # ===== Find most similar source to current target data ====== #
        # The target keeps a 0 placeholder so the printed ranking is unchanged
        mmd = []
        for src_subj in src_subjects:
            if src_subj == tgt_subj:
                mmd.append(0)
            else:
                mmd.append(mmd_poly(data['src'][src_subj][csp_feat], csp_tgt_test))

        # Subject keys ordered from most to least similar
        mmd_sim = np.array(src_subjects)[np.argsort(np.array(mmd))]

        # Drop the target explicitly rather than assuming it sits at position 0
        candidates = [subj for subj in mmd_sim if subj != tgt_subj]
        if not candidates:
            raise ValueError('No source subject other than the target is available')
        sim_subj = candidates[0]

        print(f'mmd result: {mmd_sim}')
        print(f'sim subj: {sim_subj}')

        csp_src = data['src'][sim_subj][csp_feat]
        y_src   = data['src'][sim_subj][y]
        
        ## =========== EVALUATION =========== ##
        # ====== CSP-SVM ====== #
        if model == 'CSP-SVM':
            print(f'Evaluating {model}')
            print(f'Similar subject: {sim_subj}')
            print(f'train size {csp_src.shape}')
            print(f'test size  {csp_tgt_test.shape}')

            # New classifier for every draw
            model_svm = SVC()
            model_svm.fit(csp_src, y_src)

            ## Add score to list
            all_sc.append(model_svm.score(csp_tgt_test, y_tgt_te))
        
        # ====== EA, CSP-TCA-SVM ====== #
        else:
            # ====== TCA ====== #
            print(f'Evaluating {model}')
            # Compute TCA feature of current target and most similar subject
            tca = TCA(kernel_type='primal', dim=6, lamb=1, gamma=1)

            print('Find new TCA feature using following data')
            print('CSP src all     : ', csp_src.shape)
            print('CSP tgt test    : ', csp_tgt_test.shape)

            tca_src, tca_tgt_test = tca.fit(csp_src, csp_tgt_test)

            print('')
            print('TCA src      : ', tca_src.shape)
            print('TCA tgt_test : ', tca_tgt_test.shape)

            # --- Training ----- #
            # Only the real part of the TCA output is passed to the classifier
            model_svm = SVC()
            model_svm.fit(tca_src.real, y_src)
        
            # Add score to list
            all_sc.append(model_svm.score(tca_tgt_test.real, y_tgt_te))
        print('')
            
    mean_sc = np.array(all_sc).mean()
    std_sc = np.array(all_sc).std()
    
    return mean_sc, std_sc

In [40]:
%%capture
# Result layout: all_sc3[model][subject] -> conv(mean, std) over the random draws,
#                all_sc3[model]['avg']   -> conv(mean, std) over the subject means
all_sc3 = {}

# Pipelines to compare
models = ['CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM']

# Every model is evaluated on every target subject
for m in models:
    temp_sc = []
    all_sc3[m] = {}

    for s in TL_data['tgt']:
        avg, std = evaluate_all_ver3(TL_data, tgt_subj=s, model=m, n_samp=30, n_iter=30)
        all_sc3[m][s] = conv(avg, std)
        temp_sc.append(avg)

    # Average over all subjects
    subj_means = np.array(temp_sc)
    all_sc3[m]['avg'] = conv(subj_means.mean(), subj_means.std())

In [41]:
# Sanity check: one entry per evaluated model
all_sc3.keys()

dict_keys(['CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM'])

In [42]:
# One column per model, one row per subject plus the 'avg' row
df_sc3 = dframe(all_sc3)
df_sc3

Unnamed: 0,CSP-SVM,CSP-TCA-SVM,EA-CSP-TCA-SVM
1,0.9 +/- 0.08,0.86 +/- 0.03,0.92 +/- 0.05
2,0.54 +/- 0.03,0.6 +/- 0.04,0.8 +/- 0.05
3,0.66 +/- 0.05,0.75 +/- 0.06,0.97 +/- 0.03
4,0.81 +/- 0.06,0.8 +/- 0.06,0.81 +/- 0.06
5,0.51 +/- 0.01,0.52 +/- 0.02,0.73 +/- 0.07
6,0.71 +/- 0.07,0.74 +/- 0.04,0.79 +/- 0.07
7,0.75 +/- 0.06,0.77 +/- 0.07,0.89 +/- 0.06
8,0.81 +/- 0.06,0.99 +/- 0.01,0.98 +/- 0.03
9,0.51 +/- 0.02,0.87 +/- 0.04,0.86 +/- 0.05
avg,0.69 +/- 0.14,0.77 +/- 0.13,0.86 +/- 0.08


## `evaluate-all` function 4
**Main idea**  
At this point I realized how the TCA-BCI paper might work: the CSP step is crucial. It uses the source data to build the spatial filters and then applies them to the n randomly drawn target trials. However, the base case `CSP-SVM` is not valid in this version; this is fixed in the next one.

Pseudocode:
- XRaw_tgt = raw data
- y_tgt = y target
- Iterate:
    - idx = random n data
    - XRaw_tgt_test = XRaw_tgt[idx], y_tgt_te = y_tgt[idx]
    - csp_tgt_test = csp of XRaw_tgt_test (apply filter to its own data)
    - sim_subj     = MMD to measure most similar source
    - csp_src      = csp of all raw source subject, y_src is corresponding label (apply filter to its own data)
    - For CSP-SVM:
        - model.fit(csp_src, y_src)
        - model.pred(csp_tgt_test)
    - For EA, CSP-TCA-SVM:
        - XRaw_src = sim_subj raw data, y_src = sim_subj label
        - csp_src, csp_tgt_test = csp(XRaw_src, XRaw_tgt_test, y_src) (apply filter of src to tgt_test)
        - tca_src, tca_tgt_test = tca(csp_src, csp_tgt_test)
        - model.fit(tca_src, y_src)
        - model.pred(tca_tgt_test)

In [43]:
from func.csp_matlab import csp_feat_no_test_2

In [44]:
def evaluate_all_ver4(data, tgt_subj=1, model='CSP-SVM', n_samp=10, n_iter=20):
    '''
    Repeatedly score one pipeline on random subsets of raw target trials.

    Each iteration draws `n_samp` raw target trials (exactly n_samp//2 of them
    with label 1), computes their CSP features with
    `csp_feat_no_test_2(trials, labels)` and picks the source subject whose
    pre-computed CSP features have the smallest `mmd_poly` distance to them.

    * 'CSP-SVM' trains an SVC on the pre-computed source features and scores
      it on the target features obtained above. Those features were computed
      with the target test labels as input, which is the invalid base case
      mentioned in the notebook notes.
    * The TCA models recompute both feature sets with
      `csp_feat_ver1(Xraw_src, Xraw_tgt_test, y_src)`, map them with TCA and
      train/score an SVC on the real part of the result.

    Parameters
    ----------
    data : dict
        data['tgt'][tgt_subj] and data['src'][subj] must hold raw epochs under
        'epochs' (or 'epochs_EA' for the EA model) and labels under 'y';
        data['src'][subj] must also hold pre-computed CSP features under
        'all_feats' / 'all_feats_EA'. Epochs and labels are indexed with a
        list of positions, so they are assumed to be numpy arrays.
    tgt_subj : hashable
        Key of the target subject. It is never used as its own source.
    model : {'CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM'}
        Pipeline to evaluate.
    n_samp : int
        Number of target trials drawn per iteration.
    n_iter : int
        Number of random draws.

    Returns
    -------
    (mean_sc, std_sc)
        Mean and standard deviation of the per-iteration scores.

    Raises
    ------
    ValueError
        If `model` is not supported or no source other than the target exists.

    Notes
    -----
    The balanced draw uses rejection sampling and does not terminate when a
    balanced draw is impossible for the given labels.
    '''
    supported_models = ('CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM')
    if model not in supported_models:
        # An unknown model used to fall through both branches and silently
        # return NaN statistics from an empty score list.
        raise ValueError(f'Unsupported model {model!r}; expected one of {supported_models}')

    print(f'Processing target subject: {tgt_subj}')
    print(f'Model: {model}')    
    
    # Key under which labels are stored
    y = 'y'
    
    # =========== PREPROCESSING  ============== #
    # Only the EA model reads the EA-aligned epochs and features
    if model == 'EA-CSP-TCA-SVM':
        key, csp_feat = 'epochs_EA', 'all_feats_EA'
    else:
        key, csp_feat = 'epochs', 'all_feats'
    print('Fetching raw data stored in {}'.format(key))
    print('Fetching feat data stored in {}'.format(csp_feat))
    
    # === Raw data of target ==== #
    Xraw_tgt = deepcopy(data['tgt'][tgt_subj][key])
    y_tgt    = deepcopy(data['tgt'][tgt_subj][y])
    
    # Freeze the source order so MMD values map back to the right subject key
    src_subjects = list(data['src'].keys())
    
    # Object to store all result
    all_sc = []
    
    for i in range(n_iter):
        print('Iteration', i)
        
        # ===== Selecting n random target sample ==== #
        # Redraw until exactly n_samp//2 trials carry label 1
        idx = random.sample(range(len(Xraw_tgt)), n_samp)
        while (y_tgt[idx] == 1).sum() != n_samp//2:
            idx = random.sample(range(len(Xraw_tgt)), n_samp)
        
        Xraw_tgt_test = Xraw_tgt[idx]
        y_tgt_te      = y_tgt[idx]
        
        # ===== Find most similar source to current target data ====== #
        # CSP features of the drawn target trials (target labels are an input)
        csp_tgt_test = csp_feat_no_test_2(Xraw_tgt_test, y_tgt_te, n_filter=3)
        
        # The target keeps a 0 placeholder so the printed ranking is unchanged
        mmd = []
        for src_subj in src_subjects:
            if src_subj == tgt_subj:
                mmd.append(0)
            else:
                mmd.append(mmd_poly(data['src'][src_subj][csp_feat], csp_tgt_test))

        # Subject keys ordered from most to least similar
        mmd_sim = np.array(src_subjects)[np.argsort(np.array(mmd))]

        # Drop the target explicitly rather than assuming it sits at position 0
        candidates = [subj for subj in mmd_sim if subj != tgt_subj]
        if not candidates:
            raise ValueError('No source subject other than the target is available')
        sim_subj = candidates[0]

        print(f'mmd result: {mmd_sim}')
        print(f'sim subj: {sim_subj}')

        csp_src = data['src'][sim_subj][csp_feat]
        y_src   = data['src'][sim_subj][y]
        
        ## =========== EVALUATION =========== ##
        # ====== CSP-SVM ====== #
        if model == 'CSP-SVM':
            print(f'Evaluating {model}')
            print(f'Similar subject: {sim_subj}')
            print(f'train size {csp_src.shape}')
            print(f'test size  {csp_tgt_test.shape}')

            # New classifier for every draw
            model_svm = SVC()
            model_svm.fit(csp_src, y_src)

            ## Add score to list
            all_sc.append(model_svm.score(csp_tgt_test, y_tgt_te))
        
        # ====== EA, CSP-TCA-SVM ====== #
        else:
            # Raw data of most similar source subject
            Xraw_src = data['src'][sim_subj][key]
            
            # Recompute both feature sets from the source trials and labels
            csp_src, csp_tgt_test = csp_feat_ver1(Xraw_src, Xraw_tgt_test, y_src, n_filter=3)
            
            # ====== TCA ====== #
            print(f'Evaluating {model}')
            # Compute TCA feature of current target and most similar subject
            tca = TCA(kernel_type='primal', dim=6, lamb=1, gamma=1)

            print('Find new TCA feature using following data')
            print('CSP src all     : ', csp_src.shape)
            print('CSP tgt test    : ', csp_tgt_test.shape)

            tca_src, tca_tgt_test = tca.fit(csp_src, csp_tgt_test)

            # --- Training ----- #
            # Only the real part of the TCA output is passed to the classifier
            model_svm = SVC()
            model_svm.fit(tca_src.real, y_src)
        
            # Add score to list
            all_sc.append(model_svm.score(tca_tgt_test.real, y_tgt_te))
        print('')
            
    mean_sc = np.array(all_sc).mean()
    std_sc = np.array(all_sc).std()

    return mean_sc, std_sc

In [45]:
%%capture
# Result layout: all_sc4[model][subject] -> conv(mean, std) over the random draws,
#                all_sc4[model]['avg']   -> conv(mean, std) over the subject means
all_sc4 = {}

# Pipelines to compare
models = ['CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM']

# Every model is evaluated on every target subject
for m in models:
    temp_sc = []
    all_sc4[m] = {}

    for s in TL_data['tgt']:
        avg, std = evaluate_all_ver4(TL_data, tgt_subj=s, model=m, n_samp=30, n_iter=30)
        all_sc4[m][s] = conv(avg, std)
        temp_sc.append(avg)

    # Average over all subjects
    subj_means = np.array(temp_sc)
    all_sc4[m]['avg'] = conv(subj_means.mean(), subj_means.std())

In [46]:
# One column per model, one row per subject plus the 'avg' row
df_sc4 = dframe(all_sc4)
# The 'avg' row already exists in all_sc4, so no extra mean row is appended:
# df_sc4.loc['mean'] = df_sc4.mean()
df_sc4

Unnamed: 0,CSP-SVM,CSP-TCA-SVM,EA-CSP-TCA-SVM
1,0.91 +/- 0.08,0.59 +/- 0.09,0.85 +/- 0.07
2,0.66 +/- 0.14,0.51 +/- 0.08,0.56 +/- 0.08
3,0.79 +/- 0.12,0.56 +/- 0.12,0.94 +/- 0.04
4,0.75 +/- 0.15,0.56 +/- 0.1,0.67 +/- 0.09
5,0.73 +/- 0.17,0.51 +/- 0.03,0.53 +/- 0.11
6,0.75 +/- 0.12,0.59 +/- 0.08,0.64 +/- 0.08
7,0.78 +/- 0.16,0.53 +/- 0.05,0.59 +/- 0.12
8,0.83 +/- 0.16,0.89 +/- 0.08,0.85 +/- 0.11
9,0.79 +/- 0.13,0.6 +/- 0.1,0.73 +/- 0.08
avg,0.78 +/- 0.06,0.6 +/- 0.11,0.7 +/- 0.13


## `evaluate-all` function 5
**Main idea**  
In the previous version, the base case effectively reports a training accuracy: `csp_src` and `csp_tgt_test` are each built with a filter fitted on their own data. In a normal CSP evaluation, the test features are obtained by applying the filter built on the training data. Here the source acts as the training data and the target acts as the test data.

Pseudocode:
- XRaw_tgt = target raw data
- y_tgt = y target
- Iterate:
    - idx = random n data
    - XRaw_tgt_test = XRaw_tgt[idx], y_tgt_te = y_tgt[idx]
    - csp_tgt_test = csp of XRaw_tgt_test (apply filter to its own data, only to measure distance)
    - sim_subj     = MMD to measure most similar source
    - XRaw_src = sim_subj raw data, y_src = sim_subj y label
    - csp_src, csp_tgt_test = csp(XRaw_src, XRaw_tgt_test, y_src) (apply filter of src to tgt_test)
    - For CSP-SVM:
        - model.fit(csp_src, y_src)
        - model.pred(csp_tgt_test)
    - For EA, CSP-TCA-SVM:
        - tca_src, tca_tgt_test = tca(csp_src, csp_tgt_test)
        - model.fit(tca_src, y_src)
        - model.pred(tca_tgt_test)

In [47]:
# Check that csp_feat_ver1 and csp_feat_no_test_2 return identical source features
s, t = 3, 8
XRaw_src = TL_data['src'][s]['epochs']
y_src    = TL_data['src'][s]['y'] 
XRaw_tgt = TL_data['tgt'][t]['epochs']

csp_src1     = csp_feat_no_test_2(XRaw_src, y_src)
csp_src2, _  = csp_feat_ver1(XRaw_src, XRaw_tgt, y_src)

# array_equal also compares shapes, so a shape mismatch yields False instead of
# broadcasting, warning or raising as an elementwise `==` would
np.array_equal(csp_src1, csp_src2)

True

In [48]:
from func.csp_matlab import csp_feat_no_test_2

In [49]:
def evaluate_all_ver5(data, tgt_subj=1, model='CSP-SVM', n_samp=10, n_iter=20):
    '''
    Repeatedly score one pipeline on random raw target trials, with every
    model using features from `csp_feat_ver1`.

    Each iteration draws `n_samp` raw target trials (exactly n_samp//2 of them
    with label 1). Their CSP features from `csp_feat_no_test_2(trials, labels)`
    are used only to pick the source subject with the smallest `mmd_poly`
    distance. Source and target features are then recomputed with
    `csp_feat_ver1(Xraw_src, Xraw_tgt_test, y_src)` (per the notebook notes:
    the source filter applied to the target trials) for every model.

    * 'CSP-SVM' trains an SVC on the source features and scores the target ones.
    * The TCA models first map both feature sets with TCA and use the real part.

    Parameters
    ----------
    data : dict
        data['tgt'][tgt_subj] and data['src'][subj] must hold raw epochs under
        'epochs' (or 'epochs_EA' for the EA model) and labels under 'y';
        data['src'][subj] must also hold pre-computed CSP features under
        'all_feats' / 'all_feats_EA' (used for the MMD ranking). Epochs and
        labels are indexed with a list of positions, so they are assumed to be
        numpy arrays.
    tgt_subj : hashable
        Key of the target subject. It is never used as its own source.
    model : {'CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM'}
        Pipeline to evaluate.
    n_samp : int
        Number of target trials drawn per iteration.
    n_iter : int
        Number of random draws.

    Returns
    -------
    (mean_sc, std_sc)
        Mean and standard deviation of the per-iteration scores.

    Raises
    ------
    ValueError
        If `model` is not supported or no source other than the target exists.

    Notes
    -----
    The source ranking still relies on features computed with the target test
    labels as input. The balanced draw uses rejection sampling and does not
    terminate when a balanced draw is impossible for the given labels.
    '''
    supported_models = ('CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM')
    if model not in supported_models:
        # An unknown model used to fall through both branches and silently
        # return NaN statistics from an empty score list.
        raise ValueError(f'Unsupported model {model!r}; expected one of {supported_models}')

    print(f'Processing target subject: {tgt_subj}')
    print(f'Model: {model}')    
    
    # Key under which labels are stored
    y = 'y'
    
    # =========== PREPROCESSING  ============== #
    # Only the EA model reads the EA-aligned epochs and features
    if model == 'EA-CSP-TCA-SVM':
        key, csp_feat = 'epochs_EA', 'all_feats_EA'
    else:
        key, csp_feat = 'epochs', 'all_feats'
    print('Fetching raw data stored in {}'.format(key))
    print('Fetching feat data stored in {}'.format(csp_feat))
    
    # === Raw data of target ==== #
    Xraw_tgt = deepcopy(data['tgt'][tgt_subj][key])
    y_tgt    = deepcopy(data['tgt'][tgt_subj][y])
    
    # Freeze the source order so MMD values map back to the right subject key
    src_subjects = list(data['src'].keys())
    
    # Object to store all result
    all_sc = []
    
    for i in range(n_iter):
        print('Iteration', i)
        
        # ===== Selecting n random target sample ==== #
        # Redraw until exactly n_samp//2 trials carry label 1
        idx = random.sample(range(len(Xraw_tgt)), n_samp)
        while (y_tgt[idx] == 1).sum() != n_samp//2:
            idx = random.sample(range(len(Xraw_tgt)), n_samp)
        
        Xraw_tgt_test = Xraw_tgt[idx]
        y_tgt_te      = y_tgt[idx]
        
        # ===== Find most similar source to current target data ====== #
        # These features are only used to measure the distance to each source
        csp_tgt_test = csp_feat_no_test_2(Xraw_tgt_test, y_tgt_te, n_filter=3)
        
        # The target keeps a 0 placeholder so the printed ranking is unchanged
        mmd = []
        for src_subj in src_subjects:
            if src_subj == tgt_subj:
                mmd.append(0)
            else:
                mmd.append(mmd_poly(data['src'][src_subj][csp_feat], csp_tgt_test))

        # Subject keys ordered from most to least similar
        mmd_sim = np.array(src_subjects)[np.argsort(np.array(mmd))]

        # Drop the target explicitly rather than assuming it sits at position 0
        candidates = [subj for subj in mmd_sim if subj != tgt_subj]
        if not candidates:
            raise ValueError('No source subject other than the target is available')
        sim_subj = candidates[0]

        print(f'mmd result: {mmd_sim}')
        print(f'sim subj: {sim_subj}')

        # Raw data of most similar source subject
        Xraw_src = data['src'][sim_subj][key]
        y_src    = data['src'][sim_subj][y] 

        # Recompute both feature sets from the source trials and labels
        csp_src, csp_tgt_test = csp_feat_ver1(Xraw_src, Xraw_tgt_test, y_src, n_filter=3)
        
        ## =========== EVALUATION =========== ##
        # ====== CSP-SVM ====== #
        if model == 'CSP-SVM':
            print(f'Evaluating {model}')
            print(f'Similar subject: {sim_subj}')
            print(f'train size {csp_src.shape}')
            print(f'test size  {csp_tgt_test.shape}')

            # New classifier for every draw
            model_svm = SVC()
            model_svm.fit(csp_src, y_src)

            ## Add score to list
            all_sc.append(model_svm.score(csp_tgt_test, y_tgt_te))
        
        # ====== EA, CSP-TCA-SVM ====== #
        else:
            # ====== TCA ====== #
            print(f'Evaluating {model}')
            # Compute TCA feature of current target and most similar subject
            tca = TCA(kernel_type='primal', dim=6, lamb=1, gamma=1)

            print('Find new TCA feature using following data')
            print('CSP src all     : ', csp_src.shape)
            print('CSP tgt test    : ', csp_tgt_test.shape)

            tca_src, tca_tgt_test = tca.fit(csp_src, csp_tgt_test)

            # --- Training ----- #
            # Only the real part of the TCA output is passed to the classifier
            model_svm = SVC()
            model_svm.fit(tca_src.real, y_src)
        
            # Add score to list
            all_sc.append(model_svm.score(tca_tgt_test.real, y_tgt_te))
        print('')
            
    mean_sc = np.array(all_sc).mean()
    std_sc  = np.array(all_sc).std()
    
    return mean_sc, std_sc

In [50]:
%%capture
# Result layout: all_sc5[model][subject] -> conv(mean, std) over the random draws,
#                all_sc5[model]['avg']   -> conv(mean, std) over the subject means
all_sc5 = {}

# Pipelines to compare
models = ['CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM']

# Every model is evaluated on every target subject
for m in models:
    temp_sc = []
    all_sc5[m] = {}

    for s in TL_data['tgt']:
        avg, std = evaluate_all_ver5(TL_data, tgt_subj=s, model=m, n_samp=30, n_iter=30)
        all_sc5[m][s] = conv(avg, std)
        temp_sc.append(avg)

    # Average over all subjects
    subj_means = np.array(temp_sc)
    all_sc5[m]['avg'] = conv(subj_means.mean(), subj_means.std())

In [51]:
# One column per model, one row per subject plus the 'avg' row
df_sc5 = dframe(all_sc5)
# The 'avg' row already exists in all_sc5, so no extra mean row is appended:
# df_sc5.loc['mean'] = df_sc5.mean()
df_sc5

Unnamed: 0,CSP-SVM,CSP-TCA-SVM,EA-CSP-TCA-SVM
1,0.59 +/- 0.06,0.6 +/- 0.06,0.84 +/- 0.12
2,0.51 +/- 0.07,0.49 +/- 0.05,0.56 +/- 0.08
3,0.58 +/- 0.12,0.57 +/- 0.13,0.93 +/- 0.04
4,0.53 +/- 0.09,0.55 +/- 0.08,0.59 +/- 0.1
5,0.5 +/- 0.01,0.5 +/- 0.03,0.53 +/- 0.07
6,0.56 +/- 0.08,0.57 +/- 0.09,0.62 +/- 0.12
7,0.53 +/- 0.04,0.54 +/- 0.09,0.59 +/- 0.13
8,0.82 +/- 0.11,0.9 +/- 0.07,0.9 +/- 0.05
9,0.56 +/- 0.15,0.61 +/- 0.1,0.73 +/- 0.09
avg,0.58 +/- 0.09,0.59 +/- 0.11,0.7 +/- 0.15


## `evaluate-all` function 6
**Main idea** :   
The previous function builds the classifier on the single source subject that MMD ranks as most similar to the current target data. However, the most similar subject is not always the one that gives the best accuracy (see the "Check MMD vs Accuracy" section). In this function I therefore train a separate classifier on each of the n most similar source subjects instead of only one, and keep the best score.

**Pseudocode**:
- XRaw_tgt = target raw data
- y_tgt = y target
- Iterate:
    - idx = random n data
    - XRaw_tgt_test = XRaw_tgt[idx], y_tgt_te = y_tgt[idx]
    - csp_tgt_test  = csp of XRaw_tgt_test (apply filter to its own data)
    - n-sim srcsubj = MMD to measure most similar source
    - Iterate over n-sim source subject
        - XRaw_src = sim_subj raw data, y_src = sim_subj y label
        - csp_src, csp_tgt_test = csp(XRaw_src, XRaw_tgt_test, y_src) (apply filter of src to tgt_test)
        - best_sc = 0, indicator to store best score 
        - For CSP-SVM:
            - model.fit(csp_src, y_src)
            - best_sc = model.score(csp_tgt_test) if > current best_sc
        - For EA, CSP-TCA-SVM:
            - tca_src, tca_tgt_test = tca(csp_src, csp_tgt_test)
            - model.fit(tca_src, y_src)
            - best_sc = model.score(tca_tgt_test) if > current best_sc

In [1]:
def evaluate_all_ver6(data, tgt_subj=1, model='CSP-SVM', n_sim=3, n_samp=10, n_iter=20):
    '''
    Repeatedly score one pipeline on random raw target trials, keeping the
    best score among the `n_sim` most similar source subjects.

    Each iteration draws `n_samp` raw target trials (exactly n_samp//2 of them
    with label 1). Their CSP features from `csp_feat_no_test_2(trials, labels)`
    are used only to rank the sources by `mmd_poly` distance. For each of the
    `n_sim` closest sources, features are recomputed with
    `csp_feat_ver1(Xraw_src, Xraw_tgt_test, y_src)`, a fresh SVC is trained
    (after a TCA mapping for the TCA models) and scored on the drawn target
    trials. The highest of those scores is recorded for the iteration.

    Parameters
    ----------
    data : dict
        data['tgt'][tgt_subj] and data['src'][subj] must hold raw epochs under
        'epochs' (or 'epochs_EA' for the EA models) and labels under 'y';
        data['src'][subj] must also hold pre-computed CSP features under
        'all_feats' / 'all_feats_EA' (used for the MMD ranking). Epochs and
        labels are indexed with a list of positions, so they are assumed to be
        numpy arrays.
    tgt_subj : hashable
        Key of the target subject. It is never used as its own source.
    model : {'CSP-SVM', 'EA-CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM'}
        Pipeline to evaluate.
    n_sim : int
        Number of most similar source subjects tried per iteration.
    n_samp : int
        Number of target trials drawn per iteration.
    n_iter : int
        Number of random draws.

    Returns
    -------
    (mean_sc, std_sc, all_sc)
        Mean and standard deviation of the per-iteration best scores, and the
        list of those best scores.

    Raises
    ------
    ValueError
        If `model` is not supported or no source other than the target exists.

    Notes
    -----
    The best source is chosen with the test labels, so the reported score is an
    optimistic (oracle) estimate. The balanced draw uses rejection sampling and
    does not terminate when a balanced draw is impossible for the given labels.
    '''
    csp_models = ('CSP-SVM', 'EA-CSP-SVM')
    tca_models = ('CSP-TCA-SVM', 'EA-CSP-TCA-SVM')
    if model not in csp_models + tca_models:
        # An unknown model used to skip both branches and silently report a
        # score of 0 for every iteration.
        raise ValueError(f'Unsupported model {model!r}; expected one of {csp_models + tca_models}')

    print(f'Processing target subject: {tgt_subj}')
    print(f'Model: {model}')    
    
    # Key under which labels are stored
    y = 'y'
    
    # =========== PREPROCESSING  ============== #
    # The EA models read the EA-aligned epochs and features
    if model in ('EA-CSP-TCA-SVM', 'EA-CSP-SVM'):
        key, csp_feat = 'epochs_EA', 'all_feats_EA'
    else:
        key, csp_feat = 'epochs', 'all_feats'
    print('Fetching raw data stored in {}'.format(key))
    print('Fetching feat data stored in {}'.format(csp_feat))
    
    # === Raw data of target ==== #
    Xraw_tgt = deepcopy(data['tgt'][tgt_subj][key])
    y_tgt    = deepcopy(data['tgt'][tgt_subj][y])
    
    # Freeze the source order so MMD values map back to the right subject key
    src_subjects = list(data['src'].keys())
    
    # Object to store all result
    all_sc = []
    
    for i in range(n_iter):
        print('Iteration', i)
        
        # ===== Selecting n random target sample ==== #
        # Redraw until exactly n_samp//2 trials carry label 1
        idx = random.sample(range(len(Xraw_tgt)), n_samp)
        while (y_tgt[idx] == 1).sum() != n_samp//2:
            idx = random.sample(range(len(Xraw_tgt)), n_samp)
        
        Xraw_tgt_test = Xraw_tgt[idx]
        y_tgt_te      = y_tgt[idx]
        
        # ===== Find most similar source to current target data ====== #
        # These features are only used to measure the distance to each source
        csp_tgt_test = csp_feat_no_test_2(Xraw_tgt_test, y_tgt_te, n_filter=3)
        
        # The target keeps a 0 placeholder so the printed ranking is unchanged
        mmd = []
        for src_subj in src_subjects:
            if src_subj == tgt_subj:
                mmd.append(0)
            else:
                mmd.append(mmd_poly(data['src'][src_subj][csp_feat], csp_tgt_test))

        # Subject keys ordered from most to least similar
        mmd_sim = np.array(src_subjects)[np.argsort(np.array(mmd))]

        # Choose the n_sim most similar sources, dropping the target explicitly
        # rather than assuming it sits at position 0 of the ranking
        candidates = [subj for subj in mmd_sim if subj != tgt_subj]
        if not candidates:
            raise ValueError('No source subject other than the target is available')
        n_sim_subj = np.array(candidates[:n_sim])

        print(f'mmd result: {mmd_sim}')
        print(f'sim subj: {n_sim_subj}')
        
        # Best score among the candidate sources of this iteration
        best_sc = 0
            
        # Iterate over all similar subject
        for sim_subj in n_sim_subj:
            # Raw data of the current candidate source
            Xraw_src = data['src'][sim_subj][key]
            y_src    = data['src'][sim_subj][y] 

            # Recompute both feature sets from the source trials and labels
            csp_src, csp_tgt_test = csp_feat_ver1(Xraw_src, Xraw_tgt_test, y_src, n_filter=3)
            
            ## =========== EVALUATION =========== ##
            # ====== CSP-SVM ====== #
            if model in csp_models:
                print(f'Evaluating {model}')
                print(f'Similar subject: {sim_subj}')
                print(f'train size {csp_src.shape}')
                print(f'test size  {csp_tgt_test.shape}')

                # New classifier for every candidate source
                model_svm = SVC()
                model_svm.fit(csp_src, y_src)
                
                # Current score
                sc = model_svm.score(csp_tgt_test, y_tgt_te)
                print(f'Source{sim_subj} current score: {sc:.2f}')

            # ====== EA, CSP-TCA-SVM ====== #
            else:
                # ====== TCA ====== #
                print(f'Evaluating {model}')
                # Compute TCA feature of current target and the candidate source
                tca = TCA(kernel_type='rbf', dim=4, lamb=1, gamma=1)

                print('Find new TCA feature using following data')
                print('CSP src all     : ', csp_src.shape)
                print('CSP tgt test    : ', csp_tgt_test.shape)

                tca_src, tca_tgt_test = tca.fit(csp_src, csp_tgt_test)

                # --- Training ----- #
                # Only the real part of the TCA output is passed to the classifier
                model_svm = SVC()
                model_svm.fit(tca_src.real, y_src)

                # Current score
                sc = model_svm.score(tca_tgt_test.real, y_tgt_te)

            # Keep the running best; ties keep the earlier value
            best_sc = max(best_sc, sc)
            
            print('')
        
        # Out of similar subject loop, append each best_sc 
        all_sc.append(best_sc)
        print('')
    
    # Out of iteration loop
    mean_sc = np.array(all_sc).mean()
    std_sc  = np.array(all_sc).std()
    
    return mean_sc, std_sc, all_sc

In [67]:
# Quick smoke run: 5 random draws of 30 trials for target subject 2, best of 3 sources
evaluate_all_ver6(TL_data, tgt_subj=2, model='EA-CSP-SVM', n_sim=3, n_samp=30, n_iter=5)

Processing target subject: 2
Model: EA-CSP-SVM
Fetching raw data stored in epochs_EA
Fetching feat data stored in all_feats_EA
Iteration 0
mmd result: [2 7 4 5 1 6 8 3 9]
sim subj: [7 4 5]
Evaluating EA-CSP-SVM
Similar subject: 7
train size (144, 6)
test size  (30, 6)
Source7 current score: 0.67

Evaluating EA-CSP-SVM
Similar subject: 4
train size (144, 6)
test size  (30, 6)
Source4 current score: 0.60

Evaluating EA-CSP-SVM
Similar subject: 5
train size (144, 6)
test size  (30, 6)
Source5 current score: 0.60


Iteration 1
mmd result: [2 5 7 6 4 1 8 3 9]
sim subj: [5 7 6]
Evaluating EA-CSP-SVM
Similar subject: 5
train size (144, 6)
test size  (30, 6)
Source5 current score: 0.57

Evaluating EA-CSP-SVM
Similar subject: 7
train size (144, 6)
test size  (30, 6)
Source7 current score: 0.57

Evaluating EA-CSP-SVM
Similar subject: 6
train size (144, 6)
test size  (30, 6)
Source6 current score: 0.43


Iteration 2
mmd result: [2 7 5 4 6 1 8 9 3]
sim subj: [7 5 4]
Evaluating EA-CSP-SVM
Similar s

(0.62,
 0.09333333333333331,
 [0.6666666666666666,
  0.5666666666666667,
  0.6666666666666666,
  0.4666666666666667,
  0.7333333333333333])

In [118]:
%%capture
# all_sc6[model][subject]  -> conv(mean, std) over the random draws
# all_sc6[model]['avg']    -> conv(mean, std) over the subject means
# pval_sc6[model][subject] -> raw per-iteration scores returned by evaluate_all_ver6
all_sc6 = {}
pval_sc6 = {}

# Pipelines to compare
models = ['CSP-SVM', 'EA-CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM']

# Every model is evaluated on every target subject
for m in models:
    temp_sc = []
    pval_sc6[m] = {}
    all_sc6[m]  = {}

    for s in TL_data['tgt']:
        avg, std, all_sc = evaluate_all_ver6(TL_data, tgt_subj=s, model=m, n_sim=3, n_samp=30, n_iter=30)
        all_sc6[m][s] = conv(avg, std)
        pval_sc6[m][s] = all_sc
        temp_sc.append(avg)

    # Average over all subjects
    subj_means = np.array(temp_sc)
    all_sc6[m]['avg'] = conv(subj_means.mean(), subj_means.std())

In [55]:
# NOTE(review): this cell is repeated below; its saved output has only three
# model columns and an earlier execution count than the four-model run, so it
# looks stale - confirm and remove one of the two cells
df_sc6 = dframe(all_sc6)
df_sc6

Unnamed: 0,CSP-SVM,CSP-TCA-SVM,EA-CSP-TCA-SVM
1,0.65 +/- 0.05,0.62 +/- 0.12,0.89 +/- 0.05
2,0.54 +/- 0.05,0.54 +/- 0.05,0.59 +/- 0.08
3,0.74 +/- 0.06,0.61 +/- 0.1,0.98 +/- 0.02
4,0.62 +/- 0.09,0.62 +/- 0.06,0.68 +/- 0.12
5,0.52 +/- 0.02,0.58 +/- 0.05,0.56 +/- 0.05
6,0.61 +/- 0.05,0.61 +/- 0.07,0.65 +/- 0.09
7,0.6 +/- 0.06,0.71 +/- 0.09,0.62 +/- 0.07
8,0.95 +/- 0.05,0.9 +/- 0.08,0.93 +/- 0.06
9,0.69 +/- 0.07,0.66 +/- 0.1,0.73 +/- 0.07
avg,0.66 +/- 0.12,0.65 +/- 0.1,0.74 +/- 0.15


In [119]:
# One column per model, one row per subject plus the 'avg' row
df_sc6 = dframe(all_sc6)
df_sc6

Unnamed: 0,CSP-SVM,EA-CSP-SVM,CSP-TCA-SVM,EA-CSP-TCA-SVM
1,0.64 +/- 0.05,0.89 +/- 0.05,0.75 +/- 0.08,0.87 +/- 0.05
2,0.54 +/- 0.05,0.6 +/- 0.06,0.55 +/- 0.05,0.56 +/- 0.05
3,0.74 +/- 0.07,0.95 +/- 0.02,0.8 +/- 0.08,0.98 +/- 0.02
4,0.64 +/- 0.1,0.69 +/- 0.08,0.62 +/- 0.06,0.7 +/- 0.07
5,0.52 +/- 0.03,0.61 +/- 0.07,0.56 +/- 0.05,0.61 +/- 0.08
6,0.61 +/- 0.07,0.69 +/- 0.08,0.64 +/- 0.07,0.68 +/- 0.09
7,0.59 +/- 0.06,0.65 +/- 0.08,0.68 +/- 0.09,0.65 +/- 0.08
8,0.94 +/- 0.05,0.94 +/- 0.04,0.95 +/- 0.04,0.94 +/- 0.04
9,0.68 +/- 0.06,0.77 +/- 0.08,0.68 +/- 0.08,0.76 +/- 0.05
avg,0.66 +/- 0.12,0.76 +/- 0.13,0.69 +/- 0.12,0.75 +/- 0.14


## `evaluate-all` function 7
**Main idea** :   
New approach suggested by David: when building the CSP filter, combine the source data with some of the target data. This makes it inductive transfer learning.

**Pseudocode**:
- XRaw_tgt = target raw data
- y_tgt = y target
- Iterate:
    - idx_tr = random n data
    - idx_te = the remaining (all-n) data
    - XRaw_tgt_train = XRaw_tgt[idx_tr], y_tgt_tr = y_tgt[idx_tr]
    - XRaw_tgt_test  = XRaw_tgt[idx_te], y_tgt_te = y_tgt[idx_te]
    - csp_tgt_test   = csp of XRaw_tgt_test (apply filter to its own data, to measure distance to source)
    - n-sim srcsubj  = MMD to measure n-most similar source
    - Iterate over n-sim source subject
        - XRaw_src = sim_subj raw data, y_src = sim_subj y label
        - csp_src, csp_tgt_test = csp([XRaw_src + XRaw_tgt_train], XRaw_tgt_test, [y_src + y_tgt_tr]) (apply filter of (src + tgt_train) to tgt_test)
        - best_sc = 0, indicator to store best score 
        - For CSP-SVM:
            - model.fit(csp_src, y_src)
            - best_sc = model.score(csp_tgt_test) if > current best_sc
        - For EA, CSP-TCA-SVM:
            - tca_src, tca_tgt_test = tca(csp_src, csp_tgt_test)
            - model.fit(tca_src, y_src)
            - best_sc = model.score(tca_tgt_test) if > current best_sc

In [111]:
def evaluate_all_ver7(data, tgt_subj=1, model='CSP-SVM', n_sim=3, n_samp=10, n_iter=20):
    '''
    Score one model on one target subject, learning the CSP filter from a
    source subject's trials combined with a few labelled target trials.

    Each of the `n_iter` iterations does the following:
      1. Draw `n_samp` random target trials such that exactly `n_samp // 2`
         of them have label 1 (target "train"); every other target trial is
         target "test".
      2. Rank the source subjects by `mmd_poly` between their stored features
         and the CSP features of the target test trials, and keep the `n_sim`
         closest ones. The target subject itself is never used as a source.
      3. For every kept source: concatenate source trials with the target
         train trials, compute CSP features with `csp_feat_ver1`, optionally
         map them through TCA, fit an `SVC` and score it on the target test
         trials.
      4. Record the best score obtained over the kept sources.

    Parameters
    ----------
    data : dict
        Needs data['tgt'][subj] and data['src'][subj] entries holding raw
        trials under 'epochs' / 'epochs_EA' and labels under 'y'; source
        entries also need features under 'all_feats' / 'all_feats_EA'.
    tgt_subj : hashable
        Key of the target subject in data['tgt'].
    model : str
        One of 'CSP-SVM', 'EA-CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM'.
        The 'EA-' variants read the '*_EA' entries of `data`.
    n_sim : int
        Number of most similar source subjects to try per iteration.
    n_samp : int
        Number of target trials moved into the training set per iteration.
    n_iter : int
        Number of random train/test splits.

    Returns
    -------
    mean_sc, std_sc, all_sc
        Mean and standard deviation of the per-iteration best scores, and the
        list of those scores.

    Raises
    ------
    ValueError
        If `model` is not supported, or if the target labels cannot provide
        a sample of `n_samp` trials with `n_samp // 2` trials of label 1.

    Notes
    -----
    NOTE(review): the source ranking is computed from features that were
    fitted with the test labels, and the reported score is the maximum over
    sources measured on the test trials, so the estimate is optimistic.
    '''
    # Fail loudly on a typo instead of silently returning an all-zero score
    supported_models = ['CSP-SVM', 'EA-CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM']
    if model not in supported_models:
        raise ValueError(f'Unknown model {model!r}; expected one of {supported_models}')

    print(f'Processing target subject: {tgt_subj}')
    print(f'Model: {model}')

    # =========== PREPROCESSING  ============== #
    # This section selects where the raw data and the stored features live
    y = 'y'

    # Following models need to fetch source EA data
    model_EA = ['EA-CSP-TCA-SVM', 'EA-CSP-SVM']
    model_TCA = ['CSP-TCA-SVM', 'EA-CSP-TCA-SVM']

    if model in model_EA:
        # === EA data ====
        key      = 'epochs_EA'
        csp_feat = 'all_feats_EA'
    else:
        # === no EA ===
        key      = 'epochs'
        csp_feat = 'all_feats'
    print('Fetching raw data stored in {}'.format(key))
    print('Fetching feat data stored in {}'.format(csp_feat))

    # === Raw data of target ==== #
    Xraw_tgt = deepcopy(data['tgt'][tgt_subj][key])
    y_tgt    = deepcopy(data['tgt'][tgt_subj][y])

    # The balanced draw below is done by rejection sampling; make sure a valid
    # draw exists, otherwise the while-loop would spin forever.
    n_total      = len(Xraw_tgt)
    n_pos_needed = n_samp // 2
    n_pos_avail  = int((y_tgt == 1).sum())
    n_neg_avail  = int((y_tgt != 1).sum())
    if (n_samp > n_total
            or n_pos_needed > n_pos_avail
            or n_samp - n_pos_needed > n_neg_avail):
        raise ValueError(
            f'Cannot draw {n_samp} target trials with {n_pos_needed} of label 1: '
            f'target has {n_total} trials, {n_pos_avail} of them with label 1'
        )

    # Subject ids in the same order the MMD values are collected in, so that
    # argsort positions can be mapped back to real ids.
    src_subjects = np.array(list(data['src'].keys()))

    # Object to store all result
    all_sc = []

    for i in range(n_iter):
        print('Iteration', i)

        # ===== Selecting n random target sample ==== #
        idx_tr = random.sample(range(n_total), n_samp)
        while (y_tgt[idx_tr] == 1).sum() != n_pos_needed:
            idx_tr = random.sample(range(n_total), n_samp)

        # Everything that was not drawn is the test set
        idx_te = np.delete(range(len(y_tgt)), idx_tr)

        Xraw_tgt_tr = Xraw_tgt[idx_tr]
        Xraw_tgt_te = Xraw_tgt[idx_te]
        y_tgt_tr    = y_tgt[idx_tr]
        y_tgt_te    = y_tgt[idx_te]

        # ===== Find most similar source to current target data ====== #
        # CSP of current Xraw_tgt_te (filter fitted on the test trials themselves)
        csp_tgt_test = csp_feat_no_test_2(Xraw_tgt_te, y_tgt_te, n_filter=3)

        # MMD from current csp_tgt_test to every source subject
        mmd = []
        for src_subj in data['src'].keys():
            if src_subj == tgt_subj:
                # Placeholder only; the target is filtered out explicitly below
                mmd.append(0)
            else:
                csp_src = data['src'][src_subj][csp_feat]
                mmd.append(mmd_poly(csp_src, csp_tgt_test))

        # Subject ids sorted from most to least similar
        mmd_sim = src_subjects[np.argsort(np.array(mmd))]

        # Choose n_sim most similar sources, never the target itself
        n_sim_subj = mmd_sim[mmd_sim != tgt_subj][:n_sim]

        print(f'mmd result: {mmd_sim}')
        print(f'sim subj: {n_sim_subj}')

        # Variable to store best score from each source subject
        best_sc = 0

        # Iterate over all similar subject
        for sim_subj in n_sim_subj:
            # Raw data of the similar subject, extended with target train trials
            Xraw_src  = data['src'][sim_subj][key]
            y_src     = data['src'][sim_subj][y]
            Xraw_both = np.concatenate([Xraw_src, Xraw_tgt_tr], axis=0)
            y_both    = np.concatenate([y_src, y_tgt_tr], axis=0)

            assert Xraw_both.shape[0] == len(y_both)
            assert Xraw_both.shape[0] == len(y_src) + len(y_tgt_tr)

            # CSP filter from (source + target train), applied to target test
            csp_both, csp_tgt_test = csp_feat_ver1(Xraw_both, Xraw_tgt_te, y_both, n_filter=3)

            ## =========== EVALUATION =========== ##
            if model in model_TCA:
                # ====== EA, CSP-TCA-SVM ====== #
                print(f'Evaluating {model}')
                # Compute TCA feature of current target and similar subject
                tca = TCA(kernel_type='rbf', dim=4, lamb=1, gamma=1)

                print('Find new TCA feature using following data')
                print('CSP src all + train  : ', csp_both.shape)
                print('CSP tgt test         : ', csp_tgt_test.shape)

                tca_both, tca_tgt_test = tca.fit(csp_both, csp_tgt_test)

                # Only the real part is handed to the classifier
                model_svm = SVC()
                model_svm.fit(tca_both.real, y_both)

                # Current score
                sc = model_svm.score(tca_tgt_test.real, y_tgt_te)
            else:
                # ====== CSP-SVM ====== #
                print(f'Evaluating {model}')
                print(f'Similar subject: {sim_subj}')
                print(f'train size {csp_both.shape}')
                print(f'test size  {csp_tgt_test.shape}')

                model_svm = SVC()
                model_svm.fit(csp_both, y_both)

                # Current score
                sc = model_svm.score(csp_tgt_test, y_tgt_te)
                print(f'Source{sim_subj} current score: {sc:.2f}')

            # Storing the best score
            best_sc = sc if sc > best_sc else best_sc

            print('')

        # Out of similar subject loop, append each best_sc
        all_sc.append(best_sc)
        print('')

    # Out of iteration loop
    mean_sc = np.array(all_sc).mean()
    std_sc  = np.array(all_sc).std()

    return mean_sc, std_sc, all_sc

In [110]:
# %%capture
# Run evaluate_all_ver7 for every model / target-subject pair.
#   all_sc7[model][subject]  -> conv(avg, std), shown as 'mean +/- std' in the results table
#   all_sc7[model]['avg']    -> conv() of the mean / std of the per-subject averages
#   pval_sc7[model][subject] -> raw per-iteration scores returned by the evaluation
all_sc7, pval_sc7 = {}, {}

# Models
models = ['CSP-SVM', 'EA-CSP-SVM', 'CSP-TCA-SVM', 'EA-CSP-TCA-SVM']

# Iterate over all models, then over all tgt subj
for m in models:
    all_sc7[m], pval_sc7[m] = {}, {}
    temp_sc = []

    for s in TL_data['tgt']:
        avg, std, all_sc = evaluate_all_ver7(
            TL_data,
            tgt_subj=s,
            model=m,
            n_sim=3,
            n_samp=30,
            n_iter=30,
        )

        all_sc7[m][s]  = conv(avg, std)
        pval_sc7[m][s] = all_sc
        temp_sc.append(avg)

    # Summary row across subjects for this model
    all_sc7[m]['avg'] = conv(np.mean(temp_sc), np.std(temp_sc))

Processing target subject: 1
Model: CSP-SVM
Fetching raw data stored in epochs
Fetching feat data stored in all_feats
Iteration 0
mmd result: [1 4 7 6 2 5 3 9 8]
sim subj: [4 7 6]
Evaluating CSP-SVM
Similar subject: 4
train size (144, 6)
test size  (114, 6)
Source4 current score: 0.84

Evaluating CSP-SVM
Similar subject: 7
train size (144, 6)
test size  (114, 6)
Source7 current score: 0.86

Evaluating CSP-SVM
Similar subject: 6
train size (144, 6)
test size  (114, 6)
Source6 current score: 0.82


Iteration 1
mmd result: [1 6 7 4 5 2 3 9 8]
sim subj: [6 7 4]
Evaluating CSP-SVM
Similar subject: 6
train size (144, 6)
test size  (114, 6)
Source6 current score: 0.81

Evaluating CSP-SVM
Similar subject: 7
train size (144, 6)
test size  (114, 6)
Source7 current score: 0.92

Evaluating CSP-SVM
Similar subject: 4
train size (144, 6)
test size  (114, 6)
Source4 current score: 0.71


Iteration 2
mmd result: [1 4 6 7 5 2 3 9 8]
sim subj: [4 6 7]
Evaluating CSP-SVM
Similar subject: 4
train size (14

In [112]:
# Tabulate the version-7 results: one column per model, one row per
# target subject plus the 'avg' summary row.
df_sc7 = dframe(data=all_sc7)
df_sc7

Unnamed: 0,CSP-SVM,EA-CSP-SVM,CSP-TCA-SVM,EA-CSP-TCA-SVM
1,0.88 +/- 0.04,0.87 +/- 0.02,0.76 +/- 0.15,0.86 +/- 0.04
2,0.56 +/- 0.02,0.59 +/- 0.03,0.52 +/- 0.03,0.58 +/- 0.04
3,0.91 +/- 0.02,0.97 +/- 0.01,0.94 +/- 0.02,0.97 +/- 0.01
4,0.69 +/- 0.04,0.64 +/- 0.03,0.66 +/- 0.02,0.64 +/- 0.03
5,0.51 +/- 0.02,0.57 +/- 0.03,0.54 +/- 0.03,0.57 +/- 0.03
6,0.67 +/- 0.04,0.62 +/- 0.04,0.62 +/- 0.06,0.6 +/- 0.05
7,0.7 +/- 0.02,0.68 +/- 0.03,0.68 +/- 0.05,0.67 +/- 0.04
8,0.96 +/- 0.01,0.93 +/- 0.02,0.96 +/- 0.01,0.95 +/- 0.01
9,0.8 +/- 0.04,0.82 +/- 0.02,0.81 +/- 0.04,0.8 +/- 0.03
avg,0.74 +/- 0.15,0.74 +/- 0.15,0.72 +/- 0.15,0.74 +/- 0.15


## p-value

In [58]:
from scipy import stats

In [59]:
# Compare two models with Welch's unequal-variance t-test on their pooled scores.
# `data` is the only place that selects which result set is tested; it is
# indexed as data[model][subject] -> sequence of scores.
data = pval_sc6
subjects = range(1, 10)
models = ['CSP-SVM','CSP-TCA-SVM', 'EA-CSP-TCA-SVM']
m1 = 0
m2 = 1

print(f'p value between {models[m1]} and {models[m2]}')
print('==='*15)

# Warn when the two models do not cover the same number of subjects
if len(data[models[m2]]) != len(data[models[m1]]):
    print('undefined')

# Pool the scores of every subject into one flat sample per model.
# The leading empty array keeps the result a float array even when
# `subjects` is empty.
data1 = np.concatenate([np.array([])] + [data[models[m1]][s] for s in subjects])
data2 = np.concatenate([np.array([])] + [data[models[m2]][s] for s in subjects])

# equal_var=False -> Welch's t-test (no equal-variance assumption)
_, pval = stats.ttest_ind(a=data1, b=data2, equal_var=False)

print(f'p value between {models[m1]} and {models[m2]} : {pval:.3f}')
print(f'p value between {models[m1]} and {models[m2]} : {pval:.2e}')

p value between CSP-SVM and CSP-TCA-SVM
p value between CSP-SVM and CSP-TCA-SVM : 0.385
p value between CSP-SVM and CSP-TCA-SVM : 3.85e-01


## 20210323 - Final Note
This is as far as I will work in this file; it will be moved to the dev folder. In the next file I will only work with `evaluate-all` function 6.