> __Purpose:__ Implement an initial privacy attack to quantify how much re-identiifcation and linkability risk exists from personalization parameters (the decoder used in the co-adaptation algorithm). Thus, implement a basic ML model to link the decoder matrices back to the corresponding subject.  

-- 7 Subjects, therefore pure guessing would be 14.28% correct on average

In [1]:
import numpy as np
import matplotlib.pyplot as plt
# import seaborn
import time
import pandas as pd
import pickle
import random
from kcs_ml_infr import *

random.seed(a=1)

# Load In Data

In [2]:
keys = ['METACPHS_S106', 'METACPHS_S107','METACPHS_S108', 'METACPHS_S109', 'METACPHS_S110', 'METACPHS_S111', 'METACPHS_S112']

with open('Data\cphs_data_block1.pickle', 'rb') as handle:
    #refs_block1, poss_block1, dec_vels_block1, int_vel_block1, emgs_block1, Ws_block1, Hs_block1, alphas_block1, pDs_block1, times_block1, conditions_block1 = pickle.load(handle)
    _, _, _, _, _, Ws_block1, _, _, _, times_block1, _ = pickle.load(handle)

with open('Data\cphs_data_block2.pickle', 'rb') as handle:
    #refs_block2, poss_block2, dec_vels_block2, int_vel_block2, emgs_block2, Ws_block2, Hs_block2, alphas_block2, pDs_block2, times_block2, conditions_block2 = pickle.load(handle)
    _, _, _, _, _, Ws_block2, _, _, _, times_block2, _ = pickle.load(handle)

In [3]:
# Decoder updates
W = Ws_block1[keys[0]][0]
W[1:,:,:].shape # 7199 time points x (decoder dimensions is 2 x 6)
dold = W[0]
update_ix = []
for ix,d in enumerate(W[1:]):
  if (np.array_equal(dold,d)==False):
    update_ix.append(ix)
    dold = d

update_ix.append(len(W) - 1) 
update_ix = np.asarray(update_ix)
update_ix = np.hstack([[0],update_ix])

print(update_ix.shape)
print(update_ix)

(19,)
[    0  1200  2402  3604  4806  6008  7210  8412  9614 10816 12018 13220
 14422 15624 16826 18028 19230 20432 20769]


I'm just making the flatten array input DF here

In [4]:
dec_flattened_df1 = pd.DataFrame(columns=["Subject", "Condition", "Update Number", "Flattened Decoder"])
dec_flattened_df2 = pd.DataFrame(columns=["Subject", "Condition", "Update Number", "Flattened Decoder"])

dec_flattened_df1.head()

Unnamed: 0,Subject,Condition,Update Number,Flattened Decoder


In [5]:
num_conds = 8

t0 = time.time()

for key in keys:
    participant_dec1 = Ws_block1[key]
    participant_dec2 = Ws_block2[key]
    
    for my_cond in range(num_conds):
        for update_number, update_idx in enumerate(update_ix):
            dec_flattened_df1.loc[len(dec_flattened_df1)] = [key, my_cond, update_number, np.ravel(participant_dec1[my_cond, update_idx, :, :])]
            dec_flattened_df2.loc[len(dec_flattened_df2)] = [key, my_cond, update_number, np.ravel(participant_dec2[my_cond, update_idx, :, :])]
        
t1 = time.time()
total = t1-t0  
print(total)

dec_flattened_df = pd.concat((dec_flattened_df1, dec_flattened_df2))

3.7873847484588623


In [6]:
print(dec_flattened_df.shape)
dec_flattened_df.head()

(2128, 4)


Unnamed: 0,Subject,Condition,Update Number,Flattened Decoder
0,METACPHS_S106,0,0,"[0.002722144351611262, 0.002605931562722017, 0..."
1,METACPHS_S106,0,1,"[0.002722144351611262, 0.002605931562722017, 0..."
2,METACPHS_S106,0,2,"[-0.34157085409453486, 0.08410593293763585, -0..."
3,METACPHS_S106,0,3,"[-0.1738624752800762, 0.003705171262358347, 0...."
4,METACPHS_S106,0,4,"[0.8321061501817386, -1.3261242289666402, 0.28..."


# Classification
1. Logistic Regression
2. K-Nearest Neighbor
3. Gaussian Naive Bayes
4. Linear SVC
5. Stochastic Gradient Descent
6. Decision Tree Classifier
7. Gradient Boosting Trees

In [7]:
# Machine learning
from sklearn.model_selection import train_test_split
from sklearn import model_selection, tree, preprocessing, metrics, linear_model

from sklearn.svm import LinearSVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.tree import DecisionTreeClassifier

In [8]:
# Set the number of k-folds
cv = 5
# Changed to 5 from 10 because the smallest class in cross val only has 7 instances
# NOTE: I also changed the default cv number in my function to be 5 because of this

In [9]:
key_to_num = dict()
num_to_key = dict()
for idx, key in enumerate(keys):
    key_to_num[key] = idx
    num_to_key[idx] = key

In [10]:
# Result logs
my_metrics_cols = ['Algorithm', 'One Off Acc', 'CV Acc', 'K Folds']
res_df = pd.DataFrame(columns=my_metrics_cols)

In [11]:
#my_models = [LogisticRegression(), KNeighborsClassifier(), GradientBoostingClassifier()]
my_models = [LogisticRegression(), KNeighborsClassifier(), GaussianNB(), LinearSVC(), SGDClassifier(), DecisionTreeClassifier(), GradientBoostingClassifier()]

# Compare Accuracies Of First and Last Decoders

In [12]:
dec_flattened_df.head()

Unnamed: 0,Subject,Condition,Update Number,Flattened Decoder
0,METACPHS_S106,0,0,"[0.002722144351611262, 0.002605931562722017, 0..."
1,METACPHS_S106,0,1,"[0.002722144351611262, 0.002605931562722017, 0..."
2,METACPHS_S106,0,2,"[-0.34157085409453486, 0.08410593293763585, -0..."
3,METACPHS_S106,0,3,"[-0.1738624752800762, 0.003705171262358347, 0...."
4,METACPHS_S106,0,4,"[0.8321061501817386, -1.3261242289666402, 0.28..."


In [13]:
flat_dec_expanded_df = pd.DataFrame()
for my_row in range(dec_flattened_df.shape[0]):
    test=pd.DataFrame(dec_flattened_df.iloc[my_row,3]).T
    flat_dec_expanded_df = pd.concat((flat_dec_expanded_df, test))

flat_dec_expanded_df.reset_index(inplace=True, drop=True)
flat_dec_expanded_df['Update Number'] = list(dec_flattened_df['Update Number'])
flat_dec_expanded_df['Subject'] = list(dec_flattened_df['Subject'])

print(flat_dec_expanded_df.shape)
flat_dec_expanded_df.head()

(2128, 130)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,120,121,122,123,124,125,126,127,Update Number,Subject
0,0.002722,0.002606,0.003087,0.00961,0.009477,0.004051,0.000124,0.001421,0.001815,0.003185,...,0.004476,0.004007,0.006816,0.005033,0.008904,0.005124,0.006046,0.005625,0,METACPHS_S106
1,0.002722,0.002606,0.003087,0.00961,0.009477,0.004051,0.000124,0.001421,0.001815,0.003185,...,0.004476,0.004007,0.006816,0.005033,0.008904,0.005124,0.006046,0.005625,1,METACPHS_S106
2,-0.341571,0.084106,-0.540574,0.44432,-1.65632,-1.081761,1.806691,1.327715,-1.441646,-1.758564,...,0.8488,-0.828621,-0.453219,0.081089,0.411674,0.070501,1.248486,-0.666425,2,METACPHS_S106
3,-0.173862,0.003705,0.407216,-0.515222,-0.217008,-0.418798,0.288675,0.172723,0.290123,-0.836033,...,0.015547,-0.266492,-0.287574,-0.069196,-0.461702,-0.393759,0.979884,1.426735,3,METACPHS_S106
4,0.832106,-1.326124,0.28719,1.302253,-1.523494,-1.953003,1.082167,-3.010511,0.697523,-0.469502,...,0.088116,-0.280073,0.022182,-0.648191,1.269775,0.324295,0.327097,-2.468706,4,METACPHS_S106


## Functionalized Testing As A Function of Decoder Update Number

In [14]:
def nth_decoder_model(flat_dec_expanded_df, n, my_models, key_to_num_dict=key_to_num, my_metrics_cols=['Algorithm', 'One Off Acc', 'CV Acc', 'K Folds'], cv=5, test=False):
    nth_update_idxs = flat_dec_expanded_df[~(flat_dec_expanded_df['Update Number'] == n)].index
    
    dec_df = flat_dec_expanded_df.drop(nth_update_idxs)
    dec_labels_df = pd.DataFrame(dec_df['Subject'].map(key_to_num_dict))
    dec_df.drop(['Subject', 'Update Number'], axis=1, inplace=True)
    
    X_train, y_train, X_test, y_test, X_val, y_val = train_test_val_split(dec_df, dec_labels_df)
    y_train = np.ravel(y_train)

    dec_res_df = pd.DataFrame(columns=my_metrics_cols)
    print("TRAINING")
    for model_num, model in enumerate(my_models):
        print(f"{model_num} of {len(my_models)}")
        dec_res_df = train_model(model, X_train, y_train, cv, dec_res_df)
        
    test_df = pd.DataFrame(columns=['Algorithm', 'CV Acc', 'Test Acc', 'K Folds'])
    if test:
        print("TESTING")
        for model in my_models:
            print(f"{model_num} of {len(my_models)}")
            test_df = test_model(model, X_train, y_train, X_test, y_test, test_df, cv)
            
    return dec_res_df, test_df

## n=0

In [15]:
zero_df, zero_test_df = nth_decoder_model(flat_dec_expanded_df, 0, my_models, test=True)
zero_test_df.head(10)

TRAINING
0 of 7
1 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


2 of 7
3 of 7
4 of 7
5 of 7
6 of 7
TESTING
6 of 7
6 of 7
6 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


6 of 7
6 of 7
6 of 7
6 of 7


Unnamed: 0,Algorithm,CV Acc,Test Acc,K Folds
0,LogisticRegression(),14.103,5.882,5
0,KNeighborsClassifier(),96.154,100.0,5
0,GaussianNB(),100.0,100.0,5
0,LinearSVC(),14.103,8.824,5
0,SGDClassifier(),38.462,5.882,5
0,DecisionTreeClassifier(),100.0,100.0,5
0,GradientBoostingClassifier(),100.0,100.0,5


## n=1

In [16]:
one_df, one_test_df = nth_decoder_model(flat_dec_expanded_df, 1, my_models, test=True)
one_test_df.head(10)

TRAINING
0 of 7
1 of 7
2 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


3 of 7
4 of 7
5 of 7
6 of 7
TESTING
6 of 7
6 of 7
6 of 7
6 of 7
6 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


6 of 7
6 of 7


Unnamed: 0,Algorithm,CV Acc,Test Acc,K Folds
0,LogisticRegression(),14.103,5.882,5
0,KNeighborsClassifier(),96.154,100.0,5
0,GaussianNB(),100.0,100.0,5
0,LinearSVC(),14.103,8.824,5
0,SGDClassifier(),34.615,5.882,5
0,DecisionTreeClassifier(),100.0,100.0,5
0,GradientBoostingClassifier(),100.0,100.0,5


## n=2

In [17]:
two_df, two_test_df = nth_decoder_model(flat_dec_expanded_df, 2, my_models, test=True)
two_test_df.head(10)

TRAINING
0 of 7
1 of 7
2 of 7
3 of 7
4 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


5 of 7
6 of 7
TESTING
6 of 7
6 of 7
6 of 7
6 of 7
6 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


6 of 7
6 of 7


Unnamed: 0,Algorithm,CV Acc,Test Acc,K Folds
0,LogisticRegression(),28.205,29.412,5
0,KNeighborsClassifier(),17.949,17.647,5
0,GaussianNB(),39.744,35.294,5
0,LinearSVC(),30.769,29.412,5
0,SGDClassifier(),33.333,26.471,5
0,DecisionTreeClassifier(),25.641,17.647,5
0,GradientBoostingClassifier(),32.051,20.588,5


## n=17

In [18]:
svntn_df, svntn_test_df = nth_decoder_model(flat_dec_expanded_df, 17, my_models, test=True)
svntn_test_df.head(10)

TRAINING
0 of 7
1 of 7
2 of 7
3 of 7
4 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


5 of 7
6 of 7
TESTING
6 of 7
6 of 7
6 of 7
6 of 7
6 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


6 of 7
6 of 7


Unnamed: 0,Algorithm,CV Acc,Test Acc,K Folds
0,LogisticRegression(),74.359,85.294,5
0,KNeighborsClassifier(),57.692,67.647,5
0,GaussianNB(),62.821,64.706,5
0,LinearSVC(),70.513,82.353,5
0,SGDClassifier(),62.821,61.765,5
0,DecisionTreeClassifier(),44.872,44.118,5
0,GradientBoostingClassifier(),46.154,67.647,5


## n=18

In [19]:
eigtn_df, eigtn_test_df = nth_decoder_model(flat_dec_expanded_df, 18, my_models, test=True)
eigtn_test_df.head(10)

TRAINING
0 of 7
1 of 7
2 of 7
3 of 7
4 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


5 of 7
6 of 7
TESTING
6 of 7
6 of 7
6 of 7
6 of 7
6 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


6 of 7
6 of 7


Unnamed: 0,Algorithm,CV Acc,Test Acc,K Folds
0,LogisticRegression(),71.795,70.588,5
0,KNeighborsClassifier(),46.154,50.0,5
0,GaussianNB(),67.949,64.706,5
0,LinearSVC(),71.795,79.412,5
0,SGDClassifier(),51.282,58.824,5
0,DecisionTreeClassifier(),37.179,41.176,5
0,GradientBoostingClassifier(),42.308,50.0,5


In [20]:
# Get indexes where name column has desired value
zero_update_idxs = flat_dec_expanded_df[~(flat_dec_expanded_df['Update Number'] == 0)].index
first_update_idxs = flat_dec_expanded_df[~(flat_dec_expanded_df['Update Number'] == 1)].index
last_update_idxs = flat_dec_expanded_df[~(flat_dec_expanded_df['Update Number'] == 18)].index
 
# Delete these row indexes from dataFrame
zero_dec_df = flat_dec_expanded_df.drop(zero_update_idxs)
zero_dec_labels_df = pd.DataFrame(zero_dec_df['Subject'].map(key_to_num))
#'Condition', 
zero_dec_df.drop(['Subject', 'Update Number'], axis=1, inplace=True)

# Delete these row indexes from dataFrame
first_dec_df = flat_dec_expanded_df.drop(first_update_idxs)
first_dec_labels_df = pd.DataFrame(first_dec_df['Subject'].map(key_to_num))
#'Condition', 
first_dec_df.drop(['Subject', 'Update Number'], axis=1, inplace=True)

#'Condition', 
last_dec_df = flat_dec_expanded_df.drop(last_update_idxs)
last_dec_labels_df = pd.DataFrame(last_dec_df['Subject'].map(key_to_num))
last_dec_df.drop(['Subject', 'Update Number'], axis=1, inplace=True)

In [21]:
#print(first_dec_df.shape)
#first_dec_df.head()

print(zero_dec_df.shape)
zero_dec_df.head()

(112, 128)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,0.002722,0.002606,0.003087,0.00961,0.009477,0.004051,0.000124,0.001421,0.001815,0.003185,...,0.005588,0.005065,0.004476,0.004007,0.006816,0.005033,0.008904,0.005124,0.006046,0.005625
19,0.002722,0.002606,0.003087,0.00961,0.009477,0.004051,0.000124,0.001421,0.001815,0.003185,...,0.005588,0.005065,0.004476,0.004007,0.006816,0.005033,0.008904,0.005124,0.006046,0.005625
38,-0.000533,-0.001358,-0.009211,-0.003424,-0.002861,-0.008239,-0.006398,-0.009825,-0.006713,-0.005338,...,-0.002812,-0.009092,-0.004493,-0.006459,-0.00138,-0.001834,-0.006104,-0.001417,-0.000446,-0.000528
57,-0.000533,-0.001358,-0.009211,-0.003424,-0.002861,-0.008239,-0.006398,-0.009825,-0.006713,-0.005338,...,-0.002812,-0.009092,-0.004493,-0.006459,-0.00138,-0.001834,-0.006104,-0.001417,-0.000446,-0.000528
76,0.002722,0.002606,0.003087,0.00961,0.009477,0.004051,0.000124,0.001421,0.001815,0.003185,...,0.005588,0.005065,0.004476,0.004007,0.006816,0.005033,0.008904,0.005124,0.006046,0.005625


In [22]:
#first_dec_labels_df.head()

zero_dec_labels_df.head()

Unnamed: 0,Subject
0,0
19,0
38,0
57,0
76,0


Evaluate Accuracy when only using the first decoder

In [23]:
#X_train, y_train, X_test, y_test, X_val, y_val = train_test_val_split(first_dec_df, first_dec_labels_df)
X_train, y_train, X_test, y_test, X_val, y_val = train_test_val_split(zero_dec_df, zero_dec_labels_df)
y_train = np.ravel(y_train)

print(X_train.shape)
X_train.head()

(78, 128)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
1121,-0.000533,-0.001358,-0.009211,-0.003424,-0.002861,-0.008239,-0.006398,-0.009825,-0.006713,-0.005338,...,-0.002812,-0.009092,-0.004493,-0.006459,-0.00138,-0.001834,-0.006104,-0.001417,-0.000446,-0.000528
1235,0.006337,0.005173,0.007767,0.003164,9e-05,0.007068,0.001793,0.00664,0.001389,0.000453,...,0.009929,0.005151,0.001664,0.008477,0.009138,0.002877,0.002956,0.007657,0.007265,0.000872
1406,-0.005214,-0.001919,-0.003886,-0.001386,-0.007317,-0.003193,-0.009982,-0.001711,-0.001661,-0.005257,...,-0.006882,-0.009146,-0.001946,-0.00987,-0.007666,-0.008545,-0.006092,-0.00633,-0.005208,-0.0059
19,0.002722,0.002606,0.003087,0.00961,0.009477,0.004051,0.000124,0.001421,0.001815,0.003185,...,0.005588,0.005065,0.004476,0.004007,0.006816,0.005033,0.008904,0.005124,0.006046,0.005625
342,-0.005214,-0.001919,-0.003886,-0.001386,-0.007317,-0.003193,-0.009982,-0.001711,-0.001661,-0.005257,...,-0.006882,-0.009146,-0.001946,-0.00987,-0.007666,-0.008545,-0.006092,-0.00633,-0.005208,-0.0059


In [24]:
#first_dec_res_df = pd.DataFrame(columns=my_metrics_cols)
zero_dec_res_df = pd.DataFrame(columns=my_metrics_cols)

for model_num, model in enumerate(my_models):
    print(f"{model_num} of {len(my_models)}")
    #first_dec_res_df = train_model(model, X_train, y_train, cv, first_dec_res_df)
    zero_dec_res_df = train_model(model, X_train, y_train, cv, zero_dec_res_df)

#first_dec_res_df.head(10)
zero_dec_res_df.head(10)

0 of 7
1 of 7
2 of 7
3 of 7
4 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


5 of 7
6 of 7


Unnamed: 0,Algorithm,One Off Acc,CV Acc,K Folds
0,LogisticRegression(),17.949,14.103,5
0,KNeighborsClassifier(),100.0,96.154,5
0,GaussianNB(),100.0,100.0,5
0,LinearSVC(),26.923,14.103,5
0,SGDClassifier(),30.769,70.513,5
0,DecisionTreeClassifier(),100.0,100.0,5
0,GradientBoostingClassifier(),100.0,100.0,5


That's pretty high accuracy for a decoder that purportedly doesn't contain any information...

In [25]:
func_test_df = pd.DataFrame(columns=['Algorithm', 'CV Acc', 'Test Acc', 'K Folds'])
for model in my_models:
    func_test_df = test_model(model, X_train, y_train, X_test, y_test, func_test_df, cv, verbose=True)
    
func_test_df.head(10)

LogisticRegression()
CV Accuracy: 14.103
Test Accuracy: 5.882

KNeighborsClassifier()
CV Accuracy: 96.154
Test Accuracy: 100.0

GaussianNB()
CV Accuracy: 100.0
Test Accuracy: 100.0

LinearSVC()
CV Accuracy: 14.103
Test Accuracy: 8.824

SGDClassifier()
CV Accuracy: 70.513
Test Accuracy: 82.353



  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


DecisionTreeClassifier()
CV Accuracy: 100.0
Test Accuracy: 100.0

GradientBoostingClassifier()
CV Accuracy: 100.0
Test Accuracy: 100.0



Unnamed: 0,Algorithm,CV Acc,Test Acc,K Folds
0,LogisticRegression(),14.103,5.882,5
0,KNeighborsClassifier(),96.154,100.0,5
0,GaussianNB(),100.0,100.0,5
0,LinearSVC(),14.103,8.824,5
0,SGDClassifier(),70.513,82.353,5
0,DecisionTreeClassifier(),100.0,100.0,5
0,GradientBoostingClassifier(),100.0,100.0,5


In [26]:
manual_test_df = pd.DataFrame(columns=['Algorithm', 'CV Acc', 'Test Acc', 'K Folds'])

for model in my_models:
    _, _, acc_cv, trained_model = fit_ml_algo(model, X_train, y_train, cv, testing=True)
    y_pred = trained_model.predict(X_test)
    test_acc = round(metrics.accuracy_score(y_test, y_pred) * 100, 6)

    temp_df = pd.DataFrame([str(model), acc_cv, test_acc, cv], index=my_metrics_cols).T
    manual_test_df = pd.concat((manual_test_df, temp_df))

manual_test_df.head(10)

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


Unnamed: 0,Algorithm,CV Acc,Test Acc,K Folds,One Off Acc
0,LogisticRegression(),5.882353,,5,14.103
0,KNeighborsClassifier(),100.0,,5,96.154
0,GaussianNB(),100.0,,5,100.0
0,LinearSVC(),8.823529,,5,14.103
0,SGDClassifier(),61.764706,,5,41.026
0,DecisionTreeClassifier(),100.0,,5,100.0
0,GradientBoostingClassifier(),100.0,,5,100.0


In [27]:
test_acc

100.0

In [28]:
sum(np.array(y_test) - np.array(y_pred))

array([  56,   56,   22,   90,  -80, -114,  -46,   22,  -12,  -46,  -80,
        -12,   90,   56,  -12,  -12,  -80,  -12,  -12,  -12,   22,   90,
         90, -114,  -80,   90,   22,   56,   90,   22,  -12,  -46,  -12,
        -80], dtype=int64)

In [29]:
y_test[0:5]

Unnamed: 0,Subject
266,1
247,1
399,2
38,0
836,5


In [30]:
y_pred[0:5]

array([1, 1, 2, 0, 5], dtype=int64)

Evaluate Accuracy when only using the last decoder

In [31]:
X_train, y_train, X_test, y_test, X_val, y_val = train_test_val_split(last_dec_df, last_dec_labels_df)
y_train = np.ravel(y_train)

print(X_train.shape)
X_train.head()

(78, 128)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
1139,0.414126,0.614378,-0.508587,-0.241768,0.17048,-0.80234,0.435517,-1.026645,0.091916,-0.841928,...,0.237292,0.267282,-0.326636,-1.266925,-0.65255,1.500895,-1.926677,0.383244,0.493479,-0.476878
1253,-0.878678,0.396983,1.760158,-0.746273,-0.146237,0.365797,-0.424158,2.323568,-1.326084,2.290065,...,0.159107,-0.08221,0.398153,0.191123,-0.18301,0.892171,-0.632417,0.030742,-0.441663,0.067433
1424,0.235174,-0.46494,0.421214,-1.056778,0.136869,0.641214,-0.883863,0.705181,-0.813906,0.519847,...,-0.627237,0.118435,0.261921,0.694501,0.040589,0.114659,0.539587,-0.054543,0.297198,0.316903
37,-0.1716,0.043457,-0.547486,0.061096,-0.110454,2.220624,0.163372,-0.182332,-0.137738,0.390813,...,0.682716,-0.58251,1.310635,0.437512,0.690045,-0.880961,-0.49045,0.537525,0.779871,0.095508
360,0.75557,-1.134471,-0.066264,-0.919892,-2.537963,-1.661552,1.059168,1.057877,-1.285315,0.516275,...,0.293442,-0.589343,0.128415,0.183144,-0.166083,0.824607,0.39511,0.29678,0.093886,-0.15564


In [32]:
last_dec_res_df = pd.DataFrame(columns=my_metrics_cols)

for model_num, model in enumerate(my_models):
    print(f"{model_num} of {len(my_models)}")
    last_dec_res_df = train_model(model, X_train, y_train, cv, last_dec_res_df)

0 of 7
1 of 7
2 of 7
3 of 7
4 of 7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


5 of 7
6 of 7


In [33]:
last_dec_res_df.head(10)

Unnamed: 0,Algorithm,One Off Acc,CV Acc,K Folds
0,LogisticRegression(),100.0,71.795,5
0,KNeighborsClassifier(),67.949,46.154,5
0,GaussianNB(),92.308,67.949,5
0,LinearSVC(),100.0,71.795,5
0,SGDClassifier(),100.0,51.282,5
0,DecisionTreeClassifier(),100.0,34.615,5
0,GradientBoostingClassifier(),100.0,41.026,5
