In [62]:
import pandas as pd
import numpy as np
import os
import sys
import json

In [63]:
from sklearn.preprocessing import StandardScaler

In [64]:
import torch
from scipy.ndimage import median_filter
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score, accuracy_score
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
from torch import optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import shap
from sklearn.cluster import DBSCAN, AgglomerativeClustering
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

In [65]:
from pickle import load
from tqdm import tqdm
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt


In [66]:
from sklearn.metrics import silhouette_score

In [67]:
data = pd.read_csv('../data/gesture-recognition-and-biometrics-electromyogram-grabmyo-1.0.2/features_v2.csv')
feature_cols = [c for c in data.columns if "_" in c]
for col in feature_cols:
    data[col] = data[col].clip(data[col].mean() - (3*data[col].std()), data[col].mean() + (3*data[col].std()))

In [68]:
class Encoder(nn.Module):
    def __init__(self, input_dim, embed_dim):
        super().__init__()
        self.l1 = nn.Linear(input_dim, 128)
        
        self.l2 = nn.Linear(128, 128)
        self.l3 = nn.Linear(128, 128)
        self.l4 = nn.Linear(128, embed_dim)
        self.dropout = nn.Dropout(p=0.3)
        self.relu = nn.ReLU()
    def forward(self, x):
        x = self.relu(self.l1(x))
        x = self.dropout(x)
        x = self.relu(self.l2(x))
        x = self.dropout(x)
        x = self.relu(self.l3(x))
        x = self.dropout(x)
        x = self.l4(x)
        return x


class ClassificationHead(nn.Module):
    def __init__(self,embed_dim, output_dim):
        super().__init__()
        self.l1 = nn.Linear(embed_dim, 64)
        self.l2 = nn.Linear(64, output_dim)
        self.relu = nn.ReLU()
    def forward(self, x):
        x = self.relu(self.l1(x))
        x = self.l2(x)
        return x

class UserClassifier(nn.Module):
    def __init__(self, input_dim, embed_dim, output_dim):
        super().__init__()
        self.encoder = Encoder(input_dim, embed_dim)
        self.decoder = ClassificationHead(embed_dim, output_dim)
        
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [69]:
train_parts = list(range(1,39))
test_parts = list(range(39,44))

In [70]:
train_df = data[data["participant"].isin(train_parts)].reset_index()
test_df = data[data["participant"].isin(test_parts)].reset_index()

In [71]:
scaler  = load(open('sub_scaler.pkl', 'rb'))

In [72]:
x_train = train_df.loc[:, feature_cols].values
y_train = (train_df.loc[:, 'participant']).values
g_train = (train_df.loc[:, 'gesture']).values
s_train = (train_df.loc[:, 'session']).values

x_test = test_df.loc[:, feature_cols].values
y_test = (test_df.loc[:, 'participant']).values
g_test = (test_df.loc[:, 'gesture']).values
s_test = (test_df.loc[:, 'session']).values

In [73]:
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [74]:
encoder = Encoder(x_train.shape[1], 16)

In [75]:
encoder.load_state_dict(torch.load('sub_encoder.pth'), strict=False)

<All keys matched successfully>

In [76]:
def embed(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    y_pred = []

    with torch.no_grad():
        for batch_idx, (inputs, labels) in enumerate(test_loader):
            outputs = model(inputs)
            # _, predicted = torch.max(outputs, 1)
            y_pred.extend(outputs)

    return torch.stack(y_pred)

In [77]:
## Reading centroids
with open("centroids_v1.json", "r") as file:
    centroids = json.load(file)

In [79]:
## Reading centroids
with open("test_centroids_v1.json", "r") as file:
    test_centroids = json.load(file)

In [80]:
train_dataset = TensorDataset(torch.tensor(x_train).type(torch.float32), torch.tensor(y_train).type(torch.LongTensor))
test_dataset = TensorDataset(torch.tensor(x_test).type(torch.float32), torch.tensor(y_test).type(torch.LongTensor))

In [81]:
batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [82]:
train_embeddings = embed(model=encoder, test_loader=train_loader)
test_embeddings = embed(model=encoder, test_loader=test_loader)

In [83]:
centroids.keys()

dict_keys(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17'])

In [84]:
gestures = range(1,18)

In [87]:
def predict_cluster(centroids, embedding, gesture, parts):
    cents = centroids[str(gesture)]
    min_dist = 1e10
    min_part = None
    for i in parts:
        c = cents[str(i)]['centroid']
        dist = np.linalg.norm(embedding - c)
        if dist< min_dist:
            min_dist = dist
            min_part = i
    return min_part
    
    

In [94]:
i = np.where(g_train==10)
embeds = train_embeddings[i]
labels = y_train[i]

In [95]:
(embeds[10,:]).shape

torch.Size([16])

In [96]:
preds = []
for e in tqdm(range(len(embeds))):
    embedding = np.array(embeds[e,:])
    # print(embedding.shape)
    pred = predict_cluster(centroids, np.array(embedding), 10, train_parts)
    preds.append(pred)

100%|██████████████████████████████████████████████████████████████████████████████| 798/798 [00:00<00:00, 2247.91it/s]


In [97]:
accuracy_score(preds, labels)

0.9223057644110275

In [98]:
g_accs = {}
for g in tqdm(gestures):
    i = np.where(g_train==g)
    embeds = train_embeddings[i]
    labels = y_train[i]
    preds = []
    for e in tqdm(range(len(embeds))):
        embedding = np.array(embeds[e,:])
        # print(embedding.shape)
        pred = predict_cluster(centroids, np.array(embedding), g, train_parts)
        preds.append(pred)
    g_accs[g] = accuracy_score(preds, labels)

  0%|                                                                                           | 0/17 [00:00<?, ?it/s]
  0%|                                                                                          | 0/798 [00:00<?, ?it/s][A
 19%|███████████████▏                                                              | 155/798 [00:00<00:00, 1534.67it/s][A
 39%|██████████████████████████████▏                                               | 309/798 [00:00<00:00, 1406.01it/s][A
 68%|████████████████████████████████████████████████████▉                         | 542/798 [00:00<00:00, 1806.47it/s][A
100%|██████████████████████████████████████████████████████████████████████████████| 798/798 [00:00<00:00, 1645.36it/s][A
  6%|████▉                                                                              | 1/17 [00:00<00:07,  2.03it/s]
  0%|                                                                                          | 0/798 [00:00<?, ?it/s][A
 17%|█████████████    

In [99]:
g_accs_test = {}
for g in tqdm(gestures):
    i = np.where(g_test==g)
    embeds = test_embeddings[i]
    labels = y_test[i]
    preds = []
    for e in tqdm(range(len(embeds))):
        embedding = np.array(embeds[e,:])
        # print(embedding.shape)
        pred = predict_cluster(test_centroids, np.array(embedding), g, test_parts)
        preds.append(pred)
    g_accs_test[g] = accuracy_score(preds, labels)

  0%|                                                                                           | 0/17 [00:00<?, ?it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 105/105 [00:00<00:00, 6562.29it/s][A

100%|██████████████████████████████████████████████████████████████████████████████| 105/105 [00:00<00:00, 4999.51it/s][A

100%|██████████████████████████████████████████████████████████████████████████████| 105/105 [00:00<00:00, 6563.46it/s][A

100%|██████████████████████████████████████████████████████████████████████████████| 105/105 [00:00<00:00, 8077.80it/s][A

100%|██████████████████████████████████████████████████████████████████████████████| 105/105 [00:00<00:00, 8749.07it/s][A
 29%|████████████████████████▍                                                          | 5/17 [00:00<00:00, 43.48it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 105/105 [00:00<00:00, 8755.33it/s][A

100%|████████████

In [100]:
g_accs

{1: 0.899749373433584,
 2: 0.9761904761904762,
 3: 0.9611528822055138,
 4: 0.9598997493734336,
 5: 0.9598997493734336,
 6: 0.9598997493734336,
 7: 0.9649122807017544,
 8: 0.9398496240601504,
 9: 0.9573934837092731,
 10: 0.9223057644110275,
 11: 0.9385964912280702,
 12: 0.9335839598997494,
 13: 0.9486215538847118,
 14: 0.9611528822055138,
 15: 0.9548872180451128,
 16: 0.9423558897243107,
 17: 0.7268170426065163}

In [101]:
g_accs_test

{1: 0.8285714285714286,
 2: 0.8952380952380953,
 3: 0.8761904761904762,
 4: 0.8380952380952381,
 5: 0.8761904761904762,
 6: 0.9238095238095239,
 7: 0.7428571428571429,
 8: 0.8380952380952381,
 9: 0.8571428571428571,
 10: 0.9047619047619048,
 11: 0.8952380952380953,
 12: 0.6952380952380952,
 13: 0.8857142857142857,
 14: 0.8095238095238095,
 15: 0.8666666666666667,
 16: 0.8285714285714286,
 17: 0.580952380952381}


## Bucket based Authentication

### Train

In [28]:
auth_set = set(np.random.choice(range(1, 38), size = 5, replace=False))

In [29]:
auth_set

{8, 16, 28, 30, 36}

In [103]:
def auth(emb:np.ndarray, auth_set:set, centroids, gesture_val, parts)-> bool:
    pred = predict_cluster(centroids, np.array(emb), gesture_val, parts)
    # print(pred)
    if pred in auth_set:
        return 1
    return 0
    # pass

In [31]:
g = 2

In [32]:
auths = []
i = np.where(g_train==g)
embeds = train_embeddings[i]
labels = y_train[i]
for e in tqdm(range(len(embeds))):
    embedding = np.array(embeds[e,:])
    auth_code = auth(embedding, auth_set, centroids, g)
    auths.append(auth_code)

100%|██████████████████████████████████████████████████████████████████████████████| 798/798 [00:00<00:00, 1546.27it/s]


In [33]:
actual_auth = [1 if x in auth_set else 0 for x in labels]

In [34]:
def far(true, preds):
    true = np.array(true)
    preds = np.array(preds)
    far = np.sum((true==0)&(preds==1))
    return far / len(true)

In [35]:
def frr(true, preds):
    true = np.array(true)
    preds = np.array(preds)
    far = np.sum((true==1)&(preds==0))
    return far / len(true)

In [36]:
far(actual_auth, auths)

0.0012531328320802004

In [37]:
frr(actual_auth, auths)

0.0012531328320802004

In [38]:
accuracy_score(actual_auth, auths)

0.9974937343358395

In [39]:
num_exp = 50

In [51]:
aggregates_5_auth = {g:0 for g in gestures}
for g in gestures:
    aggregates_5_auth[g] = {}
    aggregates_5_auth[g]['far'] = 0
    aggregates_5_auth[g]['frr'] = 0
    aggregates_5_auth[g]['accuracy'] = 0
for exp in tqdm(range(num_exp)):
    auth_set = set(np.random.choice(range(1, 38), size = 5, replace=False))
    for g in gestures:
        # aggregates_5_auth[g] = {}
        auths = []
        i = np.where(g_train==g)
        embeds = train_embeddings[i]
        labels = y_train[i]
        # print(len(labels), g)
        for e in range(len(embeds)):
            embedding = np.array(embeds[e,:])
            auth_code = auth(embedding, auth_set, centroids, g, train_parts)
            auths.append(auth_code)
        actual_auth = [1 if x in auth_set else 0 for x in labels]

        aggregates_5_auth[g]['far'] += far(actual_auth, auths)
        aggregates_5_auth[g]['frr'] += frr(actual_auth, auths)
        aggregates_5_auth[g]['accuracy'] += accuracy_score(actual_auth, auths)

for k in aggregates_5_auth:
    aggregates_5_auth[k]['far'] /= num_exp
    aggregates_5_auth[k]['frr'] /= num_exp
    aggregates_5_auth[k]['accuracy'] /= num_exp

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [06:03<00:00,  7.27s/it]


In [55]:
aggregates_1_auth = {g:0 for g in gestures}
for g in gestures:
    aggregates_1_auth[g] = {}
    aggregates_1_auth[g]['far'] = 0
    aggregates_1_auth[g]['frr'] = 0
    aggregates_1_auth[g]['accuracy'] = 0
for exp in tqdm(range(num_exp)):
    auth_set = set(np.random.choice(range(1, 38), size = 1, replace=False))
    for g in gestures:
        # aggregates_5_auth[g] = {}
        auths = []
        i = np.where(g_train==g)
        embeds = train_embeddings[i]
        labels = y_train[i]
        # print(len(labels), g)
        for e in range(len(embeds)):
            embedding = np.array(embeds[e,:])
            auth_code = auth(embedding, auth_set, centroids, g, train_parts)
            auths.append(auth_code)
        actual_auth = [1 if x in auth_set else 0 for x in labels]

        aggregates_1_auth[g]['far'] += far(actual_auth, auths)
        aggregates_1_auth[g]['frr'] += frr(actual_auth, auths)
        aggregates_1_auth[g]['accuracy'] += accuracy_score(actual_auth, auths)

for k in aggregates_5_auth:
    aggregates_1_auth[k]['far'] /= num_exp
    aggregates_1_auth[k]['frr'] /= num_exp
    aggregates_1_auth[k]['accuracy'] /= num_exp

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [05:43<00:00,  6.87s/it]


In [56]:
aggregates_10_auth = {g:0 for g in gestures}
for g in gestures:
    aggregates_10_auth[g] = {}
    aggregates_10_auth[g]['far'] = 0
    aggregates_10_auth[g]['frr'] = 0
    aggregates_10_auth[g]['accuracy'] = 0
for exp in tqdm(range(num_exp)):
    auth_set = set(np.random.choice(range(1, 38), size = 10, replace=False))
    for g in gestures:
        # aggregates_5_auth[g] = {}
        auths = []
        i = np.where(g_train==g)
        embeds = train_embeddings[i]
        labels = y_train[i]
        # print(len(labels), g)
        for e in range(len(embeds)):
            embedding = np.array(embeds[e,:])
            auth_code = auth(embedding, auth_set, centroids, g, train_parts)
            auths.append(auth_code)
        actual_auth = [1 if x in auth_set else 0 for x in labels]

        aggregates_10_auth[g]['far'] += far(actual_auth, auths)
        aggregates_10_auth[g]['frr'] += frr(actual_auth, auths)
        aggregates_10_auth[g]['accuracy'] += accuracy_score(actual_auth, auths)

for k in aggregates_5_auth:
    aggregates_10_auth[k]['far'] /= num_exp
    aggregates_10_auth[k]['frr'] /= num_exp
    aggregates_10_auth[k]['accuracy'] /= num_exp

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [05:39<00:00,  6.80s/it]


In [57]:
aggregates_20_auth = {g:0 for g in gestures}
for g in gestures:
    aggregates_20_auth[g] = {}
    aggregates_20_auth[g]['far'] = 0
    aggregates_20_auth[g]['frr'] = 0
    aggregates_20_auth[g]['accuracy'] = 0
for exp in tqdm(range(num_exp)):
    auth_set = set(np.random.choice(range(1, 38), size = 20, replace=False))
    for g in gestures:
        # aggregates_5_auth[g] = {}
        auths = []
        i = np.where(g_train==g)
        embeds = train_embeddings[i]
        labels = y_train[i]
        # print(len(labels), g)
        for e in range(len(embeds)):
            embedding = np.array(embeds[e,:])
            auth_code = auth(embedding, auth_set, centroids, g)
            auths.append(auth_code)
        actual_auth = [1 if x in auth_set else 0 for x in labels]

        aggregates_20_auth[g]['far'] += far(actual_auth, auths)
        aggregates_20_auth[g]['frr'] += frr(actual_auth, auths)
        aggregates_20_auth[g]['accuracy'] += accuracy_score(actual_auth, auths)

for k in aggregates_20_auth:
    aggregates_20_auth[k]['far'] /= num_exp
    aggregates_20_auth[k]['frr'] /= num_exp
    aggregates_20_auth[k]['accuracy'] /= num_exp

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [05:40<00:00,  6.81s/it]


In [113]:
pd.DataFrame(aggregates_20_auth).T.sort_values(by = "accuracy", ascending=False)

Unnamed: 0,far,frr,accuracy
2,0.004812,0.006817,0.988371
7,0.009273,0.009123,0.981604
4,0.009724,0.010025,0.980251
3,0.009123,0.011454,0.979424
14,0.008446,0.012206,0.979348
5,0.010426,0.010301,0.979273
6,0.010602,0.011278,0.97812
9,0.009825,0.012506,0.977669
15,0.011328,0.011654,0.977018
13,0.012406,0.014261,0.973333


In [112]:
pd.DataFrame(aggregates_10_auth).T.sort_values(by = "accuracy", ascending=False)

Unnamed: 0,far,frr,accuracy
2,0.004085,0.005063,0.990852
7,0.007193,0.006792,0.986015
14,0.00609,0.008571,0.985338
3,0.006817,0.008195,0.984987
5,0.008596,0.007494,0.98391
9,0.008296,0.008371,0.983333
6,0.009398,0.007368,0.983233
4,0.008922,0.00807,0.983008
15,0.009223,0.00807,0.982707
13,0.010351,0.010426,0.979223


In [111]:
pd.DataFrame(aggregates_1_auth).T.sort_values(by = "accuracy", ascending=False)

Unnamed: 0,far,frr,accuracy
2,0.000702,0.000652,0.998647
4,0.000652,0.000877,0.998471
7,0.000877,0.000977,0.998145
3,0.001028,0.000902,0.99807
5,0.000877,0.001053,0.99807
14,0.000902,0.001128,0.99797
9,0.001378,0.000952,0.997669
6,0.000977,0.001504,0.997519
15,0.001278,0.001253,0.997469
11,0.001404,0.001454,0.997143


### test

In [None]:
num_exp = 5

In [105]:
aggregates_test = {g:0 for g in gestures}
for g in gestures:
    aggregates_test[g] = {}
    aggregates_test[g]['far'] = 0
    aggregates_test[g]['frr'] = 0
    aggregates_test[g]['accuracy'] = 0
for exp in tqdm(range(num_exp)):
    auth_set = set(np.random.choice(range(39, 44), size = 2, replace=False))
    for g in gestures:
        # aggregates_5_auth[g] = {}
        auths = []
        i = np.where(g_test==g)
        embeds = test_embeddings[i]
        labels = y_test[i]
        # print(len(labels), g)
        for e in range(len(embeds)):
            embedding = np.array(embeds[e,:])
            auth_code = auth(embedding, auth_set, test_centroids, g, test_parts)
            auths.append(auth_code)
        actual_auth = [1 if x in auth_set else 0 for x in labels]

        aggregates_test[g]['far'] += far(actual_auth, auths)
        aggregates_test[g]['frr'] += frr(actual_auth, auths)
        aggregates_test[g]['accuracy'] += accuracy_score(actual_auth, auths)

for k in aggregates_test:
    aggregates_test[k]['far'] /= num_exp
    aggregates_test[k]['frr'] /= num_exp
    aggregates_test[k]['accuracy'] /= num_exp

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:09<00:00,  5.20it/s]


In [110]:
pd.DataFrame(aggregates_test).T.sort_values(by = "accuracy", ascending=False)


Unnamed: 0,far,frr,accuracy
6,0.022476,0.01981,0.957714
11,0.023048,0.024952,0.952
2,0.02781,0.026857,0.945333
10,0.030476,0.029333,0.94019
3,0.033905,0.030286,0.93581
13,0.037143,0.032,0.930857
8,0.041524,0.039619,0.918857
5,0.042095,0.039048,0.918857
15,0.04419,0.036952,0.918857
4,0.04381,0.040762,0.915429


In [114]:
num_exp = 10
aggregates_test = {g:0 for g in gestures}
for g in gestures:
    aggregates_test[g] = {}
    aggregates_test[g]['far'] = 0
    aggregates_test[g]['frr'] = 0
    aggregates_test[g]['accuracy'] = 0
for exp in tqdm(range(num_exp)):
    auth_set = set(np.random.choice(range(39, 44), size = 1, replace=False))
    for g in gestures:
        # aggregates_5_auth[g] = {}
        auths = []
        i = np.where(g_test==g)
        embeds = test_embeddings[i]
        labels = y_test[i]
        # print(len(labels), g)
        for e in range(len(embeds)):
            embedding = np.array(embeds[e,:])
            auth_code = auth(embedding, auth_set, test_centroids, g, test_parts)
            auths.append(auth_code)
        actual_auth = [1 if x in auth_set else 0 for x in labels]

        aggregates_test[g]['far'] += far(actual_auth, auths)
        aggregates_test[g]['frr'] += frr(actual_auth, auths)
        aggregates_test[g]['accuracy'] += accuracy_score(actual_auth, auths)

for k in aggregates_test:
    aggregates_test[k]['far'] /= num_exp
    aggregates_test[k]['frr'] /= num_exp
    aggregates_test[k]['accuracy'] /= num_exp

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00,  4.95it/s]


In [115]:
pd.DataFrame(aggregates_test).T.sort_values(by = "accuracy", ascending=False)


Unnamed: 0,far,frr,accuracy
6,0.01619,0.015238,0.968571
5,0.015238,0.018095,0.966667
10,0.013333,0.025714,0.960952
13,0.029524,0.013333,0.957143
2,0.033333,0.011429,0.955238
11,0.027619,0.019048,0.953333
9,0.028571,0.02381,0.947619
15,0.033333,0.025714,0.940952
1,0.034286,0.026667,0.939048
3,0.028571,0.034286,0.937143


In [116]:
num_exp = 20
aggregates_test = {g:0 for g in gestures}
for g in gestures:
    aggregates_test[g] = {}
    aggregates_test[g]['far'] = 0
    aggregates_test[g]['frr'] = 0
    aggregates_test[g]['accuracy'] = 0
for exp in tqdm(range(num_exp)):
    auth_set = set(np.random.choice(range(39, 44), size = 3, replace=False))
    for g in gestures:
        # aggregates_5_auth[g] = {}
        auths = []
        i = np.where(g_test==g)
        embeds = test_embeddings[i]
        labels = y_test[i]
        # print(len(labels), g)
        for e in range(len(embeds)):
            embedding = np.array(embeds[e,:])
            auth_code = auth(embedding, auth_set, test_centroids, g, test_parts)
            auths.append(auth_code)
        actual_auth = [1 if x in auth_set else 0 for x in labels]

        aggregates_test[g]['far'] += far(actual_auth, auths)
        aggregates_test[g]['frr'] += frr(actual_auth, auths)
        aggregates_test[g]['accuracy'] += accuracy_score(actual_auth, auths)

for k in aggregates_test:
    aggregates_test[k]['far'] /= num_exp
    aggregates_test[k]['frr'] /= num_exp
    aggregates_test[k]['accuracy'] /= num_exp

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:04<00:00,  4.99it/s]


In [117]:
pd.DataFrame(aggregates_test).T.sort_values(by = "accuracy", ascending=False)


Unnamed: 0,far,frr,accuracy
6,0.02381,0.021429,0.954762
10,0.029524,0.030476,0.94
11,0.031905,0.028571,0.939524
2,0.033333,0.028571,0.938095
13,0.03381,0.035714,0.930476
3,0.036667,0.037619,0.925714
5,0.038095,0.03619,0.925714
15,0.041429,0.04,0.918571
9,0.044286,0.040476,0.915238
4,0.046667,0.045714,0.907619
