In [1]:
import pandas as pd
import numpy as np
import os
import sys
import json

In [2]:
from sklearn.preprocessing import StandardScaler

In [21]:
import torch
from scipy.ndimage import median_filter
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score, accuracy_score
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
from torch import optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import shap
from sklearn.cluster import DBSCAN, AgglomerativeClustering
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

In [4]:
from pickle import load
from tqdm import tqdm
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt


In [5]:
from sklearn.metrics import silhouette_score

In [6]:
# Load the precomputed feature table.
data = pd.read_csv('../data/gesture-recognition-and-biometrics-electromyogram-grabmyo-1.0.2/features_v2.csv')

# Feature columns are the ones whose name contains an underscore.
feature_cols = [name for name in data.columns if "_" in name]

# Clip every feature to mean +/- 3 standard deviations. The statistics are
# taken over the whole table, before any train/test split.
for col in feature_cols:
    center = data[col].mean()
    spread = 3 * data[col].std()
    data[col] = data[col].clip(center - spread, center + spread)

In [7]:
class Encoder(nn.Module):
    """Fully connected encoder that maps a feature vector to an embedding.

    Three 128-unit linear layers, each followed by ReLU and dropout (p=0.3),
    then a final linear projection to ``embed_dim`` with no activation.

    Args:
        input_dim: Size of the last dimension of the input.
        embed_dim: Size of the returned embedding.
    """

    def __init__(self, input_dim, embed_dim):
        super().__init__()
        hidden = 128
        # The attribute names l1..l4 are the state_dict keys; keep them stable
        # so saved checkpoints continue to load.
        self.l1 = nn.Linear(input_dim, hidden)
        self.l2 = nn.Linear(hidden, hidden)
        self.l3 = nn.Linear(hidden, hidden)
        self.l4 = nn.Linear(hidden, embed_dim)
        self.dropout = nn.Dropout(p=0.3)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the embedding of ``x`` (output of the last linear layer)."""
        for layer in (self.l1, self.l2, self.l3):
            x = self.dropout(self.relu(layer(x)))
        return self.l4(x)


class ClassificationHead(nn.Module):
    """Two-layer head applied on top of an embedding.

    A 64-unit linear layer with ReLU, followed by a linear layer producing
    ``output_dim`` values with no final activation.

    Args:
        embed_dim: Size of the incoming embedding.
        output_dim: Number of output units.
    """

    def __init__(self, embed_dim, output_dim):
        super().__init__()
        self.l1 = nn.Linear(embed_dim, 64)
        self.l2 = nn.Linear(64, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the unnormalised outputs of the final linear layer."""
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)

class UserClassifier(nn.Module):
    """Encoder followed by a classification head.

    Args:
        input_dim: Size of the input feature vector (passed to ``Encoder``).
        embed_dim: Size of the intermediate embedding.
        output_dim: Number of outputs produced by ``ClassificationHead``.
    """

    def __init__(self, input_dim, embed_dim, output_dim):
        super().__init__()
        # Attribute names are part of the state_dict key prefixes
        # ("encoder.*" / "decoder.*"); do not rename them.
        self.encoder = Encoder(input_dim, embed_dim)
        self.decoder = ClassificationHead(embed_dim, output_dim)

    def forward(self, x):
        """Embed ``x`` with the encoder and return the head's outputs."""
        return self.decoder(self.encoder(x))

In [8]:
# Participant-level split: ids 1-38 are used for training, 39-43 are held out.
train_parts = [*range(1, 39)]
test_parts = [*range(39, 44)]

In [9]:
# Split rows by participant id. reset_index() is called without drop=True, so
# the previous row index is kept as an extra "index" column in both frames.
train_df = data.loc[data["participant"].isin(train_parts)].reset_index()
test_df = data.loc[data["participant"].isin(test_parts)].reset_index()

In [10]:
# Load the previously saved scaler object. The file is opened in a `with`
# block so the handle is closed deterministically instead of being leaked.
# NOTE(security): unpickling can execute arbitrary code -- only load
# 'sub_scaler.pkl' if it comes from a trusted source.
with open('sub_scaler.pkl', 'rb') as scaler_file:
    scaler = load(scaler_file)

In [11]:
# Training split: feature matrix plus participant / gesture / session columns
# as NumPy arrays (row order matches train_df).
x_train = train_df[feature_cols].to_numpy()
y_train = train_df['participant'].to_numpy()
g_train = train_df['gesture'].to_numpy()
s_train = train_df['session'].to_numpy()

# Held-out split, same layout.
x_test = test_df[feature_cols].to_numpy()
y_test = test_df['participant'].to_numpy()
g_test = test_df['gesture'].to_numpy()
s_test = test_df['session'].to_numpy()

In [12]:
# Apply the loaded scaler to both splits (transform only; nothing is fitted here).
# Caution: this overwrites x_train / x_test, so re-running the cell scales twice.
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

In [13]:
# Build an encoder whose input width equals the number of feature columns and
# whose embedding has 16 dimensions.
encoder = Encoder(input_dim=x_train.shape[1], embed_dim=16)

In [14]:
# Restore the saved encoder weights.
# - map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only machine
#   (the encoder is never moved off the CPU in this notebook).
# - strict=True raises on missing/unexpected keys instead of silently leaving
#   layers randomly initialised, which strict=False would allow.
encoder.load_state_dict(torch.load('sub_encoder.pth', map_location='cpu'), strict=True)

<All keys matched successfully>

In [15]:
def embed(model, test_loader):
    """Run ``model`` over every batch of ``test_loader`` and collect the outputs.

    Args:
        model: A ``torch.nn.Module``. It is switched to evaluation mode and is
            left in that mode when the function returns.
        test_loader: Iterable yielding ``(inputs, labels)`` pairs; the labels
            are ignored. Despite the name, any loader can be passed.

    Returns:
        A tensor holding the model outputs for all batches, concatenated along
        the first dimension in iteration order. An empty tensor is returned if
        the loader yields no batches.
    """
    model.eval()  # evaluation mode (e.g. disables dropout)
    batch_outputs = []

    with torch.no_grad():
        for inputs, _labels in test_loader:
            batch_outputs.append(model(inputs))

    if not batch_outputs:
        # Nothing to concatenate; torch.cat would raise on an empty list.
        return torch.empty(0)
    # Concatenate whole batches rather than stacking individual rows.
    return torch.cat(batch_outputs, dim=0)

In [17]:
## Reading centroids
# The JSON is indexed later as centroids[str(gesture)][str(participant)]['centroid'].
with open("centroids_v1.json", mode="r") as file:
    centroids = json.loads(file.read())

In [24]:
# Wrap the scaled features (float32) and participant ids (int64) as tensor datasets.
train_dataset = TensorDataset(
    torch.tensor(x_train).float(),
    torch.tensor(y_train).long(),
)
test_dataset = TensorDataset(
    torch.tensor(x_test).float(),
    torch.tensor(y_test).long(),
)

In [25]:
batch_size = 256
# shuffle=False keeps batches in dataset order, so the embeddings computed from
# these loaders stay row-aligned with y_train / g_train (and y_test / g_test).
train_loader = DataLoader(dataset=train_dataset, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

In [26]:
# Embed both splits with the loaded encoder.
train_embeddings = embed(encoder, train_loader)
test_embeddings = embed(encoder, test_loader)

In [None]:
# Display the full centroid mapping loaded from centroids_v1.json.
centroids


In [19]:
# Top-level keys of the centroid mapping; predict_cluster looks them up with str(gesture).
centroids.keys()

dict_keys(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17'])

In [18]:
# Gesture ids 1..17 (the upper bound of range() is exclusive).
gestures = range(1,18)

In [47]:
def predict_cluster(centroids, embedding, gesture, participants=range(1, 39)):
    """Return the participant whose centroid is nearest to ``embedding``.

    Args:
        centroids: Nested mapping indexed as
            ``centroids[str(gesture)][str(participant)]['centroid']``.
        embedding: Array-like embedding vector to classify.
        gesture: Gesture id; it is converted with ``str()`` for the lookup.
        participants: Iterable of candidate participant ids. Defaults to
            ``range(1, 39)``, the range that was previously hard-coded.

    Returns:
        The id from ``participants`` with the smallest Euclidean distance to
        ``embedding`` (the first one wins on ties), or ``None`` if
        ``participants`` is empty or no distance is comparable (e.g. NaN).

    Raises:
        KeyError: If the gesture or a participant has no entry in ``centroids``.
    """
    cents = centroids[str(gesture)]
    point = np.asarray(embedding)
    # Start from infinity: a finite sentinel such as 1e10 would make the
    # function return None whenever every distance exceeds it.
    min_dist = float("inf")
    min_part = None
    for part in participants:
        c = np.asarray(cents[str(part)]['centroid'])
        dist = np.linalg.norm(point - c)
        if dist < min_dist:
            min_dist = dist
            min_part = part
    return min_part
    
    

In [52]:
# Select the training rows recorded for gesture 10.
i = np.where(g_train == 10)
embeds, labels = train_embeddings[i], y_train[i]

In [49]:
# Sanity check: shape of a single embedding row.
embeds[10, :].shape

torch.Size([16])

In [50]:
# Nearest-centroid prediction for every gesture-10 training embedding.
preds = []
for e in tqdm(range(len(embeds))):
    embedding = np.array(embeds[e, :])
    pred = predict_cluster(centroids, embedding, 10)
    preds.append(pred)

100%|██████████████████████████████████████████████████████████████████████████████| 798/798 [00:00<00:00, 1877.79it/s]


In [53]:
# sklearn's signature is accuracy_score(y_true, y_pred): ground truth goes first.
# Accuracy is symmetric so the value is unchanged, but the correct order avoids
# wrong results if this call is later swapped for an asymmetric metric.
accuracy_score(labels, preds)

0.9223057644110275

In [55]:
# Nearest-centroid accuracy per gesture, keyed by gesture id.
# Note: this is measured on the *training* rows (g_train / y_train / train_embeddings).
g_accs = {}
for g in tqdm(gestures):
    i = np.where(g_train == g)
    embeds = train_embeddings[i]
    labels = y_train[i]
    preds = []
    for e in tqdm(range(len(embeds))):
        # One conversion to NumPy is enough; predict_cluster takes the array as-is.
        embedding = np.array(embeds[e, :])
        pred = predict_cluster(centroids, embedding, g)
        preds.append(pred)
    # accuracy_score expects (y_true, y_pred): ground truth first.
    g_accs[g] = accuracy_score(labels, preds)

  0%|                                                                                           | 0/17 [00:00<?, ?it/s]
  0%|                                                                                          | 0/798 [00:00<?, ?it/s][A
 20%|███████████████▉                                                              | 163/798 [00:00<00:00, 1613.91it/s][A
 41%|███████████████████████████████▊                                              | 325/798 [00:00<00:00, 1519.57it/s][A
 68%|████████████████████████████████████████████████████▉                         | 542/798 [00:00<00:00, 1805.84it/s][A
100%|██████████████████████████████████████████████████████████████████████████████| 798/798 [00:00<00:00, 1765.50it/s][A
  6%|████▉                                                                              | 1/17 [00:00<00:07,  2.18it/s]
  0%|                                                                                          | 0/798 [00:00<?, ?it/s][A
 18%|██████████████   

In [56]:
# Show the per-gesture accuracies computed on the training rows.
g_accs

{1: 0.899749373433584,
 2: 0.9761904761904762,
 3: 0.9611528822055138,
 4: 0.9598997493734336,
 5: 0.9598997493734336,
 6: 0.9598997493734336,
 7: 0.9649122807017544,
 8: 0.9398496240601504,
 9: 0.9573934837092731,
 10: 0.9223057644110275,
 11: 0.9385964912280702,
 12: 0.9335839598997494,
 13: 0.9486215538847118,
 14: 0.9611528822055138,
 15: 0.9548872180451128,
 16: 0.9423558897243107,
 17: 0.7268170426065163}