In [1]:
import mediapipe as mp
import cv2
import time
import numpy as np
import torch
import torch.nn as nn
import torch.utils as utils
from datetime import datetime
import pickle

In [2]:
# Short aliases for the two MediaPipe solution modules referenced in this notebook.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

In [3]:
# Gesture names; a gesture's position in this tuple is its class index, and each
# name is also the file-name prefix of that gesture's images.
classes = (
    "down",
    "up",
    "stop",
    "thumbright",
    "thumbleft",
)

In [4]:
# Single-hand detector configured for independent still images.
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.6,
    min_tracking_confidence=0.3,
)

In [5]:
# Build the training set: for every image in which a hand is detected, store the
# (x, y) pair of each landmark of the first hand plus a one-hot label vector.
train_data = []
train_labels = []

for class_index, gesture_class in enumerate(classes):
    for i in range(70):
        image_path = f"../training/{gesture_class}.{i}.jpg"
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for a missing/unreadable file; report the
            # offending path instead of failing later inside cvtColor.
            raise FileNotFoundError(f"Could not read training image: {image_path}")

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # cv2 loads BGR, MediaPipe requires RGB
        image.flags.writeable = False
        results = hands.process(image)  # this makes the actual detections

        if not results.multi_hand_landmarks:
            continue  # no hand found: this image contributes no sample

        landmarks = [[landmark.x, landmark.y]
                     for landmark in results.multi_hand_landmarks[0].landmark]
        train_label = np.zeros([len(classes)])
        train_label[class_index] = 1
        train_data.append(landmarks)
        train_labels.append(train_label)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [6]:
# Convert the collected samples to tensors. The labels are a list of ndarrays, so
# they are stacked into a single ndarray first: calling torch.tensor() directly on
# a list of ndarrays is very slow and emits a UserWarning.
train_data = torch.tensor(train_data)
train_labels = torch.tensor(np.array(train_labels))

  train_labels = torch.tensor(train_labels)


In [7]:
class LandmarksDataset(utils.data.Dataset):
    """Map-style dataset pairing landmark samples with their labels.

    Args:
        X: Indexable collection of samples.
        y: Indexable collection of labels; its length defines the dataset length.
        transform: Optional callable applied to a sample each time it is fetched.
            ``None`` (the default) returns samples unchanged.
    """

    def __init__(self, X, y, transform=None):
        self.X = X
        self.y = y
        self.len = len(y)
        self.transform = transform

    def __len__(self):
        """Return the number of (sample, label) pairs."""
        return self.len

    def __getitem__(self, idx):
        """Return ``(sample, label)`` at ``idx``, with ``transform`` applied to the sample."""
        sample = self.X[idx]
        if self.transform is not None:
            # Only the sample is transformed; the label is returned as stored.
            sample = self.transform(sample)
        return sample, self.y[idx]

In [8]:
# Wrap the training tensors in a dataset and serve them as shuffled mini-batches of 4.
training_set = LandmarksDataset(X=train_data, y=train_labels)
training_loader = utils.data.DataLoader(
    dataset=training_set,
    batch_size=4,
    shuffle=True,
)

In [9]:
# Build the validation set the same way as the training set: for every image in
# which a hand is detected, store the (x, y) pair of each landmark of the first
# hand plus a one-hot label vector.
val_data = []
val_labels = []
for class_index, gesture_class in enumerate(classes):
    for i in range(20):
        image_path = f"../validation/{gesture_class}.{i}.jpg"
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for a missing/unreadable file; report the
            # offending path instead of failing later inside cvtColor.
            raise FileNotFoundError(f"Could not read validation image: {image_path}")

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # cv2 loads BGR, MediaPipe requires RGB
        image.flags.writeable = False
        results = hands.process(image)  # this makes the actual detections

        if not results.multi_hand_landmarks:
            continue  # no hand found: this image contributes no sample

        landmarks = [[landmark.x, landmark.y]
                     for landmark in results.multi_hand_landmarks[0].landmark]
        val_label = np.zeros([len(classes)])
        val_label[class_index] = 1
        val_data.append(landmarks)
        val_labels.append(val_label)

In [10]:
# Convert the validation samples to tensors. The labels are a list of ndarrays, so
# they are stacked into a single ndarray first: calling torch.tensor() directly on
# a list of ndarrays is very slow and emits a UserWarning.
val_data = torch.tensor(val_data)
val_labels = torch.tensor(np.array(val_labels))

In [11]:
# Wrap the validation tensors in a dataset; batches of 4 are served in fixed order.
validation_set = LandmarksDataset(X=val_data, y=val_labels)
validation_loader = utils.data.DataLoader(
    dataset=validation_set,
    batch_size=4,
    shuffle=False,
)

In [12]:
class HandNetwork(nn.Module):
    """Fully connected classifier over flattened hand landmarks.

    The network is Flatten -> Linear(in_features, 120) -> ReLU -> Linear(120, 100)
    -> ReLU -> Linear(100, num_classes) and returns raw logits (no softmax).

    Args:
        in_features: Size of one flattened sample. Defaults to 42, i.e. the
            21 x 2 landmark coordinates used throughout this notebook.
        num_classes: Number of output logits. ``None`` (the default) uses
            ``len(classes)`` from the notebook's gesture tuple.
    """

    def __init__(self, in_features=42, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Resolved at construction time so the default tracks the global tuple.
            num_classes = len(classes)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features, 120)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(120, 100)
        self.fc3 = nn.Linear(100, num_classes)

    def forward(self, x):
        """Return the logits for a batch ``x``; all non-batch dims are flattened first."""
        hidden = self.relu(self.fc1(self.flatten(x)))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [16]:
# Fresh network, cross-entropy objective, and SGD (lr 0.01, momentum 0.1) over its parameters.
model = HandNetwork()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01, momentum=0.1)

In [17]:
def train_one_epoch(curr_model, loader=None, criterion=None, opt=None, report_every=10):
    """Run one optimisation pass over a loader and return a recent average loss.

    Args:
        curr_model: Module to train; called as ``curr_model(inputs)``.
        loader: Iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``training_loader``.
        criterion: Loss callable ``criterion(outputs, labels)``. Defaults to the
            notebook-level ``loss_fn``.
        opt: Optimizer stepping ``curr_model``'s parameters. Defaults to the
            notebook-level ``optimizer``.
        report_every: Number of batches per reporting window.

    Returns:
        The mean per-batch loss of the last complete ``report_every``-batch
        window. If the loader yields fewer batches than one window, the mean
        over the batches that were seen; ``0`` if the loader is empty.

    Raises:
        ValueError: If ``report_every`` is smaller than 1.
    """
    if report_every < 1:
        raise ValueError("report_every must be a positive integer")

    # Fall back to the notebook globals only when no explicit object is given.
    loader = training_loader if loader is None else loader
    criterion = loss_fn if criterion is None else criterion
    opt = optimizer if opt is None else opt

    running_loss = 0
    last_loss = 0
    pending = 0               # batches accumulated in the current window
    completed_window = False  # whether at least one full window was reported

    for i, (inputs, labels) in enumerate(loader):
        opt.zero_grad()
        outputs = curr_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()  # calculate the gradients
        opt.step()  # update the params

        running_loss += loss.item()
        pending += 1
        if pending == report_every:
            last_loss = running_loss / report_every  # loss per batch
            print(f'  batch {i+1} loss: {last_loss}')
            running_loss = 0
            pending = 0
            completed_window = True

    # A short epoch never completes a window; report what was seen rather than
    # returning the initial 0.
    if not completed_window and pending:
        last_loss = running_loss / pending

    return last_loss

In [18]:
# Run identifier; it is not used anywhere else in this cell.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

# Counts completed epochs; only used for the progress print below.
epoch_number = 0

EPOCHS = 200

best_vloss = 1_000_000.

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    model.train(True)

    avg_loss = train_one_epoch(curr_model=model)

    # Validation: switch to evaluation mode and disable autograd so no graph is
    # built or kept alive while the losses are accumulated.
    model.train(False)

    running_vloss = 0.0
    num_vbatches = 0
    with torch.no_grad():
        for vinputs, vlabels in validation_loader:
            voutputs = model(vinputs)
            vloss = loss_fn(voutputs, vlabels)
            running_vloss += vloss.item()  # plain float, not a graph-attached tensor
            num_vbatches += 1

    # An empty validation loader yields NaN instead of an undefined loop index.
    avg_vloss = running_vloss / num_vbatches if num_vbatches else float('nan')
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

    # Track the best validation loss seen so far (no checkpoint is written here).
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss

    epoch_number += 1

EPOCH 1:
  batch 10 loss: 1.6179750233888626
  batch 20 loss: 1.637564355134964
  batch 30 loss: 1.6156188249588013
  batch 40 loss: 1.589386910200119
  batch 50 loss: 1.5989562839269638
  batch 60 loss: 1.6341707080602645
  batch 70 loss: 1.6121572315692902
  batch 80 loss: 1.6184952229261398
LOSS train 1.6184952229261398 valid 1.6037553989887237
EPOCH 2:
  batch 10 loss: 1.6107109278440475
  batch 20 loss: 1.6050293415784835
  batch 30 loss: 1.5994475424289702
  batch 40 loss: 1.5954216212034225
  batch 50 loss: 1.6147206246852874
  batch 60 loss: 1.5917430430650712
  batch 70 loss: 1.6003032594919204
  batch 80 loss: 1.5947639137506484
LOSS train 1.5947639137506484 valid 1.597197924852371
EPOCH 3:
  batch 10 loss: 1.5936622262001037
  batch 20 loss: 1.5950306475162506
  batch 30 loss: 1.592026698589325
  batch 40 loss: 1.5919599652290344
  batch 50 loss: 1.5904400646686554
  batch 60 loss: 1.5939450442790986
  batch 70 loss: 1.5813089072704316
  batch 80 loss: 1.5947670072317124
LOS

In [20]:
import torch.nn.functional as F

In [21]:
# Count the validation samples whose arg-max prediction equals the arg-max of
# their one-hot label.
model.eval()
total = len(validation_set)
correct_count = 0
with torch.no_grad():
    for inputs, labels in validation_loader:
        outputs = model(inputs).argmax(dim=1)
        labels = labels.argmax(dim=1)
        correct_count += (outputs == labels).sum().item()

In [22]:
# Validation accuracy: fraction of correctly classified samples.
correct_count / total

1.0

In [23]:
# Persist the trained model object itself (not just its state_dict) to disk.
torch.save(obj=model, f="../models/model.pth")

In [132]:
# with open("../models/model6.pkl", "wb") as file:
#     pickle.dump(model, file)

from sklearn.neighbors import KNeighborsClassifier

In [23]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [24]:
# k-NN baseline (k=3) on the flattened landmarks. The one-hot label rows are
# converted to class indices so the classifier solves one multiclass problem and
# always predicts exactly one gesture; fitting on the one-hot matrix instead
# treats every column as an independent output, which can yield rows that are
# not valid one-hot vectors.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(train_data.view(-1, 21*2), torch.argmax(train_labels, dim=1))
y_pred = knn.predict(val_data.view(-1, 21*2))
accuracy_score(torch.argmax(val_labels, dim=1), y_pred)

0.9278350515463918

In [25]:
from sklearn.linear_model import RidgeClassifierCV

In [26]:
# Sanity check of the training tensor's dimensions.
train_data.size()

torch.Size([345, 21, 2])

In [92]:
# RidgeClassifierCV baseline on the flattened landmarks, fitted against the
# one-hot label rows and scored on the validation split.
clf = RidgeClassifierCV()
clf.fit(X=train_data.view(-1, 21*2), y=train_labels)
y_pred = clf.predict(X=val_data.view(-1, 21*2))
accuracy_score(y_true=val_labels, y_pred=y_pred)

1.0

In [93]:
# Normalise each sample's per-class decision scores into a row that sums to 1.
# dim=1 is passed explicitly: relying on softmax's implicit dimension is
# deprecated and emits a warning.
F.softmax(torch.tensor(clf.decision_function(val_data.view(-1, 21*2))), dim=1)

  F.softmax(torch.tensor(clf.decision_function(val_data.view(-1, 21*2))))


tensor([[0.6817, 0.0807, 0.0736, 0.0839, 0.0802],
        [0.5953, 0.0871, 0.1020, 0.1140, 0.1016],
        [0.6809, 0.0781, 0.0679, 0.0831, 0.0901],
        [0.6449, 0.0783, 0.0792, 0.0871, 0.1105],
        [0.6436, 0.0777, 0.0825, 0.0876, 0.1086],
        [0.5774, 0.0947, 0.0956, 0.1239, 0.1083],
        [0.5115, 0.0937, 0.0987, 0.1656, 0.1306],
        [0.4998, 0.1001, 0.0840, 0.1717, 0.1444],
        [0.6768, 0.0651, 0.0778, 0.0808, 0.0994],
        [0.7222, 0.0787, 0.0629, 0.0701, 0.0661],
        [0.7228, 0.0590, 0.0769, 0.0705, 0.0708],
        [0.6058, 0.0801, 0.0881, 0.1046, 0.1215],
        [0.5946, 0.0750, 0.0937, 0.1189, 0.1178],
        [0.4962, 0.1252, 0.0958, 0.1549, 0.1279],
        [0.5400, 0.1057, 0.0932, 0.1275, 0.1336],
        [0.6375, 0.1178, 0.0775, 0.0829, 0.0844],
        [0.6289, 0.0775, 0.0811, 0.1140, 0.0985],
        [0.6558, 0.0717, 0.0818, 0.0909, 0.0998],
        [0.6551, 0.0666, 0.0837, 0.0982, 0.0964],
        [0.0692, 0.6421, 0.0877, 0.1231, 0.0780],


In [30]:
# Mean accuracy of the fitted classifier on the validation split.
clf.score(X=val_data.view(-1, 21*2), y=val_labels)

1.0

In [94]:
from sklearn.svm import LinearSVC

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,

In [101]:
# One-vs-rest linear SVM baseline, trained and scored on class indices derived
# from the one-hot label rows.
clf = LinearSVC(multi_class="ovr")
clf.fit(train_data.view(-1, 21*2), train_labels.argmax(dim=1))
clf.score(val_data.view(-1, 21*2), val_labels.argmax(dim=1))



1.0

In [108]:
# Normalise each sample's per-class decision scores into a row that sums to 1.
torch.tensor(clf.decision_function(val_data.view(-1, 21*2))).softmax(dim=1)

tensor([[0.7528, 0.0526, 0.0551, 0.0519, 0.0876],
        [0.6623, 0.0536, 0.0913, 0.0680, 0.1248],
        [0.7421, 0.0511, 0.0541, 0.0523, 0.1004],
        [0.7057, 0.0502, 0.0582, 0.0611, 0.1248],
        [0.6898, 0.0542, 0.0641, 0.0646, 0.1273],
        [0.6079, 0.0719, 0.0888, 0.0880, 0.1435],
        [0.5345, 0.0827, 0.1002, 0.1041, 0.1785],
        [0.5266, 0.0871, 0.0854, 0.1148, 0.1861],
        [0.7288, 0.0413, 0.0623, 0.0545, 0.1131],
        [0.7814, 0.0521, 0.0439, 0.0446, 0.0779],
        [0.8104, 0.0328, 0.0484, 0.0363, 0.0720],
        [0.7139, 0.0439, 0.0575, 0.1079, 0.0768],
        [0.6979, 0.0423, 0.0651, 0.1153, 0.0793],
        [0.5230, 0.0924, 0.0954, 0.1829, 0.1062],
        [0.5858, 0.0602, 0.0815, 0.1587, 0.1139],
        [0.6972, 0.0675, 0.0633, 0.1023, 0.0697],
        [0.7065, 0.0484, 0.0617, 0.1076, 0.0757],
        [0.7538, 0.0348, 0.0552, 0.0913, 0.0649],
        [0.7592, 0.0318, 0.0541, 0.0928, 0.0620],
        [0.0342, 0.6723, 0.0858, 0.0939, 0.1137],
