In [2]:
import json
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg 
import trt_pose.coco
import math
import os
import numpy as np
import traitlets
import pickle 


In [3]:
with open('preprocess/hand_pose.json', 'r') as f:
    hand_pose = json.load(f)

topology = trt_pose.coco.coco_category_to_topology(hand_pose)
import trt_pose.models

num_parts = len(hand_pose['keypoints'])
num_links = len(hand_pose['skeleton'])

model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()
import torch


WIDTH = 224
HEIGHT = 224
data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()

if not os.path.exists('model/hand_pose_resnet18_att_244_244_trt.pth'):
    MODEL_WEIGHTS = 'model/hand_pose_resnet18_att_244_244.pth'
    model.load_state_dict(torch.load(MODEL_WEIGHTS))
    import torch2trt
    model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)
    OPTIMIZED_MODEL = 'model/hand_pose_resnet18_att_244_244_trt.pth'
    torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)


OPTIMIZED_MODEL = 'model/hand_pose_resnet18_att_244_244_trt.pth'
from torch2trt import TRTModule

model_trt = TRTModule()
model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

<All keys matched successfully>

In [4]:
from trt_pose.draw_objects import DrawObjects
from trt_pose.parse_objects import ParseObjects

parse_objects = ParseObjects(topology,cmap_threshold=0.12, link_threshold=0.15)
draw_objects = DrawObjects(topology)

In [5]:

import torchvision.transforms as transforms
import PIL.Image

mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
device = torch.device('cuda')

def preprocess(image):
    global device
    device = torch.device('cuda')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = PIL.Image.fromarray(image)
    image = transforms.functional.to_tensor(image).to(device)
    image.sub_(mean[:, None, None]).div_(std[:, None, None])
    return image[None, ...]

In [6]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
clf = make_pipeline(StandardScaler(), SVC(gamma='auto', kernel='rbf'))

In [7]:
from preprocessdata import preprocessdata
preprocessdata = preprocessdata(topology, num_parts)

In [8]:
svm_train = False
if svm_train:
    clf, predicted = preprocessdata.trainsvm(clf, joints_train, joints_test, hand.labels_train, hand.labels_test)
    filename = 'svmmodel.sav'
    pickle.dump(clf, open(filename, 'wb'))
else:
    filename = 'svmmodel.sav'
    clf = pickle.load(open(filename, 'rb'))

In [9]:
from jetcam.usb_camera import USBCamera
from jetcam.csi_camera import CSICamera
from jetcam.utils import bgr8_to_jpeg

camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=30, capture_device=0)
#camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=30)

camera.running = True

In [10]:
def draw_joints(image, joints):
    count = 0
    for i in joints:
        if i==[0,0]:
            count+=1
    if count>= 7:
        return 
    for i in joints:
        cv2.circle(image, (i[0],i[1]), 2, (0,0,255), 1)
    cv2.circle(image, (joints[0][0],joints[0][1]), 2, (255,0,255), 1)
    for i in hand_pose['skeleton']:
        if joints[i[0]-1][0]==0 or joints[i[1]-1][0] == 0:
            break
        cv2.line(image, (joints[i[0]-1][0],joints[i[0]-1][1]), (joints[i[1]-1][0],joints[i[1]-1][1]), (0,255,0), 1)

In [11]:
with open('preprocess/gesture.json', 'r') as f:
    gesture = json.load(f)
gesture_type = gesture["paint"]

In [12]:
pen = []
rectangle = []
def draw(image, joints):
    global pen
    global rectangle
    if preprocessdata.text=="draw":
        pen.append((joints[6][0], joints[6][1]))
    for i in range(len(pen)):
        cv2.circle(image, pen[i], 1,(0,0,0), 2)
    if preprocessdata.text=="line":
        if joints[5]!=[0,0]:
            rectangle.append((joints[6][0], joints[6][1]))
    for i in range(len(rectangle)):
        if i > 0:
            if rectangle[i]!=[0,0]:
                cv2.line(image,rectangle[i-1], rectangle[i], (0,0,0), 2)
    if preprocessdata.text=="erase":
        to_be_erased = []
        for i in range(10):
            for j in range(10):
                
                x = (joints[6][0]+i, joints[6][1]+j)
                if x[0]>=0 or x[1]>=0:
                    to_be_erased.append(x)
        for i in to_be_erased:
            if i in pen:
                pen.remove(i)           
            
    if preprocessdata.text=="clear":
        pen.clear()
        rectangle.clear()

In [13]:
import ipywidgets
from IPython.display import display


image_w = ipywidgets.Image(format='jpeg', width=224, height=224)
display(image_w)

Image(value=b'', format='jpeg', height='224', width='224')

In [18]:
def execute(change):
    image = change['new']
    data = preprocess(image)
    cmap, paf = model_trt(data)
    cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
    print('cmap_size:', cmap.size())
    print('cmap:', cmap)
    print('paf_size:', paf.size())
    print('paf:', paf)
    counts, objects, peaks = parse_objects(cmap, paf)
    joints = preprocessdata.joints_inference(image, counts, objects, peaks)
    print(joints)
    draw_objects(image, counts, objects, peaks)
    draw_joints(image, joints)
    dist_bn_joints = preprocessdata.find_distance(joints)
    gesture = clf.predict([dist_bn_joints,[0]*num_parts*num_parts])
    gesture_joints = gesture[0]
    preprocessdata.prev_queue.append(gesture_joints)
    preprocessdata.prev_queue.pop(0)
    preprocessdata.print_label(image, preprocessdata.prev_queue, gesture_type)
    draw(image, joints)
    #image = image[:, ::-1, :]
    image_w.value = bgr8_to_jpeg(image)


In [19]:
execute({'new': camera.value})

cmap_size: torch.Size([1, 21, 56, 56])
cmap: tensor([[[[-3.0518e-05, -1.4496e-04, -7.6294e-05,  ..., -6.1035e-05,
           -1.9836e-04, -1.5259e-04],
          [ 1.5259e-05, -1.5259e-05, -1.5259e-05,  ...,  0.0000e+00,
           -2.2888e-05,  3.0518e-05],
          [ 1.5259e-05, -2.2888e-05,  0.0000e+00,  ..., -1.5259e-05,
           -3.0518e-05,  8.3923e-05],
          ...,
          [ 8.1635e-04,  5.3406e-05,  6.8665e-05,  ...,  7.6294e-06,
           -2.2888e-05, -2.4414e-04],
          [ 3.5858e-04, -1.5259e-05,  9.1553e-05,  ..., -3.8147e-05,
           -1.5259e-05,  1.5259e-05],
          [ 2.8763e-03,  9.0790e-04,  8.3923e-04,  ...,  3.0518e-05,
            3.0518e-05, -9.4604e-04]],

         [[ 6.8665e-05, -3.8147e-05,  5.3406e-05,  ..., -3.8147e-05,
           -2.2888e-05,  3.8147e-05],
          [-7.6294e-05, -3.0518e-05,  0.0000e+00,  ..., -7.6294e-06,
            0.0000e+00, -7.6294e-05],
          [-5.3406e-05, -2.2888e-05, -7.6294e-06,  ...,  0.0000e+00,
            0

In [20]:
camera.observe(execute, names='value')

cmap_size: torch.Size([1, 21, 56, 56])
cmap: tensor([[[[-3.8147e-05, -1.7548e-04, -1.2207e-04,  ..., -3.8147e-05,
           -1.6022e-04, -1.2970e-04],
          [ 0.0000e+00, -2.2888e-05, -1.5259e-05,  ...,  0.0000e+00,
           -2.2888e-05,  9.1553e-05],
          [ 1.5259e-05, -2.2888e-05, -7.6294e-06,  ..., -1.5259e-05,
           -2.2888e-05,  9.1553e-05],
          ...,
          [ 5.9891e-04,  7.6294e-06,  1.5259e-05,  ...,  7.6294e-06,
           -2.2888e-05, -2.1362e-04],
          [ 2.4414e-04, -5.3406e-05,  3.0518e-05,  ..., -3.8147e-05,
           -1.5259e-05,  1.5259e-05],
          [ 2.1286e-03,  7.1716e-04,  6.1417e-04,  ...,  8.3923e-05,
            4.5776e-05, -6.5613e-04]],

         [[ 6.8665e-05, -5.3406e-05,  2.2888e-05,  ..., -2.2888e-05,
            7.6294e-06,  1.6022e-04],
          [-6.1035e-05, -1.5259e-05,  0.0000e+00,  ..., -7.6294e-06,
            0.0000e+00, -7.6294e-05],
          [-4.5776e-05, -7.6294e-06, -7.6294e-06,  ...,  0.0000e+00,
            0

In [21]:
camera.unobserve_all()

cmap_size: torch.Size([1, 21, 56, 56])
cmap: tensor([[[[ 8.6975e-04,  1.5259e-05, -2.2888e-05,  ..., -7.6294e-05,
           -1.9073e-04, -1.6022e-04],
          [ 1.4496e-04, -1.5259e-05, -7.6294e-06,  ...,  0.0000e+00,
           -2.2888e-05,  8.3923e-05],
          [ 3.8147e-05, -1.5259e-05,  0.0000e+00,  ..., -1.5259e-05,
           -2.2888e-05,  8.3923e-05],
          ...,
          [ 2.0599e-04,  6.8665e-05,  4.5776e-05,  ...,  0.0000e+00,
           -3.0518e-05, -3.6621e-04],
          [ 2.8992e-04, -1.5259e-05,  0.0000e+00,  ..., -3.8147e-05,
           -3.8147e-05,  2.7466e-04],
          [ 8.0872e-04,  7.6294e-06, -6.8665e-05,  ...,  3.3569e-04,
           -6.1035e-05, -5.7983e-04]],

         [[ 1.3733e-04, -4.5776e-05,  1.5259e-05,  ..., -5.3406e-05,
           -3.8147e-05, -1.5259e-05],
          [ 3.8147e-05,  0.0000e+00,  7.6294e-06,  ..., -7.6294e-06,
            0.0000e+00, -6.1035e-05],
          [-7.6294e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
            0

In [None]:
#camera.running = False