In [1]:
import json
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg 
import trt_pose.coco
import math
import os
import numpy as np
import traitlets
import pickle 


In [2]:
%cd ..
%pwd

/home/project


'/home/project'

In [3]:
with open('preprocess/hand_pose.json', 'r') as f:
    hand_pose = json.load(f)

topology = trt_pose.coco.coco_category_to_topology(hand_pose)
import trt_pose.models

num_parts = len(hand_pose['keypoints'])
num_links = len(hand_pose['skeleton'])

model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()
import torch


WIDTH = 224
HEIGHT = 224
data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()

if not os.path.exists('model/hand_pose_resnet18_att_244_244_trt.pth'):
    MODEL_WEIGHTS = 'model/hand_pose_resnet18_att_244_244.pth'
    model.load_state_dict(torch.load(MODEL_WEIGHTS))
    import torch2trt
    model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)
    OPTIMIZED_MODEL = 'model/hand_pose_resnet18_att_244_244_trt.pth'
    torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)


OPTIMIZED_MODEL = 'model/hand_pose_resnet18_att_244_244_trt.pth'
from torch2trt import TRTModule

model_trt = TRTModule()
model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

<All keys matched successfully>

In [4]:
from trt_pose.draw_objects import DrawObjects
from trt_pose.parse_objects import ParseObjects

parse_objects = ParseObjects(topology,cmap_threshold=0.12, link_threshold=0.15)
draw_objects = DrawObjects(topology)

In [5]:

import torchvision.transforms as transforms
import PIL.Image

mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
device = torch.device('cuda')

def preprocess(image):
    global device
    device = torch.device('cuda')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = PIL.Image.fromarray(image)
    image = transforms.functional.to_tensor(image).to(device)
    image.sub_(mean[:, None, None]).div_(std[:, None, None])
    return image[None, ...]

In [6]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
clf = make_pipeline(StandardScaler(), SVC(gamma='auto', kernel='rbf'))

In [7]:
from preprocessdata import preprocessdata
preprocessdata = preprocessdata(topology, num_parts)

In [8]:
from dataloader import dataloader
path = "data/"
label_file = "training.json"
test_label = "testing.json"
hand = dataloader(path, label_file, test_label)

00000000.jpg
00000001.jpg
00000002.jpg
00000003.jpg
00000004.jpg
00000005.jpg
00000006.jpg
00000007.jpg
00000008.jpg
00000009.jpg
00000010.jpg
00000011.jpg
00000012.jpg
00000013.jpg
00000014.jpg
00000015.jpg
00000016.jpg
00000017.jpg
00000018.jpg
00000019.jpg
00000020.jpg
00000021.jpg
00000022.jpg
00000023.jpg
00000024.jpg
00000025.jpg
00000026.jpg
00000027.jpg
00000028.jpg
00000029.jpg
00000030.jpg
00000031.jpg
00000032.jpg
00000033.jpg
00000034.jpg
00000035.jpg
00000036.jpg
00000037.jpg
00000038.jpg
00000039.jpg
00000040.jpg
00000041.jpg
00000042.jpg
00000043.jpg
00000044.jpg
00000045.jpg
00000046.jpg
00000047.jpg
00000048.jpg
00000049.jpg
00000050.jpg
00000051.jpg
00000052.jpg
00000053.jpg
00000054.jpg
00000055.jpg
00000056.jpg
00000057.jpg
00000058.jpg
00000059.jpg
00000060.jpg
00000061.jpg
00000062.jpg
00000063.jpg
00000064.jpg
00000065.jpg
00000066.jpg
00000067.jpg
00000068.jpg
00000069.jpg
00000070.jpg
00000071.jpg
00000072.jpg
00000073.jpg
00000074.jpg
00000075.jpg
00000076.jpg

In [9]:
def data_preprocess(images):
    dist_bn_joints_all_data = []
    for idx, im in enumerate(images):
        #print(idx)
        im = im[:, ::-1, :]
        data_im = preprocess(im)
        #print('data_im:', data_im)
        cmap, paf = model_trt(data_im)
        cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
        #print('cmap_size:', cmap.size())
        #print('cmap:', cmap)
        #print('paf_size:', paf.size())
        #print('paf:', paf)
        counts, objects, peaks = parse_objects(cmap, paf)
        #print('counts:', counts)
        #print('objects:', objects)
        #print('peaks:', peaks)
        #img = PIL.Image.fromarray(im)
        #display(img)
        #draw_objects(im, counts, objects, peaks)
        joints = preprocessdata.joints_inference(im, counts, objects, peaks)
        #print('joints:', joints)
        dist_bn_joints = preprocessdata.find_distance(joints)
        #print('dist_bn_joints', dist_bn_joints)
        dist_bn_joints_all_data.append(dist_bn_joints)
    return dist_bn_joints_all_data

In [10]:
def smaller_dataset(dataset, no_samples_per_class, no_of_classes):
    total_samples_per_class =100
    start = 0
    end = no_samples_per_class
    new_dataset = []
    labels = []
    for i in range(no_of_classes):
        new_data = dataset[start:end]
        start = start+total_samples_per_class
        end = start+no_samples_per_class
        new_dataset.extend(new_data)
        labels.extend([i+1]*no_samples_per_class)
    return new_dataset, labels

In [11]:
train_images, labels_train = hand.smaller_dataset(hand.train_images,100,2)

In [12]:
joints_train = data_preprocess(hand.train_images)
joints_test = data_preprocess(hand.test_images)

In [13]:
print(np.array(joints_train).shape)
print(joints_train)
print(np.array(joints_test).shape)
print(joints_test)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [14]:
svm_train = True
if svm_train:
    clf, predicted = preprocessdata.trainsvm(clf, joints_train, joints_test, hand.labels_train, hand.labels_test)
    filename = 'svmmodel_new.sav'
    pickle.dump(clf, open(filename, 'wb'))
else:
    filename = 'svmmodel.sav'
    clf = pickle.load(open(filename, 'rb'))

In [15]:
preprocessdata.svm_accuracy(clf.predict(joints_test), hand.labels_test)


0.9477351916376306

In [16]:
clf.predict([joints_test[40],[0]*num_parts*num_parts])

array([1, 3])

In [17]:
clf.predict(joints_test)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

In [None]:
from jetcam.usb_camera import USBCamera
from jetcam.csi_camera import CSICamera
from jetcam.utils import bgr8_to_jpeg

camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=30, capture_device=0)
#camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=30)

camera.running = True

In [None]:
import ipywidgets
from IPython.display import display


image_w = ipywidgets.Image(format='jpeg', width=256, height=256)
display(image_w)

In [None]:
with open('preprocess/gesture.json', 'r') as f:
    gesture = json.load(f)
gesture_type = gesture["drawing"]

In [None]:
def execute(change):
    image = change['new']
    data = preprocess(image)
    cmap, paf = model_trt(data)
    cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
    counts, objects, peaks = parse_objects(cmap, paf)#, cmap_threshold=0.15, link_threshold=0.15)
    draw_objects(image, counts, objects, peaks)
    joints = preprocessdata.joints_inference(image, counts, objects, peaks)
    dist_bn_joints = preprocessdata.find_distance(joints)
    gesture = clf.predict([dist_bn_joints,[0]*num_parts*num_parts])
    gesture_joints = gesture[0]
    preprocessdata.prev_queue.append(gesture_joints)
    preprocessdata.prev_queue.pop(0)
    preprocessdata.print_label(image, preprocessdata.prev_queue, gesture_type)
    image_w.value = bgr8_to_jpeg(image)


In [None]:
execute({'new': camera.value})

In [None]:
camera.observe(execute, names='value')

In [None]:
camera.unobserve_all()

In [None]:
#camera.running = False