# Putting Things Together



### Loading Pretrained Model

Finally, we can load our trained model and use it to predict the gesture.

In [1]:
import sys
import torch

sys.path.append("../../") # we need to add the folder path to sys path to be able to import like below

from train import CNN

model = CNN()

# A freshly constructed model does not yet hold the trained weights;
# uncomment the next line to compare against the values printed after loading.
# print(model.state_dict())

# map_location='cpu' lets a checkpoint that was saved on a GPU machine load
# on a CPU-only host as well.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
checkpoint = torch.load('../../data/checkpoint.pth.tar', map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])

# The model is only used for prediction in this notebook, so switch it to
# evaluation mode (affects layers such as dropout / batch-norm, if present).
model.eval()

print(model.state_dict())

OrderedDict([('conv1.0.weight', tensor([[[[ 0.1714,  0.1695, -0.1754,  0.1610,  0.1165],
          [ 0.0948, -0.1709, -0.0858,  0.1308, -0.0206],
          [-0.0683, -0.0954, -0.0405,  0.1121,  0.1950],
          [-0.1576, -0.1964,  0.1961,  0.1877, -0.0082],
          [-0.1260, -0.0954,  0.1295, -0.1168,  0.0749]]],


        [[[ 0.0392,  0.0322, -0.1547,  0.2144,  0.0637],
          [ 0.1612,  0.0127,  0.0175, -0.0431,  0.1231],
          [-0.1211,  0.1276,  0.1531,  0.0741,  0.1507],
          [ 0.0872, -0.2289, -0.2112, -0.1895,  0.0331],
          [-0.0756,  0.1316, -0.1575,  0.0604, -0.0769]]],


        [[[-0.0179,  0.1728, -0.1148,  0.0221,  0.1647],
          [ 0.1089, -0.1034, -0.1346, -0.1794, -0.0760],
          [ 0.1683, -0.0468,  0.0269, -0.2315, -0.1304],
          [-0.1319,  0.1508,  0.0865,  0.1156,  0.0819],
          [ 0.0154,  0.2225,  0.0591,  0.0113,  0.0834]]],


        [[[ 0.1857,  0.2133, -0.1062,  0.1494,  0.0666],
          [ 0.0713, -0.1152, -0.1240,  0.119

### Detecting Gesture
Now we grab NAO's camera feed and run the model on every frame of the video stream to check whether a gesture is present.

In [3]:
import cv2
import numpy as np

from os.path import dirname, abspath
from vision_definitions import kQVGA,kBGRColorSpace
from naoqi import ALProxy
from torchvision import transforms
from torch.autograd import Variable

NAO_IP="192.168.1.7" # <YOUR_NAO_IP> or nao.local


if __name__=="__main__":  # Should not run when imported
    # Streams frames from NAO's top camera, shows them in an OpenCV window and
    # classifies the centre crop of every frame with the `model` loaded in the
    # earlier cell. Press 'q' or 'Esc' in the image window to stop.

    camera_index = 0 # Top camera

    # Proxy for ALVideoDevice
    name = "nao_opencv"
    videoProxy = ALProxy("ALVideoDevice", NAO_IP, 9559)

    # Subscribe to video device on a specific camera
    # BGR for OpenCV
    # subscribeCamera returns the handle to use from here on; it can differ
    # from the requested name (e.g. "nao_opencv_3").
    name = videoProxy.subscribeCamera(name,camera_index,kQVGA,kBGRColorSpace,30)
    print("Subscribed to  %s" % name)

    # Crop rectangle as (x, y) points so the focus is in the centre of the frame.
    # These never change, so they are defined once outside the loop.
    upper_left = (80, 40)     #Crop: top left point
    bottom_right = (230, 190) #Crop: bottom right point

    try:
        first_frame = True
        # Keep Looping
        while True:
            # Get image
            img = videoProxy.getImageRemote(name)
            if img is None:
                # Fail with a clear message instead of an opaque TypeError on img[0]
                raise RuntimeError("getImageRemote returned no image for %s" % name)

            # Get image attributes
            width = img[0]
            height = img[1]
            nchannels = img[2]
            imgbuffer = img[6]

            if first_frame:
                print("Grabbed image:  %s x %s  nchannels= %s" % (width, height, nchannels))
                first_frame = False

            # Build a fresh array for every frame; re-assigning `ndarray.data`
            # on an existing array is deprecated and unsafe in NumPy.
            frame = np.asarray(bytearray(imgbuffer), dtype=np.uint8)
            frame = frame.reshape((height, width, nchannels))

            # Display the frame to our screen
            # NOTE : Do not run this code if you run your python on the robot
            # as NAO has no screen to show
            cv2.imshow("Frame", frame)

            # Let's crop the image frame so the focus is in center
            cropped_frame = frame[upper_left[1] : bottom_right[1], upper_left[0] : bottom_right[0]]

            # Converting cropped color image to Grayscale
            gray_frame = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2GRAY)

            # Resize the grayscale image to 28 x 28 (height x width)
            resized_frame = cv2.resize(gray_frame, dsize=(28, 28), interpolation=cv2.INTER_CUBIC)

            # Expanding the image to 1 x 28 x 28 (channel dimension first)
            image = np.expand_dims(resized_frame, axis=0)

            # NOTE(review): pixels are passed as raw 0-255 floats. If the model
            # was trained on transforms.ToTensor() output (scaled to [0, 1]),
            # divide by 255.0 here - confirm against the training code.
            image = torch.from_numpy(image).float()
            image = Variable(image)

            # Add the batch dimension: 1 x 1 x 28 x 28
            image = image.unsqueeze(0)

            # Prediction (no gradients are needed for inference)
            with torch.no_grad():
                output, last_layer = model(image)
            pred = torch.max(output, 1)[1].data.squeeze()
            print(pred)

            gesture = int(pred)
            if gesture == 0:
                # Left
                print('Left Hand Gesture Detected')
            elif gesture == 1:
                # Right
                print('Right Hand Gesture Detected')

            # Get the key pressed in the image window
            key = cv2.waitKey(33)&0xFF
            if  key == ord('q') or key == 27:
                # Exit loop when 'q' or 'Esc' is pressed on the image window
                break

    finally:
        # Always release the camera subscription and close the window, even
        # when the loop is interrupted (e.g. KeyboardInterrupt) or fails.
        print("Unsubscribing  %s" % name)
        videoProxy.unsubscribe(name)
        cv2.destroyAllWindows()


Subscribed to  nao_opencv_3
Grabbed image:  320 x 240  nchannels= 3
tensor(0)
Left Hand Gesture Detected




tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
tensor(0)
Left Hand Gesture Detected
t

KeyboardInterrupt: 

In [57]:
from naoqi import ALProxy

# Address of the robot's NAOqi broker; replace the IP with your own NAO's address.
NAO_IP = "192.168.1.7"
NAO_PORT = 9559

# Proxy to the ALMotion module, which exposes the walk commands used below
motionProxy = ALProxy("ALMotion", NAO_IP, NAO_PORT)
    
# moveInit() is called once before the move command is issued
motionProxy.moveInit()
# moveTo(x, y, theta): here a pure 0.5 sideways displacement, no forward motion, no rotation.
# NOTE(review): in the NAOqi documentation positive y appears to be the robot's
# own left, so y = -0.5 may step to the robot's right (i.e. "left" as seen by
# someone facing the robot) - confirm the intended direction.
motionProxy.moveTo(0, -0.5, 0) # To move left


### Finally

Now, to make NAO perform the corresponding action whenever a gesture is detected, we can put everything together as follows.

In [None]:
import cv2
import numpy as np

from os.path import dirname, abspath
from vision_definitions import kQVGA,kBGRColorSpace
from naoqi import ALProxy
from torchvision import transforms
from torch.autograd import Variable


NAO_IP="192.168.1.7" # <YOUR_NAO_IP> or nao.local
NAO_PORT = 9559


if __name__=="__main__":  # Should not run when imported
    # Streams frames from NAO's top camera, classifies the centre crop of every
    # frame with the `model` loaded in the earlier cell and makes the robot
    # step sideways according to the detected gesture.
    # Press 'q' or 'Esc' in the image window to stop.

    camera_index = 0 # Top camera

    # Proxies for ALVideoDevice and ALMotion (both on the same broker port)
    name = "nao_opencv"
    videoProxy = ALProxy("ALVideoDevice", NAO_IP, NAO_PORT)
    motionProxy = ALProxy("ALMotion", NAO_IP, NAO_PORT)

    # moveInit() is called once before any move command is issued
    motionProxy.moveInit()

    # Subscribe to video device on a specific camera
    # BGR for OpenCV
    # subscribeCamera returns the handle to use from here on; it can differ
    # from the requested name.
    name = videoProxy.subscribeCamera(name,camera_index,kQVGA,kBGRColorSpace,30)
    print("Subscribed to  %s" % name)

    # Crop rectangle as (x, y) points so the focus is in the centre of the frame.
    # These never change, so they are defined once outside the loop.
    upper_left = (80, 40)     #Crop: top left point
    bottom_right = (230, 190) #Crop: bottom right point

    try:
        first_frame = True
        # Keep Looping
        while True:
            # Get image
            img = videoProxy.getImageRemote(name)
            if img is None:
                # Fail with a clear message instead of an opaque TypeError on img[0]
                raise RuntimeError("getImageRemote returned no image for %s" % name)

            # Get image attributes
            width = img[0]
            height = img[1]
            nchannels = img[2]
            imgbuffer = img[6]

            if first_frame:
                print("Grabbed image:  %s x %s  nchannels= %s" % (width, height, nchannels))
                first_frame = False

            # Build a fresh array for every frame; re-assigning `ndarray.data`
            # on an existing array is deprecated and unsafe in NumPy.
            frame = np.asarray(bytearray(imgbuffer), dtype=np.uint8)
            frame = frame.reshape((height, width, nchannels))

            # Display the frame to our screen
            # NOTE : Do not run this code if you run your python on the robot
            # as NAO has no screen to show
            cv2.imshow("Frame", frame)

            # Let's crop the image frame so the focus is in center
            cropped_frame = frame[upper_left[1] : bottom_right[1], upper_left[0] : bottom_right[0]]

            # Converting cropped color image to Grayscale
            gray_frame = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2GRAY)

            # Resize the grayscale image to 28 x 28 (height x width)
            resized_frame = cv2.resize(gray_frame, dsize=(28, 28), interpolation=cv2.INTER_CUBIC)

            # Expanding the image to 1 x 28 x 28 (channel dimension first)
            image = np.expand_dims(resized_frame, axis=0)

            # NOTE(review): pixels are passed as raw 0-255 floats. If the model
            # was trained on transforms.ToTensor() output (scaled to [0, 1]),
            # divide by 255.0 here - confirm against the training code.
            image = torch.from_numpy(image).float()
            image = Variable(image)

            # Add the batch dimension: 1 x 1 x 28 x 28
            image = image.unsqueeze(0)

            # Prediction (no gradients are needed for inference)
            with torch.no_grad():
                output, last_layer = model(image)
            pred = torch.max(output, 1)[1].data.squeeze()
            print(pred)

            # NOTE(review): in the NAOqi documentation positive y appears to be
            # the robot's own left, so the signs below may be mirrored relative
            # to the robot's point of view - confirm the intended direction.
            gesture = int(pred)
            if gesture == 0:
                # Left
                print('Left Hand Gesture Detected')
                motionProxy.moveTo(0, -0.5, 0) # To move left
            elif gesture == 1:
                # Right
                print('Right Hand Gesture Detected')
                motionProxy.moveTo(0, 0.5, 0) # To move right

            # Get the key pressed in the image window
            key = cv2.waitKey(33)&0xFF
            if  key == ord('q') or key == 27:
                # Exit loop when 'q' or 'Esc' is pressed on the image window
                break

    finally:
        # Always release the camera subscription and close the window, even
        # when the loop is interrupted (e.g. KeyboardInterrupt) or fails.
        print("Unsubscribing  %s" % name)
        videoProxy.unsubscribe(name)
        cv2.destroyAllWindows()
