In [1]:
from scipy import ndimage
import numpy as np
import torch
import os
import sys
if '/opt/ros/kinetic/lib/python2.7/dist-packages' in sys.path:
    sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages')
import cv2
sys.path.append(os.path.join(os.getcwd(), 'src/grasp_suck/src'))
from matplotlib import pyplot as plt
from model_v2 import reinforcement_net

# Helper functions

In [2]:
def preprocessing(color, depth):
    """Convert a BGR color image and a depth map into normalized network inputs.

    Both inputs are upsampled 2x (nearest neighbour), zero-padded on every
    spatial side, normalized, and returned as float32 tensors laid out as
    (1, 3, H, W).

    Args:
        color: H x W x C array in BGR channel order (OpenCV convention). Only
            channels 0-2 are used; values are divided by 255 before
            normalization.
        depth: H x W array of depth values.

    Returns:
        Tuple ``(input_color_data, input_depth_data, padding_width)`` where the
        first two are torch tensors and ``padding_width`` is the number of
        pixels (int) added on each side of the 2x-upsampled images.
    """
    # Upsample both inputs by a factor of two (order=0 -> nearest neighbour)
    color_2x = ndimage.zoom(color, zoom=[2, 2, 1], order=0)
    depth_2x = ndimage.zoom(depth, zoom=[2, 2], order=0)

    # Pad up to the image diagonal so rotations inside the network do not crop
    # the content; the padded side is rounded up to a multiple of 32 because
    # the network shrinks its input 32 times.
    side = color_2x.shape[0]
    padded_side = np.ceil(float(side) * np.sqrt(2) / 32) * 32
    padding_width = int((padded_side - side) / 2)

    # Reorder BGR (cv) -> RGB and zero-pad the two spatial axes only
    spatial_pad = (padding_width, padding_width)
    rgb_2x = np.pad(color_2x[:, :, [2, 1, 0]], (spatial_pad, spatial_pad, (0, 0)),
                    'constant', constant_values=0)
    depth_2x = np.pad(depth_2x, padding_width, 'constant', constant_values=0)

    # Normalize color image with ImageNet data
    image_mean = np.array([0.485, 0.456, 0.406])  # for sim: [0.20414721, 0.17816422, 0.15419899]
    image_std = np.array([0.229, 0.224, 0.225])   # for sim: [0.1830081 , 0.16705943, 0.17520182]
    input_color_img = (rgb_2x.astype(float) / 255 - image_mean) / image_std  # np.uint8 to float

    # Normalize depth image and duplicate the single channel to DDD
    depth_mean = 0.0909769548291  # for sim: 0.032723393
    depth_std = 0.0397293901695   # for sim: 0.056900032
    normalized_depth = (depth_2x.astype(float) - depth_mean) / depth_std
    input_depth_img = np.stack([normalized_depth] * 3, axis=2)

    def _to_nchw(hwc):
        # H, W, C -> N, C, H, W (a trailing batch axis is appended, then moved to the front)
        hwcn = hwc.reshape(hwc.shape[0], hwc.shape[1], hwc.shape[2], 1)
        return torch.from_numpy(hwcn.astype(np.float32)).permute(3, 2, 0, 1)

    input_color_data = _to_nchw(input_color_img)
    input_depth_data = _to_nchw(input_depth_img)
    return input_color_data, input_depth_data, padding_width

In [95]:
def vis_affordance(predictions, max_value=5.0):
    """Render a 2-D affordance map as a JET color-mapped heatmap.

    Values are clamped to ``[0, max_value]``, scaled linearly to 0..255,
    converted to ``uint8`` and passed to ``cv2.applyColorMap`` with
    ``cv2.COLORMAP_JET``.

    Args:
        predictions: 2-D array of predicted values. It is copied, never
            modified. Integer arrays are accepted; they are promoted to float
            before scaling.
        max_value: Value that maps to the top of the color scale. The default
            of 5 reproduces the previously hard-coded scaling.

    Returns:
        The color image returned by ``cv2.applyColorMap``.

    Raises:
        ValueError: If ``max_value`` is not positive.
    """
    if max_value <= 0:
        raise ValueError("max_value must be positive")
    scores = np.array(predictions, copy=True)
    # In-place division of an integer array raises a casting error, so promote
    # non-float input first; float input keeps its dtype so results are unchanged.
    if not np.issubdtype(scores.dtype, np.floating):
        scores = scores.astype(np.float64)
    # View the value as probability: clamp to [0, 1] after scaling
    scores = np.clip(scores / max_value, 0, 1)
    gray = (scores * 255).astype(np.uint8)
    # Add an explicit single-channel axis before color mapping
    gray = gray.reshape(gray.shape[0], gray.shape[1], 1)
    heatmap = cv2.applyColorMap(gray, cv2.COLORMAP_JET)
    return heatmap

# Model

## Parameter definitions

In [None]:
########################### CHANGE HERE ###########################
# Fill in the three placeholders below before running the following cells;
# this cell is intentionally incomplete until they are set.
# Path to the saved model weights (passed to `torch.load`).
model_name = # TODO
# The input image should be the orthogonal projection of the point cloud along the gravity direction.
# The depth array stores, for each pixel, the height from the bottom.
# `color_name` is read with `cv2.imread`, `depth_name` with `np.load`.
color_name = # TODO
depth_name = # TODO

In [10]:
# Build the network and restore the trained weights from `model_name`
net = reinforcement_net(use_cuda=True)
net.load_state_dict(torch.load(model_name))
# Move the model to the GPU, then switch it to evaluation mode
net = net.cuda()
net = net.eval()

In [119]:
# Load the inputs selected in the parameter cell
color = cv2.imread(color_name)
# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise surface below as an opaque AttributeError.
if color is None:
    raise IOError("Unable to read color image: {}".format(color_name))
depth = np.load(depth_name)
size = color.shape[0]
# Preprocessing
color_tensor, depth_tensor, pad = preprocessing(color, depth)
color_tensor = color_tensor.cuda()
depth_tensor = depth_tensor.cuda()
prediction = net.forward(color_tensor, depth_tensor, is_volatile=True)
# prediction: list with length 6
# | index | tool |
# | --- | --- |
# | 0 | small suction cup |
# | 1 | medium suction cup |
# | 2 | gripper with -90 deg |
# | 3 | gripper with -45 deg |
# | 4 | gripper with 0 deg |
# | 5 | gripper with 45 deg |
# Only show small suction cup
# Crop the padded border away. The offset is pad//2 rather than pad --
# presumably because the network output is at half the resolution of the
# 2x-upsampled input; TODO confirm against the model definition.
# NOTE(review): the same `size` is used for rows and columns, so this assumes a square image.
tool_1 = prediction[0][0, 0, pad//2:size+pad//2, pad//2:size+pad//2].detach().cpu().numpy() # small suction cup
tool_1_cmap = vis_affordance(tool_1)
# Overlay the heatmap on the original (BGR) image
combine = cv2.addWeighted(color, 1.0, tool_1_cmap, 0.8, 0.0)
# Locate the highest-valued pixel (first one in row-major order on ties)
best = np.where(tool_1==np.max(tool_1))
# Cast to plain Python ints: the indices are NumPy integer scalars, which
# some OpenCV builds may not accept as point coordinates.
u, v = int(best[1][0]), int(best[0][0])
combine = cv2.circle(combine, (u, v), 3, (255, 255, 255), 2)
# Reverse the channel axis (BGR -> RGB) for matplotlib
plt.imshow(combine[:,:,::-1])