In [1]:
import torch
import utils
from models.ggcnn import GGCNN
import numpy as np
import scipy.ndimage as ndimage
import time
import math

import cv2
import tifffile as tf

In [2]:
def pre_process(depth, crop_size, out_size=300, return_mask=False, crop_y_offset=0):
    # Crop
    depth_crop = depth[(imh - crop_size) // 2 - crop_y_offset:(imh - crop_size) // 2 + crop_size - crop_y_offset,
                               (imw - crop_size) // 2:(imw - crop_size) // 2 + crop_size]
    
    #Inpainting
    depth_crop = cv2.copyMakeBorder(depth_crop, 1, 1, 1, 1, cv2.BORDER_DEFAULT)
    depth_nan_mask = np.isnan(depth_crop).astype(np.uint8)

    kernel = np.ones((3, 3),np.uint8)
    depth_nan_mask = cv2.dilate(depth_nan_mask, kernel, iterations=1)

    depth_crop[depth_nan_mask==1] = 0

    # Scale to keep as float, but has to be in bounds -1:1 to keep opencv happy.
    depth_scale = np.abs(depth_crop).max()
    depth_crop = depth_crop.astype(np.float32) / depth_scale 

    depth_crop = cv2.inpaint(depth_crop, depth_nan_mask, 1, cv2.INPAINT_NS)

    # Back to original size and value range.
    depth_crop = depth_crop[1:-1, 1:-1]
    depth_crop = depth_crop * depth_scale

    depth_crop = cv2.resize(depth_crop, (out_size, out_size), cv2.INTER_AREA)

    if return_mask:
        depth_nan_mask = depth_nan_mask[1:-1, 1:-1]
        depth_nan_mask = cv2.resize(depth_nan_mask, (out_size, out_size), cv2.INTER_NEAREST)
        return depth_crop, depth_nan_mask
    else:
        return depth_crop

In [3]:
if __name__ == '__main__':
    device = (
            "cuda"
            if torch.cuda.is_available()
            else "mps"
            if torch.backends.mps.is_available()
            else "cpu"
        )
    model = torch.load('ggcnn_weights_cornell/ggcnn_epoch_23_cornell', map_location=torch.device(device), weights_only=False)
    print(model)

GGCNN(
  (conv1): Conv2d(1, 32, kernel_size=(9, 9), stride=(3, 3), padding=(3, 3))
  (conv2): Conv2d(32, 16, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (conv3): Conv2d(16, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (convt1): ConvTranspose2d(8, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
  (convt2): ConvTranspose2d(8, 16, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), output_padding=(1, 1))
  (convt3): ConvTranspose2d(16, 32, kernel_size=(9, 9), stride=(3, 3), padding=(3, 3), output_padding=(1, 1))
  (pos_output): Conv2d(32, 1, kernel_size=(2, 2), stride=(1, 1))
  (cos_output): Conv2d(32, 1, kernel_size=(2, 2), stride=(1, 1))
  (sin_output): Conv2d(32, 1, kernel_size=(2, 2), stride=(1, 1))
  (width_output): Conv2d(32, 1, kernel_size=(2, 2), stride=(1, 1))
)




In [4]:
depth = cv2.imread('pcd0100d.tiff', -1)
imh, imw = depth.shape
print(imh, " ", imw)
out_size=300
crop_y_offset=40
crop_size=300

480   640


In [5]:
def predict(depth, crop_size, out_size, crop_y_offset=crop_y_offset):
    depth, depth_nan_mask = pre_process(depth, crop_size, out_size, True, crop_y_offset=crop_y_offset)
    # normalize
    depth = np.clip((depth - depth.mean()), -1, 1)
    tensor = torch.from_numpy(depth).float()
    
    tensor = torch.reshape(tensor, (1, 300, 300))
    
    pred_out = model(tensor)
    
    #pos, cos, sin, width
    pred_out= np.array([pred_out[0].detach().numpy(),
                        pred_out[1].detach().numpy(),
                        pred_out[2].detach().numpy(),
                        pred_out[3].detach().numpy()])
    return pred_out

In [6]:
pred_out=predict(depth, crop_size, out_size, crop_y_offset=crop_y_offset)

In [7]:
print(np.shape(pred_out))

(4, 1, 300, 300)


In [8]:
def showImage(array, name_im):
    array= np.reshape(array, (300,300))
    grayImage = cv2.cvtColor(array, cv2.COLOR_GRAY2BGR)
    cv2.imshow(name_im, grayImage)
    cv2.waitKey(0)

In [9]:
# showImage("pos",pred_out[1])
print(np.shape(pred_out[1]))
print(pred_out[0])

(1, 300, 300)
[[[-0.00074773  0.00042186  0.00073207 ... -0.02339884 -0.019777
   -0.00198394]
  [ 0.00078627 -0.00645291 -0.00140484 ... -0.02787955 -0.02352439
   -0.00152461]
  [-0.00478718 -0.00146893 -0.01199936 ... -0.02854259 -0.02819814
   -0.00735021]
  ...
  [-0.00935977 -0.01171742 -0.01715592 ... -0.01650656 -0.01007534
   -0.01441366]
  [-0.00963436 -0.01398444 -0.00634139 ... -0.00062511 -0.01076333
   -0.01479367]
  [-0.01287851 -0.01571235 -0.0165068  ... -0.01099557  0.00499863
    0.01250987]]]


In [133]:
showImage(pred_out[3], "pos")

In [10]:
argmax = np.argmax(pred_out[0])
x= argmax%300
y= int(np.ceil(argmax/300))

print(x,y)

117 212


In [11]:
argmax = np.argmax(pred_out[0])
width= pred_out[3][0][y][x]*150
print(width)
angle = 0.5 * np.arctan2(pred_out[2][0][y][x],pred_out[1][0][x][y]) 
print(angle)

42.18177795410156
0.044570039957761765


In [12]:
x=x+imw*0.5-150
y=y+imh*0.5-150-crop_y_offset
p1_x= int(x+(width*0.5*np.cos(angle)))
p2_x= int(x-(width*0.5*np.cos(angle)))

p1_y= int(y+(width*0.5*np.sin(angle)))
p2_y= int(y-(width*0.5*np.sin(angle)))
print(p1_x, p1_y, p2_x, p2_y)

308 262 265 261


In [None]:
object= cv2.imread("pcd0100r.png", cv2.IMREAD_COLOR)
object= cv2.line(object,(p1_x,p1_y),(p2_x,p2_y),(255,0,0),5)
cv2.imshow("Grasp", object)
cv2.waitKey(0)