In [1]:
import copy
import utils
import torch
import numpy as np
import matplotlib.pyplot as plt

from env import Env
from agent import Agent
from torchsummary import summary

# Initialise Environment

In [2]:
# Workspace bounds: x and y span a centre value -/+ a shared half-extent,
# while the z range is given directly.
x_centre, y_centre, half_extent = -0.110, 0.535, 0.175
min_x, max_x = x_centre - half_extent, x_centre + half_extent
min_y, max_y = y_centre - half_extent, y_centre + half_extent
min_z, max_z = 0, 0.4

# One [min, max] row per axis, ordered x, y, z.
workspace_lim = np.array([[min_x, max_x], [min_y, max_y], [min_z, max_z]])
print(f"workspace space: \n{workspace_lim}")

# Create the environment from the block meshes directory, the requested
# object count and the workspace limits built above.
obj_dir = 'objects/blocks/'
N_obj = 3
env = Env(obj_dir, N_obj, workspace_lim)

workspace space: 
[[-0.285  0.065]
 [ 0.36   0.71 ]
 [ 0.     0.4  ]]
[SUCCESS] restart environment
[setup_rgbd_cam] 
 [[ 9.99999992e-01  2.89813737e-05  1.26784815e-04 -1.06169154e-01]
 [ 2.90796779e-05 -9.99999699e-01 -7.75429384e-04  5.35611757e-01]
 [ 1.26762304e-04  7.75433064e-04 -9.99999691e-01  3.42376167e-01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]
[SUCCESS] setup rgbd camera
[SUCCESS] load obj paths
[SUCCESS] randomly choose objects
[SUCCESS] randomly choose object colors
object 0: shape_0, pose: [-0.20163370404246994, 0.4024629930458873, 0.15, 2.6698950202255682, 3.2443683141271906, 2.8032962264941492]
[SUCCESS] add objects to simulation


# Test Environment Reset

In [3]:
# Restart the environment. reset_obj=False presumably keeps the current object
# selection instead of re-sampling it -- confirm against Env.reset.
env.reset(reset_obj=False)

[SUCCESS] restart environment
[setup_rgbd_cam] 
 [[ 9.99999990e-01  3.01665057e-05  1.38725731e-04 -1.06171375e-01]
 [ 3.02703689e-05 -9.99999719e-01 -7.48753515e-04  5.35606429e-01]
 [ 1.38703105e-04  7.48757707e-04 -9.99999710e-01  3.42390860e-01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]
[SUCCESS] setup rgbd camera
[SUCCESS] load obj paths
object 0: shape_0, pose: [-0.1997834691444808, 0.38619137474456106, 0.026316947673774314, 0.014095854728885832, -0.004121689057108273, -0.13506204367169508]
[SUCCESS] add objects to simulation


# Initialise Agent

In [4]:
# Build the agent around the existing environment, with N_batch set to 8.
agent = Agent(env, N_batch=8)

[SUCCESS] initialise environment
[SUCCESS] initialise networks
[SUCCESS] initialise memory buffer


  return torch._C._cuda_getDeviceCount() > 0


# Check Guidance

In [5]:
# Switch on the agent's debug flag, then ask the environment for push guidance
# with max_move set to 0.05.
agent.is_debug = True
delta_move = agent.env.push_guidance_generation(max_move=0.05)

# Accumulate the first three components of every guidance entry onto a
# hard-coded start position and print the running position together with the
# entry's last element (its type).
# NOTE(review): the start position looks like a recorded gripper pose --
# confirm it matches the environment's actual home pose.
gripper_pos = np.array([-0.11122626281611381, 0.4855598136140757, 0.2684023847637833])
for step in delta_move:
    gripper_pos += np.array(step[0:3])
    print(f'gripper_pos: {gripper_pos}, type: {step[-1]}')

[return_home] [-0.0006897854820629551, -0.00011562923627691966, 3.7950135190808595e-05] [0, 0, 0]
[SUCCESS] return home pose


In [6]:
# Run guided interaction for a single episode with grasp guidance disabled.
agent.interact_by_guidance(max_episode=1, grasp_guidance=False)

==== episode: 0 step: 0 ====
[return_home] [-0.0004854973945755558, -2.2243481043042263e-05, -2.0140213320075496e-06] [0, 0, 0]
[SUCCESS] return home pose
[move_reward] r: -0.23462995139781778
[grasp_reward] r: 0.0
[executable_reward] r: 0
[collision_to_ground_reward] r: 0
[workingspace_reward] r: 0
[STEP]: 0 [ACTION TYPE]: tensor([0.]) [REWARD]: -0.23462995139781778
[MOVE]: [-0.01473706 -0.01647011 -0.03544261 -0.13505219]


  unit_dir  = move_vector/move_norm


[move_reward] r: -0.19276165112180405
[grasp_reward] r: 0.0
[executable_reward] r: 0
[collision_to_ground_reward] r: 0
[workingspace_reward] r: 0
[STEP]: 1 [ACTION TYPE]: tensor([0.]) [REWARD]: -0.19276165112180405
[MOVE]: [-0.01473706 -0.01647011 -0.03544261  0.        ]
[move_reward] r: -0.15104025227485282
[grasp_reward] r: 0.0
[executable_reward] r: 0
[collision_to_ground_reward] r: 0
[workingspace_reward] r: 0
[STEP]: 2 [ACTION TYPE]: tensor([0.]) [REWARD]: -0.15104025227485282
[MOVE]: [-0.01473706 -0.01647011 -0.03544261  0.        ]
[move_reward] r: -0.10934294308068905
[grasp_reward] r: 0.0
[executable_reward] r: 0
[collision_to_ground_reward] r: 0
[workingspace_reward] r: 0
[STEP]: 3 [ACTION TYPE]: tensor([0.]) [REWARD]: -0.10934294308068905
[MOVE]: [-0.01473706 -0.01647011 -0.03544261  0.        ]
[move_reward] r: -0.06802190590595557
[grasp_reward] r: 0.0
[executable_reward] r: 0
[collision_to_ground_reward] r: 0
[workingspace_reward] r: 0
[STEP]: 4 [ACTION TYPE]: tensor([0.

# Check Encoder

In [None]:
# Print a torchsummary report for the agent's encoder.
# input_size=(1, 128, 128) is presumably (channels, height, width); torchsummary's
# input_size leaves out the batch dimension.
summary(agent.encoder, input_size=(1, 128, 128))

In [None]:
# Grab a colour/depth image pair from the environment and report the raw depth range.
color_img, depth_img = agent.env.get_rgbd_data()
print(f'dmin: {np.min(depth_img)}, dmax: {np.max(depth_img)}')

# Run the agent's preprocessing and show the resulting tensor shapes.
in_color_img, in_depth_img = agent.preprocess_input(color_img, depth_img)
print(in_color_img.shape, in_depth_img.shape, sep='\n')

# Prepend a new leading dimension (dim 0) to both inputs; presumably the batch
# dimension expected by the encoder.
in_color_img, in_depth_img = in_color_img.unsqueeze(0), in_depth_img.unsqueeze(0)
print(in_color_img.shape, in_depth_img.shape, sep='\n')

# Forward the depth input through the encoder without tracking gradients.
with torch.no_grad():
    latent_vector, reconstructed = agent.encoder(in_depth_img)

# Report the value range of the encoder input and the shapes of both outputs.
print(f'dmin: {in_depth_img.min()}, dmax: {in_depth_img.max()}')
print('latent vector shape: ', latent_vector.shape)
print('reconstructed shape: ', reconstructed.shape)

# Show the first depth input, moving dim 0 to the end for imshow.
plt.imshow(in_depth_img[0].permute(1, 2, 0))

# Check Actor

In [None]:
# Query the actor with the latent vector from the encoder check (no gradients).
with torch.no_grad():
    actor_out = agent.actor.get_actions(latent_vector)

# get_actions yields five values; only `a` and `a_type` are used by the cells
# visible in this notebook.
a, a_type, z, normal, a_type_probs = actor_out

print(f"action: {a}, action_type: {a_type}")

# Check Critic

In [None]:
# Evaluate both critics and both target critics on the action sampled by the
# actor, without tracking gradients.
with torch.no_grad():

    # The critics receive the action type as a float one-hot vector over 3 classes.
    a_type_onehot = torch.nn.functional.one_hot(a_type.long(), num_classes=3).float()

    # All four networks are called with the same keyword arguments.
    critic_inputs = dict(state=latent_vector, action=a, action_type=a_type_onehot)

    q1 = agent.critic1(**critic_inputs)
    q2 = agent.critic2(**critic_inputs)

    tq1 = agent.critic1_target(**critic_inputs)
    tq2 = agent.critic2_target(**critic_inputs)

print(f"q1: {q1}, q2: {q2}, tq1: {tq1}, tq2: {tq2}")

# Testing raw data and input preprocessing

In [None]:
# Fetch a colour/depth image pair from the agent's environment.
color_img, depth_img = agent.env.get_rgbd_data()

In [None]:
# Run the agent's preprocessing on the raw colour and depth images.
in_color_img, in_depth_img = agent.preprocess_input(color_img, depth_img)

In [None]:
# Report the shapes of the preprocessed colour and depth inputs, one per line.
print(in_color_img.shape, in_depth_img.shape, sep='\n')

In [None]:
# Show the preprocessed depth (left) and colour (right) inputs side by side,
# moving dim 0 to the end so imshow receives it as the last axis.
fig, ax = plt.subplots(1, 2)
for panel, image in zip(ax, (in_depth_img, in_color_img)):
    panel.imshow(image.permute(1, 2, 0))
plt.show()

# Test interact

In [None]:
# Call the agent's interact() entry point with its default arguments
# (its behaviour is defined in the agent module, not visible in this notebook).
agent.interact()