In [1]:
import copy
import utils
import torch
import constants
import numpy as np
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.distributions import Normal, Categorical

from env import Env
from agent import Agent
from torchsummary import summary

# Initialise Environment

In [2]:
# Workspace limits: x and y span a square given by its centre and half-width,
# z has a fixed lower and upper bound.
center_x, center_y = -0.110, 0.510
half_width = 0.175

min_x = center_x - half_width
max_x = center_x + half_width
min_y = center_y - half_width
max_y = center_y + half_width
min_z = 0
max_z = 0.4

# One row per axis (x, y, z); columns are (lower bound, upper bound).
workspace_lim = np.asarray([
    [min_x, max_x],
    [min_y, max_y],
    [min_z, max_z],
])

print(f"workspace space: \n{workspace_lim}")

# Object directory and object count handed to the environment constructor.
obj_dir = 'objects/blocks/'
N_obj = 1

env = Env(obj_dir, N_obj, workspace_lim)

workspace space: 
[[-0.285  0.065]
 [ 0.335  0.685]
 [ 0.     0.4  ]]
[SUCCESS] restart environment
[SUCCESS] setup rgbd camera
[SUCCESS] load obj paths
[SUCCESS] randomly choose objects
[SUCCESS] randomly choose object colors
object 0: shape_0, pose: [-0.21864485058366134, 0.6061462888887129, 0.15, 5.01677905609941, 3.9754634898561556, 5.377313770117954]
[SUCCESS] add objects to simulation


# Test Environment Reset

In [3]:
# Reset the environment with reset_obj = False.
# NOTE(review): the logged output for this call shows no random object/colour
# selection step, so the existing object choice appears to be reused — confirm in Env.reset.
env.reset(reset_obj = False)

[SUCCESS] restart environment
[SUCCESS] setup rgbd camera
[SUCCESS] load obj paths
object 0: shape_0, pose: [-0.20033428892096974, 0.5982588308011253, 0.02599026618328189, 1.565187168305127, -0.7566068208061396, 1.559547456018734]
[SUCCESS] add objects to simulation


# Initialise Agent

In [4]:
# Create the agent around the existing `env` instance.
# N_batch = 8 is presumably the training batch size — TODO confirm against Agent.__init__.
agent = Agent(env, N_batch = 8)

device: cpu
[SUCCESS] initialise environment
[SUCCESS] initialise networks
[SUCCESS] initialise memory buffer


  return torch._C._cuda_getDeviceCount() > 0


# Check Network Architecture

In [5]:
# Print the layer-by-layer summary of agent.hld_net for a 1-channel 128x128 input
# (input_size is given as (channels, height, width)).
summary(agent.hld_net, input_size=(1, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 64, 64]              80
       BatchNorm2d-2            [-1, 8, 64, 64]              16
              ReLU-3            [-1, 8, 64, 64]               0
            Conv2d-4           [-1, 16, 32, 32]           1,168
       BatchNorm2d-5           [-1, 16, 32, 32]              32
              ReLU-6           [-1, 16, 32, 32]               0
            Conv2d-7           [-1, 32, 16, 16]           4,640
       BatchNorm2d-8           [-1, 32, 16, 16]              64
              ReLU-9           [-1, 32, 16, 16]               0
           Conv2d-10             [-1, 64, 8, 8]          18,496
      BatchNorm2d-11             [-1, 64, 8, 8]             128
             ReLU-12             [-1, 64, 8, 8]               0
           Conv2d-13            [-1, 128, 4, 4]          73,856
      BatchNorm2d-14            [-1, 12

In [6]:
# Print the layer-by-layer summary of the grasp actor for a 2-channel 128x128 input
# (input_size is given as (channels, height, width)).
summary(agent.grasp_actor, input_size=(2, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 64, 64]             152
       BatchNorm2d-2            [-1, 8, 64, 64]              16
              ReLU-3            [-1, 8, 64, 64]               0
            Conv2d-4           [-1, 16, 32, 32]           1,168
       BatchNorm2d-5           [-1, 16, 32, 32]              32
              ReLU-6           [-1, 16, 32, 32]               0
            Conv2d-7           [-1, 32, 16, 16]           4,640
       BatchNorm2d-8           [-1, 32, 16, 16]              64
              ReLU-9           [-1, 32, 16, 16]               0
           Conv2d-10             [-1, 64, 8, 8]          18,496
      BatchNorm2d-11             [-1, 64, 8, 8]             128
             ReLU-12             [-1, 64, 8, 8]               0
           Conv2d-13            [-1, 128, 4, 4]          73,856
      BatchNorm2d-14            [-1, 12

In [7]:
# Print the layer-by-layer summary of the push actor for a 2-channel 128x128 input
# (input_size is given as (channels, height, width)).
summary(agent.push_actor, input_size=(2, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 64, 64]             152
       BatchNorm2d-2            [-1, 8, 64, 64]              16
              ReLU-3            [-1, 8, 64, 64]               0
            Conv2d-4           [-1, 16, 32, 32]           1,168
       BatchNorm2d-5           [-1, 16, 32, 32]              32
              ReLU-6           [-1, 16, 32, 32]               0
            Conv2d-7           [-1, 32, 16, 16]           4,640
       BatchNorm2d-8           [-1, 32, 16, 16]              64
              ReLU-9           [-1, 32, 16, 16]               0
           Conv2d-10             [-1, 64, 8, 8]          18,496
      BatchNorm2d-11             [-1, 64, 8, 8]             128
             ReLU-12             [-1, 64, 8, 8]               0
           Conv2d-13            [-1, 128, 4, 4]          73,856
      BatchNorm2d-14            [-1, 12

In [8]:
# Print the summaries of both grasp critics; each is fed an 8-channel 128x128 input
# (input_size is given as (channels, height, width)).
summary(agent.grasp_critic1, input_size=(8, 128, 128))
summary(agent.grasp_critic2, input_size=(8, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 64, 64]             584
       BatchNorm2d-2            [-1, 8, 64, 64]              16
              ReLU-3            [-1, 8, 64, 64]               0
            Conv2d-4           [-1, 16, 32, 32]           1,168
       BatchNorm2d-5           [-1, 16, 32, 32]              32
              ReLU-6           [-1, 16, 32, 32]               0
            Conv2d-7           [-1, 32, 16, 16]           4,640
       BatchNorm2d-8           [-1, 32, 16, 16]              64
              ReLU-9           [-1, 32, 16, 16]               0
           Conv2d-10             [-1, 64, 8, 8]          18,496
      BatchNorm2d-11             [-1, 64, 8, 8]             128
             ReLU-12             [-1, 64, 8, 8]               0
           Conv2d-13            [-1, 128, 4, 4]          73,856
      BatchNorm2d-14            [-1, 12

In [9]:
# Print the summaries of both push critics; each is fed a 6-channel 128x128 input
# (input_size is given as (channels, height, width)).
summary(agent.push_critic1, input_size=(6, 128, 128))
summary(agent.push_critic2, input_size=(6, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 64, 64]             440
       BatchNorm2d-2            [-1, 8, 64, 64]              16
              ReLU-3            [-1, 8, 64, 64]               0
            Conv2d-4           [-1, 16, 32, 32]           1,168
       BatchNorm2d-5           [-1, 16, 32, 32]              32
              ReLU-6           [-1, 16, 32, 32]               0
            Conv2d-7           [-1, 32, 16, 16]           4,640
       BatchNorm2d-8           [-1, 32, 16, 16]              64
              ReLU-9           [-1, 32, 16, 16]               0
           Conv2d-10             [-1, 64, 8, 8]          18,496
      BatchNorm2d-11             [-1, 64, 8, 8]             128
             ReLU-12             [-1, 64, 8, 8]               0
           Conv2d-13            [-1, 128, 4, 4]          73,856
      BatchNorm2d-14            [-1, 12

# Gather Demo Experience

In [10]:
# Disabled: this cell does nothing as written. Uncomment the call below to
# gather guidance (demo) experience with the agent.
# NOTE(review): max_episode = 15 presumably caps the number of demo episodes — confirm.
# agent.gather_guidance_experience(max_episode = 15)

# Get Train and Test Loader

In [11]:
# Populate the replay buffer via load_buffer(), then split its experience by action type.
agent.buffer_replay.load_buffer()

replay = agent.buffer_replay
grasp_exp = replay.get_experience_by_action_type(0)  # action type 0 -> grasp experience
push_exp = replay.get_experience_by_action_type(1)   # action type 1 -> push experience

# Build one (train, test) loader pair per action type, grasp first, then push.
grasp_loaders = agent.get_train_test_dataloader(grasp_exp, is_grasp = True)
grasp_train_loader, grasp_test_loader = grasp_loaders

push_loaders = agent.get_train_test_dataloader(push_exp, is_grasp = False)
push_train_loader, push_test_loader = push_loaders

# Grasp / Push System Behaviour Cloning

In [13]:
# Behaviour cloning for the grasp networks is currently disabled; the call is kept for reference.
# agent.behaviour_cloning(grasp_train_loader, agent.grasp_critic1, agent.grasp_critic2, agent.grasp_actor, num_epochs = 1000)
# Active run: clone the push system only (both push critics and the push actor)
# on the push training loader for 100 epochs.
agent.behaviour_cloning(push_train_loader, agent.push_critic1, agent.push_critic2, agent.push_actor, num_epochs = 100, is_grasp = False)

Epoch 1/100, Critic1 Loss: 0.008735, Critic2 Loss: 0.009715, Actor Loss: 0.000305
Epoch 2/100, Critic1 Loss: 0.005548, Critic2 Loss: 0.006517, Actor Loss: 0.000190
Epoch 3/100, Critic1 Loss: 0.004416, Critic2 Loss: 0.004712, Actor Loss: 0.000197
Epoch 4/100, Critic1 Loss: 0.003326, Critic2 Loss: 0.003687, Actor Loss: 0.000135
Epoch 5/100, Critic1 Loss: 0.002455, Critic2 Loss: 0.002761, Actor Loss: 0.000127
Epoch 6/100, Critic1 Loss: 0.002270, Critic2 Loss: 0.002708, Actor Loss: 0.000103
Epoch 7/100, Critic1 Loss: 0.002216, Critic2 Loss: 0.002314, Actor Loss: 0.000103
Epoch 8/100, Critic1 Loss: 0.002556, Critic2 Loss: 0.002608, Actor Loss: 0.000088
Epoch 9/100, Critic1 Loss: 0.002095, Critic2 Loss: 0.002202, Actor Loss: 0.000079
Epoch 10/100, Critic1 Loss: 0.002146, Critic2 Loss: 0.002396, Actor Loss: 0.000089
Epoch 11/100, Critic1 Loss: 0.002189, Critic2 Loss: 0.002266, Actor Loss: 0.000076
Epoch 12/100, Critic1 Loss: 0.002115, Critic2 Loss: 0.002057, Actor Loss: 0.000092
Epoch 13/100,

# Test Raw Data and Input Preprocessing

In [None]:
# Fetch the raw colour and depth images from the agent's environment.
color_img, depth_img = agent.env.get_rgbd_data()

In [None]:
# Run the raw images through the agent's preprocessing step.
# NOTE(review): later cells call .shape and .permute on the results, so these
# are presumably torch tensors — confirm in Agent.preprocess_input.
in_color_img, in_depth_img = agent.preprocess_input(color_img, depth_img)

In [None]:
# Show the shapes of the preprocessed colour and depth inputs, one per line.
print(in_color_img.shape, in_depth_img.shape, sep="\n")

In [None]:
# Show the preprocessed depth (left) and colour (right) inputs side by side.
fig, ax = plt.subplots(1, 2)

# permute((1,2,0)) moves the first axis to the end before plotting —
# presumably (C, H, W) -> (H, W, C), the channel-last layout imshow expects; TODO confirm.
ax[0].imshow(in_depth_img.permute((1,2,0)))
ax[0].set_title("Preprocessed depth")  # label panels so the figure stands alone

ax[1].imshow(in_color_img.permute((1,2,0)))
ax[1].set_title("Preprocessed colour")

plt.show()

# Test interact

In [None]:
# Run the agent's interaction routine.
# NOTE(review): its behaviour (episode count, whether it trains or only acts)
# is defined in Agent.interact and is not visible here — confirm before running.
agent.interact()