# Prepare environment

In [1]:
# Install the OpenGL Python bindings and ffmpeg as system packages.
# Both stdout and stderr are discarded, so a failed install is silent too.
!apt install -y python-opengl ffmpeg > /dev/null 2>&1

# Provides the `pyvirtualdisplay` package (installed unpinned) into the kernel's environment.
%pip install pyvirtualdisplay

Note: you may need to restart the kernel to use updated packages.


In [2]:
!pip install gym==0.25.2 
!pip install gymnasium==1.0.0 
!pip install imutils==0.5.4 
!pip install Jinja2==3.1.4 
!pip install joblib 
!pip install libclang==18.1.1 
!pip install Markdown==3.7 
!pip install MarkupSafe==3.0.2 
!pip install matplotlib==3.9.3 
!pip install panda-gym==3.0.7 
!pip install pillow==11.0.0 
!pip install pybullet==3.2.6 
!pip install six==1.16.0 
!pip install sympy==1.13.1 



In [3]:
from pyvirtualdisplay import Display

display = Display(visible=0, size=(1024, 768))
display.start()

from matplotlib import pyplot as plt, animation
%matplotlib inline
from IPython import display

def create_anim(frames, dpi, fps):
    """Build a matplotlib animation that steps through ``frames``.

    The figure is sized so that, at the given ``dpi``, it matches the pixel
    dimensions of the first frame.

    Args:
        frames: Non-empty sequence of image arrays accepted by ``plt.imshow``;
            ``frames[0].shape[0]`` and ``frames[0].shape[1]`` are used as the
            height and width in pixels.
        dpi: Dots per inch used to convert the pixel size to a figure size.
        fps: Playback rate in frames per second; must be positive.

    Returns:
        The ``animation.FuncAnimation`` object bound to the newly created
        (and now current) figure.

    Raises:
        ValueError: If ``fps`` is not positive.
    """
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps!r}")

    first_frame = frames[0]
    fig = plt.figure(
        figsize=(first_frame.shape[1] / dpi, first_frame.shape[0] / dpi),
        dpi=dpi,
    )

    patch = plt.imshow(first_frame)

    def setup():
        # Hide the axes so that only the image is visible.
        plt.axis('off')

    def animate(i):
        # Swap the displayed image for the i-th frame.
        patch.set_data(frames[i])

    # FuncAnimation's `interval` is the delay between frames in milliseconds,
    # not a frame rate, so convert frames-per-second into a per-frame delay.
    anim = animation.FuncAnimation(
        fig,
        animate,
        init_func=setup,
        frames=len(frames),
        interval=1000 / fps,
    )

    return anim


def display_anim(frames, dpi=72, fps=50):
    """Render ``frames`` as an animation and return it as a JS/HTML string.

    Args:
        frames: Non-empty sequence of image arrays (see ``create_anim``).
        dpi: Dots per inch used to size the figure.
        fps: Playback rate passed through to ``create_anim``.

    Returns:
        The string produced by ``Animation.to_jshtml()``.
    """
    anim = create_anim(frames, dpi, fps)
    # create_anim makes its new figure the current one; grab it so it can be
    # released once the HTML has been generated.
    fig = plt.gcf()
    try:
        return anim.to_jshtml()
    finally:
        # Without this every call leaves an open figure behind (and the inline
        # backend additionally shows it as a static image).
        plt.close(fig)


def save_anim(frames, filename, dpi=72, fps=50):
    """Render ``frames`` as an animation and write it to ``filename``.

    Args:
        frames: Non-empty sequence of image arrays (see ``create_anim``).
        filename: Output path handed to ``Animation.save``.
        dpi: Dots per inch used to size the figure.
        fps: Playback rate passed through to ``create_anim``.
    """
    anim = create_anim(frames, dpi, fps)
    # create_anim makes its new figure the current one; grab it so it can be
    # released once the file has been written.
    fig = plt.gcf()
    try:
        anim.save(filename)
    finally:
        # Release the figure even if saving fails, so repeated calls do not
        # accumulate open figures.
        plt.close(fig)


class trigger:
    """Callable switch that returns a stored value regardless of its argument.

    A new instance holds ``True``; ``set`` replaces the stored value.
    """

    def __init__(self):
        """Create the switch holding ``True``."""
        self._trigger = True

    def __call__(self, e):
        """Return the stored value; the argument ``e`` is not used."""
        return self._trigger

    def set(self, t):
        """Store ``t`` as the value returned by subsequent calls."""
        self._trigger = t

# Clone project to working directory

In [4]:
!git clone https://github.com/nmq443/RBE3043-23.git

fatal: destination path 'RBE3043-23' already exists and is not an empty directory.


In [5]:
import os

# Work from the project's `src` directory so the imports in the next cell resolve.
# The path is relative, so a second run in the same kernel would otherwise
# look for `RBE3043-23/src` *inside* `src` and raise FileNotFoundError;
# skip the chdir when we are already there.
SRC_DIR = os.path.join('RBE3043-23', 'src')
if not os.path.normpath(os.getcwd()).endswith(os.path.normpath(SRC_DIR)):
    os.chdir(SRC_DIR)

In [None]:
# Build the environment, the actor/critic networks and the trainer, then
# start training (resuming from ./training when saved state is present).

# NOTE(review): star import; My_Arm_RobotEnv used below presumably comes
# from test_env — confirm, and prefer an explicit import.
from test_env import *
from model import DiscreteActor, ContinuousActor, Critic
import torch
from trainer import Trainer
from os import path
from pathlib import Path

# Integer ids of the three discrete actions; they mirror the values in
# action_space['discrete'] below. They are not referenced again in this cell.
MOVE = 0
PICK = 1
PLACE = 2

action_space = {
    'discrete': {'Move': 0, 'Pick': 1, 'Place': 2},
    # presumably one continuous-parameter count per discrete action — TODO confirm
    'continuous': [4, 4, 4]
}

# Number of discrete actions (3) and the raw 'continuous' list, passed to the actors.
discrete_dim = len(action_space['discrete'])
continuous_dim = action_space['continuous']

env = My_Arm_RobotEnv(
    observation_type=0,
    render_mode='rgb_array',
    blocker_bar=False,
    objects_count=1,
    sorting_count=1
)

# reset() returns a pair; the first item is indexed by 'observation' and its
# length is used as the input size of all three networks.
obs, _ = env.reset()
obs_dim = len(obs['observation'])
d_actor = DiscreteActor(obs_dim=obs_dim, output_dim=discrete_dim)
c_actor = ContinuousActor(obs_dim=obs_dim,
                          continuous_param_dim=continuous_dim)
critic = Critic(obs_dim=obs_dim)

trainer = Trainer(
    env=env,
    discrete_actor=d_actor,
    continuous_actor=c_actor,
    critic=critic,
    timesteps=2_000_000,
    timesteps_per_batch=5_000,
    max_timesteps_per_episode=750,
)

# Make sure the ./training directory exists, then load from it only when a
# state file is already there; otherwise training starts without loading.
Path("./training").mkdir(parents=True, exist_ok=True)
if path.isfile("./training/state.data"):
    trainer.load("./training")
trainer.train()

pybullet build time: Nov 28 2023 23:45:17


argv[0]=--background_color_red=0.7843137383460999
argv[1]=--background_color_green=0.7843137383460999
argv[2]=--background_color_blue=0.7843137383460999
startThreads creating 1 threads.
starting thread 0
started thread 0 
argc=5
argv[0] = --unused
argv[1] = --background_color_red=0.7843137383460999
argv[2] = --background_color_green=0.7843137383460999
argv[3] = --background_color_blue=0.7843137383460999
argv[4] = --start_demo_name=Physics Server
ExampleBrowserThreadFunc started
X11 functions dynamically loaded using dlopen/dlsym OK!
X11 functions dynamically loaded using dlopen/dlsym OK!
Creating context
Created GL 3.3 context
Direct GLX rendering context obtained
Making context current
GL_VENDOR=Mesa/X.org
GL_RENDERER=llvmpipe (LLVM 12.0.0, 256 bits)
GL_VERSION=4.5 (Core Profile) Mesa 21.2.6
GL_SHADING_LANGUAGE_VERSION=4.50
pthread_getconcurrency()=0
Version = 4.5 (Core Profile) Mesa 21.2.6
Vendor = Mesa/X.org
Renderer = llvmpipe (LLVM 12.0.0, 256 bits)
b3Printf: Selected demo: Physic

  coefficients = np.polyfit(

            Timesteps: 750 / 2,000,000 (0.0375%)
            Episodes: 1
            Currently: Rollout
            Latest Reward: -289
            Latest Avg Rewards: -289
            Recent Change: -144.54
            Best Reward: -289.07
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss: 0.0
        

            Timesteps: 1,500 / 2,000,000 (0.075%)
            Episodes: 2
            Currently: Rollout
            Latest Reward: -291
            Latest Avg Rewards: -290
            Recent Change: 1.92
            Best Reward: -289.07
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss

Object 0 dropped to the floor

b3Printf: Removing body failed



            Timesteps: 1,568 / 2,000,000 (0.0784%)
            Episodes: 3
            Currently: Rollout
            Latest Reward: -76
            Latest Avg Rewards: -219
            Recent Change: -106.77
            Best Reward: -75.53
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss: 0.0
        

            Timesteps: 2,318 / 2,000,000 (0.1159%)
            Episodes: 4
            Currently: Rollout
            Latest Reward: -333
            Latest Avg Rewards: -247
            Recent Change: -8.28
            Best Reward: -75.53
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss: 0.0
        

            

Object 0 dropped to the floor

b3Printf: Removing body failed



            Timesteps: 3,923 / 2,000,000 (0.1962%)
            Episodes: 7
            Currently: Rollout
            Latest Reward: -79
            Latest Avg Rewards: -263
            Recent Change: -2.31
            Best Reward: -75.53
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss: 0.0
        


Object 0 dropped to the floor

b3Printf: Removing body failed



            Timesteps: 4,026 / 2,000,000 (0.2013%)
            Episodes: 8
            Currently: Rollout
            Latest Reward: -95
            Latest Avg Rewards: -242
            Recent Change: -15.52
            Best Reward: -75.53
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss: 0.0
        

            Timesteps: 4,776 / 2,000,000 (0.2388%)
            Episodes: 9
            Currently: Rollout
            Latest Reward: -300
            Latest Avg Rewards: -249
            Recent Change: -7.04
            Best Reward: -75.53
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss: 0.0
        

            T

Object 0 dropped to the floor

b3Printf: Removing body failed



            Timesteps: 5,599 / 2,000,000 (0.2799%)
            Episodes: 11
            Currently: Rollout
            Latest Reward: -81
            Latest Avg Rewards: -249
            Recent Change: -5.18
            Best Reward: -75.53
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0025
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0037
            Latest Critic Loss: 1829.0818
            Avg Critic Loss: 1833.0417
        

            Timesteps: 6,349 / 2,000,000 (0.3174%)
            Episodes: 12
            Currently: Rollout
            Latest Reward: -333
            Latest Avg Rewards: -256
            Recent Change: -0.75
            Best Reward: -75.53
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0025
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0037
            Latest Critic Loss: 1829.0818
         

Object 0 dropped to the floor

b3Printf: Removing body failed



            Timesteps: 6,413 / 2,000,000 (0.3206%)
            Episodes: 13
            Currently: Rollout
            Latest Reward: -72
            Latest Avg Rewards: -242
            Recent Change: -6.67
            Best Reward: -71.66
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0025
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0037
            Latest Critic Loss: 1829.0818
            Avg Critic Loss: 1833.0417
        

            Timesteps: 7,163 / 2,000,000 (0.3582%)
            Episodes: 14
            Currently: Rollout
            Latest Reward: -362
            Latest Avg Rewards: -251
            Recent Change: -1.9
            Best Reward: -71.66
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0025
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0037
            Latest Critic Loss: 1829.0818
          

Object 0 dropped to the floor

b3Printf: Removing body failed



            Timesteps: 7,188 / 2,000,000 (0.3594%)
            Episodes: 15
            Currently: Rollout
            Latest Reward: -60
            Latest Avg Rewards: -238
            Recent Change: -6.29
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0025
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0037
            Latest Critic Loss: 1829.0818
            Avg Critic Loss: 1833.0417
        


Object 0 incorrectly sorted into sorting_one

b3Printf: Removing body failed



            Timesteps: 7,580 / 2,000,000 (0.379%)
            Episodes: 16
            Currently: Rollout
            Latest Reward: -123
            Latest Avg Rewards: -231
            Recent Change: -7.71
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0025
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0037
            Latest Critic Loss: 1829.0818
            Avg Critic Loss: 1833.0417
        

            Timesteps: 8,330 / 2,000,000 (0.4165%)
            Episodes: 17
            Currently: Rollout
            Latest Reward: -230
            Latest Avg Rewards: -231
            Recent Change: -6.44
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0025
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0037
            Latest Critic Loss: 1829.0818
         

Object 0 dropped to the floor

b3Printf: Removing body failed



            Timesteps: 8,465 / 2,000,000 (0.4233%)
            Episodes: 18
            Currently: Rollout
            Latest Reward: -96
            Latest Avg Rewards: -223
            Recent Change: -7.78
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0025
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0037
            Latest Critic Loss: 1829.0818
            Avg Critic Loss: 1833.0417
        


Object 0 dropped to the floor

b3Printf: Removing body failed



            Timesteps: 8,827 / 2,000,000 (0.4413%)
            Episodes: 19
            Currently: Rollout
            Latest Reward: -164
            Latest Avg Rewards: -220
            Recent Change: -7.55
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0025
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0037
            Latest Critic Loss: 1829.0818
            Avg Critic Loss: 1833.0417
        

            Timesteps: 9,577 / 2,000,000 (0.4788%)
            Episodes: 20
            Currently: Rollout
            Latest Reward: -235
            Latest Avg Rewards: -221
            Recent Change: -6.26
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0025
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0037
            Latest Critic Loss: 1829.0818
        

Object 0 dropped to the floor

b3Printf: Removing body failed



            Timesteps: 11,182 / 2,000,000 (0.5591%)
            Episodes: 23
            Currently: Rollout
            Latest Reward: -86
            Latest Avg Rewards: -220
            Recent Change: -4.53
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0006
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 1282.2802
            Avg Critic Loss: 1559.4198
        


Object 0 dropped to the floor

b3Printf: Removing body failed



            Timesteps: 11,625 / 2,000,000 (0.5813%)
            Episodes: 24
            Currently: Rollout
            Latest Reward: -207
            Latest Avg Rewards: -219
            Recent Change: -4.11
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0006
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 1282.2802
            Avg Critic Loss: 1559.4198
        

            Timesteps: 12,375 / 2,000,000 (0.6188%)
            Episodes: 25
            Currently: Rollout
            Latest Reward: -279
            Latest Avg Rewards: -222
            Recent Change: -3.08
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0006
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 1282.2802
            

Object 0 incorrectly sorted into sorting_one

b3Printf: Removing body failed



            Timesteps: 14,145 / 2,000,000 (0.7072%)
            Episodes: 28
            Currently: Rollout
            Latest Reward: -120
            Latest Avg Rewards: -223
            Recent Change: -2.01
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0006
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 1282.2802
            Avg Critic Loss: 1559.4198
        


Object 0 dropped to the floor

b3Printf: Removing body failed



            Timesteps: 14,340 / 2,000,000 (0.717%)
            Episodes: 29
            Currently: Rollout
            Latest Reward: -104
            Latest Avg Rewards: -219
            Recent Change: -2.63
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0006
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 1282.2802
            Avg Critic Loss: 1559.4198
        

            Timesteps: 15,090 / 2,000,000 (0.7545%)
            Episodes: 30
            Currently: Rollout
            Latest Reward: -228
            Latest Avg Rewards: -219
            Recent Change: -2.32
            Best Reward: -60.42
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0006
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 1282.2802
            A

Object 0 incorrectly sorted into sorting_one

b3Printf: Removing body failed



            Timesteps: 18,153 / 2,000,000 (0.9076%)
            Episodes: 35
            Currently: Rollout
            Latest Reward: -43
            Latest Avg Rewards: -225
            Recent Change: -0.7
            Best Reward: -43.14
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 1237.6031
            Avg Critic Loss: 1453.6473
        

            Timesteps: 18,903 / 2,000,000 (0.9451%)
            Episodes: 36
            Currently: Rollout
            Latest Reward: -245
            Latest Avg Rewards: -225
            Recent Change: -0.55
            Best Reward: -43.14
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0004
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 1237.6031
            Av