In [3]:
!apt install -y python-opengl ffmpeg > /dev/null 2>&1
%pip install pyvirtualdisplay 

Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl.metadata (943 bytes)
Downloading PyVirtualDisplay-3.0-py3-none-any.whl (15 kB)
Installing collected packages: pyvirtualdisplay
Successfully installed pyvirtualdisplay-3.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
!pip install \
gym==0.25.2 \
gymnasium==1.0.0 \
imutils==0.5.4 \
Jinja2==3.1.4 \
joblib \
libclang==18.1.1 \
Markdown==3.7 \
MarkupSafe==3.0.2 \
matplotlib==3.9.3 \
panda-gym==3.0.7 \
pillow==11.0.0 \
pybullet==3.2.6 \
six==1.16.0 \
sympy==1.13.1 \



Collecting gym==0.25.2
  Downloading gym-0.25.2.tar.gz (734 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m734.5/734.5 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting gymnasium==1.0.0
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting imutils==0.5.4
  Downloading imutils-0.5.4.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting Markdown==3.7
  Downloading Markdown-3.7-py3-none-any.whl.metadata (7.0 kB)
Collecting MarkupSafe==3.0.2
  Downloading MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)
Collecting matplotlib==3.9.3
  Downloading matplotlib-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting panda-gym==3.0.7
  Downloading panda_gym

In [5]:
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1024, 768))
display.start()


from matplotlib import pyplot as plt, animation
%matplotlib inline
from IPython import display

def create_anim(frames, dpi, fps):
    plt.figure(figsize=(frames[0].shape[1] / dpi, frames[0].shape[0] / dpi), dpi=dpi)
    patch = plt.imshow(frames[0])
    def setup():
        plt.axis('off')
    def animate(i):
        patch.set_data(frames[i])
    anim = animation.FuncAnimation(plt.gcf(), animate, init_func=setup, frames=len(frames), interval=fps)
    return anim

def display_anim(frames, dpi=72, fps=50):
    anim = create_anim(frames, dpi, fps)
    return anim.to_jshtml()

def save_anim(frames, filename, dpi=72, fps=50):
    anim = create_anim(frames, dpi, fps)
    anim.save(filename)


class trigger:
    def __init__(self):
        self._trigger = True

    def __call__(self, e):
        return self._trigger

    def set(self, t):
        self._trigger = t

In [6]:
import pybullet as p
# Add a marker for the world frame
def add_world_frame():
    length = 0.1  # Length of the axis
    radius = 0.01  # Radius of the lines

    print("Frame")
    print("X-axis: red")
    print("Y-axis: green")
    print("Z-axis: blue")

    # X-axis (red)
    p.addUserDebugLine([0, 0, 0], [length, 0, 0], [1, 0, 0], lineWidth=2)

    # Y-axis (green)
    p.addUserDebugLine([0, 0, 0], [0, length, 0], [0, 1, 0], lineWidth=2)

    # Z-axis (blue)
    p.addUserDebugLine([0, 0, 0], [0, 0, length], [0, 0, 1], lineWidth=2)


pybullet build time: Nov 28 2023 23:45:17


In [7]:
import math
from math import pi
from random import choice, uniform
from typing import Any, Dict, Tuple, Optional

import numpy as np
from panda_gym.envs.core import RobotTaskEnv, Task
from panda_gym.envs.robots.panda import Panda
from panda_gym.pybullet import PyBullet


class TargetObject:
    """
    TargetObject tracks the lifecycle of a target object (not a goal)
    """

    def __init__(
        self,
        id: str,
        name: str,
        shape: int,
        size: np.array,
        position: np.array,
        color: np.array,
        removed: bool = False,
    ):
        self.id = id
        self.name = name
        self.shape = shape
        self.size = size
        self.position = position
        self.color = color
        self.removed = removed


class Pick_And_Place(Task):
    def __init__(
        self,
        sim: PyBullet,
        observation_type: int,
        robot: Panda,
        objects_count: int = 3,
        img_size: Tuple[int, int] = (256, 256),
        blocker_bar: bool = True,
        sorting_count: int  = 1
    ):
        if observation_type not in [OBSERVATION_POSES, OBSERVATION_IMAGE]:
            raise Exception(
                f"Invalid output type {observation_type}. Must be one of "
                + f"{OBSERVATION_POSES} for values and {OBSERVATION_IMAGE} "
                + "four images."
            )

        super().__init__(sim)

        self.robot = robot
        self.observation_type = observation_type
        self.sorting_count = sorting_count
        self.score: float = 0.0

        self.objects_count: int = objects_count
        if observation_type == OBSERVATION_IMAGE:
            self.img_size = img_size
        self.object_opacity = 0.8

        self.sim.create_plane(z_offset=-0.4)

        # goal_positions will contain one of the three
        # preset strings (set below) w/ the resulting
        # starting position each goal is expected to be
        self.sorter_positions: Dict[str, np.array] = {}
        self.blocker_bar = blocker_bar
        self._init_sorting_areas()

        # Track each target object as part of our goal
        self.goal: Dict[int, TargetObject] = {}

        # Size multiplier is the range of sizes allowed for
        # target objects
        self.size_multiplier: Tuple[float, float] = (0.5, 1)

        self.task_init()

    def task_init(self):
        # Create our plane and table for the scenario
        self.sim.create_table(length=0.8, width=0.8, height=0.4, x_offset=-0.3)

        # These position_limits are where the objects are allowed
        # to spawn. This reads as (x, y), where each axis
        # in turn is a tuple of min/max placement.
        self.object_position_limits: Tuple[Tuple[float, float]] = (
            (-0.06, 0.06),
            (-0.2, 0.2),
        )
        # self.sim.create_sphere(
        #     body_name="marker",
        #     radius=0.025,
        #     mass=0.0,
        #     ghost=True,
        #     position=(0.06, 0.2, 0.01),
        #     rgba_color=(255, 255, 0, 1.0),
        # )

    def _init_sorting_areas(self):
        if self.sorting_count == 3:
            self.sorter_positions = {
                SORTING_ONE: np.array([-0.25, -0.2, 0.01]),
                SORTING_TWO: np.array([-0.25, 0.00, 0.01]),
                SORTING_THREE: np.array([-0.25, 0.2, 0.01]),
            }
        if self.sorting_count == 2:
            self.sorter_positions = {
                SORTING_ONE: np.array([-0.25, -0.2, 0.01]),
                SORTING_TWO: np.array([-0.25, 0.00, 0.01]),
            }
        if self.sorting_count == 1:
            self.sorter_positions = {
                SORTING_ONE: np.array([-0.25, -0.2, 0.01]),
            }
        if self.blocker_bar:
            self.sorter_positions["blocker"] = np.array([-0.2, 0.0, 0.01])
        count = self.sorting_count
        if (count == 3):
            self.sim.create_box(
                body_name=SORTING_THREE,
                half_extents=np.array([0.05, 0.1, 0.01]),
                mass=0.0,
                ghost=False,
                position=self.sorter_positions[SORTING_THREE],
                rgba_color=np.array([1.0, 0, 0, 0.4]),
            )
            count -=1
        if count == 2:
            self.sim.create_box(
                body_name=SORTING_TWO,
                half_extents=np.array([0.05, 0.1, 0.01]),
                mass=0.0,
                ghost=False,
                position=self.sorter_positions[SORTING_TWO],
                rgba_color=np.array([0.0, 1.0, 0, 0.4]),
            )
            count-=1
        if count == 1:
            self.sim.create_box(
                body_name=SORTING_ONE,
                half_extents=np.array([0.05, 0.1, 0.01]),
                mass=0.0,
                ghost=False,
                position=self.sorter_positions[SORTING_ONE],
                rgba_color=np.array([0, 0, 1.0, 0.5]),
            )

        if self.blocker_bar:
            # Create the blocking bar
            self.sim.create_box(
                body_name="blocker",
                half_extents=np.array([0.01, 0.3, 0.005]),
                mass=0.0,
                ghost=False,
                position=self.sorter_positions["blocker"],
                rgba_color=np.array([0.0, 0.0, 0.0, 0.8]),
            )

    def set_sorter_positions(self):
        """
        set_goal_positions will ensure that goals are placed
        in the appropriate place in the environment
        """
        # for count in range(self.sorting_count):
        #     self.sim.set_base_pose(
        #         GOALS[count],
        #         position=self.sorter_positions[GOALS[count]],
        #         orientation=np.array([0.0, 0.0, 0.0, 1.0]),
        #     )
        for sorter in self.sorter_positions:
            self.sim.set_base_pose(
                sorter,
                position=self.sorter_positions[sorter],
                orientation=np.array([0.0, 0.0, 0.0, 1.0]),
            )

    def setup_target_objects(self):
        """
        Generate self.objects_count objects randomly on the table of
        varying sizes, colors, and shapes. The shapes check to ensure
        that they do NOT collide with one another to start.
        """
        base_size = 0.025
        base_mass = 0.5
        base_box_volume = base_size**3
        base_cylinder_volume = pi * base_size**3  # h == r in base cylinder
        # First, delete each object to cleanup
        self.delete_all_objects()

        for object in range(0, self.objects_count):
            # Attempt to create the object. If it collides with
            # another, delete it and try again
            while True:
                name = f"object_{object}"
                color = self.get_random_color()
                if (self.sorting_count == 1):
                    shape = choice(SHAPES[0:1])
                if (self.sorting_count == 2):
                    shape = choice(SHAPES[0:2])
                if (self.sorting_count == 3):
                    shape = choice(SHAPES)
                position = self.get_random_object_position()

                if shape == CUBE:
                    x = base_size * uniform(
                        self.size_multiplier[0], self.size_multiplier[1]
                    )
                    y = base_size * uniform(
                        self.size_multiplier[0], self.size_multiplier[1]
                    )
                    z = base_size * uniform(
                        self.size_multiplier[0], self.size_multiplier[1]
                    )
                    size = np.array([x, y, z]).astype(np.float32)

                    volume = x * y * z
                    mass_multiplier = volume / base_box_volume

                    self.sim.create_box(
                        body_name=name,
                        half_extents=np.array([x, y, z]),
                        mass=base_mass * mass_multiplier,
                        ghost=False,
                        position=position,
                        rgba_color=color,
                    )
                elif shape == CYLINDER:
                    height = base_size * uniform(
                        self.size_multiplier[0], self.size_multiplier[1]
                    )
                    radius = base_size * uniform(
                        self.size_multiplier[0], self.size_multiplier[1]
                    )
                    size = np.array([height, radius, 0.0]).astype(np.float32)

                    volume = pi * radius**2 * height
                    mass_multiplier = volume / base_cylinder_volume

                    self.sim.create_cylinder(
                        body_name=name,
                        radius=radius,
                        height=height,
                        mass=base_mass * mass_multiplier,
                        ghost=False,
                        position=position,
                        rgba_color=color,
                    )
                elif shape == SPHERE:
                    multiplier = uniform(
                        self.size_multiplier[0], self.size_multiplier[1]
                    )
                    size = np.array([multiplier, 0.0, 0.0]).astype(np.float32)

                    self.sim.create_sphere(
                        body_name=name,
                        radius=base_size * multiplier,
                        mass=base_mass * multiplier,
                        ghost=False,
                        position=position,
                        rgba_color=color,
                    )
                else:
                    raise Exception("Improper shape chosen")

                id = self.sim._bodies_idx[name]

                # Now ensure that the shape created does not
                # intersect any of the existing shapes
                collisions = False
                # If this is the first, we're good; move on
                if len(self.goal) <= 0:
                    break
                # ...otherwise we're going to compare it
                # against all known objects. If there's
                # overlap we delete this and move on
                for other in self.goal:
                    other_id = self.goal[other].id
                    if self.check_collision(id, other_id):
                        collisions = True
                        break

                if collisions:
                    self.sim.physics_client.removeBody(id)
                    continue
                else:
                    break

            self.goal[object] = TargetObject(id, name, shape, size, position, color)

    def check_collision(self, object1: str, object2: str) -> bool:
        """
        check_collision will check if the two objects overlap at all
        and returns a boolean to that effect
        """
        contacts = self.sim.physics_client.getContactPoints(object1, object2)
        return contacts is not None and len(contacts) > 0

    def delete_all_objects(self):
        for object in self.goal:
            self.sim.physics_client.removeBody(self.goal[object].id)
        self.goal = {}

    def get_random_color(self) -> np.array:
        """
        Returns an appropriate color from a list of decent color choices
        in the form of a 4 dimensional RGBA array (colors are (0,255) ->
        (0, 1) scaled)
        """
        colors = [
            (255, 0, 0),
            (0, 255, 0),
            (0, 0, 255),
            (255, 0, 255),
            (178, 102, 255),
            (102, 255, 255),
            (102, 0, 204),
            (255, 128, 0),
            (204, 0, 102),
        ]
        color = choice(colors)

        return np.array([color[0], color[1], color[2], self.object_opacity])

    def get_random_object_position(self) -> np.array:
        """
        get_random_object_position returns a random np.array of an object's
        permissions within the permissive bounds set at instantiation.
        """
        x = uniform(
            self.object_position_limits[0][0], self.object_position_limits[0][1]
        )
        y = uniform(
            self.object_position_limits[1][0], self.object_position_limits[1][1]
        )
        z = 0.01
        return np.array([x, y, z])

    def reset(self):
        # Ensure each goal hasn't moved
        self.set_sorter_positions()

        # Generate new objects
        self.setup_target_objects()

        # Clear our score
        self.score = 0.0

    def get_obs(self) -> Tuple[np.array, float]:
        """
        Determines if any objects collided, adjusts score and reward accordingly,
        and returns an observation along with the reward for this step.

        Returns:
            np.array: The observation at this step.
            float: The reward for this step.
        """
        reward = 0.0  # Initialize the reward

        # Handle floor collisions
        reward += self._handle_floor_collisions()

        # Handle goal collisions
        reward += self._handle_goal_collisions()

        # Reward for moving towards the closest object
        reward += self._reward_closer_to_object()

        # Reward for successful grasping
        #reward += self._reward_grasping_success()

        # Reward for moving an object towards its goal
        reward += self._reward_object_towards_goal()

        # Penalize time-step to encourage efficiency
        reward += STEP_PENALTY

        # Ensure that each goal stays in position
        self.set_sorter_positions()

        # Return the observation and the reward
        if self.observation_type == OBSERVATION_IMAGE:
            observation = self._get_img()
        else:
            observation = self._get_poses_output()
        self.score += reward
        return observation, reward

    def _handle_floor_collisions(self) -> float:
        """Checks for floor collisions and penalizes accordingly."""
        reward = 0.0
        floor_id = self.sim._bodies_idx["plane"]
        for object_key in self.goal:
            if self.goal[object_key].removed:
                continue

            object_id = self.goal[object_key].id
            if self.check_collision(object_id, floor_id):
                reward += FLOOR_COLLISION_PENALTY
                self.sim.physics_client.removeBody(object_id)
                self.goal[object_key].removed = True
                print(f"Object {object_key} dropped to the floor")
        return reward

    def _handle_goal_collisions(self) -> float:
        """Checks for collisions between objects and goals, rewarding or penalizing as necessary."""
        reward = 0.0
        for object_key in self.goal:
            if self.goal[object_key].removed:
                continue

            for i in range(self.sorting_count):
                goal = GOALS[i]
                object = self.goal[object_key]
                object_id = object.id
                goal_id = self.sim._bodies_idx[goal]

                if self.check_collision(object_id, goal_id):
                    self.sim.physics_client.removeBody(object_id)
                    self.goal[object_key].removed = True

                    # Reward or penalize based on correct/incorrect sorting
                    if CORRECT_SORTS[goal] == object.shape:
                        reward += DROP_SUCCESS_REWARD
                        print(f"Object {object_key} correctly sorted into {goal}")
                    else:
                        reward += WRONG_DROP_PENALTY
                        print(f"Object {object_key} incorrectly sorted into {goal}")
        return reward

    def _reward_closer_to_object(self) -> float:
        """Rewards the agent for moving closer to the nearest object."""
        ee_position = self.robot.get_ee_position()
        closest_object, closest_distance = self._get_closest_object(ee_position)
        if closest_object:
            distance_to_object = np.linalg.norm(ee_position - self.get_object_pose(closest_object)[:3])
            distance_delta = abs(closest_distance - distance_to_object)
            return MOVE_TOWARD_OBJECT_REWARD * distance_delta
        return 0.0

    def _reward_grasping_success(self) -> float:
        """Rewards the agent for successfully grasping an object."""
        return 0.0
        closest_object, _ = self._get_closest_object(self.robot.get_ee_position())
        if closest_object and self._is_object_grasped(closest_object):
            print(f"Object {closest_object} successfully grasped")
            return GRASP_SUCCESS_REWARD

    def _reward_object_towards_goal(self) -> float:
        """Rewards the agent for moving objects closer to their goals."""
        closest_object, closest_distance = self._get_closest_object(self.robot.get_ee_position())
        if closest_object and not closest_object.removed:
            object_pos = self.get_object_pose(closest_object)[:3]
            goal_pos = self.sorter_positions[GOALS[closest_object.shape]]
            distance_to_goal = np.linalg.norm(object_pos - goal_pos)
            distance_delta = abs(closest_distance - distance_to_goal)
            return MOVE_OBJECT_TO_GOAL_REWARD * distance_delta

        return 0.0

    def _get_closest_object(self, ee_position: np.array) -> Tuple[Optional[TargetObject], float]:
        """ Return the closest object to the end-effector (EE). """
        closest_object = None
        closest_distance = float('inf')
        for object in self.goal.values():
            if object.removed:
                continue
            object_pos = self.get_object_pose(object)[:3]  # x, y, z position
            distance = np.linalg.norm(ee_position - object_pos)
            if distance < closest_distance:
                closest_object = object
                closest_distance = distance
        return closest_object, closest_distance

    def _is_object_grasped(self, object) -> bool:
        """ Check if the gripper has grasped an object. """
        return self.robot.get_fingers_width() < GRASP_THRESHOLD

    def get_object_pose(self, object: TargetObject) -> np.array:
        object_position = self.sim.get_base_position(object.name)
        object_rotation = self.sim.get_base_rotation(object.name)
        object_velocity = self.sim.get_base_velocity(object.name)
        object_angular_velocity = self.sim.get_base_angular_velocity(object.name)
        observation = np.concatenate(
            [object_position, object_rotation, object_velocity, object_angular_velocity]
        )
        return observation.astype(np.float32)



    def _get_poses_output(self) -> np.array:
        """
        _get_poses_output will return the poses of all objects in the scene,
        as well as their identity and size. It will be a series of values for
        the raw (x, y, z, theta, phi, psi) pose of the object, as well as an
        identity (type of shape), and size (0-1) for min/max size, and the pose
        of the robot's end effector, and a 0-1 value for its gripper open/close
        state. An example of the return would be:

        [[x, y, z, theta, phi, psi,
          xd, yd, zd, thetad, phid, psid, <~ velocities
        [identity], [size]] (times # of set objects), ...,
        (ee_x, ee_y, ee_z, ee_theta, ee_phi, ee_psi,
        ee_xd, ee_yd, ee_zd, ee_thetad, ee_phid, ee_psid), # <~ velocities
        gripper_status]

        Note that the identity is a one-hot encoded list of shape [CUBE, CYLINDER,
        SPHERE] and that size is a three value array of varying meaning based
        on size: [x, y, z] for CUBE, [radius, height] for CYLINDER, [radius]
        for SPHERE. Unused values are 0.0.
        """
        # The size of this vector is determined by the number of objects expected
        pose_values = 12
        shape_values = 3
        size_values = 3
        # End effector values
        ee_values = 12
        finger_values = 1
        # The length of our vector is (pose_values + (identity, size)) for each
        # object, pose_values for the end effector, and one additional value for
        # the gripper finger state (distance between fingers)
        size = (
            (len(self.goal) * (pose_values + shape_values + size_values))
            + ee_values
            + finger_values
        )
        observation: np.array = np.zeros((size,), dtype="float32")

        index = 0
        for object in self.goal.values():
            # If the object has been removed, just report 0's for its existence
            if object.removed:
                index += 1
                continue

            pose = self.get_object_pose(object)
            object_index = index * (pose_values + shape_values + size_values)
            observation[object_index : object_index + pose_values] = pose

            # The shape is a one hot encoded vector of [CUBE, CYLINDER, SPHERE]
            if object.shape == CUBE:
                shape_type = [1, 0, 0]
            elif object.shape == CYLINDER:
                shape_type = [0, 1, 0]
            elif object.shape == SPHERE:
                shape_type = [0, 0, 1]
            shapes_index = object_index + pose_values
            observation[shapes_index : shapes_index + shape_values] = shape_type
            size_index = shapes_index + shape_values
            observation[size_index : size_index + size_values] = object.size
            index += 1

        # Get the end effector position
        ee_position = self.robot.get_ee_position()
        ee_rotation_quaternion = self.sim.get_link_orientation(
            self.robot.body_name, self.robot.ee_link
        )
        ee_rotation = self._quaternion_to_euler(ee_rotation_quaternion)
        # print("rot", ee_rotation)
        # print("rot other", self.sim.get_base_rotation(self.robot.ee_link))
        ee_velocity = self.robot.get_ee_velocity()
        ee_rotational_velocity = self.sim.get_link_angular_velocity(
            self.robot.body_name, self.robot.ee_link
        )

        # ee_angulary_velocity = 0.0
        fingers_width = self.robot.get_fingers_width()
        ee_index = (pose_values + shape_values + size_values) * len(self.goal)
        observation[ee_index : ee_index + 3] = ee_position
        observation[ee_index + 3 : ee_index + 6] = ee_rotation
        observation[ee_index + 6 : ee_index + 9] = ee_velocity
        observation[ee_index + 9 : ee_index + 12] = ee_rotational_velocity
        observation[ee_index + 12] = fingers_width

        return observation

    def _quaternion_to_euler(self, quaternion: np.array):
        """
        _quaternion_to_euler will convert a quaternion to euler angless
        """
        x, y, z, w = quaternion
        t0 = 2.0 * (w * x + y * z)
        t1 = 1.0 - 2.0 * (x * x + y * y)
        X = math.atan2(t0, t1)

        t2 = 2.0 * (w * y - z * x)
        t2 = 1.0 if t2 > +1.0 else t2
        t2 = -1.0 if t2 < -1.0 else t2
        Y = math.asin(t2)

        t3 = 2.0 * (w * z + x * y)
        t4 = 1.0 - 2.0 * (y * y + z * z)
        Z = math.atan2(t3, t4)

        return np.array([X, Y, Z]).astype(np.float32)

    def _get_img(self) -> np.array:
        """
        _get_img will return the image from the camera in human rendering
        mode
        """
        # We have to swap render mode if it's set to human mode
        # to get it to draw for us.
        original_render_mode = self.sim.render_mode
        self.sim.render_mode = "rgb_array"
        img = self.sim.render(
            self.img_size[0],
            self.img_size[1],
            # target_position=self.camera_position,
            target_position=None,
            distance=0.0,
            yaw=45,
            pitch=-30,
            roll=0.0,
        )
        self.sim.render_mode = original_render_mode
        return img

    def get_achieved_goal(self) -> np.ndarray:
        return np.array(
            all(target.removed for target in self.goal.values()), dtype="bool"
        )

    def is_terminated(self) -> bool:
        """
        is_terminated returns whether or not the episode is
        in a terminal state; this can be due to:
        1. All objects have been removed somehow from the env
        2. The timer has hit 0

        It is not an indication of success
        """

        return all(obj.removed for obj in self.goal.values())

    def is_success(
        self,
        achieved_goal: np.ndarray,
        desired_goal: np.ndarray,
        info: Dict[str, Any] = ...,
    ) -> np.ndarray:
        """
        is_success is a misnamed function, required as a layover
        from using the panda_gym library. Instead it is best
        to read it as an interface w/ is_terminated, and in no
        way reads whether it was a success, since the episode can
        end via timeout without doing the goals.
        """
        return np.array([self.is_terminated()], dtype="bool")

    def compute_reward(
        self,
        achieved_goal: np.ndarray,
        desired_goal: np.ndarray,
        info: Dict[str, Any] = ...,
    ) -> np.ndarray:
        return np.array([self.score], dtype="float32")


class My_Arm_RobotEnv(RobotTaskEnv):
    """Sorter task wih Panda robot.

    Args:
        render_mode (str, optional): Render mode. Defaults to "human".
        control_type (str, optional): "ee" to control end-effector position or "joints" to control joint values.
            Defaults to "ee".
        render_width (int, optional): Image width. Defaults to 720.
        render_height (int, optional): Image height. Defaults to 480.
    """

    def __init__(
        self,
        observation_type: int,
        objects_count: int = 5,
        blocker_bar: bool = True,
        render_mode: str = "human",
        control_type: str = "ee",
        renderer: str = "OpenGL",
        render_width: int = 720,
        render_height: int = 480,
        sorting_count: int = 1
    ) -> None:
        if observation_type not in [OBSERVATION_IMAGE, OBSERVATION_POSES]:
            raise ValueError("observation_type must be one of either images or poses")

        sim = PyBullet(
            render_mode=render_mode,
            background_color=np.array((200, 200, 200)),
            renderer=renderer,
        )
        robot = Panda(
            sim,
            block_gripper=False,
            base_position=np.array([-0.6, 0.0, 0.0]),
            control_type=control_type,
        )
        task = Pick_And_Place(sim,
                              observation_type,
                              robot,
                              objects_count=objects_count,
                              blocker_bar=blocker_bar,
                              sorting_count=sorting_count)
        super().__init__(
            robot,
            task,
            render_width=render_width,
            render_height=render_height,
            render_target_position=None,
            render_distance=0.9,
            render_yaw=45,
            render_pitch=-30,
            render_roll=0.0,
        )
        self.total_score = 0
        self.sim.place_visualizer(
            target_position=np.zeros(3), distance=0.9, yaw=45, pitch=-30
        )

    def reset(self) -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray]]:
        with self.sim.no_rendering():
            self.robot.reset()
            self.task.reset()
        observation = self._get_obs()
        self.total_score = 0
        return observation, None

    def _get_obs(self) -> Dict[str, np.ndarray]:
        observation, _ = self.task.get_obs()
        observation = observation.astype(np.float32)
        achieved_goal = self.task.get_achieved_goal().astype(np.float32)
        return {
            "observation": observation,
            "achieved_goal": achieved_goal,
        }

    def get_obs(self) -> np.ndarray:
        observation, _ = self.task.get_obs()
        return observation.astype(np.float32)

    def step(
        self, action: np.ndarray
    ) -> Tuple[Dict[str, np.ndarray], float, bool, bool, Dict[str, Any]]:
        score_prior = self.task.score

        if isinstance(action, dict):
            discrete_action = action["discrete"]
            continuous_action = action["continuous"]
            self.robot.set_action(continuous_action)

        self.sim.step()
        observation = self._get_obs()
        score_after = self.task.score

        # An episode is terminated iff the agent has reached the target
        terminated = bool(
            self.task.is_success(observation["achieved_goal"], self.task.get_goal())
        )
        truncated = False
        info = {"is_success": terminated}
        step_penalty = STEP_PENALTY
        reward = (score_after - score_prior) + step_penalty
        self.total_score += reward
        # print("Score: ",self.total_score)
        # print("reward: ", reward)
        return observation, reward, terminated, truncated, info


SORTING_ONE = "sorting_one"
SORTING_TWO = "sorting_two"
SORTING_THREE = "sorting_three"
GOALS = [SORTING_ONE, SORTING_TWO, SORTING_THREE]

CUBE = 0
CYLINDER = 1
SPHERE = 2
SHAPES = [CUBE, CYLINDER, SPHERE]

# This is the expected correct sorting results
CORRECT_SORTS = {
    SORTING_ONE: CYLINDER,
    SORTING_TWO: SPHERE,
    SORTING_THREE: CUBE,
}


# FLOOR_PENALTY = -50
# # WRONG_SORT_REWARD = 25
# # SORT_REWARD = 100
# WRONG_SORT_REWARD = 200
# SORT_REWARD = 500

MOVE_TOWARD_OBJECT_REWARD = -1.0     # Reward for moving EE toward the object
GRASP_SUCCESS_REWARD = 50.0        # Reward for successful grasp
MOVE_OBJECT_TO_GOAL_REWARD = -1.0   # Reward for moving object toward goal
DROP_SUCCESS_REWARD = 50.0       # Reward for successfully placing in correct goal
WRONG_DROP_PENALTY = -20.0        # Penalty for placing object in wrong goal
FLOOR_COLLISION_PENALTY = -50.0   # Penalty for dropping the object on the floor
STEP_PENALTY = -0.1               # Small penalty to encourage efficiency
GRASP_THRESHOLD = 0.02

OBSERVATION_POSES: int = 0
OBSERVATION_IMAGE: int = 1


import time
def test_env():

    env = My_Arm_RobotEnv(observation_type=0,
                          render_mode="human",
                          blocker_bar=False,
                          objects_count=1,
                          sorting_count=2
                          )
    add_world_frame()
    observation, info = env.reset()

    for _ in range(10000):
        time.sleep(1/24)
        action = env.action_space.sample()
        print(action)
        observation, reward, terminated, truncated, info = env.step(action)

        if terminated or truncated:
            print("Run 1 episode")
            observation, info = env.reset()


import time


def test_fixed_actions():
    env = My_Arm_RobotEnv(
        observation_type=0,
        render_mode="human",
        blocker_bar=False,
        objects_count=1,
        sorting_count=2
    )

    observation, info = env.reset()

    # List of fixed actions to cycle through
    fixed_actions = [
        [0, 0, 0.1, 0],  # Move up
        [0, 0, -0.1, 0],  # Move down
        [-0.1, 0, 0, 0],  # Move left
        [0.1, 0, 0, 0],  # Move right
        [0, 0.1, 0, 0],  # Move forward
        [0, -0.1, 0, 0],  # Move backward
        [0, 0, 0, 0.3],  # Open gripper
        [0, 0, 0, -0.2],  # Close gripper
        [0, 0, 0.1, 0.7],  # Move up + open gripper
        [0, 0, -0.1, -0.5]  # Move down + close gripper
    ]
    frame = []
    for action in fixed_actions:
        for _ in range(50):  # Each action lasts for 50 time steps
            # frame.append(env.render('human'))
            observation, reward, terminated, truncated, info = env.step(action)
            print(f"Action: {action}, Reward: {reward}, Terminated: {terminated}")
            time.sleep(1 / 24)  # Delay for rendering

        if terminated or truncated:
            print("Episode ended, resetting environment.")
            observation, info = env.reset()

    # display.HTML(display_anim(frame))

# if __name__ == '__main__':
#     test_fixed_actions()


In [8]:
import torch
from torch import Tensor
from torch.nn import (Sequential, Module, Linear, ModuleList, Softplus,ModuleDict, ModuleList)
from torch.nn import LeakyReLU
import numpy as np
from typing import Union, List

class DiscreteActor(Module):
    def __init__(
            self,
            obs_dim: int = 20,
            output_dim: int = 3,
            control_type=None
    ):
        """Init the discrete actor. This network estimate a distribution of
        discrete actions.
        Args:
            obs_dim (int, optional): Dimension of observation space. Defaults to 20.
            output_size (int, optional): Output size or number of discrete
            actions. Defaults to 3 (Move, Pick, Place)
        """
        super(DiscreteActor, self).__init__()

        if control_type is not None and control_type == 'pendulum':
            obs_dim = 3
            output_dim = 1

        self.model = Sequential(
            Linear(obs_dim, 256),
            LeakyReLU(),
            Linear(256, 128),
            LeakyReLU(),
            Linear(128, 64),
            LeakyReLU(),
            Linear(64, output_dim),
        )

    def forward(self, input: Union[np.ndarray, Tensor, List]) -> Union[
        np.ndarray, Tensor, List]:
        if isinstance(input, np.ndarray):
            input_tensor: Tensor = torch.from_numpy(input.astype("float32"))
        elif type(input) is list:
            input_tensor: Tensor = torch.from_numpy(
                np.array(input).astype("float32"))
        else:
            input_tensor = input

        # return distribution
        output = self.model(input_tensor)
        output_dist = torch.distributions.Categorical(logits=output)
        return output_dist

    def save(self, filepath: str):
        torch.save({
            "model": self.model.state_dict(),
        }, filepath)

    def load(self, filepath: str):
        data = torch.load(filepath)
        self.model.load_state_dict(data["model"])


class ContinuousActor(Module):
    def __init__(
            self,
            obs_dim: int = 20,
            continuous_param_dim: List = [4, 4, 4],
            control_type=None
    ):
        """Init the continuous actor. This network predicts mean and std for
        the continuous parameters.
        Args:
            obs_dim (int, optional): Dimension of observation space. Defaults to 20.
            continuous_param_dim (int, optional): Dimension of continuous
            parameter. Defaults to [1, 1, 1, 1], meaning each discrete action only has 1 parameter
        """
        super(ContinuousActor, self).__init__()

        if control_type is not None and control_type == 'pendulum':
            obs_dim = 3
            continuous_param_dim = [1]

        self.model = ModuleList(
            ModuleDict({
                "mean": Sequential(
                    Linear(obs_dim, 64),
                    LeakyReLU(),
                    Linear(64, param_dim)
                ),
                "std": Sequential(
                    Linear(obs_dim, 64),
                    LeakyReLU(),
                    Linear(64, param_dim),
                    Softplus()  # Ensures positive standard deviations
                )
            })
            for param_dim in continuous_param_dim
        )

    def forward(self, input: Union[np.ndarray, Tensor, List]):
        if isinstance(input, np.ndarray):
            input_tensor: Tensor = torch.from_numpy(input.astype("float32"))
        elif type(input) is list:
            input_tensor: Tensor = torch.from_numpy(
                np.array(input).astype("float32"))
        else:
            input_tensor = input

        continuous_params = [
            {
                "mean": head["mean"](input_tensor),
                "std": head["std"](input_tensor)
            }
            for head in self.model
        ]

        return continuous_params


    def save(self, filepath: str):
        torch.save({
            "model": self.model.state_dict(),
        }, filepath)

    def load(self, filepath: str):
        data = torch.load(filepath)
        self.model.load_state_dict(data["model"])


class Critic(Module):
    def __init__(
            self,
            obs_dim: int,
            control_type=None
    ):
        """Init the critic network. This network estimate V(s)"""
        super(Critic, self).__init__()

        if control_type is not None and control_type == 'pendulum':
            obs_dim = 3

        self.model = Sequential(
            Linear(obs_dim, 128),
            LeakyReLU(),
            Linear(128, 64),
            LeakyReLU(),
            Linear(64, 32),
            LeakyReLU(),
            Linear(32, 1),
        )

    def forward(self, input: np.ndarray) -> Tensor:
        if isinstance(input, np.ndarray):
            input_tensor: Tensor = torch.from_numpy(input.astype("float32"))
        elif type(input) is list:
            input_tensor: Tensor = torch.from_numpy(
                np.array(input).astype("float32"))
        else:
            input_tensor = input

        return self.model(input_tensor)

    def save(self, filepath: str):
        torch.save({
            "model": self.model.state_dict(),
        }, filepath)

    def load(self, filepath: str):
        data = torch.load(filepath)
        self.model.load_state_dict(data["model"])



In [9]:
import torch
import numpy as np
from torch import Tensor
from torch.nn import MSELoss
from torch.distributions import Normal
from panda_gym.envs.core import RobotTaskEnv
from typing import List, Tuple
import sys
import matplotlib.pyplot as plt
import pickle


class Trainer:
    def __init__(
            self,
            env: RobotTaskEnv,
            discrete_actor: DiscreteActor,
            continuous_actor: ContinuousActor,
            critic: Critic,
            timesteps: int,
            timesteps_per_batch: int,
            max_timesteps_per_episode: int,
            training_cycles_per_batch: int = 5,
            gamma: float = 0.99,
            epsilon: float = 0.2,
            alpha: float = 3e-4,
            save_every_x_timesteps: int = 50000
    ):
        # Environment
        self.env = env

        # Neural networks
        self.discrete_actor = discrete_actor
        self.continuous_actor = continuous_actor
        self.critic = critic

        # Hypeparameters
        self.gamma = gamma
        self.epsilon = epsilon
        self.alpha = alpha

        # Iteration parameters
        self.timesteps = timesteps
        self.current_timestep = 0
        self.max_timesteps_per_episode = max_timesteps_per_episode
        self.timesteps_per_batch = timesteps_per_batch
        self.training_cycles_per_batch = training_cycles_per_batch
        self.save_every_x_timesteps = save_every_x_timesteps

        # Optimizers
        self.discrete_optimizer = torch.optim.Adam(
            params=self.discrete_actor.parameters(), lr=self.alpha)
        self.continuous_optimizer = torch.optim.Adam(
            params=self.continuous_actor.parameters(), lr=self.alpha)
        self.critic_optimizer = torch.optim.Adam(
            params=self.critic.parameters(),
                                                 lr=self.alpha)
        # Memory
        self.total_rewards: List[float] = []
        self.terminal_timesteps: List[int] = []
        self.discrete_actor_losses: List[float] = []
        self.continuous_actor_losses: List[float] = []
        self.critic_losses: List[float] = []
        self.previous_print_length: int = 0
        self.current_action = "Initializing"
        self.last_save: int = 0

    def print_status(self):
        latest_reward = 0.0
        average_reward = 0.0
        best_reward = 0.0

        latest_discrete_loss = 0.0
        avg_discrete_loss = 0.0
        latest_continuous_loss = 0.0
        avg_continuous_loss = 0.0

        latest_critic_loss = 0.0
        avg_critic_loss = 0.0
        recent_change = 0.0

        if len(self.total_rewards) > 0:
            latest_reward = self.total_rewards[-1]

            last_n_episodes = 100
            average_reward = np.mean(self.total_rewards[-last_n_episodes:])

            episodes = [
                i
                for i in range(
                    len(self.total_rewards[-last_n_episodes:]),
                    min(last_n_episodes, 0),
                    -1,
                )
            ]
            coefficients = np.polyfit(
                episodes,
                self.total_rewards[-last_n_episodes:],
                1,
            )
            recent_change = coefficients[0]

            best_reward = max(self.total_rewards)

        if len(self.discrete_actor_losses) > 0:
            avg_count = 3 * self.timesteps_per_batch
            latest_discrete_loss = self.discrete_actor_losses[-1]
            avg_discrete_loss = np.mean(
                self.discrete_actor_losses[-avg_count:])
            latest_continuous_loss = self.continuous_actor_losses[-1]
            avg_continuous_loss = np.mean(
                self.continuous_actor_losses[-avg_count:])
            latest_critic_loss = self.critic_losses[-1]
            avg_critic_loss = np.mean(self.critic_losses[-avg_count:])

        msg = f"""
            =========================================
            Timesteps: {self.current_timestep:,} / {self.timesteps:,} ({round((self.current_timestep / self.timesteps) * 100, 4)}%)
            Episodes: {len(self.total_rewards):,}
            Currently: {self.current_action}
            Latest Reward: {round(latest_reward)}
            Latest Avg Rewards: {round(average_reward)}
            Recent Change: {round(recent_change, 2)}
            Best Reward: {round(best_reward, 2)}
            Latest Discrete Actor Loss: {round(latest_discrete_loss, 4)}
            Latest Continuous Actor Loss: {round(latest_continuous_loss, 4)}
            Avg Discrete Actor Loss: {round(avg_discrete_loss, 4)}
            Avg Continuous Actor Loss: {round(avg_continuous_loss, 4)}
            Latest Critic Loss: {round(latest_critic_loss, 4)}
            Avg Critic Loss: {round(avg_critic_loss, 4)}
            =========================================
        """

        # We print to STDERR as a hack to get around the noisy pybullet
        # environment. Hacky, but effective if paired w/ 1> /dev/null
        print(msg, file=sys.stderr)

    def create_plot(self, filepath: str):
        last_n_episodes = 10

        episodes = [i + 1 for i in range(len(self.total_rewards))]
        averages = [
            np.mean(self.total_rewards[i - last_n_episodes: i])
            for i in range(len(self.total_rewards))
        ]
        trend_data = np.polyfit(episodes, self.total_rewards, 1)
        trendline = np.poly1d(trend_data)

        plt.scatter(
            episodes, self.total_rewards, color="green"
        )  # , linestyle='None', marker='o', color='green')
        plt.plot(episodes, averages, linestyle="solid", color="red")
        plt.plot(episodes, trendline(episodes), linestyle="--", color="blue")

        plt.title("Rewards per episode")
        plt.ylabel("Reward")
        plt.xlabel("Episode")
        plt.savefig(filepath)

    def save(self, directory: str):
        """
        save will save the models, state, and any additional
        data to the given directory
        """
        self.last_save = self.current_timestep

        self.discrete_actor.save(f"{directory}/discrete_actor.pth")
        self.continuous_actor.save(f"{directory}/continuous_actor.pth")
        self.critic.save(f"{directory}/critic.pth")
        self.create_plot(f"{directory}/rewards.png")

        # Now save the trainer's state data
        data = {
            "timesteps": self.timesteps,
            "current_timestep": self.current_timestep,
            "max_timesteps_per_episode": self.max_timesteps_per_episode,
            "timesteps_per_batch": self.timesteps_per_batch,
            "save_every_x_timesteps": self.save_every_x_timesteps,
            "γ": self.gamma,
            "ε": self.epsilon,
            "α": self.alpha,
            "training_cycles_per_batch": self.training_cycles_per_batch,
            "total_rewards": self.total_rewards,
            "terminal_timesteps": self.terminal_timesteps,
            "discrete_actor_losses": self.discrete_actor_losses,
            "continuous_actor_losses": self.continuous_actor_losses,
            "critic_losses": self.critic_losses,
        }
        pickle.dump(data, open(f"{directory}/state.data", "wb"))

    def load(self, directory: str):
        """
        Load will load the models, state, and any additional
        data from the given directory
        """
        # Load our models first; they're the simplest
        self.discrete_actor.load(f"{directory}/discrete_actor.pth")
        self.continuous_actor.load(f"{directory}/continuous_actor.pth")
        self.critic.load(f"{directory}/critic.pth")

        data = pickle.load(open(f"{directory}/state.data", "rb"))

        self.timesteps = data["timesteps"]
        self.current_timestep = data["current_timestep"]
        self.last_save = self.current_timestep
        self.max_timesteps_per_episode = data["max_timesteps_per_episode"]
        self.timesteps_per_batch = data["timesteps_per_batch"]
        self.save_every_x_timesteps = data["save_every_x_timesteps"]

        # Hyperparameters
        self.gamma = data["γ"]
        self.epsilon = data["ε"]
        self.alpha = data["α"]
        self.training_cycles_per_batch = data["training_cycles_per_batch"]

        # Memory
        self.total_rewards = data["total_rewards"]
        self.terminal_timesteps = data["terminal_timesteps"]
        self.discrete_actor_losses = data["discrete_actor_losses"]
        self.continuous_actor_losses = data["continuous_actor_losses"]
        self.critic_losses = data["critic_losses"]

        self.discrete_optimizer = torch.optim.Adam(
            self.discrete_actor.parameters(), lr=self.alpha)
        self.continuous_optimizer = torch.optim.Adam(
            self.continuous_actor.parameters(), lr=self.alpha)
        self.critic_optimizer = torch.optim.Adam(
            self.critic.parameters(), lr=self.alpha)

    def run_episode(self):
        """Run a single episode."""
        observation, _ = self.env.reset()
        if isinstance(observation, dict):
            observation = observation["observation"]

        timesteps = 0
        observations = []
        discrete_actions = []
        continuous_params = []
        discrete_log_probs = []
        continuous_log_probs = []
        rewards = []

        while True:
            timesteps += 1

            observations.append(observation)

            current_discrete_dist = self.discrete_actor(observation)
            current_discrete_action = current_discrete_dist.sample()
            current_discrete_log_prob = current_discrete_dist.log_prob(
                current_discrete_action).detach().numpy()
            current_discrete_action = current_discrete_action.detach().numpy()

            current_continuous_params = self.continuous_actor(observation)
            mean = current_continuous_params[current_discrete_action][
                'mean']
            std = current_continuous_params[current_discrete_action][
                'std']
            current_continuous_dist = torch.distributions.Normal(mean, std)
            current_continuous_action = current_continuous_dist.sample()
            continuous_log_prob = current_continuous_dist.log_prob(
                current_continuous_action).detach().numpy()
            current_continuous_action = current_continuous_action.detach().numpy()

            action = {
                'discrete': current_discrete_action,
                'continuous': current_continuous_action
            }

            obs, reward, terminated, _, _ = self.env.step(action)


            discrete_actions.append(current_discrete_action)
            discrete_log_probs.append(current_discrete_log_prob)

            continuous_params.append(current_continuous_action)
            continuous_log_probs.append(continuous_log_prob)

            rewards.append(reward)

            if timesteps >= self.max_timesteps_per_episode:
                terminated = True

            if terminated:
                break

        # Calculate the discounted rewards for this episode
        discounted_rewards = self.calculate_discounted_reward(rewards)

        # Get the terminal reward and record for status tracking
        self.total_rewards.append(sum(rewards))

        return (observations, discrete_actions, continuous_params,
                discrete_log_probs, continuous_log_probs, discounted_rewards)

    def rollout(self):
        """Perform a rollout of the environment and return the memory of the
        episode with the current actor models
        """
        observations = []
        discrete_log_probabilities = []
        continuous_log_probabilities = []
        discrete_actions = []
        continuous_actions = []
        rewards = []

        while len(observations) < self.timesteps_per_batch:
            self.current_action = "Rollout"
            (
                obs,
                chosen_discrete_actions,
                chosen_continuous_actions,
                discrete_log_probs,
                continuous_log_probs,
                rwds
            ) = self.run_episode()

            # Combine these arrays into overall batch
            observations += obs
            discrete_actions += chosen_discrete_actions
            continuous_actions += chosen_continuous_actions
            discrete_log_probabilities += discrete_log_probs
            continuous_log_probabilities += continuous_log_probs
            rewards += rwds

            # Increment count of timesteps
            self.current_timestep += len(obs)

            self.print_status()

        # Trim the batch memory to the batch size
        observations = observations[: self.timesteps_per_batch]
        discrete_actions = discrete_actions[: self.timesteps_per_batch]
        continuous_actions = continuous_actions[: self.timesteps_per_batch]
        discrete_log_probabilities = discrete_log_probabilities[
            : self.timesteps_per_batch]
        continuous_log_probabilities = continuous_log_probabilities[
            : self.timesteps_per_batch]
        rewards = rewards[: self.timesteps_per_batch]

        return (observations, discrete_actions, continuous_actions,
                discrete_log_probabilities, continuous_log_probabilities,
                rewards)

    def calculate_discounted_reward(self, rewards):
        """Calculate the discounted reward of each timestep of an episode
        given its initial rewards and episode length"""
        discounted_rewards = []
        discounted_reward = 0.0
        for reward in reversed(rewards):
            discounted_reward = reward + self.gamma * discounted_reward
            discounted_rewards.insert(0, discounted_reward)

        return discounted_rewards

    def calculate_normalized_advantage(self, observations, rewards):
        """Calculate the normalized advantage of each timestep of a given
        batch of episode """
        V = self.critic(observations).detach().squeeze()

        advantage = Tensor(np.array(rewards, dtype="float32")) - V
        normalized_advantage = (advantage - advantage.mean()) / (
            advantage.std() + 1e-8)

        return normalized_advantage

    def training_step(
            self,
            observations,
            discrete_actions,
            continuous_actions,
            discrete_log_probabilities,
            continuous_log_probabilities,
            rewards,
            normalized_advantage
    ):
        """Peform a single epoch of training for the actors and critic model. Return the loss for each model at the end of the step"""
        # ---- Discrete Actor ----
        current_discrete_dist = self.discrete_actor(observations)
        current_discrete_log_probs = current_discrete_dist.log_prob(
            discrete_actions)
        discrete_ratio = torch.exp(
            current_discrete_log_probs - discrete_log_probabilities)
        discrete_actor_loss = -torch.min(
            discrete_ratio * normalized_advantage,
            torch.clamp(discrete_ratio, 1 - self.epsilon, 1 +
                        self.epsilon) * normalized_advantage
        ).mean()

        self.discrete_optimizer.zero_grad()
        discrete_actor_loss.backward()
        self.discrete_optimizer.step()

        # ---- Continuous Actor ----
        current_continuous_params = self.continuous_actor(observations)
        means = [current_continuous_params[
                                 int(discrete_action.item())]['mean']
                             for discrete_action in discrete_actions]
        stds = [current_continuous_params[
                     int(discrete_action.item())]['std']
                 for discrete_action in discrete_actions]

        means = torch.stack(means)
        stds = torch.stack(stds)

        current_continuous_dist = torch.distributions.Normal(means, stds)
        current_continuous_log_probs = current_continuous_dist.log_prob(
            continuous_actions)
        continuous_ratios = torch.exp(
            current_continuous_log_probs - continuous_log_probabilities)

        normalized_advantage = normalized_advantage.unsqueeze(1).unsqueeze(2)
        continuous_actor_loss = -torch.min(
            continuous_ratios * normalized_advantage,
            torch.clamp(continuous_ratios, 1 - self.epsilon,
                        1 + self.epsilon) * normalized_advantage
        ).mean()

        self.continuous_optimizer.zero_grad()
        continuous_actor_loss.backward()
        self.continuous_optimizer.step()

        # ---- Critic Network ----
        V = self.critic(observations)
        reward_tensor = Tensor(rewards).unsqueeze(-1)
        critic_loss = MSELoss()(V, reward_tensor)

        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        return discrete_actor_loss.item(), continuous_actor_loss.item(), critic_loss.item()

    def train(self):
        while self.current_timestep <= self.timesteps:
            # Rollout to get next training batch
            observations, discrete_actions, continuous_actions, discrete_log_probabilities, continuous_log_probabilities, rewards = self.rollout()

            # Convert to numpy arrays and then to tensors
            observations = Tensor(np.array(observations, dtype=np.float32))
            discrete_actions = Tensor(
                np.array(discrete_actions, dtype=np.float32))
            continuous_actions = Tensor(
                np.array(continuous_actions, dtype=np.float32))
            discrete_log_probabilities = Tensor(
                np.array(discrete_log_probabilities, dtype=np.float32))
            continuous_log_probabilities = Tensor(
                np.array(continuous_log_probabilities, dtype=np.float32))
            rewards = Tensor(np.array(rewards, dtype=np.float32))

            # Perform training steps
            for c in range(self.training_cycles_per_batch):
                self.current_action = (
                    f"Training cycle {c+1}/{self.training_cycles_per_batch}"
                )
                self.print_status()
                # Calculate losses
                normalized_advantage = self.calculate_normalized_advantage(
                    observations, rewards)
                discrete_loss, continuous_loss, critic_loss = self.training_step(
                    observations, discrete_actions, continuous_actions, discrete_log_probabilities, continuous_log_probabilities, rewards, normalized_advantage)

                self.discrete_actor_losses.append(discrete_loss)
                self.continuous_actor_losses.append(continuous_loss)
                self.critic_losses.append(critic_loss)

            # Every x timesteps, save current status
            if self.current_timestep - self.last_save >= self.save_every_x_timesteps:
                self.current_action = "Saving"
                self.print_status()
                self.save("training")

        print("")
        print("Training complete!")
        self.save("training")

In [None]:
# from test_env import *
# from model import DiscreteActor, ContinuousActor, Critic
import torch
# from trainer import Trainer

MOVE = 0
PICK = 1
PLACE = 2

action_space = {
    'discrete': {'Move': 0, 'Pick': 1, 'Place': 2},
    'continuous': [4, 4, 4]
}

discrete_dim = len(action_space['discrete'])
continuous_dim = action_space['continuous']

env = My_Arm_RobotEnv(
    observation_type=0,
    render_mode='rgb_array',
    blocker_bar=True,
    objects_count=2,
    sorting_count=3,
    renderer = "Tiny",
)
import os

# Create a directory named 'training' in the current working directory
os.makedirs('training', exist_ok=True)

# Verify if the directory was created
print("Directory 'training' created successfully" if os.path.exists('training') else "Failed to create directory")

obs, _ = env.reset()
obs_dim = len(obs['observation'])
d_actor = DiscreteActor(obs_dim=obs_dim, output_dim=discrete_dim)
c_actor = ContinuousActor(obs_dim=obs_dim,
                          continuous_param_dim=continuous_dim)
critic = Critic(obs_dim=obs_dim)

trainer = Trainer(
    env=env,
    discrete_actor=d_actor,
    continuous_actor=c_actor,
    critic=critic,
    timesteps=2_000_000,
    timesteps_per_batch=5_000,
    max_timesteps_per_episode=750,
)

trainer.train()

Directory 'training' created successfully
Object 0 incorrectly sorted into sorting_two


  coefficients = np.polyfit(

            Timesteps: 750 / 2,000,000 (0.0375%)
            Episodes: 1
            Currently: Rollout
            Latest Reward: -272
            Latest Avg Rewards: -272
            Recent Change: -136.08
            Best Reward: -272.16
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss: 0.0
        


argv[0]=--background_color_red=0.7843137383460999
argv[1]=--background_color_green=0.7843137383460999
argv[2]=--background_color_blue=0.7843137383460999
Remove body failed



            Timesteps: 1,500 / 2,000,000 (0.075%)
            Episodes: 2
            Currently: Rollout
            Latest Reward: -268
            Latest Avg Rewards: -270
            Recent Change: -4.32
            Best Reward: -267.84
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss: 0.0
        

            Timesteps: 2,250 / 2,000,000 (0.1125%)
            Episodes: 3
            Currently: Rollout
            Latest Reward: -432
            Latest Avg Rewards: -324
            Recent Change: 79.92
            Best Reward: -267.84
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss: 0.0
        

            

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 3,890 / 2,000,000 (0.1945%)
            Episodes: 6
            Currently: Rollout
            Latest Reward: -160
            Latest Avg Rewards: -340
            Recent Change: -0.88
            Best Reward: -159.76
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss: 0.0
        

            Timesteps: 4,640 / 2,000,000 (0.232%)
            Episodes: 7
            Currently: Rollout
            Latest Reward: -348
            Latest Avg Rewards: -341
            Recent Change: 0.32
            Best Reward: -159.76
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: 0.0
            Avg Continuous Actor Loss: 0.0
            Latest Critic Loss: 0.0
            Avg Critic Loss: 0.0
        

            T

Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed
Object 0 dropped to the floor



            Timesteps: 8,025 / 2,000,000 (0.4012%)
            Episodes: 12
            Currently: Rollout
            Latest Reward: -210
            Latest Avg Rewards: -318
            Recent Change: -7.34
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0157
            Avg Discrete Actor Loss: -0.0003
            Avg Continuous Actor Loss: 0.0173
            Latest Critic Loss: 2521.3652
            Avg Critic Loss: 2522.7972
        

            Timesteps: 8,775 / 2,000,000 (0.4387%)
            Episodes: 13
            Currently: Rollout
            Latest Reward: -304
            Latest Avg Rewards: -317
            Recent Change: -6.24
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0157
            Avg Discrete Actor Loss: -0.0003
            Avg Continuous Actor Loss: 0.0173
            Latest Critic Loss: 2521.3652
      

Remove body failed
Object 1 dropped to the floor



            Timesteps: 9,525 / 2,000,000 (0.4763%)
            Episodes: 14
            Currently: Rollout
            Latest Reward: -316
            Latest Avg Rewards: -317
            Recent Change: -5.02
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0157
            Avg Discrete Actor Loss: -0.0003
            Avg Continuous Actor Loss: 0.0173
            Latest Critic Loss: 2521.3652
            Avg Critic Loss: 2522.7972
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 10,275 / 2,000,000 (0.5137%)
            Episodes: 15
            Currently: Rollout
            Latest Reward: -265
            Latest Avg Rewards: -314
            Recent Change: -5.37
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0157
            Avg Discrete Actor Loss: -0.0003
            Avg Continuous Actor Loss: 0.0173
            Latest Critic Loss: 2521.3652
            Avg Critic Loss: 2522.7972
        


Remove body failed



            Timesteps: 11,025 / 2,000,000 (0.5513%)
            Episodes: 16
            Currently: Rollout
            Latest Reward: -256
            Latest Avg Rewards: -310
            Recent Change: -5.7
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0157
            Avg Discrete Actor Loss: -0.0003
            Avg Continuous Actor Loss: 0.0173
            Latest Critic Loss: 2521.3652
            Avg Critic Loss: 2522.7972
        

            Timesteps: 11,025 / 2,000,000 (0.5513%)
            Episodes: 16
            Currently: Training cycle 1/5
            Latest Reward: -256
            Latest Avg Rewards: -310
            Recent Change: -5.7
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0157
            Avg Discrete Actor Loss: -0.0003
            Avg Continuous Actor Loss: 0.0173
            Latest Critic Loss: 2521.

Object 1 dropped to the floor



            Timesteps: 11,775 / 2,000,000 (0.5887%)
            Episodes: 17
            Currently: Rollout
            Latest Reward: -405
            Latest Avg Rewards: -316
            Recent Change: -2.89
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0076
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0129
            Latest Critic Loss: 1536.8218
            Avg Critic Loss: 2030.3109
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 12,525 / 2,000,000 (0.6262%)
            Episodes: 18
            Currently: Rollout
            Latest Reward: -337
            Latest Avg Rewards: -317
            Recent Change: -2.07
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0076
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0129
            Latest Critic Loss: 1536.8218
            Avg Critic Loss: 2030.3109
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 13,275 / 2,000,000 (0.6638%)
            Episodes: 19
            Currently: Rollout
            Latest Reward: -297
            Latest Avg Rewards: -316
            Recent Change: -2.07
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0076
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0129
            Latest Critic Loss: 1536.8218
            Avg Critic Loss: 2030.3109
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 14,025 / 2,000,000 (0.7012%)
            Episodes: 20
            Currently: Rollout
            Latest Reward: -417
            Latest Avg Rewards: -321
            Recent Change: -0.32
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0076
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0129
            Latest Critic Loss: 1536.8218
            Avg Critic Loss: 2030.3109
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 14,775 / 2,000,000 (0.7388%)
            Episodes: 21
            Currently: Rollout
            Latest Reward: -413
            Latest Avg Rewards: -325
            Recent Change: 0.92
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0076
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0129
            Latest Critic Loss: 1536.8218
            Avg Critic Loss: 2030.3109
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 15,525 / 2,000,000 (0.7762%)
            Episodes: 22
            Currently: Rollout
            Latest Reward: -280
            Latest Avg Rewards: -323
            Recent Change: 0.27
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0076
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0129
            Latest Critic Loss: 1536.8218
            Avg Critic Loss: 2030.3109
        


Remove body failed



            Timesteps: 16,275 / 2,000,000 (0.8138%)
            Episodes: 23
            Currently: Rollout
            Latest Reward: -228
            Latest Avg Rewards: -319
            Recent Change: -0.8
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0076
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0129
            Latest Critic Loss: 1536.8218
            Avg Critic Loss: 2030.3109
        

            Timesteps: 16,275 / 2,000,000 (0.8138%)
            Episodes: 23
            Currently: Training cycle 1/5
            Latest Reward: -228
            Latest Avg Rewards: -319
            Recent Change: -0.8
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0076
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0129
            Latest Critic Loss: 1536.

Object 0 dropped to the floor



            Timesteps: 17,775 / 2,000,000 (0.8887%)
            Episodes: 25
            Currently: Rollout
            Latest Reward: -299
            Latest Avg Rewards: -316
            Recent Change: -1.27
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0048
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0104
            Latest Critic Loss: 1724.0717
            Avg Critic Loss: 1928.647
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 18,525 / 2,000,000 (0.9263%)
            Episodes: 26
            Currently: Rollout
            Latest Reward: -346
            Latest Avg Rewards: -317
            Recent Change: -0.88
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0048
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0104
            Latest Critic Loss: 1724.0717
            Avg Critic Loss: 1928.647
        


Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 18,858 / 2,000,000 (0.9429%)
            Episodes: 27
            Currently: Rollout
            Latest Reward: -233
            Latest Avg Rewards: -314
            Recent Change: -1.45
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0048
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0104
            Latest Critic Loss: 1724.0717
            Avg Critic Loss: 1928.647
        


Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 19,107 / 2,000,000 (0.9553%)
            Episodes: 28
            Currently: Rollout
            Latest Reward: -183
            Latest Avg Rewards: -309
            Recent Change: -2.27
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0048
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0104
            Latest Critic Loss: 1724.0717
            Avg Critic Loss: 1928.647
        

            Timesteps: 19,857 / 2,000,000 (0.9929%)
            Episodes: 29
            Currently: Rollout
            Latest Reward: -469
            Latest Avg Rewards: -315
            Recent Change: -0.95
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0048
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0104
            Latest Critic Loss: 1724.0717
     

Object 1 dropped to the floor



            Timesteps: 20,607 / 2,000,000 (1.0304%)
            Episodes: 30
            Currently: Rollout
            Latest Reward: -419
            Latest Avg Rewards: -318
            Recent Change: -0.19
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0048
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0104
            Latest Critic Loss: 1724.0717
            Avg Critic Loss: 1928.647
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 21,357 / 2,000,000 (1.0678%)
            Episodes: 31
            Currently: Rollout
            Latest Reward: -330
            Latest Avg Rewards: -319
            Recent Change: -0.1
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0048
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0104
            Latest Critic Loss: 1724.0717
            Avg Critic Loss: 1928.647
        

            Timesteps: 21,357 / 2,000,000 (1.0678%)
            Episodes: 31
            Currently: Training cycle 1/5
            Latest Reward: -330
            Latest Avg Rewards: -319
            Recent Change: -0.1
            Best Reward: -159.76
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0048
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0104
            Latest Critic Loss: 1724.0

Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 21,524 / 2,000,000 (1.0762%)
            Episodes: 32
            Currently: Rollout
            Latest Reward: -150
            Latest Avg Rewards: -313
            Recent Change: -1.05
            Best Reward: -150.47
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
            Avg Critic Loss: 1959.627
        


Object 1 dropped to the floor
Remove body failed



            Timesteps: 22,274 / 2,000,000 (1.1137%)
            Episodes: 33
            Currently: Rollout
            Latest Reward: -282
            Latest Avg Rewards: -312
            Recent Change: -1.12
            Best Reward: -150.47
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
            Avg Critic Loss: 1959.627
        

            Timesteps: 23,024 / 2,000,000 (1.1512%)
            Episodes: 34
            Currently: Rollout
            Latest Reward: -262
            Latest Avg Rewards: -311
            Recent Change: -1.28
            Best Reward: -150.47
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
     

Object 0 dropped to the floor



            Timesteps: 23,774 / 2,000,000 (1.1887%)
            Episodes: 35
            Currently: Rollout
            Latest Reward: -286
            Latest Avg Rewards: -310
            Recent Change: -1.29
            Best Reward: -150.47
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
            Avg Critic Loss: 1959.627
        

            Timesteps: 23,796 / 2,000,000 (1.1898%)
            Episodes: 36
            Currently: Rollout
            Latest Reward: -109
            Latest Avg Rewards: -305
            Recent Change: -2.09
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
     

Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed
Object 0 dropped to the floor



            Timesteps: 24,546 / 2,000,000 (1.2273%)
            Episodes: 37
            Currently: Rollout
            Latest Reward: -428
            Latest Avg Rewards: -308
            Recent Change: -1.4
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
            Avg Critic Loss: 1959.627
        

            Timesteps: 24,573 / 2,000,000 (1.2287%)
            Episodes: 38
            Currently: Rollout
            Latest Reward: -111
            Latest Avg Rewards: -303
            Recent Change: -2.09
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
      

Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed
Object 0 dropped to the floor



            Timesteps: 24,840 / 2,000,000 (1.242%)
            Episodes: 39
            Currently: Rollout
            Latest Reward: -229
            Latest Avg Rewards: -301
            Recent Change: -2.22
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
            Avg Critic Loss: 1959.627
        


Object 1 dropped to the floor
Remove body failed
Remove body failed
Object 0 dropped to the floor



            Timesteps: 24,997 / 2,000,000 (1.2498%)
            Episodes: 40
            Currently: Rollout
            Latest Reward: -151
            Latest Avg Rewards: -297
            Recent Change: -2.61
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
            Avg Critic Loss: 1959.627
        


Object 1 incorrectly sorted into sorting_two
Remove body failed
Remove body failed



            Timesteps: 25,122 / 2,000,000 (1.2561%)
            Episodes: 41
            Currently: Rollout
            Latest Reward: -111
            Latest Avg Rewards: -293
            Recent Change: -3.07
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
            Avg Critic Loss: 1959.627
        

            Timesteps: 25,872 / 2,000,000 (1.2936%)
            Episodes: 42
            Currently: Rollout
            Latest Reward: -325
            Latest Avg Rewards: -293
            Recent Change: -2.75
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
     

Object 1 dropped to the floor



            Timesteps: 26,622 / 2,000,000 (1.3311%)
            Episodes: 43
            Currently: Rollout
            Latest Reward: -534
            Latest Avg Rewards: -299
            Recent Change: -1.8
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9773
            Avg Critic Loss: 1959.627
        

            Timesteps: 26,622 / 2,000,000 (1.3311%)
            Episodes: 43
            Currently: Training cycle 1/5
            Latest Reward: -534
            Latest Avg Rewards: -299
            Recent Change: -1.8
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0007
            Latest Continuous Actor Loss: 0.0034
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0088
            Latest Critic Loss: 2050.9

Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed
Object 0 dropped to the floor



            Timesteps: 26,753 / 2,000,000 (1.3377%)
            Episodes: 44
            Currently: Rollout
            Latest Reward: -142
            Latest Avg Rewards: -295
            Recent Change: -2.15
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0018
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0074
            Latest Critic Loss: 2021.6132
            Avg Critic Loss: 1972.3827
        

            Timesteps: 27,503 / 2,000,000 (1.3752%)
            Episodes: 45
            Currently: Rollout
            Latest Reward: -350
            Latest Avg Rewards: -297
            Recent Change: -1.85
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0018
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0074
            Latest Critic Loss: 2021.6132
    

Remove body failed



            Timesteps: 28,253 / 2,000,000 (1.4126%)
            Episodes: 46
            Currently: Rollout
            Latest Reward: -208
            Latest Avg Rewards: -295
            Recent Change: -1.98
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0018
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0074
            Latest Critic Loss: 2021.6132
            Avg Critic Loss: 1972.3827
        


Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 28,494 / 2,000,000 (1.4247%)
            Episodes: 47
            Currently: Rollout
            Latest Reward: -191
            Latest Avg Rewards: -293
            Recent Change: -2.13
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0018
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0074
            Latest Critic Loss: 2021.6132
            Avg Critic Loss: 1972.3827
        


Object 0 dropped to the floor



            Timesteps: 29,244 / 2,000,000 (1.4622%)
            Episodes: 48
            Currently: Rollout
            Latest Reward: -288
            Latest Avg Rewards: -292
            Recent Change: -2.01
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0018
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0074
            Latest Critic Loss: 2021.6132
            Avg Critic Loss: 1972.3827
        


Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed
Object 0 dropped to the floor



            Timesteps: 29,522 / 2,000,000 (1.4761%)
            Episodes: 49
            Currently: Rollout
            Latest Reward: -183
            Latest Avg Rewards: -290
            Recent Change: -2.16
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0018
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0074
            Latest Critic Loss: 2021.6132
            Avg Critic Loss: 1972.3827
        

            Timesteps: 30,272 / 2,000,000 (1.5136%)
            Episodes: 50
            Currently: Rollout
            Latest Reward: -419
            Latest Avg Rewards: -293
            Recent Change: -1.73
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0018
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0074
            Latest Critic Loss: 2021.6132
    

Remove body failed
Object 1 dropped to the floor



            Timesteps: 31,022 / 2,000,000 (1.5511%)
            Episodes: 51
            Currently: Rollout
            Latest Reward: -260
            Latest Avg Rewards: -292
            Recent Change: -1.7
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0018
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0074
            Latest Critic Loss: 2021.6132
            Avg Critic Loss: 1972.3827
        


Remove body failed



            Timesteps: 31,772 / 2,000,000 (1.5886%)
            Episodes: 52
            Currently: Rollout
            Latest Reward: -285
            Latest Avg Rewards: -292
            Recent Change: -1.62
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0018
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0074
            Latest Critic Loss: 2021.6132
            Avg Critic Loss: 1972.3827
        

            Timesteps: 31,772 / 2,000,000 (1.5886%)
            Episodes: 52
            Currently: Training cycle 1/5
            Latest Reward: -285
            Latest Avg Rewards: -292
            Recent Change: -1.62
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0018
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0074
            Latest Critic Loss: 202

Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 31,892 / 2,000,000 (1.5946%)
            Episodes: 53
            Currently: Rollout
            Latest Reward: -139
            Latest Avg Rewards: -289
            Recent Change: -1.85
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: 0.0014
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0064
            Latest Critic Loss: 1400.5413
            Avg Critic Loss: 1877.3713
        


Object 1 dropped to the floor



            Timesteps: 32,642 / 2,000,000 (1.6321%)
            Episodes: 54
            Currently: Rollout
            Latest Reward: -352
            Latest Avg Rewards: -290
            Recent Change: -1.62
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: 0.0014
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0064
            Latest Critic Loss: 1400.5413
            Avg Critic Loss: 1877.3713
        


Remove body failed



            Timesteps: 33,392 / 2,000,000 (1.6696%)
            Episodes: 55
            Currently: Rollout
            Latest Reward: -227
            Latest Avg Rewards: -289
            Recent Change: -1.66
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: 0.0014
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0064
            Latest Critic Loss: 1400.5413
            Avg Critic Loss: 1877.3713
        

            Timesteps: 34,142 / 2,000,000 (1.7071%)
            Episodes: 56
            Currently: Rollout
            Latest Reward: -206
            Latest Avg Rewards: -288
            Recent Change: -1.73
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: 0.0014
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0064
            Latest Critic Loss: 1400.5413
          

Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 36,663 / 2,000,000 (1.8332%)
            Episodes: 60
            Currently: Rollout
            Latest Reward: -210
            Latest Avg Rewards: -291
            Recent Change: -1.08
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: 0.0014
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0064
            Latest Critic Loss: 1400.5413
            Avg Critic Loss: 1877.3713
        

            Timesteps: 37,413 / 2,000,000 (1.8707%)
            Episodes: 61
            Currently: Rollout
            Latest Reward: -214
            Latest Avg Rewards: -290
            Recent Change: -1.15
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: 0.0014
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0064
            Latest Critic Loss: 1400.5413
          

Object 0 dropped to the floor



            Timesteps: 38,163 / 2,000,000 (1.9082%)
            Episodes: 62
            Currently: Rollout
            Latest Reward: -260
            Latest Avg Rewards: -289
            Recent Change: -1.14
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0008
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0056
            Latest Critic Loss: 2146.0396
            Avg Critic Loss: 1916.1093
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 38,913 / 2,000,000 (1.9457%)
            Episodes: 63
            Currently: Rollout
            Latest Reward: -327
            Latest Avg Rewards: -290
            Recent Change: -1.03
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0008
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0056
            Latest Critic Loss: 2146.0396
            Avg Critic Loss: 1916.1093
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 39,663 / 2,000,000 (1.9831%)
            Episodes: 64
            Currently: Rollout
            Latest Reward: -317
            Latest Avg Rewards: -290
            Recent Change: -0.95
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0008
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0056
            Latest Critic Loss: 2146.0396
            Avg Critic Loss: 1916.1093
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 40,413 / 2,000,000 (2.0206%)
            Episodes: 65
            Currently: Rollout
            Latest Reward: -333
            Latest Avg Rewards: -291
            Recent Change: -0.84
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0008
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0056
            Latest Critic Loss: 2146.0396
            Avg Critic Loss: 1916.1093
        


Remove body failed



            Timesteps: 41,163 / 2,000,000 (2.0581%)
            Episodes: 66
            Currently: Rollout
            Latest Reward: -223
            Latest Avg Rewards: -290
            Recent Change: -0.9
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0008
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0056
            Latest Critic Loss: 2146.0396
            Avg Critic Loss: 1916.1093
        

            Timesteps: 41,913 / 2,000,000 (2.0957%)
            Episodes: 67
            Currently: Rollout
            Latest Reward: -197
            Latest Avg Rewards: -289
            Recent Change: -0.98
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0008
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0056
            Latest Critic Loss: 2146.0396
     

Object 1 dropped to the floor



            Timesteps: 42,663 / 2,000,000 (2.1332%)
            Episodes: 68
            Currently: Rollout
            Latest Reward: -358
            Latest Avg Rewards: -290
            Recent Change: -0.85
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0008
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0056
            Latest Critic Loss: 2146.0396
            Avg Critic Loss: 1916.1093
        

            Timesteps: 42,663 / 2,000,000 (2.1332%)
            Episodes: 68
            Currently: Training cycle 1/5
            Latest Reward: -358
            Latest Avg Rewards: -290
            Recent Change: -0.85
            Best Reward: -108.67
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0008
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0056
            Latest Critic Loss: 214

Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 42,685 / 2,000,000 (2.1343%)
            Episodes: 69
            Currently: Rollout
            Latest Reward: -108
            Latest Avg Rewards: -287
            Recent Change: -1.04
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0009
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0051
            Latest Critic Loss: 1204.9008
            Avg Critic Loss: 1827.5002
        

            Timesteps: 43,435 / 2,000,000 (2.1718%)
            Episodes: 70
            Currently: Rollout
            Latest Reward: -224
            Latest Avg Rewards: -286
            Recent Change: -1.07
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0009
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0051
            Latest Critic Loss: 1204.9008
    

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 45,240 / 2,000,000 (2.262%)
            Episodes: 73
            Currently: Rollout
            Latest Reward: -200
            Latest Avg Rewards: -284
            Recent Change: -1.14
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0009
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0051
            Latest Critic Loss: 1204.9008
            Avg Critic Loss: 1827.5002
        

            Timesteps: 45,990 / 2,000,000 (2.2995%)
            Episodes: 74
            Currently: Rollout
            Latest Reward: -294
            Latest Avg Rewards: -284
            Recent Change: -1.09
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0009
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0051
            Latest Critic Loss: 1204.9008
     

Object 0 dropped to the floor



            Timesteps: 47,490 / 2,000,000 (2.3745%)
            Episodes: 76
            Currently: Rollout
            Latest Reward: -524
            Latest Avg Rewards: -286
            Recent Change: -0.79
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0009
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0051
            Latest Critic Loss: 1204.9008
            Avg Critic Loss: 1827.5002
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 48,240 / 2,000,000 (2.412%)
            Episodes: 77
            Currently: Rollout
            Latest Reward: -340
            Latest Avg Rewards: -287
            Recent Change: -0.7
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0009
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0051
            Latest Critic Loss: 1204.9008
            Avg Critic Loss: 1827.5002
        

            Timesteps: 48,240 / 2,000,000 (2.412%)
            Episodes: 77
            Currently: Training cycle 1/5
            Latest Reward: -340
            Latest Avg Rewards: -287
            Recent Change: -0.7
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0009
            Avg Discrete Actor Loss: -0.0002
            Avg Continuous Actor Loss: 0.0051
            Latest Critic Loss: 1204.90

Remove body failed
Object 1 dropped to the floor



            Timesteps: 48,990 / 2,000,000 (2.4495%)
            Episodes: 78
            Currently: Rollout
            Latest Reward: -276
            Latest Avg Rewards: -287
            Recent Change: -0.69
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0006
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0046
            Latest Critic Loss: 1530.5286
            Avg Critic Loss: 1794.8717
        


Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed
Object 1 dropped to the floor



            Timesteps: 49,044 / 2,000,000 (2.4522%)
            Episodes: 79
            Currently: Rollout
            Latest Reward: -123
            Latest Avg Rewards: -285
            Recent Change: -0.82
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0006
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0046
            Latest Critic Loss: 1530.5286
            Avg Critic Loss: 1794.8717
        

            Timesteps: 49,794 / 2,000,000 (2.4897%)
            Episodes: 80
            Currently: Rollout
            Latest Reward: -299
            Latest Avg Rewards: -285
            Recent Change: -0.78
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0006
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0046
            Latest Critic Loss: 1530.5286
      

Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 50,159 / 2,000,000 (2.508%)
            Episodes: 81
            Currently: Rollout
            Latest Reward: -193
            Latest Avg Rewards: -284
            Recent Change: -0.83
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0006
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0046
            Latest Critic Loss: 1530.5286
            Avg Critic Loss: 1794.8717
        


Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 50,229 / 2,000,000 (2.5114%)
            Episodes: 82
            Currently: Rollout
            Latest Reward: -127
            Latest Avg Rewards: -282
            Recent Change: -0.94
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0006
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0046
            Latest Critic Loss: 1530.5286
            Avg Critic Loss: 1794.8717
        


Object 1 dropped to the floor



            Timesteps: 50,979 / 2,000,000 (2.5489%)
            Episodes: 83
            Currently: Rollout
            Latest Reward: -387
            Latest Avg Rewards: -283
            Recent Change: -0.82
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0006
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0046
            Latest Critic Loss: 1530.5286
            Avg Critic Loss: 1794.8717
        


Remove body failed



            Timesteps: 51,729 / 2,000,000 (2.5864%)
            Episodes: 84
            Currently: Rollout
            Latest Reward: -292
            Latest Avg Rewards: -283
            Recent Change: -0.78
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0006
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0046
            Latest Critic Loss: 1530.5286
            Avg Critic Loss: 1794.8717
        


Object 1 dropped to the floor



            Timesteps: 52,479 / 2,000,000 (2.6239%)
            Episodes: 85
            Currently: Rollout
            Latest Reward: -355
            Latest Avg Rewards: -284
            Recent Change: -0.69
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0006
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0046
            Latest Critic Loss: 1530.5286
            Avg Critic Loss: 1794.8717
        


Remove body failed



            Timesteps: 53,229 / 2,000,000 (2.6614%)
            Episodes: 86
            Currently: Rollout
            Latest Reward: -249
            Latest Avg Rewards: -284
            Recent Change: -0.7
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0006
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0046
            Latest Critic Loss: 1530.5286
            Avg Critic Loss: 1794.8717
        

            Timesteps: 53,979 / 2,000,000 (2.6989%)
            Episodes: 87
            Currently: Rollout
            Latest Reward: -344
            Latest Avg Rewards: -285
            Recent Change: -0.63
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0006
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0046
            Latest Critic Loss: 1530.5286
       

Object 1 dropped to the floor



            Timesteps: 54,729 / 2,000,000 (2.7365%)
            Episodes: 88
            Currently: Rollout
            Latest Reward: -254
            Latest Avg Rewards: -284
            Recent Change: -0.63
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0005
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0042
            Latest Critic Loss: 1548.515
            Avg Critic Loss: 1770.6513
        


Remove body failed



            Timesteps: 55,479 / 2,000,000 (2.774%)
            Episodes: 89
            Currently: Rollout
            Latest Reward: -209
            Latest Avg Rewards: -283
            Recent Change: -0.66
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0005
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0042
            Latest Critic Loss: 1548.515
            Avg Critic Loss: 1770.6513
        

            Timesteps: 56,229 / 2,000,000 (2.8115%)
            Episodes: 90
            Currently: Rollout
            Latest Reward: -298
            Latest Avg Rewards: -284
            Recent Change: -0.63
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0005
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0042
            Latest Critic Loss: 1548.515
       

Object 0 dropped to the floor
Object 1 dropped to the floor



            Timesteps: 59,098 / 2,000,000 (2.9549%)
            Episodes: 94
            Currently: Rollout
            Latest Reward: -360
            Latest Avg Rewards: -285
            Recent Change: -0.46
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0005
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0042
            Latest Critic Loss: 1548.515
            Avg Critic Loss: 1770.6513
        

            Timesteps: 59,098 / 2,000,000 (2.9549%)
            Episodes: 94
            Currently: Training cycle 1/5
            Latest Reward: -360
            Latest Avg Rewards: -285
            Recent Change: -0.46
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0005
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0042
            Latest Critic Loss: 1548

Remove body failed
Remove body failed



            Timesteps: 59,848 / 2,000,000 (2.9924%)
            Episodes: 95
            Currently: Rollout
            Latest Reward: -230
            Latest Avg Rewards: -285
            Recent Change: -0.48
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0004
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0038
            Latest Critic Loss: 1358.8687
            Avg Critic Loss: 1733.6433
        

            Timesteps: 60,598 / 2,000,000 (3.0299%)
            Episodes: 96
            Currently: Rollout
            Latest Reward: -203
            Latest Avg Rewards: -284
            Recent Change: -0.52
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0004
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0038
            Latest Critic Loss: 1358.8687
    

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 60,882 / 2,000,000 (3.0441%)
            Episodes: 97
            Currently: Rollout
            Latest Reward: -186
            Latest Avg Rewards: -283
            Recent Change: -0.56
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0004
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0038
            Latest Critic Loss: 1358.8687
            Avg Critic Loss: 1733.6433
        

            Timesteps: 61,632 / 2,000,000 (3.0816%)
            Episodes: 98
            Currently: Rollout
            Latest Reward: -568
            Latest Avg Rewards: -286
            Recent Change: -0.37
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0004
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0038
            Latest Critic Loss: 1358.8687
    

Object 0 dropped to the floor



            Timesteps: 63,882 / 2,000,000 (3.1941%)
            Episodes: 101
            Currently: Rollout
            Latest Reward: -283
            Latest Avg Rewards: -288
            Recent Change: -0.2
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0004
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0038
            Latest Critic Loss: 1358.8687
            Avg Critic Loss: 1733.6433
        


Remove body failed



            Timesteps: 64,632 / 2,000,000 (3.2316%)
            Episodes: 102
            Currently: Rollout
            Latest Reward: -288
            Latest Avg Rewards: -289
            Recent Change: -0.21
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0004
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0038
            Latest Critic Loss: 1358.8687
            Avg Critic Loss: 1733.6433
        

            Timesteps: 64,632 / 2,000,000 (3.2316%)
            Episodes: 102
            Currently: Training cycle 1/5
            Latest Reward: -288
            Latest Avg Rewards: -289
            Recent Change: -0.21
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0004
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0038
            Latest Critic Loss: 1

Object 1 dropped to the floor



            Timesteps: 66,882 / 2,000,000 (3.3441%)
            Episodes: 105
            Currently: Rollout
            Latest Reward: -260
            Latest Avg Rewards: -285
            Recent Change: 0.14
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0035
            Latest Critic Loss: 2135.4829
            Avg Critic Loss: 1767.7296
        


Remove body failed



            Timesteps: 67,632 / 2,000,000 (3.3816%)
            Episodes: 106
            Currently: Rollout
            Latest Reward: -265
            Latest Avg Rewards: -286
            Recent Change: 0.06
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0035
            Latest Critic Loss: 2135.4829
            Avg Critic Loss: 1767.7296
        


Object 1 dropped to the floor



            Timesteps: 68,382 / 2,000,000 (3.4191%)
            Episodes: 107
            Currently: Rollout
            Latest Reward: -278
            Latest Avg Rewards: -285
            Recent Change: 0.09
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0035
            Latest Critic Loss: 2135.4829
            Avg Critic Loss: 1767.7296
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 69,132 / 2,000,000 (3.4566%)
            Episodes: 108
            Currently: Rollout
            Latest Reward: -502
            Latest Avg Rewards: -288
            Recent Change: 0.2
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0035
            Latest Critic Loss: 2135.4829
            Avg Critic Loss: 1767.7296
        


Remove body failed



            Timesteps: 69,882 / 2,000,000 (3.4941%)
            Episodes: 109
            Currently: Rollout
            Latest Reward: -452
            Latest Avg Rewards: -290
            Recent Change: 0.28
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0035
            Latest Critic Loss: 2135.4829
            Avg Critic Loss: 1767.7296
        

            Timesteps: 69,882 / 2,000,000 (3.4941%)
            Episodes: 109
            Currently: Training cycle 1/5
            Latest Reward: -452
            Latest Avg Rewards: -290
            Recent Change: 0.28
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0035
            Latest Critic Loss: 2135.

Object 1 dropped to the floor



            Timesteps: 70,632 / 2,000,000 (3.5316%)
            Episodes: 110
            Currently: Rollout
            Latest Reward: -336
            Latest Avg Rewards: -288
            Recent Change: 0.42
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0033
            Latest Critic Loss: 1854.8658
            Avg Critic Loss: 1775.0967
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 71,382 / 2,000,000 (3.5691%)
            Episodes: 111
            Currently: Rollout
            Latest Reward: -311
            Latest Avg Rewards: -289
            Recent Change: 0.4
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0033
            Latest Critic Loss: 1854.8658
            Avg Critic Loss: 1775.0967
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 72,132 / 2,000,000 (3.6066%)
            Episodes: 112
            Currently: Rollout
            Latest Reward: -309
            Latest Avg Rewards: -290
            Recent Change: 0.36
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0033
            Latest Critic Loss: 1854.8658
            Avg Critic Loss: 1775.0967
        


Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 72,686 / 2,000,000 (3.6343%)
            Episodes: 113
            Currently: Rollout
            Latest Reward: -294
            Latest Avg Rewards: -290
            Recent Change: 0.37
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0033
            Latest Critic Loss: 1854.8658
            Avg Critic Loss: 1775.0967
        

            Timesteps: 73,436 / 2,000,000 (3.6718%)
            Episodes: 114
            Currently: Rollout
            Latest Reward: -212
            Latest Avg Rewards: -289
            Recent Change: 0.34
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0033
            Latest Critic Loss: 1854.8658
    

Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed
Object 1 dropped to the floor



            Timesteps: 73,557 / 2,000,000 (3.6778%)
            Episodes: 115
            Currently: Rollout
            Latest Reward: -140
            Latest Avg Rewards: -288
            Recent Change: 0.24
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0033
            Latest Critic Loss: 1854.8658
            Avg Critic Loss: 1775.0967
        

            Timesteps: 74,307 / 2,000,000 (3.7153%)
            Episodes: 116
            Currently: Rollout
            Latest Reward: -327
            Latest Avg Rewards: -288
            Recent Change: 0.25
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0033
            Latest Critic Loss: 1854.8658
    

Remove body failed



            Timesteps: 75,057 / 2,000,000 (3.7529%)
            Episodes: 117
            Currently: Rollout
            Latest Reward: -376
            Latest Avg Rewards: -288
            Recent Change: 0.37
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0033
            Latest Critic Loss: 1854.8658
            Avg Critic Loss: 1775.0967
        

            Timesteps: 75,057 / 2,000,000 (3.7529%)
            Episodes: 117
            Currently: Training cycle 1/5
            Latest Reward: -376
            Latest Avg Rewards: -288
            Recent Change: 0.37
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0003
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0033
            Latest Critic Loss: 185

Object 1 dropped to the floor



            Timesteps: 75,807 / 2,000,000 (3.7904%)
            Episodes: 118
            Currently: Rollout
            Latest Reward: -298
            Latest Avg Rewards: -288
            Recent Change: 0.4
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0031
            Latest Critic Loss: 1397.4285
            Avg Critic Loss: 1748.7809
        


Remove body failed
Object 1 incorrectly sorted into sorting_one



            Timesteps: 76,557 / 2,000,000 (3.8279%)
            Episodes: 119
            Currently: Rollout
            Latest Reward: -377
            Latest Avg Rewards: -289
            Recent Change: 0.46
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0031
            Latest Critic Loss: 1397.4285
            Avg Critic Loss: 1748.7809
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 77,307 / 2,000,000 (3.8653%)
            Episodes: 120
            Currently: Rollout
            Latest Reward: -448
            Latest Avg Rewards: -289
            Recent Change: 0.63
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0031
            Latest Critic Loss: 1397.4285
            Avg Critic Loss: 1748.7809
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 78,057 / 2,000,000 (3.9028%)
            Episodes: 121
            Currently: Rollout
            Latest Reward: -405
            Latest Avg Rewards: -289
            Recent Change: 0.78
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0031
            Latest Critic Loss: 1397.4285
            Avg Critic Loss: 1748.7809
        


Remove body failed



            Timesteps: 78,807 / 2,000,000 (3.9404%)
            Episodes: 122
            Currently: Rollout
            Latest Reward: -263
            Latest Avg Rewards: -289
            Recent Change: 0.76
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0031
            Latest Critic Loss: 1397.4285
            Avg Critic Loss: 1748.7809
        

            Timesteps: 79,557 / 2,000,000 (3.9779%)
            Episodes: 123
            Currently: Rollout
            Latest Reward: -218
            Latest Avg Rewards: -289
            Recent Change: 0.68
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0031
            Latest Critic Loss: 1397.4285
    

Object 0 dropped to the floor



            Timesteps: 80,307 / 2,000,000 (4.0153%)
            Episodes: 124
            Currently: Rollout
            Latest Reward: -443
            Latest Avg Rewards: -290
            Recent Change: 0.76
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0031
            Latest Critic Loss: 1397.4285
            Avg Critic Loss: 1748.7809
        

            Timesteps: 80,307 / 2,000,000 (4.0153%)
            Episodes: 124
            Currently: Training cycle 1/5
            Latest Reward: -443
            Latest Avg Rewards: -290
            Recent Change: 0.76
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0031
            Latest Critic Loss: 139

Remove body failed
Object 1 dropped to the floor



            Timesteps: 81,057 / 2,000,000 (4.0529%)
            Episodes: 125
            Currently: Rollout
            Latest Reward: -458
            Latest Avg Rewards: -292
            Recent Change: 0.86
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0029
            Latest Critic Loss: 1511.2657
            Avg Critic Loss: 1733.7022
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 81,807 / 2,000,000 (4.0903%)
            Episodes: 126
            Currently: Rollout
            Latest Reward: -521
            Latest Avg Rewards: -294
            Recent Change: 1.03
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0029
            Latest Critic Loss: 1511.2657
            Avg Critic Loss: 1733.7022
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 82,557 / 2,000,000 (4.1279%)
            Episodes: 127
            Currently: Rollout
            Latest Reward: -399
            Latest Avg Rewards: -295
            Recent Change: 1.06
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0029
            Latest Critic Loss: 1511.2657
            Avg Critic Loss: 1733.7022
        


Remove body failed



            Timesteps: 83,307 / 2,000,000 (4.1654%)
            Episodes: 128
            Currently: Rollout
            Latest Reward: -320
            Latest Avg Rewards: -297
            Recent Change: 1.0
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0029
            Latest Critic Loss: 1511.2657
            Avg Critic Loss: 1733.7022
        

            Timesteps: 84,057 / 2,000,000 (4.2029%)
            Episodes: 129
            Currently: Rollout
            Latest Reward: -261
            Latest Avg Rewards: -295
            Recent Change: 1.08
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0029
            Latest Critic Loss: 1511.2657
     

Object 1 dropped to the floor



            Timesteps: 84,807 / 2,000,000 (4.2403%)
            Episodes: 130
            Currently: Rollout
            Latest Reward: -299
            Latest Avg Rewards: -293
            Recent Change: 1.16
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0029
            Latest Critic Loss: 1511.2657
            Avg Critic Loss: 1733.7022
        


Remove body failed



            Timesteps: 85,557 / 2,000,000 (4.2778%)
            Episodes: 131
            Currently: Rollout
            Latest Reward: -295
            Latest Avg Rewards: -293
            Recent Change: 1.19
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0029
            Latest Critic Loss: 1511.2657
            Avg Critic Loss: 1733.7022
        

            Timesteps: 85,557 / 2,000,000 (4.2778%)
            Episodes: 131
            Currently: Training cycle 1/5
            Latest Reward: -295
            Latest Avg Rewards: -293
            Recent Change: 1.19
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0002
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0029
            Latest Critic Loss: 151

Object 0 dropped to the floor



            Timesteps: 86,307 / 2,000,000 (4.3153%)
            Episodes: 132
            Currently: Rollout
            Latest Reward: -498
            Latest Avg Rewards: -297
            Recent Change: 1.22
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0027
            Latest Critic Loss: 1946.6241
            Avg Critic Loss: 1748.0263
        


Remove body failed



            Timesteps: 87,057 / 2,000,000 (4.3529%)
            Episodes: 133
            Currently: Rollout
            Latest Reward: -256
            Latest Avg Rewards: -296
            Recent Change: 1.19
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0027
            Latest Critic Loss: 1946.6241
            Avg Critic Loss: 1748.0263
        


Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 87,211 / 2,000,000 (4.3605%)
            Episodes: 134
            Currently: Rollout
            Latest Reward: -156
            Latest Avg Rewards: -295
            Recent Change: 1.08
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0027
            Latest Critic Loss: 1946.6241
            Avg Critic Loss: 1748.0263
        

            Timesteps: 87,961 / 2,000,000 (4.398%)
            Episodes: 135
            Currently: Rollout
            Latest Reward: -350
            Latest Avg Rewards: -296
            Recent Change: 1.11
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0027
            Latest Critic Loss: 1946.6241
     

Object 0 dropped to the floor



            Timesteps: 88,711 / 2,000,000 (4.4356%)
            Episodes: 136
            Currently: Rollout
            Latest Reward: -386
            Latest Avg Rewards: -299
            Recent Change: 1.05
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0027
            Latest Critic Loss: 1946.6241
            Avg Critic Loss: 1748.0263
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 89,461 / 2,000,000 (4.473%)
            Episodes: 137
            Currently: Rollout
            Latest Reward: -386
            Latest Avg Rewards: -298
            Recent Change: 1.18
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0027
            Latest Critic Loss: 1946.6241
            Avg Critic Loss: 1748.0263
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 90,211 / 2,000,000 (4.5106%)
            Episodes: 138
            Currently: Rollout
            Latest Reward: -320
            Latest Avg Rewards: -300
            Recent Change: 1.08
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0027
            Latest Critic Loss: 1946.6241
            Avg Critic Loss: 1748.0263
        


Remove body failed



            Timesteps: 90,961 / 2,000,000 (4.548%)
            Episodes: 139
            Currently: Rollout
            Latest Reward: -180
            Latest Avg Rewards: -300
            Recent Change: 0.97
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0027
            Latest Critic Loss: 1946.6241
            Avg Critic Loss: 1748.0263
        

            Timesteps: 90,961 / 2,000,000 (4.548%)
            Episodes: 139
            Currently: Training cycle 1/5
            Latest Reward: -180
            Latest Avg Rewards: -300
            Recent Change: 0.97
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0027
            Latest Critic Loss: 1946.

Object 0 dropped to the floor



            Timesteps: 92,461 / 2,000,000 (4.6231%)
            Episodes: 141
            Currently: Rollout
            Latest Reward: -257
            Latest Avg Rewards: -302
            Recent Change: 0.68
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0026
            Latest Critic Loss: 1699.731
            Avg Critic Loss: 1746.251
        


Remove body failed



            Timesteps: 93,211 / 2,000,000 (4.6605%)
            Episodes: 142
            Currently: Rollout
            Latest Reward: -371
            Latest Avg Rewards: -302
            Recent Change: 0.73
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0026
            Latest Critic Loss: 1699.731
            Avg Critic Loss: 1746.251
        


Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed
Object 1 dropped to the floor



            Timesteps: 93,439 / 2,000,000 (4.6719%)
            Episodes: 143
            Currently: Rollout
            Latest Reward: -177
            Latest Avg Rewards: -299
            Recent Change: 0.8
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0026
            Latest Critic Loss: 1699.731
            Avg Critic Loss: 1746.251
        

            Timesteps: 94,189 / 2,000,000 (4.7094%)
            Episodes: 144
            Currently: Rollout
            Latest Reward: -438
            Latest Avg Rewards: -302
            Recent Change: 0.79
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0026
            Latest Critic Loss: 1699.731
        

Remove body failed



            Timesteps: 94,939 / 2,000,000 (4.747%)
            Episodes: 145
            Currently: Rollout
            Latest Reward: -182
            Latest Avg Rewards: -300
            Recent Change: 0.75
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0026
            Latest Critic Loss: 1699.731
            Avg Critic Loss: 1746.251
        


Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 95,120 / 2,000,000 (4.756%)
            Episodes: 146
            Currently: Rollout
            Latest Reward: -170
            Latest Avg Rewards: -300
            Recent Change: 0.61
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0026
            Latest Critic Loss: 1699.731
            Avg Critic Loss: 1746.251
        

            Timesteps: 95,870 / 2,000,000 (4.7935%)
            Episodes: 147
            Currently: Rollout
            Latest Reward: -261
            Latest Avg Rewards: -300
            Recent Change: 0.52
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0026
            Latest Critic Loss: 1699.731
        

Object 0 dropped to the floor



            Timesteps: 98,120 / 2,000,000 (4.906%)
            Episodes: 150
            Currently: Rollout
            Latest Reward: -336
            Latest Avg Rewards: -300
            Recent Change: 0.48
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0024
            Latest Critic Loss: 1185.3058
            Avg Critic Loss: 1716.0207
        


Remove body failed



            Timesteps: 98,870 / 2,000,000 (4.9435%)
            Episodes: 151
            Currently: Rollout
            Latest Reward: -338
            Latest Avg Rewards: -301
            Recent Change: 0.48
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0024
            Latest Critic Loss: 1185.3058
            Avg Critic Loss: 1716.0207
        


Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 99,015 / 2,000,000 (4.9508%)
            Episodes: 152
            Currently: Rollout
            Latest Reward: -164
            Latest Avg Rewards: -299
            Recent Change: 0.39
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0024
            Latest Critic Loss: 1185.3058
            Avg Critic Loss: 1716.0207
        


Object 0 dropped to the floor



            Timesteps: 99,765 / 2,000,000 (4.9883%)
            Episodes: 153
            Currently: Rollout
            Latest Reward: -299
            Latest Avg Rewards: -301
            Recent Change: 0.29
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0024
            Latest Critic Loss: 1185.3058
            Avg Critic Loss: 1716.0207
        


Remove body failed



            Timesteps: 100,515 / 2,000,000 (5.0257%)
            Episodes: 154
            Currently: Rollout
            Latest Reward: -231
            Latest Avg Rewards: -300
            Recent Change: 0.28
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0024
            Latest Critic Loss: 1185.3058
            Avg Critic Loss: 1716.0207
        


Object 0 dropped to the floor



            Timesteps: 101,265 / 2,000,000 (5.0633%)
            Episodes: 155
            Currently: Rollout
            Latest Reward: -521
            Latest Avg Rewards: -303
            Recent Change: 0.37
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0024
            Latest Critic Loss: 1185.3058
            Avg Critic Loss: 1716.0207
        


Remove body failed



            Timesteps: 102,015 / 2,000,000 (5.1007%)
            Episodes: 156
            Currently: Rollout
            Latest Reward: -216
            Latest Avg Rewards: -303
            Recent Change: 0.26
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0024
            Latest Critic Loss: 1185.3058
            Avg Critic Loss: 1716.0207
        

            Timesteps: 102,015 / 2,000,000 (5.1007%)
            Episodes: 156
            Currently: Training cycle 1/5
            Latest Reward: -216
            Latest Avg Rewards: -303
            Recent Change: 0.26
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0024
            Latest Critic Loss: 1185.30

Object 1 dropped to the floor



            Timesteps: 103,515 / 2,000,000 (5.1757%)
            Episodes: 158
            Currently: Rollout
            Latest Reward: -418
            Latest Avg Rewards: -305
            Recent Change: 0.3
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0023
            Latest Critic Loss: 1464.0308
            Avg Critic Loss: 1703.9051
        


Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 103,840 / 2,000,000 (5.192%)
            Episodes: 159
            Currently: Rollout
            Latest Reward: -210
            Latest Avg Rewards: -301
            Recent Change: 0.43
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0023
            Latest Critic Loss: 1464.0308
            Avg Critic Loss: 1703.9051
        

            Timesteps: 104,590 / 2,000,000 (5.2295%)
            Episodes: 160
            Currently: Rollout
            Latest Reward: -214
            Latest Avg Rewards: -301
            Recent Change: 0.33
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0023
            Latest Critic Loss: 1464.0308
         

Object 0 dropped to the floor



            Timesteps: 105,340 / 2,000,000 (5.267%)
            Episodes: 161
            Currently: Rollout
            Latest Reward: -438
            Latest Avg Rewards: -303
            Recent Change: 0.35
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0023
            Latest Critic Loss: 1464.0308
            Avg Critic Loss: 1703.9051
        


Remove body failed



            Timesteps: 106,090 / 2,000,000 (5.3045%)
            Episodes: 162
            Currently: Rollout
            Latest Reward: -287
            Latest Avg Rewards: -303
            Recent Change: 0.32
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0023
            Latest Critic Loss: 1464.0308
            Avg Critic Loss: 1703.9051
        


Object 0 dropped to the floor



            Timesteps: 106,840 / 2,000,000 (5.342%)
            Episodes: 163
            Currently: Rollout
            Latest Reward: -367
            Latest Avg Rewards: -304
            Recent Change: 0.37
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0023
            Latest Critic Loss: 1464.0308
            Avg Critic Loss: 1703.9051
        


Remove body failed



            Timesteps: 107,590 / 2,000,000 (5.3795%)
            Episodes: 164
            Currently: Rollout
            Latest Reward: -234
            Latest Avg Rewards: -303
            Recent Change: 0.34
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0023
            Latest Critic Loss: 1464.0308
            Avg Critic Loss: 1703.9051
        

            Timesteps: 107,590 / 2,000,000 (5.3795%)
            Episodes: 164
            Currently: Training cycle 1/5
            Latest Reward: -234
            Latest Avg Rewards: -303
            Recent Change: 0.34
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0023
            Latest Critic Loss: 1464.03

Object 1 dropped to the floor
Remove body failed



            Timesteps: 108,340 / 2,000,000 (5.417%)
            Episodes: 165
            Currently: Rollout
            Latest Reward: -305
            Latest Avg Rewards: -303
            Recent Change: 0.35
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0022
            Latest Critic Loss: 1371.9062
            Avg Critic Loss: 1688.6319
        


Object 1 dropped to the floor



            Timesteps: 109,090 / 2,000,000 (5.4545%)
            Episodes: 166
            Currently: Rollout
            Latest Reward: -288
            Latest Avg Rewards: -303
            Recent Change: 0.3
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0022
            Latest Critic Loss: 1371.9062
            Avg Critic Loss: 1688.6319
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 109,840 / 2,000,000 (5.492%)
            Episodes: 167
            Currently: Rollout
            Latest Reward: -343
            Latest Avg Rewards: -305
            Recent Change: 0.26
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0022
            Latest Critic Loss: 1371.9062
            Avg Critic Loss: 1688.6319
        


Remove body failed



            Timesteps: 110,590 / 2,000,000 (5.5295%)
            Episodes: 168
            Currently: Rollout
            Latest Reward: -241
            Latest Avg Rewards: -304
            Recent Change: 0.25
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0022
            Latest Critic Loss: 1371.9062
            Avg Critic Loss: 1688.6319
        

            Timesteps: 111,340 / 2,000,000 (5.567%)
            Episodes: 169
            Currently: Rollout
            Latest Reward: -325
            Latest Avg Rewards: -306
            Recent Change: 0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0022
            Latest Critic Loss: 1371.9062
   

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 111,410 / 2,000,000 (5.5705%)
            Episodes: 170
            Currently: Rollout
            Latest Reward: -127
            Latest Avg Rewards: -305
            Recent Change: -0.01
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0022
            Latest Critic Loss: 1371.9062
            Avg Critic Loss: 1688.6319
        


Object 0 dropped to the floor



            Timesteps: 112,160 / 2,000,000 (5.608%)
            Episodes: 171
            Currently: Rollout
            Latest Reward: -304
            Latest Avg Rewards: -305
            Recent Change: -0.05
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0022
            Latest Critic Loss: 1371.9062
            Avg Critic Loss: 1688.6319
        


Remove body failed



            Timesteps: 112,910 / 2,000,000 (5.6455%)
            Episodes: 172
            Currently: Rollout
            Latest Reward: -212
            Latest Avg Rewards: -305
            Recent Change: -0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0022
            Latest Critic Loss: 1371.9062
            Avg Critic Loss: 1688.6319
        

            Timesteps: 112,910 / 2,000,000 (5.6455%)
            Episodes: 172
            Currently: Training cycle 1/5
            Latest Reward: -212
            Latest Avg Rewards: -305
            Recent Change: -0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0001
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0022
            Latest Critic Loss:

Object 0 dropped to the floor



            Timesteps: 114,410 / 2,000,000 (5.7205%)
            Episodes: 174
            Currently: Rollout
            Latest Reward: -291
            Latest Avg Rewards: -309
            Recent Change: -0.08
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0021
            Latest Critic Loss: 971.0241
            Avg Critic Loss: 1655.6377
        


Remove body failed



            Timesteps: 115,160 / 2,000,000 (5.758%)
            Episodes: 175
            Currently: Rollout
            Latest Reward: -249
            Latest Avg Rewards: -309
            Recent Change: -0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0021
            Latest Critic Loss: 971.0241
            Avg Critic Loss: 1655.6377
        

            Timesteps: 115,910 / 2,000,000 (5.7955%)
            Episodes: 176
            Currently: Rollout
            Latest Reward: -299
            Latest Avg Rewards: -307
            Recent Change: -0.03
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0021
            Latest Critic Loss: 971.0241
         

Object 1 dropped to the floor



            Timesteps: 121,160 / 2,000,000 (6.058%)
            Episodes: 183
            Currently: Rollout
            Latest Reward: -430
            Latest Avg Rewards: -310
            Recent Change: -0.33
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.002
            Latest Critic Loss: 1320.3049
            Avg Critic Loss: 1641.877
        


Remove body failed



            Timesteps: 121,910 / 2,000,000 (6.0955%)
            Episodes: 184
            Currently: Rollout
            Latest Reward: -193
            Latest Avg Rewards: -309
            Recent Change: -0.41
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.002
            Latest Critic Loss: 1320.3049
            Avg Critic Loss: 1641.877
        


Object 1 dropped to the floor



            Timesteps: 122,660 / 2,000,000 (6.133%)
            Episodes: 185
            Currently: Rollout
            Latest Reward: -277
            Latest Avg Rewards: -308
            Recent Change: -0.4
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.002
            Latest Critic Loss: 1320.3049
            Avg Critic Loss: 1641.877
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 123,410 / 2,000,000 (6.1705%)
            Episodes: 186
            Currently: Rollout
            Latest Reward: -263
            Latest Avg Rewards: -308
            Recent Change: -0.46
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.002
            Latest Critic Loss: 1320.3049
            Avg Critic Loss: 1641.877
        

            Timesteps: 123,410 / 2,000,000 (6.1705%)
            Episodes: 186
            Currently: Training cycle 1/5
            Latest Reward: -263
            Latest Avg Rewards: -308
            Recent Change: -0.46
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.002
            Latest Critic Loss: 1320.304

Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 123,538 / 2,000,000 (6.1769%)
            Episodes: 187
            Currently: Rollout
            Latest Reward: -137
            Latest Avg Rewards: -306
            Recent Change: -0.54
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 573.6969
            Avg Critic Loss: 1596.4099
        

            Timesteps: 124,288 / 2,000,000 (6.2144%)
            Episodes: 188
            Currently: Rollout
            Latest Reward: -252
            Latest Avg Rewards: -306
            Recent Change: -0.6
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 573.6969
         

Object 0 dropped to the floor



            Timesteps: 125,038 / 2,000,000 (6.2519%)
            Episodes: 189
            Currently: Rollout
            Latest Reward: -270
            Latest Avg Rewards: -307
            Recent Change: -0.69
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 573.6969
            Avg Critic Loss: 1596.4099
        

            Timesteps: 125,080 / 2,000,000 (6.254%)
            Episodes: 190
            Currently: Rollout
            Latest Reward: -114
            Latest Avg Rewards: -305
            Recent Change: -0.8
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 573.6969
          

Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 125,830 / 2,000,000 (6.2915%)
            Episodes: 191
            Currently: Rollout
            Latest Reward: -477
            Latest Avg Rewards: -307
            Recent Change: -0.73
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 573.6969
            Avg Critic Loss: 1596.4099
        


Object 1 dropped to the floor



            Timesteps: 126,580 / 2,000,000 (6.329%)
            Episodes: 192
            Currently: Rollout
            Latest Reward: -336
            Latest Avg Rewards: -306
            Recent Change: -0.63
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 573.6969
            Avg Critic Loss: 1596.4099
        


Remove body failed



            Timesteps: 127,330 / 2,000,000 (6.3665%)
            Episodes: 193
            Currently: Rollout
            Latest Reward: -369
            Latest Avg Rewards: -307
            Recent Change: -0.64
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 573.6969
            Avg Critic Loss: 1596.4099
        


Object 0 dropped to the floor



            Timesteps: 128,080 / 2,000,000 (6.404%)
            Episodes: 194
            Currently: Rollout
            Latest Reward: -343
            Latest Avg Rewards: -307
            Recent Change: -0.59
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 573.6969
            Avg Critic Loss: 1596.4099
        


Remove body failed



            Timesteps: 128,830 / 2,000,000 (6.4415%)
            Episodes: 195
            Currently: Rollout
            Latest Reward: -284
            Latest Avg Rewards: -308
            Recent Change: -0.65
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 573.6969
            Avg Critic Loss: 1596.4099
        

            Timesteps: 128,830 / 2,000,000 (6.4415%)
            Episodes: 195
            Currently: Training cycle 1/5
            Latest Reward: -284
            Latest Avg Rewards: -308
            Recent Change: -0.65
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0019
            Latest Critic Loss: 573.69

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 128,911 / 2,000,000 (6.4455%)
            Episodes: 196
            Currently: Rollout
            Latest Reward: -132
            Latest Avg Rewards: -307
            Recent Change: -0.82
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0018
            Latest Critic Loss: 1199.147
            Avg Critic Loss: 1581.4929
        

            Timesteps: 129,661 / 2,000,000 (6.483%)
            Episodes: 197
            Currently: Rollout
            Latest Reward: -279
            Latest Avg Rewards: -308
            Recent Change: -0.91
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0018
            Latest Critic Loss: 1199.147
         

Object 0 dropped to the floor



            Timesteps: 131,161 / 2,000,000 (6.558%)
            Episodes: 199
            Currently: Rollout
            Latest Reward: -274
            Latest Avg Rewards: -304
            Recent Change: -0.76
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0018
            Latest Critic Loss: 1199.147
            Avg Critic Loss: 1581.4929
        


Remove body failed



            Timesteps: 131,911 / 2,000,000 (6.5956%)
            Episodes: 200
            Currently: Rollout
            Latest Reward: -233
            Latest Avg Rewards: -301
            Recent Change: -0.71
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0018
            Latest Critic Loss: 1199.147
            Avg Critic Loss: 1581.4929
        

            Timesteps: 132,661 / 2,000,000 (6.633%)
            Episodes: 201
            Currently: Rollout
            Latest Reward: -380
            Latest Avg Rewards: -302
            Recent Change: -0.68
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0018
            Latest Critic Loss: 1199.147
         

Object 0 dropped to the floor



            Timesteps: 134,911 / 2,000,000 (6.7455%)
            Episodes: 204
            Currently: Rollout
            Latest Reward: -330
            Latest Avg Rewards: -300
            Recent Change: -0.69
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 556.3082
            Avg Critic Loss: 1541.6655
        


Remove body failed



            Timesteps: 135,661 / 2,000,000 (6.7831%)
            Episodes: 205
            Currently: Rollout
            Latest Reward: -239
            Latest Avg Rewards: -300
            Recent Change: -0.75
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 556.3082
            Avg Critic Loss: 1541.6655
        


Object 1 dropped to the floor



            Timesteps: 136,411 / 2,000,000 (6.8205%)
            Episodes: 206
            Currently: Rollout
            Latest Reward: -370
            Latest Avg Rewards: -301
            Recent Change: -0.73
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 556.3082
            Avg Critic Loss: 1541.6655
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 137,161 / 2,000,000 (6.8581%)
            Episodes: 207
            Currently: Rollout
            Latest Reward: -612
            Latest Avg Rewards: -304
            Recent Change: -0.56
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 556.3082
            Avg Critic Loss: 1541.6655
        

            Timesteps: 137,195 / 2,000,000 (6.8598%)
            Episodes: 208
            Currently: Rollout
            Latest Reward: -112
            Latest Avg Rewards: -300
            Recent Change: -0.55
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 556.3082
        

Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 137,945 / 2,000,000 (6.8973%)
            Episodes: 209
            Currently: Rollout
            Latest Reward: -240
            Latest Avg Rewards: -298
            Recent Change: -0.5
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 556.3082
            Avg Critic Loss: 1541.6655
        

            Timesteps: 138,695 / 2,000,000 (6.9348%)
            Episodes: 210
            Currently: Rollout
            Latest Reward: -235
            Latest Avg Rewards: -297
            Recent Change: -0.51
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 556.3082
         

Object 1 dropped to the floor



            Timesteps: 139,445 / 2,000,000 (6.9723%)
            Episodes: 211
            Currently: Rollout
            Latest Reward: -286
            Latest Avg Rewards: -297
            Recent Change: -0.51
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 556.3082
            Avg Critic Loss: 1541.6655
        

            Timesteps: 139,445 / 2,000,000 (6.9723%)
            Episodes: 211
            Currently: Training cycle 1/5
            Latest Reward: -286
            Latest Avg Rewards: -297
            Recent Change: -0.51
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 556.30

Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 139,486 / 2,000,000 (6.9743%)
            Episodes: 212
            Currently: Rollout
            Latest Reward: -117
            Latest Avg Rewards: -295
            Recent Change: -0.61
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 1035.6281
            Avg Critic Loss: 1523.7469
        

            Timesteps: 140,236 / 2,000,000 (7.0118%)
            Episodes: 213
            Currently: Rollout
            Latest Reward: -181
            Latest Avg Rewards: -294
            Recent Change: -0.68
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 1035.6281
            Av

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed
Object 0 dropped to the floor



            Timesteps: 141,115 / 2,000,000 (7.0557%)
            Episodes: 215
            Currently: Rollout
            Latest Reward: -167
            Latest Avg Rewards: -297
            Recent Change: -0.79
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 1035.6281
            Avg Critic Loss: 1523.7469
        


Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 141,325 / 2,000,000 (7.0663%)
            Episodes: 216
            Currently: Rollout
            Latest Reward: -202
            Latest Avg Rewards: -296
            Recent Change: -0.82
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 1035.6281
            Avg Critic Loss: 1523.7469
        

            Timesteps: 142,075 / 2,000,000 (7.1038%)
            Episodes: 217
            Currently: Rollout
            Latest Reward: -254
            Latest Avg Rewards: -294
            Recent Change: -0.8
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 1035.6281
            Avg

Object 0 dropped to the floor



            Timesteps: 142,825 / 2,000,000 (7.1413%)
            Episodes: 218
            Currently: Rollout
            Latest Reward: -282
            Latest Avg Rewards: -294
            Recent Change: -0.8
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 1035.6281
            Avg Critic Loss: 1523.7469
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 143,575 / 2,000,000 (7.1788%)
            Episodes: 219
            Currently: Rollout
            Latest Reward: -355
            Latest Avg Rewards: -294
            Recent Change: -0.72
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 1035.6281
            Avg Critic Loss: 1523.7469
        

            Timesteps: 143,619 / 2,000,000 (7.181%)
            Episodes: 220
            Currently: Rollout
            Latest Reward: -119
            Latest Avg Rewards: -291
            Recent Change: -0.73
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 1035.6281
            Avg

Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed
Object 1 dropped to the floor



            Timesteps: 144,369 / 2,000,000 (7.2184%)
            Episodes: 221
            Currently: Rollout
            Latest Reward: -404
            Latest Avg Rewards: -291
            Recent Change: -0.59
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 1035.6281
            Avg Critic Loss: 1523.7469
        


Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor



            Timesteps: 144,473 / 2,000,000 (7.2236%)
            Episodes: 222
            Currently: Rollout
            Latest Reward: -145
            Latest Avg Rewards: -289
            Recent Change: -0.7
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 1035.6281
            Avg Critic Loss: 1523.7469
        

            Timesteps: 144,473 / 2,000,000 (7.2236%)
            Episodes: 222
            Currently: Training cycle 1/5
            Latest Reward: -145
            Latest Avg Rewards: -289
            Recent Change: -0.7
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0017
            Latest Critic Loss: 1035.6281
     

Remove body failed
Remove body failed



            Timesteps: 145,223 / 2,000,000 (7.2611%)
            Episodes: 223
            Currently: Rollout
            Latest Reward: -296
            Latest Avg Rewards: -290
            Recent Change: -0.74
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 986.1021
            Avg Critic Loss: 1505.5512
        

            Timesteps: 145,973 / 2,000,000 (7.2986%)
            Episodes: 224
            Currently: Rollout
            Latest Reward: -259
            Latest Avg Rewards: -288
            Recent Change: -0.66
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 986.1021
        

Object 0 dropped to the floor



            Timesteps: 147,473 / 2,000,000 (7.3736%)
            Episodes: 226
            Currently: Rollout
            Latest Reward: -303
            Latest Avg Rewards: -285
            Recent Change: -0.37
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 986.1021
            Avg Critic Loss: 1505.5512
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 148,223 / 2,000,000 (7.4111%)
            Episodes: 227
            Currently: Rollout
            Latest Reward: -575
            Latest Avg Rewards: -287
            Recent Change: -0.13
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 986.1021
            Avg Critic Loss: 1505.5512
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 148,973 / 2,000,000 (7.4486%)
            Episodes: 228
            Currently: Rollout
            Latest Reward: -313
            Latest Avg Rewards: -287
            Recent Change: -0.09
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 986.1021
            Avg Critic Loss: 1505.5512
        


Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 149,180 / 2,000,000 (7.459%)
            Episodes: 229
            Currently: Rollout
            Latest Reward: -170
            Latest Avg Rewards: -286
            Recent Change: -0.18
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 986.1021
            Avg Critic Loss: 1505.5512
        

            Timesteps: 149,930 / 2,000,000 (7.4965%)
            Episodes: 230
            Currently: Rollout
            Latest Reward: -268
            Latest Avg Rewards: -286
            Recent Change: -0.18
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 986.1021
         

Object 0 dropped to the floor



            Timesteps: 150,680 / 2,000,000 (7.534%)
            Episodes: 231
            Currently: Rollout
            Latest Reward: -311
            Latest Avg Rewards: -286
            Recent Change: -0.16
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 782.1748
            Avg Critic Loss: 1481.4561
        


Remove body failed



            Timesteps: 151,430 / 2,000,000 (7.5715%)
            Episodes: 232
            Currently: Rollout
            Latest Reward: -288
            Latest Avg Rewards: -284
            Recent Change: -0.03
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 782.1748
            Avg Critic Loss: 1481.4561
        


Object 0 dropped to the floor



            Timesteps: 152,180 / 2,000,000 (7.609%)
            Episodes: 233
            Currently: Rollout
            Latest Reward: -515
            Latest Avg Rewards: -286
            Recent Change: 0.09
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 782.1748
            Avg Critic Loss: 1481.4561
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 152,930 / 2,000,000 (7.6465%)
            Episodes: 234
            Currently: Rollout
            Latest Reward: -267
            Latest Avg Rewards: -287
            Recent Change: 0.0
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 782.1748
            Avg Critic Loss: 1481.4561
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 153,680 / 2,000,000 (7.684%)
            Episodes: 235
            Currently: Rollout
            Latest Reward: -334
            Latest Avg Rewards: -287
            Recent Change: 0.07
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 782.1748
            Avg Critic Loss: 1481.4561
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 154,430 / 2,000,000 (7.7215%)
            Episodes: 236
            Currently: Rollout
            Latest Reward: -387
            Latest Avg Rewards: -287
            Recent Change: 0.19
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 782.1748
            Avg Critic Loss: 1481.4561
        


Remove body failed



            Timesteps: 155,180 / 2,000,000 (7.759%)
            Episodes: 237
            Currently: Rollout
            Latest Reward: -390
            Latest Avg Rewards: -287
            Recent Change: 0.31
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 782.1748
            Avg Critic Loss: 1481.4561
        

            Timesteps: 155,180 / 2,000,000 (7.759%)
            Episodes: 237
            Currently: Training cycle 1/5
            Latest Reward: -390
            Latest Avg Rewards: -287
            Recent Change: 0.31
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0016
            Latest Critic Loss: 782.1748
   

Object 0 dropped to the floor



            Timesteps: 157,430 / 2,000,000 (7.8715%)
            Episodes: 240
            Currently: Rollout
            Latest Reward: -331
            Latest Avg Rewards: -289
            Recent Change: 0.21
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 636.2181
            Avg Critic Loss: 1453.9074
        


Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 157,541 / 2,000,000 (7.877%)
            Episodes: 241
            Currently: Rollout
            Latest Reward: -149
            Latest Avg Rewards: -288
            Recent Change: 0.11
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 636.2181
            Avg Critic Loss: 1453.9074
        

            Timesteps: 158,291 / 2,000,000 (7.9145%)
            Episodes: 242
            Currently: Rollout
            Latest Reward: -252
            Latest Avg Rewards: -287
            Recent Change: 0.14
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 636.2181
           

Object 1 dropped to the floor



            Timesteps: 159,041 / 2,000,000 (7.952%)
            Episodes: 243
            Currently: Rollout
            Latest Reward: -510
            Latest Avg Rewards: -290
            Recent Change: 0.2
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 636.2181
            Avg Critic Loss: 1453.9074
        


Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 159,154 / 2,000,000 (7.9577%)
            Episodes: 244
            Currently: Rollout
            Latest Reward: -138
            Latest Avg Rewards: -287
            Recent Change: 0.2
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 636.2181
            Avg Critic Loss: 1453.9074
        

            Timesteps: 159,904 / 2,000,000 (7.9952%)
            Episodes: 245
            Currently: Rollout
            Latest Reward: -246
            Latest Avg Rewards: -288
            Recent Change: 0.11
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 636.2181
           

Object 0 dropped to the floor



            Timesteps: 160,654 / 2,000,000 (8.0327%)
            Episodes: 246
            Currently: Rollout
            Latest Reward: -396
            Latest Avg Rewards: -290
            Recent Change: 0.11
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 636.2181
            Avg Critic Loss: 1453.9074
        

            Timesteps: 160,654 / 2,000,000 (8.0327%)
            Episodes: 246
            Currently: Training cycle 1/5
            Latest Reward: -396
            Latest Avg Rewards: -290
            Recent Change: 0.11
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 636.2181

Remove body failed



            Timesteps: 161,404 / 2,000,000 (8.0702%)
            Episodes: 247
            Currently: Rollout
            Latest Reward: -201
            Latest Avg Rewards: -289
            Recent Change: 0.04
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 443.3466
            Avg Critic Loss: 1421.35
        

            Timesteps: 162,154 / 2,000,000 (8.1077%)
            Episodes: 248
            Currently: Rollout
            Latest Reward: -457
            Latest Avg Rewards: -292
            Recent Change: 0.1
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 443.3466
            A

Object 1 dropped to the floor



            Timesteps: 162,904 / 2,000,000 (8.1452%)
            Episodes: 249
            Currently: Rollout
            Latest Reward: -294
            Latest Avg Rewards: -292
            Recent Change: 0.09
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 443.3466
            Avg Critic Loss: 1421.35
        


Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 162,945 / 2,000,000 (8.1472%)
            Episodes: 250
            Currently: Rollout
            Latest Reward: -114
            Latest Avg Rewards: -290
            Recent Change: 0.01
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 443.3466
            Avg Critic Loss: 1421.35
        

            Timesteps: 163,695 / 2,000,000 (8.1848%)
            Episodes: 251
            Currently: Rollout
            Latest Reward: -248
            Latest Avg Rewards: -289
            Recent Change: 0.02
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 443.3466
            

Object 0 dropped to the floor



            Timesteps: 164,445 / 2,000,000 (8.2223%)
            Episodes: 252
            Currently: Rollout
            Latest Reward: -375
            Latest Avg Rewards: -291
            Recent Change: -0.01
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 443.3466
            Avg Critic Loss: 1421.35
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 165,195 / 2,000,000 (8.2598%)
            Episodes: 253
            Currently: Rollout
            Latest Reward: -412
            Latest Avg Rewards: -292
            Recent Change: 0.07
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 443.3466
            Avg Critic Loss: 1421.35
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 165,945 / 2,000,000 (8.2973%)
            Episodes: 254
            Currently: Rollout
            Latest Reward: -261
            Latest Avg Rewards: -292
            Recent Change: 0.01
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 443.3466
            Avg Critic Loss: 1421.35
        

            Timesteps: 165,945 / 2,000,000 (8.2973%)
            Episodes: 254
            Currently: Training cycle 1/5
            Latest Reward: -261
            Latest Avg Rewards: -292
            Recent Change: 0.01
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0015
            Latest Critic Loss: 443.3466
 

Remove body failed
Object 0 dropped to the floor



            Timesteps: 166,695 / 2,000,000 (8.3347%)
            Episodes: 255
            Currently: Rollout
            Latest Reward: -366
            Latest Avg Rewards: -291
            Recent Change: 0.2
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 478.3944
            Avg Critic Loss: 1391.811
        


Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 166,885 / 2,000,000 (8.3443%)
            Episodes: 256
            Currently: Rollout
            Latest Reward: -173
            Latest Avg Rewards: -290
            Recent Change: 0.08
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 478.3944
            Avg Critic Loss: 1391.811
        


Object 0 dropped to the floor



            Timesteps: 167,635 / 2,000,000 (8.3818%)
            Episodes: 257
            Currently: Rollout
            Latest Reward: -389
            Latest Avg Rewards: -291
            Recent Change: 0.17
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 478.3944
            Avg Critic Loss: 1391.811
        


Remove body failed



            Timesteps: 168,385 / 2,000,000 (8.4192%)
            Episodes: 258
            Currently: Rollout
            Latest Reward: -385
            Latest Avg Rewards: -290
            Recent Change: 0.3
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 478.3944
            Avg Critic Loss: 1391.811
        


Object 1 dropped to the floor



            Timesteps: 169,135 / 2,000,000 (8.4567%)
            Episodes: 259
            Currently: Rollout
            Latest Reward: -275
            Latest Avg Rewards: -291
            Recent Change: 0.24
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 478.3944
            Avg Critic Loss: 1391.811
        


Remove body failed



            Timesteps: 169,885 / 2,000,000 (8.4943%)
            Episodes: 260
            Currently: Rollout
            Latest Reward: -232
            Latest Avg Rewards: -291
            Recent Change: 0.16
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 478.3944
            Avg Critic Loss: 1391.811
        

            Timesteps: 170,635 / 2,000,000 (8.5318%)
            Episodes: 261
            Currently: Rollout
            Latest Reward: -249
            Latest Avg Rewards: -289
            Recent Change: 0.22
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 478.3944
           

Object 1 dropped to the floor



            Timesteps: 171,385 / 2,000,000 (8.5693%)
            Episodes: 262
            Currently: Rollout
            Latest Reward: -784
            Latest Avg Rewards: -294
            Recent Change: 0.51
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 478.3944
            Avg Critic Loss: 1391.811
        

            Timesteps: 171,385 / 2,000,000 (8.5693%)
            Episodes: 262
            Currently: Training cycle 1/5
            Latest Reward: -784
            Latest Avg Rewards: -294
            Recent Change: 0.51
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0006
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 478.3944


Remove body failed
Object 0 dropped to the floor



            Timesteps: 172,135 / 2,000,000 (8.6067%)
            Episodes: 263
            Currently: Rollout
            Latest Reward: -368
            Latest Avg Rewards: -294
            Recent Change: 0.6
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 560.2044
            Avg Critic Loss: 1366.5991
        


Remove body failed



            Timesteps: 172,885 / 2,000,000 (8.6443%)
            Episodes: 264
            Currently: Rollout
            Latest Reward: -308
            Latest Avg Rewards: -295
            Recent Change: 0.57
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 560.2044
            Avg Critic Loss: 1366.5991
        

            Timesteps: 173,635 / 2,000,000 (8.6818%)
            Episodes: 265
            Currently: Rollout
            Latest Reward: -270
            Latest Avg Rewards: -295
            Recent Change: 0.56
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 560.2044
            

Object 1 dropped to the floor



            Timesteps: 174,385 / 2,000,000 (8.7193%)
            Episodes: 266
            Currently: Rollout
            Latest Reward: -282
            Latest Avg Rewards: -295
            Recent Change: 0.55
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 560.2044
            Avg Critic Loss: 1366.5991
        


Remove body failed



            Timesteps: 175,135 / 2,000,000 (8.7568%)
            Episodes: 267
            Currently: Rollout
            Latest Reward: -198
            Latest Avg Rewards: -293
            Recent Change: 0.53
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 560.2044
            Avg Critic Loss: 1366.5991
        

            Timesteps: 175,885 / 2,000,000 (8.7942%)
            Episodes: 268
            Currently: Rollout
            Latest Reward: -254
            Latest Avg Rewards: -293
            Recent Change: 0.47
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 560.2044
            

Object 0 dropped to the floor



            Timesteps: 176,635 / 2,000,000 (8.8317%)
            Episodes: 269
            Currently: Rollout
            Latest Reward: -254
            Latest Avg Rewards: -293
            Recent Change: 0.47
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 560.2044
            Avg Critic Loss: 1366.5991
        

            Timesteps: 176,635 / 2,000,000 (8.8317%)
            Episodes: 269
            Currently: Training cycle 1/5
            Latest Reward: -254
            Latest Avg Rewards: -293
            Recent Change: 0.47
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0014
            Latest Critic Loss: 560.2044
 

Remove body failed



            Timesteps: 177,385 / 2,000,000 (8.8692%)
            Episodes: 270
            Currently: Rollout
            Latest Reward: -245
            Latest Avg Rewards: -294
            Recent Change: 0.34
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 168.268
            Avg Critic Loss: 1330.2589
        

            Timesteps: 178,135 / 2,000,000 (8.9067%)
            Episodes: 271
            Currently: Rollout
            Latest Reward: -516
            Latest Avg Rewards: -296
            Recent Change: 0.48
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 168.268
            

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed
Object 0 dropped to the floor



            Timesteps: 180,571 / 2,000,000 (9.0286%)
            Episodes: 275
            Currently: Rollout
            Latest Reward: -155
            Latest Avg Rewards: -292
            Recent Change: 0.42
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 168.268
            Avg Critic Loss: 1330.2589
        

            Timesteps: 181,321 / 2,000,000 (9.0661%)
            Episodes: 276
            Currently: Rollout
            Latest Reward: -321
            Latest Avg Rewards: -293
            Recent Change: 0.45
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 168.268
            

Remove body failed



            Timesteps: 182,071 / 2,000,000 (9.1036%)
            Episodes: 277
            Currently: Rollout
            Latest Reward: -294
            Latest Avg Rewards: -293
            Recent Change: 0.43
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 168.268
            Avg Critic Loss: 1330.2589
        

            Timesteps: 182,071 / 2,000,000 (9.1036%)
            Episodes: 277
            Currently: Training cycle 1/5
            Latest Reward: -294
            Latest Avg Rewards: -293
            Recent Change: 0.43
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 168.268
 

Object 1 dropped to the floor



            Timesteps: 182,821 / 2,000,000 (9.141%)
            Episodes: 278
            Currently: Rollout
            Latest Reward: -314
            Latest Avg Rewards: -294
            Recent Change: 0.41
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 372.8675
            Avg Critic Loss: 1302.2313
        


Remove body failed



            Timesteps: 183,571 / 2,000,000 (9.1786%)
            Episodes: 279
            Currently: Rollout
            Latest Reward: -241
            Latest Avg Rewards: -292
            Recent Change: 0.48
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 372.8675
            Avg Critic Loss: 1302.2313
        

            Timesteps: 184,321 / 2,000,000 (9.2161%)
            Episodes: 280
            Currently: Rollout
            Latest Reward: -552
            Latest Avg Rewards: -295
            Recent Change: 0.6
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 372.8675
            A

Object 1 dropped to the floor



            Timesteps: 186,571 / 2,000,000 (9.3285%)
            Episodes: 283
            Currently: Rollout
            Latest Reward: -344
            Latest Avg Rewards: -297
            Recent Change: 0.74
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 372.8675
            Avg Critic Loss: 1302.2313
        


Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor



            Timesteps: 187,128 / 2,000,000 (9.3564%)
            Episodes: 284
            Currently: Rollout
            Latest Reward: -280
            Latest Avg Rewards: -298
            Recent Change: 0.66
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 372.8675
            Avg Critic Loss: 1302.2313
        

            Timesteps: 187,128 / 2,000,000 (9.3564%)
            Episodes: 284
            Currently: Training cycle 1/5
            Latest Reward: -280
            Latest Avg Rewards: -298
            Recent Change: 0.66
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 372.8675
 

Remove body failed
Remove body failed
Object 0 dropped to the floor



            Timesteps: 187,878 / 2,000,000 (9.3939%)
            Episodes: 285
            Currently: Rollout
            Latest Reward: -520
            Latest Avg Rewards: -301
            Recent Change: 0.78
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 643.954
            Avg Critic Loss: 1283.7688
        


Remove body failed



            Timesteps: 188,628 / 2,000,000 (9.4314%)
            Episodes: 286
            Currently: Rollout
            Latest Reward: -210
            Latest Avg Rewards: -300
            Recent Change: 0.7
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 643.954
            Avg Critic Loss: 1283.7688
        


Object 1 dropped to the floor



            Timesteps: 189,378 / 2,000,000 (9.4689%)
            Episodes: 287
            Currently: Rollout
            Latest Reward: -386
            Latest Avg Rewards: -303
            Recent Change: 0.66
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 643.954
            Avg Critic Loss: 1283.7688
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 190,128 / 2,000,000 (9.5064%)
            Episodes: 288
            Currently: Rollout
            Latest Reward: -391
            Latest Avg Rewards: -304
            Recent Change: 0.68
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 643.954
            Avg Critic Loss: 1283.7688
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 190,878 / 2,000,000 (9.5439%)
            Episodes: 289
            Currently: Rollout
            Latest Reward: -527
            Latest Avg Rewards: -307
            Recent Change: 0.79
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 643.954
            Avg Critic Loss: 1283.7688
        


Remove body failed



            Timesteps: 191,628 / 2,000,000 (9.5814%)
            Episodes: 290
            Currently: Rollout
            Latest Reward: -257
            Latest Avg Rewards: -308
            Recent Change: 0.64
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 643.954
            Avg Critic Loss: 1283.7688
        


Object 1 dropped to the floor



            Timesteps: 192,378 / 2,000,000 (9.6189%)
            Episodes: 291
            Currently: Rollout
            Latest Reward: -440
            Latest Avg Rewards: -308
            Recent Change: 0.82
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 643.954
            Avg Critic Loss: 1283.7688
        

            Timesteps: 192,378 / 2,000,000 (9.6189%)
            Episodes: 291
            Currently: Training cycle 1/5
            Latest Reward: -440
            Latest Avg Rewards: -308
            Recent Change: 0.82
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0013
            Latest Critic Loss: 643.954
 

Remove body failed
Object 1 dropped to the floor



            Timesteps: 193,128 / 2,000,000 (9.6564%)
            Episodes: 292
            Currently: Rollout
            Latest Reward: -360
            Latest Avg Rewards: -308
            Recent Change: 0.87
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 314.131
            Avg Critic Loss: 1257.0163
        


Remove body failed



            Timesteps: 193,878 / 2,000,000 (9.6939%)
            Episodes: 293
            Currently: Rollout
            Latest Reward: -230
            Latest Avg Rewards: -307
            Recent Change: 0.86
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 314.131
            Avg Critic Loss: 1257.0163
        

            Timesteps: 194,628 / 2,000,000 (9.7314%)
            Episodes: 294
            Currently: Rollout
            Latest Reward: -267
            Latest Avg Rewards: -306
            Recent Change: 0.86
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 314.131
            Avg Crit

Object 1 dropped to the floor



            Timesteps: 196,128 / 2,000,000 (9.8064%)
            Episodes: 296
            Currently: Rollout
            Latest Reward: -337
            Latest Avg Rewards: -308
            Recent Change: 0.75
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 314.131
            Avg Critic Loss: 1257.0163
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 196,878 / 2,000,000 (9.8439%)
            Episodes: 297
            Currently: Rollout
            Latest Reward: -241
            Latest Avg Rewards: -308
            Recent Change: 0.69
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 314.131
            Avg Critic Loss: 1257.0163
        


Remove body failed



            Timesteps: 197,628 / 2,000,000 (9.8814%)
            Episodes: 298
            Currently: Rollout
            Latest Reward: -471
            Latest Avg Rewards: -310
            Recent Change: 0.75
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 314.131
            Avg Critic Loss: 1257.0163
        

            Timesteps: 197,628 / 2,000,000 (9.8814%)
            Episodes: 298
            Currently: Training cycle 1/5
            Latest Reward: -471
            Latest Avg Rewards: -310
            Recent Change: 0.75
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 314.131
         

Object 1 dropped to the floor



            Timesteps: 198,378 / 2,000,000 (9.9189%)
            Episodes: 299
            Currently: Rollout
            Latest Reward: -681
            Latest Avg Rewards: -314
            Recent Change: 0.95
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 227.2179
            Avg Critic Loss: 1229.0828
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 199,128 / 2,000,000 (9.9564%)
            Episodes: 300
            Currently: Rollout
            Latest Reward: -366
            Latest Avg Rewards: -315
            Recent Change: 0.93
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 227.2179
            Avg Critic Loss: 1229.0828
        


Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 199,612 / 2,000,000 (9.9806%)
            Episodes: 301
            Currently: Rollout
            Latest Reward: -283
            Latest Avg Rewards: -314
            Recent Change: 0.95
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 227.2179
            Avg Critic Loss: 1229.0828
        

            Timesteps: 200,362 / 2,000,000 (10.0181%)
            Episodes: 302
            Currently: Rollout
            Latest Reward: -232
            Latest Avg Rewards: -315
            Recent Change: 0.83
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 227.2179
         

Object 1 dropped to the floor



            Timesteps: 203,362 / 2,000,000 (10.1681%)
            Episodes: 306
            Currently: Rollout
            Latest Reward: -471
            Latest Avg Rewards: -314
            Recent Change: 0.72
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 227.2179
            Avg Critic Loss: 1229.0828
        

            Timesteps: 203,362 / 2,000,000 (10.1681%)
            Episodes: 306
            Currently: Training cycle 1/5
            Latest Reward: -471
            Latest Avg Rewards: -314
            Recent Change: 0.72
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 227.21

Remove body failed



            Timesteps: 204,112 / 2,000,000 (10.2056%)
            Episodes: 307
            Currently: Rollout
            Latest Reward: -394
            Latest Avg Rewards: -312
            Recent Change: 0.95
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 452.0235
            Avg Critic Loss: 1208.6782
        


Object 1 dropped to the floor



            Timesteps: 204,862 / 2,000,000 (10.2431%)
            Episodes: 308
            Currently: Rollout
            Latest Reward: -237
            Latest Avg Rewards: -313
            Recent Change: 0.78
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 452.0235
            Avg Critic Loss: 1208.6782
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 205,612 / 2,000,000 (10.2806%)
            Episodes: 309
            Currently: Rollout
            Latest Reward: -321
            Latest Avg Rewards: -314
            Recent Change: 0.74
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 452.0235
            Avg Critic Loss: 1208.6782
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 206,362 / 2,000,000 (10.3181%)
            Episodes: 310
            Currently: Rollout
            Latest Reward: -422
            Latest Avg Rewards: -316
            Recent Change: 0.76
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 452.0235
            Avg Critic Loss: 1208.6782
        


Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed
Object 1 dropped to the floor



            Timesteps: 206,416 / 2,000,000 (10.3208%)
            Episodes: 311
            Currently: Rollout
            Latest Reward: -121
            Latest Avg Rewards: -314
            Recent Change: 0.62
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 452.0235
            Avg Critic Loss: 1208.6782
        

            Timesteps: 207,166 / 2,000,000 (10.3583%)
            Episodes: 312
            Currently: Rollout
            Latest Reward: -412
            Latest Avg Rewards: -317
            Recent Change: 0.56
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 452.0235
      

Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 207,794 / 2,000,000 (10.3897%)
            Episodes: 313
            Currently: Rollout
            Latest Reward: -523
            Latest Avg Rewards: -321
            Recent Change: 0.6
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 452.0235
            Avg Critic Loss: 1208.6782
        


Object 0 correctly sorted into sorting_one



            Timesteps: 208,544 / 2,000,000 (10.4272%)
            Episodes: 314
            Currently: Rollout
            Latest Reward: -224
            Latest Avg Rewards: -318
            Recent Change: 0.64
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 452.0235
            Avg Critic Loss: 1208.6782
        

            Timesteps: 208,544 / 2,000,000 (10.4272%)
            Episodes: 314
            Currently: Training cycle 1/5
            Latest Reward: -224
            Latest Avg Rewards: -318
            Recent Change: 0.64
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0012
            Latest Critic Loss: 452.

Remove body failed



            Timesteps: 209,294 / 2,000,000 (10.4647%)
            Episodes: 315
            Currently: Rollout
            Latest Reward: -209
            Latest Avg Rewards: -318
            Recent Change: 0.49
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 395.7382
            Avg Critic Loss: 1187.8688
        


Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 209,433 / 2,000,000 (10.4717%)
            Episodes: 316
            Currently: Rollout
            Latest Reward: -147
            Latest Avg Rewards: -318
            Recent Change: 0.32
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 395.7382
            Avg Critic Loss: 1187.8688
        

            Timesteps: 210,183 / 2,000,000 (10.5091%)
            Episodes: 317
            Currently: Rollout
            Latest Reward: -267
            Latest Avg Rewards: -318
            Recent Change: 0.25
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 395.7382
        

Object 0 dropped to the floor



            Timesteps: 210,933 / 2,000,000 (10.5466%)
            Episodes: 318
            Currently: Rollout
            Latest Reward: -353
            Latest Avg Rewards: -319
            Recent Change: 0.25
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 395.7382
            Avg Critic Loss: 1187.8688
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 211,683 / 2,000,000 (10.5842%)
            Episodes: 319
            Currently: Rollout
            Latest Reward: -252
            Latest Avg Rewards: -318
            Recent Change: 0.23
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 395.7382
            Avg Critic Loss: 1187.8688
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 212,433 / 2,000,000 (10.6217%)
            Episodes: 320
            Currently: Rollout
            Latest Reward: -316
            Latest Avg Rewards: -320
            Recent Change: 0.11
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 395.7382
            Avg Critic Loss: 1187.8688
        


Remove body failed



            Timesteps: 213,183 / 2,000,000 (10.6592%)
            Episodes: 321
            Currently: Rollout
            Latest Reward: -316
            Latest Avg Rewards: -319
            Recent Change: 0.16
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 395.7382
            Avg Critic Loss: 1187.8688
        


Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 213,403 / 2,000,000 (10.6701%)
            Episodes: 322
            Currently: Rollout
            Latest Reward: -235
            Latest Avg Rewards: -320
            Recent Change: 0.0
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 395.7382
            Avg Critic Loss: 1187.8688
        

            Timesteps: 214,153 / 2,000,000 (10.7077%)
            Episodes: 323
            Currently: Rollout
            Latest Reward: -358
            Latest Avg Rewards: -320
            Recent Change: 0.01
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 395.7382
         

Object 0 dropped to the floor



            Timesteps: 215,653 / 2,000,000 (10.7827%)
            Episodes: 325
            Currently: Rollout
            Latest Reward: -401
            Latest Avg Rewards: -323
            Recent Change: 0.11
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 299.1233
            Avg Critic Loss: 1165.7114
        


Remove body failed



            Timesteps: 216,403 / 2,000,000 (10.8201%)
            Episodes: 326
            Currently: Rollout
            Latest Reward: -362
            Latest Avg Rewards: -323
            Recent Change: 0.12
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 299.1233
            Avg Critic Loss: 1165.7114
        


Object 0 dropped to the floor



            Timesteps: 217,153 / 2,000,000 (10.8577%)
            Episodes: 327
            Currently: Rollout
            Latest Reward: -276
            Latest Avg Rewards: -320
            Recent Change: 0.24
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 299.1233
            Avg Critic Loss: 1165.7114
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 217,903 / 2,000,000 (10.8952%)
            Episodes: 328
            Currently: Rollout
            Latest Reward: -382
            Latest Avg Rewards: -321
            Recent Change: 0.28
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 299.1233
            Avg Critic Loss: 1165.7114
        


Remove body failed



            Timesteps: 218,653 / 2,000,000 (10.9326%)
            Episodes: 329
            Currently: Rollout
            Latest Reward: -254
            Latest Avg Rewards: -322
            Recent Change: 0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 299.1233
            Avg Critic Loss: 1165.7114
        

            Timesteps: 219,403 / 2,000,000 (10.9701%)
            Episodes: 330
            Currently: Rollout
            Latest Reward: -305
            Latest Avg Rewards: -322
            Recent Change: 0.1
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 299.1233
         

Object 0 dropped to the floor



            Timesteps: 220,153 / 2,000,000 (11.0076%)
            Episodes: 331
            Currently: Rollout
            Latest Reward: -350
            Latest Avg Rewards: -323
            Recent Change: 0.11
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 310.8283
            Avg Critic Loss: 1144.8577
        


Remove body failed



            Timesteps: 220,903 / 2,000,000 (11.0451%)
            Episodes: 332
            Currently: Rollout
            Latest Reward: -258
            Latest Avg Rewards: -322
            Recent Change: 0.05
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 310.8283
            Avg Critic Loss: 1144.8577
        


Object 1 dropped to the floor



            Timesteps: 221,653 / 2,000,000 (11.0826%)
            Episodes: 333
            Currently: Rollout
            Latest Reward: -413
            Latest Avg Rewards: -321
            Recent Change: 0.22
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 310.8283
            Avg Critic Loss: 1144.8577
        


Remove body failed



            Timesteps: 222,403 / 2,000,000 (11.1201%)
            Episodes: 334
            Currently: Rollout
            Latest Reward: -204
            Latest Avg Rewards: -321
            Recent Change: 0.12
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 310.8283
            Avg Critic Loss: 1144.8577
        

            Timesteps: 223,153 / 2,000,000 (11.1577%)
            Episodes: 335
            Currently: Rollout
            Latest Reward: -388
            Latest Avg Rewards: -321
            Recent Change: 0.17
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 310.8283
        

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 223,587 / 2,000,000 (11.1794%)
            Episodes: 336
            Currently: Rollout
            Latest Reward: -252
            Latest Avg Rewards: -320
            Recent Change: 0.17
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 310.8283
            Avg Critic Loss: 1144.8577
        

            Timesteps: 224,337 / 2,000,000 (11.2169%)
            Episodes: 337
            Currently: Rollout
            Latest Reward: -357
            Latest Avg Rewards: -319
            Recent Change: 0.23
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0002
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0011
            Latest Critic Loss: 310.8283
        

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 225,128 / 2,000,000 (11.2564%)
            Episodes: 339
            Currently: Rollout
            Latest Reward: -112
            Latest Avg Rewards: -318
            Recent Change: -0.0
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 219.9282
            Avg Critic Loss: 1122.8422
        


Object 1 dropped to the floor



            Timesteps: 225,878 / 2,000,000 (11.2939%)
            Episodes: 340
            Currently: Rollout
            Latest Reward: -430
            Latest Avg Rewards: -319
            Recent Change: 0.07
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 219.9282
            Avg Critic Loss: 1122.8422
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 226,628 / 2,000,000 (11.3314%)
            Episodes: 341
            Currently: Rollout
            Latest Reward: -524
            Latest Avg Rewards: -322
            Recent Change: 0.09
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 219.9282
            Avg Critic Loss: 1122.8422
        


Remove body failed



            Timesteps: 227,378 / 2,000,000 (11.3689%)
            Episodes: 342
            Currently: Rollout
            Latest Reward: -494
            Latest Avg Rewards: -325
            Recent Change: 0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 219.9282
            Avg Critic Loss: 1122.8422
        


Object 1 dropped to the floor



            Timesteps: 228,128 / 2,000,000 (11.4064%)
            Episodes: 343
            Currently: Rollout
            Latest Reward: -288
            Latest Avg Rewards: -323
            Recent Change: 0.24
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 219.9282
            Avg Critic Loss: 1122.8422
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 228,878 / 2,000,000 (11.4439%)
            Episodes: 344
            Currently: Rollout
            Latest Reward: -281
            Latest Avg Rewards: -324
            Recent Change: 0.1
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 219.9282
            Avg Critic Loss: 1122.8422
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 229,628 / 2,000,000 (11.4814%)
            Episodes: 345
            Currently: Rollout
            Latest Reward: -316
            Latest Avg Rewards: -325
            Recent Change: 0.05
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 219.9282
            Avg Critic Loss: 1122.8422
        


Remove body failed



            Timesteps: 230,378 / 2,000,000 (11.5189%)
            Episodes: 346
            Currently: Rollout
            Latest Reward: -220
            Latest Avg Rewards: -323
            Recent Change: 0.03
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 219.9282
            Avg Critic Loss: 1122.8422
        

            Timesteps: 230,378 / 2,000,000 (11.5189%)
            Episodes: 346
            Currently: Training cycle 1/5
            Latest Reward: -220
            Latest Avg Rewards: -323
            Recent Change: 0.03
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0009
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 219.9282

Object 1 dropped to the floor



            Timesteps: 231,128 / 2,000,000 (11.5564%)
            Episodes: 347
            Currently: Rollout
            Latest Reward: -319
            Latest Avg Rewards: -324
            Recent Change: -0.04
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0014
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 432.7423
            Avg Critic Loss: 1106.8048
        


Remove body failed



            Timesteps: 231,878 / 2,000,000 (11.5939%)
            Episodes: 348
            Currently: Rollout
            Latest Reward: -267
            Latest Avg Rewards: -322
            Recent Change: 0.0
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0014
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 432.7423
            Avg Critic Loss: 1106.8048
        

            Timesteps: 232,628 / 2,000,000 (11.6314%)
            Episodes: 349
            Currently: Rollout
            Latest Reward: -267
            Latest Avg Rewards: -322
            Recent Change: -0.05
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0014
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 432.7423
          

Object 0 dropped to the floor



            Timesteps: 234,128 / 2,000,000 (11.7064%)
            Episodes: 351
            Currently: Rollout
            Latest Reward: -614
            Latest Avg Rewards: -327
            Recent Change: -0.09
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0014
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 432.7423
            Avg Critic Loss: 1106.8048
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 234,878 / 2,000,000 (11.7439%)
            Episodes: 352
            Currently: Rollout
            Latest Reward: -437
            Latest Avg Rewards: -328
            Recent Change: -0.0
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0014
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 432.7423
            Avg Critic Loss: 1106.8048
        


Remove body failed



            Timesteps: 235,628 / 2,000,000 (11.7814%)
            Episodes: 353
            Currently: Rollout
            Latest Reward: -405
            Latest Avg Rewards: -327
            Recent Change: 0.1
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0014
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 432.7423
            Avg Critic Loss: 1106.8048
        

            Timesteps: 235,628 / 2,000,000 (11.7814%)
            Episodes: 353
            Currently: Training cycle 1/5
            Latest Reward: -405
            Latest Avg Rewards: -327
            Recent Change: 0.1
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0014
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 432.7423
 

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 235,817 / 2,000,000 (11.7909%)
            Episodes: 354
            Currently: Rollout
            Latest Reward: -227
            Latest Avg Rewards: -327
            Recent Change: -0.0
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0019
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 469.6632
            Avg Critic Loss: 1092.36
        

            Timesteps: 236,567 / 2,000,000 (11.8284%)
            Episodes: 355
            Currently: Rollout
            Latest Reward: -219
            Latest Avg Rewards: -326
            Recent Change: -0.04
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0019
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 469.6632
           

Object 0 dropped to the floor



            Timesteps: 238,817 / 2,000,000 (11.9408%)
            Episodes: 358
            Currently: Rollout
            Latest Reward: -293
            Latest Avg Rewards: -324
            Recent Change: -0.2
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0019
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 469.6632
            Avg Critic Loss: 1092.36
        


Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 238,883 / 2,000,000 (11.9442%)
            Episodes: 359
            Currently: Rollout
            Latest Reward: -124
            Latest Avg Rewards: -322
            Recent Change: -0.35
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0019
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 469.6632
            Avg Critic Loss: 1092.36
        

            Timesteps: 239,633 / 2,000,000 (11.9817%)
            Episodes: 360
            Currently: Rollout
            Latest Reward: -293
            Latest Avg Rewards: -323
            Recent Change: -0.42
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0019
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 469.6632
          

Object 1 dropped to the floor



            Timesteps: 241,133 / 2,000,000 (12.0566%)
            Episodes: 362
            Currently: Rollout
            Latest Reward: -314
            Latest Avg Rewards: -318
            Recent Change: -0.21
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0019
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 469.6632
            Avg Critic Loss: 1092.36
        

            Timesteps: 241,133 / 2,000,000 (12.0566%)
            Episodes: 362
            Currently: Training cycle 1/5
            Latest Reward: -314
            Latest Avg Rewards: -318
            Recent Change: -0.21
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0019
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 469.6632

Remove body failed
Object 0 dropped to the floor



            Timesteps: 241,883 / 2,000,000 (12.0941%)
            Episodes: 363
            Currently: Rollout
            Latest Reward: -397
            Latest Avg Rewards: -319
            Recent Change: -0.14
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 200.5064
            Avg Critic Loss: 1072.6063
        


Remove body failed



            Timesteps: 242,633 / 2,000,000 (12.1316%)
            Episodes: 364
            Currently: Rollout
            Latest Reward: -274
            Latest Avg Rewards: -318
            Recent Change: -0.17
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 200.5064
            Avg Critic Loss: 1072.6063
        

            Timesteps: 243,383 / 2,000,000 (12.1692%)
            Episodes: 365
            Currently: Rollout
            Latest Reward: -301
            Latest Avg Rewards: -319
            Recent Change: -0.21
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 200.5064
          

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 243,594 / 2,000,000 (12.1797%)
            Episodes: 366
            Currently: Rollout
            Latest Reward: -175
            Latest Avg Rewards: -318
            Recent Change: -0.32
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 200.5064
            Avg Critic Loss: 1072.6063
        


Object 1 dropped to the floor



            Timesteps: 244,344 / 2,000,000 (12.2172%)
            Episodes: 367
            Currently: Rollout
            Latest Reward: -377
            Latest Avg Rewards: -319
            Recent Change: -0.35
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 200.5064
            Avg Critic Loss: 1072.6063
        


Remove body failed



            Timesteps: 245,094 / 2,000,000 (12.2547%)
            Episodes: 368
            Currently: Rollout
            Latest Reward: -420
            Latest Avg Rewards: -321
            Recent Change: -0.33
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 200.5064
            Avg Critic Loss: 1072.6063
        


Object 0 dropped to the floor



            Timesteps: 245,844 / 2,000,000 (12.2922%)
            Episodes: 369
            Currently: Rollout
            Latest Reward: -327
            Latest Avg Rewards: -322
            Recent Change: -0.37
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 200.5064
            Avg Critic Loss: 1072.6063
        


Remove body failed



            Timesteps: 246,594 / 2,000,000 (12.3297%)
            Episodes: 370
            Currently: Rollout
            Latest Reward: -221
            Latest Avg Rewards: -321
            Recent Change: -0.48
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 200.5064
            Avg Critic Loss: 1072.6063
        

            Timesteps: 246,594 / 2,000,000 (12.3297%)
            Episodes: 370
            Currently: Training cycle 1/5
            Latest Reward: -221
            Latest Avg Rewards: -321
            Recent Change: -0.48
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 200.5064

Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 248,627 / 2,000,000 (12.4314%)
            Episodes: 373
            Currently: Rollout
            Latest Reward: -285
            Latest Avg Rewards: -320
            Recent Change: -0.5
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 259.5772
            Avg Critic Loss: 1054.9122
        


Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed
Object 1 dropped to the floor



            Timesteps: 249,182 / 2,000,000 (12.4591%)
            Episodes: 374
            Currently: Rollout
            Latest Reward: -264
            Latest Avg Rewards: -320
            Recent Change: -0.54
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 259.5772
            Avg Critic Loss: 1054.9122
        

            Timesteps: 249,932 / 2,000,000 (12.4966%)
            Episodes: 375
            Currently: Rollout
            Latest Reward: -399
            Latest Avg Rewards: -322
            Recent Change: -0.59
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 259.5772
        

Remove body failed



            Timesteps: 250,682 / 2,000,000 (12.5341%)
            Episodes: 376
            Currently: Rollout
            Latest Reward: -211
            Latest Avg Rewards: -321
            Recent Change: -0.66
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 259.5772
            Avg Critic Loss: 1054.9122
        

            Timesteps: 251,432 / 2,000,000 (12.5716%)
            Episodes: 377
            Currently: Rollout
            Latest Reward: -244
            Latest Avg Rewards: -320
            Recent Change: -0.72
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.001
            Latest Critic Loss: 259.5772
        

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 255,952 / 2,000,000 (12.7976%)
            Episodes: 384
            Currently: Rollout
            Latest Reward: -237
            Latest Avg Rewards: -312
            Recent Change: -0.83
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 180.4786
            Avg Critic Loss: 1036.34
        

            Timesteps: 256,702 / 2,000,000 (12.8351%)
            Episodes: 385
            Currently: Rollout
            Latest Reward: -547
            Latest Avg Rewards: -313
            Recent Change: -0.57
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 180.4786
          

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed
Object 0 dropped to the floor



            Timesteps: 257,171 / 2,000,000 (12.8585%)
            Episodes: 386
            Currently: Rollout
            Latest Reward: -256
            Latest Avg Rewards: -313
            Recent Change: -0.66
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 180.4786
            Avg Critic Loss: 1036.34
        

            Timesteps: 257,921 / 2,000,000 (12.8961%)
            Episodes: 387
            Currently: Rollout
            Latest Reward: -439
            Latest Avg Rewards: -314
            Recent Change: -0.54
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 180.4786
          

Remove body failed



            Timesteps: 258,671 / 2,000,000 (12.9335%)
            Episodes: 388
            Currently: Rollout
            Latest Reward: -234
            Latest Avg Rewards: -312
            Recent Change: -0.54
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 364.2123
            Avg Critic Loss: 1022.3346
        

            Timesteps: 258,706 / 2,000,000 (12.9353%)
            Episodes: 389
            Currently: Rollout
            Latest Reward: -115
            Latest Avg Rewards: -308
            Recent Change: -0.53
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 364.2123
          

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 259,456 / 2,000,000 (12.9728%)
            Episodes: 390
            Currently: Rollout
            Latest Reward: -193
            Latest Avg Rewards: -307
            Recent Change: -0.63
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 364.2123
            Avg Critic Loss: 1022.3346
        

            Timesteps: 260,206 / 2,000,000 (13.0103%)
            Episodes: 391
            Currently: Rollout
            Latest Reward: -308
            Latest Avg Rewards: -306
            Recent Change: -0.55
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 364.2123
          

Object 0 dropped to the floor



            Timesteps: 260,956 / 2,000,000 (13.0478%)
            Episodes: 392
            Currently: Rollout
            Latest Reward: -397
            Latest Avg Rewards: -306
            Recent Change: -0.46
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 364.2123
            Avg Critic Loss: 1022.3346
        


Remove body failed



            Timesteps: 261,706 / 2,000,000 (13.0853%)
            Episodes: 393
            Currently: Rollout
            Latest Reward: -248
            Latest Avg Rewards: -307
            Recent Change: -0.54
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 364.2123
            Avg Critic Loss: 1022.3346
        


Object 0 dropped to the floor



            Timesteps: 262,456 / 2,000,000 (13.1228%)
            Episodes: 394
            Currently: Rollout
            Latest Reward: -525
            Latest Avg Rewards: -309
            Recent Change: -0.44
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 364.2123
            Avg Critic Loss: 1022.3346
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 263,206 / 2,000,000 (13.1603%)
            Episodes: 395
            Currently: Rollout
            Latest Reward: -476
            Latest Avg Rewards: -311
            Recent Change: -0.35
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 364.2123
            Avg Critic Loss: 1022.3346
        

            Timesteps: 263,206 / 2,000,000 (13.1603%)
            Episodes: 395
            Currently: Training cycle 1/5
            Latest Reward: -476
            Latest Avg Rewards: -311
            Recent Change: -0.35
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 364.2123

Remove body failed
Object 0 dropped to the floor



            Timesteps: 263,956 / 2,000,000 (13.1978%)
            Episodes: 396
            Currently: Rollout
            Latest Reward: -293
            Latest Avg Rewards: -311
            Recent Change: -0.35
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0011
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 315.7475
            Avg Critic Loss: 1007.9404
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 264,706 / 2,000,000 (13.2353%)
            Episodes: 397
            Currently: Rollout
            Latest Reward: -498
            Latest Avg Rewards: -313
            Recent Change: -0.28
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0011
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 315.7475
            Avg Critic Loss: 1007.9404
        


Remove body failed



            Timesteps: 265,456 / 2,000,000 (13.2728%)
            Episodes: 398
            Currently: Rollout
            Latest Reward: -235
            Latest Avg Rewards: -311
            Recent Change: -0.23
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0011
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 315.7475
            Avg Critic Loss: 1007.9404
        


Object 0 dropped to the floor



            Timesteps: 266,206 / 2,000,000 (13.3103%)
            Episodes: 399
            Currently: Rollout
            Latest Reward: -331
            Latest Avg Rewards: -307
            Recent Change: 0.01
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0011
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 315.7475
            Avg Critic Loss: 1007.9404
        


Remove body failed



            Timesteps: 266,956 / 2,000,000 (13.3478%)
            Episodes: 400
            Currently: Rollout
            Latest Reward: -543
            Latest Avg Rewards: -309
            Recent Change: 0.18
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0011
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 315.7475
            Avg Critic Loss: 1007.9404
        


Object 1 dropped to the floor



            Timesteps: 267,706 / 2,000,000 (13.3853%)
            Episodes: 401
            Currently: Rollout
            Latest Reward: -346
            Latest Avg Rewards: -310
            Recent Change: 0.19
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0011
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 315.7475
            Avg Critic Loss: 1007.9404
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 268,456 / 2,000,000 (13.4228%)
            Episodes: 402
            Currently: Rollout
            Latest Reward: -318
            Latest Avg Rewards: -311
            Recent Change: 0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0011
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 315.7475
            Avg Critic Loss: 1007.9404
        

            Timesteps: 268,456 / 2,000,000 (13.4228%)
            Episodes: 402
            Currently: Training cycle 1/5
            Latest Reward: -318
            Latest Avg Rewards: -311
            Recent Change: 0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0011
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 315.

Remove body failed
Object 0 dropped to the floor



            Timesteps: 269,206 / 2,000,000 (13.4603%)
            Episodes: 403
            Currently: Rollout
            Latest Reward: -330
            Latest Avg Rewards: -312
            Recent Change: 0.11
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 528.8923
            Avg Critic Loss: 998.4594
        


Remove body failed



            Timesteps: 269,956 / 2,000,000 (13.4978%)
            Episodes: 404
            Currently: Rollout
            Latest Reward: -232
            Latest Avg Rewards: -312
            Recent Change: 0.01
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 528.8923
            Avg Critic Loss: 998.4594
        


Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 270,037 / 2,000,000 (13.5019%)
            Episodes: 405
            Currently: Rollout
            Latest Reward: -136
            Latest Avg Rewards: -311
            Recent Change: -0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 528.8923
            Avg Critic Loss: 998.4594
        

            Timesteps: 270,787 / 2,000,000 (13.5394%)
            Episodes: 406
            Currently: Rollout
            Latest Reward: -248
            Latest Avg Rewards: -309
            Recent Change: -0.09
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 528.8923
         

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed
Object 0 dropped to the floor



            Timesteps: 271,567 / 2,000,000 (13.5784%)
            Episodes: 408
            Currently: Rollout
            Latest Reward: -323
            Latest Avg Rewards: -307
            Recent Change: -0.19
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 528.8923
            Avg Critic Loss: 998.4594
        


Remove body failed



            Timesteps: 272,317 / 2,000,000 (13.6158%)
            Episodes: 409
            Currently: Rollout
            Latest Reward: -235
            Latest Avg Rewards: -306
            Recent Change: -0.22
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 528.8923
            Avg Critic Loss: 998.4594
        

            Timesteps: 273,067 / 2,000,000 (13.6533%)
            Episodes: 410
            Currently: Rollout
            Latest Reward: -288
            Latest Avg Rewards: -304
            Recent Change: -0.16
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0004
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 528.8923
         

Object 0 dropped to the floor



            Timesteps: 275,317 / 2,000,000 (13.7658%)
            Episodes: 413
            Currently: Rollout
            Latest Reward: -472
            Latest Avg Rewards: -303
            Recent Change: -0.07
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 206.6982
            Avg Critic Loss: 982.9195
        


Remove body failed



            Timesteps: 276,067 / 2,000,000 (13.8034%)
            Episodes: 414
            Currently: Rollout
            Latest Reward: -237
            Latest Avg Rewards: -303
            Recent Change: -0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 206.6982
            Avg Critic Loss: 982.9195
        


Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 276,259 / 2,000,000 (13.8129%)
            Episodes: 415
            Currently: Rollout
            Latest Reward: -159
            Latest Avg Rewards: -303
            Recent Change: -0.3
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 206.6982
            Avg Critic Loss: 982.9195
        


Object 1 dropped to the floor



            Timesteps: 277,009 / 2,000,000 (13.8505%)
            Episodes: 416
            Currently: Rollout
            Latest Reward: -373
            Latest Avg Rewards: -305
            Recent Change: -0.35
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 206.6982
            Avg Critic Loss: 982.9195
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 277,759 / 2,000,000 (13.8879%)
            Episodes: 417
            Currently: Rollout
            Latest Reward: -265
            Latest Avg Rewards: -305
            Recent Change: -0.39
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 206.6982
            Avg Critic Loss: 982.9195
        


Remove body failed



            Timesteps: 278,509 / 2,000,000 (13.9254%)
            Episodes: 418
            Currently: Rollout
            Latest Reward: -193
            Latest Avg Rewards: -303
            Recent Change: -0.43
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 206.6982
            Avg Critic Loss: 982.9195
        


Object 0 dropped to the floor



            Timesteps: 279,259 / 2,000,000 (13.9629%)
            Episodes: 419
            Currently: Rollout
            Latest Reward: -357
            Latest Avg Rewards: -305
            Recent Change: -0.43
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 206.6982
            Avg Critic Loss: 982.9195
        

            Timesteps: 279,259 / 2,000,000 (13.9629%)
            Episodes: 419
            Currently: Training cycle 1/5
            Latest Reward: -357
            Latest Avg Rewards: -305
            Recent Change: -0.43
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0009
            Latest Critic Loss: 206.6982
    

Remove body failed
Object 1 dropped to the floor



            Timesteps: 280,009 / 2,000,000 (14.0005%)
            Episodes: 420
            Currently: Rollout
            Latest Reward: -370
            Latest Avg Rewards: -305
            Recent Change: -0.38
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 335.0808
            Avg Critic Loss: 970.5173
        


Remove body failed



            Timesteps: 280,759 / 2,000,000 (14.0379%)
            Episodes: 421
            Currently: Rollout
            Latest Reward: -307
            Latest Avg Rewards: -305
            Recent Change: -0.38
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 335.0808
            Avg Critic Loss: 970.5173
        


Object 1 dropped to the floor



            Timesteps: 281,509 / 2,000,000 (14.0755%)
            Episodes: 422
            Currently: Rollout
            Latest Reward: -308
            Latest Avg Rewards: -306
            Recent Change: -0.42
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 335.0808
            Avg Critic Loss: 970.5173
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 282,259 / 2,000,000 (14.1129%)
            Episodes: 423
            Currently: Rollout
            Latest Reward: -271
            Latest Avg Rewards: -305
            Recent Change: -0.41
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 335.0808
            Avg Critic Loss: 970.5173
        


Remove body failed
Object 0 dropped to the floor



            Timesteps: 283,009 / 2,000,000 (14.1505%)
            Episodes: 424
            Currently: Rollout
            Latest Reward: -552
            Latest Avg Rewards: -306
            Recent Change: -0.18
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 335.0808
            Avg Critic Loss: 970.5173
        


Remove body failed



            Timesteps: 283,759 / 2,000,000 (14.1879%)
            Episodes: 425
            Currently: Rollout
            Latest Reward: -321
            Latest Avg Rewards: -305
            Recent Change: -0.11
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 335.0808
            Avg Critic Loss: 970.5173
        

            Timesteps: 284,509 / 2,000,000 (14.2255%)
            Episodes: 426
            Currently: Rollout
            Latest Reward: -369
            Latest Avg Rewards: -305
            Recent Change: -0.04
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 335.0808
       

Object 1 dropped to the floor



            Timesteps: 285,259 / 2,000,000 (14.263%)
            Episodes: 427
            Currently: Rollout
            Latest Reward: -308
            Latest Avg Rewards: -306
            Recent Change: -0.06
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 387.1099
            Avg Critic Loss: 959.4932
        


Remove body failed



            Timesteps: 286,009 / 2,000,000 (14.3005%)
            Episodes: 428
            Currently: Rollout
            Latest Reward: -269
            Latest Avg Rewards: -304
            Recent Change: -0.03
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 387.1099
            Avg Critic Loss: 959.4932
        


Object 1 dropped to the floor



            Timesteps: 286,759 / 2,000,000 (14.3379%)
            Episodes: 429
            Currently: Rollout
            Latest Reward: -417
            Latest Avg Rewards: -306
            Recent Change: 0.0
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 387.1099
            Avg Critic Loss: 959.4932
        


Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 286,832 / 2,000,000 (14.3416%)
            Episodes: 430
            Currently: Rollout
            Latest Reward: -138
            Latest Avg Rewards: -304
            Recent Change: -0.1
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 387.1099
            Avg Critic Loss: 959.4932
        


Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 286,954 / 2,000,000 (14.3477%)
            Episodes: 431
            Currently: Rollout
            Latest Reward: -144
            Latest Avg Rewards: -302
            Recent Change: -0.17
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 387.1099
            Avg Critic Loss: 959.4932
        

            Timesteps: 287,704 / 2,000,000 (14.3852%)
            Episodes: 432
            Currently: Rollout
            Latest Reward: -192
            Latest Avg Rewards: -302
            Recent Change: -0.26
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 387.1099
       

Object 1 dropped to the floor



            Timesteps: 288,454 / 2,000,000 (14.4227%)
            Episodes: 433
            Currently: Rollout
            Latest Reward: -430
            Latest Avg Rewards: -302
            Recent Change: -0.11
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 387.1099
            Avg Critic Loss: 959.4932
        


Remove body failed



            Timesteps: 289,204 / 2,000,000 (14.4602%)
            Episodes: 434
            Currently: Rollout
            Latest Reward: -264
            Latest Avg Rewards: -302
            Recent Change: -0.2
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 387.1099
            Avg Critic Loss: 959.4932
        


Object 1 dropped to the floor



            Timesteps: 289,954 / 2,000,000 (14.4977%)
            Episodes: 435
            Currently: Rollout
            Latest Reward: -269
            Latest Avg Rewards: -301
            Recent Change: -0.16
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 387.1099
            Avg Critic Loss: 959.4932
        

            Timesteps: 289,954 / 2,000,000 (14.4977%)
            Episodes: 435
            Currently: Training cycle 1/5
            Latest Reward: -269
            Latest Avg Rewards: -301
            Recent Change: -0.16
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0008
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 387.1

Remove body failed
Object 1 incorrectly sorted into sorting_two



            Timesteps: 290,704 / 2,000,000 (14.5352%)
            Episodes: 436
            Currently: Rollout
            Latest Reward: -397
            Latest Avg Rewards: -303
            Recent Change: -0.14
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0002
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 251.1757
            Avg Critic Loss: 946.386
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 291,454 / 2,000,000 (14.5727%)
            Episodes: 437
            Currently: Rollout
            Latest Reward: -266
            Latest Avg Rewards: -302
            Recent Change: -0.13
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0002
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 251.1757
            Avg Critic Loss: 946.386
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 292,204 / 2,000,000 (14.6102%)
            Episodes: 438
            Currently: Rollout
            Latest Reward: -385
            Latest Avg Rewards: -303
            Recent Change: -0.12
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0002
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 251.1757
            Avg Critic Loss: 946.386
        


Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 292,274 / 2,000,000 (14.6137%)
            Episodes: 439
            Currently: Rollout
            Latest Reward: -127
            Latest Avg Rewards: -303
            Recent Change: -0.34
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0002
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 251.1757
            Avg Critic Loss: 946.386
        

            Timesteps: 293,024 / 2,000,000 (14.6512%)
            Episodes: 440
            Currently: Rollout
            Latest Reward: -244
            Latest Avg Rewards: -302
            Recent Change: -0.3
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0002
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 251.1757
         

Object 0 dropped to the floor



            Timesteps: 293,774 / 2,000,000 (14.6887%)
            Episodes: 441
            Currently: Rollout
            Latest Reward: -445
            Latest Avg Rewards: -301
            Recent Change: -0.08
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0002
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 251.1757
            Avg Critic Loss: 946.386
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 294,524 / 2,000,000 (14.7262%)
            Episodes: 442
            Currently: Rollout
            Latest Reward: -270
            Latest Avg Rewards: -299
            Recent Change: 0.02
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0002
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 251.1757
            Avg Critic Loss: 946.386
        


Remove body failed



            Timesteps: 295,274 / 2,000,000 (14.7637%)
            Episodes: 443
            Currently: Rollout
            Latest Reward: -186
            Latest Avg Rewards: -298
            Recent Change: -0.05
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0002
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 251.1757
            Avg Critic Loss: 946.386
        

            Timesteps: 295,274 / 2,000,000 (14.7637%)
            Episodes: 443
            Currently: Training cycle 1/5
            Latest Reward: -186
            Latest Avg Rewards: -298
            Recent Change: -0.05
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0002
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 251.17

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 296,095 / 2,000,000 (14.8048%)
            Episodes: 446
            Currently: Rollout
            Latest Reward: -118
            Latest Avg Rewards: -295
            Recent Change: -0.29
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0013
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 281.2134
            Avg Critic Loss: 934.3071
        

            Timesteps: 296,845 / 2,000,000 (14.8423%)
            Episodes: 447
            Currently: Rollout
            Latest Reward: -288
            Latest Avg Rewards: -295
            Recent Change: -0.28
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0013
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 281.2134
       

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 300,128 / 2,000,000 (15.0064%)
            Episodes: 452
            Currently: Rollout
            Latest Reward: -202
            Latest Avg Rewards: -292
            Recent Change: 0.01
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0013
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 281.2134
            Avg Critic Loss: 934.3071
        

            Timesteps: 300,878 / 2,000,000 (15.0439%)
            Episodes: 453
            Currently: Rollout
            Latest Reward: -194
            Latest Avg Rewards: -290
            Recent Change: 0.02
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0013
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 281.2134
         

Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 305,737 / 2,000,000 (15.2868%)
            Episodes: 460
            Currently: Rollout
            Latest Reward: -223
            Latest Avg Rewards: -293
            Recent Change: -0.34
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 242.0775
            Avg Critic Loss: 921.9439
        

            Timesteps: 306,487 / 2,000,000 (15.3244%)
            Episodes: 461
            Currently: Rollout
            Latest Reward: -318
            Latest Avg Rewards: -293
            Recent Change: -0.33
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0001
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 242.0775
         

Object 0 dropped to the floor



            Timesteps: 307,237 / 2,000,000 (15.3618%)
            Episodes: 462
            Currently: Rollout
            Latest Reward: -292
            Latest Avg Rewards: -293
            Recent Change: -0.32
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 190.1801
            Avg Critic Loss: 909.115
        


Remove body failed



            Timesteps: 307,987 / 2,000,000 (15.3994%)
            Episodes: 463
            Currently: Rollout
            Latest Reward: -215
            Latest Avg Rewards: -291
            Recent Change: -0.3
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 190.1801
            Avg Critic Loss: 909.115
        

            Timesteps: 308,737 / 2,000,000 (15.4368%)
            Episodes: 464
            Currently: Rollout
            Latest Reward: -227
            Latest Avg Rewards: -291
            Recent Change: -0.35
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 190.1801
         

Object 1 dropped to the floor



            Timesteps: 309,487 / 2,000,000 (15.4744%)
            Episodes: 465
            Currently: Rollout
            Latest Reward: -428
            Latest Avg Rewards: -292
            Recent Change: -0.26
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 190.1801
            Avg Critic Loss: 909.115
        


Remove body failed
Object 1 dropped to the floor
Object 0 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 309,650 / 2,000,000 (15.4825%)
            Episodes: 466
            Currently: Rollout
            Latest Reward: -170
            Latest Avg Rewards: -292
            Recent Change: -0.41
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 190.1801
            Avg Critic Loss: 909.115
        


Object 1 dropped to the floor



            Timesteps: 310,400 / 2,000,000 (15.52%)
            Episodes: 467
            Currently: Rollout
            Latest Reward: -395
            Latest Avg Rewards: -292
            Recent Change: -0.29
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 190.1801
            Avg Critic Loss: 909.115
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 311,150 / 2,000,000 (15.5575%)
            Episodes: 468
            Currently: Rollout
            Latest Reward: -275
            Latest Avg Rewards: -291
            Recent Change: -0.23
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 190.1801
            Avg Critic Loss: 909.115
        


Remove body failed
Object 1 dropped to the floor



            Timesteps: 311,900 / 2,000,000 (15.595%)
            Episodes: 469
            Currently: Rollout
            Latest Reward: -378
            Latest Avg Rewards: -291
            Recent Change: -0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 190.1801
            Avg Critic Loss: 909.115
        

            Timesteps: 311,900 / 2,000,000 (15.595%)
            Episodes: 469
            Currently: Training cycle 1/5
            Latest Reward: -378
            Latest Avg Rewards: -291
            Recent Change: -0.15
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0003
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 190.1801

Remove body failed



            Timesteps: 312,650 / 2,000,000 (15.6325%)
            Episodes: 470
            Currently: Rollout
            Latest Reward: -219
            Latest Avg Rewards: -291
            Recent Change: -0.24
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 270.4944
            Avg Critic Loss: 898.1123
        


Object 0 dropped to the floor
Object 1 incorrectly sorted into sorting_one
Remove body failed
Remove body failed



            Timesteps: 312,907 / 2,000,000 (15.6453%)
            Episodes: 471
            Currently: Rollout
            Latest Reward: -153
            Latest Avg Rewards: -290
            Recent Change: -0.36
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 270.4944
            Avg Critic Loss: 898.1123
        

            Timesteps: 313,657 / 2,000,000 (15.6829%)
            Episodes: 472
            Currently: Rollout
            Latest Reward: -270
            Latest Avg Rewards: -290
            Recent Change: -0.33
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 270.4944
       

Object 1 dropped to the floor



            Timesteps: 315,157 / 2,000,000 (15.7579%)
            Episodes: 474
            Currently: Rollout
            Latest Reward: -270
            Latest Avg Rewards: -289
            Recent Change: -0.39
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 270.4944
            Avg Critic Loss: 898.1123
        


Remove body failed



            Timesteps: 315,907 / 2,000,000 (15.7953%)
            Episodes: 475
            Currently: Rollout
            Latest Reward: -215
            Latest Avg Rewards: -287
            Recent Change: -0.37
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 270.4944
            Avg Critic Loss: 898.1123
        


Object 1 dropped to the floor



            Timesteps: 316,657 / 2,000,000 (15.8329%)
            Episodes: 476
            Currently: Rollout
            Latest Reward: -236
            Latest Avg Rewards: -288
            Recent Change: -0.45
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 270.4944
            Avg Critic Loss: 898.1123
        


Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 316,804 / 2,000,000 (15.8402%)
            Episodes: 477
            Currently: Rollout
            Latest Reward: -175
            Latest Avg Rewards: -287
            Recent Change: -0.54
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 270.4944
            Avg Critic Loss: 898.1123
        


Object 0 dropped to the floor



            Timesteps: 317,554 / 2,000,000 (15.8777%)
            Episodes: 478
            Currently: Rollout
            Latest Reward: -357
            Latest Avg Rewards: -288
            Recent Change: -0.52
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 270.4944
            Avg Critic Loss: 898.1123
        

            Timesteps: 317,554 / 2,000,000 (15.8777%)
            Episodes: 478
            Currently: Training cycle 1/5
            Latest Reward: -357
            Latest Avg Rewards: -288
            Recent Change: -0.52
            Best Reward: -108.08
            Latest Discrete Actor Loss: -0.0016
            Latest Continuous Actor Loss: 0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0008
            Latest Critic Loss: 270.4

Remove body failed



            Timesteps: 318,304 / 2,000,000 (15.9152%)
            Episodes: 479
            Currently: Rollout
            Latest Reward: -366
            Latest Avg Rewards: -288
            Recent Change: -0.42
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0007
            Latest Critic Loss: 220.9577
            Avg Critic Loss: 886.7429
        


Object 0 dropped to the floor



            Timesteps: 319,054 / 2,000,000 (15.9527%)
            Episodes: 480
            Currently: Rollout
            Latest Reward: -282
            Latest Avg Rewards: -288
            Recent Change: -0.44
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0007
            Latest Critic Loss: 220.9577
            Avg Critic Loss: 886.7429
        


Remove body failed
Object 0 dropped to the floor
Object 1 dropped to the floor
Remove body failed
Remove body failed



            Timesteps: 319,179 / 2,000,000 (15.9589%)
            Episodes: 481
            Currently: Rollout
            Latest Reward: -139
            Latest Avg Rewards: -287
            Recent Change: -0.56
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0007
            Latest Critic Loss: 220.9577
            Avg Critic Loss: 886.7429
        

            Timesteps: 319,929 / 2,000,000 (15.9965%)
            Episodes: 482
            Currently: Rollout
            Latest Reward: -473
            Latest Avg Rewards: -289
            Recent Change: -0.47
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0007
            Latest Critic Loss: 220.9577
       

Object 0 dropped to the floor



            Timesteps: 322,929 / 2,000,000 (16.1465%)
            Episodes: 486
            Currently: Rollout
            Latest Reward: -334
            Latest Avg Rewards: -291
            Recent Change: -0.37
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0007
            Latest Critic Loss: 220.9577
            Avg Critic Loss: 886.7429
        

            Timesteps: 322,929 / 2,000,000 (16.1465%)
            Episodes: 486
            Currently: Training cycle 1/5
            Latest Reward: -334
            Latest Avg Rewards: -291
            Recent Change: -0.37
            Best Reward: -108.08
            Latest Discrete Actor Loss: 0.0005
            Latest Continuous Actor Loss: -0.0
            Avg Discrete Actor Loss: -0.0001
            Avg Continuous Actor Loss: 0.0007
            Latest Critic Loss: 220.9