In [1]:
import os
import numpy as np
import pybullet as p
import pybullet_data
from panda import Panda
import pickle

import gym
from gym import error, spaces, utils
from gym.utils import seeding
import random

class PandaEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, sim_type="GUI"):
        # create simulation (GUI)
        
        self.urdfRootPath = pybullet_data.getDataPath()

        # GUI launches pybullet with visualization, DIRECT creates env with no visualization
        if sim_type == "GUI":
            p.connect(p.GUI)
        else:
            p.connect(p.DIRECT)
        p.setGravity(0, 0, -9.81)

        # set up camera
        self._set_camera()

        # load some scene objects
        p.loadURDF(os.path.join(self.urdfRootPath, "plane.urdf"), basePosition=[0, 0, -0.65])
        p.loadURDF(os.path.join(self.urdfRootPath, "table/table.urdf"), basePosition=[0.5, -0.6, -0.65])

        # load a panda robot
        self.panda = Panda([0, -0.6, 0])
        
        # define observation and action space
        self.action_space = spaces.Box(np.array([-1]*11), np.array([1]*11))
        self.observation_space = spaces.Box(np.array([-1]*11),np.array([1]*11))

    def reset(self, q=[0.0, -np.pi/4, 0.0, -2*np.pi/4, 0.0, np.pi/2, np.pi/4]):
        """Reset panda to a given position"""
        self.panda.reset(q)
        return [self.panda.state]

    def close(self):
        """Disconnects the pybullet environment and closes simulation"""
        p.disconnect()

    def step(self, action):
        """ Takes given action in the environment. Actions are in cartesian space action = [x, y, z, r, p, y]"""
        # get current state
        state = [self.panda.state]
        self.panda.step(dposition=action[:3])

        # take simulation step
        p.stepSimulation()

        # return next_state, reward, done, info
        next_state = [self.panda.state]
        reward = 0.0
        done = False
        info = {}
        return next_state, reward, done, info

    def state(self):
        return self.panda.state

    def render(self):
        (width, height, pxl, depth, segmentation) = p.getCameraImage(width=self.camera_width,
                                                                     height=self.camera_height,
                                                                     viewMatrix=self.view_matrix,
                                                                     projectionMatrix=self.proj_matrix)
        rgb_array = np.array(pxl, dtype=np.uint8)
        rgb_array = np.reshape(rgb_array, (self.camera_height, self.camera_width, 4))
        rgb_array = rgb_array[:, :, :3]
        return rgb_array

    def _set_camera(self):
        self.camera_width = 256
        self.camera_height = 256
        p.resetDebugVisualizerCamera(cameraDistance=1.3, cameraYaw=90, cameraPitch=-31.4,
                                     cameraTargetPosition=[1.1, 0.0, 0.0])
        self.view_matrix = p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=[0.5, 0, 0],
                                                               distance=1.0,
                                                               yaw=90,
                                                               pitch=-50,
                                                               roll=0,
                                                               upAxisIndex=2)
        self.proj_matrix = p.computeProjectionMatrixFOV(fov=60,
                                                        aspect=float(self.camera_width) / self.camera_height,
                                                        nearVal=0.1,
                                                        farVal=100.0)


pybullet build time: Apr 26 2022 03:13:28


In [2]:
import time

env = PandaEnv()

#Number of steps you run the agent for
num_steps = 1500

obs = env.reset()

for step in range(num_steps):
    # take random action, but you can also do something more intelligent
    # action = my_intelligent_agent_fn(obs) 
    
    action = env.action_space.sample()
    
    # apply the action
    obs, reward, done, infor = env.step(action)
    
    # Render the env
    env.render()
    # Wait a bit before the next frame unless you want to see a crazy fast video
    #time.sleep(0.001)
    
     # If the epsiode is up, then start another one
    if done:
            env.reset()
            
# Close the env
env.close()

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
