In [None]:
from gymnasium.envs.registration import register
import gymnasium as gym
import numpy as np


register(
    id="CustomParking-v0",
    entry_point="custom_parking:CustomParkingEnv",
    # entry_point='highway_env.envs:ParkingEnv',
)

def compute_reward(self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info: dict, p: float = 0.5) -> float:
        """
        Proximity to the goal is rewarded

        We use a weighted p-norm

        :param achieved_goal: the goal that was achieved
        :param desired_goal: the goal that was desired
        :param dict info: any supplementary information
        :param p: the Lp^p norm used in the reward. Use p<1 to have high kurtosis for rewards in [0, 1]
        :return: the corresponding reward
        """
        return -np.power(np.dot(np.abs(achieved_goal - desired_goal), np.array(self.config["reward_weights"])), p)
gym.make("CustomParking-v0", compute_reward=compute_reward)


In [None]:
import gymnasium as gym
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
import numpy as np

env = gym.make("parking-v0", render_mode="rgb_array")
print(env.action_space)

n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

model = DDPG("MultiInputPolicy", env, verbose=1)
model.learn(total_timesteps=100000)
model.save("ddpg_parking")

vec_env = model.get_env()
obs = vec_env.reset()
for i in range(1000):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = vec_env.step(action)
    vec_env.render("human")

In [None]:
import gymnasium as gym
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
import numpy as np


env = gym.make("parking-v0", render_mode="rgb_array")

model = DDPG.load("ddpg_parking", env = env, verbose=1)
vec_env = model.get_env()

obs = vec_env.reset()
for i in range(1000):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = vec_env.step(action)
    vec_env.render("human")

In [4]:
import gymnasium as gym
import highway_env
from highway_env.envs.common.abstract import AbstractEnv
from highway_env.envs.parking_env import ParkingEnv
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
import numpy as np
from gym.envs.registration import registry, register

class MyEnv(ParkingEnv, AbstractEnv):
    def __init__(self):
        super().__init__()

    def compute_reward(self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info: dict, p: float = 0.5) -> float:
        return super().compute_reward(achieved_goal, desired_goal, info, p)
    # def step(self, action):
    #     return self.env.step(action)

    # def reset(self):
    #     return self.env.reset()

    # def render(self, mode='human'):
    #     return self.env.render(mode)

module_name = __name__

env_name = 'URReacher-v1'
if env_name in registry:
    del registry[env_name]
register(
    id=env_name,
    entry_point=f'{module_name}:URRobotGym',
)

  import distutils.spawn


TypeError: argument of type 'EnvRegistry' is not iterable

In [None]:
import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Parallel environments
vec_env = make_vec_env("parking-v0", n_envs=4)

model = PPO("MultiInputPolicy", vec_env, verbose=1)
model.learn(total_timesteps=250000)
model.save("ppo_parking")

del model # remove to demonstrate saving and loading

model = PPO.load("ppo_cartpole")

obs = vec_env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = vec_env.step(action)
    vec_env.render("human")

In [None]:
env = gym.make("parking-v0", render_mode="rgb_array")

model = PPO.load("ppo_parking", env = env, verbose=1)
vec_env = model.get_env()
model.learn(total_timesteps=250000)
model.save("ppo_parking")

