In [2]:
import gymnasium as gym
import gymnasium_robotics

# PyTorch
import torch

# from collections import deque
import numpy as np
from stable_baselines3 import SAC
# from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.vec_env import DummyVecEnv

In [3]:
# Hand the gymnasium_robotics module to gymnasium before any gym.make(env_id) call.
gym.register_envs(gymnasium_robotics)

# Environment id to train on, and the single task the agent is asked to complete.
env_id = "FrankaKitchen-v1"
task = "kettle"

In [4]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [5]:
# Length of the action vector; agrees with the Box(-1.0, 1.0, (9,)) action
# space printed at the end of the notebook. Currently only referenced by the
# commented-out action-noise setup.
n_actions = 9

In [6]:
# Length of the flattened observation vector and its element-wise float32
# bounds (every entry is limited to the range [-1e10, 1e10]).
flat_dim = 73
obs_high = np.full(shape=(flat_dim,), fill_value=1e10, dtype=np.float32)
obs_low = np.negative(obs_high)

class FlattenDictWrapper(gym.ObservationWrapper):
    """Flatten a goal-conditioned dict observation into one float32 vector.

    The flat vector is the concatenation, in this order, of
    ``achieved_goal[task]``, ``desired_goal[task]`` and ``observation``.

    Args:
        env: Environment whose observations (and observation space) are dicts
            with the keys ``achieved_goal``, ``desired_goal`` and
            ``observation``; the two goal entries are themselves keyed by
            task name.
        task_name: Task whose goal entries are extracted. When omitted, the
            notebook-level ``task`` variable is used.
    """

    def __init__(self, env, task_name=None):
        super().__init__(env)
        self.keys = env.observation_space.spaces.keys()
        self.task_name = task if task_name is None else task_name

        # Derive the flat size from the wrapped space rather than hard-coding
        # it, so the declared Box still matches the produced vectors when a
        # different task is selected.
        wrapped_spaces = env.observation_space.spaces
        parts = (
            wrapped_spaces["achieved_goal"].spaces[self.task_name],
            wrapped_spaces["desired_goal"].spaces[self.task_name],
            wrapped_spaces["observation"],
        )
        flat_size = sum(int(np.prod(part.shape)) for part in parts)

        self.observation_space = gym.spaces.Box(
            low=np.full((flat_size,), -1e10, dtype=np.float32),
            high=np.full((flat_size,), 1e10, dtype=np.float32),
            shape=(flat_size,),
            dtype=np.float32,
        )

    def observation(self, observation):
        """Return ``[achieved_goal, desired_goal, observation]`` as one 1-D float32 array."""
        pieces = (
            observation["achieved_goal"][self.task_name],
            observation["desired_goal"][self.task_name],
            observation["observation"],
        )
        # ravel() keeps the result one-dimensional even if a piece is not.
        return np.concatenate(
            [np.asarray(piece, dtype=np.float32).ravel() for piece in pieces]
        )

In [7]:
def make_env():
    """Build one ``env_id`` environment with flattened observations.

    Returns:
        The environment created by ``gym.make`` for the single notebook-level
        ``task`` (no rendering), wrapped in ``FlattenDictWrapper``.
    """
    kitchen = gym.make(env_id, render_mode=None, tasks_to_complete=[task])
    return FlattenDictWrapper(kitchen)


# Single-environment vectorized wrapper that is handed to the SAC model.
env = DummyVecEnv([make_env])

In [14]:

# Unused action-noise setup kept from a DDPG configuration; SAC below is
# created without it.
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

# Fixed seed so that re-running the notebook from a fresh kernel repeats the
# same training run (as far as the hardware allows).
SEED = 0

# NOTE(review): buffer_size (5M transitions) is larger than the 400000-step
# training run started in the next cell, so the replay buffer can never fill;
# consider lowering it.
model = SAC(
    "MlpPolicy",
    env,
    verbose=1,
    device=device,
    buffer_size=5_000_000,
    seed=SEED,
)


Using cuda:0 device


In [15]:
# Train for 400k environment steps; a statistics table is logged every
# 10 episodes.
model.learn(
    log_interval=10,
    total_timesteps=400_000,
)

---------------------------------
| time/              |          |
|    episodes        | 10       |
|    fps             | 15       |
|    time_elapsed    | 169      |
|    total_timesteps | 2669     |
| train/             |          |
|    actor_loss      | -49      |
|    critic_loss     | 0.122    |
|    ent_coef        | 0.463    |
|    ent_coef_loss   | -11.6    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2568     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 20       |
|    fps             | 15       |
|    time_elapsed    | 360      |
|    total_timesteps | 5469     |
| train/             |          |
|    actor_loss      | -64.1    |
|    critic_loss     | 0.0796   |
|    ent_coef        | 0.2      |
|    ent_coef_loss   | -24.2    |
|    learning_rate   | 0.0003   |
|    n_updates       | 5368     |
---------------------------------
---------------------------------
| time/       

<stable_baselines3.sac.sac.SAC at 0x7faae44dffa0>

In [16]:
# Persist the trained agent; the file name carries the algorithm, the training
# budget and the task.
checkpoint_name = f"sac_400000_{task}"
model.save(checkpoint_name)

In [None]:
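# NOTE: disabled reload example. It still refers to DDPG ("ddpg_"+task), while
# the model trained and saved in this notebook is SAC ("sac_400000_"+task);
# switch to SAC.load(...) with that file name before re-enabling.
# vec_env = model.get_env()

# del model # remove to demonstrate saving and loading

# model = DDPG.load("ddpg_"+task)

# obs = vec_env.reset()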


Exception: code expected at most 16 arguments, got 18


In [12]:
# success = False
# run_counter = 0
# while not success:
# 	vec_env.reset()
# 	done = False
# 	counter = 0
# 	while not done:
# 		action, _states = model.predict(obs)
# 		obs, rewards, dones, info = vec_env.step(action)
# 		done = dones[0]
# 		counter += 1
# 	if counter != 280:
# 		success = True
# 	run_counter += 1
# 	print(success, run_counter)


In [18]:
# Roll out the trained policy deterministically for 10 episodes and print the
# summed reward of each one. A single evaluation environment is created and
# reused; creating one per episode would leave every earlier instance open.
env_eval = make_env()
try:
    for i in range(10):
        obs, _ = env_eval.reset()
        done = False
        ep_reward = 0.0

        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, _ = env_eval.step(action)
            # An episode ends on task termination or on truncation.
            done = terminated or truncated
            ep_reward += reward
        print(f"Episode reward: {ep_reward}")
finally:
    # Release the environment's resources even if a rollout raises.
    env_eval.close()

Episode reward: 0.0
Episode reward: 0.0
Episode reward: 0.0
Episode reward: 0.0
Episode reward: 0.0
Episode reward: 0.0
Episode reward: 0.0
Episode reward: 0.0
Episode reward: 0.0
Episode reward: 0.0


In [None]:
# Show the evaluation environment's action space next to the one stored on the
# model so the two can be compared.
for label, holder in (("Action space:", env_eval), ("Model action space:", model)):
    print(label, holder.action_space)

Action space: Box(-1.0, 1.0, (9,), float64)
Model action space: Box(-1.0, 1.0, (9,), float64)
