In [1]:
from rltoolkit import SAC_AcM, DDPG_AcM, PPO_AcM
import pickle as pkl
import torch
import gym

# SPP-SAC Hopper

In [2]:
# Keep PyTorch on a single intra-op CPU thread for this run.
torch.set_num_threads(1)

# SPP-SAC Hopper checkpoint, consumed by `model.load(model_path)` further down.
model_path = "../models/hopper_sac_acm_model.pkl"

# Keyword arguments for the `SAC_AcM(...)` constructor in the next cell.
ENV_NAME = "Hopper-v2"
CUSTOM_LOSS = 1
NORM_CLOSS = False
ACM_CRITIC = True
MIN_MAX_DENORMALIZE = True
DENORMALIZE_ACTOR_OUT = True

In [3]:
# Build the SAC_AcM agent for ENV_NAME using the flags from the configuration cell above.
# The checkpoint itself is loaded separately in the next cell.
model = SAC_AcM(
    env_name=ENV_NAME,
    custom_loss=CUSTOM_LOSS,
    denormalize_actor_out=DENORMALIZE_ACTOR_OUT,
    min_max_denormalize=MIN_MAX_DENORMALIZE,
    acm_critic=ACM_CRITIC,
    norm_closs=NORM_CLOSS,
)

In [4]:
# Load the checkpoint at `model_path` into the model built above.
# NOTE(review): the file is a .pkl — presumably unpickled inside `load`; only load trusted checkpoints.
model.load(model_path)

In [5]:
# Evaluate the loaded policy for 10 episodes; the number displayed is the value returned by `test`
# (presumably the average episode return — confirm against rltoolkit).
model.test(episodes=10)

3652.4606512293103

### Render simulation:

In [6]:
def render_off_policy(model, episodes=1):
    """Render deterministic rollouts of an off-policy AcM model in its environment.

    For each episode the environment is reset and then stepped until it
    reports ``done``. Every step renders the current frame, runs the
    observation through ``model.process_obs`` and
    ``model.replay_buffer.normalize``, queries the actor with
    ``act_noise=0`` and ``deterministic=True``, and passes the result through
    ``model.process_action`` before stepping the environment.

    Args:
        model: Object exposing ``env``, ``process_obs``, ``replay_buffer``,
            ``noise_action`` and ``process_action`` (the notebook passes
            SAC_AcM / DDPG_AcM instances).
        episodes: Number of episodes to render; ``0`` renders nothing.

    Returns:
        None. The function is used only for its rendering side effect.
    """
    for _ in range(episodes):
        # Reset before rendering: the Gym API expects reset() to come first,
        # and the loop below renders the initial frame right away anyway.
        obs = model.env.reset()
        done = False
        while not done:
            model.env.render()
            obs = model.process_obs(obs)
            obs = model.replay_buffer.normalize(obs)
            action = model.noise_action(obs, act_noise=0, deterministic=True)
            # The environment is stepped with the processed action, not the raw actor output.
            action_acm = model.process_action(action, obs)
            obs, _, done, _ = model.env.step(action_acm)

In [7]:
# Play one deterministic episode in a viewer window using the helper defined above.
render_off_policy(model, episodes=1)

Creating window glfw


# SPP-SAC HalfCheetah

In [8]:
# Keep PyTorch on a single intra-op CPU thread for this run.
torch.set_num_threads(1)

# SPP-SAC HalfCheetah checkpoint, consumed by `model.load(model_path)` further down.
model_path = "../models/hcheetah_sac_acm_model.pkl"

# Keyword arguments for the `SAC_AcM(...)` constructor in the next cell.
ENV_NAME = "HalfCheetah-v2"
CUSTOM_LOSS = 1
NORM_CLOSS = False
ACM_CRITIC = True
MIN_MAX_DENORMALIZE = True
DENORMALIZE_ACTOR_OUT = True

In [9]:
# Build a fresh SAC_AcM agent for ENV_NAME (HalfCheetah) from the configuration cell above;
# this rebinds `model`, replacing the agent from the previous section.
model = SAC_AcM(
    env_name=ENV_NAME,
    custom_loss=CUSTOM_LOSS,
    denormalize_actor_out=DENORMALIZE_ACTOR_OUT,
    min_max_denormalize=MIN_MAX_DENORMALIZE,
    acm_critic=ACM_CRITIC,
    norm_closs=NORM_CLOSS,
)

In [10]:
# Load the SPP-SAC HalfCheetah checkpoint at `model_path` into the model built above.
model.load(model_path)

In [11]:
# Evaluate the loaded policy for 10 episodes; the number displayed is the value returned by `test`
# (presumably the average episode return — confirm against rltoolkit).
model.test(episodes=10)

7347.939640539684

In [12]:
# Play one deterministic episode in a viewer window (helper defined in the Hopper section).
render_off_policy(model, episodes=1)

Creating window glfw


# SPP-SAC Ant

In [13]:
# Keep PyTorch on a single intra-op CPU thread for this run.
torch.set_num_threads(1)

# SPP-SAC Ant checkpoint, consumed by `model.load(model_path)` further down.
model_path = "../models/ant3m_sac_acm_model.pkl"

# Keyword arguments for the `SAC_AcM(...)` constructor in the next cell.
ENV_NAME = "Ant-v2"
CUSTOM_LOSS = 1
NORM_CLOSS = False
ACM_CRITIC = True
MIN_MAX_DENORMALIZE = True
DENORMALIZE_ACTOR_OUT = True

In [14]:
# Build a fresh SAC_AcM agent for ENV_NAME (Ant) from the configuration cell above;
# this rebinds `model`, replacing the agent from the previous section.
model = SAC_AcM(
    env_name=ENV_NAME,
    custom_loss=CUSTOM_LOSS,
    denormalize_actor_out=DENORMALIZE_ACTOR_OUT,
    min_max_denormalize=MIN_MAX_DENORMALIZE,
    acm_critic=ACM_CRITIC,
    norm_closs=NORM_CLOSS,
)

In [15]:
# Load the SPP-SAC Ant checkpoint at `model_path` into the model built above.
model.load(model_path)

In [16]:
# Evaluate the loaded policy for 10 episodes; the number displayed is the value returned by `test`
# (presumably the average episode return — confirm against rltoolkit).
model.test(episodes=10)

3220.5479644576762

In [17]:
# Play one deterministic episode in a viewer window (helper defined in the Hopper section).
render_off_policy(model, episodes=1)

Creating window glfw


# SPP-DDPG HalfCheetah

In [18]:
from rltoolkit.acm.models.basic_acm import BasicAcM

In [19]:
# Keep PyTorch on a single intra-op CPU thread for this run.
torch.set_num_threads(1)

# SPP-DDPG HalfCheetah checkpoint, consumed by `model.load(model_path)` further down.
model_path = "../models/hcheetah_ddpg_acm_model.pkl"

# Keyword arguments for the `DDPG_AcM(...)` constructor in the next cell.
ENV_NAME = "HalfCheetah-v2"
CUSTOM_LOSS = 1
NORM_CLOSS = False
ACM_CRITIC = True
MIN_MAX_DENORMALIZE = True
DENORMALIZE_ACTOR_OUT = True

In [20]:
# Build the DDPG_AcM agent for ENV_NAME using the flags from the configuration cell above.
model = DDPG_AcM(
    env_name=ENV_NAME,
    custom_loss=CUSTOM_LOSS,
    denormalize_actor_out=DENORMALIZE_ACTOR_OUT,
    min_max_denormalize=MIN_MAX_DENORMALIZE,
    acm_critic=ACM_CRITIC,
    norm_closs=NORM_CLOSS,
)
# Swap in a BasicAcM sized from the model's own dimensions before the checkpoint is loaded
# (presumably input size = 2 * ob_dim and output size = ac_dim — confirm against BasicAcM).
# NOTE(review): the meaning of the third positional argument (False) is not visible here.
model.acm = BasicAcM(model.ob_dim * 2, model.ac_dim, False)

In [21]:
# Load the SPP-DDPG HalfCheetah checkpoint at `model_path` into the model built above.
model.load(model_path)

In [22]:
# Evaluate the loaded policy for 10 episodes; the number displayed is the value returned by `test`
# (presumably the average episode return — confirm against rltoolkit).
model.test(episodes=10)

8216.312495370858

In [23]:
# Play one deterministic episode in a viewer window (helper defined in the Hopper section).
render_off_policy(model, episodes=1)

Creating window glfw


# SPP-DDPG Hopper

In [24]:
# Keep PyTorch on a single intra-op CPU thread for this run.
torch.set_num_threads(1)

# SPP-DDPG Hopper checkpoint, consumed by `model.load(model_path)` further down.
model_path = "../models/hopper_ddpg_acm_model.pkl"

# Keyword arguments for the `DDPG_AcM(...)` constructor in the next cell.
ENV_NAME = "Hopper-v2"
CUSTOM_LOSS = 1
NORM_CLOSS = False
ACM_CRITIC = True
MIN_MAX_DENORMALIZE = True
DENORMALIZE_ACTOR_OUT = True

In [25]:
# Build the DDPG_AcM agent for ENV_NAME using the flags from the configuration cell above.
model = DDPG_AcM(
    env_name=ENV_NAME,
    custom_loss=CUSTOM_LOSS,
    denormalize_actor_out=DENORMALIZE_ACTOR_OUT,
    min_max_denormalize=MIN_MAX_DENORMALIZE,
    acm_critic=ACM_CRITIC,
    norm_closs=NORM_CLOSS,
)
# Swap in a BasicAcM sized from the model's own dimensions before the checkpoint is loaded
# (presumably input size = 2 * ob_dim and output size = ac_dim — confirm against BasicAcM).
# NOTE(review): the meaning of the third positional argument (False) is not visible here.
model.acm = BasicAcM(model.ob_dim * 2, model.ac_dim, False)

In [26]:
# Load the SPP-DDPG Hopper checkpoint at `model_path` into the model built above.
model.load(model_path)

In [27]:
# Evaluate the loaded policy for 10 episodes; the number displayed is the value returned by `test`
# (presumably the average episode return — confirm against rltoolkit).
model.test(episodes=10)

1675.3575791072642

In [28]:
# Play one deterministic episode in a viewer window (helper defined in the Hopper section).
render_off_policy(model, episodes=1)

Creating window glfw


# SPP-DDPG Ant

In [29]:
# Keep PyTorch on a single intra-op CPU thread for this run.
torch.set_num_threads(1)

# SPP-DDPG Ant checkpoint, consumed by `model.load(model_path)` further down.
model_path = "../models/ant3m_ddpg_acm_model.pkl"

# Keyword arguments for the `DDPG_AcM(...)` constructor in the next cell.
ENV_NAME = "Ant-v2"
CUSTOM_LOSS = 1
NORM_CLOSS = False
ACM_CRITIC = True
MIN_MAX_DENORMALIZE = True
DENORMALIZE_ACTOR_OUT = True

In [30]:
# Build the DDPG_AcM agent for ENV_NAME using the flags from the configuration cell above.
model = DDPG_AcM(
    env_name=ENV_NAME,
    custom_loss=CUSTOM_LOSS,
    denormalize_actor_out=DENORMALIZE_ACTOR_OUT,
    min_max_denormalize=MIN_MAX_DENORMALIZE,
    acm_critic=ACM_CRITIC,
    norm_closs=NORM_CLOSS,
)
# Swap in a BasicAcM sized from the model's own dimensions before the checkpoint is loaded
# (presumably input size = 2 * ob_dim and output size = ac_dim — confirm against BasicAcM).
# NOTE(review): the meaning of the third positional argument (False) is not visible here.
model.acm = BasicAcM(model.ob_dim * 2, model.ac_dim, False)

In [31]:
# Load the SPP-DDPG Ant checkpoint at `model_path` into the model built above.
model.load(model_path)

In [32]:
# Evaluate the loaded policy for 10 episodes; the number displayed is the value returned by `test`
# (presumably the average episode return — confirm against rltoolkit).
model.test(episodes=10)

3151.900473908069

In [33]:
# Play one deterministic episode in a viewer window (helper defined in the Hopper section).
render_off_policy(model, episodes=1)

Creating window glfw


# SPP-PPO HalfCheetah

In [34]:
from rltoolkit.buffer import MemoryAcM

In [35]:
# Keep PyTorch on a single intra-op CPU thread for this run.
torch.set_num_threads(1)

# SPP-PPO HalfCheetah checkpoint, consumed by `model.load(model_path)` further down.
model_path = "../models/hcheetah_ppo_acm.pkl"

# Keyword arguments for the `PPO_AcM(...)` constructor in the next cell.
ENV_NAME = "HalfCheetah-v2"
CUSTOM_LOSS = 1
NORM_CLOSS = True
MIN_MAX_DENORMALIZE = True
DENORMALIZE_ACTOR_OUT = True

# NOTE(review): ACM_CRITIC is set here but is not passed to `PPO_AcM(...)` in the
# next cell — confirm whether that omission is intentional.
ACM_CRITIC = True

In [36]:
# Build the PPO_AcM agent for ENV_NAME using the flags from the configuration cell above.
# NOTE(review): unlike the SAC/DDPG sections, no `acm_critic` argument is passed here,
# so the ACM_CRITIC constant is unused for PPO — confirm this is intended.
model = PPO_AcM(
    env_name=ENV_NAME,
    custom_loss=CUSTOM_LOSS,
    denormalize_actor_out=DENORMALIZE_ACTOR_OUT,
    min_max_denormalize=MIN_MAX_DENORMALIZE,
    norm_closs=NORM_CLOSS,
)



In [37]:
# Load the checkpoint first; the statements below read statistics from `model`
# (presumably restored by `load` — confirm), so the order matters.
model.load(model_path)
# Copy the observation bounds held on the model onto its replay buffer.
model.replay_buffer.min_obs = model.min_obs
model.replay_buffer.max_obs = model.max_obs
# Rebuild `model.buffer` as a MemoryAcM configured from the model's observation statistics;
# the render loop further down calls only its `normalize` method.
model.buffer = MemoryAcM(
    obs_mean=model.obs_mean,
    obs_std=model.obs_std,
    device=model.device,
    alpha=model.obs_norm_alpha,
    max_obs=model.max_obs,
    min_obs=model.min_obs,
    min_max_denormalize=model.min_max_denormalize,
)

In [38]:
# Evaluate the loaded policy for 10 episodes; the number displayed is the value returned by `test`
# (presumably the average episode return — confirm against rltoolkit).
model.test(episodes=10)

4232.026076982818

In [40]:
# Render deterministic rollouts of the loaded PPO policy in a viewer window.
episodes = 1
for _episode in range(episodes):
    # Reset before rendering: the Gym API expects reset() to come first, and the
    # loop below renders the initial frame right away anyway.
    obs = model.env.reset()
    done = False
    while not done:
        model.env.render()
        obs = model.process_obs(obs)
        obs = model.buffer.normalize(obs)  # used only for normalization
        # `act` returns a pair; only the action is needed here.
        action, _ = model.actor.act(obs, deterministic=True)
        # The environment is stepped with the processed action, not the raw actor output.
        action_proc = model.process_action(action, obs)
        obs, _, done, _ = model.env.step(action_proc)
        


# SPP-PPO Hopper

In [41]:
# Keep PyTorch on a single intra-op CPU thread for this run.
torch.set_num_threads(1)

# SPP-PPO Hopper checkpoint, consumed by `model.load(model_path)` further down.
model_path = "../models/hopper_ppo_acm.pkl"

# Keyword arguments for the `PPO_AcM(...)` constructor in the next cell.
ENV_NAME = "Hopper-v2"
CUSTOM_LOSS = 1
NORM_CLOSS = True
MIN_MAX_DENORMALIZE = True
DENORMALIZE_ACTOR_OUT = True

# NOTE(review): ACM_CRITIC is set here but is not passed to `PPO_AcM(...)` in the
# next cell — confirm whether that omission is intentional.
ACM_CRITIC = True

In [42]:
# Build the PPO_AcM agent for ENV_NAME using the flags from the configuration cell above.
# NOTE(review): unlike the SAC/DDPG sections, no `acm_critic` argument is passed here,
# so the ACM_CRITIC constant is unused for PPO — confirm this is intended.
model = PPO_AcM(
    env_name=ENV_NAME,
    custom_loss=CUSTOM_LOSS,
    denormalize_actor_out=DENORMALIZE_ACTOR_OUT,
    min_max_denormalize=MIN_MAX_DENORMALIZE,
    norm_closs=NORM_CLOSS,
)



In [43]:
# Load the checkpoint first; the statements below read statistics from `model`
# (presumably restored by `load` — confirm), so the order matters.
model.load(model_path)
# Copy the observation bounds held on the model onto its replay buffer.
model.replay_buffer.min_obs = model.min_obs
model.replay_buffer.max_obs = model.max_obs
# Rebuild `model.buffer` as a MemoryAcM configured from the model's observation statistics;
# the render loop further down calls only its `normalize` method.
model.buffer = MemoryAcM(
    obs_mean=model.obs_mean,
    obs_std=model.obs_std,
    device=model.device,
    alpha=model.obs_norm_alpha,
    max_obs=model.max_obs,
    min_obs=model.min_obs,
    min_max_denormalize=model.min_max_denormalize,
)

In [44]:
# Evaluate the loaded policy for 10 episodes; the number displayed is the value returned by `test`
# (presumably the average episode return — confirm against rltoolkit).
model.test(episodes=10)

1400.0603166060203

In [45]:
# Render deterministic rollouts of the loaded PPO policy in a viewer window.
episodes = 1
for _episode in range(episodes):
    # Reset before rendering: the Gym API expects reset() to come first, and the
    # loop below renders the initial frame right away anyway.
    obs = model.env.reset()
    done = False
    while not done:
        model.env.render()
        obs = model.process_obs(obs)
        obs = model.buffer.normalize(obs)  # used only for normalization
        # `act` returns a pair; only the action is needed here.
        action, _ = model.actor.act(obs, deterministic=True)
        # The environment is stepped with the processed action, not the raw actor output.
        action_proc = model.process_action(action, obs)
        obs, _, done, _ = model.env.step(action_proc)
        


Creating window glfw
