In [2]:
import ipywidgets as widgets
from ipywidgets import HBox, VBox
import gymnasium as gym
from agents import SACAgent2
from omegaconf import DictConfig, OmegaConf
from hydra import compose, initialize
import matplotlib.pyplot as plt
from IPython.display import clear_output
import os
import wandb
import torch as T
from gymnasium.wrappers import RecordVideo
# Select SDL's "dummy" video driver before any environment is created.
# NOTE(review): presumably this lets the environment render off-screen
# (no window / no display needed) — confirm against the rendering backend in use.
os.environ["SDL_VIDEODRIVER"] = "dummy"

In [2]:
# Environment whose observation/action spaces determine the agent's dimensions.
env = gym.make('LunarLander-v2', continuous=True)
# Box spaces expose their dimensionality through `shape`; otherwise fall back to
# the space's `n`. isinstance (instead of an exact type comparison) also accepts
# subclasses of Box.
n_actions = env.action_space.shape[0] if isinstance(env.action_space, gym.spaces.Box) else env.action_space.n
# Keyword arguments forwarded to the agent constructor alongside the Hydra config.
env_info = {"input_dims": env.observation_space.shape, "n_actions": n_actions, "max_action": env.action_space.high}

In [3]:
# Look up version v131 of the `lunar_lander_model` artifact through the W&B public API.
# The artifact's files are fetched in a later cell via `artifact.download()`.
api = wandb.Api()
artifact = api.artifact('tum-adlr-ws22-06/ADLR randomized envs/lunar_lander_model:v131')

In [4]:
# Fetch the artifact's files to local disk; as the last expression of the cell,
# the returned download directory is echoed in the cell output.
artifact.download()

[34m[1mwandb[0m:   5 of 5 files downloaded.  


'.\\artifacts\\lunar_lander_model-v131'

In [5]:
# Compose the Hydra configuration named "config", using "conf" as the config path.
# `cfg.agent` and `cfg.training` are later unpacked into the agent constructor.
with initialize(version_base=None, config_path="conf"):
    cfg = compose(config_name="config")

In [6]:
# Build the agent from the Hydra config sections plus the environment-derived sizes.
agent = SACAgent2(**OmegaConf.to_object(cfg.agent), **OmegaConf.to_object(cfg.training),
                      **env_info)
# Load the weights from the directory reported by W&B instead of a hard-coded
# Windows-style path (".\\artifacts\\..."), which does not resolve on other
# platforms. Calling download() again returns the local artifact directory.
agent.load_agent(artifact.download())

In [19]:
@widgets.interact_manual(g=(-12.0, 0.0), wind_power=(0.0, 20.0), turbulence_power=(0.0, 2.0)
                         )
def make_video(g=-10, wind_power=0.0, turbulence_power=0.0, plot_every_action=5):
    """Roll out the loaded agent for one episode and display rendered frames inline.

    A fresh LunarLander environment is created with the selected physics
    parameters; the global `agent` acts deterministically (`addNoise=False`)
    until the episode ends.

    Args:
        g: Gravity passed to the environment.
        wind_power: Wind strength passed to the environment (wind is always enabled).
        turbulence_power: Turbulence strength passed to the environment.
        plot_every_action: Render one frame every this many environment steps.
            Values below 1 are treated as 1.
    """
    # The widget for this argument is generated from its integer default and may
    # be set to zero or a negative value; clamp so the modulo below is well defined.
    render_interval = max(1, int(plot_every_action))

    env = gym.make('LunarLander-v2', continuous=True, render_mode='rgb_array', gravity=g , enable_wind=True, wind_power=wind_power, 
             turbulence_power=turbulence_power)
    try:
        obs, info = env.reset()
        step = 0
        while True:
            action = agent.action(obs, addNoise=False)
            # Feed the *new* observation back to the agent on the next iteration.
            obs, reward, terminated, truncated, _ = env.step(action)
            if step % render_interval == 0:
                clear_output(wait=True)
                plt.imshow(env.render())
                plt.show()
            step += 1
            # Stop on either end-of-episode signal; ignoring truncation could loop forever.
            if terminated or truncated:
                break
    finally:
        # Release the environment even if rendering or the agent raises.
        env.close()

interactive(children=(FloatSlider(value=-10.0, description='g', max=0.0, min=-12.0), FloatSlider(value=0.0, de…

In [None]:
def _run_validation_episode(agent, env, max_steps):
    '''
    Roll out one deterministic episode on `env` and return its summed reward.
    The rollout stops after `max_steps` steps or as soon as the environment
    reports termination or truncation.
    '''
    obs, _info = env.reset()
    episode_return = 0

    for _step in range(max_steps):
        # Get deterministic action
        with T.no_grad():
            action = agent.action(obs, addNoise=False)

        # Take step in environment and carry the new observation forward
        obs, reward, terminated, truncated, _info = env.step(action)
        episode_return += reward

        # End episode on either end-of-episode signal
        if terminated or truncated:
            break

    return episode_return


def validate(agent, validation_args, experiment_path, episode, test_env_fabric):
    '''
    Evaluate the agent, save and log it, and report whether the env is solved.

    Runs `validation_args.eval_eps` deterministic episodes (each recorded with
    RecordVideo under `<experiment_path>/videos/<episode>`), aggregates the
    per-episode returns, saves the agent to `<experiment_path>/saves`, uploads
    the saved files as a W&B artifact, logs the metrics and appends the stop
    metric to `<experiment_path>/evaluation_rewards.csv`.

    Args:
        agent: object providing `action(obs, addNoise=False)` and `save_agent(path)`.
        validation_args: object with `eval_eps`, `validation_episode_length`
            and `eval_stop_condition` ("avg" or "min").
        experiment_path: directory receiving videos, saves and the CSV log.
        episode: training episode index used for naming and logging.
        test_env_fabric: object whose `generate_env()` returns a fresh environment.

    Returns:
        True if the stop metric exceeds 1.1 x the environment's reward threshold
        (or -120 when no threshold is available), otherwise False.

    Raises:
        ValueError: if `eval_eps` is smaller than 1 or `eval_stop_condition`
            is neither "avg" nor "min".
    '''
    if validation_args.eval_eps < 1:
        raise ValueError(f"eval_eps must be at least 1, got {validation_args.eval_eps}")

    video_path = os.path.join(experiment_path, "videos", str(episode))
    episode_returns = []
    env_spec = None

    for _evaluation_episode in range(validation_args.eval_eps):
        test_env = RecordVideo(test_env_fabric.generate_env(), video_path)
        try:
            # Remember the spec so the solved-threshold can be looked up after closing.
            env_spec = getattr(test_env, "spec", None)
            episode_returns.append(
                _run_validation_episode(agent, test_env, validation_args.validation_episode_length)
            )
        finally:
            # Closing the wrapper releases the environment and its recorder.
            test_env.close()

    avg_reward = round(sum(episode_returns) / len(episode_returns), 3)
    min_reward = round(min(episode_returns), 3)

    if validation_args.eval_stop_condition == "avg":
        stop_reward = avg_reward
    elif validation_args.eval_stop_condition == "min":
        stop_reward = min_reward
    else:
        raise ValueError(f"Unknown eval_stop_condition {validation_args.eval_stop_condition}")

    save_path = os.path.join(experiment_path, "saves")

    agent.save_agent(save_path)

    # Upload every saved file as one model artifact
    art = wandb.Artifact("lunar_lander_model", type="model")
    for f in os.listdir(save_path):
        art.add_file(os.path.join(save_path, f))
    wandb.log_artifact(art)

    print(f"Episode: {episode} | Average evaluation reward: {avg_reward} | Min evaluation reward: {min_reward} | Agent saved at {save_path}")

    wandb.log({"Validation after episode": episode,  "Average evaluation reward": avg_reward,
               "Min evaluation reward": min_reward})
    with open(f"{experiment_path}/evaluation_rewards.csv", "a") as f:
        f.write(f"{episode}, {stop_reward}\n")

    # Use the environment's own threshold when it defines one; otherwise fall
    # back to the fixed -120 bound instead of relying on a caught exception.
    reward_threshold = getattr(env_spec, "reward_threshold", None)
    if reward_threshold is None:
        solved_threshold = -120
    else:
        solved_threshold = reward_threshold * 1.1  # x 1.1 because of small eval_episodes

    if stop_reward > solved_threshold:
        print(f"Environment solved after {episode} episodes")
        return True
    return False