In [None]:
"""
Open an Anaconda Prompt in a new cmd.exe window (Windows only).
"""

import os

home_path = os.path.expanduser('~')
cur_path = os.getcwd()
conda_path = home_path + "\\anaconda3"
conda_script_path = home_path + "\\anaconda3\\Scripts\\activate.bat"
exc = ' '.join(['start', '%windir%\System32\cmd.exe "/K"', conda_script_path, conda_path])
!$exc

In [None]:
"""
run tensorboard server

conda activate py38-pytorch-gpu && tensorboard --port=6006 --logdir=runs
http://localhost:6006/
"""

"""
print env info
"""

import gym
from env import Env

# Create the BipedalWalker environment and dump a few of its attributes.
env = gym.make(Env.BIPEDALWALKER.value)

# Every name reachable on the environment object.
print(dir(env))

# Each line is printed as "<expression>=<repr of value>". Attributes are
# looked up one at a time, immediately before they are printed.
for attr_name in ('_max_episode_steps', 'action_space', 'metadata'):
    print(f'env.{attr_name}={getattr(env, attr_name)!r}')

# First entry of the observation space's shape tuple.
obs_dim = env.observation_space.shape[0]
print(f'env.observation_space.shape[0]={obs_dim!r}')

for attr_name in ('reward_range', 'seed', 'spec'):
    print(f'env.{attr_name}={getattr(env, attr_name)!r}')

In [None]:

from env import Env
from runner import RunnerParams

def dqn_pole_train():
    """Run DQNRunner on the CartPole environment with network saving enabled.

    Builds a DQNParams and a RunnerParams from the fixed values below, hands
    both to DQNRunner together with Env.CARTPOLE.value, and calls run().
    Returns nothing.
    """
    # Imported here so the DQN modules are only loaded when this function runs.
    from algorithms.dqn import DQNParams
    from algorithms.dqn_runner import DQNRunner

    hyper_params = DQNParams(
        buffer_limit=50000,
        n_train_start=4000,
        n_node=128,
        start_epsilon=0.1,
        learning_rate=0.0001,
        update_interval=40,
    )
    run_config = RunnerParams(
        save_net=True,
        # The stringified hyper-parameters are used as the run-name postfix.
        name_postfix=str(hyper_params),
        target_score=500.0,
        interval=8,
        max_video=100,
        video_record_interval=200,
        reward_scale=100.0,
    )
    runner = DQNRunner(Env.CARTPOLE.value, hyper_params, run_config)
    runner.run()


def dqn_pole_load():
    """Run DQNRunner on CartPole with train=False and a saved network loaded.

    Uses the same DQNParams values as dqn_pole_train and points RunnerParams
    at a specific saved `.pt` file via load_name. Returns nothing.
    """
    # Imported here so the DQN modules are only loaded when this function runs.
    from algorithms.dqn import DQNParams
    from algorithms.dqn_runner import DQNRunner

    hyper_params = DQNParams(
        buffer_limit=50000,
        n_train_start=4000,
        n_node=128,
        start_epsilon=0.1,
        learning_rate=0.0001,
        update_interval=40,
    )
    run_config = RunnerParams(
        train=False,
        load_net=True,
        # File name of the saved network to load.
        load_name='DQN-CartPole-v1-500.0-train=True-intvl=8-rwdscl=100.0-node=128-lRate=0.0001-gma=0.98-nBuf=50000-nBat=32-nStrt=4000-updIntvl=40-1632225762.pt',
        name_postfix=str(hyper_params),
        target_score=999.0,
        max_video=100,
        interval=1,
        video_record_interval=1,
        reward_scale=100.0,
    )
    runner = DQNRunner(Env.CARTPOLE.value, hyper_params, run_config)
    runner.run()


def ddqn_pole_train():
    """Run DDQNRunner on the CartPole environment with network saving enabled.

    Builds a DDQNParams and a RunnerParams from the fixed values below, hands
    both to DDQNRunner together with Env.CARTPOLE.value, and calls run().
    Returns nothing.
    """
    # Imported here so the DDQN modules are only loaded when this function runs.
    from algorithms.ddqn import DDQNParams
    from algorithms.ddqn_runner import DDQNRunner

    hyper_params = DDQNParams(
        buffer_limit=50000,
        n_train_start=2000,
        batch_size=32,
        gamma=0.98,
        n_node=128,
        start_epsilon=0.08,
        learning_rate=0.0005,
        update_interval=20,
    )
    run_config = RunnerParams(
        save_net=True,
        # The stringified hyper-parameters are used as the run-name postfix.
        name_postfix=str(hyper_params),
        target_score=500.0,
        interval=8,
        max_video=100,
        video_record_interval=200,
        reward_scale=100.0,
    )
    runner = DDQNRunner(Env.CARTPOLE.value, hyper_params, run_config)
    runner.run()


def ddqn_pole_load():
    """Run DDQNRunner on CartPole with train=False and a saved network loaded.

    Uses the same DDQNParams values as ddqn_pole_train and points RunnerParams
    at a specific saved `.pt` file via load_name. Returns nothing.
    """
    # Imported here so the DDQN modules are only loaded when this function runs.
    from algorithms.ddqn import DDQNParams
    from algorithms.ddqn_runner import DDQNRunner

    hyper_params = DDQNParams(
        buffer_limit=50000,
        n_train_start=2000,
        batch_size=32,
        gamma=0.98,
        n_node=128,
        start_epsilon=0.08,
        learning_rate=0.0005,
        update_interval=20,
    )
    run_config = RunnerParams(
        train=False,
        load_net=True,
        # File name of the saved network to load.
        load_name='DDQN-CartPole-v1-500.0-train=True-intvl=8-rwdscl=100.0-node=128-lRate=0.0005-gma=0.98-nBuf=50000-nBat=32-nStrt=2000-updIntvl=20-1632228354.pt',
        name_postfix=str(hyper_params),
        target_score=999.0,
        max_video=100,
        interval=1,
        video_record_interval=1,
        reward_scale=100.0,
    )
    runner = DDQNRunner(Env.CARTPOLE.value, hyper_params, run_config)
    runner.run()


def ac_pole_train():
    """Run ActorCriticRunner on CartPole with network saving enabled.

    Builds an ActorCriticParams and a RunnerParams from the fixed values
    below, hands both to ActorCriticRunner together with Env.CARTPOLE.value,
    and calls run(). Returns nothing.
    """
    # Imported here so the actor-critic modules are only loaded on demand.
    from algorithms.actorcritic import ActorCriticParams
    from algorithms.actorcritic_runner import ActorCriticRunner

    hyper_params = ActorCriticParams(
        n_node=256,
        learning_rate=0.0002,
        gamma=0.98,
        n_rollout=10,
    )
    run_config = RunnerParams(
        save_net=True,
        # The stringified hyper-parameters are used as the run-name postfix.
        name_postfix=str(hyper_params),
        target_score=500.0,
        interval=8,
        max_video=100,
        video_record_interval=200,
        reward_scale=100.0,
    )
    runner = ActorCriticRunner(Env.CARTPOLE.value, hyper_params, run_config)
    runner.run()


def ac_pole_load():
    """Run ActorCriticRunner on CartPole with train=False and a saved network.

    Uses the same ActorCriticParams values as ac_pole_train and points
    RunnerParams at a specific saved `.pt` file via load_name. Returns nothing.
    """
    # Imported here so the actor-critic modules are only loaded on demand.
    from algorithms.actorcritic import ActorCriticParams
    from algorithms.actorcritic_runner import ActorCriticRunner

    hyper_params = ActorCriticParams(
        n_node=256,
        learning_rate=0.0002,
        gamma=0.98,
        n_rollout=10,
    )
    run_config = RunnerParams(
        train=False,
        load_net=True,
        # File name of the saved network to load.
        load_name='ActorCritic-CartPole-v1-500.0-train=True-intvl=8-rwdscl=100.0-node=256-lRate=0.0002-gma=0.98-nRoll=10-1632236701.pt',
        name_postfix=str(hyper_params),
        target_score=999.0,
        max_video=100,
        interval=1,
        video_record_interval=1,
        reward_scale=100.0,
    )
    runner = ActorCriticRunner(Env.CARTPOLE.value, hyper_params, run_config)
    runner.run()


def ppo_pole_train():
    """Run PPORunner on the CartPole environment with network saving enabled.

    Builds a PPOParams and a RunnerParams from the fixed values below, hands
    both to PPORunner together with Env.CARTPOLE.value, and calls run().
    Returns nothing.
    """
    # Imported here so the PPO modules are only loaded when this function runs.
    from algorithms.ppo import PPOParams
    from algorithms.ppo_runner import PPORunner

    hyper_params = PPOParams(
        n_node=128,
        learning_rate=0.0001,
        gamma=0.98,
        lmbda=0.95,
        eps_clip=0.1,
        k_epoch=3,
        t_horizon=20,
    )
    run_config = RunnerParams(
        save_net=True,
        # The stringified hyper-parameters are used as the run-name postfix.
        name_postfix=str(hyper_params),
        target_score=500.0,
        interval=100,
        max_video=100,
        video_record_interval=200,
        reward_scale=100.0,
    )
    runner = PPORunner(Env.CARTPOLE.value, hyper_params, run_config)
    runner.run()


def ppo_pole_load():
    """Run PPORunner on CartPole with train=False and a saved network loaded.

    Uses the same PPOParams values as ppo_pole_train and points RunnerParams
    at a specific saved `.pt` file via load_name. Returns nothing.
    """
    # Imported here so the PPO modules are only loaded when this function runs.
    from algorithms.ppo import PPOParams
    from algorithms.ppo_runner import PPORunner

    hyper_params = PPOParams(
        n_node=128,
        learning_rate=0.0001,
        gamma=0.98,
        lmbda=0.95,
        eps_clip=0.1,
        k_epoch=3,
        t_horizon=20,
    )
    run_config = RunnerParams(
        train=False,
        load_net=True,
        # File name of the saved network to load.
        load_name='PPO-CartPole-v1-500.0-train=True-intvl=100-rwdscl=100.0-node=128-lRate=0.0001-gma=0.98-lmb=0.95-epsclp=0.1-k=3-t=20-1632243563.pt',
        name_postfix=str(hyper_params),
        target_score=999.0,
        max_video=100,
        interval=1,
        video_record_interval=1,
        reward_scale=100.0,
    )
    runner = PPORunner(Env.CARTPOLE.value, hyper_params, run_config)
    runner.run()


def ppolstm_pole_train():
    """Run PPOlstmRunner on the CartPole environment with network saving enabled.

    Builds a PPOlstmParams and a RunnerParams from the fixed values below,
    hands both to PPOlstmRunner together with Env.CARTPOLE.value, and calls
    run(). Returns nothing.
    """
    # Imported here so the PPO-LSTM modules are only loaded on demand.
    from algorithms.ppolstm import PPOlstmParams
    from algorithms.ppolstm_runner import PPOlstmRunner

    hyper_params = PPOlstmParams(
        n_node=128,
        learning_rate=0.0001,
        gamma=0.98,
        lmbda=0.95,
        eps_clip=0.1,
        k_epoch=3,
        t_horizon=20,
    )
    run_config = RunnerParams(
        save_net=True,
        # The stringified hyper-parameters are used as the run-name postfix.
        name_postfix=str(hyper_params),
        target_score=500.0,
        interval=32,
        max_video=100,
        video_record_interval=200,
        reward_scale=100.0,
    )
    runner = PPOlstmRunner(Env.CARTPOLE.value, hyper_params, run_config)
    runner.run()


def ppolstm_pole_load():
    """Run PPOlstmRunner on CartPole with train=False and a saved network loaded.

    Uses the same PPOlstmParams values as ppolstm_pole_train and points
    RunnerParams at a specific saved `.pt` file via load_name. Returns nothing.
    """
    # Imported here so the PPO-LSTM modules are only loaded on demand.
    from algorithms.ppolstm import PPOlstmParams
    from algorithms.ppolstm_runner import PPOlstmRunner

    hyper_params = PPOlstmParams(
        n_node=128,
        learning_rate=0.0001,
        gamma=0.98,
        lmbda=0.95,
        eps_clip=0.1,
        k_epoch=3,
        t_horizon=20,
    )
    run_config = RunnerParams(
        train=False,
        load_net=True,
        # File name of the saved network to load.
        load_name='PPOlstm-CartPole-v1-500.0-train=True-intvl=32-rwdscl=100.0-node=128-lRate=0.0001-gma=0.98-lmb=0.95-epsclp=0.1-k=3-t=20-1632245913.pt',
        name_postfix=str(hyper_params),
        target_score=999.0,
        max_video=100,
        interval=1,
        video_record_interval=1,
        reward_scale=100.0,
    )
    runner = PPOlstmRunner(Env.CARTPOLE.value, hyper_params, run_config)
    runner.run()


# Cell entry point: runs the PPO-LSTM CartPole configuration that sets
# train=False and load_net=True. Replace this call with one of the other
# *_pole_train / *_pole_load functions defined in this cell to run a
# different experiment.
ppolstm_pole_load()
