In [4]:
"""
run anaconda prompt 
"""

import os

home_path = os.path.expanduser('~')
cur_path = os.getcwd()
conda_path = home_path + "\\anaconda3"
conda_script_path = home_path + "\\anaconda3\\Scripts\\activate.bat"
exc = ' '.join(['start', '%windir%\System32\cmd.exe "/K"', conda_script_path, conda_path])
!$exc

"""
run tensorboard server

conda activate py38-pytorch-gpu && tensorboard --port=6006 --logdir=runs
http://localhost:6006/
"""

'\nrun tensorboard server\n\nconda activate py38-pytorch-gpu && tensorboard --port=6006 --logdir=runs\nhttp://localhost:6006/\n'

In [None]:
from env import Env
from runner import RunnerParams
from logger import Logger
from remover import Remover
from algorithms.dqn import DQN, DQNParams
from algorithms.dqn_runner import DQNRunner
from algorithms.ddqn import DDQN, DDQNParams
from algorithms.ddqn_runner import DDQNRunner
from algorithms.reinforce import Reinforce, ReinforceParams
from algorithms.reinforce_runner import ReinforceRunner
from algorithms.actorcritic import ActorCritic, ActorCriticParams
from algorithms.actorcritic_runner import ActorCriticRunner

import itertools


class Trainer:
    """Collects (environment, algorithm, hyperparameter) cases and runs them.

    Cases are registered with ``add_case`` and executed by ``run``. Every
    registered case is executed once per entry of ``check_intervals``.
    """

    def __init__(self, check_intervals=None):
        """Set up the supported environments/algorithms and their lookups.

        Args:
            check_intervals: iterable of values that ``run`` assigns, one sweep
                at a time, to ``RunnerParams.check_interval``. ``None`` means
                a single sweep that leaves ``check_interval`` at whatever
                ``RunnerParams`` was constructed with.
        """
        self._testcases = []
        self._envs = [Env.CARTPOLE, Env.LUNARLANDER]
        self._algos = [Reinforce, ActorCritic, DQN, DDQN]
        self._runners = {Reinforce: ReinforceRunner, ActorCritic: ActorCriticRunner,
                        DQN: DQNRunner, DDQN: DDQNRunner}
        self._params = {Reinforce: ReinforceParams, ActorCritic: ActorCriticParams,
                        DQN: DQNParams, DDQN: DDQNParams}
        self._check_intervals = check_intervals
        # Every (env, algo) combination, exposed for callers that want to pick cases.
        self.allcases = [*itertools.product(self._envs, self._algos)]

    def _default_kwargs(self, env):
        """Return ``[(algo, kwargs), ...]`` for ``env``, or ``None`` if unknown."""
        if env == Env.CARTPOLE:
            # DQN and DDQN share one set of defaults per environment.
            value_based = dict(
                n_node=128, learning_rate=0.0005, gamma=0.98, buffer_limit=50000,
                batch_size=32, n_train_start=2000, start_epsilon=0.1, update_interval=40)
            return [
                (Reinforce, dict(n_node=128, learning_rate=0.0005, gamma=0.98)),
                (ActorCritic, dict(n_node=256, learning_rate=0.0002, gamma=0.98, n_rollout=10)),
                (DQN, value_based),
                (DDQN, value_based),
            ]
        if env == Env.LUNARLANDER:
            value_based = dict(
                n_node=512, learning_rate=0.0005, gamma=0.98, buffer_limit=100000,
                batch_size=64, n_train_start=10000, start_epsilon=0.2, update_interval=20)
            return [
                (Reinforce, dict(n_node=128, learning_rate=0.002, gamma=0.98)),
                (ActorCritic, dict(n_node=256, learning_rate=0.002, gamma=0.98, n_rollout=20)),
                (DQN, value_based),
                (DDQN, value_based),
            ]
        return None

    def default_hyperparam(self, env, algo):
        """Build the default hyperparameter object for an (env, algo) pair.

        Args:
            env: ``Env.CARTPOLE`` or ``Env.LUNARLANDER``.
            algo: one of ``Reinforce``, ``ActorCritic``, ``DQN``, ``DDQN``.

        Returns:
            A new instance of the params class registered for ``algo``.

        Raises:
            Exception: if ``env`` or ``algo`` is not one of the supported values.
        """
        per_algo = self._default_kwargs(env)
        if per_algo is None:
            raise Exception(f'env does not exist: {env}')

        for candidate, kwargs in per_algo:
            if algo == candidate:
                # ** unpacking copies the values, so the shared dict is never mutated.
                return self._params[algo](**kwargs)

        raise Exception(f'algorithm does not exist: {algo}')

    def add_case(self, env, algo, algo_param=None):
        """Register one case to be executed by ``run``.

        Args:
            env: environment of the case.
            algo: algorithm class; must be a key of the runner lookup.
            algo_param: hyperparameter object; when ``None`` the defaults from
                ``default_hyperparam`` are used.
        """
        # Compare against None explicitly so that an explicitly supplied params
        # object is never discarded just because it happens to be falsy.
        if algo_param is None:
            algo_param = self.default_hyperparam(env, algo)
        algo_runner = self._runners[algo]
        self._testcases.append((env, algo_runner, algo_param))

    def _run_testcases(self, runner_param):
        """Run every registered case once with the given runner parameters."""
        for env, runner, algo_param in self._testcases:
            runner_param.name_postfix = str(algo_param)

            # Per-environment target score and reward scaling.
            if env == Env.CARTPOLE:
                runner_param.target_score = 500.0
                runner_param.reward_scale = 100.0
            elif env == Env.LUNARLANDER:
                runner_param.target_score = 200.0
                runner_param.reward_scale = 30.0

            runner(env.value, algo_param, runner_param).run()

    def run(self, runner_params=None):
        """Execute all registered cases, once per configured check interval.

        Args:
            runner_params: optional dict of keyword arguments for
                ``RunnerParams``. ``save_net``, ``video_record_interval`` and
                ``print_interval`` are always forced to ``True``, ``0`` and
                ``0`` respectively, overriding any value supplied here.
        """
        if runner_params is None:
            runner_params = {}
        merged_params = {**runner_params,
                         'save_net': True, 'video_record_interval': 0, 'print_interval': 0}
        runner_param = RunnerParams(**merged_params)

        # No intervals configured: run a single sweep instead of failing while
        # trying to iterate over None.
        if self._check_intervals is None:
            self._run_testcases(runner_param)
            return

        for check_interval in self._check_intervals:
            runner_param.check_interval = check_interval
            self._run_testcases(runner_param)

In [1]:
# Ask Remover to process the output directories of earlier runs before training.
# NOTE(review): presumably this deletes them; confirm against Remover.
Remover().remove_dirs(['runs', 'weights', 'videos'])


# Every (environment, algorithm) pair known to Trainer, split per environment.
all_cases = Trainer().allcases
# Lists instead of lazy filter objects: a filter iterator is empty after its
# first pass, which is easy to trip over when notebook cells are re-run.
cartpole_case = [case for case in all_cases if case[0] == Env.CARTPOLE]
lunar_case = [case for case in all_cases if case[0] == Env.LUNARLANDER]

# A single sweep with check_interval=1.
tr = Trainer([1])

# Cases to train in this run; extend the list to cover more combinations.
for env, algo in [(Env.LUNARLANDER, Reinforce)]:
    tr.add_case(env, algo)

tr.run()
print('전체 테스트 종료')

c:\Users\kuro1\Source\Repos\Remote\Univ\graduation-project\src\weights 처리중 에러 발생
<class 'FileNotFoundError'>
(2, '지정된 경로를 찾을 수 없습니다')
초기 설정
algorithm: Reinforce
env: LunarLander-v2
state space: (8,)
action space: Discrete(4)
시뮬레이션 시작


In [1]:
def tuning_reinforce(start=0.0005, step=0.001, n_trials=21, max_episode=1000):
    """Sweep the Reinforce learning rate on LunarLander.

    One case is registered per trial with
    ``learning_rate = start + step * i`` for ``i`` in ``range(n_trials)``;
    all other hyperparameters come from ``Trainer.default_hyperparam``.
    The defaults reproduce the original hard-coded sweep.

    Args:
        start: learning rate of the first trial.
        step: increment added per trial.
        n_trials: number of learning rates to try.
        max_episode: value passed to the runner as ``max_episode``.
    """
    Remover().remove_dirs(['runs', 'weights', 'videos'])
    tr = Trainer([1])
    for env, algo in [(Env.LUNARLANDER, Reinforce)]:
        for i in range(n_trials):
            # A fresh params object per trial so cases do not share state.
            hparam = tr.default_hyperparam(env, algo)
            hparam.learning_rate = start + step*i
            tr.add_case(env, algo, hparam)

    runner_params = {'max_episode': max_episode}
    tr.run(runner_params)
    print('전체 테스트 종료')

tuning_reinforce()

NameError: name 'Remover' is not defined