In [3]:
"""
run anaconda prompt 
"""

import os

home_path = os.path.expanduser('~')
cur_path = os.getcwd()
conda_path = home_path + "\\anaconda3"
conda_script_path = home_path + "\\anaconda3\\Scripts\\activate.bat"
exc = ' '.join(['start', '%windir%\System32\cmd.exe "/K"', conda_script_path, conda_path])
!$exc

"""
run tensorboard server

conda activate py38-pytorch-gpu && tensorboard --port=6006 --logdir=runs
http://localhost:6006/
"""

'\nrun tensorboard server\n\nconda activate py38-pytorch-gpu && tensorboard --port=6006 --logdir=runs\nhttp://localhost:6006/\n'

In [None]:
class AlgoParamDict:
    """Two-way lookup between abbreviated and full parameter names.

    Work in progress: only one pair is registered so far.
    """

    def __init__(self):
        # abbreviated name -> full name
        self.dic = dict(rwdscl='reward_scale')
        # full name -> abbreviated name (inverse of self.dic)
        self.r_dic = dict((full, abbr) for abbr, full in self.dic.items())

    def long(self, s):
        """Return the full name registered for abbreviation `s`, or None."""
        return self.dic[s] if s in self.dic else None

    def short(self, s):
        """Return the abbreviation registered for full name `s`, or None."""
        return self.r_dic[s] if s in self.r_dic else None

In [1]:
from env import Env
from runner import RunnerParams
from logger import Logger
from remover import Remover
from algorithms.dqn import DQN, DQNParams
from algorithms.dqn_runner import DQNRunner
from algorithms.ddqn import DDQN, DDQNParams
from algorithms.ddqn_runner import DDQNRunner
from algorithms.reinforce import Reinforce, ReinforceParams
from algorithms.reinforce_runner import ReinforceRunner
from algorithms.actorcritic import ActorCritic, ActorCriticParams
from algorithms.actorcritic_runner import ActorCriticRunner

import itertools


class Trainer:
    """Registers (environment, algorithm, hyperparameter) cases and runs them.

    Cases are added with `add_case` and executed sequentially by `run`.
    `allcases` exposes every (env, algo) combination known to the trainer.
    """

    def __init__(self, check_intervals=None):
        """
        Args:
            check_intervals: iterable of values. `run` executes every
                registered case once per value, assigning the value to the
                runner parameters' `check_interval`. When None, `run`
                executes every case once and does not touch `check_interval`.
        """
        self._testcases = []
        self._envs = [Env.CARTPOLE, Env.LUNARLANDER]
        self._algos = [Reinforce, ActorCritic, DQN, DDQN]
        self._runners = {Reinforce: ReinforceRunner, ActorCritic: ActorCriticRunner,
                         DQN: DQNRunner, DDQN: DDQNRunner}
        self._params = {Reinforce: ReinforceParams, ActorCritic: ActorCriticParams,
                        DQN: DQNParams, DDQN: DDQNParams}
        self._check_intervals = check_intervals
        self.allcases = list(itertools.product(self._envs, self._algos))

    def default_hyperparam(self, env, algo):
        """Build a fresh default hyperparameter object for an (env, algo) pair.

        DQN and DDQN share the same defaults within each environment.

        Args:
            env: `Env.CARTPOLE` or `Env.LUNARLANDER`.
            algo: one of `Reinforce`, `ActorCritic`, `DQN`, `DDQN`.

        Returns:
            A new instance of the parameter class registered for `algo`.

        Raises:
            Exception: if `env` or `algo` is not one of the values above.
        """
        if env == Env.CARTPOLE:
            if algo == Reinforce:
                kwargs = dict(n_node=128, learning_rate=0.0005, gamma=0.98)
            elif algo == ActorCritic:
                kwargs = dict(n_node=128, learning_rate=0.0005, gamma=0.98, n_rollout=10)
            elif algo in (DQN, DDQN):
                kwargs = dict(n_node=128, learning_rate=0.0005, gamma=0.98, buffer_limit=50000,
                              batch_size=32, n_train_start=2000, start_epsilon=0.1,
                              update_interval=10)
            else:
                raise Exception(f'algorithm does not exist: {algo}')
        elif env == Env.LUNARLANDER:
            if algo == Reinforce:
                kwargs = dict(n_node=256, learning_rate=0.0025, gamma=0.98)
            elif algo == ActorCritic:
                kwargs = dict(n_node=256, learning_rate=0.0025, gamma=0.98, n_rollout=20)
            elif algo in (DQN, DDQN):
                kwargs = dict(n_node=256, learning_rate=0.0005, gamma=0.98, buffer_limit=100000,
                              batch_size=64, n_train_start=10000, start_epsilon=0.2,
                              update_interval=20)
            else:
                raise Exception(f'algorithm does not exist: {algo}')
        else:
            raise Exception(f'env does not exist: {env}')

        return self._params[algo](**kwargs)

    def add_case(self, env, algo, algo_param=None):
        """Register one case to be executed by `run`.

        Args:
            env: environment enum member.
            algo: algorithm class; used to look up the matching runner class.
            algo_param: hyperparameter object. When None, the defaults from
                `default_hyperparam(env, algo)` are used.
        """
        # Explicit None check so a supplied object is never replaced merely
        # because it happens to evaluate as falsy.
        if algo_param is None:
            algo_param = self.default_hyperparam(env, algo)
        self._testcases.append((env, self._runners[algo], algo_param))

    def run(self, runner_params=None):
        """Execute every registered case, once per configured check interval.

        Args:
            runner_params: optional dict of keyword arguments for
                `RunnerParams`. The keys `save_net`, `video_record_interval`
                and `print_interval` are always overridden (True, 0, 0).
        """
        forced = {'save_net': True, 'video_record_interval': 0, 'print_interval': 0}
        # A single RunnerParams object is shared and mutated across all runs.
        runner_param = RunnerParams(**{**(runner_params or {}), **forced})

        if self._check_intervals is None:
            # No intervals configured: previously this raised TypeError when
            # iterating over None. Run each case once with check_interval
            # left as constructed.
            self._run_all_cases(runner_param)
            return

        for check_interval in self._check_intervals:
            runner_param.check_interval = check_interval
            self._run_all_cases(runner_param)

    def _run_all_cases(self, runner_param):
        """Run each registered case once using the shared `runner_param`."""
        for env, runner, algo_param in self._testcases:
            runner_param.name_postfix = str(algo_param)

            # Per-environment settings. For any other env the values from the
            # previous iteration (or from construction) are left in place.
            if env == Env.CARTPOLE:
                runner_param.target_score = 500.0
                runner_param.reward_scale = 100.0
            elif env == Env.LUNARLANDER:
                runner_param.target_score = 200.0
                runner_param.reward_scale = 30.0

            runner(env.value, algo_param, runner_param).run()


In [2]:
def train():
    """Train every algorithm on LunarLander with a check interval of 10.

    The CartPole cases are selected as well but are only used by the
    disabled block kept below for reference.
    """
    every_case = Trainer().allcases
    # Lazy selections: nothing is evaluated until a loop consumes them.
    cartpole_cases = (case for case in every_case if case[0] == Env.CARTPOLE)
    lunar_cases = (case for case in every_case if case[0] == Env.LUNARLANDER)

    # Disabled CartPole training (check interval 20):
    # tr = Trainer([20])
    # for env, algo in cartpole_cases:
    #     tr.add_case(env, algo)
    # tr.run()

    lunar_trainer = Trainer([10])
    for lunar_env, lunar_algo in lunar_cases:
        lunar_trainer.add_case(lunar_env, lunar_algo)
    lunar_trainer.run()

def replay():
    """Replay eight saved networks (4 algorithms x 2 environments) without training.

    Each case is (environment id, runner class, algorithm parameters, saved
    network name without extension). Every case is run with `train=False`,
    `load_net=True` and `max_episode=1000`; ".pt" is appended to the saved
    network name to form `load_name`.
    """
    cases = [
        (
            Env.CARTPOLE.value,
            ReinforceRunner,
            ReinforceParams(n_node=128, learning_rate=0.0005, gamma=0.98),
            "Reinforce_CartPole-v1_500_train=True_intvl=20_rwdscl=100.0_node=128_lRate=0.0005_gma=0.98_1634893243",
        ),
        (
            Env.CARTPOLE.value,
            ActorCriticRunner,
            ActorCriticParams(n_node=128, learning_rate=0.0005, gamma=0.98, n_rollout=10),
            "ActorCritic_CartPole-v1_500_train=True_intvl=20_rwdscl=100.0_node=128_lRate=0.0005_gma=0.98_nRoll=10_1634893506",
        ),
        (
            Env.CARTPOLE.value,
            DQNRunner,
            DQNParams(n_node=128, learning_rate=0.0005, gamma=0.98, buffer_limit=50000,
                      batch_size=32, n_train_start=2000, update_interval=10),
            "DQN_CartPole-v1_500_train=True_intvl=20_rwdscl=100.0_node=128_lRate=0.0005_gma=0.98_nBuf=50000_nBat=32_nStrt=2000_updIntvl=10_1634893791",
        ),
        (
            Env.CARTPOLE.value,
            DDQNRunner,
            # n_train_start is 2000 to agree with the "nStrt=2000" recorded in
            # the saved network name below (it was 5000, the only parameter in
            # this table that disagreed with its saved name).
            DDQNParams(n_node=128, learning_rate=0.0005, gamma=0.98, buffer_limit=50000,
                       batch_size=32, n_train_start=2000, update_interval=10),
            "DDQN_CartPole-v1_500_train=True_intvl=20_rwdscl=100.0_node=128_lRate=0.0005_gma=0.98_nBuf=50000_nBat=32_nStrt=2000_updIntvl=10_1634893884",
        ),
        (
            Env.LUNARLANDER.value,
            ReinforceRunner,
            ReinforceParams(n_node=256, learning_rate=0.0025, gamma=0.98),
            "Reinforce_LunarLander-v2_209_train=True_intvl=10_rwdscl=30.0_node=256_lRate=0.0025_gma=0.98_1634900723",
        ),
        (
            Env.LUNARLANDER.value,
            ActorCriticRunner,
            ActorCriticParams(n_node=256, learning_rate=0.0025, gamma=0.98, n_rollout=20),
            "ActorCritic_LunarLander-v2_209_train=True_intvl=10_rwdscl=30.0_node=256_lRate=0.0025_gma=0.98_nRoll=20_1634902832",
        ),
        (
            Env.LUNARLANDER.value,
            DQNRunner,
            DQNParams(n_node=256, learning_rate=0.0005, gamma=0.98, buffer_limit=100000,
                      batch_size=64, n_train_start=10000, update_interval=20),
            "DQN_LunarLander-v2_226_train=True_intvl=10_rwdscl=30.0_node=256_lRate=0.0005_gma=0.98_nBuf=100000_nBat=64_nStrt=10000_updIntvl=20_1634905473",
        ),
        (
            Env.LUNARLANDER.value,
            DDQNRunner,
            DDQNParams(n_node=256, learning_rate=0.0005, gamma=0.98, buffer_limit=100000,
                       batch_size=64, n_train_start=10000, update_interval=20),
            "DDQN_LunarLander-v2_220_train=True_intvl=10_rwdscl=30.0_node=256_lRate=0.0005_gma=0.98_nBuf=100000_nBat=64_nStrt=10000_updIntvl=20_1634906356",
        ),
    ]

    for env, runner, algop, saved_name in cases:
        runnerp = RunnerParams(train=False, save_net=False, load_net=True, target_score=9999.0,
                               load_name=saved_name + ".pt", name_postfix=str(algop),
                               check_interval=1, max_video=0, save_check_log=False,
                               save_step_log=True, print_interval=0, video_record_interval=0,
                               max_episode=1000)
        runner(env, algop, runnerp).run()

    print('모두 종료됨')

# Disabled clean-up variants kept for reference:
# Remover().remove_dirs(['runs', 'weights', 'videos'])
# Remover().remove_dirs(['videos'])
# Elapsed-time notes from earlier runs (which run each refers to is not recorded here):
# print_interval=10, max_epi=100 > 4 min 35 s
# print_interval=0, max_epi=100 > 4 min 25 s
# save_check_log=True, print_interval=0, max_epi=100 > 4 min 37 s
# Clear the 'logs' directory before replaying. The recorded output shows that a
# missing directory is reported (FileNotFoundError) and execution continues.
Remover().remove_dirs(['logs'])
# Run the whole replay suite ten times in a row.
for _ in range(10):
    replay()

c:\Users\kuro1\Source\Repos\Remote\Univ\graduation-project\src\logs 처리중 에러 발생
<class 'FileNotFoundError'>
(2, '지정된 경로를 찾을 수 없습니다')
초기 설정
algorithm: Reinforce
env: CartPole-v1
state space: (4,)
action space: Discrete(2)
네트워크 불러오기
시뮬레이션 시작
시뮬레이션 종료
1000 에피소드 초과하여 종료
초기 설정
algorithm: ActorCritic
env: CartPole-v1
state space: (4,)
action space: Discrete(2)
네트워크 불러오기
시뮬레이션 시작
시뮬레이션 종료
1000 에피소드 초과하여 종료
초기 설정
algorithm: DQN
env: CartPole-v1
state space: (4,)
action space: Discrete(2)
네트워크 불러오기
시뮬레이션 시작
시뮬레이션 종료
1000 에피소드 초과하여 종료
초기 설정
algorithm: DDQN
env: CartPole-v1
state space: (4,)
action space: Discrete(2)
네트워크 불러오기
시뮬레이션 시작
시뮬레이션 종료
1000 에피소드 초과하여 종료
초기 설정
algorithm: Reinforce
env: LunarLander-v2
state space: (8,)
action space: Discrete(4)
네트워크 불러오기
시뮬레이션 시작
시뮬레이션 종료
1000 에피소드 초과하여 종료
초기 설정
algorithm: ActorCritic
env: LunarLander-v2
state space: (8,)
action space: Discrete(4)
네트워크 불러오기
시뮬레이션 시작
시뮬레이션 종료
1000 에피소드 초과하여 종료
초기 설정
algorithm: DQN
env: LunarLander-v2
state space: (8,)
action 

In [None]:
def tuning_reinforce():
    """Sweep the Reinforce learning rate on LunarLander.

    Registers 21 cases with learning rates 0.0005, 0.0010, ..., 0.0105
    (all other hyperparameters at their defaults), then runs them with
    `max_episode=1000` and a check interval of 10.
    """
    trainer = Trainer([10])
    target_env, target_algo = Env.LUNARLANDER, Reinforce

    # Numerators 5, 10, ..., 105 over a fixed denominator of 10000.
    for numerator in range(5, 106, 5):
        candidate = trainer.default_hyperparam(target_env, target_algo)
        candidate.learning_rate = numerator / 10000
        trainer.add_case(target_env, target_algo, candidate)

    trainer.run({'max_episode': 1000})
    print('전체 테스트 종료')

def tuning_ddqn():
    """Sweep DDQN's `n_node` on CartPole and LunarLander.

    For each environment, registers five cases with n_node = 16, 32, 64,
    128, 256 (all other hyperparameters at their defaults), then runs them
    with `max_episode=1000` and a check interval of 10.
    """
    trainer = Trainer([10])
    targets = [(Env.CARTPOLE, DDQN), (Env.LUNARLANDER, DDQN)]
    # Doubling sequence starting at 16.
    node_counts = [16 << step for step in range(5)]

    for target_env, target_algo in targets:
        for node_count in node_counts:
            candidate = trainer.default_hyperparam(target_env, target_algo)
            candidate.n_node = node_count
            trainer.add_case(target_env, target_algo, candidate)

    trainer.run({'max_episode': 1000})
    print('전체 테스트 종료')

# Disabled clean-up of previous artifacts, kept for reference:
# Remover().remove_dirs(['runs', 'weights', 'videos'])
# Start the DDQN n_node sweep defined earlier in this cell.
tuning_ddqn()