In [1]:
import sys, os

def ppath(x):
    """Return the absolute path of the directory that contains ``x``."""
    return os.path.dirname(os.path.abspath(x))


# NOTE: despite its name, ``file_name`` holds the current working directory,
# not a file path. The name is kept because other cells may reference it.
file_name = os.getcwd()

# Make the directory two levels above the working directory importable
# (presumably where the ``recorder`` package lives -- confirm against the
# repository layout). The membership check keeps ``sys.path`` free of
# duplicate entries when this cell is re-run in the same kernel.
_project_root = ppath(ppath(file_name))
if _project_root not in sys.path:
    sys.path.append(_project_root)

from recorder.recorder import Recorder

In [2]:
import gym

from actor_critic import *
from torch.distributions import Categorical

from torch.utils.tensorboard import SummaryWriter

In [3]:
def main():
    """Train an actor-critic model on CartPole-v1 and log the average score.

    Runs up to 10000 episodes. Within an episode, transitions are collected
    for at most ``n_rollout`` steps (or until the episode ends) and then
    ``model.train_net()`` is called on that batch. Every ``print_interval``
    episodes the accumulated score divided by ``print_interval`` is printed
    and written to TensorBoard under ``score/train``.

    Training stops early once ``recorder.recorded_epi()`` reports at least
    five entries. The TensorBoard writer and the environment are always
    closed, even when the loop is interrupted (e.g. ``KeyboardInterrupt``).
    """
    recorder = Recorder(gym.make('CartPole-v1'), False)
    env = recorder.wrapped_env()
    writer = SummaryWriter()

    # A single model class is used instead of a bare policy: actor-critic
    # needs both the policy and the value, so the model exposes both.
    model = ActorCritic()
    print_interval = 20
    score = 0.0

    try:
        for n_epi in range(10000):
            done = False
            s = env.reset()
            while not done:
                for _ in range(n_rollout):
                    # Compute the action probability distribution
                    prob = model.pi(torch.from_numpy(s).float())
                    # Wrap it in a categorical distribution object
                    m = Categorical(prob)
                    # Sample an action
                    a = m.sample().item()
                    # Step the environment and get the next observation
                    s_prime, r, done, _info = env.step(a)
                    # TD allows learning at every step, but collecting
                    # transitions and learning in batches trained better.
                    model.put_data((s,a,r,s_prime,done))

                    s = s_prime
                    score += r

                    # ``score`` is the running sum over the current reporting
                    # window, not a single episode's return. Once it exceeds
                    # the threshold, the next episode index is handed to the
                    # recorder (presumably to mark it for recording -- confirm
                    # against Recorder.update).
                    if score/print_interval > 400:
                        recorder.update([n_epi + 1])

                    if done:
                        break

                model.train_net()

            if len(recorder.recorded_epi()) >= 5:
                env.reset() # finalize the video of the last episode
                break

            # NOTE(review): the first report (n_epi == 20) sums episodes
            # 0..20, i.e. 21 episodes, but still divides by print_interval.
            if n_epi%print_interval==0 and n_epi!=0:
                print("# of episode :{}, avg score : {:.1f}".format(n_epi, score/print_interval))
                writer.add_scalar("score/train", score/print_interval, n_epi)
                score = 0.0
    finally:
        # Release resources on every exit path; the nested ``finally``
        # ensures the environment is closed even if the writer fails.
        try:
            writer.flush()
            writer.close()
        finally:
            env.close()

In [4]:
# Entry point: start training only when this code runs as the main module.
if __name__ == "__main__":
    main()

# of episode :20, avg score : 16.9
# of episode :40, avg score : 21.1
# of episode :60, avg score : 19.6
# of episode :80, avg score : 17.9
# of episode :100, avg score : 20.4
# of episode :120, avg score : 20.6
# of episode :140, avg score : 29.7
# of episode :160, avg score : 24.4
# of episode :180, avg score : 34.2
# of episode :200, avg score : 32.2
# of episode :220, avg score : 37.2
# of episode :240, avg score : 40.0
# of episode :260, avg score : 43.0
# of episode :280, avg score : 41.1
# of episode :300, avg score : 64.4
# of episode :320, avg score : 56.5
# of episode :340, avg score : 85.0
# of episode :360, avg score : 60.5
# of episode :380, avg score : 97.2
# of episode :400, avg score : 94.0
# of episode :420, avg score : 98.5
# of episode :440, avg score : 119.7
# of episode :460, avg score : 131.6
# of episode :480, avg score : 180.5
# of episode :500, avg score : 154.0
# of episode :520, avg score : 149.8


KeyboardInterrupt: 