In [2]:
from dqn_agent import DQNAgent
from tetris import Tetris
from datetime import datetime
from statistics import mean, median
import random
from logs import CustomTensorBoard
from tqdm import tqdm

Using TensorFlow backend.





In [3]:
def dqn(episodes=2000, max_steps=None, epsilon_stop_episode=1500,
        mem_size=20000, discount=0.95, batch_size=128, epochs=15,
        render_every=1, log_every=50, replay_start_size=2000,
        train_every=1, n_neurons=None, render_delay=None,
        activations=None):
    """Train a DQN agent on the Tetris environment and log score statistics.

    Every argument is optional; calling ``dqn()`` uses the same values the
    function previously had hard-coded.

    Args:
        episodes: Number of games to play.
        max_steps: Upper bound on moves per game; a falsy value means no limit.
        epsilon_stop_episode: Forwarded unchanged to ``DQNAgent``.
        mem_size: Forwarded unchanged to ``DQNAgent``.
        discount: Forwarded unchanged to ``DQNAgent``.
        batch_size: Forwarded to ``agent.train``.
        epochs: Forwarded to ``agent.train``.
        render_every: Render every N-th episode; a falsy value disables
            rendering.
        log_every: Write a log entry every N-th episode (episode 0 is
            skipped); a falsy value disables logging.
        replay_start_size: Forwarded unchanged to ``DQNAgent``.
        train_every: Call ``agent.train`` every N-th episode; a falsy value
            disables training instead of raising on the modulo.
        n_neurons: Forwarded to ``DQNAgent``; defaults to ``[32, 32]``.
        render_delay: Forwarded to ``env.play``.
        activations: Forwarded to ``DQNAgent``; defaults to
            ``['relu', 'relu', 'linear']``.

    Returns:
        None. Results are reported through ``CustomTensorBoard``.

    Raises:
        RuntimeError: If the state chosen by the agent does not equal any of
            the candidate states offered by the environment.
    """
    # Build the list defaults per call so no mutable default is shared.
    if n_neurons is None:
        n_neurons = [32, 32]
    if activations is None:
        activations = ['relu', 'relu', 'linear']

    env = Tetris()
    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    # Each run logs into its own directory, named after the main
    # hyper-parameters and a timestamp.
    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    # Play the configured number of episodes; tqdm only draws the progress bar.
    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        # Decide whether this episode is rendered.
        render = bool(render_every) and episode % render_every == 0

        # Keep playing until the environment reports game over or the
        # optional step limit is reached.
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            if not next_states:
                # No candidate move is available, so there is nothing to play.
                # NOTE(review): treated as end of episode; confirm the
                # environment can actually return an empty mapping.
                break

            best_state = agent.best_state(next_states.values())

            # Map the chosen state back to the action (dict key) producing it.
            best_action = next(
                (action for action, state in next_states.items()
                 if state == best_state),
                None,
            )
            if best_action is None:
                raise RuntimeError(
                    'agent.best_state() returned a state that matches none of '
                    'the candidates from env.get_next_states()')

            # Apply the move; the environment returns the reward and whether
            # the game has ended.
            reward, done = env.play(best_action[0], best_action[1], render=render,
                                    render_delay=render_delay)

            # Store the transition in the agent's memory for later training.
            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train every `train_every` episodes. The truthiness guard matches the
        # render/log checks and avoids a modulo by zero or None.
        if train_every and episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Every `log_every` episodes (except episode 0), log the mean, minimum
        # and maximum score over the last `log_every` episodes.
        if log_every and episode and episode % log_every == 0:
            recent_scores = scores[-log_every:]
            log.log(episode, avg_score=mean(recent_scores),
                    min_score=min(recent_scores),
                    max_score=max(recent_scores))


# Start training when this code runs as the top-level script (which includes
# executing this cell in a notebook kernel), but not when it is imported.
if __name__ == "__main__":
    dqn()

  1%|▊                                                                               | 21/2000 [00:13<22:54,  1.44it/s]

KeyboardInterrupt: 