In [1]:
from ai_image_preprocessor import preprocess

def step(env, action, state):
    """Repeat ``action`` for up to two environment steps and build the next state.

    The same action is applied twice in a row and both resulting frames are
    preprocessed and handed to ``util.update_state_arr`` together with the
    current ``state``.  If the first step already ends the episode, the
    environment is NOT stepped again (stepping a finished Gym environment is
    undefined); the terminal frame is used for both frame slots and no extra
    reward is added.

    Note: ``util`` and ``preprocess`` are looked up at call time in the
    notebook's global namespace, so the cell importing ``ai_util as util``
    must have run before this function is called.

    Args:
        env: environment exposing ``step(action)`` that returns a 4-tuple
            ``(frame, reward, done, info)``.
        action: action forwarded unchanged to ``env.step``.
        state: current state, passed through to ``util.update_state_arr``.

    Returns:
        tuple: ``(next_state, reward, done)`` where ``reward`` is the summed
        reward of the steps actually taken, truncated to ``int``, and ``done``
        is truthy if any of those steps ended the episode.
    """
    frame_1, reward_1, done_1, _ = env.step(action)

    if done_1:
        # Episode is over: do not touch the environment again until reset().
        frame_2, reward_2, done_2 = frame_1, 0, done_1
    else:
        frame_2, reward_2, done_2, _ = env.step(action)

    next_state = util.update_state_arr(state, preprocess(frame_1), preprocess(frame_2))
    return (next_state, int(reward_1 + reward_2), done_1 or done_2)

In [2]:
import ai_display as display
import ai_util as util
from ai_logger import Logger
import time

def train(env, agent, n_episodes=10000, model_name="model", save_interval=5, log_interval=1,
          max_steps=1000):
    """Run the training loop: play episodes, store transitions and replay.

    For every episode the environment is reset, the agent acts until the
    episode ends or ``max_steps`` agent steps were taken, and after each step
    the transition is stored with ``agent.remember`` and
    ``agent.replay(batch_size=32)`` is called.  One log row is recorded per
    episode (including episodes cut off by ``max_steps``).

    Args:
        env: environment exposing ``reset()`` and usable by ``step``.
        agent: object exposing ``act``, ``remember``, ``replay`` and
            ``save_model``.
        n_episodes: number of episodes to play.
        model_name: path/name forwarded to ``agent.save_model``.
        save_interval: save the model (and append the episode's step count to
            ``steps.txt``) every this many episodes; ``0``/``None`` disables
            periodic saving.
        log_interval: call ``logger.log()`` every this many episodes;
            ``0``/``None`` disables periodic logging.
        max_steps: upper bound on agent steps per episode.

    Side effects:
        Appends one line per periodic save to ``steps.txt`` in the current
        working directory and saves the model once more after the last episode.
    """
    logger = Logger(10, "episode | frames | score | step time | ep time")

    for episode in range(n_episodes):
        frame = env.reset()
        state = util.create_state_arr(preprocess(frame))
        score = 0
        steps = 0
        start_time = time.time()

        for t in range(max_steps):
            # Debug aid, uncomment to render the current state:
            # display.show_state(state[3], env.spec.id, t, score)

            action = agent.act(state)
            next_state, reward, done = step(env, action, state)
            agent.remember(state, action, reward, next_state, done)

            state = next_state
            score += reward

            agent.replay(batch_size=32)
            steps += 1
            if done:
                break

        # Log after the loop so that episodes truncated by max_steps are
        # reported too.  `steps` (not the zero-based loop index) is the number
        # of steps taken, which also avoids dividing by zero when the episode
        # ends on its very first step.
        duration = time.time() - start_time
        logger.add("{:>7d} | {:>6d} | {:>5d} | {:>9.5f} | {:>7.5f}"
                   .format(episode + 1, steps, score, duration / max(steps, 1), duration))

        if log_interval and episode % log_interval == 0:
            logger.log()

        if save_interval and episode % save_interval == 0:
            agent.save_model(model_name)
            # One value per line so the file can be parsed afterwards; the
            # context manager closes the handle even if the write fails.
            with open("steps.txt", "a+") as steps_file:
                steps_file.write("{}\n".format(steps))

    agent.save_model(model_name)


In [None]:
from ai_agent import Agent
import gym

# --- Configuration -------------------------------------------------------
model_name = "models/new-model.h5"
# Presumably four stacked 84x84 preprocessed frames; confirm against Agent.
state_shape = (84, 84, 4)

# --- Environment ---------------------------------------------------------
env = gym.make("BreakoutDeterministic-v4")
n_actions = env.action_space.n

# --- Agent ---------------------------------------------------------------
agent = Agent(state_shape, n_actions, epsilon=1.0)
# Alternative to loading an existing model (left disabled):
# agent.new_model()
agent.load_model(model_name)

# --- Training ------------------------------------------------------------
train(env, agent, model_name=model_name)

episode | frames | score | step time | ep time
     57 |    123 |     2 |   0.14599 | 17.95647
     58 |     76 |     1 |   0.13774 | 10.46841
     59 |    157 |     4 |   0.12777 | 20.05925
     60 |    118 |     3 |   0.12747 | 15.04190
     61 |    112 |     2 |   0.12689 | 14.21216
     62 |     86 |     1 |   0.12939 | 11.12756
     63 |     75 |     0 |   0.12763 | 9.57258
     64 |    103 |     2 |   0.12629 | 13.00769
     65 |    110 |     2 |   0.12628 | 13.89043
     66 |     94 |     1 |   0.12710 | 11.94715
