In [None]:
import os
# Switch the working directory before the project imports below run.
# NOTE(review): presumably this lets `pavlov` resolve relative to /home --
# confirm, and consider making the path configurable.
os.chdir('/home')

import numpy as np
import pandas as pd
import gym

from pavlov import pipeline
from pavlov.pipeline import transformations
from pavlov import models
from pavlov import actors
from pavlov import agents
from pavlov import auxiliary

from keras import optimizers

In [None]:
# BREAKOUT
#
# Builds a DQN agent for the Gym 'Breakout-v0' environment (epsilon-greedy
# actor, frame-preprocessing pipeline) and runs it until interrupted.

env = gym.make('Breakout-v0')

# Network body handed to the DQN model: one hidden layer of 128 ReLU units.
# NOTE(review): confirm a dense topology is intended for the stacked-frame
# input produced by the preprocessing pipeline below.
topology_config = {
    'layer_sizes': [128],
    'activation': 'relu',
}
topology = models.topology.DenseTopology(**topology_config)

# NOTE(review): `gamma` / `tau` are presumably the discount factor and the
# target-network update rate -- confirm against models.DQNModel.
dqn_config = {
    'gamma': 0.99,
    'tau': 1.0,
    'optimizer': optimizers.Adam(0.0001),
}
model = models.DQNModel(topology, **dqn_config)

# NOTE(review): positional arguments look like (start, end, steps, ...);
# verify their meaning against auxiliary.schedules.LinearDecay.
epsilon_schedule = auxiliary.schedules.LinearDecay(1.0, 0.1, 500, -1)
actor = actors.EpsilonGreedyActor(epsilon_schedule)
buffer_size = 10000
batch_size = 64

# State preprocessing steps, added in this order: greyscale conversion,
# downsample to 84x84, combine 2 consecutive frames with 'max', stack 4
# consecutive frames.
#
# The instance is bound to `state_pipeline`, not `pipeline`: rebinding
# `pipeline` would overwrite the imported `pavlov.pipeline` module, so
# executing this cell a second time in the same session would fail at
# `pipeline.Pipeline()`.
state_pipeline = pipeline.Pipeline()
state_pipeline.add(transformations.rgb_to_grey())
state_pipeline.add(transformations.downsample(new_shape=(84, 84)))
state_pipeline.add(transformations.combine_consecutive(2, 'max'))
state_pipeline.add(transformations.stack_consecutive(4))

agent = agents.Agent(env,
                     state_pipeline=state_pipeline,
                     model=model, actor=actor,
                     buffer_size=buffer_size, batch_size=batch_size,
                     report_freq=1, warmup_length=50)

# Does not return on its own; interrupt the kernel to stop training.
agent.run_indefinitely()