In [1]:
import os
# NOTE(review): hardcoded absolute working directory. Presumably this is where
# the kerasgym package lives in the author's environment, so that the imports
# below resolve — confirm, and prefer a configurable path for portability.
os.chdir('/home')

from warnings import filterwarnings
# Ignore warnings whose originating module name matches 'skimage'.
filterwarnings('ignore', module='skimage')

import numpy as np
import pandas as pd
import gym

from IPython import display
import matplotlib.pyplot as plt
%matplotlib inline

from kerasgym.models import cnn_model_base, dense_model_base, DDPGModel, DQNModel
from kerasgym.agents import Agent
from kerasgym.agents.process_state import downsample, rgb_to_binary
from kerasgym.agents.process_state import stack_consecutive, combine_consecutive
from kerasgym.agents.process_prediction import argmax_scalar, scalar_to_onehot
from kerasgym.agents.exploration import LinearDecay, ScopingPeriodic, EpsilonGreedy
from kerasgym.agents.exploration import graph_schedule
from keras.optimizers import RMSprop, Adam

Using TensorFlow backend.


In [None]:
# TODO: split agents into discrete and continuous, do cleanup from there

In [None]:
# CARTPOLE
# DQN agent with a small dense network on CartPole-v1 (discrete actions).

env = gym.make('CartPole-v1')
env.reset()

# Dense base network fed directly with the environment's observation shape.
base_config = dict(
    in_shape=env.observation_space.shape,
    layer_sizes=[64, 32, 16],
    activation='relu',
)
base_model = dense_model_base(**base_config)

# One output per discrete action.
dqn_config = dict(
    action_dim=env.action_space.n,
    gamma=0.99,
    tau=1.0,
    # alternative optimizer tried: RMSprop(lr=0.0025, rho=0.95, epsilon=0.01)
    optimizer=Adam(lr=0.001),
)
model = DQNModel(base_model, **dqn_config)

# Epsilon-greedy exploration driven by a linear decay schedule.
schedule = LinearDecay(1.0, 0.1, 500, -1)
explorer = EpsilonGreedy(schedule)
buffer_size, batch_size = 10000, 32

# No state preprocessing; the agent trains from the first step (warmup_length=0).
agent = Agent(
    env,
    state_processing_fns=[],
    model=model,
    ptoc_fn=argmax_scalar(),
    ctol_fn=scalar_to_onehot(),
    explorer=explorer,
    buffer_size=buffer_size,
    batch_size=batch_size,
    warmup_length=0,
)
agent.reset()
agent.run_indefinitely()

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
Episode: 0. Average Reward: 6.5. Average Duration: 6.5. Explore: 0.998
Episode: 10. Average Reward: 17.1. Average Duration: 17.1. Explore: 0.98
Episode: 20. Average Reward: 21.4. Average Duration: 21.4. Explore: 0.962
Episode: 30. Average Reward: 27.1. Average Duration: 27.1. Explore: 0.944
Episode: 40. Average Reward: 18.9. Average Duration: 18.9. Explore: 0.926
Episode: 50. Average Reward: 23.3. Average Duration: 23.3. Explore: 0.908
Episode: 60. Average Reward: 29.2. Average Duration: 29.2. Explore: 0.89
Episode: 70. Average Reward: 24.0. Average Duration: 24.0. Explore: 0.872
Episode: 80. Average Reward: 22.7. Average Duration: 22.7. Explore: 0.854
Episode: 90. Average Reward: 41.3. Average Duration: 41.3. Explore: 0.836
Episode: 100. Average Reward: 27.9. Average Duration: 27.9. Explore: 0.818
Episode: 110. Average Reward: 38.4. Average Duration: 38.4. Explore: 0.8
Episode: 

In [None]:
# MOUNTAIN CAR
# DDPG agent with a small dense network on MountainCarContinuous-v0
# (continuous action space).

env = gym.make('MountainCarContinuous-v0')
env.reset()

base_config = {
    'in_shape': env.observation_space.shape,
    'layer_sizes': [16, 16],
    'activation': 'relu'
}

base_model = dense_model_base(**base_config)

ddpg_config = {
    # Continuous action space: the action dimension comes from the Box shape.
    'action_dim': env.action_space.shape[0],
    # 'tanh' rather than 'softmax': softmax over a single output unit is
    # always 1.0, so the actor could never vary its action. tanh is bounded
    # to [-1, 1], the action range of this environment.
    # NOTE(review): assumes actor_activation is applied to the actor's
    # action_dim-wide output layer — confirm against DDPGModel.
    'actor_activation': 'tanh',
    'gamma': 0.99,
    'tau': 0.125,
    'actor_alpha': 1e-3,
    'critic_alpha': 1e-3
}
model = DDPGModel(base_model, **ddpg_config)

schedule = LinearDecay(1.0, 0.1, 500, -1)
explorer = EpsilonGreedy(schedule, discrete=False)
buffer_size = 10000
batch_size = 32

# ptoc_fn / ctol_fn are pass-throughs here: each takes two arguments and
# returns the first unchanged. A lambda body is an expression, so it must not
# contain the `return` keyword.
agent = Agent(env,
              state_processing_fns=[],
              model=model, ptoc_fn=lambda x, y: x,
              ctol_fn=lambda x, y: x,
              explorer=explorer, buffer_size=buffer_size,
              batch_size=batch_size, warmup_length=0)
agent.reset()
agent.run_indefinitely()

In [None]:
# BREAKOUT
# DQN agent with a convolutional network on BreakoutDeterministic-v4.
# This cell only builds and resets the agent; training is started separately.

env = gym.make('BreakoutDeterministic-v4')
env.reset()

# custom shape due to downsampling and stacking
shape = (105, 80, 4)

# Convolutional base network over the processed state.
base_config = dict(
    in_shape=shape,
    conv_layer_sizes=[16, 32],
    fc_layer_sizes=[256],
    kernel_sizes=[(8, 8), (4, 4)],
    strides=[(4, 4), (2, 2)],
    activation='relu',
)
base_model = cnn_model_base(**base_config)

# One output per discrete action.
dqn_config = dict(
    action_dim=env.action_space.n,
    gamma=0.99,
    tau=0.15,
    optimizer=RMSprop(lr=0.00025, rho=0.95, epsilon=0.01),
)
model = DQNModel(base_model, **dqn_config)

schedule = LinearDecay(1.0, 0.1, 1000000, interval=1)
explorer = EpsilonGreedy(schedule)
buffer_size, batch_size = 100000, 32

# State preprocessing, applied in list order. The downsample target is the
# first two entries of `shape`; the stack depth is its last entry (4).
# Disabled alternative step: combine_consecutive(fun='diff')
frame_pipeline = [
    downsample(shape[:-1]),
    rgb_to_binary(),
    stack_consecutive(shape[-1]),
]

agent = Agent(
    env,
    state_processing_fns=frame_pipeline,
    model=model,
    ptoc_fn=argmax_scalar(),
    ctol_fn=scalar_to_onehot(),
    explorer=explorer,
    buffer_size=buffer_size,
    batch_size=batch_size,
    warmup_length=50000,
    state_dtype=np.uint8,
)
agent.reset()

In [None]:
# Start training whichever `agent` was most recently built (the Breakout agent
# when cells run top to bottom). Presumably this runs until interrupted, as the
# method name suggests — confirm.
agent.run_indefinitely()