In [5]:
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution2D, Permute, Input
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

In [18]:
INPUT_SHAPE = (10,)
WINDOW_LENGTH = 4
nb_actions = 5

input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE
model = Sequential()

# (width, height, channels)
#model.add(Permute((2, 1), input_shape=input_shape))
model.add(Input(shape=(4, 10)))
model.add(Flatten())
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 40)                0         
                                                                 
 dense_4 (Dense)             (None, 32)                1312      
                                                                 
 activation_4 (Activation)   (None, 32)                0         
                                                                 
 dense_5 (Dense)             (None, 5)                 165       
                                                                 
 activation_5 (Activation)   (None, 5)                 0         
                                                                 
Total params: 1,477
Trainable params: 1,477
Non-trainable params: 0
_________________________________________________________________
None


In [7]:
class myProcessor(Processor):
    def process_observation(self, observation):
        print("obs", observation)
        return np.array(observation)

    def process_state_batch(self, batch):
        # We could perform this processing step in `process_observation`. In this case, however,
        # we would need to store a `float32` array instead, which is 4x more memory intensive than
        # an `uint8` array. This matters if we store 1M observations.
        # processed_batch = batch.astype('float32') / 255.
        return batch

    def process_reward(self, reward):
        return np.clip(reward, -1., 1.)

processor = myProcessor()

In [19]:
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                              nb_steps=1000000)
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)
dqn.compile(Adam(learning_rate=.00025), metrics=['mae'])

In [21]:
dqn.forward(np.random.normal(size=(10)))

1

In [9]:
cycles = 20

def env_creator(render_mode="rgb_array", cycles=200):
    from src.world import world_utils
    env = world_utils.env(render_mode=render_mode, max_cycles=cycles)
    return env

env = env_creator(render_mode="rgb_array", cycles=cycles)

In [10]:
def train():
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in tensorflow.keras callbacks!
    weights_filename = f'dqn_noenv_weights.h5f'
    checkpoint_weights_filename = 'dqn_noenv_weights_{step}.h5f'
    log_filename = f'dqn_noenv_log.json'
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)

def test():
    weights_filename = f'dqn_{args.env_name}_weights.h5f'
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)

In [11]:
train()

Training for 1750000 steps ...
obs None
Interval 1 (0 steps performed)


ValueError: Error when checking input: expected input_1 to have 3 dimensions, but got array with shape (1, 4)