# Environment Setup

In [1]:
import numpy as np
from gym import logger

In [2]:
from fish_pond import FishPondEnv

In [3]:
# Only surface gym log messages at ERROR severity or higher (logger.ERROR == 40).
logger.set_level(logger.ERROR)
# Pin NumPy's global RNG so repeated runs draw the same random numbers.
np.random.seed(0)

In [4]:
# Configuration dictionary passed to FishPondEnv.
# Grouping and meanings are inferred from the key names only — TODO confirm
# against the fish_pond module.
params = dict(
    # Agents
    num_agents=1,
    health_max=100,
    # World layout (the rendered grid in the test output is 10 x 10)
    grid_size=10,
    pond_size_ratio=0.25,
    fishing_zone_size=4,
    # Fish population
    fish_count_initial=10,
    fish_count_max=50,
    fish_regeneration_rate=0.05,
    # Per-step health and reward accounting
    hunger_per_step=-5,
    nutrition_per_fish=10,
    reward_per_step=1,
    # NOTE(review): the training log reports 4 steps per episode with this set
    # to 3 — confirm whether the limit is inclusive.
    episode_length=3,
)

In [5]:
# Build the environment from the configuration dict defined above.
env = FishPondEnv(params)
# Seed the environment with the same value (0) used for NumPy's RNG.
env.seed(0)

# Agent

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam

In [7]:
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [8]:
# Size of the environment's action space; handed to DQNAgent as `nb_actions`.
nb_actions = env.action_space.n

In [9]:
# Q-network: flatten the (window_length=1, *state_shape) observation, pass it
# through two small ReLU layers, and emit one linear Q-value per action.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.state_space.shape))
model.add(Dense(16, activation='relu'))
model.add(Dense(5, activation='relu'))
# The output width must equal the number of actions the agent chooses between,
# so derive it from the environment instead of hard-coding it.
model.add(Dense(nb_actions, activation='linear'))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [10]:
# Print the layer-by-layer output shapes and parameter counts as a sanity check.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 102)               0         
_________________________________________________________________
dense (Dense)                (None, 16)                1648      
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 85        
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 30        
Total params: 1,763
Trainable params: 1,763
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Replay memory for the agent, capped at 500 entries. window_length=1 matches
# the leading (1,) dimension of the model's input shape.
memory = SequentialMemory(limit=500, window_length=1)

In [12]:
# Action-selection policy given to the DQN agent, built with the library's
# default arguments.
policy = BoltzmannQPolicy()

In [13]:
# Assemble the DQN agent from the network, replay memory and policy.
#
# The warm-up phase only collects experience; no gradient updates happen until
# it is over. It therefore has to be shorter than the number of steps passed to
# fit(): with warm-up equal to the 100 training steps the agent never learns
# (the training log shows "loss: --" for every episode). 32 matches DQNAgent's
# default batch size, so the first update can draw a full batch.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=32,
    target_model_update=1e-2,  # values below 1 select soft target-network updates
    policy=policy,
)

In [14]:
# Compile the agent with Adam and track mean absolute error as a metric.
# `learning_rate` is the supported keyword; the `lr` alias is deprecated in
# tf.keras optimizers and rejected by newer Keras releases.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Train

TODO: Add an imitation mode for partial training.

In [15]:
# Train the agent on the environment; verbose=2 logs one line per episode.
dqn.fit(env, nb_steps=100, verbose=2)

# Persist the resulting weights, replacing any file left by an earlier run.
weights_path = 'dqn_{}_weights.h5f'.format('FishPond')
dqn.save_weights(weights_path, overwrite=True)

Training for 100 steps ...
  4/100: episode: 1, duration: 0.050s, episode steps:   4, steps per second:  80, episode reward:  4.000, mean reward:  1.000 [ 1.000,  1.000], mean action: 4.000 [4.000, 4.000],  loss: --, mae: --, mean_q: --
  8/100: episode: 2, duration: 0.005s, episode steps:   4, steps per second: 786, episode reward:  4.000, mean reward:  1.000 [ 1.000,  1.000], mean action: 4.000 [4.000, 4.000],  loss: --, mae: --, mean_q: --
 12/100: episode: 3, duration: 0.005s, episode steps:   4, steps per second: 867, episode reward:  4.000, mean reward:  1.000 [ 1.000,  1.000], mean action: 4.000 [4.000, 4.000],  loss: --, mae: --, mean_q: --
 16/100: episode: 4, duration: 0.005s, episode steps:   4, steps per second: 865, episode reward:  4.000, mean reward:  1.000 [ 1.000,  1.000], mean action: 4.000 [4.000, 4.000],  loss: --, mae: --, mean_q: --
 20/100: episode: 5, duration: 0.005s, episode steps:   4, steps per second: 851, episode reward:  4.000, mean reward:  1.000 [ 1.000

# Test and Visualize

In [16]:
# Run a single evaluation episode. visualize=True makes the run render the
# environment (fish counts, agent health and the grid appear in the output).
# The call's return value, a Keras History object, is echoed as the cell result.
dqn.test(env, nb_episodes=1, visualize=True)

Fish Count: 12.0
Fish Count (Running): 12.1550625
Agent Health: [100.]


1 1 1 1 1 1 1 1 1 1
1 1 1 2 2 2 2 1 1 1
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 3 0 0 0 0
0 0 0 0 0 0 0 0 0 0


Episode 1: reward: 4.000, steps: 4


<tensorflow.python.keras.callbacks.History at 0x8626d3f608>