# Environment Setup

In [1]:
import numpy as np
from gym import logger

In [2]:
from fish_pond import FishPondEnv

In [3]:
# Only surface gym log messages at ERROR level or above; use the named
# constant rather than the bare integer 40 so the intent is readable.
logger.set_level(logger.ERROR)
# Seed NumPy's global RNG so repeated runs draw the same random numbers.
np.random.seed(0)

In [4]:
# Configuration handed to FishPondEnv. Key names are consumed by the
# environment, so they must stay exactly as spelled here.
params = dict(
    num_agents=1,
    health_max=100,
    grid_size=5,
    pond_size_ratio=0.25,
    fishing_zone_size=4,
    fish_count_initial=10,
    fish_count_max=50,
    fish_regeneration_rate=0.05,
    hunger_per_step=-5,
    nutrition_per_fish=10,
    reward_per_step=1,
    episode_length=3,
)

In [5]:
# Build the fish-pond environment from the `params` dict defined above.
env = FishPondEnv(params)
# Seed the environment with 0 (presumably for reproducible rollouts --
# TODO confirm what FishPondEnv.seed actually seeds).
env.seed(0)

# Agent

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam

In [7]:
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [8]:
# Size of the environment's action space (its `.n` attribute); passed to the
# DQN agent below as `nb_actions`.
nb_actions = env.action_space.n

In [9]:
# Q-network: a small fully connected net mapping a flattened state to one
# Q-value per action.
model = Sequential()
# The leading 1 in input_shape matches window_length=1 of the replay memory
# created further down (presumably keras-rl prepends the window axis -- confirm).
model.add(Flatten(input_shape=(1,) + env.state_space.shape))
model.add(Dense(16, activation='relu'))
model.add(Dense(5, activation='relu'))
# Output layer: size it from the environment's action count instead of a
# hard-coded 5, so the network keeps matching the agent's `nb_actions` if the
# action space changes. Linear activation because Q-values are unbounded.
model.add(Dense(nb_actions, activation='linear'))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [10]:
# Print the layer-by-layer shape/parameter summary as a sanity check.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 27)                0         
_________________________________________________________________
dense (Dense)                (None, 16)                448       
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 85        
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 30        
Total params: 563
Trainable params: 563
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Replay memory handed to the DQN agent, configured with limit=500.
# window_length=1 matches the leading 1 in the model's input_shape.
memory = SequentialMemory(limit=500, window_length=1)

In [12]:
# Action-selection policy passed to the agent below; built with its defaults.
policy = BoltzmannQPolicy()

In [13]:
# Assemble the DQN agent from the Q-network, replay memory and policy
# built in the preceding cells.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=100,
    target_model_update=1e-2,
    policy=policy,
)

In [14]:
# Compile the agent with an Adam optimizer and track mean absolute error.
# `learning_rate` is the current argument name; `lr` is a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Train

In [17]:
# Run a short training session, then persist the agent's weights to disk.
# NOTE(review): nb_steps=5 is below the agent's nb_steps_warmup=100, so this
# run presumably finishes before any learning update -- confirm this is meant
# as a smoke test only.
weights_path = 'dqn_{}_weights.h5f'.format('FishPond')
dqn.fit(env, nb_steps=5, verbose=2)
dqn.save_weights(weights_path, overwrite=True)

Training for 5 steps ...
old fish count -  10.0
new fish count -  10.5 11.025
False
old fish count -  10.5
new fish count -  11.025 11.57625
False
old fish count -  11.025
new fish count -  11.57625 12.1550625
False
old fish count -  11.57625
new fish count -  12.1550625 12.762815625
True
 4/5: episode: 1, duration: 0.009s, episode steps:   4, steps per second: 450, episode reward:  4.000, mean reward:  1.000 [ 1.000,  1.000], mean action: 0.000 [0.000, 0.000],  loss: --, mae: --, mean_q: --
old fish count -  10.0
new fish count -  10.5 11.025
False
done, took 0.012 seconds


In [18]:
# Evaluate the agent for 5 episodes with rendering enabled. The returned
# History object is kept in a variable so it can be inspected later and its
# bare repr does not leak into the cell output.
test_history = dqn.test(env, nb_episodes=5, visualize=True)

Testing for 5 episodes ...
old fish count -  10.0
new fish count -  10.5 11.025
False
Hello
old fish count -  10.5
new fish count -  11.025 11.57625
False
Hello
old fish count -  11.025
new fish count -  11.57625 12.1550625
False
Hello
old fish count -  11.57625
new fish count -  12.1550625 12.762815625
True
Hello
Episode 1: reward: 4.000, steps: 4
old fish count -  10.0
new fish count -  10.5 11.025
False
Hello
old fish count -  10.5
new fish count -  11.025 11.57625
False
Hello
old fish count -  11.025
new fish count -  11.57625 12.1550625
False
Hello
old fish count -  11.57625
new fish count -  12.1550625 12.762815625
True
Hello
Episode 2: reward: 4.000, steps: 4
old fish count -  9
new fish count -  9.45 9.9225
False
Hello
old fish count -  8
new fish count -  8.4 8.82
False
Hello
old fish count -  7
new fish count -  7.35 7.717499999999999
False
Hello
old fish count -  6
new fish count -  6.3 6.615
True
Hello
Episode 3: reward: 4.000, steps: 4
old fish count -  10.0
new fish count

<tensorflow.python.keras.callbacks.History at 0x97f17ca688>