In [2]:
import json
import os

import numpy as np

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc
rc('text', usetex=True)
%matplotlib inline

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import legacy, Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy, LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory  # For experience replay!

from rescue_scheduler_multi_agent_env_v4 import *
Adam._name = "hey"


2023-04-02 02:27:18.366673: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-02 02:27:18.661133: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-02 02:27:18.663845: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
MILLION = 1000000
HTHOUSAND = 100000
THOUSAND = 1000

In [4]:
pygame.init()
pygame.display.set_caption("Resource Extraction Game")
window = pygame.display.set_mode((WINDOW_PIXELS, WINDOW_PIXELS+200))
env = Resuerv4(window = window)

In [5]:
states = env.observation_space.shape[0]
actions = env.action_space.n

In [6]:
states, actions

(8, 49)

In [7]:
def build_model(states, actions, h_nodes, h_act):
    model = Sequential()
    model.add(Flatten(input_shape=(1, states)))
    for n, a in zip(h_nodes, h_act):
        model.add(Dense(n, activation=a))
    model.add(Dense(actions, activation='linear'))
    return model

def build_agent(model, actions, tmu, policy, ml):
    memory = SequentialMemory(limit=ml, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=100,
                   target_model_update=tmu)
    return dqn


In [8]:
model = build_model(states, actions, [32, 16], ['relu', 'relu'])

In [9]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 8)                 0         
                                                                 
 dense (Dense)               (None, 32)                288       
                                                                 
 dense_1 (Dense)             (None, 16)                528       
                                                                 
 dense_2 (Dense)             (None, 49)                833       
                                                                 
Total params: 1,649
Trainable params: 1,649
Non-trainable params: 0
_________________________________________________________________


In [10]:
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05, nb_steps=10000)

dqn = build_agent(model, actions, 0.01, policy, 50000)
dqn.compile(legacy.Adam(learning_rate=1e-3), metrics=['mae'])
# dqn.compile(Adam(lr=1e-2), metrics=['mse'])

2023-04-02 02:27:24.083389: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:353] MLIR V1 optimization pass is not enabled
2023-04-02 02:27:24.174985: W tensorflow/c/c_api.cc:300] Operation '{name:'dense_2_1/bias/Assign' id:144 op device:{requested: '', assigned: ''} def:{{{node dense_2_1/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_2_1/bias, dense_2_1/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


In [11]:
history = dqn.fit(env, nb_steps=10000, visualize=True, verbose=1)

Training for 10000 steps ...
Interval 1 (0 steps performed)


  updates=self.state_updates,
2023-04-02 02:27:24.744729: W tensorflow/c/c_api.cc:300] Operation '{name:'dense_2/BiasAdd' id:75 op device:{requested: '', assigned: ''} def:{{{node dense_2/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_2/MatMul, dense_2/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-04-02 02:27:24.765535: W tensorflow/c/c_api.cc:300] Operation '{name:'total_3/Assign' id:308 op device:{requested: '', assigned: ''} def:{{{node total_3/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](total_3, total_3/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modif

  101/10000 [..............................] - ETA: 57:52 - reward: -3.8218

2023-04-02 02:28:00.648194: W tensorflow/c/c_api.cc:300] Operation '{name:'dense_2_1/BiasAdd' id:149 op device:{requested: '', assigned: ''} def:{{{node dense_2_1/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_2_1/MatMul, dense_2_1/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-04-02 02:28:00.999878: W tensorflow/c/c_api.cc:300] Operation '{name:'loss_3/AddN' id:409 op device:{requested: '', assigned: ''} def:{{{node loss_3/AddN}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_3/mul, loss_3/mul_1)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-04-02 02:28:0

done, took 3750.425 seconds


In [12]:
history.history

{'episode_reward': [-1954.0,
  -1143.0,
  -1910.0,
  -2267.0,
  -2063.0,
  -1833.0,
  -1753.0,
  -1478.0,
  -1433.0,
  -1300.0,
  -1476.0,
  -1331.0,
  -840.0,
  -764.0,
  -891.0,
  -715.0,
  -464.0,
  -479.0,
  -448.0,
  -354.0],
 'nb_episode_steps': [500,
  294,
  500,
  500,
  500,
  500,
  500,
  500,
  500,
  500,
  500,
  500,
  500,
  500,
  500,
  500,
  500,
  500,
  500,
  500],
 'nb_steps': [500,
  794,
  1294,
  1794,
  2294,
  2794,
  3294,
  3794,
  4294,
  4794,
  5294,
  5794,
  6294,
  6794,
  7294,
  7794,
  8294,
  8794,
  9294,
  9794]}

In [13]:
data = history.history
data['episode_reward'] = [float(v) for v in data['episode_reward']]
data['nb_episode_steps'] = [int(v) for v in data['nb_episode_steps']]
data['nb_steps'] = [int(v) for v in data['nb_steps']]

import pandas as pd
data_df = pd.DataFrame(data)
data_df.to_csv("rescuer4_2_victims_2_agents_20_episodes_500_steps.csv")

with open("rescuer4_2_victims_2_agents_20_episodes_500_steps.json", 'w') as f:
    json.dump(data, f)
dqn.save_weights("rescuer4_2_victims_2_agents_20_episodes_500_steps.h5f", overwrite=True)

In [21]:
import seaborn as sns
import pandas as pd
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)
data = pd.DataFrame(data)
rewards = data["episode_reward"]
sns.scatterplot(x= range(20), y = rewards)

ValueError: array length 20 does not match index length 53

# Reload from memory and test

In [8]:
pygame.init()
pygame.display.set_caption("Resource Extraction Game")
window = pygame.display.set_mode((WINDOW_PIXELS, WINDOW_PIXELS+200))
env = Resuerv4(window=window)

[[0 0]
 [4 3]]


In [9]:
def build_model(states, actions, h_nodes, h_act):
    model = Sequential()
    model.add(Flatten(input_shape=(1, states)))
    for n, a in zip(h_nodes, h_act):
        model.add(Dense(n, activation=a))
    model.add(Dense(actions, activation='linear'))
    return model

def build_agent(model, actions, tmu, policy, ml):
    memory = SequentialMemory(limit=ml, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=100,
                   target_model_update=tmu)
    return dqn


In [10]:
states = env.observation_space.shape[0]
actions = env.action_space.n
model = build_model(states, actions, [32, 16], ['relu', 'relu'])
print(model.summary())
dqn = build_agent(model, actions, 0.01, EpsGreedyQPolicy(eps=0), 50000)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Load weights
dqn.load_weights("rescuer4_2_victims_2_agents_20_episodes_500_steps.h5f")

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 8)                 0         
                                                                 
 dense_3 (Dense)             (None, 32)                288       
                                                                 
 dense_4 (Dense)             (None, 16)                528       
                                                                 
 dense_5 (Dense)             (None, 49)                833       
                                                                 
Total params: 1,649
Trainable params: 1,649
Non-trainable params: 0
_________________________________________________________________
None


2023-04-02 00:09:21.172666: W tensorflow/c/c_api.cc:300] Operation '{name:'dense_4/kernel/Assign' id:485 op device:{requested: '', assigned: ''} def:{{{node dense_4/kernel/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_4/kernel, dense_4/kernel/Initializer/stateless_random_uniform)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-04-02 00:09:21.680396: W tensorflow/c/c_api.cc:300] Operation '{name:'count_7/Assign' id:767 op device:{requested: '', assigned: ''} def:{{{node count_7/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](count_7, count_7/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future.

In [11]:
nb_episodes = 10

In [12]:
scores = dqn.test(env, nb_episodes=nb_episodes, visualize=True, verbose=0)

  updates=self.state_updates,
2023-04-02 00:09:22.232267: W tensorflow/c/c_api.cc:300] Operation '{name:'dense_5/BiasAdd' id:519 op device:{requested: '', assigned: ''} def:{{{node dense_5/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_5/MatMul, dense_5/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


In [13]:
rewards = np.array(scores.history['episode_reward'])

In [14]:
np.savetxt(f"rescuerv3_training_{nb_episodes}.txt", rewards)

In [15]:
rewards

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])