In [1]:
from __future__ import annotations

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
# Load the important classes
import random as rnd
from typing import Dict, TYPE_CHECKING, List, Tuple, Optional

import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np

import core.log as log
import settings.env_io as env_io
import settings.env_settings_io as env_settings_io
from agents.resource_weighting_agent import ResourceWeightingAgent
from agents.task_pricing_agent import TaskPricingAgent
from env.environment import OnlineFlexibleResourceAllocationEnv
from env.task_stage import TaskStage

if TYPE_CHECKING:
    from env.server import Server
    import numpy as np
    from env.task import Task

In [3]:
from train import run_env, eval_env, allocate_agents

In [4]:
log.console_debug_level = log.LogLevel.INFO
log.debug_filename = 'training.log'

In [5]:
# Setup the environment
_env = OnlineFlexibleResourceAllocationEnv.make('../settings/basic_env.json')

In [6]:
# Setup the agents
_task_pricing_agents = [TaskPricingAgent('Default {}'.format(agent_num)) for agent_num in range(10)]
_resource_weighting_agents = [ResourceWeightingAgent('Default {}'.format(agent_num)) for agent_num in range(10)]

In [7]:
# Create the training envs
_training_envs: List[str] = [f'../settings/eval_envs/eval_env_{training_env_num}.json'
                             for training_env_num in range(10)]
for training_env_num in range(10):
    _env.reset()
    env_io.save_environment(_env, f'../settings/eval_envs/eval_env_{training_env_num}.json')

In [8]:
episode = 0

In [9]:
# Loop over the episodes
episode += 1

In [10]:
log.info(f'Episode: {episode}')
_env.reset()

_server_task_pricing_agents, _server_resource_allocation_agents = allocate_agents(_env, _task_pricing_agents, _resource_weighting_agents)
total_price, num_completed_tasks, num_failed_tasks = run_env(_env, _server_task_pricing_agents,
                                                             _server_resource_allocation_agents)
log.info(f'Total Price: {total_price}, Num Completed Task: {num_completed_tasks}, Num Failed Tasks: {num_failed_tasks}')

Episode: 1
Environment: Env State (0x1f9e7f03cc8) at time step: 0
	Auction Task -> Basic 107 Task (0x1f9e7f19d38) - Unassigned, Storage: 79.0, Comp: 34.0, Results data: 14.0, Auction time: 0, Deadline: 12
	Servers -> Basic 0: [], Basic 1: [], Basic 2: [], Basic 3: [], Basic 4: [], Basic 5: []
Unallocated tasks
	Basic 87 Task (0x1f9e7f1f948) - Unassigned, Storage: 56.0, Comp: 45.0, Results data: 12.0, Auction time: 1, Deadline: 13
	Basic 18 Task (0x1f9e7f1eaf8) - Unassigned, Storage: 68.0, Comp: 40.0, Results data: 13.0, Auction time: 3, Deadline: 12
	Basic 30 Task (0x1f9e7f12048) - Unassigned, Storage: 51.0, Comp: 37.0, Results data: 19.0, Auction time: 3, Deadline: 14
	Basic 38 Task (0x1f9e7f140d8) - Unassigned, Storage: 79.0, Comp: 31.0, Results data: 17.0, Auction time: 3, Deadline: 9
	Basic 49 Task (0x1f9e7f14828) - Unassigned, Storage: 56.0, Comp: 26.0, Results data: 26.0, Auction time: 3, Deadline: 13
	Basic 111 Task (0x1f9e7f19f78) - Unassigned, Storage: 80.0, Comp: 38.0, Result

Auction prices - {Basic 0 Server: 8, Basic 1 Server: 9, Basic 2 Server: 8, Basic 3 Server: 4, Basic 4 Server: 6, Basic 5 Server: 10}
Auction Rewards - {Basic 3 Server: 4}
Next State: Env State (0x1f9e7f03ea8) at time step: 1
	Auction Task -> None
	Servers -> Basic 0: [], Basic 1: [], Basic 2: [], Basic 3: [Basic 107, Basic 87], Basic 4: [], Basic 5: []

Resource allocation weights - {Basic 0 Server: [], Basic 1 Server: [], Basic 2 Server: [], Basic 3 Server: [Basic 107 Task: 3, Basic 87 Task: 9], Basic 4 Server: [], Basic 5 Server: []}
Resource allocation Rewards - {Basic 3: []}
Env Done: False
Next State: Env State (0x1f9e7f03cc8) at time step: 2
	Auction Task -> None
	Servers -> Basic 0: [], Basic 1: [], Basic 2: [], Basic 3: [Basic 107, Basic 87], Basic 4: [], Basic 5: []

Resource allocation weights - {Basic 0 Server: [], Basic 1 Server: [], Basic 2 Server: [], Basic 3 Server: [Basic 107 Task: 1, Basic 87 Task: 4], Basic 4 Server: [], Basic 5 Server: []}
Resource allocation Rewards

Resource allocation weights - {Basic 0 Server: [Basic 121 Task: 1.0], Basic 1 Server: [Basic 2 Task: 10, Basic 114 Task: 10], Basic 2 Server: [Basic 111 Task: 3, Basic 1 Task: 9], Basic 3 Server: [Basic 107 Task: 10, Basic 87 Task: 3, Basic 30 Task: 5], Basic 4 Server: [Basic 49 Task: 3, Basic 12 Task: 7], Basic 5 Server: [Basic 18 Task: 2, Basic 38 Task: 6, Basic 106 Task: 10]}
Resource allocation Rewards - {Basic 0: [], Basic 1: [], Basic 2: [], Basic 3: [], Basic 4: [], Basic 5: []}
Env Done: False
Next State: Env State (0x1f9e7f03cc8) at time step: 7
	Auction Task -> None
	Servers -> Basic 0: [Basic 121], Basic 1: [Basic 2, Basic 114], Basic 2: [Basic 111, Basic 1], Basic 3: [Basic 107, Basic 87, Basic 30], Basic 4: [Basic 49, Basic 12], Basic 5: [Basic 18, Basic 38, Basic 106]

Resource allocation weights - {Basic 0 Server: [Basic 121 Task: 1.0], Basic 1 Server: [Basic 2 Task: 3, Basic 114 Task: 10], Basic 2 Server: [Basic 111 Task: 10, Basic 1 Task: 3], Basic 3 Server: [Basic 107

Resource allocation weights - {Basic 0 Server: [Basic 0 Task: 6, Basic 101 Task: 7, Basic 121 Task: 8], Basic 1 Server: [Basic 2 Task: 9, Basic 114 Task: 1, Basic 6 Task: 10], Basic 2 Server: [Basic 1 Task: 8, Basic 78 Task: 8, Basic 29 Task: 7], Basic 3 Server: [Basic 87 Task: 7, Basic 107 Task: 3, Basic 30 Task: 8, Basic 56 Task: 8, Basic 119 Task: 10], Basic 4 Server: [Basic 12 Task: 9, Basic 112 Task: 9, Basic 84 Task: 6, Basic 49 Task: 10, Basic 88 Task: 1], Basic 5 Server: [Basic 18 Task: 10, Basic 106 Task: 5]}
Resource allocation Rewards - {Basic 0: [], Basic 1: [], Basic 2: [Basic 1], Basic 3: [], Basic 4: [], Basic 5: [Basic 106]}
Env Done: False
Next State: Env State (0x1f9e7f03ea8) at time step: 12
	Auction Task -> Basic 15 Task (0x1f9e7f1ed38) - Unassigned, Storage: 71.0, Comp: 44.0, Results data: 17.0, Auction time: 12, Deadline: 24
	Servers -> Basic 0: [Basic 0, Basic 101, Basic 121], Basic 1: [Basic 2, Basic 114, Basic 6], Basic 2: [Basic 78, Basic 29], Basic 3: [Basic 

Resource allocation weights - {Basic 0 Server: [Basic 0 Task: 2, Basic 101 Task: 7, Basic 68 Task: 1], Basic 1 Server: [Basic 114 Task: 1, Basic 6 Task: 8, Basic 7 Task: 8, Basic 14 Task: 7, Basic 51 Task: 4], Basic 2 Server: [Basic 78 Task: 9, Basic 29 Task: 3, Basic 31 Task: 3, Basic 85 Task: 10], Basic 3 Server: [Basic 56 Task: 2, Basic 119 Task: 8, Basic 15 Task: 4, Basic 37 Task: 5], Basic 4 Server: [Basic 84 Task: 10, Basic 88 Task: 8], Basic 5 Server: []}
Resource allocation Rewards - {Basic 0: [], Basic 1: [], Basic 2: [], Basic 3: [], Basic 4: []}
Env Done: False
Next State: Env State (0x1f9e7ffadb8) at time step: 16
	Auction Task -> Basic 118 Task (0x1f9e7f213a8) - Unassigned, Storage: 63.0, Comp: 49.0, Results data: 25.0, Auction time: 16, Deadline: 23
	Servers -> Basic 0: [Basic 0, Basic 101, Basic 68], Basic 1: [Basic 114, Basic 6, Basic 7, Basic 14, Basic 51], Basic 2: [Basic 29, Basic 31, Basic 85, Basic 78], Basic 3: [Basic 56, Basic 119, Basic 15, Basic 37], Basic 4: [

Auction prices - {Basic 0 Server: 5, Basic 1 Server: 2, Basic 2 Server: 9, Basic 3 Server: 7, Basic 4 Server: 7, Basic 5 Server: 3}
Auction Rewards - {Basic 1 Server: 2}
Next State: Env State (0x1f9e7f03ea8) at time step: 21
	Auction Task -> Basic 115 Task (0x1f9e7f211f8) - Unassigned, Storage: 69.0, Comp: 30.0, Results data: 19.0, Auction time: 21, Deadline: 29
	Servers -> Basic 0: [Basic 68, Basic 118, Basic 80], Basic 1: [Basic 7, Basic 14, Basic 51, Basic 71, Basic 20, Basic 99, Basic 104, Basic 91], Basic 2: [Basic 85, Basic 113, Basic 53], Basic 3: [Basic 119, Basic 15, Basic 90], Basic 4: [], Basic 5: [Basic 69]

Auction prices - {Basic 0 Server: 5, Basic 1 Server: 3, Basic 2 Server: 10, Basic 3 Server: 7, Basic 4 Server: 6, Basic 5 Server: 7}
Auction Rewards - {Basic 1 Server: 3}
Next State: Env State (0x1f9e800df98) at time step: 21
	Auction Task -> None
	Servers -> Basic 0: [Basic 68, Basic 118, Basic 80], Basic 1: [Basic 7, Basic 14, Basic 51, Basic 71, Basic 20, Basic 99, B

Resource allocation Rewards - {Basic 0: [Basic 68], Basic 1: [Basic 14, Basic 71], Basic 2: [], Basic 3: [Basic 15], Basic 4: [], Basic 5: []}
Env Done: False
Next State: Env State (0x1f9e800df98) at time step: 25
	Auction Task -> Basic 8 Task (0x1f9e7f1e9d8) - Unassigned, Storage: 66.0, Comp: 54.0, Results data: 15.0, Auction time: 25, Deadline: 32
	Servers -> Basic 0: [Basic 80], Basic 1: [Basic 51, Basic 20, Basic 99, Basic 104, Basic 91, Basic 115, Basic 28], Basic 2: [Basic 113, Basic 53], Basic 3: [Basic 90, Basic 60, Basic 96, Basic 39], Basic 4: [Basic 35], Basic 5: [Basic 69, Basic 61]

Auction prices - {Basic 0 Server: 9, Basic 1 Server: 1, Basic 2 Server: 6, Basic 3 Server: 10, Basic 4 Server: 6, Basic 5 Server: 9}
Auction Rewards - {Basic 1 Server: 1}
Next State: Env State (0x1f9e804f8b8) at time step: 25
	Auction Task -> Basic 73 Task (0x1f9e7f1f678) - Unassigned, Storage: 50.0, Comp: 56.0, Results data: 23.0, Auction time: 25, Deadline: 35
	Servers -> Basic 0: [Basic 80],

Resource allocation weights - {Basic 0 Server: [Basic 76 Task: 2, Basic 80 Task: 6], Basic 1 Server: [Basic 20 Task: 8, Basic 99 Task: 1, Basic 104 Task: 2, Basic 115 Task: 3, Basic 28 Task: 3, Basic 8 Task: 4, Basic 54 Task: 8, Basic 13 Task: 10], Basic 2 Server: [Basic 75 Task: 10, Basic 53 Task: 1, Basic 47 Task: 5], Basic 3 Server: [Basic 90 Task: 7, Basic 60 Task: 9, Basic 96 Task: 5, Basic 39 Task: 7, Basic 73 Task: 8, Basic 58 Task: 3, Basic 109 Task: 5, Basic 16 Task: 1], Basic 4 Server: [Basic 35 Task: 1.0], Basic 5 Server: [Basic 61 Task: 1.0]}
Resource allocation Rewards - {Basic 0: [], Basic 1: [Basic 99, Basic 115], Basic 2: [], Basic 3: [Basic 60], Basic 4: [], Basic 5: [Basic 61]}
Env Done: False
Next State: Env State (0x1f9e804f8b8) at time step: 30
	Auction Task -> Basic 17 Task (0x1f9e7f1ec18) - Unassigned, Storage: 53.0, Comp: 32.0, Results data: 22.0, Auction time: 30, Deadline: 39
	Servers -> Basic 0: [Basic 76, Basic 80], Basic 1: [Basic 20, Basic 104, Basic 28, B

Auction prices - {Basic 0 Server: 10, Basic 1 Server: 0, Basic 2 Server: 6, Basic 3 Server: 8, Basic 4 Server: 7, Basic 5 Server: 4}
Auction Rewards - {Basic 5 Server: 4}
Next State: Env State (0x1f9e800df98) at time step: 33
	Auction Task -> Basic 63 Task (0x1f9e7f0c438) - Unassigned, Storage: 54.0, Comp: 32.0, Results data: 15.0, Auction time: 33, Deadline: 43
	Servers -> Basic 0: [Basic 76, Basic 95, Basic 3], Basic 1: [Basic 54, Basic 13], Basic 2: [Basic 75, Basic 47], Basic 3: [Basic 73, Basic 58, Basic 109, Basic 16, Basic 57], Basic 4: [Basic 17, Basic 94], Basic 5: [Basic 82, Basic 33, Basic 62]

Auction prices - {Basic 0 Server: 5, Basic 1 Server: 7, Basic 2 Server: 9, Basic 3 Server: 0, Basic 4 Server: 0, Basic 5 Server: 2}
Auction Rewards - {Basic 5 Server: 2}
Next State: Env State (0x1f9e8051f98) at time step: 33
	Auction Task -> Basic 70 Task (0x1f9e7f1fb88) - Unassigned, Storage: 55.0, Comp: 46.0, Results data: 21.0, Auction time: 33, Deadline: 42
	Servers -> Basic 0: [B

Auction prices - {Basic 0 Server: 10, Basic 1 Server: 2, Basic 2 Server: 10, Basic 3 Server: 0, Basic 4 Server: 0, Basic 5 Server: 7}
Auction Rewards - {Basic 1 Server: 2}
Next State: Env State (0x1f9e800df98) at time step: 37
	Auction Task -> Basic 100 Task (0x1f9e7f19948) - Unassigned, Storage: 65.0, Comp: 25.0, Results data: 25.0, Auction time: 37, Deadline: 44
	Servers -> Basic 0: [Basic 76, Basic 95, Basic 3, Basic 83], Basic 1: [Basic 13, Basic 70, Basic 32, Basic 97], Basic 2: [Basic 45, Basic 4], Basic 3: [Basic 58, Basic 16, Basic 57, Basic 40, Basic 89], Basic 4: [Basic 17, Basic 94, Basic 65], Basic 5: [Basic 82, Basic 33, Basic 62, Basic 63]

Auction prices - {Basic 0 Server: 3, Basic 1 Server: 6, Basic 2 Server: 7, Basic 3 Server: 10, Basic 4 Server: 4, Basic 5 Server: 10}
Auction Rewards - {Basic 0 Server: 3}
Next State: Env State (0x1f9e8051f98) at time step: 37
	Auction Task -> Basic 116 Task (0x1f9e7f21288) - Unassigned, Storage: 65.0, Comp: 54.0, Results data: 17.0, A

Resource allocation Rewards - {Basic 0: [Basic 95], Basic 1: [], Basic 2: [], Basic 3: [], Basic 5: [Basic 33]}
Env Done: False
Next State: Env State (0x1f9e8013f98) at time step: 42
	Auction Task -> Basic 9 Task (0x1f9e7f1eb88) - Unassigned, Storage: 53.0, Comp: 27.0, Results data: 30.0, Auction time: 42, Deadline: 52
	Servers -> Basic 0: [Basic 83, Basic 100], Basic 1: [Basic 70, Basic 32, Basic 97, Basic 79], Basic 2: [Basic 45, Basic 4, Basic 10], Basic 3: [Basic 57, Basic 40, Basic 89, Basic 5], Basic 4: [], Basic 5: [Basic 62, Basic 63, Basic 116, Basic 59]

Auction prices - {Basic 0 Server: 7, Basic 1 Server: 2, Basic 2 Server: 10, Basic 3 Server: 2, Basic 4 Server: 6, Basic 5 Server: 10}
Auction Rewards - {Basic 1 Server: 2}
Next State: Env State (0x1f9e800df98) at time step: 42
	Auction Task -> Basic 105 Task (0x1f9e7f19c18) - Unassigned, Storage: 60.0, Comp: 57.0, Results data: 29.0, Auction time: 42, Deadline: 50
	Servers -> Basic 0: [Basic 83, Basic 100], Basic 1: [Basic 70

Auction prices - {Basic 0 Server: 2, Basic 1 Server: 8, Basic 2 Server: 10, Basic 3 Server: 2, Basic 4 Server: 6, Basic 5 Server: 9}
Auction Rewards - {Basic 0 Server: 2}
Next State: Env State (0x1f9e802ef98) at time step: 45
	Auction Task -> None
	Servers -> Basic 0: [Basic 83, Basic 52, Basic 123, Basic 67], Basic 1: [Basic 32, Basic 97, Basic 79, Basic 9, Basic 105, Basic 64], Basic 2: [Basic 4, Basic 10, Basic 19, Basic 45], Basic 3: [Basic 40, Basic 89, Basic 5, Basic 103, Basic 122, Basic 24], Basic 4: [Basic 117], Basic 5: [Basic 116, Basic 59]

Resource allocation weights - {Basic 0 Server: [Basic 83 Task: 1, Basic 52 Task: 2, Basic 123 Task: 8, Basic 67 Task: 6], Basic 1 Server: [Basic 32 Task: 9, Basic 97 Task: 9, Basic 79 Task: 5, Basic 9 Task: 5, Basic 105 Task: 2, Basic 64 Task: 10], Basic 2 Server: [Basic 4 Task: 10, Basic 10 Task: 5, Basic 19 Task: 10, Basic 45 Task: 8], Basic 3 Server: [Basic 40 Task: 1, Basic 89 Task: 5, Basic 5 Task: 7, Basic 103 Task: 1, Basic 122 Ta

Auction prices - {Basic 0 Server: 10, Basic 1 Server: 5, Basic 2 Server: 2, Basic 3 Server: 4, Basic 4 Server: 4, Basic 5 Server: 7}
Auction Rewards - {Basic 2 Server: 2}
Next State: Env State (0x1f9e8013f98) at time step: 49
	Auction Task -> Basic 81 Task (0x1f9e7f1f288) - Unassigned, Storage: 60.0, Comp: 54.0, Results data: 21.0, Auction time: 49, Deadline: 59
	Servers -> Basic 0: [Basic 52, Basic 123, Basic 67, Basic 23], Basic 1: [Basic 79, Basic 9, Basic 105, Basic 64, Basic 26, Basic 44, Basic 72], Basic 2: [Basic 19, Basic 43], Basic 3: [Basic 103, Basic 122, Basic 24, Basic 48, Basic 92], Basic 4: [Basic 117], Basic 5: []

Auction prices - {Basic 0 Server: 10, Basic 1 Server: 6, Basic 2 Server: 3, Basic 3 Server: 7, Basic 4 Server: 6, Basic 5 Server: 2}
Auction Rewards - {Basic 5 Server: 2}
Next State: Env State (0x1f9e8012f98) at time step: 49
	Auction Task -> Basic 98 Task (0x1f9e7f19828) - Unassigned, Storage: 61.0, Comp: 32.0, Results data: 14.0, Auction time: 49, Deadline:

Resource allocation weights - {Basic 0 Server: [Basic 52 Task: 1, Basic 123 Task: 7, Basic 67 Task: 7, Basic 23 Task: 4], Basic 1 Server: [Basic 26 Task: 5, Basic 44 Task: 5, Basic 72 Task: 2, Basic 22 Task: 10, Basic 25 Task: 4, Basic 50 Task: 8], Basic 2 Server: [Basic 43 Task: 1.0], Basic 3 Server: [Basic 122 Task: 7, Basic 48 Task: 10, Basic 92 Task: 2, Basic 42 Task: 6], Basic 4 Server: [], Basic 5 Server: [Basic 81 Task: 1, Basic 98 Task: 10, Basic 108 Task: 2, Basic 66 Task: 1, Basic 93 Task: 8]}
Resource allocation Rewards - {Basic 0: [], Basic 1: [], Basic 2: [], Basic 3: [Basic 122], Basic 5: []}
Env Done: False
Next State: Env State (0x1f9e800df98) at time step: 54
	Auction Task -> Basic 21 Task (0x1f9e7f125e8) - Unassigned, Storage: 64.0, Comp: 41.0, Results data: 17.0, Auction time: 54, Deadline: 63
	Servers -> Basic 0: [Basic 52, Basic 123, Basic 67, Basic 23], Basic 1: [Basic 26, Basic 44, Basic 72, Basic 22, Basic 25, Basic 50], Basic 2: [Basic 43], Basic 3: [Basic 48, 

Resource allocation weights - {Basic 0 Server: [], Basic 1 Server: [Basic 26 Task: 9, Basic 44 Task: 6, Basic 22 Task: 1, Basic 25 Task: 1, Basic 50 Task: 2, Basic 110 Task: 2, Basic 11 Task: 3, Basic 46 Task: 5], Basic 2 Server: [Basic 43 Task: 1, Basic 55 Task: 1, Basic 36 Task: 6, Basic 77 Task: 1], Basic 3 Server: [Basic 92 Task: 1, Basic 42 Task: 3, Basic 21 Task: 10, Basic 120 Task: 3], Basic 4 Server: [Basic 34 Task: 1.0], Basic 5 Server: [Basic 81 Task: 10, Basic 108 Task: 3, Basic 66 Task: 8, Basic 93 Task: 6, Basic 86 Task: 1, Basic 41 Task: 8]}
Resource allocation Rewards - {Basic 1: [Basic 26], Basic 2: [], Basic 3: [], Basic 4: [], Basic 5: []}
Env Done: False
Next State: Env State (0x1f9e8094f98) at time step: 57
	Auction Task -> Basic 27 Task (0x1f9e7f12168) - Unassigned, Storage: 78.0, Comp: 60.0, Results data: 11.0, Auction time: 57, Deadline: 67
	Servers -> Basic 0: [], Basic 1: [Basic 44, Basic 22, Basic 25, Basic 50, Basic 110, Basic 11, Basic 46], Basic 2: [Basic 5

In [16]:
# Every 3 episodes, the agents are trained
if episode % 3 == 0:
    for task_pricing_agent in _server_task_pricing_agents.values():
        task_pricing_agent.train()
    for resource_weighting_agent in _server_resource_allocation_agents.values():
        resource_weighting_agent.train()

In [12]:
agent = _task_pricing_agents[0]

network_variables = agent.network_model.trainable_variables
with tf.GradientTape() as tape:
    tape.watch(network_variables)

    model_val = np.zeros((agent.minibatch_size, agent.num_outputs))
    target_val = np.zeros((agent.minibatch_size, agent.num_outputs))

    minibatch = rnd.sample(agent.replay_buffer, agent.minibatch_size)
    for pos, trajectory in enumerate(minibatch):
        observation, action, reward, next_observation = trajectory

        model_val[pos] = np.array(agent.network_model(observation))
        target_val[pos] = np.array(agent.network_model(observation))

        if next_observation is not None:
            max_next_value = np.max(agent.network_target(next_observation))
            model_val[pos][action] = reward + agent.discount_factor * max_next_value
        else:
            model_val[pos][action] = reward

        error = tf.square(target_val - model_val)
        error = tf.reduce_mean(0.5 * error)

print(f'agent with error {error}')
network_gradients = tape.gradient(error, network_variables)
print(f'Network gradients are {network_gradients}')
# self.optimiser.apply_gradients(zip(network_gradients, network_variables))

agent with error 0.04607089559463275
Network gradients are [None, None, None, None, None, None, None, None, None, None]


In [15]:
agent = _task_pricing_agents[0]

network_variables = agent.network_model.trainable_variables

minibatch = rnd.sample(agent.replay_buffer, agent.minibatch_size)
obs = np.array([traj[0] for traj in minibatch])


ValueError: could not broadcast input array from shape (2,9) into shape (1)

In [17]:
# Every 15 episodes, the agents are evaluated
if episode % 15 == 0:
    eval_env(_training_envs, _task_pricing_agents, _resource_weighting_agents)