In [1]:
from __future__ import annotations

import os
import sys
# Put the parent of the current working directory on the import path (once),
# so that packages living one level above this notebook can be imported.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
# Load the important classes
import random as rnd
from typing import Dict, TYPE_CHECKING, List, Tuple, Optional

import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np

import core.log as log
import settings.env_io as env_io
import settings.env_settings_io as env_settings_io
from agents.resource_weighting_agent import ResourceWeightingAgent
from agents.task_pricing_agent import TaskPricingAgent
from env.environment import OnlineFlexibleResourceAllocationEnv
from env.task_stage import TaskStage

if TYPE_CHECKING:
    from env.server import Server
    import numpy as np
    from env.task import Task

In [3]:
from train import run_env, eval_env, allocate_agents

In [4]:
# Configure the project logger (core.log): the console level is set to INFO and the
# debug log file name to 'training.log'.
# NOTE(review): how core.log consumes these attributes (and where the file is created)
# is not visible from this notebook - confirm against core/log.py.
log.console_debug_level = log.LogLevel.INFO
log.debug_filename = 'training.log'

In [5]:
# Setup the environment from the basic settings JSON file.
# NOTE(review): the path is relative, so this presumably depends on the kernel's
# working directory being the notebook folder - confirm.
_env = OnlineFlexibleResourceAllocationEnv.make('../settings/basic_env.json')

In [6]:
# Setup the agents: ten task pricing agents followed by ten resource weighting agents,
# each pool sharing the same 'Default <n>' names.
_agent_names = ['Default {}'.format(agent_num) for agent_num in range(10)]
_task_pricing_agents = [TaskPricingAgent(agent_name) for agent_name in _agent_names]
_resource_weighting_agents = [ResourceWeightingAgent(agent_name) for agent_name in _agent_names]

In [7]:
# Create ten environment snapshots: reset the environment, save it to disk and remember
# the file path. The collected paths are later handed to eval_env.
_training_envs: List[str] = []
for training_env_num in range(10):
    env_filename = f'../settings/eval_envs/eval_env_{training_env_num}.json'
    _env.reset()
    env_io.save_environment(_env, env_filename)
    _training_envs.append(env_filename)

In [8]:
# Episode counter: starts at zero and is incremented before each episode is run
episode: int = 0

In [9]:
# Loop over the episodes by hand: advance the episode counter by one.
# Note that every re-run of this cell increments the counter again.
episode += 1

In [10]:
# Run a single episode: announce it, start from a freshly reset environment,
# allocate agents and then play the environment through.
log.info(f'Episode: {episode}')
_env.reset()

# allocate_agents yields two collections (task pricing agents, resource weighting agents)
# whose .values() are iterated by later cells
_server_task_pricing_agents, _server_resource_allocation_agents = allocate_agents(
    _env, _task_pricing_agents, _resource_weighting_agents
)

# run_env reports the total price plus the number of completed and failed tasks
_episode_results = run_env(_env, _server_task_pricing_agents, _server_resource_allocation_agents)
total_price, num_completed_tasks, num_failed_tasks = _episode_results
log.info(f'Total Price: {total_price}, Num Completed Task: {num_completed_tasks}, Num Failed Tasks: {num_failed_tasks}')

Episode: 1
Environment: Env State (0x132c2850318) at time step: 0
	Auction Task -> Basic 29 Task (0x132c285dd38) - Unassigned, Storage: 53.0, Comp: 29.0, Results data: 26.0, Auction time: 0, Deadline: 8
	Servers -> Basic 0: [], Basic 1: [], Basic 2: []
Unallocated tasks
	Basic 69 Task (0x132c2857ca8) - Unassigned, Storage: 73.0, Comp: 57.0, Results data: 23.0, Auction time: 0, Deadline: 8
	Basic 33 Task (0x132c285da68) - Unassigned, Storage: 79.0, Comp: 28.0, Results data: 22.0, Auction time: 1, Deadline: 8
	Basic 7 Task (0x132c2855ca8) - Unassigned, Storage: 77.0, Comp: 40.0, Results data: 19.0, Auction time: 2, Deadline: 8
	Basic 61 Task (0x132c2857b88) - Unassigned, Storage: 80.0, Comp: 48.0, Results data: 23.0, Auction time: 2, Deadline: 11
	Basic 82 Task (0x132c2858168) - Unassigned, Storage: 62.0, Comp: 48.0, Results data: 26.0, Auction time: 2, Deadline: 12
	Basic 106 Task (0x132c2858ee8) - Unassigned, Storage: 58.0, Comp: 34.0, Results data: 22.0, Auction time: 2, Deadline: 11


Auction prices - {Basic 0 Server: 5, Basic 1 Server: 5, Basic 2 Server: 1}
Auction Rewards - {Basic 2 Server: 1}
Next State: Env State (0x132c2850318) at time step: 1
	Auction Task -> None
	Servers -> Basic 0: [Basic 29, Basic 69], Basic 1: [], Basic 2: [Basic 33]

Resource allocation weights - {Basic 0 Server: [Basic 29 Task: 9, Basic 69 Task: 4], Basic 1 Server: [], Basic 2 Server: [Basic 33 Task: 1.0]}
Resource allocation Rewards - {Basic 0: [], Basic 2: []}
Env Done: False
Next State: Env State (0x132c28502c8) at time step: 2
	Auction Task -> Basic 7 Task (0x132c2855ca8) - Unassigned, Storage: 77.0, Comp: 40.0, Results data: 19.0, Auction time: 2, Deadline: 8
	Servers -> Basic 0: [Basic 29, Basic 69], Basic 1: [], Basic 2: [Basic 33]

Auction prices - {Basic 0 Server: 0, Basic 1 Server: 2, Basic 2 Server: 5}
Auction Rewards - {Basic 1 Server: 2}
Next State: Env State (0x132c2850318) at time step: 2
	Auction Task -> Basic 61 Task (0x132c2857b88) - Unassigned, Storage: 80.0, Comp: 48

Auction Rewards - {Basic 2 Server: 2}
Next State: Env State (0x132c2850318) at time step: 6
	Auction Task -> None
	Servers -> Basic 0: [Basic 29, Basic 69, Basic 44, Basic 9], Basic 1: [Basic 7, Basic 82, Basic 0, Basic 3, Basic 21, Basic 97], Basic 2: [Basic 33, Basic 61, Basic 106, Basic 35, Basic 68, Basic 72, Basic 88, Basic 108]

Resource allocation weights - {Basic 0 Server: [Basic 29 Task: 1, Basic 69 Task: 4, Basic 44 Task: 2, Basic 9 Task: 8], Basic 1 Server: [Basic 7 Task: 5, Basic 82 Task: 9, Basic 0 Task: 4, Basic 3 Task: 9, Basic 21 Task: 4, Basic 97 Task: 5], Basic 2 Server: [Basic 33 Task: 3, Basic 61 Task: 9, Basic 106 Task: 9, Basic 35 Task: 8, Basic 68 Task: 4, Basic 72 Task: 4, Basic 88 Task: 3, Basic 108 Task: 1]}
Resource allocation Rewards - {Basic 0: [], Basic 1: [], Basic 2: []}
Env Done: False
Next State: Env State (0x132c28502c8) at time step: 7
	Auction Task -> Basic 37 Task (0x132c285c048) - Unassigned, Storage: 77.0, Comp: 44.0, Results data: 27.0, Auction 

Resource allocation weights - {Basic 0 Server: [Basic 44 Task: 3, Basic 9 Task: 7], Basic 1 Server: [Basic 97 Task: 2, Basic 47 Task: 2, Basic 15 Task: 4, Basic 89 Task: 4, Basic 104 Task: 9], Basic 2 Server: [Basic 35 Task: 3, Basic 68 Task: 5, Basic 72 Task: 4, Basic 88 Task: 10, Basic 108 Task: 3, Basic 37 Task: 3, Basic 19 Task: 4, Basic 60 Task: 9]}
Resource allocation Rewards - {Basic 0: [], Basic 1: [], Basic 2: []}
Env Done: False
Next State: Env State (0x132c2a2fd18) at time step: 14
	Auction Task -> Basic 57 Task (0x132c2857558) - Unassigned, Storage: 56.0, Comp: 42.0, Results data: 28.0, Auction time: 14, Deadline: 24
	Servers -> Basic 0: [Basic 44, Basic 9], Basic 1: [Basic 97, Basic 47, Basic 15, Basic 89, Basic 104], Basic 2: [Basic 35, Basic 68, Basic 72, Basic 88, Basic 108, Basic 37, Basic 19, Basic 60]

Auction prices - {Basic 0 Server: 3, Basic 1 Server: 5, Basic 2 Server: 8}
Auction Rewards - {Basic 0 Server: 3}
Next State: Env State (0x132c2a01818) at time step: 14

Resource allocation Rewards - {Basic 0: [], Basic 1: [Basic 104], Basic 2: []}
Env Done: False
Next State: Env State (0x132c28502c8) at time step: 20
	Auction Task -> None
	Servers -> Basic 0: [Basic 57, Basic 65, Basic 26], Basic 1: [Basic 47, Basic 15, Basic 89, Basic 41, Basic 46], Basic 2: [Basic 60, Basic 96, Basic 40, Basic 67, Basic 91, Basic 83, Basic 99]

Resource allocation weights - {Basic 0 Server: [Basic 57 Task: 9, Basic 65 Task: 4, Basic 26 Task: 4], Basic 1 Server: [Basic 47 Task: 8, Basic 15 Task: 4, Basic 89 Task: 1, Basic 41 Task: 4, Basic 46 Task: 7], Basic 2 Server: [Basic 60 Task: 9, Basic 96 Task: 4, Basic 40 Task: 4, Basic 67 Task: 9, Basic 91 Task: 4, Basic 83 Task: 4, Basic 99 Task: 5]}
Resource allocation Rewards - {Basic 0: [], Basic 1: [Basic 47], Basic 2: []}
Env Done: False
Next State: Env State (0x132c2a2fd18) at time step: 21
	Auction Task -> Basic 109 Task (0x132c28620d8) - Unassigned, Storage: 60.0, Comp: 43.0, Results data: 20.0, Auction time: 21, De

Resource allocation Rewards - {Basic 0: [], Basic 1: [], Basic 2: [Basic 67]}
Env Done: False
Next State: Env State (0x132c2a2fd18) at time step: 28
	Auction Task -> Basic 13 Task (0x132c2855af8) - Unassigned, Storage: 77.0, Comp: 27.0, Results data: 25.0, Auction time: 28, Deadline: 38
	Servers -> Basic 0: [Basic 109], Basic 1: [Basic 42, Basic 59, Basic 66, Basic 22, Basic 38, Basic 103], Basic 2: [Basic 40, Basic 83, Basic 64, Basic 63]

Auction prices - {Basic 0 Server: 5, Basic 1 Server: 5, Basic 2 Server: 10}
Auction Rewards - {Basic 0 Server: 5}
Next State: Env State (0x132c28502c8) at time step: 28
	Auction Task -> Basic 85 Task (0x132c2858318) - Unassigned, Storage: 64.0, Comp: 37.0, Results data: 28.0, Auction time: 28, Deadline: 37
	Servers -> Basic 0: [Basic 109, Basic 13], Basic 1: [Basic 42, Basic 59, Basic 66, Basic 22, Basic 38, Basic 103], Basic 2: [Basic 40, Basic 83, Basic 64, Basic 63]

Auction prices - {Basic 0 Server: 5, Basic 1 Server: 1, Basic 2 Server: 3}
Aucti

Auction Rewards - {Basic 2 Server: 2}
Next State: Env State (0x132c28502c8) at time step: 36
	Auction Task -> None
	Servers -> Basic 0: [Basic 52, Basic 13], Basic 1: [Basic 85, Basic 103], Basic 2: [Basic 63, Basic 4, Basic 51, Basic 34, Basic 27]

Resource allocation weights - {Basic 0 Server: [Basic 52 Task: 3, Basic 13 Task: 7], Basic 1 Server: [Basic 85 Task: 5, Basic 103 Task: 6], Basic 2 Server: [Basic 63 Task: 1, Basic 4 Task: 10, Basic 51 Task: 1, Basic 34 Task: 6, Basic 27 Task: 9]}
Resource allocation Rewards - {Basic 0: [Basic 13], Basic 1: [], Basic 2: []}
Env Done: False
Next State: Env State (0x132c2a2fd18) at time step: 37
	Auction Task -> Basic 92 Task (0x132c2858708) - Unassigned, Storage: 75.0, Comp: 48.0, Results data: 22.0, Auction time: 37, Deadline: 47
	Servers -> Basic 0: [Basic 52], Basic 1: [Basic 85, Basic 103], Basic 2: [Basic 63, Basic 4, Basic 51, Basic 34, Basic 27]

Auction prices - {Basic 0 Server: 1, Basic 1 Server: 7, Basic 2 Server: 4}
Auction Reward

Auction Rewards - {Basic 1 Server: 3}
Next State: Env State (0x132c28502c8) at time step: 47
	Auction Task -> None
	Servers -> Basic 0: [Basic 79, Basic 39, Basic 81, Basic 92], Basic 1: [Basic 12, Basic 10, Basic 94, Basic 18], Basic 2: [Basic 25, Basic 55, Basic 23, Basic 49, Basic 93]

Resource allocation weights - {Basic 0 Server: [Basic 79 Task: 5, Basic 39 Task: 5, Basic 81 Task: 7, Basic 92 Task: 2], Basic 1 Server: [Basic 12 Task: 2, Basic 10 Task: 8, Basic 94 Task: 5, Basic 18 Task: 4], Basic 2 Server: [Basic 25 Task: 9, Basic 55 Task: 6, Basic 23 Task: 1, Basic 49 Task: 10, Basic 93 Task: 6]}
Resource allocation Rewards - {Basic 0: [Basic 92], Basic 1: [Basic 94], Basic 2: [Basic 25]}
Env Done: False
Next State: Env State (0x132c2a2fd18) at time step: 48
	Auction Task -> Basic 14 Task (0x132c2855948) - Unassigned, Storage: 67.0, Comp: 45.0, Results data: 16.0, Auction time: 48, Deadline: 56
	Servers -> Basic 0: [Basic 39, Basic 81, Basic 79], Basic 1: [Basic 12, Basic 10, Bas

Auction Rewards - {Basic 0 Server: 5}
Next State: Env State (0x132c28502c8) at time step: 54
	Auction Task -> None
	Servers -> Basic 0: [Basic 81, Basic 14, Basic 54, Basic 110, Basic 43, Basic 17, Basic 31, Basic 74], Basic 1: [Basic 18, Basic 11, Basic 32, Basic 78], Basic 2: [Basic 23, Basic 90, Basic 58, Basic 8]

Resource allocation weights - {Basic 0 Server: [Basic 81 Task: 6, Basic 14 Task: 4, Basic 54 Task: 7, Basic 110 Task: 4, Basic 43 Task: 4, Basic 17 Task: 9, Basic 31 Task: 1, Basic 74 Task: 9], Basic 1 Server: [Basic 18 Task: 10, Basic 11 Task: 7, Basic 32 Task: 3, Basic 78 Task: 3], Basic 2 Server: [Basic 23 Task: 6, Basic 90 Task: 8, Basic 58 Task: 6, Basic 8 Task: 3]}
Resource allocation Rewards - {Basic 0: [Basic 81], Basic 1: [], Basic 2: [Basic 23]}
Env Done: False
Next State: Env State (0x132c2a2fd18) at time step: 55
	Auction Task -> None
	Servers -> Basic 0: [Basic 14, Basic 54, Basic 110, Basic 43, Basic 17, Basic 31, Basic 74], Basic 1: [Basic 18, Basic 11, Bas

Resource allocation weights - {Basic 0 Server: [Basic 31 Task: 9, Basic 74 Task: 9], Basic 1 Server: [Basic 78 Task: 3, Basic 87 Task: 3, Basic 84 Task: 6, Basic 53 Task: 4, Basic 102 Task: 4, Basic 28 Task: 2], Basic 2 Server: [Basic 90 Task: 7, Basic 58 Task: 9, Basic 2 Task: 5, Basic 30 Task: 9, Basic 62 Task: 9, Basic 86 Task: 4, Basic 50 Task: 4, Basic 107 Task: 2]}
Resource allocation Rewards - {Basic 0: [], Basic 1: [Basic 78], Basic 2: [Basic 58]}
Env Done: False
Next State: Env State (0x132c3b82f98) at time step: 62
	Auction Task -> Basic 73 Task (0x132c28573a8) - Unassigned, Storage: 63.0, Comp: 59.0, Results data: 27.0, Auction time: 62, Deadline: 71
	Servers -> Basic 0: [Basic 31, Basic 74], Basic 1: [Basic 87, Basic 84, Basic 53, Basic 102, Basic 28], Basic 2: [Basic 90, Basic 2, Basic 30, Basic 62, Basic 86, Basic 50, Basic 107]

Auction prices - {Basic 0 Server: 2, Basic 1 Server: 9, Basic 2 Server: 0}
Auction Rewards - {Basic 0 Server: 2}
Next State: Env State (0x132c28

Auction Rewards - {Basic 0 Server: 1}
Next State: Env State (0x132c28502c8) at time step: 65
	Auction Task -> Basic 100 Task (0x132c2858b88) - Unassigned, Storage: 61.0, Comp: 27.0, Results data: 11.0, Auction time: 65, Deadline: 76
	Servers -> Basic 0: [Basic 31, Basic 73, Basic 111, Basic 20, Basic 36, Basic 77], Basic 1: [Basic 53, Basic 102, Basic 28, Basic 5, Basic 48], Basic 2: [Basic 2, Basic 30, Basic 62, Basic 86, Basic 50, Basic 107, Basic 105, Basic 45, Basic 71, Basic 76, Basic 16]

Auction prices - {Basic 0 Server: 5, Basic 1 Server: 6, Basic 2 Server: 9}
Auction Rewards - {Basic 0 Server: 5}
Next State: Env State (0x132c3b82f98) at time step: 65
	Auction Task -> None
	Servers -> Basic 0: [Basic 31, Basic 73, Basic 111, Basic 20, Basic 36, Basic 77, Basic 100], Basic 1: [Basic 53, Basic 102, Basic 28, Basic 5, Basic 48], Basic 2: [Basic 2, Basic 30, Basic 62, Basic 86, Basic 50, Basic 107, Basic 105, Basic 45, Basic 71, Basic 76, Basic 16]

Resource allocation weights - {B

Resource allocation weights - {Basic 0 Server: [Basic 73 Task: 9, Basic 111 Task: 8, Basic 20 Task: 2, Basic 36 Task: 1, Basic 77 Task: 5, Basic 100 Task: 8, Basic 6 Task: 2, Basic 70 Task: 7, Basic 112 Task: 2], Basic 1 Server: [Basic 53 Task: 3, Basic 28 Task: 10, Basic 5 Task: 4, Basic 48 Task: 8, Basic 24 Task: 3, Basic 80 Task: 1], Basic 2 Server: [Basic 50 Task: 2, Basic 45 Task: 3, Basic 71 Task: 4, Basic 76 Task: 1, Basic 16 Task: 10, Basic 1 Task: 4, Basic 98 Task: 7, Basic 75 Task: 10, Basic 101 Task: 2]}
Resource allocation Rewards - {Basic 0: [], Basic 1: [Basic 53], Basic 2: [Basic 71]}
Env Done: False
Next State: Env State (0x132c28502c8) at time step: 71
	Auction Task -> Basic 95 Task (0x132c28588b8) - Unassigned, Storage: 80.0, Comp: 43.0, Results data: 24.0, Auction time: 71, Deadline: 83
	Servers -> Basic 0: [Basic 73, Basic 111, Basic 20, Basic 36, Basic 77, Basic 100, Basic 6, Basic 70, Basic 112], Basic 1: [Basic 28, Basic 5, Basic 48, Basic 24, Basic 80], Basic 2:

In [11]:
# Take the first task pricing agent in iteration order to experiment with below
agent = next(iter(_server_task_pricing_agents.values()))

In [29]:
# Prototype of one training step for `agent`: sample two trajectories from the replay
# buffer and, for each, collect the gradient of the squared error between the model's
# predicted values and a target that differs only at the action that was taken.
network_variables = agent.network_model.trainable_variables

minibatch = rnd.sample(agent.replay_buffer, 2)
gradients = []
for observation, action, reward, next_observation in minibatch:
    # The target value is a constant with respect to the model weights, so the target
    # network is evaluated before the tape starts recording.
    # NOTE(review): no discount factor is applied to the next-state value - confirm intended.
    if next_observation is None:
        action_target = reward
    else:
        action_target = reward + np.max(agent.network_target(next_observation))

    # Trainable variables are watched automatically, so no explicit tape.watch is needed
    with tf.GradientTape() as tape:
        # One forward pass, reused for both the target baseline and the prediction
        action_values = agent.network_model(observation)

        # Copy to numpy so the target carries no gradient; every entry except the taken
        # action equals the prediction and therefore contributes zero loss
        target = np.array(action_values)
        target[0][action] = action_target

        loss = tf.square(target - action_values)
    print(loss)

    # Ask for the gradients once the tape has stopped recording
    network_gradients = tape.gradient(loss, network_variables)
    gradients.append(network_gradients)

tf.Tensor(
[[0.        0.        0.        0.        0.        0.        0.
  0.        0.        0.        1.3976436]], shape=(1, 11), dtype=float32)
tf.Tensor(
[[0.        0.        0.        0.        0.5752733 0.        0.
  0.        0.        0.        0.       ]], shape=(1, 11), dtype=float32)


In [32]:
# Sum the per-trajectory gradients position by position, giving one summed
# gradient per network variable, then display the result
total_gradients = [
    sum(trajectory_gradients[variable_index] for trajectory_gradients in gradients)
    for variable_index in range(len(gradients[0]))
]
total_gradients

[<tf.Tensor: shape=(9, 40), dtype=float32, numpy=
 array([[ 2.64523783e-06, -7.12865090e-04, -6.53185737e-07,
          3.22272172e-07, -1.39572658e-04,  8.19104753e-05,
          5.28900884e-03,  3.86136118e-04,  3.86408374e-06,
         -1.03437378e-04,  2.03705655e-04,  1.54336290e-02,
         -8.23411455e-08, -7.54513962e-09,  5.56716579e-04,
          1.04750041e-04,  4.92906020e-06,  6.12401520e-04,
          2.88911851e-05, -8.06656715e-08,  2.38858862e-03,
          4.11874838e-02, -1.88844240e-06,  8.13853603e-07,
         -2.96271639e-04, -1.39033730e-07, -4.22246771e-08,
         -9.45648737e-03,  1.71037061e-06, -5.86764036e-05,
         -3.31361836e-04, -5.77372906e-04, -1.17515425e-04,
          6.23333449e-07, -1.87485799e-04,  4.72179707e-03,
          4.91505580e-06, -2.79998151e-03,  2.77350278e-04,
         -5.03624287e-05],
        [ 3.17428530e-05, -8.55438039e-03, -7.83822907e-06,
          3.86726606e-06, -1.67487143e-03,  9.82925761e-04,
          6.34681135e-0

In [16]:
# Every 3 episodes, the agents are trained: first all allocated task pricing agents,
# then all allocated resource weighting agents
_is_training_episode = episode % 3 == 0
if _is_training_episode:
    for _allocated_agents in (_server_task_pricing_agents, _server_resource_allocation_agents):
        for _allocated_agent in _allocated_agents.values():
            _allocated_agent.train()

In [17]:
# Every 15 episodes, the agents are evaluated against the saved environment files
_is_evaluation_episode = episode % 15 == 0
if _is_evaluation_episode:
    eval_env(_training_envs, _task_pricing_agents, _resource_weighting_agents)