In [1]:
from __future__ import annotations

import os
import sys
# Put the parent directory on the import path (once) so that the project packages
# imported below can be resolved when the notebook is run from its own folder.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [4]:
# Load the important classes
import random as rnd
from typing import Dict, TYPE_CHECKING, List, Tuple, Optional

import matplotlib.pyplot as plt

import core.log as log
import settings.env_io as env_io
import settings.env_settings_io as env_settings_io
from agents.resource_weighting_agent import ResourceWeightingAgent
from agents.task_pricing_agent import TaskPricingAgent
from env.environment import OnlineFlexibleResourceAllocationEnv
from env.task_stage import TaskStage

if TYPE_CHECKING:
    from env.server import Server
    import numpy as np
    from env.task import Task

In [14]:
from train import run_env, eval_env, allocate_agents

In [5]:
# Configure the project log module: the console level is set to INFO and the
# debug log filename is set to 'training.log' (a relative path, so the file
# location presumably depends on the working directory -- TODO confirm in core.log).
log.console_debug_level = log.LogLevel.INFO
log.debug_filename = 'training.log'

In [6]:
# Setup the environment: build the shared environment object from the basic
# settings JSON file. The path is relative, so the working directory matters.
# This single `_env` instance is reused (via reset()) by the cells further down.
_env = OnlineFlexibleResourceAllocationEnv.make('../settings/basic_env_setting.json')

In [7]:
# Build the agent pools: ten task pricing agents and ten resource weighting agents,
# each named 'Default <index>' with indices running from 0 to 9.
_task_pricing_agents = [TaskPricingAgent(name) for name in map('Default {}'.format, range(10))]
_resource_weighting_agents = [ResourceWeightingAgent(name) for name in map('Default {}'.format, range(10))]

In [8]:
# Create the training envs
# Paths of the ten saved environment files; this same list is what the evaluation
# cell later passes to eval_env.
_training_envs: List[str] = [f'../settings/eval_envs/eval_env_{training_env_num}.json'
                             for training_env_num in range(10)]
# Reset the shared environment before every save and write it to the matching path.
# The loop iterates over `_training_envs` instead of rebuilding each path from a second
# copy of the template, so the files on disk cannot drift out of sync with the list.
# NOTE(review): presumably each reset() yields a freshly generated environment, which
# is what makes the ten files differ -- confirm against the environment implementation.
for training_env_path in _training_envs:
    _env.reset()
    env_io.save_environment(_env, training_env_path)

In [9]:
# Episode counter: starts at zero and is incremented by the following cell
# before an episode is run.
episode = 0

In [10]:
# Advance to the next episode. There is no actual loop here: the counter only
# moves when this cell is executed, and every execution increments it again
# (the cell is not idempotent, so re-running it skips an episode number).
episode += 1

In [15]:
# Run one episode: announce it, reset the shared environment, allocate agents to
# the servers, play the environment through and log the summary figures.
log.info(f'Episode: {episode}')
_env.reset()

# allocate_agents returns two mappings (their .values() are the agents trained later)
_server_task_pricing_agents, _server_resource_allocation_agents = allocate_agents(
    _env, _task_pricing_agents, _resource_weighting_agents
)

# run_env reports the total price plus the completed / failed task counts
total_price, num_completed_tasks, num_failed_tasks = run_env(
    _env, _server_task_pricing_agents, _server_resource_allocation_agents
)
log.info(f'Total Price: {total_price}, Num Completed Task: {num_completed_tasks}, Num Failed Tasks: {num_failed_tasks}')

Episode: 1
Initial state: Env State (0x24af541c368) at time step: 0
	Auction Task -> Basic 4 Task (0x24af5428f78) - Unassigned, Storage: 69, Comp: 39, Results data: 11, Auction time: 0, Deadline: 6
	Servers -> Basic 0: [], Basic 1: [], Basic 2: []
Task Pricing agents - {Basic 0 Server: Default 2, Basic 1 Server: Default 3, Basic 2 Server: Default 7}
Resource allocation agents - {Basic 0 Server: Default 8, Basic 1 Server: Default 0, Basic 2 Server: Default 9}
Initial State: Env State (0x24af541c368) at time step: 0
	Auction Task -> Basic 4 Task (0x24af5428f78) - Unassigned, Storage: 69, Comp: 39, Results data: 11, Auction time: 0, Deadline: 6
	Servers -> Basic 0: [], Basic 1: [], Basic 2: []
Auction prices -> Basic 0: 24, Basic 1: 16, Basic 2: 8
Auction Rewards - Basic 2 Server: 8
Next State: Env State (0x24af541ca98) at time step: 0
	Auction Task -> None
	Servers -> Basic 0: [], Basic 1: [], Basic 2: [Basic 4]

Resource allocation weights -> Basic 0 Server - [], Basic 1 Server - [], Ba

Auction prices -> Basic 0: 25, Basic 1: 2, Basic 2: 13
Auction Rewards - Basic 1 Server: 2
Next State: Env State (0x24af541ca98) at time step: 7
	Auction Task -> Basic 25 Task (0x24af5424708) - Unassigned, Storage: 68, Comp: 34, Results data: 16, Auction time: 7, Deadline: 13
	Servers -> Basic 0: [Basic 67, Basic 32, Basic 9, Basic 81], Basic 1: [Basic 56, Basic 64, Basic 108, Basic 102, Basic 106, Basic 22, Basic 52, Basic 2], Basic 2: [Basic 103]

Auction prices -> Basic 0: 11, Basic 1: 26, Basic 2: 24
Auction Rewards - Basic 0 Server: 11
Next State: Env State (0x24af36b11d8) at time step: 7
	Auction Task -> None
	Servers -> Basic 0: [Basic 67, Basic 32, Basic 9, Basic 81, Basic 25], Basic 1: [Basic 56, Basic 64, Basic 108, Basic 102, Basic 106, Basic 22, Basic 52, Basic 2], Basic 2: [Basic 103]

Resource allocation weights -> Basic 0 Server - [Basic 67 Task: 1, Basic 32 Task: 1, Basic 9 Task: 7, Basic 81 Task: 4, Basic 25 Task: 4], Basic 1 Server - [Basic 56 Task: 6, Basic 64 Task: 

Resource allocation weights -> Basic 0 Server - [Basic 32 Task: 8, Basic 9 Task: 24, Basic 81 Task: 6, Basic 25 Task: 6, Basic 86 Task: 2, Basic 57 Task: 4, Basic 31 Task: 5], Basic 1 Server - [Basic 64 Task: 6, Basic 108 Task: 3, Basic 106 Task: 4, Basic 22 Task: 3, Basic 52 Task: 3, Basic 2 Task: 9, Basic 77 Task: 20, Basic 68 Task: 6, Basic 109 Task: 4, Basic 35 Task: 8], Basic 2 Server - [Basic 103 Task: 7, Basic 76 Task: 4, Basic 93 Task: 7]
Resource allocation Rewards - Basic 0: [], Basic 1: [Basic 106], Basic 2: []
Env Done: False
Next State: Env State (0x24af36b11d8) at time step: 12
	Auction Task -> Basic 19 Task (0x24af5424438) - Unassigned, Storage: 74, Comp: 39, Results data: 14, Auction time: 12, Deadline: 19
	Servers -> Basic 0: [Basic 32, Basic 9, Basic 81, Basic 25, Basic 86, Basic 57, Basic 31], Basic 1: [Basic 64, Basic 108, Basic 22, Basic 52, Basic 2, Basic 77, Basic 68, Basic 109, Basic 35], Basic 2: [Basic 76, Basic 93, Basic 103]

Auction prices -> Basic 0: 26, B

Resource allocation weights -> Basic 0 Server - [Basic 9 Task: 4, Basic 86 Task: 10, Basic 57 Task: 6, Basic 31 Task: 3, Basic 10 Task: 7, Basic 20 Task: 5, Basic 44 Task: 7, Basic 18 Task: 10], Basic 1 Server - [Basic 64 Task: 9, Basic 22 Task: 5, Basic 77 Task: 2, Basic 68 Task: 20, Basic 109 Task: 5, Basic 35 Task: 10, Basic 19 Task: 20, Basic 1 Task: 20, Basic 26 Task: 3, Basic 50 Task: 3, Basic 72 Task: 5, Basic 112 Task: 1], Basic 2 Server - [Basic 76 Task: 11, Basic 93 Task: 4, Basic 103 Task: 3, Basic 75 Task: 6]
Resource allocation Rewards - Basic 0: [], Basic 1: [Basic 64], Basic 2: [Basic 103]
Env Done: False
Next State: Env State (0x24af541c368) at time step: 15
	Auction Task -> Basic 83 Task (0x24af5446d38) - Unassigned, Storage: 59, Comp: 47, Results data: 16, Auction time: 15, Deadline: 22
	Servers -> Basic 0: [Basic 9, Basic 86, Basic 57, Basic 31, Basic 10, Basic 20, Basic 44, Basic 18], Basic 1: [Basic 22, Basic 77, Basic 68, Basic 109, Basic 35, Basic 19, Basic 1, Ba

Resource allocation weights -> Basic 0 Server - [Basic 86 Task: 2, Basic 57 Task: 24, Basic 31 Task: 3, Basic 10 Task: 24, Basic 20 Task: 5, Basic 44 Task: 11, Basic 18 Task: 5, Basic 83 Task: 9, Basic 46 Task: 4, Basic 85 Task: 6], Basic 1 Server - [Basic 77 Task: 1, Basic 68 Task: 8, Basic 109 Task: 6, Basic 19 Task: 3, Basic 1 Task: 9, Basic 26 Task: 11, Basic 50 Task: 5, Basic 72 Task: 4, Basic 112 Task: 5, Basic 15 Task: 20, Basic 53 Task: 10, Basic 34 Task: 20], Basic 2 Server - [Basic 93 Task: 5, Basic 75 Task: 1, Basic 89 Task: 1, Basic 76 Task: 7, Basic 13 Task: 8]
Resource allocation Rewards - Basic 0: [Basic 86, Basic 57], Basic 1: [], Basic 2: []
Env Done: False
Next State: Env State (0x24af541c368) at time step: 19
	Auction Task -> Basic 27 Task (0x24af5424a68) - Unassigned, Storage: 73, Comp: 58, Results data: 26, Auction time: 19, Deadline: 27
	Servers -> Basic 0: [Basic 31, Basic 10, Basic 20, Basic 44, Basic 18, Basic 83, Basic 46, Basic 85], Basic 1: [Basic 77, Basic 

Auction prices -> Basic 0: 8, Basic 1: 4, Basic 2: 11
Auction Rewards - Basic 1 Server: 4
Next State: Env State (0x24af541c368) at time step: 23
	Auction Task -> Basic 111 Task (0x24af5434d38) - Unassigned, Storage: 68, Comp: 40, Results data: 18, Auction time: 23, Deadline: 33
	Servers -> Basic 0: [Basic 10, Basic 44, Basic 18, Basic 46, Basic 85, Basic 78, Basic 12], Basic 1: [Basic 1, Basic 50, Basic 112, Basic 34, Basic 27, Basic 6, Basic 14, Basic 33, Basic 91], Basic 2: [Basic 89, Basic 13, Basic 98, Basic 75]

Auction prices -> Basic 0: 23, Basic 1: 6, Basic 2: 20
Auction Rewards - Basic 1 Server: 6
Next State: Env State (0x24af541ca98) at time step: 23
	Auction Task -> None
	Servers -> Basic 0: [Basic 10, Basic 44, Basic 18, Basic 46, Basic 85, Basic 78, Basic 12], Basic 1: [Basic 1, Basic 50, Basic 112, Basic 34, Basic 27, Basic 6, Basic 14, Basic 33, Basic 91, Basic 111], Basic 2: [Basic 89, Basic 13, Basic 98, Basic 75]

Resource allocation weights -> Basic 0 Server - [Basic

Resource allocation weights -> Basic 0 Server - [Basic 46 Task: 6, Basic 78 Task: 5, Basic 12 Task: 24, Basic 36 Task: 10, Basic 39 Task: 1, Basic 70 Task: 3], Basic 1 Server - [Basic 1 Task: 9, Basic 50 Task: 8, Basic 27 Task: 6, Basic 6 Task: 20, Basic 14 Task: 20, Basic 33 Task: 7, Basic 91 Task: 3, Basic 111 Task: 2, Basic 28 Task: 3, Basic 66 Task: 20, Basic 90 Task: 5, Basic 100 Task: 1], Basic 2 Server - [Basic 89 Task: 6, Basic 48 Task: 6, Basic 84 Task: 6]
Resource allocation Rewards - Basic 0: [], Basic 1: [Basic 1, Basic 50], Basic 2: []
Env Done: False
Next State: Env State (0x24af541c368) at time step: 27
	Auction Task -> Basic 92 Task (0x24af5434288) - Unassigned, Storage: 76, Comp: 37, Results data: 17, Auction time: 27, Deadline: 33
	Servers -> Basic 0: [Basic 46, Basic 78, Basic 12, Basic 36, Basic 39, Basic 70], Basic 1: [Basic 27, Basic 6, Basic 14, Basic 33, Basic 91, Basic 111, Basic 28, Basic 66, Basic 90, Basic 100], Basic 2: [Basic 89, Basic 48, Basic 84]

Aucti

Auction prices -> Basic 0: 3, Basic 1: 2, Basic 2: 13
Auction Rewards - Basic 1 Server: 2
Next State: Env State (0x24af36b11d8) at time step: 31
	Auction Task -> Basic 37 Task (0x24af544c318) - Unassigned, Storage: 56, Comp: 34, Results data: 18, Auction time: 31, Deadline: 43
	Servers -> Basic 0: [Basic 36, Basic 39, Basic 70, Basic 80, Basic 11, Basic 95], Basic 1: [Basic 33, Basic 111, Basic 28, Basic 90, Basic 100, Basic 92, Basic 43, Basic 107, Basic 21], Basic 2: [Basic 48, Basic 84, Basic 8, Basic 17, Basic 7]

Auction prices -> Basic 0: 15, Basic 1: 17, Basic 2: 11
Auction Rewards - Basic 2 Server: 11
Next State: Env State (0x24af5581f98) at time step: 31
	Auction Task -> None
	Servers -> Basic 0: [Basic 36, Basic 39, Basic 70, Basic 80, Basic 11, Basic 95], Basic 1: [Basic 33, Basic 111, Basic 28, Basic 90, Basic 100, Basic 92, Basic 43, Basic 107, Basic 21], Basic 2: [Basic 48, Basic 84, Basic 8, Basic 17, Basic 7, Basic 37]

Resource allocation weights -> Basic 0 Server - [B

Auction Rewards - Basic 1 Server: 2
Next State: Env State (0x24af559ef98) at time step: 35
	Auction Task -> Basic 87 Task (0x24af5446f78) - Unassigned, Storage: 80, Comp: 39, Results data: 18, Auction time: 35, Deadline: 46
	Servers -> Basic 0: [Basic 36, Basic 80, Basic 11, Basic 95, Basic 45, Basic 61, Basic 99], Basic 1: [Basic 28, Basic 90, Basic 21, Basic 40, Basic 42, Basic 110, Basic 69], Basic 2: [Basic 8, Basic 17, Basic 7, Basic 37, Basic 38, Basic 3]

Auction prices -> Basic 0: 6, Basic 1: 4, Basic 2: 17
Auction Rewards - Basic 1 Server: 4
Next State: Env State (0x24af5581f98) at time step: 35
	Auction Task -> None
	Servers -> Basic 0: [Basic 36, Basic 80, Basic 11, Basic 95, Basic 45, Basic 61, Basic 99], Basic 1: [Basic 28, Basic 90, Basic 21, Basic 40, Basic 42, Basic 110, Basic 69, Basic 87], Basic 2: [Basic 8, Basic 17, Basic 7, Basic 37, Basic 38, Basic 3]

Resource allocation weights -> Basic 0 Server - [Basic 36 Task: 14, Basic 80 Task: 10, Basic 11 Task: 10, Basic 9

Resource allocation Rewards - Basic 0: [Basic 80, Basic 11, Basic 95], Basic 1: [], Basic 2: []
Env Done: False
Next State: Env State (0x24af559ef98) at time step: 40
	Auction Task -> Basic 30 Task (0x24af5424af8) - Unassigned, Storage: 50, Comp: 33, Results data: 15, Auction time: 40, Deadline: 48
	Servers -> Basic 0: [Basic 45, Basic 61, Basic 99, Basic 58, Basic 63], Basic 1: [Basic 21, Basic 40, Basic 42, Basic 110, Basic 69, Basic 87, Basic 82, Basic 59, Basic 71, Basic 88], Basic 2: [Basic 37, Basic 38, Basic 3, Basic 65, Basic 105]

Auction prices -> Basic 0: 11, Basic 1: 24, Basic 2: 8
Auction Rewards - Basic 2 Server: 8
Next State: Env State (0x24af5581f98) at time step: 40
	Auction Task -> Basic 60 Task (0x24af5446048) - Unassigned, Storage: 78, Comp: 58, Results data: 24, Auction time: 40, Deadline: 49
	Servers -> Basic 0: [Basic 45, Basic 61, Basic 99, Basic 58, Basic 63], Basic 1: [Basic 21, Basic 40, Basic 42, Basic 110, Basic 69, Basic 87, Basic 82, Basic 59, Basic 71, B

Auction prices -> Basic 0: 19, Basic 1: 2, Basic 2: 0
Auction Rewards - Basic 1 Server: 2
Next State: Env State (0x24af5581f98) at time step: 44
	Auction Task -> Basic 97 Task (0x24af5434558) - Unassigned, Storage: 80, Comp: 31, Results data: 17, Auction time: 44, Deadline: 53
	Servers -> Basic 0: [Basic 61, Basic 99, Basic 58, Basic 63, Basic 104], Basic 1: [Basic 40, Basic 42, Basic 69, Basic 87, Basic 82, Basic 59, Basic 71, Basic 88, Basic 94, Basic 73, Basic 41], Basic 2: [Basic 38, Basic 3, Basic 105, Basic 30, Basic 60, Basic 96, Basic 101, Basic 29]

Auction prices -> Basic 0: 12, Basic 1: 2, Basic 2: 14
Auction Rewards - Basic 1 Server: 2
Next State: Env State (0x24af559ef98) at time step: 44
	Auction Task -> None
	Servers -> Basic 0: [Basic 61, Basic 99, Basic 58, Basic 63, Basic 104], Basic 1: [Basic 40, Basic 42, Basic 69, Basic 87, Basic 82, Basic 59, Basic 71, Basic 88, Basic 94, Basic 73, Basic 41, Basic 97], Basic 2: [Basic 38, Basic 3, Basic 105, Basic 30, Basic 60, Ba

Auction Rewards - Basic 0 Server: 14
Next State: Env State (0x24af5581f98) at time step: 48
	Auction Task -> Basic 79 Task (0x24af5446af8) - Unassigned, Storage: 72, Comp: 40, Results data: 14, Auction time: 48, Deadline: 55
	Servers -> Basic 0: [Basic 58, Basic 104, Basic 55], Basic 1: [Basic 94, Basic 73, Basic 41, Basic 97, Basic 51, Basic 24], Basic 2: [Basic 30, Basic 60, Basic 96, Basic 101, Basic 29, Basic 23, Basic 62, Basic 5, Basic 0]

Auction prices -> Basic 0: 19, Basic 1: 20, Basic 2: 14
Auction Rewards - Basic 2 Server: 14
Next State: Env State (0x24af55d0ef8) at time step: 48
	Auction Task -> None
	Servers -> Basic 0: [Basic 58, Basic 104, Basic 55], Basic 1: [Basic 94, Basic 73, Basic 41, Basic 97, Basic 51, Basic 24], Basic 2: [Basic 30, Basic 60, Basic 96, Basic 101, Basic 29, Basic 23, Basic 62, Basic 5, Basic 0, Basic 79]

Resource allocation weights -> Basic 0 Server - [Basic 58 Task: 6, Basic 104 Task: 11, Basic 55 Task: 19], Basic 1 Server - [Basic 94 Task: 4, Ba

In [16]:
# Training step: on every third episode, call train() on each agent that was
# assigned to a server in the most recent episode -- task pricing agents first,
# then resource weighting agents. Agents left unassigned are not trained here.
if episode % 3 == 0:
    for server_agents in (_server_task_pricing_agents, _server_resource_allocation_agents):
        for agent in server_agents.values():
            agent.train()

In [17]:
# Evaluation step: on every fifteenth episode, hand the saved environment files
# (the `_training_envs` path list) and both full agent pools to eval_env.
if not episode % 15:
    eval_env(_training_envs, _task_pricing_agents, _resource_weighting_agents)