In [49]:
# import torch
# import torchvision
import numpy as np
import matplotlib.pyplot as plt
import unittest
import random
import math
import datetime as dt
from cluster_sim import Cluster

In [53]:
# Width of one memory bucket: Agent.calc_bucket() divides the rounded memory value by this.
PERCENTAGE = 100/3
# Default node count for Environment.__init__/reset; also passed to Agent when it is built.
NUM_OF_NODES = 3
# Default for Environment.reset's `c` argument (stored as node_capacity).
NODE_CAPACITY = 1
# Default pod-lookahead length; the training loop waits until this many pods are pending.
PODS_LOOKAHEAD = 1

class Agent:
    """Tabular Q-learning agent that chooses where to place the next pod.

    A state is collapsed into a string key made of bucketed memory values
    (see ``state_to_q_entry``). Every key maps to a list of
    ``num_of_nodes + 1`` Q-values: the first ``num_of_nodes`` slots refer to
    nodes in sorted-bucket order, the last slot maps to the action ``-1``
    (presumably "do not schedule" -- confirm against ``Cluster.schedule_pod``).
    """

    def __init__(self, num_of_nodes, _alpha, _gamma):
        """
        :param num_of_nodes: number of nodes the agent can choose between
        :param _alpha: weight given to the new estimate in the Q-value update
        :param _gamma: weight given to the best next-state Q-value in the update
        """
        self.num_of_nodes = num_of_nodes
        self.num_of_actions = num_of_nodes + 1
        self.alpha = _alpha
        self.gamma = _gamma
        self.q_table = {}

    def sample(self):
        """Return a random action slot in ``[0, num_of_nodes]`` (both ends inclusive)."""
        # randint is inclusive on both ends, so the extra "-1" slot is reachable.
        return random.randint(0, self.num_of_nodes)

    def get_action(self, curr_state, _explore=False):
        """Pick an action for ``curr_state``.

        :param curr_state: ``(pod_lookahead_list, nodes_memory_view)``
        :param _explore: when true pick a random slot, otherwise the slot with
            the highest Q-value for this state
        :return: ``(q_table_key, chosen_slot, action)`` where ``action`` is the
            node id taken from ``nodes_memory_view`` or ``-1`` for the last slot
        """
        _curr_entry = self.state_to_q_entry(curr_state)
        if _explore:
            _last_node_sorted_index = self.sample()
        else:
            _last_node_sorted_index = np.argmax(self.q_table[_curr_entry])

        if _last_node_sorted_index >= self.num_of_nodes:
            _action = -1
        else:
            # The key starts with the sorted node buckets, one character each,
            # so the chosen slot indexes directly into it. This relies on every
            # bucket being a single digit.
            node_memory_bucket = int(_curr_entry[_last_node_sorted_index])
            _, nodes_memory_view = curr_state
            node_buckets = [self.calc_bucket(memory) for _, memory in nodes_memory_view]
            # Map the bucket back to the first real node that falls into it.
            real_node_index = node_buckets.index(node_memory_bucket)
            _action = nodes_memory_view[real_node_index][0]

        return _curr_entry, _last_node_sorted_index, _action

    def update_q_table(self, _curr_entry, _next_state, _last_node_sorted_index, _reward):
        """Apply one Q-learning update to ``q_table[_curr_entry][_last_node_sorted_index]``.

        new = (1 - alpha) * old + alpha * (reward + gamma * max(Q[next_state]))

        :param _curr_entry: Q-table key of the state the action was taken in
        :param _next_state: state observed after the action (raw, not a key)
        :param _last_node_sorted_index: slot that was chosen in ``_curr_entry``
        :param _reward: reward received for the action
        """
        next_entry = self.state_to_q_entry(_next_state)
        old_value = self.q_table[_curr_entry][_last_node_sorted_index]
        next_max = max(self.q_table[next_entry])
        # Use the agent's own hyper-parameters rather than module-level globals.
        self.q_table[_curr_entry][_last_node_sorted_index] = (
            (1 - self.alpha) * old_value
            + self.alpha * (_reward + self.gamma * next_max)
        )

    def state_to_q_entry(self, _state):
        """Convert a raw state into its Q-table key, creating the row if needed.

        :param _state: (pod_lookahead_list, nodes_memory_view) -> list
        pod_lookahead_list: (pod.uuid, pod.memory)
        nodes_memory_view: (node.index, node.memory_left)
        :return: string of sorted node buckets followed by sorted pod buckets
        """
        pod_lookahead_list, nodes_memory_view = _state
        pods_buckets = sorted([str(self.calc_bucket(memory_left)) for uuid, memory_left in pod_lookahead_list])
        nodes_buckets = sorted([str(self.calc_bucket(memory_left)) for index, memory_left in nodes_memory_view])

        entry_list = nodes_buckets + pods_buckets
        entry = "".join(entry_list)

        # Lazily initialise unseen states with all-zero Q-values.
        if entry not in self.q_table:
            self.q_table[entry] = [0] * self.num_of_actions

        return entry

    @staticmethod
    def calc_bucket(node_memory_left):
        """Return the bucket index of a memory value: floor(round(value) / PERCENTAGE)."""
        return math.floor(round(node_memory_left, 0) / PERCENTAGE)


class Environment:
    """Thin wrapper that exposes the ``Cluster`` simulation as an RL environment."""

    def __init__(self, num_of_nodes=NUM_OF_NODES, random_scheduler=None):
        """
        :param num_of_nodes: number of nodes handed to the simulation
        :param random_scheduler: passed through to ``Cluster`` unchanged
        """
        self.q_table = None
        self.num_of_actions = None
        self.num_of_pods_lookahead = None
        self.pods_in_nodes = None
        self.node_capacity = None
        self.num_of_nodes = None
        self.simulation = None
        # Forward the constructor arguments so they actually take effect.
        self.reset(n=num_of_nodes, random_scheduler=random_scheduler)

    def reset(self, n=NUM_OF_NODES, c=NODE_CAPACITY, p=PODS_LOOKAHEAD, random_scheduler=None):
        """Re-initialise the bookkeeping fields and build a fresh ``Cluster``.

        :param n: number of nodes
        :param c: node capacity
        :param p: number of pending pods to look ahead
        :param random_scheduler: passed through to ``Cluster`` unchanged
        """
        self.num_of_nodes = n
        self.node_capacity = c # Capacity - free memory left
        self.pods_in_nodes = [0] * self.num_of_nodes
        self.num_of_pods_lookahead = p
        self.num_of_actions = n + 1
        self.simulation = Cluster(n, random_scheduler)

    def start(self, run_time_seconds):
        """Start a training run of the simulation for ``run_time_seconds``."""
        self.simulation.train_run(run_time_seconds)

    def end(self):
        """End the current training run of the simulation."""
        self.simulation.end_train_run()

    def get_state(self):
        """Return the current state of this environment's own simulation."""
        return self.simulation.get_current_state()

    def step(self, node_choice):
        """Schedule the pending pod on ``node_choice`` and score the outcome.

        input: node_choice - int from -1 to (num_of_nodes -1)
        output:
        next_state: Observations of the environment after the action
        reward: -1 if the simulation reported an overload, otherwise the
                number of nodes in next_state whose second field is >= 95
        """
        _, overload = self.simulation.schedule_pod(node_choice)
        _next_state = self.simulation.get_current_state()

        if overload:
            _reward = -1
        else:
            # Score the state observed right after the action rather than a
            # notebook-level global.
            # NOTE(review): the captured logs show memory values such as 0.2 and 1,
            # so the 95 threshold may be on a different scale -- confirm units.
            _reward = sum(1 for node in _next_state[1] if node[1] >= 95)

        return _next_state, _reward


In [54]:
# Build the environment with default settings; the constructor calls reset(),
# which creates the Cluster simulation (hence the "Initializing cluster" log lines).
env = Environment()

2022-03-09 21:25:20 Roees-MacBook-Air.local root[26250] INFO Initializing cluster...
2022-03-09 21:25:20 Roees-MacBook-Air.local root[26250] INFO Cluster config: NUM_OF_NODES = 3, NODE_MEMORY = 1, SPAWN_TIME_INTERVAL_SECS = 3


In [55]:
# Hyper parameters for the Q-value update
alpha = 0.1  # weight of the new estimate versus the old Q-value
gamma = 0.6  # weight of the best next-state Q-value
agent = Agent(NUM_OF_NODES, alpha, gamma)

In [None]:
# nodes = [(12312, 40), (11111, 75), (3333, 18)]
# pods = [(1233, 60), (786532, 21), (3154623, 91)]
# s = (pods, nodes)
#
# env.q_table = {
#     "147269": [0.2,0.6,1,0.3],
#     "235999": [0,0.2,2,4]
#     }
# action = env.get_action(s, explore=False)
# env.step(action)
# env.update_q_table(0.1, 0.5)

In [58]:
import logging
import time

logging.basicConfig(
    level=logging.DEBUG,
    format='{asctime} {levelname:<6} {message}',
    style='{'
)

epsilon = 0.1  # probability of taking a random (exploratory) action
seconds_per_epoch = 30
num_epochs = 1  # number of training epochs to run

# For plotting metrics (declared here; the loop below does not populate them yet)
all_epochs = []
all_penalties = []

for i in range(1, num_epochs + 1):

    pods_assigned = 0
    env.reset()
    # The simulation is asked to run one second longer than the training window.
    env.start(seconds_per_epoch + 1)
    end_time = dt.datetime.now() + dt.timedelta(seconds=seconds_per_epoch)

    try:
        while dt.datetime.now() < end_time:
            # state = (pending_pods_lookahead, (node_id, nodes_bucket))
            state = env.get_state()
            # Wait for enough pending pods, but never past the end of the epoch.
            while len(state[0]) < PODS_LOOKAHEAD and dt.datetime.now() < end_time:
                time.sleep(0.0001)
                state = env.get_state()
            if len(state[0]) < PODS_LOOKAHEAD:
                # Epoch ran out while waiting; there is nothing to schedule.
                break

            explore = random.uniform(0, 1) < epsilon

            curr_entry, last_node_sorted_index, action = agent.get_action(state, explore)

            next_state, reward = env.step(action)

            agent.update_q_table(curr_entry, next_state, last_node_sorted_index, reward)

            pods_assigned += 1
    finally:
        # Always stop the simulation run, even if a step raised.
        env.end()

    print(f"{pods_assigned=}")
    print(f"Episode: {i} out of {num_epochs}, pods assigned in episode: {pods_assigned}")

print("Training finished.\n")

2022-03-09 21:28:20 Roees-MacBook-Air.local root[26250] INFO Initializing cluster...
2022-03-09 21:28:20 Roees-MacBook-Air.local root[26250] INFO Cluster config: NUM_OF_NODES = 3, NODE_MEMORY = 1, SPAWN_TIME_INTERVAL_SECS = 3
2022-03-09 21:28:20 Roees-MacBook-Air.local root[26250] INFO Run started, running for 31 seconds
2022-03-09 21:28:20 Roees-MacBook-Air.local root[26250] INFO Done setting up intervals
2022-03-09 21:28:23 Roees-MacBook-Air.local root[26250] INFO New pod pending! UUID: 1c9d3a, Memory: 0.2, runtime: 26.45641364505309
2022-03-09 21:28:23 Roees-MacBook-Air.local root[26250] INFO Inside schedule pod, with pod 1c9d3a
2022-03-09 21:28:23 Roees-MacBook-Air.local root[26250] INFO node_index: 0
2022-03-09 21:28:23 Roees-MacBook-Air.local root[26250] INFO pod.memory=0.2, node.memory_left=1
2022-03-09 21:28:23 Roees-MacBook-Air.local root[26250] INFO Pod 1c9d3a scheduled on Node 0
2022-03-09 21:28:23 Roees-MacBook-Air.local root[26250] INFO scheduled=True, overload=False
2022-

pods_assigned=3932
Episode: 1 out of 30, pods assigned in episode: 3932
Training finished.



In [59]:
agent.q_table

{'0000': [-0.9901158555887433, -0.995693042687574, -0.9998407320891147, 0.0],
 '000': [0, 0, 0, 0]}

# All the issues left
## What if there are fewer pods waiting than the lookahead?