In [1]:
from CloudEnvModule import CloudEnv, STOP, CpuMem, calculate_evenness
from gym.spaces import Box
import numpy as np

In [84]:
class NewCloudEnv(CloudEnv):
    def __init__(self):
        super().__init__()
        self.timestamp = 0

    def get_reward(self):
        ans = 0
        ans -= self.timestamp
        for i in range(self.n):
            if self.vms[i].cpu == STOP:
                ans += 1
        ans *= 0.1
        ans += calculate_evenness(self.servers)
        return self.update_score(ans)

    def step(self, action):
        self.timestamp += 1
        return super().step(action)
    
    def reset(self):
        self.timestamp = 0
        return super().reset()

env = NewCloudEnv()

In [85]:
# Average reward over n_episodes of given policy

def policyEvaluation(env, policy, n_episodes=10):
    scores = []
    for episode in range(n_episodes):
        scores.append(policy(env))
    print(np.average(scores))

In [86]:
# First matching server

def processGreedyFirstServer(env):
    done = False
    state = env.reset()
    score = 0
    def getCpuMem(vec):
        return CpuMem(vec[0], vec[1])
    while not done:
        ans = [-1, -1]
        for i in range(env.n):
            for j in range(env.m):
                if getCpuMem(state['vms'][i]) <= getCpuMem(state['servers'][j]):
                    ans = [i, j]
        state, reward, done, _ = env.step(ans)
        score += reward
    return score

In [87]:
# Random matching vm/server

def processRandomVmServer(env):
    done = False
    state = env.reset()
    score = 0
    def getCpuMem(vec):
        return CpuMem(vec[0], vec[1])
    while not done:
        ans = [-1, -1]
        while ans == [-1, -1]:
            j = np.random.randint(env.m)
            if getCpuMem(state['vms'][i]) <= getCpuMem(state['servers'][j]):
                ans = [i, j]
        state, reward, done, _ = env.step(ans)
        score += reward
    return score

In [88]:
# Match a server with the biggest current LCPU + LMEM

def processBiggestServer(env):
    done = False
    state = env.reset()
    score = 0
    def getCpuMem(vec):
        return CpuMem(vec[0], vec[1])
    while not done:
        ans = [-1, -1]
        best = -1
        for i in range(env.n):
            for j in range(env.m):
                vm = getCpuMem(state['vms'][i])
                server = getCpuMem(state['servers'][j])
                if vm <= server and server.cpu + server.mem > best:
                    ans = [i, j]
                    best = server.cpu + server.mem
        state, reward, done, _ = env.step(ans)
        score += reward
    return score

In [None]:
# Random matching vm/server

def processRandomVmServerWithBannedMoves(env):
    done = False
    state = env.reset()
    score = 0
    def getCpuMem(vec):
        return CpuMem(vec[0], vec[1])
    while not done:
        ans = [-1, -1]
        while ans == [-1, -1]:
            j = np.random.randint(env.m)
            if getCpuMem(state['vms'][i]) <= getCpuMem(state['servers'][j]):
                ans = [i, j]
        state, reward, done, _ = env.step(ans)
        score += reward
    return score

In [89]:
policyEvaluation(env, processGreedyFirstServer, n_episodes=1)

Ans before: -1.0
Ans before 2: 0.0
Ans after: 0.9889053782488918
Ans before: -2.0
Ans before 2: 0.0
Ans after: 0.9780542364434822
Ans before: -3.0
Ans before 2: 0.0
Ans after: 0.9674386465472011
Ans before: -4.0
Ans before 2: 0.0
Ans after: 0.9570510210245536
Ans before: -5.0
Ans before 2: 0.0
Ans after: 0.9468840947550984
Ans before: -6.0
Ans before 2: 0.0
Ans after: 0.9369309080880929
Ans before: -7.0
Ans before 2: 0.0
Ans after: 0.9271847909547516
Ans before: -8.0
Ans before 2: 0.0
Ans after: 0.9176393479618958
Ans before: -9.0
Ans before 2: 0.0
Ans after: 0.9082884443969942
Ans before: -10.0
Ans before 2: 0.0
Ans after: 0.899126193080239
Ans before: -11.0
Ans before 2: 0.0
Ans after: 0.8901469420044419
Ans before: -12.0
Ans before 2: 0.0
Ans after: 0.8813452627082239
Ans before: -13.0
Ans before 2: 0.0
Ans after: 0.8727159393322396
Ans before: -14.0
Ans before 2: 0.0
Ans after: 0.8642539583120745
Ans before: -15.0
Ans before 2: 0.0
Ans after: 0.855954498665021
Ans before: -16.0
Ans

In [8]:
policyEvaluation(env, processRandomVmServer, n_episodes=1)

0.8456129112051152


In [9]:
policyEvaluation(env, processBiggestServer, n_episodes=1) # deterministic

1.0
