# Data Generation

## 1. 1-label data generation

In [None]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [None]:
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
from kube_hr_scheduler.strategies.model.default import Model

In [None]:
env = gym.make('SimKubeEnv-v0')
scheduler = SimHrScheduler(env, 'default.py')

In [126]:
# Data generation
# Data consists of state, scheduler decision
data_size = 10000000
with open('data.csv', 'w') as f:
    for i in range(data_size):
        state = list(env.random_state_gen())
        action = scheduler.decision(env)
        data = state + [action]
        f.write(','.join(list(map(str,data))) + '\n')

## 2. 6-confidence data generation

In [None]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [None]:
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
from kube_hr_scheduler.strategies.model.default import Model

In [None]:
env = gym.make('SimKubeEnv-v0')
scheduler = SimHrScheduler(env, 'default.py')

In [None]:
# Data generation
# Data consists of state, scheduler decision
data_size = 1000000
with open('data_2.csv', 'w') as f:
    for i in range(data_size):
        state = list(env.random_state_gen())
        confidence = scheduler.get_confidence(env)
        data = state + confidence.tolist()
        f.write(','.join(list(map(str,data))) + '\n')

# Training

## Set Dataset

In [88]:
import torch
from torch.utils.data import Dataset, DataLoader

import pandas as pd

In [89]:
class CustomDataset(Dataset):
    def __init__(self, csv_path, train=True):
        self.data = pd.read_csv(csv_path)

        if train:
            self.data = self.data.sample(frac=0.8, random_state=42)
        else:
            self.data = self.data.drop(self.data.sample(frac=0.8, random_state=42).index)

        self.data = self.transform(self.data)
        self.input = self.data[:, :-1]
        self.label = self.data[:, -1]
        # Long tensor for cross entropy loss
        self.label = torch.tensor(self.label, dtype=torch.long)

    def transform(self, data):
        return torch.tensor(data.values, dtype=torch.float32)

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.input[idx], self.label[idx]

In [90]:
train_dataset = CustomDataset('data.csv', train=True)
test_dataset = CustomDataset('data.csv', train=False)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

  self.label = torch.tensor(self.label, dtype=torch.long)
  self.label = torch.tensor(self.label, dtype=torch.long)


In [91]:
for batch in train_dataloader:
    inputs, labels = batch
    print(inputs.shape, labels.shape)
    print(inputs, labels)
    break

torch.Size([32, 12]) torch.Size([32])
tensor([[0.4800, 0.1100, 0.1000, 0.5700, 0.8100, 0.2800, 0.1800, 0.9800, 0.6300,
         0.1300, 0.0000, 0.0000],
        [0.3400, 1.0000, 0.3200, 0.6500, 0.4300, 0.7700, 0.7200, 0.2400, 0.8400,
         0.1700, 0.0000, 0.0000],
        [0.8800, 0.7000, 0.6600, 0.1100, 0.4900, 0.2900, 0.6500, 0.5100, 0.4300,
         0.5300, 0.4000, 0.0300],
        [0.1600, 0.0000, 0.7000, 0.6500, 0.6400, 0.9700, 0.1800, 0.0400, 0.7300,
         0.0100, 0.2900, 0.4400],
        [0.8700, 0.3700, 0.4800, 0.3700, 0.0400, 0.0800, 0.1500, 0.3000, 0.1300,
         0.1500, 0.0000, 0.0000],
        [0.9700, 0.0200, 0.5200, 0.8900, 0.1300, 0.8300, 0.0600, 0.1200, 0.0900,
         0.3600, 0.0000, 0.0000],
        [0.2300, 0.0200, 0.0700, 0.4400, 0.2300, 0.4700, 0.7700, 0.6900, 0.6000,
         0.3600, 0.0200, 0.1600],
        [0.6900, 0.5900, 0.4700, 0.6900, 0.5300, 0.8000, 0.1300, 0.3900, 0.3700,
         0.7600, 0.0400, 0.1400],
        [0.8000, 0.2500, 0.4200, 0.2300, 0

## Model

In [92]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(12, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 6)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [93]:
net = Net()
print(net)

Net(
  (fc1): Linear(in_features=12, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=6, bias=True)
)


In [94]:
# Training
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

## Train

In [95]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)
        # print(f"output: {output}")
        # print(f"target: {target}")
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

In [96]:
def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            test_loss += criterion(output, target).item() * data.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [97]:
epochs = 500
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(net, train_dataloader, criterion, optimizer)
    test_loss, test_acc = test(net, test_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 99:
        break

Epoch 1: Train Loss: 0.8049, Train Acc: 71.55%, Test Loss: 0.5964, Test Acc: 77.76%
Epoch 2: Train Loss: 0.4581, Train Acc: 82.60%, Test Loss: 0.3409, Test Acc: 86.98%
Epoch 3: Train Loss: 0.2698, Train Acc: 90.05%, Test Loss: 0.2561, Test Acc: 90.45%
Epoch 4: Train Loss: 0.2352, Train Acc: 91.35%, Test Loss: 0.2483, Test Acc: 90.86%
Epoch 5: Train Loss: 0.2266, Train Acc: 91.75%, Test Loss: 0.2422, Test Acc: 91.11%
Epoch 6: Train Loss: 0.2218, Train Acc: 91.94%, Test Loss: 0.2398, Test Acc: 91.25%
Epoch 7: Train Loss: 0.2182, Train Acc: 92.11%, Test Loss: 0.2371, Test Acc: 91.40%
Epoch 8: Train Loss: 0.2151, Train Acc: 92.26%, Test Loss: 0.2356, Test Acc: 91.47%
Epoch 9: Train Loss: 0.2120, Train Acc: 92.38%, Test Loss: 0.2307, Test Acc: 91.53%
Epoch 10: Train Loss: 0.2093, Train Acc: 92.47%, Test Loss: 0.2266, Test Acc: 91.70%
Epoch 11: Train Loss: 0.2065, Train Acc: 92.56%, Test Loss: 0.2216, Test Acc: 91.79%
Epoch 12: Train Loss: 0.2037, Train Acc: 92.66%, Test Loss: 0.2182, Test A

## Test

In [98]:
# predict the score for each action
def predict_score(data, model=net):
    model.eval()
    if not torch.is_tensor(data):
        data = torch.tensor(data, dtype=torch.float32)
    output = model(data)
    # Softmax
    output = F.softmax(output, dim=1)
    print(output)

In [99]:
def predict(data, model=net):
    model.eval()
    with torch.no_grad():
        # if data is not torch tensor, convert it to torch tensor
        if not torch.is_tensor(data):
            data = torch.tensor(data, dtype=torch.float32)
        output = model(data)
        return output.argmax(dim=1, keepdim=True)


In [100]:
sample1 = [[0.99, 0.90, 0.80, 0.80, 0.95, 0.95, 0.90, 0.85, 0.0, 0.0, 0.0, 0.0]]
sample2 = [[0.99, 0.90, 0.80, 0.80, 0.95, 0.95, 0.90, 0.85, 0.0, 0.0, 0.6, 0.7]]

In [101]:
predict(sample1)

tensor([[0]])

In [102]:
predict(sample2)

tensor([[5]])

In [103]:
predict_score(sample1)
predict_score(sample2)

tensor([[1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.5359e-25]],
       grad_fn=<SoftmaxBackward0>)
tensor([[3.7056e-28, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]],
       grad_fn=<SoftmaxBackward0>)


# Multi Moddal training

### Dataset

In [136]:
import torch
from torch.utils.data import Dataset, DataLoader

import pandas as pd

In [137]:
class MMDataset(Dataset):
    def __init__(self, csv_path, train=True):
        self.data = pd.read_csv(csv_path)
        # Drop the row which has 0 for the last -2, -3 columns
        # self.data = self.data.drop(self.data[(self.data.iloc[:, -2] == 0) & (self.data.iloc[:, -3] == 0)].index)

        if train:
            self.data = self.data.sample(frac=0.8, random_state=42)
        else:
            self.data = self.data.drop(self.data.sample(frac=0.8, random_state=42).index)

        self.data = self.transform(self.data)
        self.input1 = self.data[:, :-3]
        self.input2 = self.data[:, -3:-1]
        self.label = self.data[:, -1]
        # Long tensor for cross entropy loss
        self.label = torch.tensor(self.label, dtype=torch.long)

    def transform(self, data):
        return torch.tensor(data.values, dtype=torch.float32)

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.input1[idx], self.input2[idx], self.label[idx]

In [138]:
train_dataset = MMDataset('data.csv', train=True)
test_dataset = MMDataset('data.csv', train=False)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

  self.label = torch.tensor(self.label, dtype=torch.long)
  self.label = torch.tensor(self.label, dtype=torch.long)


In [139]:
for batch in train_dataloader:
    input1, input2, labels = batch
    print(input1.shape, input2.shape, labels.shape)
    print(f"input1: {input1}\ninput2: {input2}\nlabels: {labels}")
    break

torch.Size([32, 10]) torch.Size([32, 2]) torch.Size([32])
input1: tensor([[0.4800, 0.1100, 0.1000, 0.5700, 0.8100, 0.2800, 0.1800, 0.9800, 0.6300,
         0.1300],
        [0.3400, 1.0000, 0.3200, 0.6500, 0.4300, 0.7700, 0.7200, 0.2400, 0.8400,
         0.1700],
        [0.8800, 0.7000, 0.6600, 0.1100, 0.4900, 0.2900, 0.6500, 0.5100, 0.4300,
         0.5300],
        [0.1600, 0.0000, 0.7000, 0.6500, 0.6400, 0.9700, 0.1800, 0.0400, 0.7300,
         0.0100],
        [0.8700, 0.3700, 0.4800, 0.3700, 0.0400, 0.0800, 0.1500, 0.3000, 0.1300,
         0.1500],
        [0.9700, 0.0200, 0.5200, 0.8900, 0.1300, 0.8300, 0.0600, 0.1200, 0.0900,
         0.3600],
        [0.2300, 0.0200, 0.0700, 0.4400, 0.2300, 0.4700, 0.7700, 0.6900, 0.6000,
         0.3600],
        [0.6900, 0.5900, 0.4700, 0.6900, 0.5300, 0.8000, 0.1300, 0.3900, 0.3700,
         0.7600],
        [0.8000, 0.2500, 0.4200, 0.2300, 0.6300, 0.5800, 0.3200, 0.1800, 0.9700,
         0.5100],
        [0.8100, 0.8100, 0.4200, 0.5900, 0.

## Step1: 
Pod + Nodes(5) as inputs, Outputting the best action, Comparing with default scheduler label

In [148]:
import torch.nn as nn
import torch.nn.functional as F

class MultiModalNet(nn.Module):
    def __init__(self):
        super(MultiModalNet, self).__init__()
        self.fc1_2 = nn.Linear(2, 16)   # Pod quota (CPU, Memory)
        self.fc1_10 = nn.Linear(10, 16) # 5 Nodes status (CPU, Memory)
        self.fc2 = nn.Linear(32, 16)    # Concatenated vector
        self.fc3 = nn.Linear(16, 6)     # 6-sized vector

    def forward(self, x2, x10):
        x2 = F.relu(self.fc1_2(x2))
        x10 = F.relu(self.fc1_10(x10))  
        x = torch.cat((x2, x10), dim=1) 
        x = F.relu(self.fc2(x))  
        x = self.fc3(x)     
        # x = F.softmax(self.fc3(x), dim=1)
        # x = torch.argmax(x, dim=1)
        return x

net_1 = MultiModalNet()
print(net_1)

MultiModalNet(
  (fc1_2): Linear(in_features=2, out_features=16, bias=True)
  (fc1_10): Linear(in_features=10, out_features=16, bias=True)
  (fc2): Linear(in_features=32, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=6, bias=True)
)


In [149]:
# Training
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net_1.parameters(), lr=0.001)

In [150]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for data10, data2, target in train_loader:
        optimizer.zero_grad()
        output = model(data2, data10)
        # print(f"output: {output}")
        # print(f"target: {target}")
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data2.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

In [151]:
def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data10, data2, target in test_loader:
            output = model(data2, data10)
            test_loss += criterion(output, target).item() * data2.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [152]:
epochs = 500
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(net_1, train_dataloader, criterion, optimizer)
    test_loss, test_acc = test(net_1, test_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.9316, Train Acc: 65.41%, Test Loss: 0.7023, Test Acc: 74.40%
Epoch 2: Train Loss: 0.6733, Train Acc: 74.72%, Test Loss: 0.6485, Test Acc: 75.63%
Epoch 3: Train Loss: 0.6241, Train Acc: 76.11%, Test Loss: 0.6077, Test Acc: 76.96%
Epoch 4: Train Loss: 0.5980, Train Acc: 76.99%, Test Loss: 0.5921, Test Acc: 77.36%
Epoch 5: Train Loss: 0.5888, Train Acc: 77.22%, Test Loss: 0.5873, Test Acc: 77.52%
Epoch 6: Train Loss: 0.5854, Train Acc: 77.32%, Test Loss: 0.5855, Test Acc: 77.47%
Epoch 7: Train Loss: 0.5840, Train Acc: 77.30%, Test Loss: 0.5847, Test Acc: 77.51%
Epoch 8: Train Loss: 0.5670, Train Acc: 77.92%, Test Loss: 0.5448, Test Acc: 79.12%
Epoch 9: Train Loss: 0.5320, Train Acc: 79.27%, Test Loss: 0.5226, Test Acc: 80.08%
Epoch 10: Train Loss: 0.5184, Train Acc: 79.75%, Test Loss: 0.5141, Test Acc: 80.19%
Epoch 11: Train Loss: 0.5134, Train Acc: 79.96%, Test Loss: 0.5110, Test Acc: 80.24%
Epoch 12: Train Loss: 0.5114, Train Acc: 80.03%, Test Loss: 0.5095, Test A

# RL algorithm

In [None]:
# A3C model
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
class A3C(nn.Module):
    def __init__(self, input_size, output_size):
        super(A3C, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 64)
        self.actor = nn.Linear(64, output_size)
        self.critic = nn.Linear(64, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        policy = F.softmax(self.actor(x), dim=-1)
        value = self.critic(x)
        return policy, value
    
    def act(self, state):
        state = torch.from_numpy(state).float().unsqueeze(0)
        policy, _ = self.forward(state)
        action = policy.multinomial(1)
        return action.item()
    
    def evaluate(self, state):
        policy, value = self.forward(state)
        return policy, value
    
    def get_action(self, state):
        policy, _ = self.forward(state)
        action = policy.max(1)[1].detach().numpy()[0]
        return action
    
    def get_value(self, state):
        _, value = self.forward(state)
        return value
    
    def get_policy(self, state):
        policy, _ = self.forward(state)
        return policy
    
    def get_entropy(self, state):
        policy, _ = self.forward(state)
        entropy = -(policy * torch.log(policy)).sum(1).mean()
        return entropy
    
    def get_log_prob(self, state, action):
        policy, _ = self.forward(state)
        action_prob = policy.gather(1, action)
        log_prob = torch.log(action_prob)
        return log_prob
    
    def get_loss(self, state, action, reward, next_state, done, gamma=0.99, beta=0.01):
        _, value = self.evaluate(state)
        _, next_value = self.evaluate(next_state)
        target = reward + gamma * next_value * (1 - done)
        delta = target - value
        policy, _ = self.forward(state)
        action_prob = policy.gather(1, action)
        log_prob = torch.log(action_prob)
        entropy = -(policy * torch.log(policy)).sum(1).mean()
        loss = -log_prob * delta.detach() + beta * entropy
        return loss.mean()

In [None]:
# A3C agent
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import gym
import numpy as np
import matplotlib.pyplot as plt
from collections import deque
import random

class A3CAgent:
    def __init__(self, env):
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        self.model = A3C(self.state_size, self.action_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        
    def train(self, max_episode_num=500):
        episode, episode_reward, done = 0, 0, False
        state = self.env.reset()
        while episode < max_episode_num:
            action = self.model.act(state)
            next_state, reward, done, _ = self.env.step(action)
            self.optimizer.zero_grad()
            loss = self.model.get_loss(state, action, reward, next_state, done)
            loss.backward()
            self.optimizer.step()
            state = next_state
            episode_reward += reward
            if done:
                print(f'Episode: {episode+1}, Episode Reward: {episode_reward:.2f}')
                episode += 1
                episode_reward = 0
                state = self.env.reset()
        self.env.close()
    
    def play(self):
        state = self.env.reset()
        done = False
        while not done:
            self.env.render()
            action = self.model.get_action(state)
            next_state, reward, done, _ = self.env.step(action)
            state = next_state
        self.env.close()