## Prep

In [40]:
import os, sys

base_path = os.path.join(os.getcwd(), "..")
print(f"Base Path: {base_path}")
sys.path.append(base_path)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


In [41]:
import stable_baselines3 as sb3
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.logger import configure

from datetime import datetime

import gym
from gym import spaces

import torch
import torch.nn as nn
import torch.nn.functional as F

from kube_sim_gym.envs import *

In [42]:
sample1 = torch.tensor([[0.99, 0.90, 0.80, 0.80, 0.95, 0.95, 0.90, 0.85, 0.0, 0.0, 0.0, 0.0]])
sample2 = torch.tensor([[0.99, 0.90, 0.80, 0.80, 0.95, 0.95, 0.90, 0.85, 0.0, 0.0, 0.6, 0.7]])
sample3 = torch.tensor([[0.99, 0.90, 0.40, 0.40, 0.15, 0.15, 0.90, 0.85, 0.8, 0.8, 0.6, 0.7]])

## RL Training utils

In [43]:
def test_rl_model(scenario_file, rl_model):

    test_env1 = gym.make('SimKubeEnv-v0', reward_file='train_dynamic.py', scenario_file=scenario_file)
    test_env2 = gym.make('SimKubeEnv-v0', reward_file='train_dynamic.py', scenario_file=scenario_file)

    # RL Scheduler
    rl_model.set_env(test_env1)

    # Default Scheduler
    from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
    default_scheduler = SimHrScheduler(test_env2, 'default.py')


    # Test the model
    obs1 = test_env1.reset()
    obs2 = test_env2.reset()
    done1 = False
    done2 = False
    step1 = 0
    step2 = 0
    acc_rew1 = 0
    acc_rew2 = 0

    print(f"Testing with {scenario_file} (my model vs. default)")
    while not done1 or not done2:
        if not done1:
            action1, _ = rl_model.predict(obs1)
            # action1 = rl_scheduler.decision(test_env1)
            obs1, reward1, done1, _ = test_env1.step(action1)
            step1 += 1
            acc_rew1 += reward1
        if not done2:
            action2 = default_scheduler.decision(test_env2)
            obs2, reward2, done2, _ = test_env2.step(action2)
            step2 += 1
            acc_rew2 += reward2

    acc_rew1 = round(acc_rew1, 2)
    acc_rew2 = round(acc_rew2, 2)

    print(f"Test result(reward): {acc_rew1} vs. {acc_rew2}")
    print(f"Test result(step): {step1} vs. {step2}")

    return acc_rew1, acc_rew2, step1, step2

In [44]:
from IPython.display import clear_output
from notebook.net_arch import *
import glob

def train_rl_model(json_tracker_fname):

    date = datetime(1992, 7, 5, 8, 33)
    date = date.strftime("%m%d%Y%H%M")

    log_name = json_tracker_fname.split('.')[0]
    log_path = f'training/log/{log_name}'

    if not os.path.exists(log_path):
        os.makedirs(log_path)

    logger = configure(log_path, ['stdout', 'csv', 'tensorboard'])
    
    # Load the json tracker
    import json
    with open(f'training/{json_tracker_fname}', 'r') as f:
        json_tracker = json.load(f)

    last_idx = json_tracker['last_idx']
    learning_steps = json_tracker['learning_steps']
    model_type = json_tracker['model_type']
    reward_file = json_tracker['reward_file']
    model_fname = json_tracker['model_fname']

    # Environment
    envs = []
    for i in range(1, 50):
        env = gym.make('SimKubeEnv-v0', reward_file=reward_file, scenario_file=f'trace2017_100_{i}.csv')
        envs.append(env)

    current_idx = last_idx + 1 # -1 as default

    # Model type : DQN or PPO
    if model_type == 'DQN':
        model = sb3.DQN
    elif model_type == 'PPO':
        model = sb3.PPO
    else:
        print(f"Unknown model type: {model_type}")
        return
    
    model_fpath = f'net_arch/{model_fname}.zip'

    # Check if the model exists
    # Load Model
    if os.path.exists(model_fpath):
        print(f"Loading the model from {model_fname}")
        model = model.load(model_fpath)
    else: # Error
        print(f"Model file does not exist: {model_fname}")
        return
    
    # If last_idx is not -1 and there's a model trained in training/model, then load the model
    if last_idx != -1 and glob.glob(f'training/model/{model_fname}_*'):
        # Load the model with the latest date
        model_fpaths = glob.glob(f'training/model/{model_fname}_*')
        model_fpaths.sort()
        model_fpath = model_fpaths[-1]
        print(f"Loading the model from {model_fpath}")
    
    # Save the model, append _{date} to the model name
    trained_model_fname = f'{model_fname}_{date}'
    trained_model_fpath = f'training/model/{trained_model_fname}'

    # Set logger
    model.set_logger(logger)

    # Train the model
    while current_idx < 20: # Target training steps (Can be changed!)
        print(f"Training with {current_idx}th trace")

        # Test the model first
        a1, a2, a3, a4 = test_rl_model('scenario-5l-5m-1000p-10m_unbalanced.csv', model)
        b1, b2, b3, b4 = test_rl_model('scenario-10l-3m-1000p-10m_unbalanced.csv', model)
        c1, c2, c3, c4 = test_rl_model('scenario-3l-10m-1000p-10m_unbalanced.csv', model)

        with open(f'training/log/{log_name}/test_result.txt', 'a') as f:
            f.write(f"{current_idx},{a1},{a2},{a3},{a4},{b1},{b2},{b3},{b4},{c1},{c2},{c3},{c4}\n")

        env = envs[current_idx]
        model.set_env(env)
        model.learn(total_timesteps=learning_steps)

        # Save the model
        model.save(trained_model_fpath)

        # Update the json tracker
        json_tracker['last_idx'] = current_idx
        with open(f'training/{json_tracker_fname}', 'w') as f:
            json.dump(json_tracker, f)

        current_idx += 1

        clear_output()

        



### Dataset

In [45]:
import torch
from torch.utils.data import Dataset, DataLoader

import pandas as pd

In [46]:
class Pr_Dataset(Dataset):
    def __init__(self, csv_path, train=True):
        self.data = pd.read_csv(csv_path)
        # Drop the row which has 0 for the last -2, -3 columns
        # self.data = self.data.drop(self.data[(self.data.iloc[:, -2] == 0) & (self.data.iloc[:, -3] == 0)].index)

        if train:
            self.data = self.data.sample(frac=0.8, random_state=42)
        else:
            self.data = self.data.drop(self.data.sample(frac=0.8, random_state=42).index)

        self.data = self.transform(self.data)
        self.input = self.data[:, :-6]
        self.label = self.data[:, -6:]

    def transform(self, data):
        return torch.tensor(data.values, dtype=torch.float32)

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.input[idx], self.label[idx]

In [47]:
import os

data_path = os.path.join(base_path, "dataset", "data_dynamic.csv")
train_dynamic_dataset = Pr_Dataset(data_path, train=True)
test_dynamic_dataset = Pr_Dataset(data_path, train=False)
train_dynamic_dataloader = DataLoader(train_dynamic_dataset, batch_size=64, shuffle=False)
test_dynamic_dataloader = DataLoader(test_dynamic_dataset, batch_size=64, shuffle=False)

In [48]:
for batch in train_dynamic_dataloader:
    input, labels = batch
    print(input.shape, labels.shape)
    print(f"input1: {input}\nlabels: {labels}")
    break

torch.Size([64, 12]) torch.Size([64, 6])
input1: tensor([[0.5600, 0.6700, 0.5200, 0.9400, 0.9100, 0.7400, 0.3200, 0.2900, 0.7300,
         0.4100, 0.2800, 0.1300],
        [0.5900, 0.7400, 0.2100, 0.2000, 0.7800, 0.8000, 0.1200, 0.9400, 0.0600,
         0.7200, 0.3000, 0.2200],
        [0.9700, 0.8100, 0.7500, 0.7100, 0.8400, 0.8000, 0.8100, 0.8800, 0.9500,
         0.9800, 0.0300, 0.0800],
        [0.1000, 0.6000, 0.1000, 0.9000, 0.5800, 0.1500, 0.8400, 0.8900, 0.1300,
         0.9700, 0.1200, 0.2400],
        [0.8700, 0.9500, 0.9100, 0.7700, 0.8500, 0.9300, 0.8300, 0.7200, 0.8600,
         0.9400, 0.0900, 0.1200],
        [0.9700, 0.8700, 0.7500, 0.7200, 0.8300, 0.8000, 0.8500, 0.8900, 0.9200,
         0.8800, 0.2500, 0.2000],
        [1.0000, 0.9300, 0.8600, 0.7200, 0.8000, 0.7100, 0.8900, 0.9900, 1.0000,
         0.9900, 0.2900, 0.1600],
        [0.9300, 0.2300, 0.7000, 0.8200, 0.4100, 0.2000, 0.0600, 0.7900, 0.5800,
         0.3800, 0.1300, 0.1600],
        [0.8900, 0.9000, 0.7300

In [49]:
import os

data_path = os.path.join(base_path, "dataset", "data_dynamic2.csv")
train_dynamic2_dataset = Pr_Dataset(data_path, train=True)
test_dynamic2_dataset = Pr_Dataset(data_path, train=False)
train_dynamic2_dataloader = DataLoader(train_dynamic2_dataset, batch_size=64, shuffle=False)
test_dynamic2_dataloader = DataLoader(test_dynamic2_dataset, batch_size=64, shuffle=False)

In [50]:
for batch in train_dynamic2_dataloader:
    input, labels = batch
    print(input.shape, labels.shape)
    print(f"input1: {input}\nlabels: {labels}")
    break

torch.Size([64, 12]) torch.Size([64, 6])
input1: tensor([[0.9800, 0.8100, 0.7200, 0.7700, 0.9000, 0.7900, 0.9100, 0.8700, 0.9100,
         0.9100, 0.0700, 0.0100],
        [0.6600, 0.0400, 0.4300, 0.9800, 0.7100, 0.3500, 0.0800, 0.1700, 0.8300,
         0.1400, 0.1100, 0.1700],
        [0.7500, 0.9400, 0.9200, 0.8700, 0.9100, 0.8800, 0.9600, 0.8100, 0.9700,
         0.9100, 0.1900, 0.3000],
        [0.8800, 0.4500, 0.5500, 0.2100, 0.2000, 0.0700, 0.8300, 0.3400, 0.3900,
         0.4700, 0.0200, 0.0200],
        [0.9200, 0.8200, 0.4700, 0.9400, 0.1300, 0.7900, 0.5300, 0.8500, 0.4400,
         0.5500, 0.1700, 0.1900],
        [0.4300, 0.1600, 0.2300, 0.8100, 0.4400, 0.2800, 0.4100, 0.7900, 0.7600,
         0.0800, 0.2100, 0.1800],
        [0.7300, 0.9600, 0.7900, 0.7600, 0.7800, 0.7600, 0.9500, 0.9400, 0.8500,
         0.8800, 0.0400, 0.2100],
        [0.5500, 0.4700, 1.0000, 0.8600, 0.1000, 0.1400, 0.1900, 0.5500, 0.7500,
         0.7200, 0.1800, 0.0200],
        [0.8900, 0.3600, 0.6900

In [51]:
import os

data_path = os.path.join(base_path, "dataset", "data_default.csv")
train_default_dataset = Pr_Dataset(data_path, train=True)
test_default_dataset = Pr_Dataset(data_path, train=False)
train_default_dataloader = DataLoader(train_default_dataset, batch_size=64, shuffle=False)
test_default_dataloader = DataLoader(test_default_dataset, batch_size=64, shuffle=False)

In [52]:
for batch in train_default_dataloader:
    input, labels = batch
    print(input.shape, labels.shape)
    print(f"input1: {input}\nlabels: {labels}")
    break

torch.Size([64, 12]) torch.Size([64, 6])
input1: tensor([[0.8800, 0.8200, 0.4300, 0.1300, 0.7900, 0.1500, 0.4600, 0.7800, 0.8000,
         0.4100, 0.2700, 0.0900],
        [0.4500, 0.9100, 0.7200, 0.0900, 0.9700, 0.0700, 0.0300, 0.1500, 1.0000,
         0.4300, 0.1400, 0.1300],
        [0.9700, 0.9000, 0.8800, 0.9100, 0.9500, 0.9900, 0.9300, 0.7800, 0.8800,
         0.9200, 0.1600, 0.1000],
        [0.9200, 0.8700, 0.8100, 0.9100, 0.8500, 0.7700, 0.7200, 0.8900, 0.9600,
         0.7800, 0.2400, 0.2800],
        [0.9800, 0.7600, 0.8900, 0.7200, 0.9200, 0.7900, 0.9200, 0.8500, 0.8500,
         0.8200, 0.2700, 0.1200],
        [0.7000, 0.6200, 0.2500, 0.6000, 0.2400, 0.6400, 0.6900, 0.2700, 0.6400,
         0.4700, 0.0400, 0.0200],
        [0.7700, 0.9900, 0.2700, 0.4100, 0.5900, 0.0400, 0.5100, 0.3700, 0.4200,
         0.3700, 0.2200, 0.2400],
        [0.9600, 0.4700, 0.2600, 0.9000, 0.4800, 0.2400, 0.1700, 0.5400, 0.6200,
         0.8000, 0.2800, 0.1000],
        [0.6000, 0.3800, 0.4900

In [53]:
import os

data_path = os.path.join(base_path, "dataset", "data_default2.csv")
train_default2_dataset = Pr_Dataset(data_path, train=True)
test_default2_dataset = Pr_Dataset(data_path, train=False)
train_default2_dataloader = DataLoader(train_default2_dataset, batch_size=64, shuffle=False)
test_default2_dataloader = DataLoader(test_default2_dataset, batch_size=64, shuffle=False)

In [54]:
for batch in train_default2_dataloader:
    input, labels = batch
    print(input.shape, labels.shape)
    print(f"input1: {input}\nlabels: {labels}")
    break

torch.Size([64, 12]) torch.Size([64, 6])
input1: tensor([[0.1400, 0.9600, 0.2000, 0.3200, 0.9600, 0.1300, 0.4600, 0.4600, 0.4400,
         0.7200, 0.1900, 0.2600],
        [0.8300, 0.7600, 0.7400, 0.9400, 0.8200, 0.8600, 0.9400, 0.7700, 0.7500,
         0.8100, 0.2800, 0.1400],
        [0.8100, 0.9400, 0.9000, 0.8400, 0.8600, 0.9800, 0.8100, 0.9900, 0.7500,
         0.7800, 0.0600, 0.1500],
        [0.7100, 0.7100, 0.8600, 0.9100, 0.7800, 0.7600, 0.8500, 0.8200, 0.9700,
         0.9000, 0.2000, 0.1100],
        [0.9800, 0.8600, 0.7700, 0.8900, 0.7500, 0.9800, 0.7000, 0.8100, 0.7900,
         0.8200, 0.1500, 0.1900],
        [0.8300, 0.3800, 0.1300, 0.2200, 0.9200, 0.8100, 0.7800, 0.4700, 0.8700,
         0.5300, 0.0400, 0.2100],
        [0.4600, 0.2400, 0.9800, 0.6300, 0.4600, 0.3300, 0.8300, 0.1100, 0.4600,
         0.6700, 0.2800, 0.0200],
        [0.8000, 0.4000, 0.1800, 0.9200, 0.9000, 0.0500, 0.8400, 0.8400, 0.3400,
         0.3400, 0.1600, 0.2400],
        [0.9700, 0.9200, 0.8000

In [55]:
import os

data_path = os.path.join(base_path, "dataset", "data_dynamic_time.csv")
train_dynamic_time_dataset = Pr_Dataset(data_path, train=True)
test_dynamic_time_dataset = Pr_Dataset(data_path, train=False)
train_dynamic_time_dataloader = DataLoader(train_dynamic_time_dataset, batch_size=64, shuffle=False)
test_dynamic_time_dataloader = DataLoader(test_dynamic_time_dataset, batch_size=64, shuffle=False)

In [56]:
for batch in train_default2_dataloader:
    input, labels = batch
    print(input.shape, labels.shape)
    print(f"input1: {input}\nlabels: {labels}")
    break

torch.Size([64, 12]) torch.Size([64, 6])
input1: tensor([[0.1400, 0.9600, 0.2000, 0.3200, 0.9600, 0.1300, 0.4600, 0.4600, 0.4400,
         0.7200, 0.1900, 0.2600],
        [0.8300, 0.7600, 0.7400, 0.9400, 0.8200, 0.8600, 0.9400, 0.7700, 0.7500,
         0.8100, 0.2800, 0.1400],
        [0.8100, 0.9400, 0.9000, 0.8400, 0.8600, 0.9800, 0.8100, 0.9900, 0.7500,
         0.7800, 0.0600, 0.1500],
        [0.7100, 0.7100, 0.8600, 0.9100, 0.7800, 0.7600, 0.8500, 0.8200, 0.9700,
         0.9000, 0.2000, 0.1100],
        [0.9800, 0.8600, 0.7700, 0.8900, 0.7500, 0.9800, 0.7000, 0.8100, 0.7900,
         0.8200, 0.1500, 0.1900],
        [0.8300, 0.3800, 0.1300, 0.2200, 0.9200, 0.8100, 0.7800, 0.4700, 0.8700,
         0.5300, 0.0400, 0.2100],
        [0.4600, 0.2400, 0.9800, 0.6300, 0.4600, 0.3300, 0.8300, 0.1100, 0.4600,
         0.6700, 0.2800, 0.0200],
        [0.8000, 0.4000, 0.1800, 0.9200, 0.9000, 0.0500, 0.8400, 0.8400, 0.3400,
         0.3400, 0.1600, 0.2400],
        [0.9700, 0.9200, 0.8000

In [57]:
import os

data_path = os.path.join(base_path, "dataset", "data_dynamic2_time.csv")
train_dynamic2_time_dataset = Pr_Dataset(data_path, train=True)
test_dynamic2_time_dataset = Pr_Dataset(data_path, train=False)
train_dynamic2_time_dataloader = DataLoader(train_dynamic2_time_dataset, batch_size=64, shuffle=False)
test_dynamic2_time_dataloader = DataLoader(test_dynamic2_time_dataset, batch_size=64, shuffle=False)

In [58]:
for batch in train_dynamic2_time_dataloader:
    input, labels = batch
    print(input.shape, labels.shape)
    print(f"input1: {input}\nlabels: {labels}")
    break

torch.Size([64, 13]) torch.Size([64, 6])
input1: tensor([[ 0.5900,  0.0200,  0.9000,  0.6100,  0.6900,  0.9900,  0.3300,  0.7600,
          0.7500,  0.0100,  0.0700,  0.2100, -0.1900],
        [ 0.7700,  0.4800,  0.7200,  0.2900,  0.2700,  0.2400,  0.8800,  0.7200,
          0.0600,  0.7800,  0.1700,  0.1700, -2.1000],
        [ 0.2600,  0.7100,  0.0700,  0.6700,  0.3100,  0.8800,  0.1000,  0.6400,
          0.5800,  0.2000,  0.1400,  0.0800, -1.4800],
        [ 0.7500,  0.7000,  0.7100,  0.9200,  0.7100,  0.9400,  0.8400,  0.8300,
          0.7400,  0.8100,  0.2800,  0.2000, -1.3800],
        [ 0.5800,  0.2700,  0.4000,  0.2100,  0.7700,  1.0000,  0.8600,  0.5500,
          0.1900,  0.7100,  0.2800,  0.2000, -1.2200],
        [ 0.6800,  0.6900,  0.9000,  0.3300,  0.2400,  0.3100,  0.0600,  0.8200,
          0.6700,  0.6000,  0.2800,  0.0700, -0.9800],
        [ 0.5400,  0.9500,  0.6900,  0.3200,  0.1700,  0.4500,  0.8500,  0.3100,
          0.7500,  0.9200,  0.2900,  0.2100, -1.4200],

## Models

### PPO Multi-modal(Untrained + Pretrained)

#### Reward : Dynamic

In [19]:
class FE_MM_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 64):
        super(FE_MM_net, self).__init__(observation_space, features_dim)
        self.fc1_1 = nn.Linear(10, 16) # 5 Nodes status (CPU, Memory)
        self.fc1_2 = nn.Linear(2, 16)   # Pod quota (CPU, Memory)
        self.fc2_1 = nn.Linear(16, 32)
        self.fc2_2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(64, 64)    # Last layer of FE_net


    def forward(self, x):
        x1 = x[:, :10]
        x1 = 1 - x1
        x2 = x[:, 10:]
        x1 = F.relu(self.fc1_1(x1))  
        x2 = F.relu(self.fc1_2(x2))
        x1 = F.relu(self.fc2_1(x1))
        x2 = F.relu(self.fc2_2(x2))
        x = torch.cat((x1, x2), dim=1)
        x = self.fc3(x)
        return x

In [20]:
policy_kwargs = dict(
    features_extractor_class=FE_MM_net,
    features_extractor_kwargs=dict(features_dim=64),
    net_arch=[dict(pi=[64, 32], vf=[64, 32])]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [21]:
rl_model.policy

ActorCriticPolicy(
  (features_extractor): FE_MM_net(
    (fc1_1): Linear(in_features=10, out_features=16, bias=True)
    (fc1_2): Linear(in_features=2, out_features=16, bias=True)
    (fc2_1): Linear(in_features=16, out_features=32, bias=True)
    (fc2_2): Linear(in_features=16, out_features=32, bias=True)
    (fc3): Linear(in_features=64, out_features=64, bias=True)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential()
    (policy_net): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=32, out_features=6, bias=True)
  (value_net): Linear(in_features=32, out_features=1, bias=True)
)

In [22]:
# model_ppo_mm_ut_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_mm_ut_dynamic'))

In [23]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class PPO_MM_Action_net(nn.Module):
    def __init__(self, original_model):
        super(PPO_MM_Action_net, self).__init__()
        self.features_extractor = original_model.policy.features_extractor
        self.policy_net = original_model.policy.mlp_extractor.policy_net
        self.action_net = original_model.policy.action_net

    def forward(self, x):
        x = self.features_extractor(x)
        x = self.policy_net(x)
        x = self.action_net(x)
        return x

In [24]:
ppo_mm_action_model = PPO_MM_Action_net(rl_model)

In [25]:
ppo_mm_action_model(sample1)

tensor([[ 0.0033,  0.0019, -0.0006,  0.0022, -0.0006, -0.0012]],
       grad_fn=<AddmmBackward0>)

In [26]:
# Training
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(ppo_mm_action_model.parameters(), lr=0.001)

In [27]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [28]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(ppo_mm_action_model, train_dynamic_dataloader, criterion, optimizer)
    test_loss, test_acc = test(ppo_mm_action_model, test_dynamic_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.1244, Train Acc: 69.80%, Test Loss: 0.0754, Test Acc: 76.94%
Epoch 2: Train Loss: 0.0648, Train Acc: 79.27%, Test Loss: 0.0548, Test Acc: 81.17%
Epoch 3: Train Loss: 0.0525, Train Acc: 81.37%, Test Loss: 0.0488, Test Acc: 82.25%
Epoch 4: Train Loss: 0.0478, Train Acc: 82.12%, Test Loss: 0.0462, Test Acc: 82.70%
Epoch 5: Train Loss: 0.0454, Train Acc: 82.56%, Test Loss: 0.0428, Test Acc: 83.31%
Epoch 6: Train Loss: 0.0439, Train Acc: 82.76%, Test Loss: 0.0413, Test Acc: 83.89%
Epoch 7: Train Loss: 0.0428, Train Acc: 83.01%, Test Loss: 0.0416, Test Acc: 84.06%
Epoch 8: Train Loss: 0.0421, Train Acc: 83.15%, Test Loss: 0.0416, Test Acc: 83.86%
Epoch 9: Train Loss: 0.0413, Train Acc: 83.31%, Test Loss: 0.0406, Test Acc: 84.00%
Epoch 10: Train Loss: 0.0408, Train Acc: 83.39%, Test Loss: 0.0409, Test Acc: 84.09%
Epoch 11: Train Loss: 0.0403, Train Acc: 83.48%, Test Loss: 0.0384, Test Acc: 84.32%
Epoch 12: Train Loss: 0.0399, Train Acc: 83.47%, Test Loss: 0.0428, Test A

In [29]:
rl_model.policy.features_extractor.load_state_dict(ppo_mm_action_model.features_extractor.state_dict())
rl_model.policy.mlp_extractor.policy_net.load_state_dict(ppo_mm_action_model.policy_net.state_dict())
rl_model.policy.action_net.load_state_dict(ppo_mm_action_model.action_net.state_dict())
rl_model.policy.mlp_extractor.value_net.load_state_dict(ppo_mm_action_model.policy_net.state_dict())

<All keys matched successfully>

In [30]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_mm_pr_dynamic'))

#### Reward : Dynamic2

In [31]:
class FE_MM_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 16):
        super(FE_MM_net, self).__init__(observation_space, features_dim)
        self.fc1_1 = nn.Linear(10, 16) # 5 Nodes status (CPU, Memory)
        self.fc1_2 = nn.Linear(2, 16)   # Pod quota (CPU, Memory)
        self.fc2_1 = nn.Linear(16, 32)
        self.fc2_2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(64, 64)    # Last layer of FE_net


    def forward(self, x):
        x1 = x[:, :10]
        x1 = 1 - x1
        x2 = x[:, 10:]
        x1 = F.relu(self.fc1_1(x1))  
        x2 = F.relu(self.fc1_2(x2))
        x1 = F.relu(self.fc2_1(x1))
        x2 = F.relu(self.fc2_2(x2))
        x = torch.cat((x1, x2), dim=1)
        x = self.fc3(x)
        return x

In [32]:
policy_kwargs = dict(
    features_extractor_class=FE_MM_net,
    features_extractor_kwargs=dict(features_dim=64),
    net_arch=[dict(pi=[64, 32], vf=[64, 32])]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic2.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [33]:
rl_model.policy

ActorCriticPolicy(
  (features_extractor): FE_MM_net(
    (fc1_1): Linear(in_features=10, out_features=16, bias=True)
    (fc1_2): Linear(in_features=2, out_features=16, bias=True)
    (fc2_1): Linear(in_features=16, out_features=32, bias=True)
    (fc2_2): Linear(in_features=16, out_features=32, bias=True)
    (fc3): Linear(in_features=64, out_features=64, bias=True)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential()
    (policy_net): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=32, out_features=6, bias=True)
  (value_net): Linear(in_features=32, out_features=1, bias=True)
)

In [34]:
# model_ppo_mm_ut_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_mm_ut_dynamic2'))

In [35]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class PPO_MM_Action_net(nn.Module):
    def __init__(self, original_model):
        super(PPO_MM_Action_net, self).__init__()
        self.features_extractor = original_model.policy.features_extractor
        self.policy_net = original_model.policy.mlp_extractor.policy_net
        self.action_net = original_model.policy.action_net

    def forward(self, x):
        x = self.features_extractor(x)
        x = self.policy_net(x)
        x = self.action_net(x)
        return x

In [36]:
ppo_mm_action_model = PPO_MM_Action_net(rl_model)

In [37]:
ppo_mm_action_model(sample1)

tensor([[-2.5743e-03, -3.8203e-03,  2.3502e-04,  3.0332e-03, -8.3489e-05,
         -1.0153e-03]], grad_fn=<AddmmBackward0>)

In [38]:
# Training
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(ppo_mm_action_model.parameters(), lr=0.001)

In [39]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [40]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(ppo_mm_action_model, train_dynamic2_dataloader, criterion, optimizer)
    test_loss, test_acc = test(ppo_mm_action_model, test_dynamic2_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.0134, Train Acc: 53.44%, Test Loss: 0.0064, Test Acc: 62.23%
Epoch 2: Train Loss: 0.0056, Train Acc: 65.52%, Test Loss: 0.0045, Test Acc: 70.04%
Epoch 3: Train Loss: 0.0044, Train Acc: 70.84%, Test Loss: 0.0038, Test Acc: 72.97%
Epoch 4: Train Loss: 0.0039, Train Acc: 73.56%, Test Loss: 0.0033, Test Acc: 75.83%
Epoch 5: Train Loss: 0.0037, Train Acc: 74.73%, Test Loss: 0.0032, Test Acc: 75.93%
Epoch 6: Train Loss: 0.0035, Train Acc: 75.32%, Test Loss: 0.0031, Test Acc: 76.58%
Epoch 7: Train Loss: 0.0034, Train Acc: 75.74%, Test Loss: 0.0030, Test Acc: 77.09%
Epoch 8: Train Loss: 0.0033, Train Acc: 75.78%, Test Loss: 0.0028, Test Acc: 76.58%
Epoch 9: Train Loss: 0.0032, Train Acc: 75.93%, Test Loss: 0.0031, Test Acc: 77.78%
Epoch 10: Train Loss: 0.0032, Train Acc: 76.18%, Test Loss: 0.0030, Test Acc: 77.70%
Epoch 11: Train Loss: 0.0031, Train Acc: 76.15%, Test Loss: 0.0030, Test Acc: 78.52%
Epoch 12: Train Loss: 0.0030, Train Acc: 76.40%, Test Loss: 0.0026, Test A

In [41]:
rl_model.policy.features_extractor.load_state_dict(ppo_mm_action_model.features_extractor.state_dict())
rl_model.policy.mlp_extractor.policy_net.load_state_dict(ppo_mm_action_model.policy_net.state_dict())
rl_model.policy.action_net.load_state_dict(ppo_mm_action_model.action_net.state_dict())
rl_model.policy.mlp_extractor.value_net.load_state_dict(ppo_mm_action_model.policy_net.state_dict())

<All keys matched successfully>

In [42]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_mm_pr_dynamic2'))

#### Reward : Default

In [43]:
class FE_MM_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 16):
        super(FE_MM_net, self).__init__(observation_space, features_dim)
        self.fc1_1 = nn.Linear(10, 16) # 5 Nodes status (CPU, Memory)
        self.fc1_2 = nn.Linear(2, 16)   # Pod quota (CPU, Memory)
        self.fc2_1 = nn.Linear(16, 32)
        self.fc2_2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(64, 64)    # Last layer of FE_net


    def forward(self, x):
        x1 = x[:, :10]
        x1 = 1 - x1
        x2 = x[:, 10:]
        x1 = F.relu(self.fc1_1(x1))  
        x2 = F.relu(self.fc1_2(x2))
        x1 = F.relu(self.fc2_1(x1))
        x2 = F.relu(self.fc2_2(x2))
        x = torch.cat((x1, x2), dim=1)
        x = self.fc3(x)
        return x

In [44]:
policy_kwargs = dict(
    features_extractor_class=FE_MM_net,
    features_extractor_kwargs=dict(features_dim=64),
    net_arch=[dict(pi=[64, 32], vf=[64, 32])]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_default.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [45]:
rl_model.policy

ActorCriticPolicy(
  (features_extractor): FE_MM_net(
    (fc1_1): Linear(in_features=10, out_features=16, bias=True)
    (fc1_2): Linear(in_features=2, out_features=16, bias=True)
    (fc2_1): Linear(in_features=16, out_features=32, bias=True)
    (fc2_2): Linear(in_features=16, out_features=32, bias=True)
    (fc3): Linear(in_features=64, out_features=64, bias=True)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential()
    (policy_net): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=32, out_features=6, bias=True)
  (value_net): Linear(in_features=32, out_features=1, bias=True)
)

In [46]:
# model_ppo_mm_ut_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_mm_ut_default'))

In [47]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class PPO_MM_Action_net(nn.Module):
    def __init__(self, original_model):
        super(PPO_MM_Action_net, self).__init__()
        self.features_extractor = original_model.policy.features_extractor
        self.policy_net = original_model.policy.mlp_extractor.policy_net
        self.action_net = original_model.policy.action_net

    def forward(self, x):
        x = self.features_extractor(x)
        x = self.policy_net(x)
        x = self.action_net(x)
        return x

In [48]:
ppo_mm_action_model = PPO_MM_Action_net(rl_model)

In [49]:
ppo_mm_action_model(sample1)

tensor([[-0.0024,  0.0048,  0.0019, -0.0059,  0.0035, -0.0003]],
       grad_fn=<AddmmBackward0>)

In [50]:
# Training
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(ppo_mm_action_model.parameters(), lr=0.001)

In [51]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [52]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(ppo_mm_action_model, train_default_dataloader, criterion, optimizer)
    test_loss, test_acc = test(ppo_mm_action_model, test_default_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.0031, Train Acc: 78.57%, Test Loss: 0.0017, Test Acc: 80.98%
Epoch 2: Train Loss: 0.0015, Train Acc: 81.19%, Test Loss: 0.0015, Test Acc: 79.64%
Epoch 3: Train Loss: 0.0013, Train Acc: 81.78%, Test Loss: 0.0014, Test Acc: 80.38%
Epoch 4: Train Loss: 0.0012, Train Acc: 82.46%, Test Loss: 0.0012, Test Acc: 81.64%
Epoch 5: Train Loss: 0.0011, Train Acc: 83.20%, Test Loss: 0.0011, Test Acc: 81.96%
Epoch 6: Train Loss: 0.0009, Train Acc: 83.93%, Test Loss: 0.0009, Test Acc: 83.82%
Epoch 7: Train Loss: 0.0008, Train Acc: 84.96%, Test Loss: 0.0008, Test Acc: 84.63%
Epoch 8: Train Loss: 0.0007, Train Acc: 85.79%, Test Loss: 0.0007, Test Acc: 85.10%
Epoch 9: Train Loss: 0.0006, Train Acc: 86.56%, Test Loss: 0.0006, Test Acc: 85.55%
Epoch 10: Train Loss: 0.0005, Train Acc: 87.28%, Test Loss: 0.0006, Test Acc: 85.70%
Epoch 11: Train Loss: 0.0005, Train Acc: 87.98%, Test Loss: 0.0005, Test Acc: 86.29%
Epoch 12: Train Loss: 0.0004, Train Acc: 88.71%, Test Loss: 0.0005, Test A

In [53]:
rl_model.policy.features_extractor.load_state_dict(ppo_mm_action_model.features_extractor.state_dict())
rl_model.policy.mlp_extractor.policy_net.load_state_dict(ppo_mm_action_model.policy_net.state_dict())
rl_model.policy.action_net.load_state_dict(ppo_mm_action_model.action_net.state_dict())
rl_model.policy.mlp_extractor.value_net.load_state_dict(ppo_mm_action_model.policy_net.state_dict())

<All keys matched successfully>

In [54]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_mm_pr_default'))

#### Reward : Default2

In [8]:
class FE_MM_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 16):
        super(FE_MM_net, self).__init__(observation_space, features_dim)
        self.fc1_1 = nn.Linear(15, 16) # 5 Nodes status (CPU, Memory)
        self.fc1_2 = nn.Linear(2, 16)   # Pod quota (CPU, Memory)
        self.fc2_1 = nn.Linear(16, 32)
        self.fc2_2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(64, 64)    # Last layer of FE_net


    def forward(self, x):
        x1 = x[:, :10]
        x1 = 1 - x1
        x2 = x[:, 10:]

        is_allocatable = ((x1[:, ::2] - x2[:, 0].unsqueeze(1)) >= 0) & ((x1[:, 1::2] - x2[:, 1].unsqueeze(1)) >= 0)
        is_allocatable = is_allocatable.float()

        x1 = torch.cat((x1, is_allocatable), dim=1)

        x1 = F.relu(self.fc1_1(x1))  
        x2 = F.relu(self.fc1_2(x2))
        x1 = F.relu(self.fc2_1(x1))
        x2 = F.relu(self.fc2_2(x2))
        x = torch.cat((x1, x2), dim=1)
        x = self.fc3(x)
        return x

In [9]:
policy_kwargs = dict(
    features_extractor_class=FE_MM_net,
    features_extractor_kwargs=dict(features_dim=64),
    net_arch=[dict(pi=[64, 32], vf=[64, 32])],
    
)

env = gym.make('SimKubeEnv-v0', reward_file='train_default.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [10]:
rl_model.policy

ActorCriticPolicy(
  (features_extractor): FE_MM_net(
    (fc1_1): Linear(in_features=15, out_features=16, bias=True)
    (fc1_2): Linear(in_features=2, out_features=16, bias=True)
    (fc2_1): Linear(in_features=16, out_features=32, bias=True)
    (fc2_2): Linear(in_features=16, out_features=32, bias=True)
    (fc3): Linear(in_features=64, out_features=64, bias=True)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential()
    (policy_net): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=32, out_features=6, bias=True)
  (value_net): Linear(in_features=32, out_features=1, bias=True)
)

In [11]:
# model_ppo_mm_ut_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_mm_ut_default2'))

In [12]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class PPO_MM_Action_net(nn.Module):
    def __init__(self, original_model):
        super(PPO_MM_Action_net, self).__init__()
        self.features_extractor = original_model.policy.features_extractor
        self.policy_net = original_model.policy.mlp_extractor.policy_net
        self.action_net = original_model.policy.action_net

    def forward(self, x):
        x = self.features_extractor(x)
        x = self.policy_net(x)
        x = self.action_net(x)
        return x

In [13]:
ppo_mm_action_model = PPO_MM_Action_net(rl_model)

In [14]:
ppo_mm_action_model(sample1)

tensor([[ 0.0068, -0.0070,  0.0013,  0.0072, -0.0095, -0.0023]],
       grad_fn=<AddmmBackward0>)

In [15]:
# Training
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(ppo_mm_action_model.parameters(), lr=0.001)

In [16]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [18]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(ppo_mm_action_model, train_default2_dataloader, criterion, optimizer)
    test_loss, test_acc = test(ppo_mm_action_model, test_default2_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.0406, Train Acc: 40.95%, Test Loss: 0.0132, Test Acc: 59.29%
Epoch 2: Train Loss: 0.0075, Train Acc: 70.77%, Test Loss: 0.0050, Test Acc: 76.50%
Epoch 3: Train Loss: 0.0041, Train Acc: 78.24%, Test Loss: 0.0032, Test Acc: 81.58%
Epoch 4: Train Loss: 0.0029, Train Acc: 81.57%, Test Loss: 0.0024, Test Acc: 83.87%
Epoch 5: Train Loss: 0.0023, Train Acc: 83.11%, Test Loss: 0.0020, Test Acc: 83.81%
Epoch 6: Train Loss: 0.0020, Train Acc: 83.99%, Test Loss: 0.0018, Test Acc: 84.11%
Epoch 7: Train Loss: 0.0017, Train Acc: 84.61%, Test Loss: 0.0018, Test Acc: 83.98%
Epoch 8: Train Loss: 0.0015, Train Acc: 85.14%, Test Loss: 0.0018, Test Acc: 84.61%
Epoch 9: Train Loss: 0.0013, Train Acc: 85.80%, Test Loss: 0.0016, Test Acc: 85.69%
Epoch 10: Train Loss: 0.0012, Train Acc: 86.53%, Test Loss: 0.0014, Test Acc: 86.75%
Epoch 11: Train Loss: 0.0011, Train Acc: 87.22%, Test Loss: 0.0013, Test Acc: 87.68%
Epoch 12: Train Loss: 0.0010, Train Acc: 87.83%, Test Loss: 0.0012, Test A

In [19]:
rl_model.policy.features_extractor.load_state_dict(ppo_mm_action_model.features_extractor.state_dict())
rl_model.policy.mlp_extractor.policy_net.load_state_dict(ppo_mm_action_model.policy_net.state_dict())
rl_model.policy.action_net.load_state_dict(ppo_mm_action_model.action_net.state_dict())
rl_model.policy.mlp_extractor.value_net.load_state_dict(ppo_mm_action_model.policy_net.state_dict())

<All keys matched successfully>

In [20]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_mm_pr_default2'))

### PPO Single-modal Dynamic (Untrained + Pretrained)

In [129]:
class FE_SM_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 16):
        super(FE_SM_net, self).__init__(observation_space, features_dim)
        self.fc1 = nn.Linear(12, 16)
        self.fc2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(32, 64)    # Last layer of FE_net

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)     # (batch_size, 16)
        return x

In [130]:
policy_kwargs = dict(
    features_extractor_class=FE_SM_net,
    features_extractor_kwargs=dict(features_dim=64),
    net_arch=[dict(pi=[80, 80], vf=[80, 80])]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [131]:
rl_model.policy

ActorCriticPolicy(
  (features_extractor): FE_SM_net(
    (fc1): Linear(in_features=12, out_features=16, bias=True)
    (fc2): Linear(in_features=16, out_features=32, bias=True)
    (fc3): Linear(in_features=32, out_features=64, bias=True)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential()
    (policy_net): Sequential(
      (0): Linear(in_features=64, out_features=80, bias=True)
      (1): Tanh()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=64, out_features=80, bias=True)
      (1): Tanh()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=80, out_features=6, bias=True)
  (value_net): Linear(in_features=80, out_features=1, bias=True)
)

In [132]:
# Save untrained model
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_sm_ut_dynamic'))

In [96]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class PPO_SM_Action_net(nn.Module):
    def __init__(self, original_model):
        super(PPO_SM_Action_net, self).__init__()
        self.features_extractor = original_model.policy.features_extractor
        self.policy_net = original_model.policy.mlp_extractor.policy_net
        self.action_net = original_model.policy.action_net

    def forward(self, x):
        x = self.features_extractor(x)
        x = self.policy_net(x)
        x = self.action_net(x)
        return x

In [97]:
ppo_sm_action_model = PPO_SM_Action_net(rl_model)

In [98]:
# Training
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(ppo_sm_action_model.parameters(), lr=0.001)

In [99]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [100]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(ppo_sm_action_model, train_dynamic_dataloader, criterion, optimizer)
    test_loss, test_acc = test(ppo_sm_action_model, test_dynamic_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.1135, Train Acc: 72.45%, Test Loss: 0.0739, Test Acc: 78.37%
Epoch 2: Train Loss: 0.0673, Train Acc: 79.63%, Test Loss: 0.0590, Test Acc: 80.87%
Epoch 3: Train Loss: 0.0604, Train Acc: 80.79%, Test Loss: 0.0546, Test Acc: 82.42%
Epoch 4: Train Loss: 0.0569, Train Acc: 81.36%, Test Loss: 0.0507, Test Acc: 83.22%
Epoch 5: Train Loss: 0.0543, Train Acc: 81.86%, Test Loss: 0.0516, Test Acc: 83.17%
Epoch 6: Train Loss: 0.0524, Train Acc: 82.22%, Test Loss: 0.0481, Test Acc: 83.45%
Epoch 7: Train Loss: 0.0509, Train Acc: 82.46%, Test Loss: 0.0474, Test Acc: 83.20%
Epoch 8: Train Loss: 0.0495, Train Acc: 82.74%, Test Loss: 0.0453, Test Acc: 83.70%
Epoch 9: Train Loss: 0.0485, Train Acc: 82.84%, Test Loss: 0.0473, Test Acc: 83.28%
Epoch 10: Train Loss: 0.0477, Train Acc: 82.96%, Test Loss: 0.0464, Test Acc: 83.35%
Epoch 11: Train Loss: 0.0469, Train Acc: 83.06%, Test Loss: 0.0469, Test Acc: 83.19%
Epoch 12: Train Loss: 0.0461, Train Acc: 83.23%, Test Loss: 0.0498, Test A

In [101]:
rl_model.policy.features_extractor.load_state_dict(ppo_sm_action_model.features_extractor.state_dict())
rl_model.policy.mlp_extractor.policy_net.load_state_dict(ppo_sm_action_model.policy_net.state_dict())
rl_model.policy.action_net.load_state_dict(ppo_sm_action_model.action_net.state_dict())

<All keys matched successfully>

In [102]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_sm_pr_dynamic'))

### PPO Single-modal Default2 (Untrained + Pretrained)

In [133]:
class FE_SM_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 16):
        super(FE_SM_net, self).__init__(observation_space, features_dim)
        self.fc1 = nn.Linear(12, 16)
        self.fc2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(32, 64)    # Last layer of FE_net

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)     # (batch_size, 16)
        return x

In [134]:
policy_kwargs = dict(
    features_extractor_class=FE_SM_net,
    features_extractor_kwargs=dict(features_dim=64),
    net_arch=[dict(pi=[80, 80], vf=[80, 80])]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_default2.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [135]:
rl_model.policy

ActorCriticPolicy(
  (features_extractor): FE_SM_net(
    (fc1): Linear(in_features=12, out_features=16, bias=True)
    (fc2): Linear(in_features=16, out_features=32, bias=True)
    (fc3): Linear(in_features=32, out_features=64, bias=True)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential()
    (policy_net): Sequential(
      (0): Linear(in_features=64, out_features=80, bias=True)
      (1): Tanh()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=64, out_features=80, bias=True)
      (1): Tanh()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=80, out_features=6, bias=True)
  (value_net): Linear(in_features=80, out_features=1, bias=True)
)

In [136]:
# Save untrained model
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_sm_ut_default2'))

In [137]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class PPO_SM_Action_net(nn.Module):
    def __init__(self, original_model):
        super(PPO_SM_Action_net, self).__init__()
        self.features_extractor = original_model.policy.features_extractor
        self.policy_net = original_model.policy.mlp_extractor.policy_net
        self.action_net = original_model.policy.action_net

    def forward(self, x):
        x = self.features_extractor(x)
        x = self.policy_net(x)
        x = self.action_net(x)
        return x

In [138]:
ppo_sm_action_model = PPO_SM_Action_net(rl_model)

In [139]:
# Training
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(ppo_sm_action_model.parameters(), lr=0.001)

In [140]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [None]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(ppo_sm_action_model, train_default2_dataloader, criterion, optimizer)
    test_loss, test_acc = test(ppo_sm_action_model, test_default2_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.1135, Train Acc: 72.45%, Test Loss: 0.0739, Test Acc: 78.37%
Epoch 2: Train Loss: 0.0673, Train Acc: 79.63%, Test Loss: 0.0590, Test Acc: 80.87%
Epoch 3: Train Loss: 0.0604, Train Acc: 80.79%, Test Loss: 0.0546, Test Acc: 82.42%
Epoch 4: Train Loss: 0.0569, Train Acc: 81.36%, Test Loss: 0.0507, Test Acc: 83.22%
Epoch 5: Train Loss: 0.0543, Train Acc: 81.86%, Test Loss: 0.0516, Test Acc: 83.17%
Epoch 6: Train Loss: 0.0524, Train Acc: 82.22%, Test Loss: 0.0481, Test Acc: 83.45%
Epoch 7: Train Loss: 0.0509, Train Acc: 82.46%, Test Loss: 0.0474, Test Acc: 83.20%
Epoch 8: Train Loss: 0.0495, Train Acc: 82.74%, Test Loss: 0.0453, Test Acc: 83.70%
Epoch 9: Train Loss: 0.0485, Train Acc: 82.84%, Test Loss: 0.0473, Test Acc: 83.28%
Epoch 10: Train Loss: 0.0477, Train Acc: 82.96%, Test Loss: 0.0464, Test Acc: 83.35%
Epoch 11: Train Loss: 0.0469, Train Acc: 83.06%, Test Loss: 0.0469, Test Acc: 83.19%
Epoch 12: Train Loss: 0.0461, Train Acc: 83.23%, Test Loss: 0.0498, Test A

In [None]:
rl_model.policy.features_extractor.load_state_dict(ppo_sm_action_model.features_extractor.state_dict())
rl_model.policy.mlp_extractor.policy_net.load_state_dict(ppo_sm_action_model.policy_net.state_dict())
rl_model.policy.action_net.load_state_dict(ppo_sm_action_model.action_net.state_dict())

<All keys matched successfully>

In [None]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_sm_pr_dynamic'))

### DQN Multi-modal Dynamic (Untrained + Pretrained)

In [33]:
class FE_MM_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 16):
        super(FE_MM_net, self).__init__(observation_space, features_dim)
        self.fc1_1 = nn.Linear(10, 16) # 5 Nodes status (CPU, Memory)
        self.fc1_2 = nn.Linear(2, 16)   # Pod quota (CPU, Memory)
        self.fc2_1 = nn.Linear(16, 8)
        self.fc2_2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(16, 16)    # Concatenated vector
        self.fc4 = nn.Linear(16, 16)    # Last layer of FE_net


    def forward(self, x):
        x1 = x[:, :10]
        x2 = x[:, 10:]
        x1 = F.relu(self.fc1_1(x1))  
        x2 = F.relu(self.fc1_2(x2))
        x1 = F.relu(self.fc2_1(x1))
        x2 = F.relu(self.fc2_2(x2))
        x = torch.cat((x1, x2), dim=1)
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [34]:
policy_kwargs = dict(
    features_extractor_class=FE_MM_net,
    features_extractor_kwargs=dict(features_dim=16),
    net_arch=[80, 80]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.DQN('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [35]:
rl_model.policy

DQNPolicy(
  (q_net): QNetwork(
    (features_extractor): FE_MM_net(
      (fc1_1): Linear(in_features=10, out_features=16, bias=True)
      (fc1_2): Linear(in_features=2, out_features=16, bias=True)
      (fc2_1): Linear(in_features=16, out_features=8, bias=True)
      (fc2_2): Linear(in_features=16, out_features=8, bias=True)
      (fc3): Linear(in_features=16, out_features=16, bias=True)
      (fc4): Linear(in_features=16, out_features=16, bias=True)
    )
    (q_net): Sequential(
      (0): Linear(in_features=16, out_features=80, bias=True)
      (1): ReLU()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3): ReLU()
      (4): Linear(in_features=80, out_features=6, bias=True)
    )
  )
  (q_net_target): QNetwork(
    (features_extractor): FE_MM_net(
      (fc1_1): Linear(in_features=10, out_features=16, bias=True)
      (fc1_2): Linear(in_features=2, out_features=16, bias=True)
      (fc2_1): Linear(in_features=16, out_features=8, bias=True)
      (fc2_2): Line

In [36]:
# model_ppo_mm_ut_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_mm_ut_dynamic'))

In [37]:
rl_model.policy.q_net_target(sample1)

tensor([[ 0.0019, -0.1223,  0.0332, -0.0266, -0.0474, -0.0251]],
       grad_fn=<AddmmBackward0>)

In [38]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class DQN_MM_net(nn.Module):
    def __init__(self, original_model):
        super(DQN_MM_net, self).__init__()
        self.q_net = original_model.policy.q_net

    def forward(self, x):
        x = self.q_net(x)
        return x

In [39]:
dqn_mm_model = DQN_MM_net(rl_model)

In [40]:
# Training
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(dqn_mm_model.parameters(), lr=0.001)

In [41]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [42]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(dqn_mm_model, train_pr_dataloader, criterion, optimizer)
    test_loss, test_acc = test(dqn_mm_model, test_pr_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.1929, Train Acc: 61.92%, Test Loss: 0.1382, Test Acc: 73.71%
Epoch 2: Train Loss: 0.1290, Train Acc: 74.93%, Test Loss: 0.1304, Test Acc: 74.60%
Epoch 3: Train Loss: 0.1254, Train Acc: 75.37%, Test Loss: 0.1292, Test Acc: 74.84%
Epoch 4: Train Loss: 0.1240, Train Acc: 75.56%, Test Loss: 0.1261, Test Acc: 75.12%
Epoch 5: Train Loss: 0.1226, Train Acc: 75.73%, Test Loss: 0.1239, Test Acc: 75.25%
Epoch 6: Train Loss: 0.1212, Train Acc: 75.81%, Test Loss: 0.1223, Test Acc: 75.21%
Epoch 7: Train Loss: 0.1200, Train Acc: 75.88%, Test Loss: 0.1216, Test Acc: 75.15%
Epoch 8: Train Loss: 0.1191, Train Acc: 75.99%, Test Loss: 0.1207, Test Acc: 75.11%
Epoch 9: Train Loss: 0.1184, Train Acc: 76.04%, Test Loss: 0.1200, Test Acc: 75.21%
Epoch 10: Train Loss: 0.1179, Train Acc: 76.13%, Test Loss: 0.1190, Test Acc: 75.27%
Epoch 11: Train Loss: 0.1175, Train Acc: 76.18%, Test Loss: 0.1189, Test Acc: 75.39%
Epoch 12: Train Loss: 0.1172, Train Acc: 76.22%, Test Loss: 0.1182, Test A

In [43]:
rl_model.policy.q_net.load_state_dict(dqn_mm_model.q_net.state_dict())
rl_model.policy.q_net_target.load_state_dict(dqn_mm_model.q_net.state_dict())

<All keys matched successfully>

In [44]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_mm_pr_dynamic'))

In [45]:
rl_model.policy.q_net_target(sample1)

tensor([[ 0.5014, -0.9858, -1.0000, -1.0103, -1.0104, -0.9910]],
       grad_fn=<AddmmBackward0>)

In [46]:
rl_model.policy.q_net(sample2)

tensor([[-0.5059, -0.9256, -0.9021, -1.0459, -1.0538,  0.6572]],
       grad_fn=<AddmmBackward0>)

### DQN Single-modal Dynamic (Untrained + Pretrained)

In [108]:
class FE_SM_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 16):
        super(FE_SM_net, self).__init__(observation_space, features_dim)
        self.fc1 = nn.Linear(12, 16) # 5 Nodes status (CPU, Memory)
        self.fc2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(32, 64)


    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [109]:
policy_kwargs = dict(
    features_extractor_class=FE_SM_net,
    features_extractor_kwargs=dict(features_dim=64),
    net_arch=[80, 80]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.DQN('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [110]:
rl_model.policy

DQNPolicy(
  (q_net): QNetwork(
    (features_extractor): FE_SM_net(
      (fc1): Linear(in_features=12, out_features=16, bias=True)
      (fc2): Linear(in_features=16, out_features=32, bias=True)
      (fc3): Linear(in_features=32, out_features=64, bias=True)
    )
    (q_net): Sequential(
      (0): Linear(in_features=64, out_features=80, bias=True)
      (1): ReLU()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3): ReLU()
      (4): Linear(in_features=80, out_features=6, bias=True)
    )
  )
  (q_net_target): QNetwork(
    (features_extractor): FE_SM_net(
      (fc1): Linear(in_features=12, out_features=16, bias=True)
      (fc2): Linear(in_features=16, out_features=32, bias=True)
      (fc3): Linear(in_features=32, out_features=64, bias=True)
    )
    (q_net): Sequential(
      (0): Linear(in_features=64, out_features=80, bias=True)
      (1): ReLU()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3): ReLU()
      (4): Linear(in_features

In [111]:
# model_ppo_mm_ut_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_sm_ut_dynamic'))

In [112]:
rl_model.policy.q_net_target(sample1)

tensor([[-0.0595, -0.0151,  0.1065,  0.0312, -0.1239, -0.0574]],
       grad_fn=<AddmmBackward0>)

In [113]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class DQN_SM_net(nn.Module):
    def __init__(self, original_model):
        super(DQN_SM_net, self).__init__()
        self.q_net = original_model.policy.q_net

    def forward(self, x):
        x = self.q_net(x)
        return x

In [114]:
dqn_sm_model = DQN_SM_net(rl_model)

In [115]:
# Training
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(dqn_sm_model.parameters(), lr=0.001)

In [116]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [117]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(dqn_sm_model, train_dynamic_dataloader, criterion, optimizer)
    test_loss, test_acc = test(dqn_sm_model, test_dynamic_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.1545, Train Acc: 64.78%, Test Loss: 0.1164, Test Acc: 69.93%
Epoch 2: Train Loss: 0.1101, Train Acc: 71.31%, Test Loss: 0.1038, Test Acc: 71.87%
Epoch 3: Train Loss: 0.0932, Train Acc: 74.02%, Test Loss: 0.0791, Test Acc: 76.44%
Epoch 4: Train Loss: 0.0737, Train Acc: 77.49%, Test Loss: 0.0699, Test Acc: 78.05%
Epoch 5: Train Loss: 0.0659, Train Acc: 78.82%, Test Loss: 0.0622, Test Acc: 79.54%
Epoch 6: Train Loss: 0.0617, Train Acc: 79.77%, Test Loss: 0.0558, Test Acc: 80.99%
Epoch 7: Train Loss: 0.0563, Train Acc: 80.84%, Test Loss: 0.0500, Test Acc: 82.54%
Epoch 8: Train Loss: 0.0530, Train Acc: 81.54%, Test Loss: 0.0462, Test Acc: 83.23%
Epoch 9: Train Loss: 0.0511, Train Acc: 81.86%, Test Loss: 0.0494, Test Acc: 82.63%
Epoch 10: Train Loss: 0.0498, Train Acc: 82.07%, Test Loss: 0.0526, Test Acc: 81.94%
Epoch 11: Train Loss: 0.0486, Train Acc: 82.37%, Test Loss: 0.0501, Test Acc: 82.80%
Epoch 12: Train Loss: 0.0476, Train Acc: 82.47%, Test Loss: 0.0478, Test A

In [118]:
rl_model.policy.q_net.load_state_dict(dqn_sm_model.q_net.state_dict())
rl_model.policy.q_net_target.load_state_dict(dqn_sm_model.q_net.state_dict())

<All keys matched successfully>

In [119]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_sm_pr_dynamic'))

### DQN Tripple-modal Dynamic (Untrained + Pretrained)

#### Dynamic Reward

In [15]:
class FE_TM_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 16):
        super(FE_TM_net, self).__init__(observation_space, features_dim)
        self.fc1_1 = nn.Linear(10, 16) # 5 Nodes status (CPU, Memory)
        self.fc1_2 = nn.Linear(2, 16)   # Pod quota (CPU, Memory)
        self.fc1_3 = nn.Linear(5, 16)  # Node difference (CPU, Memory)
        # self.fc1_4 = nn.Linear(5, 16) # If the node can deploy the pod
        self.fc2_1 = nn.Linear(16, 8)
        self.fc2_2 = nn.Linear(16, 8)
        self.fc2_3 = nn.Linear(16, 8)
        # self.fc2_4 = nn.Linear(16, 8)

        self.fc3 = nn.Linear(24, 16)
        # self.fc3 = nn.Linear(32, 16)    # Concatenated vector
        self.fc4 = nn.Linear(16, 16)    # Last layer of FE_net

    def forward(self, x):
        x1 = x[:, :10]
        x2 = x[:, 10:]
        x3 = x1[:, ::2] - x1[:, 1::2] # Takes the difference of x1's odd and even columns

        # # Duplicte x2 5 times horizontally
        # x2_ = x2.repeat(1, 5).view(-1, 10)
        # x4_ = (1-x1) - x2_ >= 0
        # x4 = x4_[:, ::2] * x4_[:, 1::2]
        # # Convert boolean to float
        # x4 = x4.type(torch.FloatTensor)
        # # print(f"x4 : {x4}")

        x1 = F.relu(self.fc1_1(x1))  
        x2 = F.relu(self.fc1_2(x2))
        x3 = F.relu(self.fc1_3(x3))
        # x4 = F.relu(self.fc1_4(x4))

        x1 = F.relu(self.fc2_1(x1))
        x2 = F.relu(self.fc2_2(x2))
        x3 = F.relu(self.fc2_3(x3))
        # x4 = F.relu(self.fc2_4(x4))

        x = torch.cat((x1, x2, x3), dim=1)
        # x = torch.cat((x1, x2, x3, x4), dim=1)
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [18]:
policy_kwargs = dict(
    features_extractor_class=FE_TM_net,
    features_extractor_kwargs=dict(features_dim=16),
    net_arch=[80, 80]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic2.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.DQN('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [17]:
rl_model.policy

ActorCriticPolicy(
  (features_extractor): FE_TM_net(
    (fc1_1): Linear(in_features=10, out_features=16, bias=True)
    (fc1_2): Linear(in_features=2, out_features=16, bias=True)
    (fc1_3): Linear(in_features=5, out_features=16, bias=True)
    (fc2_1): Linear(in_features=16, out_features=8, bias=True)
    (fc2_2): Linear(in_features=16, out_features=8, bias=True)
    (fc2_3): Linear(in_features=16, out_features=8, bias=True)
    (fc3): Linear(in_features=24, out_features=16, bias=True)
    (fc4): Linear(in_features=16, out_features=16, bias=True)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential(
      (0): Linear(in_features=16, out_features=80, bias=True)
      (1): Tanh()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3): Tanh()
    )
    (policy_net): Sequential()
    (value_net): Sequential()
  )
  (action_net): Linear(in_features=80, out_features=6, bias=True)
  (value_net): Linear(in_features=80, out_features=1, bias=True)
)

In [75]:
# model_ppo_mm_ut_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_tm_ut_dynamic'))

In [76]:
rl_model.policy.q_net_target(sample1)

tensor([[ 0.0567, -0.0286, -0.0869, -0.0466, -0.0994, -0.0755]],
       grad_fn=<AddmmBackward0>)

In [77]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class DQN_TM_net(nn.Module):
    def __init__(self, original_model):
        super(DQN_TM_net, self).__init__()
        self.q_net = original_model.policy.q_net

    def forward(self, x):
        x = self.q_net(x)
        return x

In [78]:
dqn_tm_model = DQN_TM_net(rl_model)

In [79]:
# Training
import torch.optim as optim

# Predict 6 vectors (6 actions' scores)
criterion = nn.MSELoss()
optimizer = optim.Adam(dqn_tm_model.parameters(), lr=0.001)

In [80]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [81]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(dqn_tm_model, train_pr_dataloader, criterion, optimizer)
    test_loss, test_acc = test(dqn_tm_model, test_pr_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.0233, Train Acc: 59.48%, Test Loss: 0.0135, Test Acc: 69.50%
Epoch 2: Train Loss: 0.0136, Train Acc: 69.64%, Test Loss: 0.0126, Test Acc: 71.00%
Epoch 3: Train Loss: 0.0115, Train Acc: 72.31%, Test Loss: 0.0093, Test Acc: 74.22%
Epoch 4: Train Loss: 0.0082, Train Acc: 76.54%, Test Loss: 0.0068, Test Acc: 78.27%
Epoch 5: Train Loss: 0.0061, Train Acc: 78.97%, Test Loss: 0.0052, Test Acc: 79.49%
Epoch 6: Train Loss: 0.0053, Train Acc: 80.24%, Test Loss: 0.0045, Test Acc: 81.49%
Epoch 7: Train Loss: 0.0051, Train Acc: 80.89%, Test Loss: 0.0050, Test Acc: 80.77%
Epoch 8: Train Loss: 0.0049, Train Acc: 81.43%, Test Loss: 0.0051, Test Acc: 81.64%
Epoch 9: Train Loss: 0.0048, Train Acc: 81.90%, Test Loss: 0.0053, Test Acc: 81.67%
Epoch 10: Train Loss: 0.0047, Train Acc: 82.21%, Test Loss: 0.0044, Test Acc: 82.39%
Epoch 11: Train Loss: 0.0046, Train Acc: 82.51%, Test Loss: 0.0044, Test Acc: 82.67%
Epoch 12: Train Loss: 0.0045, Train Acc: 82.83%, Test Loss: 0.0044, Test A

In [82]:
rl_model.policy.q_net.load_state_dict(dqn_tm_model.q_net.state_dict())
rl_model.policy.q_net_target.load_state_dict(dqn_tm_model.q_net.state_dict())

<All keys matched successfully>

In [83]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_qm_pr_dynamic'))

In [87]:
rl_model.policy.q_net_target(sample1)

tensor([[-0.0045, -0.4972, -0.5014, -0.4907, -0.5101, -0.4970]],
       grad_fn=<AddmmBackward0>)

In [88]:
rl_model.policy.q_net_target(sample2)

tensor([[-0.3976, -0.8768, -0.8936, -0.8247, -0.9237,  0.0061]],
       grad_fn=<AddmmBackward0>)

In [90]:
rl_model.policy.q_net_target(sample3)

tensor([[-0.2225, -0.7509, -0.7294, -0.0045, -0.7570, -0.7192]],
       grad_fn=<AddmmBackward0>)

#### DRS Reward

In [None]:
policy_kwargs = dict(
    features_extractor_class=FE_TM_net,
    features_extractor_kwargs=dict(features_dim=16),
    net_arch=[80, 80]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_drs.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.DQN('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

In [None]:
rl_model.policy

In [None]:
# model_ppo_mm_ut_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_tm_ut_drs'))

In [None]:
rl_model.policy.q_net_target(sample1)

In [None]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class DQN_TM_net(nn.Module):
    def __init__(self, original_model):
        super(DQN_TM_net, self).__init__()
        self.q_net = original_model.policy.q_net

    def forward(self, x):
        x = self.q_net(x)
        return x

In [None]:
dqn_tm_model = DQN_TM_net(rl_model)

In [None]:
# Training
import torch.optim as optim

# Predict 6 vectors (6 actions' scores)
criterion = nn.MSELoss()
optimizer = optim.Adam(dqn_tm_model.parameters(), lr=0.001)

In [None]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [None]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(dqn_tm_model, train_pr_dataloader, criterion, optimizer)
    test_loss, test_acc = test(dqn_tm_model, test_pr_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

In [None]:
rl_model.policy.q_net.load_state_dict(dqn_tm_model.q_net.state_dict())
rl_model.policy.q_net_target.load_state_dict(dqn_tm_model.q_net.state_dict())

In [None]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_qm_pr_drs'))

In [None]:
rl_model.policy.q_net_target(sample1)

In [None]:
rl_model.policy.q_net_target(sample2)

In [None]:
rl_model.policy.q_net_target(sample3)

### PPO Tripple-modal Dynamic (Untrained + Pretrained)

In [57]:
class FE_TM_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 16):
        super(FE_TM_net, self).__init__(observation_space, features_dim)
        self.fc1_1 = nn.Linear(10, 16) # 5 Nodes status (CPU, Memory)
        self.fc1_2 = nn.Linear(2, 16)   # Pod quota (CPU, Memory)
        self.fc1_3 = nn.Linear(5, 16)  # Node difference (CPU, Memory)
        self.fc2_1 = nn.Linear(16, 8)
        self.fc2_2 = nn.Linear(16, 8)
        self.fc2_3 = nn.Linear(16, 8)

        self.fc3 = nn.Linear(24, 32)
        self.fc4 = nn.Linear(32, 64)    # Last layer of FE_net

    def forward(self, x):
        x1 = x[:, :10]
        x2 = x[:, 10:]
        x3 = x1[:, ::2] - x1[:, 1::2] # Takes the difference of x1's odd and even columns

        # (node.spec["cpu_pool"] - node.status["cpu_util"] - pod.spec["cpu_req"]) / node.spec["cpu_pool"]
        # (node.spec["mem_pool"] - node.status["mem_util"] - pod.spec["mem_req"]) / node.spec["mem_pool"]
        # x1 = 1 - x1 - x2.repeat(1, 5)

        x1 = F.relu(self.fc1_1(x1))  
        x2 = F.relu(self.fc1_2(x2))
        x3 = F.relu(self.fc1_3(x3))

        x1 = F.relu(self.fc2_1(x1))
        x2 = F.relu(self.fc2_2(x2))
        x3 = F.relu(self.fc2_3(x3))

        x = torch.cat((x1, x2, x3), dim=1)
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [58]:
policy_kwargs = dict(
    features_extractor_class=FE_TM_net,
    features_extractor_kwargs=dict(features_dim=64),
    net_arch=[dict(pi=[80, 80], vf=[80, 80])]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic2.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [59]:
rl_model.policy

ActorCriticPolicy(
  (features_extractor): FE_TM_net(
    (fc1_1): Linear(in_features=10, out_features=16, bias=True)
    (fc1_2): Linear(in_features=2, out_features=16, bias=True)
    (fc1_3): Linear(in_features=5, out_features=16, bias=True)
    (fc2_1): Linear(in_features=16, out_features=8, bias=True)
    (fc2_2): Linear(in_features=16, out_features=8, bias=True)
    (fc2_3): Linear(in_features=16, out_features=8, bias=True)
    (fc3): Linear(in_features=24, out_features=32, bias=True)
    (fc4): Linear(in_features=32, out_features=64, bias=True)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential()
    (policy_net): Sequential(
      (0): Linear(in_features=64, out_features=80, bias=True)
      (1): Tanh()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=64, out_features=80, bias=True)
      (1): Tanh()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3

In [60]:
# model_ppo_mm_ut_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_tm_ut_dynamic'))

In [61]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class PPO_TM_Action_net(nn.Module):
    def __init__(self, original_model):
        super(PPO_TM_Action_net, self).__init__()
        self.features_extractor = original_model.policy.features_extractor
        self.policy_net = original_model.policy.mlp_extractor.policy_net
        self.action_net = original_model.policy.action_net

    def forward(self, x):
        x = self.features_extractor(x)
        x = self.policy_net(x)
        x = self.action_net(x)
        return x

In [62]:
ppo_tm_action_model = PPO_TM_Action_net(rl_model)

In [63]:
ppo_tm_action_model(sample1)

tensor([[ 0.0037,  0.0031, -0.0085, -0.0003,  0.0036,  0.0068]],
       grad_fn=<AddmmBackward0>)

In [64]:
# Training
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(ppo_tm_action_model.parameters(), lr=0.001)

In [65]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [66]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(ppo_tm_action_model, train_pr_dataloader, criterion, optimizer)
    test_loss, test_acc = test(ppo_tm_action_model, test_pr_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.0154, Train Acc: 52.86%, Test Loss: 0.0077, Test Acc: 64.14%
Epoch 2: Train Loss: 0.0072, Train Acc: 65.15%, Test Loss: 0.0061, Test Acc: 68.13%
Epoch 3: Train Loss: 0.0061, Train Acc: 67.44%, Test Loss: 0.0053, Test Acc: 70.34%
Epoch 4: Train Loss: 0.0057, Train Acc: 68.88%, Test Loss: 0.0045, Test Acc: 71.38%
Epoch 5: Train Loss: 0.0055, Train Acc: 70.05%, Test Loss: 0.0044, Test Acc: 72.52%
Epoch 6: Train Loss: 0.0053, Train Acc: 71.02%, Test Loss: 0.0043, Test Acc: 72.39%
Epoch 7: Train Loss: 0.0052, Train Acc: 71.77%, Test Loss: 0.0044, Test Acc: 74.11%
Epoch 8: Train Loss: 0.0050, Train Acc: 72.15%, Test Loss: 0.0044, Test Acc: 74.13%
Epoch 9: Train Loss: 0.0050, Train Acc: 72.21%, Test Loss: 0.0041, Test Acc: 74.64%
Epoch 10: Train Loss: 0.0048, Train Acc: 72.66%, Test Loss: 0.0055, Test Acc: 72.28%
Epoch 11: Train Loss: 0.0048, Train Acc: 72.77%, Test Loss: 0.0042, Test Acc: 74.38%
Epoch 12: Train Loss: 0.0048, Train Acc: 72.84%, Test Loss: 0.0050, Test A

In [30]:
rl_model.policy.features_extractor.load_state_dict(ppo_tm_action_model.features_extractor.state_dict())
rl_model.policy.mlp_extractor.policy_net.load_state_dict(ppo_tm_action_model.policy_net.state_dict())
rl_model.policy.action_net.load_state_dict(ppo_tm_action_model.action_net.state_dict())

<All keys matched successfully>

In [31]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_ppo_tm_pr_dynamic'))

In [33]:
rl_model.policy

ActorCriticPolicy(
  (features_extractor): FE_TM_net(
    (fc1_1): Linear(in_features=10, out_features=16, bias=True)
    (fc1_2): Linear(in_features=2, out_features=16, bias=True)
    (fc1_3): Linear(in_features=5, out_features=16, bias=True)
    (fc2_1): Linear(in_features=16, out_features=8, bias=True)
    (fc2_2): Linear(in_features=16, out_features=8, bias=True)
    (fc2_3): Linear(in_features=16, out_features=8, bias=True)
    (fc3): Linear(in_features=24, out_features=16, bias=True)
    (fc4): Linear(in_features=16, out_features=16, bias=True)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential()
    (policy_net): Sequential(
      (0): Linear(in_features=16, out_features=80, bias=True)
      (1): Tanh()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=16, out_features=80, bias=True)
      (1): Tanh()
      (2): Linear(in_features=80, out_features=80, bias=True)
      (3

In [53]:
rl_model.predict(sample1)

(array([1]), None)

In [48]:
rl_model.predict(sample2)

(array([5]), None)

In [49]:
rl_model.predict(sample3)

(array([2]), None)

### DQN Single-modal Dynamic2 (Untrained + Pretrained)

In [82]:
class FE_DF_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 16):
        super(FE_DF_net, self).__init__(observation_space, features_dim)
        self.fc1 = nn.Linear(5, 16) # 5 Nodes status (CPU, Memory)
        self.fc2 = nn.Linear(16, 16)    # Last layer of FE_net


    def forward(self, x):
        x_nodes = 1 - x[:, :10]
        x_pod = x[:, 10:]

        x = x_nodes - x_pod.repeat(1, 5)
        # Average odd and even
        x = (x[:, ::2] + x[:, 1::2]) / 2 # 5 vectors -> (cpu_remain + mem_remain) / 2

        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [83]:
policy_kwargs = dict(
    features_extractor_class=FE_DF_net,
    features_extractor_kwargs=dict(features_dim=16),
    net_arch=[32, 32]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic2.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.DQN('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [84]:
rl_model.policy

DQNPolicy(
  (q_net): QNetwork(
    (features_extractor): FE_DF_net(
      (fc1): Linear(in_features=5, out_features=16, bias=True)
      (fc2): Linear(in_features=16, out_features=16, bias=True)
    )
    (q_net): Sequential(
      (0): Linear(in_features=16, out_features=32, bias=True)
      (1): ReLU()
      (2): Linear(in_features=32, out_features=32, bias=True)
      (3): ReLU()
      (4): Linear(in_features=32, out_features=6, bias=True)
    )
  )
  (q_net_target): QNetwork(
    (features_extractor): FE_DF_net(
      (fc1): Linear(in_features=5, out_features=16, bias=True)
      (fc2): Linear(in_features=16, out_features=16, bias=True)
    )
    (q_net): Sequential(
      (0): Linear(in_features=16, out_features=32, bias=True)
      (1): ReLU()
      (2): Linear(in_features=32, out_features=32, bias=True)
      (3): ReLU()
      (4): Linear(in_features=32, out_features=6, bias=True)
    )
  )
)

In [85]:
# model_ppo_mm_ut_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_df_ut_dynamic'))

In [86]:
rl_model.policy.q_net_target(sample1)

tensor([[ 0.0485,  0.0233,  0.1265, -0.0524, -0.0899, -0.0192]],
       grad_fn=<AddmmBackward0>)

In [87]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class DQN_DF_net(nn.Module):
    def __init__(self, original_model):
        super(DQN_DF_net, self).__init__()
        self.q_net = original_model.policy.q_net

    def forward(self, x):
        x = self.q_net(x)
        return x

In [88]:
dqn_df_model = DQN_DF_net(rl_model)

In [89]:
# Training
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(dqn_df_model.parameters(), lr=0.001)

In [90]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [91]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(dqn_df_model, train_pr_dataloader, criterion, optimizer)
    test_loss, test_acc = test(dqn_df_model, test_pr_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.0298, Train Acc: 47.83%, Test Loss: 0.0272, Test Acc: 51.21%
Epoch 2: Train Loss: 0.0269, Train Acc: 52.18%, Test Loss: 0.0268, Test Acc: 51.44%
Epoch 3: Train Loss: 0.0265, Train Acc: 52.73%, Test Loss: 0.0265, Test Acc: 51.92%
Epoch 4: Train Loss: 0.0262, Train Acc: 53.22%, Test Loss: 0.0264, Test Acc: 52.08%
Epoch 5: Train Loss: 0.0261, Train Acc: 53.31%, Test Loss: 0.0262, Test Acc: 52.23%
Epoch 6: Train Loss: 0.0260, Train Acc: 53.27%, Test Loss: 0.0261, Test Acc: 52.00%
Epoch 7: Train Loss: 0.0259, Train Acc: 53.26%, Test Loss: 0.0260, Test Acc: 51.78%
Epoch 8: Train Loss: 0.0258, Train Acc: 53.23%, Test Loss: 0.0259, Test Acc: 51.51%
Epoch 9: Train Loss: 0.0257, Train Acc: 53.11%, Test Loss: 0.0259, Test Acc: 51.65%
Epoch 10: Train Loss: 0.0257, Train Acc: 53.21%, Test Loss: 0.0258, Test Acc: 51.55%
Epoch 11: Train Loss: 0.0256, Train Acc: 53.21%, Test Loss: 0.0258, Test Acc: 51.92%
Epoch 12: Train Loss: 0.0256, Train Acc: 53.23%, Test Loss: 0.0257, Test A

In [94]:
rl_model.policy.q_net.load_state_dict(dqn_df_model.q_net.state_dict())
rl_model.policy.q_net_target.load_state_dict(dqn_df_model.q_net.state_dict())

<All keys matched successfully>

In [95]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_df_pr_dynamic'))

### DQN Single-modal Dynamic (Simplified)

In [30]:
class FE_NEW_net(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 16):
        super(FE_NEW_net, self).__init__(observation_space, features_dim)
        self.fc1 = nn.Linear(15, 32) # 5 Nodes status (CPU, Memory)
        self.fc2 = nn.Linear(32, 64)    # Last layer of FE_net


    def forward(self, x):
        x_nodes = 1 - x[:, :10] # Available CPU and Memory
        x_pod = x[:, 10:]  # Pod's CPU and Memory quota

        # Scale x's elements to [0, 10] integers (0.1 belongs to 1, 0.9 belongs to 10)
        x1 = (x_nodes - x_pod.repeat(1,5)) * 10
        x1 = x1.int()

        # Returns if the remaining resources of each node can accommodate the pod
        x2_1 = x_nodes[:, ::2]
        x2_1 = (x2_1 - x_pod[:, 0].unsqueeze(1)) >= 0
        x2_2 = x_nodes[:, 1::2]
        x2_2 = (x2_2 - x_pod[:, 1].unsqueeze(1)) >= 0
        # If x2_1 and x_2_2 are both True, then x2 is True
        x2 = x2_1 & x2_2 # Size : (batch_size, 5)
        x2 = x2.int()

        x = torch.cat((x1, x2), dim=1) # Size : (batch_size, 15)
        x = x.float()

        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [31]:
policy_kwargs = dict(
    features_extractor_class=FE_NEW_net,
    features_extractor_kwargs=dict(features_dim=64),
    net_arch=[64, 32]
)

env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic2.py', scenario_file='scenario-5l-5m-1000p-10m_unbalanced.csv')

rl_model = sb3.DQN('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [32]:
rl_model.policy

DQNPolicy(
  (q_net): QNetwork(
    (features_extractor): FE_NEW_net(
      (fc1): Linear(in_features=15, out_features=32, bias=True)
      (fc2): Linear(in_features=32, out_features=64, bias=True)
    )
    (q_net): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): ReLU()
      (4): Linear(in_features=32, out_features=6, bias=True)
    )
  )
  (q_net_target): QNetwork(
    (features_extractor): FE_NEW_net(
      (fc1): Linear(in_features=15, out_features=32, bias=True)
      (fc2): Linear(in_features=32, out_features=64, bias=True)
    )
    (q_net): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): ReLU()
      (4): Linear(in_features=32, out_features=6, bias=True)
    )
  )
)

In [33]:
# model_ppo_mm_ut_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_new_ut_dynamic'))

In [34]:
rl_model.policy.q_net_target(sample1)

tensor([[-0.1180, -0.0949, -0.2657,  0.0880, -0.0685,  0.0215]],
       grad_fn=<AddmmBackward0>)

In [35]:
# Model containing features_extractor, mlp_extractor.policy_net, action_net
class DQN_NEW_net(nn.Module):
    def __init__(self, original_model):
        super(DQN_NEW_net, self).__init__()
        self.q_net = original_model.policy.q_net

    def forward(self, x):
        x = self.q_net(x)
        return x

In [36]:
dqn_new_model = DQN_NEW_net(rl_model)

In [37]:
# Training
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(dqn_new_model.parameters(), lr=0.001)

In [38]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    for state, target in train_loader:
        optimizer.zero_grad()
        output = model(state)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * state.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for state, target in test_loader:
            output = model(state)
            test_loss += criterion(output, target).item() * state.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [39]:
epochs = 50
test_acc = 0
for epoch in range(1, epochs+1):
    train_loss, train_acc = train(dqn_new_model, train_dynamic_dataloader, criterion, optimizer)
    test_loss, test_acc = test(dqn_new_model, test_dynamic_dataloader, criterion)
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    if test_acc > 95:
        break

Epoch 1: Train Loss: 0.0701, Train Acc: 75.42%, Test Loss: 0.0573, Test Acc: 78.13%
Epoch 2: Train Loss: 0.0551, Train Acc: 78.39%, Test Loss: 0.0546, Test Acc: 78.29%
Epoch 3: Train Loss: 0.0531, Train Acc: 78.84%, Test Loss: 0.0533, Test Acc: 78.67%
Epoch 4: Train Loss: 0.0524, Train Acc: 79.10%, Test Loss: 0.0524, Test Acc: 79.06%
Epoch 5: Train Loss: 0.0518, Train Acc: 79.27%, Test Loss: 0.0519, Test Acc: 79.46%
Epoch 6: Train Loss: 0.0513, Train Acc: 79.41%, Test Loss: 0.0517, Test Acc: 79.63%
Epoch 7: Train Loss: 0.0509, Train Acc: 79.53%, Test Loss: 0.0516, Test Acc: 79.57%
Epoch 8: Train Loss: 0.0505, Train Acc: 79.63%, Test Loss: 0.0513, Test Acc: 79.68%
Epoch 9: Train Loss: 0.0502, Train Acc: 79.70%, Test Loss: 0.0508, Test Acc: 79.74%
Epoch 10: Train Loss: 0.0499, Train Acc: 79.70%, Test Loss: 0.0505, Test Acc: 79.80%
Epoch 11: Train Loss: 0.0497, Train Acc: 79.76%, Test Loss: 0.0502, Test Acc: 79.92%
Epoch 12: Train Loss: 0.0495, Train Acc: 79.80%, Test Loss: 0.0500, Test A

KeyboardInterrupt: 

In [None]:
rl_model.policy.q_net.load_state_dict(dqn_df_model.q_net.state_dict())
rl_model.policy.q_net_target.load_state_dict(dqn_df_model.q_net.state_dict())

<All keys matched successfully>

In [None]:
# model_ppo_mm_pr_dynamic
rl_model.save(os.path.join(base_path, 'notebook', 'net_arch', 'model_dqn_df_pr_dynamic'))