### Run this first!

In [1]:
import os, sys

base_path = os.path.join(os.getcwd(), "..")
print(f"Base Path: {base_path}")
sys.path.append(base_path)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


In [2]:
# Stable baselines3
import stable_baselines3 as sb3

# env
import gym
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import torch as th
import torch.nn as nn
from gym import spaces

from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.logger import configure

from kube_mm_scheduler.model.promes import Net5_

device = th.device("cuda" if th.cuda.is_available() else "cpu")

class PromesPPO(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 80):
        super(PromesPPO, self).__init__(observation_space, features_dim)
        self.net = Net5_().to(device)
        self.net.load_state_dict(th.load(os.path.join(base_path,'kube_mm_scheduler/weight/net5.pt')))
        self.net.eval()

    def forward(self, observations: th.Tensor) -> th.Tensor:
        input1 = observations[:, :10].to(device)
        input2 = observations[:, 10:].to(device)

        return self.net(input1, input2)
    
class Naive(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 80):
        super(Naive, self).__init__(observation_space, features_dim)
        self.net = nn.Linear(observation_space.shape[0], features_dim).to(device)
        self.net.eval()

    def forward(self, observations: th.Tensor) -> th.Tensor:
        return self.net(observations)

policy_kwargs_promes = dict(
    features_extractor_class=PromesPPO,
    features_extractor_kwargs=dict(features_dim=80),
)

policy_kwargs_naive = dict(
    features_extractor_class=Naive,
    features_extractor_kwargs=dict(features_dim=80),
)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook


### 1. sb3 naive dqn with static reward

In [4]:
# Environment
env = gym.make('SimKubeEnv-v0', reward_file='static.py', scenario_file='scenario-5l-5m-1000p-10m.csv')

# Model : Naive dqn
sb3_naive_dqn_static = sb3.DQN('MlpPolicy', env, verbose=1).learn(300000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


KeyboardInterrupt: 

### 2. sb3 naive dqn with dynamic reward

In [4]:
# Environment
env = gym.make('SimKubeEnv-v0', reward_file='dynamic.py', scenario_file='scenario-5l-5m-1000p-10m.csv')

# Model : Naive dqn
sb3_naive_dqn_dynamic = sb3.DQN('MlpPolicy', env, verbose=1).learn(300000)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | -1.4e+03 |
|    exploration_rate | 0.837    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 63       |
|    time_elapsed     | 80       |
|    total_timesteps  | 5135     |
----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.29e+03  |
|    ep_rew_mean      | -1.43e+03 |
|    exploration_rate | 0.673     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 63        |
|    time_elapsed     | 162       |
|    total_timesteps  | 10330     |
-----------------------------------
-----------------------------------
| rollout/      

### 3. sb3 promes dqn with static reward

In [8]:
import torch as th
import torch.nn as nn
from gym import spaces

from stable_baselines3 import DQN
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

from kube_mm_scheduler.model.promes import Net5_

device = th.device("cuda" if th.cuda.is_available() else "cpu")

class PromesDQN(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 80):
        super(PromesDQN, self).__init__(observation_space, features_dim)
        self.net = Net5_().to(device)
        self.net.load_state_dict(th.load(os.path.join(base_path,'kube_mm_scheduler/weight/net5.pt')))
        self.net.eval()

    def forward(self, observations: th.Tensor) -> th.Tensor:
        input1 = observations[:, :10].to(device)
        input2 = observations[:, 10:].to(device)

        return self.net(input1, input2)

policy_kwargs = dict(
    features_extractor_class=PromesDQN,
    features_extractor_kwargs=dict(features_dim=80),
)

# Environment
env = gym.make('SimKubeEnv-v0', reward_file='static.py', scenario_file='scenario-5l-5m-1000p-10m.csv')

# Model : Promes dqn
sb3_promes_dqn_static = sb3.DQN('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs).learn(300000)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


KeyboardInterrupt: 

### 4. sb3 promes dqn with dynamic reward

In [14]:
import torch as th
import torch.nn as nn
from gym import spaces

from stable_baselines3 import DQN
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common import logger

from kube_mm_scheduler.model.promes import Net5_

device = th.device("cuda" if th.cuda.is_available() else "cpu")

class PromesDQN(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 80):
        super(PromesDQN, self).__init__(observation_space, features_dim)
        self.net = Net5_().to(device)
        self.net.load_state_dict(th.load(os.path.join(base_path,'kube_mm_scheduler/weight/net5.pt')))
        self.net.eval()

    def forward(self, observations: th.Tensor) -> th.Tensor:
        input1 = observations[:, :10].to(device)
        input2 = observations[:, 10:].to(device)

        return self.net(input1, input2)

policy_kwargs = dict(
    features_extractor_class=PromesDQN,
    features_extractor_kwargs=dict(features_dim=80),
)

# Environment
env = gym.make('SimKubeEnv-v0', reward_file='dynamic.py', scenario_file='scenario-5l-5m-1000p-10m.csv')

# Model : Promes dqn
sb3_promes_dqn_dynamic = sb3.DQN('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs).learn(300000)



Logging to log_dir_dqn_dynamic
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.3e+03   |
|    ep_rew_mean      | -1.44e+03 |
|    exploration_rate | 0.836     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 59        |
|    time_elapsed     | 87        |
|    total_timesteps  | 5185      |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.29e+03  |
|    ep_rew_mean      | -1.42e+03 |
|    exploration_rate | 0.673     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 5         |
|    time_elapsed     | 2046      |
|    total_timesteps  | 10340     |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len

### 5. sb3 promes PPO with static reward

In [15]:
import torch as th
import torch.nn as nn
from gym import spaces

from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common import logger

from kube_mm_scheduler.model.promes import Net5_

device = th.device("cuda" if th.cuda.is_available() else "cpu")

class PromesPPO(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 80):
        super(PromesPPO, self).__init__(observation_space, features_dim)
        self.net = Net5_().to(device)
        self.net.load_state_dict(th.load(os.path.join(base_path,'kube_mm_scheduler/weight/net5.pt')))
        self.net.eval()

    def forward(self, observations: th.Tensor) -> th.Tensor:
        input1 = observations[:, :10].to(device)
        input2 = observations[:, 10:].to(device)

        return self.net(input1, input2)

policy_kwargs = dict(
    features_extractor_class=PromesPPO,
    features_extractor_kwargs=dict(features_dim=80),
)

# Environment
env = gym.make('SimKubeEnv-v0', reward_file='static.py', scenario_file='scenario-5l-5m-1000p-10m.csv')

# Logger
logger.configure("log_dir_sac_static")

# Model : Promes dqn
sb3_promes_ppo_static = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs).learn(300000)

Logging to log_dir_sac_static
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.26e+03 |
|    ep_rew_mean     | 1.09e+03 |
| time/              |          |
|    fps             | 68       |
|    iterations      | 1        |
|    time_elapsed    | 29       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.27e+03    |
|    ep_rew_mean          | 1.07e+03    |
| time/                   |             |
|    fps                  | 62          |
|    iterations           | 2           |
|    time_elapsed         | 65          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.011766098 |
|    clip_fraction        | 0.191       |
|    clip_range           | 0.2 

In [19]:
# Save the model (sb3_promes_ppo_static)
sb3_promes_ppo_static.save("sb3_promes_ppo_static")

In [None]:
# Load the model (sb3_promes_ppo_static)
sb3_promes_ppo_static = sb3.PPO.load("sb3_promes_ppo_static")

### 6. sb3 promes PPO with dynamic reward

In [16]:
import torch as th
import torch.nn as nn
from gym import spaces

from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common import logger

from kube_mm_scheduler.model.promes import Net5_

device = th.device("cuda" if th.cuda.is_available() else "cpu")

class PromesPPO(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 80):
        super(PromesPPO, self).__init__(observation_space, features_dim)
        self.net = Net5_().to(device)
        self.net.load_state_dict(th.load(os.path.join(base_path,'kube_mm_scheduler/weight/net5.pt')))
        self.net.eval()

    def forward(self, observations: th.Tensor) -> th.Tensor:
        input1 = observations[:, :10].to(device)
        input2 = observations[:, 10:].to(device)

        return self.net(input1, input2)

policy_kwargs = dict(
    features_extractor_class=PromesPPO,
    features_extractor_kwargs=dict(features_dim=80),
)

# Environment
env = gym.make('SimKubeEnv-v0', reward_file='dynamic.py', scenario_file='scenario-5l-5m-1000p-10m.csv')

# Model : Promes dqn
sb3_promes_ppo_dynamic = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs).learn(300000)

Logging to log_dir_sac_dynamic
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.29e+03 |
|    ep_rew_mean     | -1.4e+03 |
| time/              |          |
|    fps             | 65       |
|    iterations      | 1        |
|    time_elapsed    | 31       |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.28e+03   |
|    ep_rew_mean          | -1.34e+03  |
| time/                   |            |
|    fps                  | 59         |
|    iterations           | 2          |
|    time_elapsed         | 68         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.01033981 |
|    clip_fraction        | 0.156      |
|    clip_range           | 0.2        |
| 

In [17]:
# Save the model (sb3_promes_ppo_dynamic)
sb3_promes_ppo_dynamic.save("sb3_promes_ppo_dynamic")

In [18]:
# Load the model (sb3_promes_ppo_dynamic)
sb3_promes_ppo_dynamic = sb3.PPO.load("sb3_promes_ppo_dynamic")

### 7. Promes PPO with real data

#### 1> Only Dynamic Reward

In [3]:
import torch as th
import torch.nn as nn
from gym import spaces

from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.logger import configure

from kube_mm_scheduler.model.promes import Net5_

device = th.device("cuda" if th.cuda.is_available() else "cpu")

class PromesPPO(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 80):
        super(PromesPPO, self).__init__(observation_space, features_dim)
        self.net = Net5_().to(device)
        self.net.load_state_dict(th.load(os.path.join(base_path,'kube_mm_scheduler/weight/net5.pt')))
        self.net.eval()

    def forward(self, observations: th.Tensor) -> th.Tensor:
        input1 = observations[:, :10].to(device)
        input2 = observations[:, 10:].to(device)

        return self.net(input1, input2)

policy_kwargs = dict(
    features_extractor_class=PromesPPO,
    features_extractor_kwargs=dict(features_dim=80),
)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook


In [5]:
# Environment
envs = []
for i in range(1, 101):
    env = gym.make('SimKubeEnv-v0', reward_file='promes_dynamic.py', scenario_file=f'trace2017_100_{i}.csv')
envs.append(env)

print(len(envs))

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
100


In [12]:
def keep_training(json_tracker_fname):

    log_name = json_tracker_fname.split('.')[0]
    log_path = 'training/log/' + log_name

    new_logger = configure(log_path, ["tensorboard", "stdout"])

    # Load the json tracker
    import json
    with open(f'training/{json_tracker_fname}', 'r') as f:
        json_tracker = json.load(f)

    # Check if the last scenario is None
    if json_tracker['last_scenario'] == 0:
        # If it is None, then start from the first scenario
        scenario_idx = 1
    else:
        # If it is not None, then continue from the last scenario
        scenario_idx = int(json_tracker['last_scenario']) + 1

    # Load the last scenario
    env, = envs[scenario_idx - 1]
    print(f"Loading the scenario: {env.scenario_file}")

    # Load the last model. If there is model_name doesn't exist in training/model folder, then start from the scratch
    model_name = json_tracker['model_name']
    if os.path.exists(f'training/model/{model_name}.zip'):
        print(f"Loading the model: {model_name}")
        model = sb3.PPO.load(f'training/model/{model_name}.zip')
        # Set the environment
        model.set_env(env)
    else:
        print(f"Model {model_name} doesn't exist. Start from the scratch.")
        model = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)



    # ============================== Performance Test ===============================

    # Previous model performance test (vs. defautl scheduler)
    # Test scenario : scenario-5l-5m-1000p-10m.csv
    test_env1 = gym.make('SimKubeEnv-v0', reward_file='promes_static.py', scenario_file=f'scenario-5l-5m-1000p-10m.csv')
    test_env2 = gym.make('SimKubeEnv-v0', reward_file='promes_static.py', scenario_file=f'scenario-5l-5m-1000p-10m.csv')

    # Default Scheduler
    from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
    default_scheduler = SimHrScheduler(test_env2, 'default.py')

    # Test the model
    obs1 = test_env1.reset()
    obs2 = test_env2.reset()
    done1 = False
    done2 = False
    step1 = 0
    step2 = 0
    acc_rew1 = 0
    acc_rew2 = 0

    print("Start testing...")
    while not done1 or not done2:
        if not done1:
            action1, _ = model.predict(obs1)
            obs1, reward1, done1, _ = test_env1.step(action1)
            step1 += 1
            acc_rew1 += reward1
        if not done2:
            action2 = default_scheduler.decision(test_env2)
            obs2, reward2, done2, _ = test_env2.step(action2)
            step2 += 1
            acc_rew2 += reward2

    print(f"Test result(reward): {acc_rew1} vs. {acc_rew2}")
    print(f"Test result(step): {step1} vs. {step2}")

    # Append it to the log file
    with open(f'training/log/{log_name}/test_result.txt', 'a') as f:
        f.write(f"Test result after {scenario_idx - 1}th training\n")
        f.write(f"- reward : {acc_rew1} vs. {acc_rew2}\n")
        f.write(f"- step : {step1} vs. {step2}\n")
        f.write("\n")

    # ============================== ==================== ===============================




    # Set the logger
    model.set_logger(new_logger)

    total_timesteps = json_tracker['total_steps']

    # Start training
    model.learn(total_timesteps)

    # Save the model
    model.save(f'training/model/{model_name}.zip')

    # Update the json tracker
    json_tracker['last_scenario'] = scenario_idx

    # Save the json tracker
    with open(f'training/{json_tracker_fname}', 'w') as f:
        json.dump(json_tracker, f)

    return json_tracker

In [13]:
from IPython.display import clear_output

last_idx = 0

# Get n_scenario by counting the number of trace files in trace folder
n_scenario = len(os.listdir(os.path.join(base_path, 'scenarios', 'trace2017')))

while last_idx < n_scenario:
    json_tracker = keep_training('tracker_ppo_promes_combined.json')
    last_idx = json_tracker['last_scenario']
    clear_output(wait=True)

Logging to training/log/tracker_ppo_promes_combined
Loading the scenario: trace2017/trace2017_100_23.csv
Loading the model: PPO_Promes_Combined
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Start testing...
Test result(reward): -313.8599999999993 vs. 239.8400000000005
Test result(step): 1190 vs. 1105
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.09e+03 |
|    ep_rew_mean     | -437     |
| time/              |          |
|    fps             | 35       |
|    iterations      | 1        |
|    time_elapsed    | 57       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.09e+03    |
|    ep_rew_mean          | -480        |
| time/                   |             |
|    fps                  | 34          |
|    iterations           | 2           |
|    time_elapsed         | 117  

KeyboardInterrupt: 

#### 2> PROMES Static + Dynamic Combination Reward

In [5]:
# Environment
envs = []
for i in range(1, 101):
    env1 = gym.make('SimKubeEnv-v0', reward_file='promes_static.py', scenario_file=f'trace2017_100_{i}.csv')
    env2 = gym.make('SimKubeEnv-v0', reward_file='promes_dynamic.py', scenario_file=f'trace2017_100_{i}.csv')
    envs.append((env1, env2))

print(len(envs))

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
100


In [8]:
def keep_training(json_tracker_fname):

    log_name = json_tracker_fname.split('.')[0]
    log_path = 'training/log/' + log_name

    # Load the json tracker
    import json
    with open(f'training/{json_tracker_fname}', 'r') as f:
        json_tracker = json.load(f)

    # Check if the last scenario is None
    if json_tracker['last_scenario'] == 0:
        # If it is None, then start from the first scenario
        scenario_idx = 1
    else:
        # If it is not None, then continue from the last scenario
        scenario_idx = int(json_tracker['last_scenario']) + 1

    # Load the last scenario
    env1, env2 = envs[scenario_idx - 1]
    print(f"Loading the scenario: {env1.scenario_file}")

    # Load the last model. If there is model_name doesn't exist in training/model folder, then start from the scratch
    model_name = json_tracker['model_name']
    if os.path.exists(f'training/model/{model_name}.zip'):
        print(f"Loading the model: {model_name}")
        model = sb3.PPO.load(f'training/model/{model_name}.zip')
        # Set the environment
        model.set_env(env1)
    else:
        print(f"Model {model_name} doesn't exist. Start from the scratch.")
        model = sb3.PPO('MlpPolicy', env1, verbose=1, policy_kwargs=policy_kwargs_promes)



    # ============================== Performance Test ===============================

    # Previous model performance test (vs. defautl scheduler)
    # Test scenario : scenario-5l-5m-1000p-10m.csv
    test_env1 = gym.make('SimKubeEnv-v0', reward_file='promes_static.py', scenario_file=f'scenario-5l-5m-1000p-10m.csv')
    test_env2 = gym.make('SimKubeEnv-v0', reward_file='promes_static.py', scenario_file=f'scenario-5l-5m-1000p-10m.csv')

    # Default Scheduler
    from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
    default_scheduler = SimHrScheduler(test_env2, 'default.py')

    # Test the model
    obs1 = test_env1.reset()
    obs2 = test_env2.reset()
    done1 = False
    done2 = False
    step1 = 0
    step2 = 0
    acc_rew1 = 0
    acc_rew2 = 0

    print("Start testing...")
    while not done1 or not done2:
        if not done1:
            action1, _ = model.predict(obs1)
            obs1, reward1, done1, _ = test_env1.step(action1)
            step1 += 1
            acc_rew1 += reward1
        if not done2:
            action2 = default_scheduler.decision(test_env2)
            obs2, reward2, done2, _ = test_env2.step(action2)
            step2 += 1
            acc_rew2 += reward2

    print(f"Test result(reward): {acc_rew1} vs. {acc_rew2}")
    print(f"Test result(step): {step1} vs. {step2}")

    # Append it to the log file

    # Make folder if it doesn't exist
    if not os.path.exists(f'training/log/{log_name}'):
        os.makedirs(f'training/log/{log_name}')

    with open(f'training/log/{log_name}/test_result.txt', 'a') as f:
        f.write(f"{scenario_idx - 1}, {acc_rew2}, {step2}, {acc_rew1}, {step1}\n")
        # f.write(f"Test result after {scenario_idx - 1}th training\n")
        # f.write(f"- reward : {acc_rew1} vs. {acc_rew2}\n")
        # f.write(f"- step : {step1} vs. {step2}\n")
        # f.write("\n")

    # ============================== ==================== ===============================

    total_timesteps = json_tracker['total_steps']

    # Start training
    model.learn(total_timesteps)

    # Training for env2
    model.set_env(env2)
    model.learn(total_timesteps)

    # Save the model
    model.save(f'training/model/{model_name}.zip')

    # Update the json tracker
    json_tracker['last_scenario'] = scenario_idx

    # Save the json tracker
    with open(f'training/{json_tracker_fname}', 'w') as f:
        json.dump(json_tracker, f)

    return json_tracker

In [None]:
from IPython.display import clear_output

last_idx = 0

# Get n_scenario by counting the number of trace files in trace folder
n_scenario = len(os.listdir(os.path.join(base_path, 'scenarios', 'trace2017')))

while last_idx < n_scenario:
    json_tracker = keep_training('tracker_ppo_promes_combined.json')
    last_idx = json_tracker['last_scenario']
    clear_output(wait=True)

#### 3> Static w/ Steps

In [3]:
# Environment
envs = []
for i in range(1, 101):
    env1 = gym.make('SimKubeEnv-v0', reward_file='promes_static_step_1.py', scenario_file=f'trace2017_100_{i}.csv')
    envs.append((env1))

print(len(envs))

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..
100


In [4]:
def keep_training(json_tracker_fname):

    log_name = json_tracker_fname.split('.')[0]
    log_path = 'training/log/' + log_name

    # Load the json tracker
    import json
    with open(f'training/{json_tracker_fname}', 'r') as f:
        json_tracker = json.load(f)

    # Check if the last scenario is None
    if json_tracker['last_scenario'] == 0:
        # If it is None, then start from the first scenario
        scenario_idx = 1
    else:
        # If it is not None, then continue from the last scenario
        scenario_idx = int(json_tracker['last_scenario']) + 1

    # Load the last scenario
    env1 = envs[scenario_idx - 1]
    print(f"Loading the scenario: {env1.scenario_file}")

    # Load the last model. If there is model_name doesn't exist in training/model folder, then start from the scratch
    model_name = json_tracker['model_name']
    if os.path.exists(f'training/model/{model_name}.zip'):
        print(f"Loading the model: {model_name}")
        model = sb3.PPO.load(f'training/model/{model_name}.zip')
        # Set the environment
        model.set_env(env1)
    else:
        print(f"Model {model_name} doesn't exist. Start from the scratch.")
        model = sb3.PPO('MlpPolicy', env1, verbose=1)



    # ============================== Performance Test ===============================

    # Previous model performance test (vs. defautl scheduler)
    # Test scenario : scenario-5l-5m-1000p-10m.csv
    test_env1 = gym.make('SimKubeEnv-v0', reward_file='promes_static.py', scenario_file=f'scenario-5l-5m-1000p-10m.csv')
    test_env2 = gym.make('SimKubeEnv-v0', reward_file='promes_static.py', scenario_file=f'scenario-5l-5m-1000p-10m.csv')

    # Default Scheduler
    from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
    default_scheduler = SimHrScheduler(test_env2, 'default.py')

    # Test the model
    obs1 = test_env1.reset()
    obs2 = test_env2.reset()
    done1 = False
    done2 = False
    step1 = 0
    step2 = 0
    acc_rew1 = 0
    acc_rew2 = 0

    print("Start testing...")
    while not done1 or not done2:
        if not done1:
            action1, _ = model.predict(obs1)
            obs1, reward1, done1, _ = test_env1.step(action1)
            step1 += 1
            acc_rew1 += reward1
        if not done2:
            action2 = default_scheduler.decision(test_env2)
            obs2, reward2, done2, _ = test_env2.step(action2)
            step2 += 1
            acc_rew2 += reward2

    print(f"Test result(reward): {acc_rew1} vs. {acc_rew2}")
    print(f"Test result(step): {step1} vs. {step2}")

    # Append it to the log file

    # Make folder if it doesn't exist
    if not os.path.exists(f'training/log/{log_name}'):
        os.makedirs(f'training/log/{log_name}')

    with open(f'training/log/{log_name}/test_result.txt', 'a') as f:
        f.write(f"Test result after {scenario_idx - 1}th training\n")
        f.write(f"- reward : {acc_rew1} vs. {acc_rew2}\n")
        f.write(f"- step : {step1} vs. {step2}\n")
        f.write("\n")

    # ============================== ==================== ===============================


    total_timesteps = json_tracker['total_steps']

    # Start training
    model.learn(total_timesteps)

    # # Training for env2
    # model.set_env(env2)
    # model.learn(total_timesteps)

    # Save the model
    model.save(f'training/model/{model_name}.zip')

    # Update the json tracker
    json_tracker['last_scenario'] = scenario_idx

    # Save the json tracker
    with open(f'training/{json_tracker_fname}', 'w') as f:
        json.dump(json_tracker, f)

    return json_tracker

In [5]:
from IPython.display import clear_output

last_idx = 0

# Get n_scenario by counting the number of trace files in trace folder
n_scenario = len(os.listdir(os.path.join(base_path, 'scenarios', 'trace2017')))

while last_idx < n_scenario:
    json_tracker = keep_training('tracker_ppo_static_step.json')
    last_idx = json_tracker['last_scenario']
    clear_output(wait=True)

Loading the scenario: trace2017/trace2017_100_3.csv
Loading the model: PPO_Static_Step
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Start testing...
Test result(reward): -33723.739999999816 vs. -316.58000000000044
Test result(step): 4741 vs. 1102
-----------------------------
| time/              |      |
|    fps             | 31   |
|    iterations      | 1    |
|    time_elapsed    | 65   |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 32          |
|    iterations           | 2           |
|    time_elapsed         | 127         |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.009506434 |
|    clip_fraction        | 0.0975      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.419      |
|    explained_variance   | 0.311       |
|    le

KeyboardInterrupt: 

#### 4> Naive Static + Dynamic Combination Reward

In [None]:
# Environment
envs = []
for i in range(1, 101):
    env1 = gym.make('SimKubeEnv-v0', reward_file='promes_static.py', scenario_file=f'trace2017_100_{i}.csv')
    env2 = gym.make('SimKubeEnv-v0', reward_file='promes_dynamic.py', scenario_file=f'trace2017_100_{i}.csv')
    envs.append((env1, env2))

print(len(envs))

In [None]:
def keep_training(json_tracker_fname):

    log_name = json_tracker_fname.split('.')[0]
    log_path = 'training/log/' + log_name

    # Load the json tracker
    import json
    with open(f'training/{json_tracker_fname}', 'r') as f:
        json_tracker = json.load(f)

    # Check if the last scenario is None
    if json_tracker['last_scenario'] == 0:
        # If it is None, then start from the first scenario
        scenario_idx = 1
    else:
        # If it is not None, then continue from the last scenario
        scenario_idx = int(json_tracker['last_scenario']) + 1

    # Load the last scenario
    env1, env2 = envs[scenario_idx - 1]
    print(f"Loading the scenario: {env1.scenario_file}")

    # Load the last model. If there is model_name doesn't exist in training/model folder, then start from the scratch
    model_name = json_tracker['model_name']
    if os.path.exists(f'training/model/{model_name}.zip'):
        print(f"Loading the model: {model_name}")
        model = sb3.PPO.load(f'training/model/{model_name}.zip')
        # Set the environment
        model.set_env(env1)
    else:
        print(f"Model {model_name} doesn't exist. Start from the scratch.")
        model = sb3.PPO('MlpPolicy', env1, verbose=1, policy_kwargs=policy_kwargs_naive)



    # ============================== Performance Test ===============================

    # Previous model performance test (vs. defautl scheduler)
    # Test scenario : scenario-5l-5m-1000p-10m.csv
    test_env1 = gym.make('SimKubeEnv-v0', reward_file='promes_static.py', scenario_file=f'scenario-5l-5m-1000p-10m.csv')
    test_env2 = gym.make('SimKubeEnv-v0', reward_file='promes_static.py', scenario_file=f'scenario-5l-5m-1000p-10m.csv')

    # Default Scheduler
    from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
    default_scheduler = SimHrScheduler(test_env2, 'default.py')

    # Test the model
    obs1 = test_env1.reset()
    obs2 = test_env2.reset()
    done1 = False
    done2 = False
    step1 = 0
    step2 = 0
    acc_rew1 = 0
    acc_rew2 = 0

    print("Start testing...")
    while not done1 or not done2:
        if not done1:
            action1, _ = model.predict(obs1)
            obs1, reward1, done1, _ = test_env1.step(action1)
            step1 += 1
            acc_rew1 += reward1
        if not done2:
            action2 = default_scheduler.decision(test_env2)
            obs2, reward2, done2, _ = test_env2.step(action2)
            step2 += 1
            acc_rew2 += reward2

    print(f"Test result(reward): {acc_rew1} vs. {acc_rew2}")
    print(f"Test result(step): {step1} vs. {step2}")

    # Append it to the log file

    # Make folder if it doesn't exist
    if not os.path.exists(f'training/log/{log_name}'):
        os.makedirs(f'training/log/{log_name}')

    with open(f'training/log/{log_name}/test_result.txt', 'a') as f:
        f.write("step, d_reward, d_step, p_reward, p_step\n")
        f.write(f"{scenario_idx - 1}, {acc_rew2}, {step2}, {acc_rew1}, {step1}\n")
        # f.write(f"Test result after {scenario_idx - 1}th training\n")
        # f.write(f"- reward : {acc_rew1} vs. {acc_rew2}\n")
        # f.write(f"- step : {step1} vs. {step2}\n")
        # f.write("\n")

    # ============================== ==================== ===============================

    total_timesteps = json_tracker['total_steps']

    # Start training
    model.learn(total_timesteps)

    # Training for env2
    model.set_env(env2)
    model.learn(total_timesteps)

    # Save the model
    model.save(f'training/model/{model_name}.zip')

    # Update the json tracker
    json_tracker['last_scenario'] = scenario_idx

    # Save the json tracker
    with open(f'training/{json_tracker_fname}', 'w') as f:
        json.dump(json_tracker, f)

    return json_tracker

In [None]:
from IPython.display import clear_output

last_idx = 0

# Get n_scenario by counting the number of trace files in trace folder
n_scenario = len(os.listdir(os.path.join(base_path, 'scenarios', 'trace2017')))

while last_idx < n_scenario:
    json_tracker = keep_training('tracker_ppo_naive_combined.json')
    last_idx = json_tracker['last_scenario']
    clear_output(wait=True)

#### 5> PPO Promes Dynamic New

In [5]:
# Environment
envs = []
for i in range(1, 101):
    env = gym.make('SimKubeEnv-v0', reward_file='promes_dynamic_new.py', scenario_file=f'trace2017_100_{i}.csv')
    envs.append(env)

print(len(envs))

100


In [6]:
def test_model(scenario_file, model_name, log_name, scenario_idx):
    # ============================== Performance Test ===============================

    # Previous model performance test (vs. defautl scheduler)
    # Test scenario : scenario-5l-5m-1000p-10m.csv
    test_env1 = gym.make('SimKubeEnv-v0', reward_file='promes_dynamic_new.py', scenario_file=scenario_file)
    test_env2 = gym.make('SimKubeEnv-v0', reward_file='promes_dynamic_new.py', scenario_file=scenario_file)

    # Default Scheduler
    from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
    default_scheduler = SimHrScheduler(test_env2, 'default.py')

    # RL Scheduler
    from kube_rl_scheduler.scheduler.sim_rl_scheduler import SimRlScheduler
    rl_scheduler = SimRlScheduler(test_env1, f'_{model_name}.zip')


    # Test the model
    obs1 = test_env1.reset()
    obs2 = test_env2.reset()
    done1 = False
    done2 = False
    step1 = 0
    step2 = 0
    acc_rew1 = 0
    acc_rew2 = 0

    print("Start testing...")
    while not done1 or not done2:
        if not done1:
            # action1, _ = model.predict(obs1)
            action1 = rl_scheduler.decision(test_env1)
            obs1, reward1, done1, _ = test_env1.step(action1)
            step1 += 1
            acc_rew1 += reward1
        if not done2:
            action2 = default_scheduler.decision(test_env2)
            obs2, reward2, done2, _ = test_env2.step(action2)
            step2 += 1
            acc_rew2 += reward2

    acc_rew1 = round(acc_rew1, 2)
    acc_rew2 = round(acc_rew2, 2)

    print(f"Test result(reward): {acc_rew1} vs. {acc_rew2}")
    print(f"Test result(step): {step1} vs. {step2}")

    return acc_rew1, acc_rew2, step1, step2

    # ============================== ==================== ===============================

In [9]:
def keep_training(json_tracker_fname):

    log_name = json_tracker_fname.split('.')[0]
    log_path = 'training/log/' + log_name

    # Load the json tracker
    import json
    with open(f'training/{json_tracker_fname}', 'r') as f:
        json_tracker = json.load(f)

    # Check if the last scenario is None
    if json_tracker['last_scenario'] == 0:
        # If it is None, then start from the first scenario
        scenario_idx = 1
    else:
        # If it is not None, then continue from the last scenario
        scenario_idx = int(json_tracker['last_scenario']) + 1

    # Load the last scenario
    env = envs[scenario_idx - 1]
    print(f"Loading the scenario: {env.scenario_file}")

    # Load the last model. If there is model_name doesn't exist in training/model folder, then start from the scratch
    model_name = json_tracker['model_name']
    if os.path.exists(f'training/model/{model_name}.zip'):
        print(f"Loading the model: {model_name}")
        model = sb3.PPO.load(f'training/model/{model_name}.zip')
        # Set the environment
        model.set_env(env)
    else:
        print(f"Model {model_name} doesn't exist. Start from the scratch.")
        model = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs_promes)
        model.save(f'training/model/{model_name}.zip')

    a1, a2, a3, a4 = test_model('scenario-5l-5m-1000p-10m.csv', model_name, log_name, scenario_idx)
    b1, b2, b3, b4 = test_model('scenario-3l-10m-1000p-10m.csv', model_name, log_name, scenario_idx)
    c1, c2, c3, c4 = test_model('scenario-10l-3m-1000p-10m.csv', model_name, log_name, scenario_idx)

    # Append it to the log file

    # Make folder if it doesn't exist
    if not os.path.exists(f'training/log/{log_name}'):
        os.makedirs(f'training/log/{log_name}')

    with open(f'training/log/{log_name}/test_result.txt', 'a') as f:
        # f.write("step, d_reward, d_step, p_reward, p_step\n")
        f.write(f"{scenario_idx - 1}, {a1}, {a2}, {a3}, {a4}, {b1}, {b2}, {b3}, {b4}, {c1}, {c2}, {c3}, {c4}\n")

    total_timesteps = json_tracker['total_steps']

    # Start training
    model.learn(total_timesteps)

    # Save the model
    model.save(f'training/model/{model_name}.zip')

    # Update the json tracker
    json_tracker['last_scenario'] = scenario_idx

    # Save the json tracker
    with open(f'training/{json_tracker_fname}', 'w') as f:
        json.dump(json_tracker, f)

    return json_tracker

In [10]:
from IPython.display import clear_output

last_idx = 0

# Get n_scenario by counting the number of trace files in trace folder
n_scenario = len(os.listdir(os.path.join(base_path, 'scenarios', 'trace2017')))

while last_idx < n_scenario:
    json_tracker = keep_training('tracker_ppo_promes_dynamic_new.json')
    last_idx = json_tracker['last_scenario']
    clear_output(wait=True)

Loading the scenario: trace2017/trace2017_100_11.csv
Loading the model: PPO_Promes_Dynamic_New
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Start testing...
Test result(reward): 208.75 vs. 192.27
Test result(step): 1120 vs. 1102
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Start testing...
Test result(reward): 266.74 vs. 235.95
Test result(step): 1274 vs. 1261


FileNotFoundError: [Errno 2] No such file or directory: '/Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/../kube_sim_gym/utils/../../scenarios/scenario_10l-3m-1000p-10m.csv'

#### 6> PPO Naive Dynamic New

In [7]:
# Environment
envs = []
for i in range(1, 101):
    env = gym.make('SimKubeEnv-v0', reward_file='promes_dynamic_new.py', scenario_file=f'trace2017_100_{i}.csv')
    envs.append(env)

print(len(envs))

100


In [8]:
def keep_training(json_tracker_fname):

    log_name = json_tracker_fname.split('.')[0]
    log_path = 'training/log/' + log_name

    # Load the json tracker
    import json
    with open(f'training/{json_tracker_fname}', 'r') as f:
        json_tracker = json.load(f)

    # Check if the last scenario is None
    if json_tracker['last_scenario'] == 0:
        # If it is None, then start from the first scenario
        scenario_idx = 1
    else:
        # If it is not None, then continue from the last scenario
        scenario_idx = int(json_tracker['last_scenario']) + 1

    # Load the last scenario
    env = envs[scenario_idx - 1]
    print(f"Loading the scenario: {env.scenario_file}")

    # Load the last model. If there is model_name doesn't exist in training/model folder, then start from the scratch
    model_name = json_tracker['model_name']
    if os.path.exists(f'training/model/{model_name}.zip'):
        print(f"Loading the model: {model_name}")
        model = sb3.PPO.load(f'training/model/{model_name}.zip')
        # Set the environment
        model.set_env(env)
    else:
        print(f"Model {model_name} doesn't exist. Start from the scratch.")
        model = sb3.PPO('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs_naive)
        model.save(f'training/model/{model_name}.zip')



    # ============================== Performance Test ===============================

    # Previous model performance test (vs. defautl scheduler)
    # Test scenario : scenario-5l-5m-1000p-10m.csv
    test_env1 = gym.make('SimKubeEnv-v0', reward_file='promes_dynamic_new.py', scenario_file=f'scenario-5l-5m-1000p-10m.csv')
    test_env2 = gym.make('SimKubeEnv-v0', reward_file='promes_dynamic_new.py', scenario_file=f'scenario-5l-5m-1000p-10m.csv')

    # Default Scheduler
    from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
    default_scheduler = SimHrScheduler(test_env2, 'default.py')

    # RL Scheduler
    from kube_rl_scheduler.scheduler.sim_rl_scheduler import SimRlScheduler
    rl_scheduler = SimRlScheduler(test_env1, f'_{model_name}.zip')


    # Test the model
    obs1 = test_env1.reset()
    obs2 = test_env2.reset()
    done1 = False
    done2 = False
    step1 = 0
    step2 = 0
    acc_rew1 = 0
    acc_rew2 = 0

    print("Start testing...")
    while not done1 or not done2:
        if not done1:
            # action1, _ = model.predict(obs1)
            action1 = rl_scheduler.decision(test_env1)
            obs1, reward1, done1, _ = test_env1.step(action1)
            step1 += 1
            acc_rew1 += reward1
        if not done2:
            action2 = default_scheduler.decision(test_env2)
            obs2, reward2, done2, _ = test_env2.step(action2)
            step2 += 1
            acc_rew2 += reward2

    acc_rew1 = round(acc_rew1, 2)
    acc_rew2 = round(acc_rew2, 2)

    print(f"Test result(reward): {acc_rew1} vs. {acc_rew2}")
    print(f"Test result(step): {step1} vs. {step2}")

    # Append it to the log file

    # Make folder if it doesn't exist
    if not os.path.exists(f'training/log/{log_name}'):
        os.makedirs(f'training/log/{log_name}')

    with open(f'training/log/{log_name}/test_result.txt', 'a') as f:
        f.write("step, d_reward, d_step, p_reward, p_step\n")
        f.write(f"{scenario_idx - 1}, {acc_rew2}, {step2}, {acc_rew1}, {step1}\n")

    # ============================== ==================== ===============================

    total_timesteps = json_tracker['total_steps']

    # Start training
    model.learn(total_timesteps)

    # Save the model
    model.save(f'training/model/{model_name}.zip')

    # Update the json tracker
    json_tracker['last_scenario'] = scenario_idx

    # Save the json tracker
    with open(f'training/{json_tracker_fname}', 'w') as f:
        json.dump(json_tracker, f)

    return json_tracker

In [9]:
from IPython.display import clear_output

last_idx = 0

# Get n_scenario by counting the number of trace files in trace folder
n_scenario = len(os.listdir(os.path.join(base_path, 'scenarios', 'trace2017')))

while last_idx < n_scenario:
    json_tracker = keep_training('tracker_ppo_naive_dynamic_new.json')
    last_idx = json_tracker['last_scenario']
    clear_output(wait=True)

Loading the scenario: trace2017/trace2017_100_9.csv
Loading the model: PPO_Naive_Dynamic_New
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


RuntimeError: Error(s) in loading state_dict for ActorCriticPolicy:
	Missing key(s) in state_dict: "features_extactor.net.net3_.fc1_1.weight", "features_extactor.net.net3_.fc1_1.bias", "features_extactor.net.net3_.fc1_2.weight", "features_extactor.net.net3_.fc1_2.bias", "features_extactor.net.net3_.fc2.weight", "features_extactor.net.net3_.fc2.bias", "features_extactor.net.net3_.fc3.weight", "features_extactor.net.net3_.fc3.bias", "features_extactor.net.fc1_3_1.weight", "features_extactor.net.fc1_3_1.bias", "features_extactor.net.fc1_3_2.weight", "features_extactor.net.fc1_3_2.bias", "features_extactor.net.fc1_3_3.weight", "features_extactor.net.fc1_3_3.bias", "features_extactor.net.fc1_3_4.weight", "features_extactor.net.fc1_3_4.bias", "features_extactor.net.fc1_3_5.weight", "features_extactor.net.fc1_3_5.bias", "features_extactor.net.fc2_1.weight", "features_extactor.net.fc2_1.bias", "features_extactor.net.fc2_2.weight", "features_extactor.net.fc2_2.bias", "features_extactor.net.fc2_3.weight", "features_extactor.net.fc2_3.bias", "features_extactor.net.fc2_4.weight", "features_extactor.net.fc2_4.bias", "features_extactor.net.fc2_5.weight", "features_extactor.net.fc2_5.bias". 
	Unexpected key(s) in state_dict: "features_extractor.net.weight", "features_extractor.net.bias". 

In [9]:
# Environment
test_envs = []
for i in range(1, 11):
    test_env1 = gym.make('SimKubeEnv-v0', reward_file='promes_dynamic_new.py', scenario_file=f'trace2017_100_{i}.csv')
    test_env2 = gym.make('SimKubeEnv-v0', reward_file='promes_dynamic_new.py', scenario_file=f'trace2017_100_{i}.csv')
    test_envs.append((test_env1, test_env2))

print(len(test_envs))

10


In [14]:
import gym
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

model_name = 'PPO_Naive_Dynamic_New' # Change model name
reward_file = 'promes_dynamic_new.py' # Change reward file name

# Previous model performance test (vs. defautl scheduler)
# Test scenario : scenario-5l-5m-1000p-10m.csv
test_env1 = test_envs[0][0]
test_env2 = test_envs[0][1]

# RL Scheduler
from kube_rl_scheduler.scheduler.sim_rl_scheduler import SimRlScheduler
rl_scheduler = SimRlScheduler(test_env2, f'_{model_name}.zip')
# Default Scheduler
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
default_scheduler = SimHrScheduler(test_env2, 'default.py')

# Test the model
obs1 = test_env1.reset()
obs2 = test_env2.reset()
done1 = False
done2 = False
scenario_cnt1 = 0
scenario_cnt2 = 0
step1 = 0
step2 = 0
acc_rew1 = 0
acc_rew2 = 0

print("Start testing...")

while (not done1 and scenario_cnt1 != 9) or (not done2 and scenario_cnt2 != 9):
    if not done1:
        # action1, _ = model.predict(obs1)
        action1 = rl_scheduler.decision(test_env1)
        obs1, reward1, done1, _ = test_env1.step(action1)
        step1 += 1
        acc_rew1 += reward1
    if not done2:
        action2 = default_scheduler.decision(test_env2)
        obs2, reward2, done2, _ = test_env2.step(action2)
        step2 += 1
        acc_rew2 += reward2

    # if done1:
    #     scenario_cnt1 += 1
    #     test_env1 = test_envs[scenario_cnt1][0]
    #     rl_scheduler = SimRlScheduler(test_env1, f'_{model_name}.zip')
    #     obs1 = test_env1.reset()
    #     # done1 = False
    # if done2:
    #     scenario_cnt2 += 1
    #     test_env2 = test_envs[scenario_cnt2][1]
    #     default_scheduler = SimHrScheduler(test_env2, 'default.py')
    #     obs2 = test_env2.reset()
    #     # done2 = False

    if done1 and done2:

        print(f"Intermediary result at step {step1 if step1 > step2 else step2}")
        print(f"reward : {acc_rew1}(my) vs. {acc_rew2}(default)")
        print(f"step : {step1}(my) vs. {step2}(default)")

        scenario_cnt1 += 1
        test_env1 = test_envs[scenario_cnt1][0]
        rl_scheduler = SimRlScheduler(test_env1, f'_{model_name}.zip')
        obs1 = test_env1.reset()
        done1 = False

        scenario_cnt2 += 1
        test_env2 = test_envs[scenario_cnt2][1]
        default_scheduler = SimHrScheduler(test_env2, 'default.py')
        obs2 = test_env2.reset()
        done2 = False


acc_rew1 = round(acc_rew1, 2)
acc_rew2 = round(acc_rew2, 2)

print("Fianl result")
print(f"reward : {acc_rew1}(my) vs. {acc_rew2}(default)")
print(f"step : {step1}(my) vs. {step2}(default)")


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Start testing...
Intermediary result at step 1026
reward : -17.205099999999987(my) vs. 59.01179999999993(default)
step : 1026(my) vs. 1025(default)
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Intermediary result at step 2084
reward : 38.86790000000002(my) vs. 207.11390000000003(default)
step : 2084(my) vs. 2082(default)
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Intermediary result at step 3085
reward : 84.77629999999992(my) vs. 331.67520000000025(default)
step : 3085(my) vs. 3083(default)
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Intermediary result at step 4196
reward : 277.7597999999999(my) vs. 594.0169999999999(default)
step : 4196(my) vs. 4195(default)
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Intermediary result at step 5197
reward : 270.4608999999997(my) vs. 690.4779(d

IndexError: list index out of range