In [1]:
import importlib
import os
from time import sleep

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output, display

import optical_network_game.game_gym
importlib.reload(optical_network_game.game_gym)
from optical_network_game.game_gym import *

from optical_network_game.requests import *
from optical_network_game.topology_generation import *

from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN
from stable_baselines3.common import results_plotter
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy, plot_results
from stable_baselines3.common.callbacks import BaseCallback

pygame 2.0.3 (SDL 2.0.16, Python 3.9.10)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Callback that periodically reports the mean training reward, remembers the
    best value seen so far and, optionally, saves the model whenever that value
    improves (the check is done every ``check_freq`` calls of the callback).
    In practice, ``EvalCallback`` is the recommended tool for model selection.

    :param check_freq: Number of callback calls between two checks.
    :param log_dir: Path to the folder containing the file created by the
      ``Monitor`` wrapper; the episode rewards are read back from it.
    :param verbose: Verbosity level; any value above 0 prints the progress.
    :param save_path: Optional path handed to ``model.save`` each time the mean
      reward improves. Defaults to ``None``, in which case nothing is written
      to disk and the callback only reports/tracks the reward.
    """
    def __init__(self, check_freq: int, log_dir: str, verbose: int = 1, save_path=None):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = save_path
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        # Create folder if needed (only when saving has been requested)
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        """
        Run the periodic reward check.

        :return: Always ``True`` so that training is never aborted by this callback.
        """
        if self.n_calls % self.check_freq != 0:
            return True

        # Retrieve training reward from the Monitor log
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) == 0:
            # No finished episode has been logged yet
            return True

        # Mean training reward over the last 100 episodes
        mean_reward = np.mean(y[-100:])
        if self.verbose > 0:
            print(f"Num timesteps: {self.num_timesteps}")
            print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")

        # Keep the running best up to date; without this the "best" value
        # reported above would stay at -inf for the whole run.
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            if self.save_path is not None:
                if self.verbose > 0:
                    print(f"Saving new best model to {self.save_path}")
                self.model.save(self.save_path)

        return True

In [3]:
class TensorboardCallback(BaseCallback):
    """
    Callback that copies a few extra statistics from the environment's ``info``
    dict into the training logger so they show up next to the built-in metrics.
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)

    def _on_step(self) -> bool:
        """
        Record the extra statistics for the current step.

        Only the first entry of ``infos`` is read. A key that is absent from
        that dict is recorded as ``None`` (the result of ``dict.get``).

        :return: Always ``True`` so that training continues.
        """
        # (logger tag, key in the info dict), in the order they are recorded
        tag_to_info_key = (
            ('blocking_ratio', 'bp'),
            ('average_route_length', 'avg_length'),
            ('blocked_continuous', 'blocked_continuous'),
            ('blocked_contiguous', 'blocked_contiguous'),
        )
        first_info = self.locals["infos"][0]
        # Look everything up first, then record, mirroring a read-then-write flow
        collected = [(tag, first_info.get(info_key)) for tag, info_key in tag_to_info_key]
        for tag, value in collected:
            self.logger.record(tag, value)
        return True

In [4]:
import os

# Folder "tmp/" under the current working directory; it is handed to the
# Monitor wrapper and to the reward callback further down. Created on demand.
log_dir = os.path.join(os.getcwd(), "tmp/")
os.makedirs(log_dir, exist_ok=True)

In [5]:
# --- environment -----------------------------------------------------------
# Preset "VSNL" topology plus a generated request list feed the game env.
nodeList, linkList = createPresetTopology("VSNL", 5)
requestList = generateRequests(nodeList, 20, 2, 30)

user = User()
env = game_gym(nodeList, linkList, requestList, user, dynamic=True)
# Monitor logs episode statistics into log_dir, where the reward callback reads them
eveon = Monitor(env, log_dir)

# check_env(eveon, warn=True)

# --- callbacks -------------------------------------------------------------
# Reward report every 200000 callback calls, plus the extra tensorboard scalars
callback = SaveOnBestTrainingRewardCallback(check_freq=200_000, log_dir=log_dir)
tensor_callback = TensorboardCallback()

# --- model -----------------------------------------------------------------
model = DQN(
    'MlpPolicy',
    eveon,
    verbose=1,
    buffer_size=100_000,
    device='cuda',
    learning_starts=50_000,
    exploration_fraction=0.5,
    learning_rate=0.0001,
    gamma=0.8,
    tensorboard_log='./dqn_tensorboard/',
)

# --- train -----------------------------------------------------------------
model_name = "Dict_15"
model.learn(total_timesteps=30_000_000, callback=[callback, tensor_callback])
model.save(model_name)

# tensorboard --logdir ./dqn_tensorboard/


VSNL Topology Selected
Traffic load is: 15.0
Using cpu device
Wrapping the env in a DummyVecEnv.
Logging to ./dqn_tensorboard/DQN_15
Too many invalid actions.
Total reward for this episode is -28179.999999999898
Too many invalid actions.
Total reward for this episode is -27419.99999999987
Too many invalid actions.
Total reward for this episode is -40599.99999999988
Too many invalid actions.
Total reward for this episode is -17699.999999999993
------------------------------------
| average_route_length | 2.75      |
| blocked_contiguous   | 0         |
| blocked_continuous   | 0         |
| blocking_ratio       | 0.6       |
| rollout/             |           |
|    ep_len_mean       | 4.6e+04   |
|    ep_rew_mean       | -2.84e+04 |
|    exploration_rate  | 0.988     |
| time/                |           |
|    episodes          | 4         |
|    fps               | 2681      |
|    time_elapsed      | 68        |
|    total_timesteps   | 184106    |
| train/               |           

In [6]:
# create env

nodeList, linkList = createPresetTopology("VSNL", 5)
requestList = generateRequests(nodeList, 20, 2, 30)

user = User()
env = game_gym(nodeList, linkList, requestList, user, dynamic=True)

# test: load the trained agent saved under 'Dict_15' and let it play
model = DQN.load('Dict_15', env=env, device='cpu')

obs = env.reset()
# Plays episode after episode until the kernel is interrupted
while True:
    action, states_ = model.predict(obs, deterministic=True)
    # action = 6
    obs, rewards, dones, info = env.step(action)
    # plt.imshow(obs)
    # plt.show()
    print(action)
    if dones:
        # print(env.reward)

        # with open('info.json', 'w') as outfile:
        #     json.dump(info, outfile)

        # Keep the observation returned by reset(); otherwise the first action
        # of the next episode is predicted from the previous episode's final state.
        obs = env.reset()

    env.render()

VSNL Topology Selected
Traffic load is: 15.0
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
No more requests.
Total reward for this episode is -30000.0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
0
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
0
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
0
0
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
0
0
0
2
1
1
1
1
1
1
1
1
1
1
1
1
