In [None]:
import importlib
import os
from time import sleep

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output, display

import optical_network_game.game_gym
importlib.reload(optical_network_game.game_gym)
from optical_network_game.game_gym import *

from optical_network_game.requests import *
from optical_network_game.topology_generation import *

from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN
from stable_baselines3.common import results_plotter
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy, plot_results
from stable_baselines3.common.callbacks import BaseCallback

## Callback Functions

In [None]:
class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Callback that reports the mean training reward and tracks the best one
    (the check is done every ``check_freq`` calls; in practice, we recommend
    using ``EvalCallback``).

    At each check the results in ``log_dir`` are loaded and the mean reward over
    the last 100 episodes is compared with the best mean seen so far. Saving the
    model on a new best is opt-in: it only happens when ``save_path`` is given.

    :param check_freq: Number of callback calls between two checks.
    :param log_dir: Path to the folder that contains the file created by the
      ``Monitor`` wrapper.
    :param verbose: Verbosity level; values above 0 print progress messages.
    :param save_path: Optional path of the file the best model is saved to.
      ``None`` (the default) disables saving.
    """
    def __init__(self, check_freq: int, log_dir: str, verbose: int = 1,
                 save_path: "str | None" = None):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = save_path
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        # Make sure the folder that will hold the saved model exists.
        if self.save_path is not None:
            parent_dir = os.path.dirname(self.save_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

    def _on_step(self) -> bool:
        # Only do the (file-reading) check every ``check_freq`` calls.
        if self.n_calls % self.check_freq != 0:
            return True

        # Retrieve training reward
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) == 0:
            # Nothing logged yet, so there is nothing to compare.
            return True

        # Mean training reward over the last 100 episodes
        mean_reward = np.mean(y[-100:])
        if self.verbose > 0:
            print(f"Num timesteps: {self.num_timesteps}")
            print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")

        # Track the best mean so the report above is meaningful on later checks.
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            if self.save_path is not None:
                if self.verbose > 0:
                    print(f"Saving new best model to {self.save_path}")
                self.model.save(self.save_path)

        return True

In [None]:
class TensorboardCallback(BaseCallback):
    """
    Custom callback for plotting additional values in tensorboard.

    On every step, selected entries of the first ``info`` dict in
    ``self.locals["infos"]`` are forwarded to the logger. Entries that are
    absent from that dict are skipped instead of being recorded as ``None``,
    so a step without a metric does not overwrite a value recorded earlier.
    """

    # Logger key -> key looked up in the ``info`` dict.
    _METRIC_KEYS = {
        'blocking_ratio': 'bp',
        'average_route_length': 'avg_length',
        'blocked_continuous': 'blocked_continuous',
        'blocked_contiguous': 'blocked_contiguous',
    }

    def __init__(self, verbose=0):
        super().__init__(verbose)

    def _on_step(self) -> bool:
        info = self.locals["infos"][0]
        for log_key, info_key in self._METRIC_KEYS.items():
            value = info.get(info_key)
            if value is not None:
                self.logger.record(log_key, value)
        # Returning True lets training continue.
        return True

In [None]:
import os

# Directory that receives the Monitor logs. The name is spelled in lower case so
# that it is the same directory as the './10_traffic_load_model_log/' path used
# for ``tensorboard_log`` and in the ``tensorboard --logdir`` hint, also on
# case-sensitive file systems.
log_dir = os.path.join(os.getcwd(), "10_traffic_load_model_log/")
os.makedirs(log_dir, exist_ok=True)

## Model Training

In [None]:
# Settings of the environment used for model training.
Holdtime = 20    # hold time of a request (for a traffic load of 10)
num_req = 20     # number of connection requests
req_int = 2      # request interval, in seconds
time_limit = 10  # time limit for each connection request, in seconds
link_BW = 5      # bandwidth per link


In [None]:
# Callbacks: the reward callback runs its check every 200000 calls.
callback = SaveOnBestTrainingRewardCallback(check_freq=200000, log_dir=log_dir)
tensor_callback = TensorboardCallback()

# Build the training environment from the preset "VSNL" topology and a
# generated list of requests.
nodeList, linkList = createPresetTopology("VSNL", num_slots=link_BW)
requestList = generateRequests(
    nodeList,
    numberOfRequests=num_req,
    req_interval=req_int,
    hold_time=Holdtime,
    time_limit=time_limit,
)

user = User()
env = game_gym(nodeList, linkList, requestList, user, dynamic=False)
# Wrap the environment in a Monitor that is given log_dir.
eveon = Monitor(env, log_dir)

# Optional sanity check of the wrapped environment:
# check_env(eveon, warn=True)

# Create the DQN model.
model = DQN(
    'MlpPolicy',
    eveon,
    verbose=1,
    buffer_size=100000,
    device='cuda',
    learning_starts=50000,
    exploration_fraction=0.5,
    learning_rate=0.0001,
    gamma=0.8,
    tensorboard_log='./10_traffic_load_model_log/',
)


In [None]:
# Train the agent for 30 000 000 timesteps, then save it under model_name.
model_name = "DQN_VSNL_TL_10"
training_callbacks = [callback, tensor_callback]
model.learn(total_timesteps=30_000_000, callback=training_callbacks)
model.save(model_name)

# To inspect the tensorboard logs, run from a shell:
#   tensorboard --logdir ./10_traffic_load_model_log/

In [None]:
# Plot rewards
# plot_results([log_dir], 500000, results_plotter.X_TIMESTEPS, "DQN EON")
# plt.show()