In [2]:
from optical_network_game.node import *
from optical_network_game.link import *
from optical_network_game.requests import *
from optical_network_game.user import *
import gym
import pygame, sys
from pygame.locals import *
from gym import spaces
from stable_baselines3.common.env_checker import check_env


from stable_baselines3.common import results_plotter
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy, plot_results
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.evaluation import evaluate_policy
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import numpy as np
import tensorflow as tf

#from stable_baselines.common.vec_env import DummyVecEnv
#from stable_baselines.deepq.policies import MlpPolicy
from stable_baselines3 import DQN
from stable_baselines3 import A2C
import json
import cv2

#additional code added by me just for testing
import matplotlib
import matplotlib.pyplot as plt
import torch
#importing IPython's display module to plot images
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython: from IPython import display
from itertools import count
import time
from IPython.display import clear_output

#importing wandb (weights and biases) for logging
import wandb
wandb.login()

import tensorboard
# Load the TensorBoard notebook extension
%load_ext tensorboard

#Importing game_gym class for use
import importlib
import optical_network_game.game_gym
importlib.reload(optical_network_game.game_gym)
from optical_network_game.game_gym import *


pygame 2.0.3 (SDL 2.0.16, Python 3.7.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtrinity-actual[0m (use `wandb login --relogin` to force relogin)


In [4]:
class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Training callback that keeps a copy of the model whenever the mean
    training reward reaches a new maximum.

    Every ``check_freq`` calls the monitor results found in ``log_dir`` are
    re-read and the mean reward of the most recent (up to 100) episodes is
    compared against the best value seen so far.

    :param check_freq: Number of callback calls between two reward checks.
    :param log_dir: Folder holding the results written by the ``Monitor``
      wrapper; the model is saved under ``<log_dir>/best_model``.
    :param verbose: Verbosity level; values above 0 print progress messages.
    """
    def __init__(self, check_freq: int, log_dir: str, verbose: int = 1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, 'best_model')
        # Start at -inf so the first measured mean reward always counts as an improvement.
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        """Create the folder named by ``save_path`` unless no path is set."""
        if self.save_path is None:
            return
        os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        """Check the training reward on every ``check_freq``-th call; always returns True."""
        # Only act on every check_freq-th call.
        if self.n_calls % self.check_freq != 0:
            return True

        # Reload the monitor results and convert them to (timesteps, rewards).
        steps_axis, episode_rewards = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(steps_axis) == 0:
            # No results recorded yet, nothing to compare.
            return True

        # Mean reward over the last 100 recorded entries.
        mean_reward = np.mean(episode_rewards[-100:])
        if self.verbose > 0:
            print(f"Num timesteps: {self.num_timesteps}")
            print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")

        if mean_reward > self.best_mean_reward:
            # New best: remember it and persist the current model.
            self.best_mean_reward = mean_reward
            if self.verbose > 0:
                print(f"Saving new best model to {self.save_path}")
            self.model.save(self.save_path)

        return True

In [5]:
# creating fixed test topology
def createTestTopology():
    """Build the fixed four-node, five-link topology used for testing.

    Returns a tuple ``(nodeList, linkList)``. Every node has ``setLinks``
    called with the full link list before the lists are returned.
    """
    # Constructor arguments for each Node (presumably id, name, x, y -- confirm against Node).
    node_specs = [
        (0, 'A', 300, 200),
        (1, 'B', 300, 400),
        (2, 'C', 650, 200),
        (3, 'D', 650, 400),
    ]
    nodeList = [Node(*spec) for spec in node_specs]

    # Each link joins two nodes, given here as indices into nodeList;
    # the link ID is the position of the pair in this list.
    endpoint_pairs = [(0, 1), (1, 2), (1, 3), (0, 2), (2, 3)]
    linkList = [
        Link(link_id, nodeList[first], nodeList[second])
        for link_id, (first, second) in enumerate(endpoint_pairs)
    ]

    # Hand the complete link list to every node.
    for current_node in nodeList:
        current_node.setLinks(linkList)

    return nodeList, linkList



In [6]:
# Path reserved for saved models. Nothing in the visible notebook writes to it,
# so the directory itself is not created here.
log_dir = os.path.join(os.getcwd(), "tmp/")
#os.makedirs(log_dir, exist_ok=True)

# Directory shared by the Monitor wrapper, the best-model callback and plot_results.
# It has to exist before an environment is wrapped with Monitor, so create it up front
# (exist_ok=True makes re-running this cell harmless).
log_tensorboard = os.path.join(os.getcwd(), "tensorboard_logs/")
os.makedirs(log_tensorboard, exist_ok=True)

## Weights & Biases Hyperparameter Sweep

In [5]:
import pprint

# Candidate values for each hyperparameter explored by the sweep.
# The key names are the ones later read from wandb.config by the training function.
candidate_values = [
    ('learning_rate', [0.001, 0.0025, 0.005, 0.0075, 0.01]),
    ('Gamma', [0.9, 0.92, 0.94, 0.96, 0.98, 0.99]),
    ('Tau', [0.4, 0.6, 0.8, 1]),
]
hyper_params = {name: {'values': values} for name, values in candidate_values}

# 'random' search method over the parameter grid defined above.
sweep_config = {'method': 'random', 'parameters': hyper_params}

pprint.pprint(sweep_config)

{'method': 'random',
 'parameters': {'Gamma': {'values': [0.9, 0.92, 0.94, 0.96, 0.98, 0.99]},
                'Tau': {'values': [0.4, 0.6, 0.8, 1]},
                'learning_rate': {'values': [0.001,
                                             0.0025,
                                             0.005,
                                             0.0075,
                                             0.01]}}}


In [6]:
# Register the sweep described by sweep_config under the given W&B project and keep
# the returned ID; the agent cell below passes it to wandb.agent.
sweep_id = wandb.sweep(sweep_config, project="DQN_EON_Hyperparameter_Tuning")

Create sweep with ID: faxtqfe9
Sweep URL: https://wandb.ai/trinity-actual/DQN_EON_Hyperparameter_Tuning/sweeps/faxtqfe9


## Training Function

In [7]:
def DQN_HP_Tune(config=None):
    '''Train one DQN model for a single Weights & Biases sweep run.

    Intended to be passed to ``wandb.agent``. The sweep supplies
    ``learning_rate``, ``Gamma`` and ``Tau`` through ``wandb.config``;
    every other hyperparameter is fixed inside this function.

    Input Arguments:
    1) config = Optional configuration forwarded to ``wandb.init``.

    The environment is the test topology with ``generateRequests(nodeList, 6)``.
    SB3 tensorboard output goes to ``runs/<run id>`` and the WandbCallback is
    given ``models/<run id>`` as its model save path.

    Returns None.
    '''
    from wandb.integration.sb3 import WandbCallback

    run = wandb.init(config=config, settings=wandb.Settings(start_method="fork"), sync_tensorboard=True)

    # Leaving the `with` block finishes the run, so no explicit run.finish() is needed.
    with run:
        # Hyperparameter values chosen for this run.
        config = wandb.config

        # Monitor writes its results here; make sure the folder exists first.
        log_tensorboard = "./tensorboard_logs/"
        os.makedirs(log_tensorboard, exist_ok=True)

        # Create and wrap the environment (6 is passed to generateRequests).
        nodeList, linkList = createTestTopology()
        requestList = generateRequests(nodeList, 6)
        user = User()

        env = game_gym(nodeList, linkList, requestList, user)
        check_env(env)

        eveon = Monitor(env, log_tensorboard)

        # Fixed (non-swept) hyperparameters.
        timesteps = 500000
        learning_starts = 50000
        eps_fraction = 0.8
        eps_start = 1
        eps_end = 0.05
        train_freq = (1000, "step")
        target_update_interval = 50000

        model = DQN(
            'MlpPolicy',
            eveon,
            tau=config.Tau,
            learning_starts=learning_starts,
            buffer_size=10000,
            verbose=1,
            device="auto",
            learning_rate=config.learning_rate,
            gamma=config.Gamma,
            exploration_fraction=eps_fraction,
            exploration_initial_eps=eps_start,
            exploration_final_eps=eps_end,
            target_update_interval=target_update_interval,
            train_freq=train_freq,
            tensorboard_log=f"runs/{run.id}",
        )

        # Train with the swept learning rate, gamma and tau.
        model.learn(
            total_timesteps=int(timesteps),
            callback=WandbCallback(verbose=2, model_save_path=f"models/{run.id}", model_save_freq=50000),
            reset_num_timesteps=False,
        )
    

In [None]:
# Launch a sweep agent that calls DQN_HP_Tune for this sweep; count=5 is the run limit passed to it.
# Hyperparameter values for each run come from the sweep (configured with the 'random' method).
wandb.agent(sweep_id, DQN_HP_Tune, count=5)

## Hyperparameter Tuning

### Normal Training

In [9]:
def DQN_Train(lr, gamma, eps_start, eps_end, eps_fraction, train_freq, target_update_interval, tau, learning_starts, timesteps, Model_Name):
    '''Train a DQN agent on the EON game environment and save it.

    Input Arguments:
    1) lr = learning rate
    2) gamma = discount factor
    3) eps_start = Starting exploration rate value
    4) eps_end = Ending exploration rate value
    5) eps_fraction = Fraction of the timesteps over which the exploration rate decays (from eps_start to eps_end)
    6) train_freq = Frequency of updating the model (either per number of episodes or timesteps)
    7) target_update_interval = Frequency of updating the target network
    8) tau = The soft update coefficient ("Polyak update", between 0 and 1), 1 for hard update
    9) learning_starts = Number of timesteps before the model starts doing training updates
    10) timesteps = Number of timesteps to train the model for
    11) Model_Name = String name used for the tensorboard run and for saving the model

    The tensorboard logging is saved in ./tensorboard_logs/ under the run name given by Model_Name.

    The model is trained on the test topology with generateRequests(nodeList, 6) and uses
    SaveOnBestTrainingRewardCallback (checked every 10000 steps) to keep the best model by
    mean episode reward. After training, Save_Model_Details is called; it is defined in the
    "Saving Model" cell, which must have been executed before this function is called.

    Returns the trained model object.
    '''
    # Monitor and the callback both use this directory, so make sure it exists.
    os.makedirs(log_tensorboard, exist_ok=True)

    # Create and wrap the environment (6 is passed to generateRequests).
    nodeList, linkList = createTestTopology()
    requestList = generateRequests(nodeList, 6)
    user = User()

    env = game_gym(nodeList, linkList, requestList, user)
    check_env(env)

    eveon = Monitor(env, log_tensorboard)

    # Create the callback: check every 10000 steps
    callback = SaveOnBestTrainingRewardCallback(check_freq=10000, log_dir=log_tensorboard)

    # Network architecture; previously built but never handed to DQN.
    policy_kwargs = {
        'net_arch': [64, 64]  # MLP hidden layer sizes
    }

    # Build the agent
    model = DQN(
        'MlpPolicy',
        eveon,
        gradient_steps=-1,
        tau=tau,
        learning_starts=learning_starts,
        buffer_size=10000,
        verbose=1,
        device="auto",
        learning_rate=lr,
        gamma=gamma,
        exploration_fraction=eps_fraction,
        exploration_initial_eps=eps_start,
        exploration_final_eps=eps_end,
        target_update_interval=target_update_interval,
        train_freq=train_freq,
        policy_kwargs=policy_kwargs,
        tensorboard_log="./tensorboard_logs/",
    )

    #Debug Print
    print("Carry out training on: " + str(Model_Name))
    model.learn(total_timesteps=int(timesteps), tb_log_name=str(Model_Name), callback=callback, reset_num_timesteps=False)

    # Persist the trained model and plot the reward curve.
    Save_Model_Details(model, Model_Name, timesteps)

    return model


In [10]:
# Train the DQN agent with the chosen hyperparameters; DQN_Train also saves the model.
model = DQN_Train(
    lr=0.002,
    gamma=0.99,
    eps_start=1,
    eps_end=0.05,
    eps_fraction=0.3,
    train_freq=(1000, "step"),
    target_update_interval=50000,
    tau=0.5,
    learning_starts=3000,
    timesteps=1000000,
    Model_Name="DQN_100322",
)




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

6
(256, 256, 3)
Request Timed Out, cumulative Reward:
0
(256, 256, 3)
Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


  "This system does not have apparently enough memory to store the complete "


Carry out training on: DQN_100322
(256, 256, 3)
Logging to ./tensorboard_logs/DQN_100322_0
Too many 3 link connections made.
-427.09999999999934
(256, 256, 3)
Too many 3 link connections made.
-394.0999999999995
(256, 256, 3)
Too many 3 link connections made.
-436.9999999999993
(256, 256, 3)
Too many 3 link connections made.
-405.99999999999926
(256, 256, 3)
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 538      |
|    ep_rew_mean      | -767     |
|    exploration_rate | 0.993    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 164      |
|    time_elapsed     | 13       |
|    total_timesteps  | 2154     |
----------------------------------
Too many 3 link connections made.
-224.70000000000007
(256, 256, 3)
Too many 3 link connections made.
-225.4000000000001
(256, 256, 3)
Too many 3 link connections made.
-413.99999999999926
(256, 256, 3)
Too many 3 link connections made.
-391.999999999999

KeyboardInterrupt: 

## Saving Model

In [14]:
def Save_Model_Details(Model, Model_Name, timesteps):
    '''Create a folder in "Models", save the trained model and plot its rewards.

    Input Arguments:
    1) Model = Trained model object to be saved.
    2) Model_Name = String name of the model to be saved.
    3) timesteps = Number of timesteps used in training (passed to plot_results).

    The folder is "Models/<Model_Name>_Full_Train" and the same path is handed to
    Model.save. The reward plot is built from the monitor results in the global
    log_tensorboard directory and written to
    "Models/<Model_Name>_Full_Train Episode rewards over timestamps.png".

    Raises FileExistsError if the model folder already exists (exist_ok=False).
    '''
    #Creating folder to store the created model in "Models"
    model_dir = os.path.join("Models/", str(Model_Name) + "_Full_Train")
    os.makedirs(model_dir, exist_ok=False)

    #Saving Trained Model
    Model.save(model_dir)
    print(str(Model_Name) + " Successfully Saved.")
    print("Remember to shift the Monitor.csv and best_model.zip files into the created model folder manually!!")

    #Plotting Rewards for show
    plot_results([log_tensorboard], timesteps, results_plotter.X_TIMESTEPS, "DQN EON")
    # Save before showing: once plt.show() has displayed the figure, a following
    # savefig would capture an empty figure instead of the reward plot.
    plt.savefig(str(model_dir) + " Episode rewards over timestamps.png")
    plt.show()


In [None]:
#Debug
#torch.cuda.memory_summary(device=None, abbreviated=False)

## Accessing Tensorboard Logs

In [6]:
# Start TensorBoard inside the notebook; --logdir selects the folder holding the logs
# (here tensorboard_logs, the folder the training cells pass as tensorboard_log).
%tensorboard --logdir tensorboard_logs

# Once it is running, the dashboard can also be opened at http://localhost:6006/
# Alternatively, run the same command (without the leading %) in a command prompt from the source directory.

## Loading Model

In [10]:
def Load_Model(Model_Dir, Model_Name, device):
    '''Load a saved DQN model file from disk.

    Input Arguments:
    1) Model_Dir = Folder that contains the saved model file
    2) Model_Name = File name of the saved model
    3) device = Device to load the model onto, either "cpu" or "cuda"

    Returns the loaded model.
    '''
    file_name = str(Model_Name)

    # Full path of the archive inside the model folder.
    archive_path = os.path.join(str(Model_Dir), file_name)

    restored_model = DQN.load(str(archive_path), device=device)
    print("Successfully loaded " + file_name + "!")

    return restored_model
    

In [12]:
# Load the saved model file from the Models folder onto the CUDA device;
# the agent testing cell below uses loaded_model to choose actions.
loaded_model = Load_Model("Models/030322_Model_1mil_sofar", "DQNEveon_1mil_030322.zip", device="cuda")

  "This system does not have apparently enough memory to store the complete "


Successfully loaded DQNEveon_1mil_030322.zip!


In [15]:
#test2 is the successful one (but still just spams enter key)
#LOADING MODEL FROM ZIP
#loaded_model = DQN.load("DQNEveon_test2")
#loaded_model = DQN.load("DQNEveon_testing_surface_250222")

#2mil timestep model load
#NOTE that old model on more complex game not compatible with new observation window
#loaded_model = DQN.load("DQNEveon_2mil_270222", device="cpu")
#best model load during 2mil training
#loaded_model = DQN.load("./Models/270222_Model_2mil/best_model.zip", device="cpu")

#loading 1mil semi simplified model with new rewards
#loaded_model = DQN.load("DQNEveon_1mil_030322.zip")
#loaded_model = DQN.load("DQNEveon_2mil_030322.zip")
#loaded_model = DQN.load("DQNEveon_2+3mil_030322.zip")
#loaded_model = DQN.load("./Models/030322_Model_1mil_sofar/best_model.zip")


#loaded_model = DQN.load("DQNEveon_040322.zip")
#loaded_model = DQN.load("./tensorboard_logs/best_model.zip")

  "This system does not have apparently enough memory to store the complete "


# Testing Agent Performance

In [None]:
# Build a fresh environment on the test topology (6 is passed to generateRequests).
nodeList, linkList = createTestTopology()
requestList = generateRequests(nodeList, 6)

user = User()
eveon = game_gym(nodeList, linkList, requestList, user)

check_env(eveon, warn=True)

# THIS IS THE TESTING LOOP OF THE AGENT PLAYING THE GAME
# The loop has no exit condition: interrupt the kernel to stop it.
obs = eveon.reset()
while True:

    #enable this if using older models with the wider observation space
    #obs = cv2.resize(obs, dsize=(600, 1000))

    # deterministic=True asks the loaded model for its deterministic action
    action, states_ = loaded_model.predict(obs, deterministic=True)
    obs, rewards, dones, info = eveon.step(action)

    print("Action:")
    print(action)

    if dones:
        #debug print
        print("########################Reward Obtained:")
        print(eveon.reward)

        # Keep the observation returned by reset(); otherwise the next predict()
        # would be fed the final observation of the finished episode.
        obs = eveon.reset()

    eveon.render()

## Random Action Performance

In [None]:
#agent taking random actions
nodeList, linkList = createTestTopology()

# 6 is passed to generateRequests
requestList = generateRequests(nodeList, 6)

user = User()
eveon = game_gym(nodeList, linkList, requestList, user)

check_env(eveon, warn=True)
#resets the environment
obs = eveon.reset()

for step in range(2000):
    eveon.render()

    rand_action = eveon.action_space.sample()
    obs, reward, done, info = eveon.step(rand_action)
    print(rand_action)

    # Start a new episode once the current one has ended instead of stepping a finished one.
    if done:
        obs = eveon.reset()

# Close the environment created in this cell (the name `env` is not defined at notebook level).
eveon.close()

In [None]:
#test main function
# Rolls out a newly constructed DQN agent (no learn() call is made here) for 100 episodes,
# records how many steps each episode took, then plots the durations and the final frame.
nodeList, linkList = createTestTopology()
requestList = generateRequests(nodeList, 6)

user = User()
eveon = game_gym(nodeList, linkList, requestList, user)

check_env(eveon, warn=True)

#defining the agent
# NOTE: this rebinds the notebook-level name `model` used by the training cell.
model = DQN('MlpPolicy', eveon, verbose=1, buffer_size=100, device='cuda')

#number of steps taken in each finished episode
episode_durations = []

for episode in range(100):

    #resets the environment at the start of every episode
    obs = eveon.reset()

    #nested loop iterates over the time steps of one episode
    for timestep in count():

        action, states_ = model.predict(obs, deterministic=True)

        obs, rewards, dones, info = eveon.step(action)

        print(action)

        eveon.render()

        #this only runs when the episode has ended
        if dones:
            #debug print
            print("########################Reward Obtained:")
            print(eveon.reward)

            # count() starts at 0, so the episode lasted timestep + 1 steps
            episode_durations.append(timestep + 1)
            break

# The original cell called a `plot(episode_durations, 100)` helper that is not defined
# in this notebook; plot the recorded durations directly with matplotlib instead.
plt.figure()
plt.plot(episode_durations)
plt.title('Episode durations')
plt.xlabel('Episode')
plt.ylabel('Steps')
plt.show()

# Show the last rendered frame of the environment.
screen = eveon.render('rgb_array')
plt.figure()
plt.imshow(screen)
plt.title('test screen')
plt.show()