In [1]:
import os

# Check if the notebook is running on Colab
if 'COLAB_GPU' in os.environ:
    # This block will run only in Google Colab
    IN_COLAB = True
    print("Running on Google Colab. Cloning the repository.")
    !git clone https://github.com/pedro15sousa/R244-wm-load-balancing.git
    %cd R244-wm-load-balancing/notebooks
else: 
    # This block will run if not in Google Colab
    IN_COLAB = False
    print("Not running on Google Colab. Assuming local environment.")

Not running on Google Colab. Assuming local environment.


In [2]:
import sys
# Put the parent directory on the module search path so imports can resolve
# against it. The membership check keeps the cell idempotent: re-running it
# no longer appends a duplicate '..' entry each time.
if '..' not in sys.path:
    sys.path.append('..')

In [12]:
""" Test controller """
import argparse
from os.path import join, exists
from utils.misc import RolloutGenerator
from models.controller import Controller
from utils.misc import LSIZE, RSIZE, ASIZE
from utils.misc import flatten_parameters
import torch

In [5]:
# Run configuration. Of these keys, only 'logdir' is read by the cells visible
# in this notebook; the others look like they were carried over from a
# training script's command-line options — TODO confirm whether they are still needed.
args = dict(
    # Input batch size for training.
    batch_size=32,
    # Number of training epochs.
    epochs=4000,
    # Directory where results are logged; the controller checkpoint is read from under it.
    logdir='../exp_dir',
    # NOTE(review): the meaning of this flag is not shown in this notebook — confirm
    # against the code that consumes it.
    include_reward=False,
    # Presumably: set True if the best model is not to be reloaded — verify.
    noreload=False,
)

# All evaluation in this notebook runs on the CPU.
device = torch.device("cpu")

In [8]:
# Locate the best controller checkpoint under the log directory and refuse to
# continue if it is missing.
ctrl_file = join(args['logdir'], 'ctrl', 'best.tar')
assert exists(ctrl_file),\
    "Controller was not trained..."

# Load controller
controller = Controller(LSIZE, RSIZE, ASIZE)
# map_location=device remaps every stored tensor onto the evaluation device.
# The previous {'cuda:0': 'cpu'} mapping only covered tensors saved from cuda:0,
# so a checkpoint written from any other GPU index could not be loaded on a
# CPU-only machine.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
state = torch.load(ctrl_file, map_location=device)
# Kept as the cell's last expression so the key-matching report is displayed.
controller.load_state_dict(state['state_dict'])

<All keys matched successfully>

In [17]:
# Evaluate the loaded controller over a fixed number of rollouts and report
# per-rollout, mean, and standard-deviation statistics.
N_ROLLOUTS = 100  # number of evaluation rollouts; used for both the loop and the average

# The third argument is presumably the per-rollout step limit — confirm in utils.misc.
generator = RolloutGenerator(args['logdir'], device, 1000)
with torch.no_grad():
    # The controller weights do not change between rollouts, so flatten them
    # once instead of on every iteration.
    # Assumes generator.rollout does not modify the array it is given — TODO confirm.
    flat_parameters = flatten_parameters(controller.parameters())

    total_rewards = []
    for _ in range(N_ROLLOUTS):
        cumulative_reward = generator.rollout(flat_parameters)
        total_rewards.append(cumulative_reward)
        # The value returned by rollout is negated before being reported.
        print('Cumulative reward: {}'.format(-cumulative_reward))

    # Kept as a named global in case later cells read it.
    total_reward = sum(total_rewards)

    print('Average reward: {}'.format(-total_reward / N_ROLLOUTS))
    # The standard deviation is unaffected by the sign flip, so the raw values are used.
    print('Standard deviation: {}'.format(torch.std(torch.tensor(total_rewards))))

INFO:root:Making new env load_balance


Loading MDRNN at epoch 934 with test loss -2.15408667696391
Loading Controller with reward -4023682.69
Reward:  -9.0
Reward:  -220.0
Reward:  -140.0
Reward:  -336.0
Reward:  -26.0
Reward:  -33.0
Reward:  -160.0
Reward:  -90.0
Reward:  -48.0
Reward:  -175.0
Reward:  -538.0
Reward:  -273.0
Reward:  -14.0
Reward:  -80.0
Reward:  -1199.0
Reward:  -572.0
Reward:  -35.0
Reward:  -248.0
Reward:  -308.0
Reward:  -1022.0
Reward:  -405.0
Reward:  -280.0
Reward:  -88.0
Reward:  -707.0
Reward:  -975.0
Reward:  -1055.0
Reward:  -77.0
Reward:  -168.0
Reward:  -153.0
Reward:  -240.0
Reward:  -440.0
Reward:  -964.0
Reward:  -2035.0
Reward:  -96.0
Reward:  -828.0
Reward:  -600.0
Reward:  -36.0
Reward:  -60.0
Reward:  -583.0
Reward:  -36.0
Reward:  -273.0
Reward:  -770.0
Reward:  -525.0
Reward:  -936.0
Reward:  -585.0
Reward:  -509.0
Reward:  -391.0
Reward:  -84.0
Reward:  -823.0
Reward:  -581.0
Reward:  -212.0
Reward:  -1155.0
Reward:  -64.0
Reward:  -153.0
Reward:  -1527.0
Reward:  -1723.0
Reward:  -1