In [1]:
import os
import json
import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.distributions import Categorical
from copy import deepcopy
from tqdm import tqdm
from env.mec_offloaing_envs.offloading_env import Resources
from env.mec_offloaing_envs.offloading_env import OffloadingEnvironment
from models import GraphSeq2Seq, BaselineSeq2Seq
from buffer import RolloutBuffer
from train import inner_loop

%load_ext autoreload
%autoreload 2

In [2]:
# Read the run settings from the JSON file that sits next to this notebook.
with open('my_config.json') as config_file:
    args = json.load(config_file)

class Config:
    """Attribute-style view of a settings dictionary.

    Every key of the dictionary passed to the constructor becomes an instance
    attribute holding the corresponding value.
    """

    def __init__(self, dictionary):
        for name, setting in dictionary.items():
            setattr(self, name, setting)

# Expose the loaded JSON settings as attributes on `c`.
c = Config(args)

# Seed NumPy's and PyTorch's global RNGs from the configured seed.
# torch.manual_seed stays last: as the cell's final expression, its return
# value (a torch Generator) is what the notebook displays.
np.random.seed(c.seed)
torch.manual_seed(c.seed)

<torch._C.Generator at 0x214b507ca90>

In [8]:
# Both compute capacities from the config are multiplied by 10e9 before use.
# NOTE(review): 10e9 is 1e10, not 1e9 — confirm the intended unit conversion.
resources = Resources(
    mec_process_capable=c.mec_process_capable * 10e9,
    mobile_process_capable=c.mobile_process_capable * 10e9,
    bandwidth_up=c.bandwidth_up,
    bandwidth_dl=c.bandwidth_down,
)

# The same config value (graph_number) is used for both batch_size and graph_number.
env = OffloadingEnvironment(
    resource_cluster=resources,
    batch_size=c.graph_number,
    graph_number=c.graph_number,
    graph_file_paths=["./env/mec_offloaing_envs/data/meta_offloading_20/offload_random20_12/random.20."],
    time_major=False,
    encoding="rank_cost",
)

Loading graph offload_random20_12: 100%|██████████| 100/100 [00:19<00:00,  5.20it/s]


In [9]:
# Element 1 of greedy_solution()'s result is what gets averaged and reported
# as the latency (presumably one value per graph — TODO confirm).
greedy_latencies = env.greedy_solution()[1]
print(f'Average greedy latency: {np.mean(greedy_latencies):.4f}')

Average greedy latency: 0.1550


In [4]:
# NOTE(review): this cell is an exact duplicate of the cell directly above it,
# yet the recorded outputs differ (808.9166 here vs 0.1550 there) and the
# execution counts are out of order — presumably it ran against a differently
# configured `env`. Confirm which result is current and drop the stale cell.
print(f'Average greedy latency: {np.mean(env.greedy_solution()[1]):.4f}')

Average greedy latency: 808.9166


In [10]:
# Display the environment's encoded batches (the output below is a list of arrays).
# NOTE(review): this dumps the arrays in full; a shape summary or a small slice
# would keep the notebook output readable.
env.encoder_batchs

[array([[[ 0.        ,  0.7481408 ,  0.8561118 , ...,  0.8       ,
           0.9       , -1.        ],
         [ 0.05      ,  0.65810084,  0.75320905, ...,  0.8       ,
           0.9       ,  0.95      ],
         [ 0.2       ,  0.38986847,  0.44665775, ...,  0.95      ,
          -1.        , -1.        ],
         ...,
         [ 0.8       ,  0.36082217,  0.413462  , ..., -1.        ,
          -1.        , -1.        ],
         [ 0.55      ,  0.21686661,  0.24894135, ..., -1.        ,
          -1.        , -1.        ],
         [ 0.75      ,  0.19556251,  0.2245938 , ..., -1.        ,
          -1.        , -1.        ]],
 
        [[ 0.        ,  0.8741706 ,  1.        , ...,  0.9       ,
          -1.        , -1.        ],
         [ 0.35      ,  0.78471947,  0.89777017, ..., -1.        ,
          -1.        , -1.        ],
         [ 0.05      ,  0.6425662 ,  0.73530924, ..., -1.        ,
          -1.        , -1.        ],
         ...,
         [ 0.95      ,  0.3610190

In [4]:
# Reference latencies reported by the environment, printed so the latency
# obtained with the learned policy can be compared against them.
print(f'Average greedy latency: {np.mean(env.greedy_solution()[1]):.4f}')
print(f'Average all local latency: {np.mean(env.get_all_locally_execute_time()):.4f}')
print(f'Average all mec latency: {np.mean(env.get_all_mec_execute_time()):.4f}')

device = c.device
# This cell works on a single sampled task, overriding the value from the config file.
c.meta_batch_size = 1
latencies = []  # receives one mean latency per iteration of the loop below

# Both policy classes take exactly the same constructor arguments, so only the
# class is selected by the flag instead of duplicating the whole call.
policy_cls = GraphSeq2Seq if c.is_graph else BaselineSeq2Seq
policy = policy_cls(input_dim=c.obs_dim,
                    hidden_dim=c.encoder_units,
                    output_dim=c.action_dim,
                    num_layers=c.num_layers,
                    device=device).to(device)
if args["load"]:
    # map_location remaps the checkpoint's tensors onto `device`; without it a
    # checkpoint saved on a device that is unavailable here fails to load.
    policy.load_state_dict(torch.load(args["load_path"], map_location=device))

buffer = RolloutBuffer(meta_batch_size=c.meta_batch_size,
                       buffer_size=c.graph_number*c.num_task_episodes,
                       discount=c.gamma,
                       gae_lambda=c.tau,
                       device=device)
optimizer = torch.optim.Adam(policy.parameters(), lr=c.inner_lr)

# One pass of: sample tasks -> collect rollouts -> run the inner loop per task
# -> report the averaged losses, rewards, returns and latency.
for iteration in tqdm(range(0, 1), leave=False, disable=True):
    task_policies = []
    fts_before = []
    vf_losses, pg_losses, ent_losses = [], [], []

    batch_of_tasks = env.sample_tasks(c.meta_batch_size)

    ### Sample trajectories ###
    buffer.reset()
    for i, task_id in tqdm(enumerate(batch_of_tasks), leave=False, total=c.meta_batch_size, desc='Sampling trajectories'):
        buffer.collect_episodes(env=env,
                                policy=policy,
                                device=device,
                                meta_batch=i,
                                task_id=task_id,
                                is_graph=c.is_graph)
    buffer.process()

    ### Run the inner loop on every sampled task ###
    for i, task_id in enumerate(batch_of_tasks):
        # NOTE(review): `policy` is rebound to whatever inner_loop returns, so with
        # more than one task each task would start from the previous task's result
        # — confirm this is intended before raising meta_batch_size above 1.
        vf_loss, pg_loss, ent_loss, fts, policy = \
            inner_loop(policy=policy,
                       optimizer=optimizer,
                       buffer=buffer,
                       meta_batch=i,
                       task_id=task_id,
                       hparams=c)
        vf_losses.append(vf_loss)
        pg_losses.append(pg_loss)
        ent_losses.append(ent_loss)
        fts_before.append(fts)
        task_policies.append(policy)

    print('*'*50)
    # Computed once and reused for both the running history and the log line.
    mean_latency_before = np.mean(np.concatenate(fts_before))
    latencies.append(mean_latency_before)
    print("Iteration", iteration,
        "| vf_loss: {:.4f}".format(np.mean(vf_losses)),
        "| pg_loss: {:.4f}".format(np.mean(pg_losses)),
        "| ent_loss: {:.4f}".format(np.mean(ent_losses)),
        "| average_reward: {:.4f}".format(np.mean([reward.sum(-1) for reward in buffer.rewards])),
        "| average_return: {:.4f}".format(np.mean([returns[:, 0].mean().item() for returns in buffer.returns])),
        "| latency before adaptation: {:.4f}".format(mean_latency_before))


Average greedy latency: 808.9166
Average all local latency: 1478.0573
Average all mec latency: 1052.0136


                                                                    

**************************************************
Iteration 0 | vf_loss: 4.5034 | pg_loss: -0.0050 | ent_loss: 0.6881 | average_reward: -5.9766 | average_return: -5.5198 | latency before adaptation: 896.4844




In [5]:
# Display the first entry of the rollout buffer's stored observations.
# NOTE(review): this dumps a whole array; showing its shape or a small slice
# would keep the notebook output readable.
buffer.observations[0]

array([[[ 0.        ,  0.84362584,  0.3742647 , ...,  0.8       ,
          0.9       , -1.        ],
        [ 0.05      ,  0.73179364,  0.31834856, ...,  0.8       ,
          0.9       ,  0.95      ],
        [ 0.2       ,  0.39864117,  0.15177234, ...,  0.95      ,
         -1.        , -1.        ],
        ...,
        [ 0.8       ,  0.36256486,  0.13373418, ..., -1.        ,
         -1.        , -1.        ],
        [ 0.55      ,  0.18376783,  0.04433567, ..., -1.        ,
         -1.        , -1.        ],
        [ 0.75      ,  0.15730752,  0.03110552, ..., -1.        ,
         -1.        , -1.        ]],

       [[ 0.        ,  1.        ,  0.24297327, ...,  0.9       ,
         -1.        , -1.        ],
        [ 0.35      ,  0.8901705 ,  0.21002443, ..., -1.        ,
         -1.        , -1.        ],
        [ 0.05      ,  0.71563256,  0.15766305, ..., -1.        ,
         -1.        , -1.        ],
        ...,
        [ 0.95      ,  0.36994463,  0.05395667, ..., -

In [16]:
import torch
import torch.nn as nn


# Toy experiment: record Adam's state before every optimisation step of a small
# linear classifier trained on random data.
# NOTE(review): `optimizer` rebinds the name used for the policy's optimizer in
# the training cell; re-run that cell before relying on `optimizer` again.
model = nn.Linear(10, 2)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
x = torch.randn(100, 10)
y = torch.randint(0, 2, (100,))
opt_states = []
for i in range(100):
    optimizer.zero_grad()
    # state_dict() hands back references to the optimizer's live per-parameter
    # state, which step() then updates in place. Without the deep copy every
    # stored entry (other than the empty first one) would alias the same final
    # state instead of being a snapshot taken after `i` steps.
    opt_states.append(deepcopy(optimizer.state_dict()))
    y_pred = model(x)
    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()

In [17]:
opt_states[0]

{'state': {},
 'param_groups': [{'lr': 0.001,
   'betas': (0.9, 0.999),
   'eps': 1e-08,
   'weight_decay': 0,
   'amsgrad': False,
   'maximize': False,
   'foreach': None,
   'capturable': False,
   'differentiable': False,
   'fused': None,
   'params': [0, 1]}]}

In [25]:
import torch
def linear_init(module):
    """Re-initialise an ``nn.Linear`` layer in place and return it.

    For ``nn.Linear`` modules the weight is drawn with Xavier-uniform
    initialisation and the bias, when the layer has one, is set to zero.
    Any other module is returned untouched, so the function can be handed
    modules that have no ``bias`` attribute (e.g. activations) or whose
    ``bias`` is not a tensor (e.g. the boolean flag on ``nn.LSTM``).

    Args:
        module: the module to initialise.

    Returns:
        The same ``module`` object that was passed in.
    """
    if isinstance(module, nn.Linear):
        nn.init.xavier_uniform_(module.weight)
        # The bias handling lives inside the Linear check: outside of it,
        # `module.bias` raises AttributeError for modules without that attribute.
        if module.bias is not None:
            nn.init.constant_(module.bias, 0)
    return module
# x keeps the weights nn.Linear assigns on construction; y is additionally
# passed through linear_init. y.weight is displayed here and x.weight in the
# next cell so the two initialisations can be compared.
# NOTE(review): x and y were the input/label tensors of the optimizer experiment
# in an earlier cell; rebinding them to layers makes that cell's names stale.
x = nn.Linear(10, 2)
y = linear_init(nn.Linear(10, 2))
y.weight

Parameter containing:
tensor([[-0.6051, -0.5528,  0.2216, -0.4529,  0.3433, -0.0203,  0.4645, -0.0500,
         -0.4121, -0.2808],
        [-0.1735, -0.5906,  0.3890, -0.4964,  0.1963,  0.3686,  0.3746, -0.3546,
         -0.3248, -0.4352]], requires_grad=True)

In [26]:
# Weights of the layer that was not passed through linear_init, for comparison.
x.weight

Parameter containing:
tensor([[ 0.2456,  0.2074, -0.1387, -0.1093,  0.2080, -0.0609,  0.2578,  0.2012,
          0.1854,  0.0672],
        [-0.3058,  0.2043, -0.0009,  0.0798, -0.0257, -0.1717,  0.2533,  0.0942,
         -0.2720,  0.2637]], requires_grad=True)