In [1]:
import numpy as np


In [2]:
np.random.uniform(-1,1,18)

array([-0.25697209, -0.56506957,  0.84390634,  0.21724558,  0.54328956,
       -0.49683564,  0.77299428, -0.22588412, -0.40406352, -0.89976666,
        0.02562606,  0.75575362, -0.36609868,  0.12501063,  0.93832107,
        0.24625981,  0.57724916,  0.24532134])

In [5]:
def rescale(action, min_action, max_action):
    action_new = np.zeros_like(action)
    for i in range(action.shape[0]):
        a = action[i]+1
        ratio = a/2
        new_a = ratio*(max_action[i]-min_action[i])
        action_new[i] = min_action[i] + new_a
    return action_new

In [6]:
new_action = rescale(np.array([-1,0,1]), np.array([-2,-2,-2]), np.array([2,2,2]))

In [7]:
new_action

array([-2,  0,  2])

In [11]:
np.array([[-1,-1,0]]).repeat(6, 0)

array([[-1, -1,  0],
       [-1, -1,  0],
       [-1, -1,  0],
       [-1, -1,  0],
       [-1, -1,  0],
       [-1, -1,  0]])

In [1]:
import os
import math
from warnings import formatwarning
import numpy as np
import torch
from torch._C import device
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.functional import softplus
from torch.distributions import constraints
import torch.optim as optim
from torch.distributions import Normal, TransformedDistribution, Categorical
from torch.distributions.transforms import Transform
from Graph_Capsule_Convolution.Models.ONR.encoder import Task_Graph_Encoder, Context_Encoder
from Graph_Capsule_Convolution.Models.ONR.decoder import MHA_Decoder
from Graph_Capsule_Convolution.Networks.utils import normalize_edges

In [13]:
class ActorNetwork(nn.Module):
    def __init__(self, lr=0.9, 
                 node_feats=7,
                 encoder_out_feats=64,
                 edge_feats=2,
                 encoder_hidden_dims=[32,32,32],
                 k=2, p=3,
                 gcn_model='Edge_Laplacian',
                 context_in_feats=8,
                 context_out_feats=64,
                 context_hidden_dims=[32,32],
                 decoder_heads=8,
                 decoder_hidden_dims=[64,64],
                 decoder_out_feats=50,
                 coa_num=3,
                 latent_dim=128,
                 activation=nn.ReLU(),
                 device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
        super(ActorNetwork, self).__init__()
        self.lr = lr
        self.task_encoder = Task_Graph_Encoder(in_feats=node_feats,
                                               out_feats=encoder_out_feats,
                                               edge_feat=edge_feats,
                                               hidden_dims=encoder_hidden_dims,
                                               k=k, p=p,
                                               gcn_model=gcn_model,
                                               activation=activation,
                                               device=device)
        
        self.coa_keys = nn.ModuleList(nn.Linear(in_features=encoder_out_feats,
                                                out_features=latent_dim) for i in range(coa_num))
        self.coa_vals = nn.ModuleList(nn.Linear(in_features=encoder_out_feats,
                                                out_features=latent_dim) for i in range(coa_num))
        
        self.context_encoder = Context_Encoder(in_feats=context_in_feats,
                                               out_feats=context_out_feats,
                                               hidden_dims=context_hidden_dims,
                                               device=device)
        self.decoder = MHA_Decoder(context_dim=context_out_feats,
                                   key_dim=latent_dim,
                                   value_dim=latent_dim,
                                   num_heads=decoder_heads,
                                   hidden_dim=decoder_hidden_dims,
                                   out_feats=decoder_out_feats,
                                   device=device)

        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.device = device

        self.to(self.device)

    def forward(self, state):
        tasks = torch.tensor(state['nodes'], dtype=torch.float32, device=self.device)
        cost_adj = state['degree_mat']-state['cost_adjacency']
        time_adj = state['degree_mat']-state['time_adjacency']
        adj = normalize_edges(np.stack((cost_adj, time_adj), axis=2))
        adj = torch.tensor(adj, dtype=torch.float32, device=self.device)
        agents = torch.tensor(state['agent_feats'], dtype=torch.float32, device=self.device)
        mask = torch.unsqueeze(torch.tensor(state['mask'], dtype=torch.float32, device=self.device), 0)
        ag_idx = int(state['agent_index'])
        coa_num = state['coa_num']
        time = torch.unsqueeze(torch.tensor([state['time_elapsed']], dtype=torch.float32, device=self.device), 0)
        agent = torch.unsqueeze(agents[ag_idx,:], 0)
        peers = torch.cat((agents[:ag_idx,:],agents[ag_idx+1:,:]), dim=0)

        encoder_x = self.task_encoder(X=tasks, L=adj)
        context_x = self.context_encoder(agent=agent, peers=peers, time=time)
        key = self.coa_keys[coa_num](encoder_x)
        value = self.coa_vals[coa_num](encoder_x)
        probs = self.decoder(context=context_x, key=key, value=value)
        probs = probs*mask
        # Softmax ignoring 0 values
        x_max = torch.max(probs, dim=1, keepdim=True)[0][0]
        x_exp = torch.exp(probs-x_max)
        x_exp = x_exp * (probs!=0)
        probs_softmax = x_exp / torch.sum(x_exp, dim=1, keepdim=True)
        return probs_softmax
        
        # x = self.fc1(state)
        # x = F.relu(x)
        # x = self.fc2(x)
        # x = F.relu(x)
        # mu = self.mu(x)
        # logsigma = self.logsigma(x)
        # return mu, logsigma

    def sample_normal(self, state, reparameterize=True):
        probs = self.forward(state)
        probabilities = Categorical(probs)
        action = probabilities.sample()
        # mu, logsigma = self.forward(state)
        # logsigma = T.clamp(logsigma, -20, 2)
        # sigma = logsigma.exp()
        # probabilities = Normal(mu, sigma)
        # transforms = [TanhTransform(cache_size=1)]
        # probabilities = TransformedDistribution(probabilities, transforms)
        # if reparameterize:
        #     action = probabilities.rsample()
        # else:
        #     action = probabilities.sample()

        log_probs = probabilities.log_prob(action).sum(axis=-1, keepdim=True)
        log_probs.to(self.device)

        return action, log_probs

In [14]:
n = 50
task_dim = 7
ag = 5
ag_dim = 8
def gen_state():
    state = {'nodes': np.random.rand(n, task_dim),
             'cost_adjacency': np.random.rand(n, n),
             'time_adjacency': np.random.rand(n, n),
             'degree_mat': np.random.rand(n, n),
             'agent_feats': np.random.rand(ag, ag_dim),
             'mask': np.random.choice([0, 1], size=n, p=[.5, .5]),
             'agent_index': 2,
             'coa_num': 2,
             'time_elapsed': np.random.rand()}
    return state

In [15]:
state = gen_state()
actor = ActorNetwork()

In [16]:
a, log_prob = actor.sample_normal(state)

In [17]:
a

tensor([20], device='cuda:0')

In [18]:
log_prob

tensor([-3.3344], device='cuda:0', grad_fn=<SumBackward1>)

In [7]:
a = probabilities.rsample()

In [8]:
a

tensor([-0.0470])