In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ActorNetwork(nn.Module):
    """Fully connected network with two ReLU hidden layers and a linear head.

    The layer weights are Xavier-uniform initialised (ReLU gain for the two
    hidden layers, linear gain for the output layer); biases keep the
    ``nn.Linear`` default initialisation.

    Args:
        input_shape (int): number of input features, passed straight to
            ``nn.Linear`` as ``in_features``.
        output_shape (int): number of output features of the last layer.
        n_features (int): width of both hidden layers.
        **kwargs: accepted for call-site compatibility and ignored.
    """

    def __init__(self, input_shape, output_shape, n_features, **kwargs):
        super().__init__()

        n_input = input_shape
        n_output = output_shape

        self._h1 = nn.Linear(n_input, n_features)
        self._h2 = nn.Linear(n_features, n_features)
        self._h3 = nn.Linear(n_features, n_output)

        relu_gain = nn.init.calculate_gain('relu')
        nn.init.xavier_uniform_(self._h1.weight, gain=relu_gain)
        nn.init.xavier_uniform_(self._h2.weight, gain=relu_gain)
        nn.init.xavier_uniform_(self._h3.weight,
                                gain=nn.init.calculate_gain('linear'))

    @property
    def n_params(self):
        """int: total number of scalar entries over all parameter tensors."""
        return sum(p.numel() for p in self.parameters())

    @property
    def device(self):
        """torch.device: device of the first parameter tensor."""
        return next(self.parameters()).device

    def add_noise(self, noise):
        """Add a flat noise vector to the parameters, in place.

        The vector is consumed in ``self.parameters()`` order: the first
        ``numel`` entries perturb the first parameter tensor, the next ones
        the second, and so on. Entries beyond ``n_params`` are ignored.

        Args:
            noise: tensor or array-like with at least ``n_params`` elements.
                Non-tensor inputs are converted to a tensor; the result is
                moved to the network's device and flattened.

        Raises:
            ValueError: if ``noise`` has fewer than ``n_params`` elements.
                The check happens before any parameter is touched, so a
                failed call never leaves the network partially perturbed.
        """
        noise = torch.as_tensor(noise, device=self.device).reshape(-1)

        total = self.n_params
        if noise.numel() < total:
            raise ValueError(
                f"noise has {noise.numel()} elements but the network has "
                f"{total} parameters"
            )

        # no_grad + add_ updates the leaf tensors without recording the
        # operation, while still letting autograd notice the in-place change
        # (unlike writing through ``.data``).
        with torch.no_grad():
            offset = 0
            for param in self.parameters():
                count = param.numel()
                param.add_(noise[offset:offset + count].reshape(param.shape))
                offset += count

    def forward(self, state):
        """Compute the network output for ``state``.

        ``state`` is squeezed along dimension 1 (a no-op unless that
        dimension has size 1) and cast to float before the first layer.
        """
        features1 = F.relu(self._h1(torch.squeeze(state, 1).float()))
        features2 = F.relu(self._h2(features1))
        a = self._h3(features2)

        return a

In [19]:
# Seed the global RNG so the weight initialisation and the random batch below
# come out the same on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Small test network: 3 input features, hidden width 10, 1 output.
actr = ActorNetwork(input_shape=3, output_shape=1, n_features=10)
# Batch of 10 states with entries drawn uniformly from [0, 1).
states = torch.rand(10, 3)

In [23]:

# One noise value per scalar parameter, drawn uniformly from [0, 1).
noise_vector = torch.rand(actr.n_params)

# NOTE: add_noise modifies ``actr`` in place, so re-running this cell stacks
# another perturbation on top of the previous one.
actr.add_noise(noise_vector)

# Scalar objective: the network outputs summed over the whole batch.
output = actr(states).sum()

# Gradient of that scalar with respect to every parameter tensor.
grad = torch.autograd.grad(output, tuple(actr.parameters()))
grad

# _update_target(self, current,noise_vector)

(tensor([[213.8113, 172.2993, 212.5418],
         [287.9179, 232.0180, 286.2085],
         [235.5001, 189.7772, 234.1018],
         [162.9399, 131.3047, 161.9725],
         [189.1584, 152.4329, 188.0353],
         [172.8526, 139.2929, 171.8263],
         [258.6119, 208.4018, 257.0764],
         [188.5842, 151.9702, 187.4646],
         [210.1334, 169.3355, 208.8858],
         [231.6133, 186.6450, 230.2381]]),
 tensor([353.3896, 475.8739, 389.2371, 269.3088, 312.6431, 285.6927, 427.4365,
         311.6941, 347.3108, 382.8130]),
 tensor([[ 43.1548,  34.2945,  74.7545,  70.5093,  53.4055,  53.6183,  59.5728,
           79.3692,  80.2186,  55.4671],
         [ 14.0676,  11.1793,  24.3685,  22.9846,  17.4091,  17.4785,  19.4195,
           25.8728,  26.1497,  18.0812],
         [ 48.9951,  38.9358,  84.8714,  80.0517,  60.6332,  60.8748,  67.6351,
           90.1107,  91.0751,  62.9738],
         [ 98.7421,  78.4690, 171.0452, 161.3318, 122.1967, 122.6837, 136.3081,
          181.6042, 183.5