In [2]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn

In [3]:
def hidden_init(layer):
    """Return the symmetric uniform-initialisation bounds for a layer.

    The bound is ``lim = 1 / sqrt(fan_in)``, where ``fan_in`` is the number
    of input features feeding each unit of ``layer``.

    Args:
        layer: A module exposing a ``weight`` tensor whose second dimension
            is the number of input features, such as ``nn.Linear`` (weight
            shape ``(out_features, in_features)``).

    Returns:
        tuple: ``(-lim, lim)``, ready to be unpacked into
        ``Tensor.uniform_``.
    """
    # nn.Linear lays its weight out as (out_features, in_features), so the
    # fan-in is dimension 1. Dimension 0 is the number of *output* units and
    # would yield bounds based on fan-out instead.
    fan_in = layer.weight.data.size()[1]
    lim = 1. / np.sqrt(fan_in)
    return (-lim, lim)

In [4]:
class Actor(nn.Module):
    """Policy network: two ReLU hidden layers followed by a tanh output.

    Adapted from
    https://github.com/Remtasya/DDPG-Actor-Critic-Reinforcement-Learning-Reacher-Environment/blob/master/model.py
    """

    def __init__(self,state_size,action_size,seed,hidden_unit_1=500,hidden_unit_2=300) -> None:
        """Create the layers and initialise their weights.

        Args:
            state_size: Number of input features of the first linear layer.
            action_size: Number of output features of the last linear layer.
            seed: Value passed to ``torch.manual_seed``; note that this seeds
                the global torch RNG, not just this module.
            hidden_unit_1: Width of the first hidden layer.
            hidden_unit_2: Width of the second hidden layer.
        """
        super().__init__()
        # torch.manual_seed returns the global Generator; keep a handle on it.
        self.seed = torch.manual_seed(seed)

        # The integer width arguments are only read on the right-hand side;
        # the attributes of the same name hold the layers themselves.
        self.hidden_unit_1 = nn.Linear(state_size, hidden_unit_1)
        self.hidden_unit_2 = nn.Linear(hidden_unit_1, hidden_unit_2)
        self.final = nn.Linear(hidden_unit_2, action_size)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw all layer weights in place (biases are left untouched)."""
        # Hidden layers: uniform within the bounds computed by hidden_init.
        for hidden_layer in (self.hidden_unit_1, self.hidden_unit_2):
            low, high = hidden_init(hidden_layer)
            hidden_layer.weight.data.uniform_(low, high)
        # Output layer: small fixed range.
        self.final.weight.data.uniform_(-3e-3, 3e-3)

    def forward(self, state):
        """Map a batch of states to actions squashed into [-1, 1] by tanh."""
        hidden_1 = F.relu(self.hidden_unit_1(state))
        hidden_2 = F.relu(self.hidden_unit_2(hidden_1))
        action = self.final(hidden_2)
        return torch.tanh(action)
        

In [8]:
class Critic(torch.nn.Module):
    """Q-value network: the action is injected after the first hidden layer.

    Adapted from
    https://github.com/Remtasya/DDPG-Actor-Critic-Reinforcement-Learning-Reacher-Environment/blob/master/model.py
    """

    def __init__(self,state_size,action_size,seed,hidden_unit_1=500,hidden_unit_2=300):
        """Create the layers and initialise their weights.

        Args:
            state_size: Number of input features of the first linear layer.
            action_size: Number of action features concatenated onto the
                first hidden layer's output.
            seed: Value passed to ``torch.manual_seed``; note that this seeds
                the global torch RNG, not just this module.
            hidden_unit_1: Width of the first hidden layer.
            hidden_unit_2: Width of the second hidden layer.
        """
        super().__init__()
        # torch.manual_seed returns the global Generator; keep a handle on it.
        self.seed = torch.manual_seed(seed)

        self.hidden_unit_1 = nn.Linear(state_size, hidden_unit_1)
        # Second layer sees the first layer's activations plus the raw action.
        merged_width = hidden_unit_1 + action_size
        self.hidden_unit_2 = nn.Linear(merged_width, hidden_unit_2)
        # Single output unit: one scalar per (state, action) row.
        self.final = nn.Linear(hidden_unit_2, 1)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw all layer weights in place (biases are left untouched)."""
        # Hidden layers: uniform within the bounds computed by hidden_init.
        for hidden_layer in (self.hidden_unit_1, self.hidden_unit_2):
            low, high = hidden_init(hidden_layer)
            hidden_layer.weight.data.uniform_(low, high)
        # Output layer: small fixed range.
        self.final.weight.data.uniform_(-3e-3, 3e-3)

    def forward(self, state, action):
        """Map (state, action) pairs to one value per row.

        Both inputs are concatenated along ``dim=1``, so they are expected to
        be 2-D tensors with matching first dimension.
        """
        state_features = F.relu(self.hidden_unit_1(state))
        merged = torch.cat((state_features, action), dim=1)
        hidden = F.relu(self.hidden_unit_2(merged))
        return self.final(hidden)

        


