In [112]:
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x7fa4680bdf10>

In [5]:
lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# initialize the hidden state.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    out, hidden = lstm(i.view(1, 1, -1), hidden)

# alternatively, we can do the entire sequence all at once.
# the first value returned by LSTM is all of the hidden states throughout
# the sequence. the second is just the most recent hidden state
# (compare the last slice of "out" with "hidden" below, they are the same)
# The reason for this is that:
# "out" will give you access to all hidden states in the sequence
# "hidden" will allow you to continue the sequence and backpropagate,
# by passing it as an argument  to the lstm at a later time
# Add the extra 2nd dimension
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # clean out hidden state
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)

tensor([[[-0.0187,  0.1713, -0.2944]],

        [[-0.3521,  0.1026, -0.2971]],

        [[-0.3191,  0.0781, -0.1957]],

        [[-0.1634,  0.0941, -0.1637]],

        [[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward>)
(tensor([[[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward>), tensor([[[-0.9825,  0.4715, -0.0633]]], grad_fn=<StackBackward>))


In [6]:
class LSTMTagger(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [59]:
def mlp(layer_sizes, hidden_activation, final_activation, batchnorm=True):
    layers = []
    for i in range(len(layer_sizes) - 1):
        layers.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
        if i < len(layer_sizes) - 2:
            # if batchnorm:
            #     layers.append(nn.BatchNorm1d(layer_sizes[i + 1]))
            layers.append(hidden_activation())
        else:
            layers.append(final_activation())
    return nn.Sequential(*layers)

In [None]:

class BoundedDeterministicActor(nn.Module):
    """
    MLP net for actor in bounded continuous action space.
    Returns deterministic action.
    Layer sizes passed as argument.
    Input dimension: layer_sizes[0]
    Output dimension: layer_sizes[-1] (should be 1 for V,Q)
    """

    def __init__(self, layer_sizes, activation, low, high, **kwargs):
        super().__init__()
        self.low = torch.as_tensor(low)
        self.width = torch.as_tensor(high - low)
        self.net = mlp(layer_sizes, activation, nn.Tanh)

    def forward(self, x):
        output = (self.net(x) + 1) * self.width / 2 + self.low
        return output

In [113]:
?torch.zeros

[0;31mDocstring:[0m
zeros(*size, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor

Returns a tensor filled with the scalar value `0`, with the shape defined
by the variable argument :attr:`size`.

Args:
    size (int...): a sequence of integers defining the shape of the output tensor.
        Can be a variable number of arguments or a collection like a list or tuple.
    out (Tensor, optional): the output tensor.
    dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
        Default: if ``None``, uses a global default (see :func:`torch.set_default_tensor_type`).
    layout (:class:`torch.layout`, optional): the desired layout of returned Tensor.
        Default: ``torch.strided``.
    device (:class:`torch.device`, optional): the desired device of returned tensor.
        Default: if ``None``, uses the current device for the default tensor type
        (see :func:`torch.set_default_tensor_type`). :attr:`device` wil

In [116]:
class LSTMDualUltimatum(nn.Module):

    def __init__(self, input_size, hidden_size, action_size, low, high):
        super(LSTMDualUltimatum, self).__init__()
        self.low = torch.as_tensor(low)
        self.width = torch.as_tensor(high - low)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.action_size = action_size
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dimhidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)        
        self.action_mlp = mlp([hidden_size, action_size], nn.Identity, nn.Tanh)

    def forward(self, input, h=None, c=None):
        if h is None: h=torch.zeros(self.hidden_size)
        if c is None: c=torch.zeros(self.hidden_size)
        lstm_out, (h, c) = self.lstm(input.view(len(input), 1, -1), (h,c))
        x = self.action_mlp(lstm_out)
        action_out = (x + 1) * self.width / 2 + self.low
        return action_out, (h, c)

In [106]:
action_size = 2
input_size = 2
hidden_size = 10
num_layers = 1
rnn_rl = nn.LSTM(input_size, hidden_size, num_layers)

In [117]:
rnn_rl = LSTMDualUltimatum(input_size, hidden_size, action_size, 0, 1)

In [109]:
action = torch.tensor([0.5, 0.5])
action_input = action.view(1,1,-1)

In [118]:
action, (h,c) = rnn_rl(action_input)

RuntimeError: Expected hidden[0] size (1, 1, 10), got (10,)

In [104]:
for i in range(10):
    action, (h,c) = rnn_rl(action_input, (h,c))
    

TypeError: forward() takes 2 positional arguments but 3 were given

In [99]:
action

tensor([[[0.4323, 0.4087]]], grad_fn=<AddBackward0>)

In [100]:
h

tensor([[[ 0.0264,  0.1273,  0.0466, -0.0064,  0.0409,  0.0365,  0.0644,
           0.0118,  0.0319, -0.0389]]], grad_fn=<StackBackward>)

In [101]:
c

tensor([[[ 0.0510,  0.2659,  0.0775, -0.0208,  0.0863,  0.0915,  0.1421,
           0.0213,  0.0802, -0.0825]]], grad_fn=<StackBackward>)

In [15]:
rnn = nn.LSTM(10, 20, 2)
input = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input, (h0, c0))

In [17]:
lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# initialize the hidden state.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    out, hidden = lstm(i.view(1, 1, -1), hidden)

In [28]:
lstm(i.view(1, 1, -1))

(tensor([[[0.0552, 0.0080, 0.1339]]], grad_fn=<StackBackward>),
 (tensor([[[0.0552, 0.0080, 0.1339]]], grad_fn=<StackBackward>),
  tensor([[[0.0960, 0.0237, 0.2261]]], grad_fn=<StackBackward>)))

In [20]:
inputs[0]

tensor([[ 0.5119, -0.0704,  1.4390]])

input of shape (seq_len, batch, input_size): tensor containing the features of the input sequence. The input can also be a packed variable length sequence. See torch.nn.utils.rnn.pack_padded_sequence() or torch.nn.utils.rnn.pack_sequence() for details.

h_0 of shape (num_layers * num_directions, batch, hidden_size): tensor containing the initial hidden state for each element in the batch. If the LSTM is bidirectional, num_directions should be 2, else it should be 1.

c_0 of shape (num_layers * num_directions, batch, hidden_size): tensor containing the initial cell state for each element in the batch.

If (h_0, c_0) is not provided, both h_0 and c_0 default to zero.

In [22]:
inputs[0].view(1, 1, -1).shape

torch.Size([1, 1, 3])

In [25]:
print(action.shape)
print(action_input.shape)

torch.Size([2])
torch.Size([1, 1, 2])


In [33]:
out, hidden = rnn_rl(action_input)

In [36]:
out.shape

torch.Size([1, 1, 10])

In [35]:
hidden

(tensor([[[-0.0830,  0.0167,  0.1060,  0.0499,  0.0262,  0.0764, -0.0923,
           -0.0840,  0.0275,  0.0843]]], grad_fn=<StackBackward>),
 tensor([[[-0.2060,  0.0322,  0.2238,  0.1060,  0.0463,  0.1982, -0.1764,
           -0.2045,  0.0604,  0.1599]]], grad_fn=<StackBackward>))

In [56]:
action_tanh = nn.Tanh()
action_head = nn.Linear(hidden_size, action_dim)

In [66]:
action_mlp = mlp([hidden_size, 1], nn.Identity, nn.Tanh)

In [67]:
action_mlp(out)

tensor([[[-0.2442]]], grad_fn=<TanhBackward>)

In [52]:
action_head(out).squeeze()

tensor([-0.0586,  0.1381], grad_fn=<SqueezeBackward0>)

In [58]:
action_tanh(action_head(out))

tensor([[[ 0.2027, -0.0512]]], grad_fn=<TanhBackward>)

In [54]:
 m = nn.Tanh()
input = torch.randn(2)
output = m(input)

In [55]:
nn.Tanh(input)

TypeError: __init__() takes 1 positional argument but 2 were given