In [5]:
import sys

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from multiagent_rl.algos.agents import *

torch.manual_seed(1)

## Basic operation of LSTM

In [6]:
input_dim = 3
hidden_size = 2
seq_len = 1
lstm = nn.LSTM(input_dim, hidden_size)
inputs = [torch.randn(1, input_dim) for _ in range(seq_len)]  # make a sequence of length seq_len

# initialize the hidden state.
hidden = (torch.randn(1, 1, hidden_size),
          torch.randn(1, 1, hidden_size))
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    out, hidden = lstm(i.view(1, 1, -1), hidden)

# alternatively, we can do the entire sequence all at once.
# the first value returned by LSTM is all of the hidden states throughout
# the sequence. the second is just the most recent hidden state
# (compare the last slice of "out" with "hidden" below, they are the same)
# The reason for this is that:
# "out" will give you access to all hidden states in the sequence
# "hidden" will allow you to continue the sequence and backpropagate,
# by passing it as an argument  to the lstm at a later time
# Add the extra 2nd dimension
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, hidden_size), torch.randn(1, 1, hidden_size))  # clean out hidden state
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)

tensor([[[-0.1390, -0.2853]]], grad_fn=<StackBackward>)
(tensor([[[-0.1390, -0.2853]]], grad_fn=<StackBackward>), tensor([[[-0.2345, -0.4600]]], grad_fn=<StackBackward>))


In [7]:
print(f'lstm object: {lstm}')
print(f'input shape: {inputs.shape}')
print(f'out shape: {out.shape}')
print(f'hidden h shape: {hidden[0].shape}')
print(f'hidden c shape: {hidden[1].shape}')

lstm object: LSTM(3, 2)
input shape: torch.Size([1, 1, 3])
out shape: torch.Size([1, 1, 2])
hidden h shape: torch.Size([1, 1, 2])
hidden c shape: torch.Size([1, 1, 2])


In [8]:
print(f'out {out}')
print(f'h   {hidden[0]}')
print(f'c   {hidden[1]}')

out tensor([[[-0.1390, -0.2853]]], grad_fn=<StackBackward>)
h   tensor([[[-0.1390, -0.2853]]], grad_fn=<StackBackward>)
c   tensor([[[-0.2345, -0.4600]]], grad_fn=<StackBackward>)


## Run simple LSTM over multiple step input sequence

In [27]:
input_dim = 3
hidden_size = 2
seq_len = 4
num_layers = 1
batch_size = 1
lstm = nn.LSTM(input_dim, hidden_size, num_layers)

# initialize the hidden state.
hidden = (torch.randn(1, 1, hidden_size),
          torch.randn(1, 1, hidden_size))
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    out, hidden = lstm(i.view(1, 1, -1), hidden)

    

To run an LSTM on a sequence of inputs:
- generate input sequence of shape **(seq_len, batch, input_size)**
- pass initial hidden state *(h)* of shape **(num_layers * num_directions, batch, hidden_size)**
- pass initial cell state *(c)* of shape **(num_layers * num_directions, batch, hidden_size)**

In [28]:
inputs = [torch.randn(1, input_dim) for _ in range(seq_len)]  # make a sequence of length seq_len
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
print(f'Input shape:\n {inputs.shape}')
print(f'Input:\n {inputs}')

Input shape:
 torch.Size([4, 1, 3])
Input:
 tensor([[[ 1.0874, -0.5586,  0.3157]],

        [[ 1.5174,  0.1300, -0.9286]],

        [[-0.9993, -1.4282, -0.7373]],

        [[-0.0306,  0.1277, -0.0047]]])


In [40]:
h_0 = torch.zeros((num_layers, batch_size, hidden_size))
c_0 = torch.zeros_like(h_0)
print(f'Initial hidden state shape:\n {h_0.shape}')
print(f'Initial hidden state:\n {h_0}')


Initial hidden state shape:
 torch.Size([1, 1, 2])
Initial hidden state:
 tensor([[[0., 0.]]])


Run input sequence(s) through LSTM, with initial hidden/cell states

In [48]:
out, (h_final, c_final) = lstm(inputs, (h_0, c_0))

In [49]:
print(f'Final hidden state shape:\n {h_final.shape}')
print(f'Final hidden state:\n {h_final}')
print(f'Final cell state shape:\n {c_final.shape}')
print(f'Final cell state:\n {c_final}')

Final hidden state shape:
 torch.Size([1, 1, 2])
Final hidden state:
 tensor([[[ 0.0889, -0.1019]]], grad_fn=<StackBackward>)
Final cell state shape:
 torch.Size([1, 1, 2])
Final cell state:
 tensor([[[ 0.1639, -0.1388]]], grad_fn=<StackBackward>)


In [50]:
print(f'Output shape:\n {out.shape}')
print(f'Output:\n {out}')

Output shape:
 torch.Size([4, 1, 2])
Output:
 tensor([[[-0.0211, -0.2858]],

        [[-0.0450, -0.2318]],

        [[ 0.1083, -0.1997]],

        [[ 0.0889, -0.1019]]], grad_fn=<StackBackward>)


Confirm that final output is equal to final hidden state

In [62]:
print(f'Final hidden state:\n {h_final}')
print(f'Last output:\n {out[-1,:,:]}')
assert(torch.all(h_final.eq(out[-1,:,:])))

Final hidden state:
 tensor([[[ 0.0889, -0.1019]]], grad_fn=<StackBackward>)
Last output:
 tensor([[ 0.0889, -0.1019]], grad_fn=<SliceBackward>)


## Run episode history through LSTM ActorCritic

In [97]:
input_dim = 3
hidden_size = 5
action_size = 1
low = 0
high = 1
actor = LSTMDeterministicActor(input_dim, hidden_size, action_size, low, high)
critic = LSTMVEstimator(input_dim, hidden_size)
actor_critic = LSTMJoinedActorCritic(input_dim, hidden_size, action_size, low, high)
env_input = torch.randn(1, input_dim)
# agent = RDPGAgent(env.observation_space, env.action_space, **agent_kwargs)

In [102]:
episode_length = 10
for i in range(episode_length):
    obs = [[np.random.rand() for j in range(input_dim)]]
    print(f'obs: {obs} ')
    act, _ = actor(torch.as_tensor(obs, dtype=torch.float32))
#     act = agent.act(torch.as_tensor(obs, dtype=torch.float32), noise=True)
    print(f'act {act}')
    print('-----')


obs: [[0.981526796499701, 0.06426801686601302, 0.081231609841804]] 
act tensor([[[0.6747]]], grad_fn=<AddBackward0>)
-----
obs: [[0.7026335258517828, 0.7316251786079838, 0.432987423020599]] 
act tensor([[[0.6720]]], grad_fn=<AddBackward0>)
-----
obs: [[0.27976755885431903, 0.5940676026565817, 0.6518292062113481]] 
act tensor([[[0.6611]]], grad_fn=<AddBackward0>)
-----
obs: [[0.9444828121876456, 0.25927251914765825, 0.3586612718481491]] 
act tensor([[[0.6692]]], grad_fn=<AddBackward0>)
-----
obs: [[0.5172339057534872, 0.9661745794135282, 0.9250493546548532]] 
act tensor([[[0.6590]]], grad_fn=<AddBackward0>)
-----
obs: [[0.8801894499513875, 0.9220330768524984, 0.3481792042224864]] 
act tensor([[[0.6682]]], grad_fn=<AddBackward0>)
-----
obs: [[0.08087023056326093, 0.04518220031705933, 0.37877875291441365]] 
act tensor([[[0.6591]]], grad_fn=<AddBackward0>)
-----
obs: [[0.8739606977901481, 0.5333578595490565, 0.8602360281654687]] 
act tensor([[[0.6591]]], grad_fn=<AddBackward0>)
-----
obs: 

## Misc

In [9]:
input_dim = 3
hidden_size = 5
action_size = 1
low = -10
high = 20
actor = LSTMDeterministicActor(input_dim, hidden_size, action_size, low, high)
critic = LSTMVEstimator(input_dim, hidden_size)
actor_critic = LSTMJoinedActorCritic(input_dim, hidden_size, action_size, low, high)
env_input = torch.randn(1, input_dim)

In [10]:
action_out, (h, c) = actor.forward(env_input)
print(action_out)
print(actor.h)
print(actor.c)

tensor([[[2.7934]]], grad_fn=<AddBackward0>)
tensor([[[ 0.0568, -0.1332,  0.1781,  0.0766, -0.1196]]],
       grad_fn=<StackBackward>)
tensor([[[ 0.1730, -0.2191,  0.3194,  0.1702, -0.2331]]],
       grad_fn=<StackBackward>)


In [5]:
value, (h, c) = critic.forward(env_input)
print(value)
print(critic.h)
print(critic.c)

tensor([[[0.1712]]], grad_fn=<AddBackward0>)
tensor([[[ 0.0683,  0.1675,  0.0892,  0.0398, -0.0290]]],
       grad_fn=<StackBackward>)
tensor([[[ 0.1350,  0.2836,  0.2563,  0.1200, -0.0720]]],
       grad_fn=<StackBackward>)


In [9]:
action_out, value, (h, c) = actor_critic.forward(env_input)
print(action_out)
print(value)
print(critic.h)
print(critic.c)

tensor([[[5.7177]]], grad_fn=<AddBackward0>)
tensor([[[0.2299]]], grad_fn=<AddBackward0>)
tensor([[[ 0.0683,  0.1675,  0.0892,  0.0398, -0.0290]]],
       grad_fn=<StackBackward>)
tensor([[[ 0.1350,  0.2836,  0.2563,  0.1200, -0.0720]]],
       grad_fn=<StackBackward>)


In [10]:
agent = DDPGLSTMAgent(obs_dim=1)

AttributeError: 'NoneType' object has no attribute 'shape'

In [None]:
class LSTMTagger(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [None]:
def mlp(layer_sizes, hidden_activation, final_activation, batchnorm=True):
    layers = []
    for i in range(len(layer_sizes) - 1):
        layers.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
        if i < len(layer_sizes) - 2:
            # if batchnorm:
            #     layers.append(nn.BatchNorm1d(layer_sizes[i + 1]))
            layers.append(hidden_activation())
        else:
            layers.append(final_activation())
    return nn.Sequential(*layers)

In [None]:

class BoundedDeterministicActor(nn.Module):
    """
    MLP net for actor in bounded continuous action space.
    Returns deterministic action.
    Layer sizes passed as argument.
    Input dimension: layer_sizes[0]
    Output dimension: layer_sizes[-1] (should be 1 for V,Q)
    """

    def __init__(self, layer_sizes, activation, low, high, **kwargs):
        super().__init__()
        self.low = torch.as_tensor(low)
        self.width = torch.as_tensor(high - low)
        self.net = mlp(layer_sizes, activation, nn.Tanh)

    def forward(self, x):
        output = (self.net(x) + 1) * self.width / 2 + self.low
        return output

In [None]:
?torch.zeros

In [None]:
class LSTMDualUltimatum(nn.Module):

    def __init__(self, input_size, hidden_size, action_size, low, high):
        super(LSTMDualUltimatum, self).__init__()
        self.low = torch.as_tensor(low)
        self.width = torch.as_tensor(high - low)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.action_size = action_size
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dimhidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)        
        self.action_mlp = mlp([hidden_size, action_size], nn.Identity, nn.Tanh)

    def forward(self, input, h=None, c=None):
        if h is None: h=torch.zeros(self.hidden_size)
        if c is None: c=torch.zeros(self.hidden_size)
        lstm_out, (h, c) = self.lstm(input.view(len(input), 1, -1), (h,c))
        x = self.action_mlp(lstm_out)
        action_out = (x + 1) * self.width / 2 + self.low
        return action_out, (h, c)

In [None]:
action_size = 2
input_size = 2
hidden_size = 10
num_layers = 1
rnn_rl = nn.LSTM(input_size, hidden_size, num_layers)

In [None]:
rnn_rl = LSTMDualUltimatum(input_size, hidden_size, action_size, 0, 1)

In [None]:
action = torch.tensor([0.5, 0.5])
action_input = action.view(1,1,-1)

In [None]:
action, (h,c) = rnn_rl(action_input)

In [None]:
for i in range(10):
    action, (h,c) = rnn_rl(action_input, (h,c))
    

In [None]:
action

In [None]:
h

In [None]:
c

In [None]:
rnn = nn.LSTM(10, 20, 2)
input = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input, (h0, c0))

In [None]:
lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# initialize the hidden state.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    out, hidden = lstm(i.view(1, 1, -1), hidden)

In [None]:
lstm(i.view(1, 1, -1))

In [None]:
inputs[0]

input of shape (seq_len, batch, input_size): tensor containing the features of the input sequence. The input can also be a packed variable length sequence. See torch.nn.utils.rnn.pack_padded_sequence() or torch.nn.utils.rnn.pack_sequence() for details.

h_0 of shape (num_layers * num_directions, batch, hidden_size): tensor containing the initial hidden state for each element in the batch. If the LSTM is bidirectional, num_directions should be 2, else it should be 1.

c_0 of shape (num_layers * num_directions, batch, hidden_size): tensor containing the initial cell state for each element in the batch.

If (h_0, c_0) is not provided, both h_0 and c_0 default to zero.

In [None]:
inputs[0].view(1, 1, -1).shape

In [None]:
print(action.shape)
print(action_input.shape)

In [None]:
out, hidden = rnn_rl(action_input)

In [None]:
out.shape

In [None]:
hidden

In [None]:
action_tanh = nn.Tanh()
action_head = nn.Linear(hidden_size, action_dim)

In [None]:
action_mlp = mlp([hidden_size, 1], nn.Identity, nn.Tanh)

In [None]:
action_mlp(out)

In [None]:
action_head(out).squeeze()

In [None]:
action_tanh(action_head(out))

In [None]:
 m = nn.Tanh()
input = torch.randn(2)
output = m(input)

In [None]:
nn.Tanh(input)