In [1]:
import torch
from torch import nn

In [2]:
class AuctionAdvantageRNN(nn.Module):
    """Recurrent network that maps a sequence of per-round features to action logits.

    The recurrent layer (LSTM or vanilla RNN) consumes a packed batch of
    variable-length sequences; the hidden output at each sequence's final
    valid time step is passed through a linear layer to produce one logit
    per action.

    Args:
        num_players: number of players (contributes ``num_players`` features per step).
        num_products: number of products (contributes ``5 * num_products`` features per step).
        num_actions: size of the output logit vector.
        hidden_size: hidden width of the recurrent layer.
        rnn_model: ``'lstm'`` or ``'rnn'``.

    Raises:
        ValueError: if ``rnn_model`` is not one of the supported names.
    """

    def __init__(self, num_players, num_products, num_actions, hidden_size=128, rnn_model='lstm'):
        super().__init__()
        # Features per time step: one slot per player, one scalar (budget), and
        # five per-product groups (values, submitted demands, processed demands,
        # observed demands, prices).
        input_size = num_players + 1 + 5 * num_products
        if rnn_model == 'lstm':
            self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        elif rnn_model == 'rnn':
            self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        else:
            raise ValueError('unrecognized RNN model %s' % rnn_model)
        self.output_layer = nn.Linear(hidden_size, num_actions)

    def forward(self, x):
        """Return action logits of shape (batch, num_actions).

        Args:
            x: a packed padded sequence (``PackedSequence``) of per-step features.
        """
        # TODO: do packing+padding in module?
        rnn_outputs, _ = self.rnn(x)
        padded_output, output_lens = nn.utils.rnn.pad_packed_sequence(rnn_outputs, batch_first=True)

        # Pick, for every sequence in the batch, the output at its last valid
        # (non-padding) time step with a single vectorized indexing operation.
        batch_index = torch.arange(padded_output.size(0), device=padded_output.device)
        last_step = (output_lens - 1).to(padded_output.device)
        last_outputs = padded_output[batch_index, last_step]

        action_logits = self.output_layer(last_outputs)
        return action_logits

In [4]:
# Example model: 3 + 1 + 5 * 4 = 24 input features per time step, 30 action logits.
model = AuctionAdvantageRNN(num_players=3, num_products=4, num_actions=30)

In [5]:
def pack_sequences(sequences):
    """Pad variable-length sequences to a common length and pack them.

    Args:
        sequences: list of tensors whose first dimension is the time axis;
            the sequences do not need to be sorted by length.

    Returns:
        The packed batch produced by ``pack_padded_sequence`` (batch-first input).
    """
    rnn_utils = torch.nn.utils.rnn
    batch = rnn_utils.pad_sequence(sequences, batch_first=True)
    # Original (unpadded) length of every sequence, in input order.
    lengths = list(map(len, sequences))
    return rnn_utils.pack_padded_sequence(
        batch,
        lengths=lengths,
        batch_first=True,
        enforce_sorted=False,
    )

In [6]:
# Hand-written example batch: two infosets with different history lengths.
# NOTE(review): every row below has 13 features, whereas the model built above
# expects 3 + 1 + 5 * 4 = 24 per step; the forward call that follows therefore
# fails with the recorded RuntimeError. TODO: regenerate these rows in the
# (players, budget, values, submitted, processed, observed, prices) layout.
sequences = [
    # first infoset: 1 previous state
    torch.tensor([
        [100, 100, 50, 70, 100, 110, 50, 2, 1, 2, 1, 1, 2]
    ]),
    # second infoset: 3 previous states
    torch.tensor([
        [300, 150, 110, 70, 100, 100, 50, 0, 0, 0, 0, 0, 0,],
        [300, 150, 110, 70, 110, 110, 55, 2, 1, 2, 1, 1, 2],
        [300, 150, 110, 70, 121, 121, 61, 2, 1, 1, 1, 1, 1],
    ]),
]

# Pad + pack the batch, then cast the packed data to float for the RNN.
packed_sequences = pack_sequences(sequences).float()

In [7]:
# NOTE(review): as the recorded output shows, this call raises a RuntimeError
# because the example rows are 13 wide while the model expects 24 input features.
model(packed_sequences)

RuntimeError: input.size(-1) must be equal to input_size. Expected 24, got 13

The flat infostate tensor contains, in this order:
- player number (1-hot encoded)
- budget
- value for each product
- submitted demand for each product x round
- processed demand for each product x round
- observations for each product x round
- prices for each product x round

Expand it into a tensor of shape (rounds x features), where the features for each round are:
- player number (1-hot encoded)
- budget
- value for each product
- submitted demand for each product
- processed demand for each product
- observations for each product
- prices for each product

So the number of features per round is players + 1 + 5*products (e.g. 3 players and 4 products give 3 + 1 + 5*4 = 24).

(Don't include any round whose posted prices are all 0 - such rounds haven't happened yet.)

(The final round will have 0 for its submitted/processed/observed demands.)


In [28]:
def expand_infostate_tensor(infostate_tensor, num_players, num_products, drop_unplayed_rounds=True):
    """Expand a flat infostate vector into one feature row per auction round.

    The flat vector is laid out as::

        [player one-hot (num_players), budget (1), values (num_products),
         submitted demands (rounds * num_products),
         processed demands (rounds * num_products),
         observed demands  (rounds * num_products),
         prices            (rounds * num_products)]

    Each output row repeats the round-independent features (player, budget,
    values) and appends that round's submitted, processed and observed demands
    and prices, giving ``num_players + 1 + 5 * num_products`` columns.

    Args:
        infostate_tensor: 1-D tensor in the layout above.
        num_players: length of the one-hot player encoding.
        num_products: number of products in the auction.
        drop_unplayed_rounds: when True (default), rows whose prices are all
            zero are removed, since such rounds have not happened yet. Pass
            False to keep every round slot.

    Returns:
        Tensor of shape (kept_rounds, num_players + 1 + 5 * num_products) with
        the same dtype as ``infostate_tensor``.
    """
    # Round-independent prefix: player one-hot, budget, per-product values.
    num_static = num_players + 1 + num_products
    static_features = infostate_tensor[:num_static]
    per_round_flat = infostate_tensor[num_static:]

    # Four per-round sections: submitted, processed, observed demands, prices.
    num_sections = 4
    rounds = len(per_round_flat) // (num_sections * num_products)

    # View the tail as (section, round, product); any trailing remainder that
    # does not fill a whole round is ignored.
    sections = per_round_flat[:num_sections * rounds * num_products].reshape(
        (num_sections, rounds, num_products)
    )
    # Reorder to (round, section, product) and flatten so each row holds
    # [submitted | processed | observed | prices] for one round.
    round_features = sections.permute(1, 0, 2).reshape((rounds, num_sections * num_products))

    expanded_infostate = torch.cat(
        [static_features.unsqueeze(0).expand(rounds, -1), round_features],
        dim=1,
    )

    if drop_unplayed_rounds:
        # A round with all-zero posted prices has not been played yet.
        prices = sections[-1]
        played = (prices != 0).any(dim=1)
        expanded_infostate = expanded_infostate[played]
    return expanded_infostate

In [35]:
# Example flat infostate for 3 players, 2 products and 3 rounds.
infostate = torch.tensor([
    # player (one-hot encoded)
    1, 0, 0,
    # budget
    100,
    # values (one per product)
    100, 50,
    # submitted demands: one line per round, one entry per product
    2, 2, 
    2, 1,
    0, 0,
    # processed demands: one line per round, one entry per product
    2, 2,
    2, 2,
    0, 0,
    # observed demands: one line per round, one entry per product
    6, 6,
    6, 3,
    0, 0,
    # prices: one line per round, one entry per product
    50, 50,
    60, 60,
    72, 72,
])

In [38]:
# Display the flat infostate (3 + 1 + 2 + 4 * 3 * 2 = 30 entries).
infostate

tensor([  1,   0,   0, 100, 100,  50,   2,   2,   2,   1,   0,   0,   2,   2,
          2,   2,   0,   0,   6,   6,   6,   3,   0,   0,  50,  50,  60,  60,
         72,  72])

In [39]:
# Expand the flat infostate into one row per round; each row holds
# (players, budget, values, submitted demands, processed demands, observed demands, prices).
expand_infostate_tensor(infostate, num_players=3, num_products=2)

tensor([[  1,   0,   0, 100, 100,  50,   2,   2,   2,   2,   6,   6,  50,  50],
        [  1,   0,   0, 100, 100,  50,   2,   1,   2,   2,   6,   3,  60,  60],
        [  1,   0,   0, 100, 100,  50,   0,   0,   0,   0,   0,   0,  72,  72]])