In [1]:
from torch import nn
from torch.autograd import Variable
import torch

In [2]:
class LinearMulti(nn.Module):
    """
    Fetch the weight and bias from a lookup table based on agent/model id
    Params:
        sz_in: input layer
        sz_out: output layer
        model_ids: agent/model id
    Returns:
        Tensor [len(model_ids), sz_out]
    """
    def __init__(self, nmodels, sz_in, sz_out):
        super(LinearMulti, self).__init__()
        self.nmodels = nmodels
        self.sz_in = sz_in
        self.sz_out = sz_out

        if nmodels == 1:
            self.linear = nn.Linear(sz_in, sz_out)
        else:
            # XXX: potential bug - updateGradInput is overidden,
            # possible use of `register_backward_hook`
            self.weight_lut = nn.Embedding(nmodels, sz_in * sz_out) # 1x3x200
            self.bias_lut = nn.Embedding(nmodels, sz_out) # 1x3x20

    def forward(self, input, model_ids):
        """
        Params:
            input: shape [len(model_ids), sz_in]
        """
        if self.nmodels == 1:
            return self.linear(input)
        else:
            weight = self.weight_lut(model_ids) # 1 x 3 x 200
            weight_view = weight.view(-1, self.sz_in, self.sz_out) # 3 x 10 x 20
            bias = self.bias_lut(model_ids) # 1 x 3 x 20
            bias_view = bias.view(-1, self.sz_out) # 3x20

            a, b = input.size()
            input = input.view(a, 1, b) # 3x1x10

            out = torch.matmul(input, weight_view) # 3x1x20

            a, b, c = out.size()
            out = out.view(a, c) #3x20
            out = out.add(bias_view) # 3x20
            return out

In [8]:
x = Variable(torch.ones(3, 4))
model = LinearMulti(3, 4, 1)
y = model.forward(x, Variable(torch.LongTensor([[1,2,1]])))
target = Variable(torch.FloatTensor([[3],[10],[3], ]))
print(target)
#print(y)

learning_rate = 1e-1
optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate)
loss_fn = torch.nn.MSELoss(size_average=False)

for i in range(10):
    print(model.weigh_lut.)
    optimizer.zero_grad()
    y = model.forward(x, Variable(torch.LongTensor([[1,2,1]])))
    loss = loss_fn(y, target)
    loss.backward(retain_graph=True)
    optimizer.step()
    print (loss)

tensor([[ 3.],
        [10.],
        [ 3.]])
tensor(130.8749, grad_fn=<MseLossBackward>)
tensor(113.3854, grad_fn=<MseLossBackward>)
tensor(102.4016, grad_fn=<MseLossBackward>)
tensor(94.1657, grad_fn=<MseLossBackward>)
tensor(87.5230, grad_fn=<MseLossBackward>)
tensor(81.9443, grad_fn=<MseLossBackward>)
tensor(77.1357, grad_fn=<MseLossBackward>)
tensor(72.9152, grad_fn=<MseLossBackward>)
tensor(69.1603, grad_fn=<MseLossBackward>)
tensor(65.7846, grad_fn=<MseLossBackward>)




In [9]:
class Modeler(nn.Module):
    
    def __init__(self, embed, vocab_size, embed_dim, keyword):
        super(Modeler, self).__init__()
        
        self.embeddings = nn.Embedding(vocab_size, embed_dim)
        self.embeddings.weight.data.copy_(torch.from_numpy(embed))
        self.embeddings.weight.requires_grad = True
        self.keyword = keyword
        self.linear1 = nn.Linear(embed_dim, 128)
        self.linear2 = nn.Linear(128, 1)
        
    def forward(self, input):
        embed = self.embeddings(input)
        k = embed.dot(self.keyword)
        out = F.relu(self.linear1(embed))
        out = self.linear2(out)
        return out

In [10]:
losses = []
loss_function = nn.MSELoss()
lookup_tensor = torch.LongTensor([word_to_ix['domain']])
embed = model.embeddings(Variable(lookup_tensor))
n_model = Modeler(model.embeddings.weight.data.numpy(), len(vocab),embedding_size, embed)
optimizer = optim.SGD(n_model.parameters(), lr=0.001)
batch_size = 1000
start = 0
end = batch_size 
for e in range(700):
     for epoch in range(int(len(data)/batch_size)):
        total_loss = torch.Tensor([0])
        for word, t in data[start:end]:
            word_id = word_to_ix[word]
            word_var = Variable(torch.LongTensor([word_id]))
            # Step 2. Recall that torch *accumulates* gradients. Before passing in a
            # new instance, you need to zero out the gradients from the old
            # instance
            n_model.zero_grad()            # Step 3. Run the forward pass, getting log probabilities over next
            # words
            res = n_model(word_var)
            # Step 4. Compute your loss function. (Again, Torch wants the target
            # word wrapped in a variable)
            t = Variable(torch.FloatTensor([t]))
            loss = loss_function(res, t)
            print(loss, t)
            # Step 5. Do the backward pass and update the gradient
            loss.backward()
            optimizer.step()
            total_loss += loss.data
            start += batch_size
            end += batch_size
        losses.append(total_loss)
print(np.mean(losses))

NameError: name 'word_to_ix' is not defined