Source:
http://seba1511.net/tutorials/beginner/nlp/advanced_tutorial.html

## Advanced: Making Dynamic Decisions and the Bi-LSTM CRF

Supporting paper on CRFs:
http://www.cs.columbia.edu/~mcollins/crf.pdf

In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim

# Seed PyTorch's random number generator with a fixed value so that the
# random draws made later in this file (e.g. torch.randn) are repeatable.
torch.manual_seed(1)

<torch._C.Generator at 0x7f6dfcdddf90>

In [2]:
# Helper functions to make code more readable

def toScalar(var):
    """Return the first element of ``var`` as a plain Python number.

    ``var`` is flattened with ``view(-1)`` before the lookup, so for an
    input holding several elements only the one at flat position 0 is
    returned.
    """
    flattened = var.view(-1)
    values = flattened.data.tolist()
    return values[0]

def argmax(vec):
    """Return the position of the largest entry of ``vec`` along dim 1.

    The index tensor produced by ``torch.max(vec, 1)`` is collapsed to a
    single Python number by ``toScalar``, which keeps only its first
    element; for a ``vec`` with several rows that is the index for row 0.
    """
    # torch.max over a dimension yields (values, indices); keep the indices.
    bestIndex = torch.max(vec, 1)[1]
    return toScalar(bestIndex)

def prepareSequence(seq, toIndex):
    """Turn ``seq`` into a Variable of integer indices.

    Every element of ``seq`` is looked up in ``toIndex`` (subscript
    access, so a missing element raises whatever that lookup raises --
    ``KeyError`` for a dict).  The resulting indices are packed into a
    ``torch.LongTensor`` and wrapped in an ``autograd.Variable``.
    """
    indexTensor = torch.LongTensor([toIndex[item] for item in seq])
    return autograd.Variable(indexTensor)

# Compute log sum exp in stable way for the forward algo
def logSumExp(vec):
    """Return log(sum(exp(vec))) computed with a max-shift for stability.

    ``vec`` is indexed with two subscripts, so it must be 2-D.  The shift
    value is the largest entry of row 0 (located via ``argmax``); it is
    subtracted from every entry before exponentiating and added back
    after the log.
    """
    peak = vec[0, argmax(vec)]
    width = vec.size()[1]
    # Repeat the shift value across one row so it lines up with vec.
    peakRow = peak.view(1, -1).expand(1, width)
    shifted = vec - peakRow
    return peak + torch.log(torch.sum(torch.exp(shifted)))

In [4]:
# Create the model

class BiLSTM_CRF(nn.Module):
    
    def __init__(self, vocabSize, tagToIndex, embeddingDim, hiddenDim):
        """Build the embedding, Bi-LSTM, tag projection and transition table.

        vocabSize    -- number of entries in the word embedding table
        tagToIndex   -- mapping from tag to integer index; it is
                        subscripted with START_TAG and STOP_TAG
        embeddingDim -- size of each word embedding vector
        hiddenDim    -- input size of the tag projection layer; each LSTM
                        direction is given hiddenDim // 2 hidden units
        """
        super(BiLSTM_CRF, self).__init__()

        numTags = len(tagToIndex)
        perDirection = hiddenDim // 2

        # Remember the configuration for the other methods.
        self.vocabSize = vocabSize
        self.embeddingDim = embeddingDim
        self.hiddenDim = hiddenDim
        self.tagToIndex = tagToIndex
        self.tagsetSize = numTags

        # Layers are created in a fixed order: embedding, LSTM, projection.
        self.wordEmbed = nn.Embedding(vocabSize, embeddingDim)
        self.lstm = nn.LSTM(
            embeddingDim,
            perDirection,
            num_layers=1,
            bidirectional=True,
        )

        # Projects the LSTM output into tag space.
        self.hiddenToTagLayer = nn.Linear(hiddenDim, numTags)

        # Learnable transition scores: entry [i, j] scores moving
        # *to* tag i *from* tag j.
        initialScores = torch.randn(numTags, numTags)
        self.transitions = nn.Parameter(initialScores)

        # Forbid impossible moves with a very low score: nothing may
        # transition into the start tag (its whole row) and nothing may
        # transition out of the stop tag (its whole column).
        scores = self.transitions.data
        scores[tagToIndex[START_TAG], :] = -10000
        scores[:, tagToIndex[STOP_TAG]] = -10000

        self.hidden = self.initHiddenLayer()
        
        
    def initHiddenLayer(self):
        return (autograd.Variable(torch.randn(2, 1, self.hiddenDim // 2)), 
                autograd.Variable(torch.randn(2, 1, self.hiddenDim // 2)))
    
    
    def forwardAlgo(self, features):
        # Do the forward algorithm to compute the partition function
        initAlphas = torch.Tensor(1, self.tagsetSize).fill_(-10000.)
        # START_TAG has all of the score.
        initAlphas[0][self.tagToIndex[START_TAG]] = 0.0
        
        # Wrap in a variable to get automatic backprop later on
        forwardVar = autograd.Variable(initAlphas)
        
        # Iterate through the sentence
        for currFeature in features:
            alphas_t = [] # the forward variables at this timestep
            
            for nextTag in range(self.tagsetSize):
                # broadcast the emission score: it is the same
                # regardless of the previous tag

                emissionScore = currFeature[nextTag].view(1,-1) \
                    .expand(1, self.tagsetSize)
                
                # the ith entry of transScore is the score of transitioning
                # to nextTag from i
                transScore = self.transitions[nextTag].view(1, -1)
                
                # The ith entry of nextTagVar is the value for the
                # edge (i -> nextTag) before we do log-sum-exp
                nextTagVar = forwardVar + transScore + emissionScore
                
                # The forward variable for this tag is the log-sum-exp 
                # for all the scores
                