In [46]:
import numpy as np
import torchtext
from torchtext import vocab, data
import torch
import torch.nn as nn
from torchtext import datasets
import spacy

### Use pytorch torchtext to load the training data and transform the text into numerical tensors.

In [2]:
# set up fields
TEXT = data.Field(lower=True, include_lengths=False, batch_first=True,tokenize="spacy", pad_first=True,truncate_first=True)
LABEL = data.Field(sequential=False)

# make splits for data
train, test = datasets.IMDB.splits(TEXT, LABEL)

#generate vocabulary on the train set
TEXT.build_vocab(train, min_freq=10, vectors=vocab.GloVe(name='6B', dim=300))
LABEL.build_vocab(train)

# make iterator for splits
train_iter, test_iter = data.BucketIterator.splits((train, test), batch_size=100, device=torch.device('cuda'))

### Load a batch and decode it to see if everythings works as it shoud

In [4]:
def tensor_to_text(t, itos):
    return " ".join([itos[x] for x in t])

batch = next(train_iter.__iter__())
tensor_to_text(batch.text[0], TEXT.vocab.itos)[-100:]

" saints - especially the unbelievable character , al . i wonder if he 's got a job for me in <unk> ?"

### Define the model

In [18]:
import torch.nn as nn
import torch.nn.functional as F

class SentimentRNN(nn.Module):
    """
    The RNN model that will be used to perform Sentiment analysis.
    """

    def __init__(self, embedding_vect, hidden_dim, linear_dim = 128, n_layers=2, drop_prob=0.3):
        """
        Initialize the model by setting up the layers.
        """
        super(SentimentRNN, self).__init__()

        # initialize the embedding with pretrained word vectors
        self.embedding = nn.Embedding.from_pretrained(embedding_vect,freeze=True)
        self.lstm = nn.LSTM(self.embedding.embedding_dim, hidden_dim, n_layers, 
                            dropout=drop_prob, batch_first=True)
        
        # dropout layer
        self.dropout = nn.Dropout(drop_prob)
        
        # linear and sigmoid layers
        self.fc1 = nn.Linear(hidden_dim * 3, linear_dim)
        self.fc2 = nn.Linear(linear_dim, 1)

        self.batchnorm1 = nn.BatchNorm1d(3 * hidden_dim)
        self.batchnorm2 = nn.BatchNorm1d(linear_dim)
        

    def forward(self, x):

        embeds = self.embedding(x)
        lstm_out, _ = self.lstm(embeds, None)
    
        # concatenate last element, mean and max of the lstm output sequence
        lstm_last = lstm_out[:,-1,:] 
        lstm_mean = torch.mean(lstm_out, dim=1)
        lstm_max,_ = torch.max(lstm_out, dim=1)
        lstm_out = torch.cat([lstm_last, lstm_mean, lstm_max], dim=1)
        
        # dropout and fully-connected layers
        out = self.dropout(F.relu(self.batchnorm1(lstm_out)))
        out = self.dropout(F.relu(self.batchnorm2(self.fc1(out))))
        out = self.fc2(out)
        return torch.sigmoid(out)
   

In [74]:
# Instantiate the model w/ hyperparams
hidden_dim = 200
n_layers = 2
linear_dim = 512
net = SentimentRNN(TEXT.vocab.vectors, hidden_dim, linear_dim, n_layers, drop_prob=.4).cuda()

In [11]:
#helper function for calculating model loss and accuracy on a given dataset
def evaluate(net, data_iter, criterion):
    net.eval()
    with torch.no_grad():
        losses=[]
        num_correct = 0
        for batch in data_iter.__iter__():
            inputs = batch.text
            labels = batch.label - 1
            output = net(inputs)
            loss = criterion(output.squeeze(), labels.float())
            #print(loss)
            losses.append(loss.item())

            # convert output probabilities to predicted class (0 or 1)
            pred = torch.round(output.squeeze())  # rounds to the nearest integer

            # compare predictions to true label
            correct_tensor = pred.eq(labels.float().view_as(pred))
            correct =  np.squeeze(correct_tensor.cpu().numpy())
            num_correct += np.sum(correct)
    
    # accuracy over all test data
    acc = num_correct/len(data_iter.dataset)
    return acc, np.mean(losses)

In [27]:
# training loop function
def train(epochs, lr):
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr,betas=(0.7, 0.99))

    clip=1 # gradient clipping

    net.train()
    # train for some number of epochs
    for e in range(epochs):
        total_loss = 0
        net.train()
        # batch loop
        for batch in train_iter.__iter__():

            inputs = batch.text
            labels = batch.label - 1
            # zero accumulated gradients
            net.zero_grad()

            # get the output from the model
            output= net(inputs)

            # calculate the loss and perform backprop
            loss = criterion(output.squeeze(), labels.float())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            optimizer.step()
            total_loss += loss
        total_loss /= len(train_iter)
        # Get validation loss and accuracy
        val_acc, val_loss = evaluate(net, test_iter, criterion)
        print("Epoch: {}/{}...".format(e+1, epochs),
              "Loss: {:.6f}...".format(total_loss.item()),
              "Val Loss: {:.6f}".format(val_loss),
              "Val acc: {:.2f}".format(val_acc))



In [75]:
train(2, 0.001)

Epoch: 1/2... Loss: 0.379463... Val Loss: 0.310542 Val acc: 0.87
Epoch: 2/2... Loss: 0.286147... Val Loss: 0.282349 Val acc: 0.89


In [None]:
net.embedding.weight.requires_grad = True
train(2,0.00001)

Epoch: 1/2... Loss: 0.248861... Val Loss: 0.281046 Val acc: 0.89
Epoch: 2/2... Loss: 0.243893... Val Loss: 0.277018 Val acc: 0.89


In [None]:
train(2,0.00001)

Epoch: 1/2... Loss: 0.240829... Val Loss: 0.279200 Val acc: 0.90


### Inference on a test review

You can change this test_review to any text that you want. Read it and think: is it pos or neg? Then see if your model predicts correctly!
    
> **Exercise:** Write a `predict` function that takes in a trained net, a plain text_review, and a sequence length, and prints out a custom statement for a positive or negative review!
* You can use any functions that you've already defined or define any helper functions you want to complete `predict`, but it should just take in a trained net, a text review, and a sequence length.


In [67]:
# positive test review
test_review_pos = 'This movie had the best acting and the dialogue was so good. I loved it.'
# negative test review
test_review_neg = 'The worst movie I have seen; acting was terrible and I want my money back. This movie had bad acting and the dialogue was slow.'
# hard to interpret review
test_review_hard = "I have mixed feelings about this movie, there were parts I enjoyed and parts that I found too boring"

In [60]:
def predict(net, review):
    net.eval()
    tokenizer = spacy.blank('en').tokenizer
    tokenized = [tok.text for tok in tokenizer(review)]
    input_tensor = TEXT.numericalize([tokenized])
    output = net(input_tensor.cuda())
    # convert output probabilities to predicted class (0 or 1)
    pred = torch.round(output.squeeze()) 
    # printing output value, before rounding
    print('Prediction value, pre-rounding: {:.6f}'.format(output.item()))
    
    # print custom response
    if(pred.item()==1):
        print("Positive review detected!")
    else:
        print("Negative review detected.")


In [61]:
# call function
predict(net,test_review_neg)

Prediction value, pre-rounding: 0.000000
Negative review detected.


In [62]:
predict(net,test_review_pos)

Prediction value, pre-rounding: 0.837725
Positive review detected!


In [68]:
predict(net,test_review_hard)

Prediction value, pre-rounding: 0.958804
Positive review detected!
