In [498]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [499]:
%cd /content/drive/MyDrive/END/Week4

/content/drive/MyDrive/END/Week4


In [500]:
import torch
from torchtext import data
from torchtext import datasets

SEED = 1234

torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

TEXT = data.Field(tokenize = 'spacy', include_lengths = True)
LABEL = data.LabelField(dtype = torch.float)

In [501]:
from torchtext import datasets

train_data, test_data = datasets.IMDB.splits(TEXT, LABEL, root = 'data')

In [502]:
import random

train_data, valid_data = train_data.split(random_state = random.seed(SEED))

In [503]:
len(train_data), len(valid_data), len(test_data)

(17500, 7500, 25000)

In [504]:
MAX_VOCAB_SIZE = 25_000

TEXT.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE, 
                 vectors = "glove.6B.200d", 
                 unk_init = torch.Tensor.normal_)

LABEL.build_vocab(train_data)



  0%|          | 0/400000 [00:00<?, ?it/s][A[A

  0%|          | 1663/400000 [00:00<00:23, 16624.55it/s][A[A

  1%|          | 3162/400000 [00:00<00:24, 16097.37it/s][A[A

  1%|          | 4719/400000 [00:00<00:24, 15935.29it/s][A[A

  2%|▏         | 6157/400000 [00:00<00:25, 15431.86it/s][A[A

  2%|▏         | 7666/400000 [00:00<00:25, 15327.05it/s][A[A

  2%|▏         | 9252/400000 [00:00<00:25, 15480.92it/s][A[A

  3%|▎         | 10831/400000 [00:00<00:24, 15571.15it/s][A[A

  3%|▎         | 12405/400000 [00:00<00:24, 15620.13it/s][A[A

  3%|▎         | 13873/400000 [00:00<00:25, 15218.01it/s][A[A

  4%|▍         | 15332/400000 [00:01<00:25, 14987.16it/s][A[A

  4%|▍         | 16809/400000 [00:01<00:25, 14915.15it/s][A[A

  5%|▍         | 18415/400000 [00:01<00:25, 15239.46it/s][A[A

  5%|▍         | 19975/400000 [00:01<00:24, 15343.69it/s][A[A

  5%|▌         | 21499/400000 [00:01<00:24, 15309.96it/s][A[A

  6%|▌         | 23021/400000 [00:01<00:25, 

In [526]:
BATCH_SIZE = 128

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE,
    sort_within_batch = True,
    device = device)

## Reversing the text
We can use torch.flip or list reversal to reverse the text. Lets try it out on one batch of the text. torch.flip is more efficient than the second method shown later


In [527]:
one_batch = list(train_iterator)[0].text[0]
one_batch

tensor([[  11, 8503,   11,  ...,   66,   66,   66],
        [  19, 1180,  216,  ...,   24,   23,   19],
        [ 818,  256,   12,  ...,    9,    9,    5],
        ...,
        [ 220,   64,   22,  ...,    4,    0, 5140],
        [  53,   21,  308,  ...,    1,    1,    1],
        [ 163,    4,    4,  ...,    1,    1,    1]], device='cuda:0')

### Method 1: Using torch.flip

In [528]:
torch.flip(one_batch, (0, ))

tensor([[ 163,    4,    4,  ...,    1,    1,    1],
        [  53,   21,  308,  ...,    1,    1,    1],
        [ 220,   64,   22,  ...,    4,    0, 5140],
        ...,
        [ 818,  256,   12,  ...,    9,    9,    5],
        [  19, 1180,  216,  ...,   24,   23,   19],
        [  11, 8503,   11,  ...,   66,   66,   66]], device='cuda:0')

### Method 2: Using list reversal using reversed function

**Original text**

In [529]:
" ".join([TEXT.vocab.itos[x] for x in one_batch[:, 0].detach().cpu()])

"I was surprised at how a movie could be both cheesy and excellent at the same time . The <unk> flying saucer was naff beyond comprehension , especially when landing , yet the specially effects when the Krell attacked were awesome for a film that was made over half a century ago ! Living in the middle east I saw shades of Islam creep in when JJ Adams suggested <unk> should dress more modestly , and as an engineer , was amazed by the imagination used for the ' futuristic ' gadgets , and <unk> dreamed up by the props department . All in all , an entertaining hour and a half , my first time seeing Walter Pidgeon and a chance to see Leslie <unk> as a ' young ' man"

**Reversed text**

In [530]:
" ".join(list(reversed([TEXT.vocab.itos[x] for x in one_batch[:, 0].detach().cpu()])))

"man ' young ' a as <unk> Leslie see to chance a and Pidgeon Walter seeing time first my , half a and hour entertaining an , all in All . department props the by up dreamed <unk> and , gadgets ' futuristic ' the for used imagination the by amazed was , engineer an as and , modestly more dress should <unk> suggested Adams JJ when in creep Islam of shades saw I east middle the in Living ! ago century a half over made was that film a for awesome were attacked Krell the when effects specially the yet , landing when especially , comprehension beyond naff was saucer flying <unk> The . time same the at excellent and cheesy both be could movie a how at surprised was I"

**This we have to do individually for each text using a for loop if we are using Method 2. Hence Method 1 is more efficient**

## Model Definition

Here we have to use 3 LSTM layers in a loop instead of using the layer parameter = 3 in the nn.LSTM layer.
This we can do using torch.Modulelist and passing the cell states and hidden states of one LSTM layer to other using a for loop 

In [531]:
import torch.nn as nn

In [566]:
class RNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, dropout, pad_idx):
        
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        self.dropout = nn.Dropout(dropout)
        self.multi_layered_rnn = nn.ModuleList([nn.LSTM(embedding_dim, hidden_dim)])
        self.multi_layered_rnn.extend([nn.Dropout(dropout), nn.LSTM(hidden_dim, hidden_dim)]*(n_layers-1))
        
        
        self.fc = nn.Linear(hidden_dim * 1, output_dim)
        
    def forward(self, text, text_lengths):
        
        #text = [sent len, batch size]
        
        embedded = self.dropout(self.embedding(text))
        
        #embedded = [sent len, batch size, emb dim]
        
        #pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, enforce_sorted=False)
        for rnn in self.multi_layered_rnn:
          # print(packed_embedded)
          if not isinstance(rnn, torch.nn.modules.dropout.Dropout): 
            packed_embedded, (hidden, cell) = rnn(packed_embedded)
          else:
            packed_embedded, packed_lengths = nn.utils.rnn.pad_packed_sequence(packed_embedded)
            packed_embedded = nn.utils.rnn.pack_padded_sequence(rnn(packed_embedded), packed_lengths, enforce_sorted=False)
        
        #unpack sequence
        # output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output)

        #output = [sent len, batch size, hid dim * num directions]
        #output over padding tokens are zero tensors
        
        #hidden = [num layers * num directions, batch size, hid dim]
        #cell = [num layers * num directions, batch size, hid dim]
        
        #concat the final forward (hidden[-2,:,:]) and backward (hidden[-1,:,:]) hidden layers
        #and apply dropout
        
        hidden = self.dropout(hidden[-1,:,:])
                
        #hidden = [batch size, hid dim * num directions]
            
        return self.fc(hidden)

In [567]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 200
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 3
# BIDIRECTIONAL = True
DROPOUT = 0.2
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model = RNN(INPUT_DIM, 
            EMBEDDING_DIM, 
            HIDDEN_DIM, 
            OUTPUT_DIM, 
            N_LAYERS, 
            DROPOUT, 
            PAD_IDX)

In [568]:
print(model)

RNN(
  (embedding): Embedding(25002, 200, padding_idx=1)
  (dropout): Dropout(p=0.2, inplace=False)
  (multi_layered_rnn): ModuleList(
    (0): LSTM(200, 256)
    (1): Dropout(p=0.2, inplace=False)
    (2): LSTM(256, 256)
    (3): Dropout(p=0.2, inplace=False)
    (4): LSTM(256, 256)
  )
  (fc): Linear(in_features=256, out_features=1, bias=True)
)


In [569]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 5,995,985 trainable parameters


In [570]:
pretrained_embeddings = TEXT.vocab.vectors

print(pretrained_embeddings.shape)

torch.Size([25002, 200])


In [571]:
model.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[-0.1117, -0.4966,  0.1631,  ..., -1.8542,  0.4022,  0.4238],
        [ 0.2078,  1.1879, -0.7320,  ...,  1.3663, -0.4598,  0.6668],
        [-0.0715,  0.0935,  0.0237,  ...,  0.3362,  0.0306,  0.2558],
        ...,
        [ 0.4815, -0.3176,  0.5229,  ...,  0.4595, -0.7940,  0.1645],
        [-0.1764,  0.6005, -0.2219,  ...,  0.3903,  0.3519,  0.6316],
        [ 0.2811, -0.2467,  0.0691,  ..., -0.2630,  0.0290,  0.5149]])

In [572]:
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

print(model.embedding.weight.data)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0715,  0.0935,  0.0237,  ...,  0.3362,  0.0306,  0.2558],
        ...,
        [ 0.4815, -0.3176,  0.5229,  ...,  0.4595, -0.7940,  0.1645],
        [-0.1764,  0.6005, -0.2219,  ...,  0.3903,  0.3519,  0.6316],
        [ 0.2811, -0.2467,  0.0691,  ..., -0.2630,  0.0290,  0.5149]])


In [573]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters())
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [574]:
criterion = nn.BCEWithLogitsLoss()

model = model.to(device)
criterion = criterion.to(device)

In [575]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc

**In the train function we are reversing the text online as we get the data from the batch. This was it will be less memory intensive**

In [580]:
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        
        text, text_lengths = batch.text
        text_lengths = text_lengths.cpu()
        #Reversing the text
        rev_text = torch.flip(text, (0, ))
        predictions = model(rev_text, text_lengths).squeeze(1)
        
        loss = criterion(predictions, batch.label)
        
        acc = binary_accuracy(predictions, batch.label)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [581]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            text, text_lengths = batch.text
            text_lengths = text_lengths.cpu()
            predictions = model(text, text_lengths).squeeze(1)
            
            loss = criterion(predictions, batch.label)
            
            acc = binary_accuracy(predictions, batch.label)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [582]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [583]:
N_EPOCHS = 20

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 30s
	Train Loss: 0.681 | Train Acc: 54.69%
	 Val. Loss: 0.684 |  Val. Acc: 53.19%
Epoch: 02 | Epoch Time: 0m 30s
	Train Loss: 0.656 | Train Acc: 59.36%
	 Val. Loss: 0.689 |  Val. Acc: 55.20%
Epoch: 03 | Epoch Time: 0m 30s
	Train Loss: 0.691 | Train Acc: 53.31%
	 Val. Loss: 0.695 |  Val. Acc: 49.22%
Epoch: 04 | Epoch Time: 0m 30s
	Train Loss: 0.661 | Train Acc: 61.32%
	 Val. Loss: 0.546 |  Val. Acc: 78.62%
Epoch: 05 | Epoch Time: 0m 30s
	Train Loss: 0.551 | Train Acc: 73.38%
	 Val. Loss: 0.375 |  Val. Acc: 85.48%
Epoch: 06 | Epoch Time: 0m 30s
	Train Loss: 0.315 | Train Acc: 87.99%
	 Val. Loss: 0.361 |  Val. Acc: 86.26%
Epoch: 07 | Epoch Time: 0m 30s
	Train Loss: 0.226 | Train Acc: 91.91%
	 Val. Loss: 0.331 |  Val. Acc: 87.30%
Epoch: 08 | Epoch Time: 0m 31s
	Train Loss: 0.171 | Train Acc: 93.81%
	 Val. Loss: 0.370 |  Val. Acc: 87.37%
Epoch: 09 | Epoch Time: 0m 30s
	Train Loss: 0.129 | Train Acc: 95.81%
	 Val. Loss: 0.355 |  Val. Acc: 87.49%
Epoch: 10 | Epoch T

In [588]:
model.load_state_dict(torch.load('tut2-model.pt'))

test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Test Loss: 0.339 | Test Acc: 86.57%


In [589]:
import spacy
nlp = spacy.load('en')

def predict_sentiment(model, sentence):
    model.eval()
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
    rev_tokenized = list(reversed(tokenized))
    indexed = [TEXT.vocab.stoi[t] for t in rev_tokenized]
    length = [len(indexed)]
    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(1)
    length_tensor = torch.LongTensor(length)
    prediction = torch.sigmoid(model(tensor, length_tensor))
    return prediction.item()

In [590]:
sentence = "This film is terrible"
predict_sentiment(model, sentence)

0.027619820088148117

In [591]:
sentence = "This film is awesome"
predict_sentiment(model, sentence)

0.9871930480003357