In [1]:
import random
import torch
from torchtext.legacy import data, datasets
import torch.optim as optim
import torch.nn as nn

seed = 42
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

TEXT = data.Field(tokenize='spacy', tokenizer_language='en_core_web_sm', include_lengths=True)
LABEL = data.LabelField(dtype=torch.float)

train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
train_data, valid_data = train_data.split(random_state=random.seed(seed))

MAX_VOCAB_SIZE = 25000
TEXT.build_vocab(train_data, max_size=MAX_VOCAB_SIZE, vectors="glove.6B.100d", unk_init=torch.Tensor.normal_)
LABEL.build_vocab(train_data)

BATCH_SIZE = 64

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
    device=device,
    repeat=False  # Set repeat to False to iterate over the data only once
)


In [2]:
print(len(TEXT.vocab))
print(TEXT.vocab.stoi[TEXT.pad_token])

25002
1


In [2]:
class RNN(nn.Module):
    def __init__(self, word_limit, dimension_embedding, dimension_hidden, dimension_output, num_layers, 
                 bidirectional, dropout, pad_idx):
        
        super().__init__()
        self.embedding = nn.Embedding(word_limit, dimension_embedding, padding_idx=pad_idx)
        self.rnn = nn.GRU(dimension_embedding, dimension_hidden, num_layers=num_layers, bidirectional=bidirectional, dropout=dropout)
        self.fc = nn.Linear(dimension_hidden * 2, dimension_output)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text, len_txt):
        embedded = self.dropout(self.embedding(text))
        output, hidden = self.rnn(embedded)
        hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1))
        return self.fc(hidden)

dimension_input = len(TEXT.vocab)
dimension_embedding = 100
dimension_hddn = 128
dimension_out = 1
layers = 2
bidirectional = True
dropout = 0.5
idx_pad = TEXT.vocab.stoi[TEXT.pad_token]

model = RNN(dimension_input, dimension_embedding, dimension_hddn, dimension_out, layers, bidirectional, dropout, idx_pad)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

optimizer = optim.AdamW(model.parameters())  # Use AdamW optimizer for better convergence
criterion = nn.BCEWithLogitsLoss()
model = model.to(device)
criterion = criterion.to(device)

In [3]:
def bin_acc(preds, y):
    predictions = torch.round(torch.sigmoid(preds))
    correct = (predictions == y).float() 
    acc = correct.sum() / len(correct)
    return acc

def train(model, itr, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    
    for i in itr:
        optimizer.zero_grad()
        text, len_txt = i.text
        predictions = model(text, len_txt).squeeze(1)
        loss = criterion(predictions, i.label)
        acc = bin_acc(predictions, i.label)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(itr), epoch_acc / len(itr)
 
def evaluate(model, itr, criterion):
    epoch_loss = 0
    epoch_acc = 0
    model.eval()
    
    with torch.no_grad():
        for i in itr:
            text, len_txt = i.text
            predictions = model(text, len_txt).squeeze(1)
            loss = criterion(predictions, i.label)
            acc = bin_acc(predictions, i.label)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(itr), epoch_acc / len(itr)

In [5]:
import time
 
def epoch_time(start_time, end_time):
    used_time = end_time - start_time
    used_mins = int(used_time / 60)
    used_secs = int(used_time - (used_mins * 60))
    return used_mins, used_secs

num_epochs = 5
best_valid_loss = float('inf')

for epoch in range(num_epochs):
    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()
 
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model.pt')
    
    print(f'Epoch: {epoch+1:02}') #Epoch Time: {epoch_mins}m {epoch_secs}s
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tVal. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')

model.load_state_dict(torch.load('tut2-model.pt'))
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Epoch: 01
	Train Loss: 0.659 | Train Acc: 60.43%
	Val. Loss: 0.584 | Val. Acc: 69.47%
Epoch: 02
	Train Loss: 0.590 | Train Acc: 68.41%
	Val. Loss: 0.532 | Val. Acc: 73.03%
Epoch: 03
	Train Loss: 0.455 | Train Acc: 79.02%
	Val. Loss: 0.607 | Val. Acc: 75.79%
Epoch: 04
	Train Loss: 0.376 | Train Acc: 83.57%
	Val. Loss: 0.369 | Val. Acc: 85.20%
Epoch: 05
	Train Loss: 0.318 | Train Acc: 86.49%
	Val. Loss: 0.341 | Val. Acc: 86.84%
Test Loss: 0.340 | Test Acc: 86.66%
