## References

1. https://github.com/bentrevett/pytorch-sentiment-analysis
2. https://github.com/lukysummer/Movie-Review-Sentiment-Analysis-LSTM-Pytorch

In [1]:
import numpy as np
import torch

## Load data

In [2]:
# Vocabulary size; passed to the model below as the number of embedding rows.
N_VOCAB = 5000

# Read the pre-saved feature and label arrays for the train and test splits.
x_train_all, y_train_all = np.load('x_train.npy'), np.load('y_train.npy')
x_test, y_test = np.load('x_test.npy'), np.load('y_test.npy')

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training data for validation. A fixed random_state makes the
# split identical on every Restart & Run All, so metrics are comparable between runs.
x_train, x_vali, y_train, y_vali = train_test_split(
    x_train_all, y_train_all, test_size=0.2, random_state=42
)

In [4]:
# Report how many examples ended up in each split (one line per split).
print(
    f'Number of training examples: {len(x_train)}',
    f'Number of validation examples: {len(x_vali)}',
    f'Number of testing examples: {len(x_test)}',
    sep='\n',
)

Number of training examples: 20000
Number of validation examples: 5000
Number of testing examples: 25000


## Training

In [5]:
# Run on the GPU when CUDA is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without a GPU instead of failing at the first `.to`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
from torch.utils.data import TensorDataset, DataLoader

BATCH_SIZE = 64

# Inputs become int64 tensors because they are fed to the model's embedding layer
# as indices; labels become float tensors because they are the targets of
# BCEWithLogitsLoss.
x_train = torch.as_tensor(x_train, dtype=torch.int64)
x_vali = torch.as_tensor(x_vali, dtype=torch.int64)
x_test = torch.as_tensor(x_test, dtype=torch.int64)
y_train = torch.as_tensor(y_train, dtype=torch.float)
y_vali = torch.as_tensor(y_vali, dtype=torch.float)
y_test = torch.as_tensor(y_test, dtype=torch.float)

d_train = TensorDataset(x_train, y_train)
d_vali = TensorDataset(x_vali, y_vali)
d_test = TensorDataset(x_test, y_test)

# Reshuffle the training set every epoch so the optimiser does not see the same
# batches in the same order each time. Validation/test order does not affect the
# metrics, so those loaders stay sequential.
dl_train = DataLoader(d_train, batch_size=BATCH_SIZE, shuffle=True)
dl_vali = DataLoader(d_vali, batch_size=BATCH_SIZE)
dl_test = DataLoader(d_test, batch_size=BATCH_SIZE)

### Model

In [7]:
import torch.nn as nn

class SentimentLSTM(nn.Module):
    """Stacked-LSTM classifier: token-id sequences in, one score vector per sequence out.

    Args:
        n_vocab: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden state.
        output_dim: number of values the final linear layer produces per sequence.
        n_layers: number of stacked LSTM layers.
        drop_p: dropout probability, applied between LSTM layers, to the
            embeddings, and to the final hidden state.
    """

    def __init__(self, n_vocab, embedding_dim, hidden_dim, output_dim, n_layers=2, drop_p=0.5):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=n_vocab, embedding_dim=embedding_dim)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            batch_first=True,
            dropout=drop_p,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.dropout = nn.Dropout(p=drop_p)

    def forward(self, text):
        """Return raw scores (no sigmoid applied) of shape (batch, output_dim).

        `text` is a batch of token-id sequences of shape (batch, seq_length).
        """
        token_vectors = self.embedding(text)        # (batch, seq_length, embedding_dim)
        lstm_input = self.dropout(token_vectors)
        _, (h_n, _) = self.lstm(lstm_input)         # h_n: (num_layers * num_directions, batch, hidden_dim)
        top_layer_state = h_n[-1]                   # last layer's final hidden state: (batch, hidden_dim)
        return self.fc(self.dropout(top_layer_state))

# Hyper-parameters of the classifier; a single output unit yields one logit per sequence.
EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM = 32, 100, 1

model = SentimentLSTM(
    n_vocab=N_VOCAB,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    output_dim=OUTPUT_DIM,
)

In [8]:
def count_parameters(model):
    """Return the total element count of the model's parameters that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Show the trainable-parameter count with thousands separators.
n_trainable = count_parameters(model)
print(f'The model has {n_trainable:,} trainable parameters')

The model has 294,501 trainable parameters


### Optimizer

In [9]:
import torch.optim as optim

# Adam over all model parameters.
# Alternative kept for reference: optim.SGD(model.parameters(), lr=1e-3)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

### Criterion

In [10]:
# Binary cross-entropy that takes raw logits: the model's forward pass ends in a
# linear layer with no sigmoid, and binary_accuracy applies the sigmoid itself.
criterion = nn.BCEWithLogitsLoss()

We use `.to` to place the model and the criterion on the GPU (if we have one).

In [11]:
# Move the network and the loss module onto the selected device.
model, criterion = model.to(device), criterion.to(device)

In [12]:
def binary_accuracy(preds, y):
    """
    Fraction of correct predictions in the batch (8 right out of 10 gives 0.8, not 8).

    `preds` are raw logits: they go through a sigmoid and are rounded to 0/1
    before being compared element-wise with `y`. The result is a 0-dim tensor.
    """
    predicted_labels = preds.sigmoid().round()
    hits = predicted_labels.eq(y).float()  # float so the division below is not integer math
    return hits.sum() / len(hits)

In [13]:
def train(model, dl_train, optimizer, criterion):
    """Run one optimisation pass over `dl_train` and return the epoch's loss and accuracy.

    Args:
        model: network called as `model(x)`; its output is squeezed on dim 1
            before being passed to the loss.
        dl_train: iterable of `(x, y)` batches.
        optimizer: optimiser whose gradients are zeroed and stepped once per batch.
        criterion: loss called as `criterion(predictions, y)`.

    Returns:
        `(loss, accuracy)` as Python floats averaged over examples. Each batch's
        value is weighted by its size so that a smaller final batch is not
        over-counted. The loss figure assumes `criterion` returns a per-batch
        mean -- TODO confirm if a different reduction is ever used.

    Raises:
        ValueError: if `dl_train` yields no examples.
    """
    loss_sum = 0.0
    acc_sum = 0.0
    n_examples = 0

    model.train()

    for x, y in dl_train:
        # `device` is the module-level device the model was moved to.
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()

        predictions = model(x).squeeze(1)
        loss = criterion(predictions, y)

        loss.backward()
        optimizer.step()

        # Accuracy uses the predictions made before this batch's weight update.
        acc = binary_accuracy(predictions, y)

        n_batch = len(y)
        loss_sum += loss.item() * n_batch
        acc_sum += acc.item() * n_batch
        n_examples += n_batch

    if n_examples == 0:
        raise ValueError('dl_train yielded no examples')

    return loss_sum / n_examples, acc_sum / n_examples

In [14]:
def evaluate(model, dl_vali, criterion):
    """Compute loss and accuracy of `model` over `dl_vali` without updating weights.

    Args:
        model: network called as `model(x)`; its output is squeezed on dim 1
            before being passed to the loss. It is switched to eval mode.
        dl_vali: iterable of `(x, y)` batches (any held-out split).
        criterion: loss called as `criterion(predictions, y)`.

    Returns:
        `(loss, accuracy)` as Python floats averaged over examples. Each batch's
        value is weighted by its size so that a smaller final batch is not
        over-counted. The loss figure assumes `criterion` returns a per-batch
        mean -- TODO confirm if a different reduction is ever used.

    Raises:
        ValueError: if `dl_vali` yields no examples.
    """
    loss_sum = 0.0
    acc_sum = 0.0
    n_examples = 0

    model.eval()

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for x, y in dl_vali:
            # `device` is the module-level device the model was moved to.
            x, y = x.to(device), y.to(device)

            predictions = model(x).squeeze(1)
            loss = criterion(predictions, y)

            acc = binary_accuracy(predictions, y)

            n_batch = len(y)
            loss_sum += loss.item() * n_batch
            acc_sum += acc.item() * n_batch
            n_examples += n_batch

    if n_examples == 0:
        raise ValueError('dl_vali yielded no examples')

    return loss_sum / n_examples, acc_sum / n_examples

In [15]:
import time

def epoch_time(start_time, end_time):
    """Split the interval between two timestamps (in seconds) into whole minutes and whole seconds.

    Returns a `(minutes, seconds)` tuple of ints; fractional seconds are truncated.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - 60 * whole_minutes)
    return whole_minutes, leftover_seconds

In [16]:
# Number of full passes over the training data.
N_EPOCHS = 5

# Lowest validation loss seen so far; used to decide when to checkpoint.
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    # One training pass, then metrics on the validation and test splits.
    # The test metrics are only printed; checkpoint selection below uses
    # the validation loss alone.
    train_loss, train_acc = train(model, dl_train, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, dl_vali, criterion)
    test_loss, test_acc = evaluate(model, dl_test, criterion)
    
    end_time = time.time()

    # The reported epoch time covers training plus both evaluations.
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    # Save the weights whenever the validation loss improves, overwriting
    # the previous checkpoint file.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
    print(f'\t Test. Loss: {test_loss:.3f} |  Test. Acc: {test_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 30s
	Train Loss: 0.682 | Train Acc: 55.43%
	 Val. Loss: 0.664 |  Val. Acc: 58.88%
	 Test. Loss: 0.666 |  Test. Acc: 58.74%
Epoch: 02 | Epoch Time: 0m 30s
	Train Loss: 0.635 | Train Acc: 63.86%
	 Val. Loss: 0.579 |  Val. Acc: 71.12%
	 Test. Loss: 0.583 |  Test. Acc: 70.90%
Epoch: 03 | Epoch Time: 0m 30s
	Train Loss: 0.595 | Train Acc: 68.36%
	 Val. Loss: 0.531 |  Val. Acc: 75.00%
	 Test. Loss: 0.541 |  Test. Acc: 74.42%
Epoch: 04 | Epoch Time: 0m 31s
	Train Loss: 0.552 | Train Acc: 72.49%
	 Val. Loss: 0.497 |  Val. Acc: 77.39%
	 Test. Loss: 0.511 |  Test. Acc: 76.84%
Epoch: 05 | Epoch Time: 0m 30s
	Train Loss: 0.524 | Train Acc: 74.81%
	 Val. Loss: 0.436 |  Val. Acc: 80.97%
	 Test. Loss: 0.448 |  Test. Acc: 80.05%


In [17]:
# Scratch cell: pull the first batch from dl_train into `batch` and stop.
# Uncomment the prints to inspect the first input (batch[0][0]) and its
# label (batch[1][0]).
for batch in dl_train:
    # print(batch[0][0])
    # print(batch[1][0])
    break