Chapter 15

In [1]:
import torch
import torch.nn as nn

torch.manual_seed(1)
rnn_layer = nn.RNN(input_size=5, hidden_size=2,
                   num_layers=1, batch_first=True)
w_xh = rnn_layer.weight_ih_l0
w_hh = rnn_layer.weight_hh_l0
b_xh = rnn_layer.bias_ih_l0
b_hh = rnn_layer.bias_hh_l0

print('W_xh shape:', w_xh)
print()
print('W_xh shape:', w_xh.shape)
print('W_hh shape:', w_hh.shape)
print('b_xh shape:', b_xh.shape)
print('b_hh shape:', b_hh.shape)

W_xh shape: Parameter containing:
tensor([[ 0.3643, -0.3121, -0.1371,  0.3319, -0.6657],
        [ 0.4241, -0.1455,  0.3597,  0.0983, -0.0866]], requires_grad=True)

W_xh shape: torch.Size([2, 5])
W_hh shape: torch.Size([2, 2])
b_xh shape: torch.Size([2])
b_hh shape: torch.Size([2])


In [2]:
x_seq = torch.tensor([[1.0]*5, [2.0]*5, [3.0]*5]).float()

## output of the simple RNN:
output, hn = rnn_layer(torch.reshape(x_seq, (1, 3, 5)))

## manually computing the output:
out_man = []
for t in range(3):
    xt = torch.reshape(x_seq[t], (1, 5))
    print(f'Time step {t} =>')
    print('    Input           :', xt.numpy())
    
    ht = torch.matmul(xt, torch.transpose(w_xh, 0, 1)) + b_hh
    print('   Hidden           :', ht.detach().numpy())
    if t > 0:
         prev_h = out_man[t-1]
    else:
         prev_h = torch.zeros((ht.shape))
    ot = ht + torch.matmul(prev_h, torch.transpose(w_hh, 0, 1)) \
            + b_hh
    ot = torch.tanh(ot)
    out_man.append(ot)
    print('    Output (manual) :', ot.detach().numpy())
    print('    RNN output      :', output[:, t].detach().numpy())
    print()

Time step 0 =>
    Input           : [[1. 1. 1. 1. 1.]]
   Hidden           : [[-0.3161478   0.64722455]]
    Output (manual) : [[-0.21046415  0.56788784]]
    RNN output      : [[-0.3519801   0.52525216]]

Time step 1 =>
    Input           : [[2. 2. 2. 2. 2.]]
   Hidden           : [[-0.73478645  1.2972739 ]]
    Output (manual) : [[-0.5741978  0.7945334]]
    RNN output      : [[-0.68424344  0.76074266]]

Time step 2 =>
    Input           : [[3. 3. 3. 3. 3.]]
   Hidden           : [[-1.153425   1.9473232]]
    Output (manual) : [[-0.8130059   0.91817397]]
    RNN output      : [[-0.8649416   0.90466356]]



In [42]:
torch.reshape(x_seq[t], (1, 5))

RuntimeError: shape '[1, 4]' is invalid for input of size 5

#Project one – predicting the sentiment of IMDb movie reviews

In [3]:
from torchtext.datasets import IMDB
from collections.abc import Iterable

train_dataset = IMDB(split='train')
test_dataset = IMDB(split='test')

In [4]:
## Step 1: create the datasets

In [5]:
from torch.utils.data.dataset import random_split

torch.manual_seed(1)
train_dataset, valid_dataset = random_split(
    list(train_dataset), [20000, 5000])

In [6]:
## Step 2: find unique tokens (words)
import re 
from collections import Counter, OrderedDict

def tokenizer(text):
    text = re.sub('<[^>]*>', '', text)
    emoticons = re.findall(
        '(?::|;|=)(?:-)?(?:\)|\(|D|P)', text.lower()
    )
    text = re.sub('[\W]+', ' ', text.lower()) + \
        ' '.join(emoticons).replace('-', '')
    tokenized = text.split()
    
    return tokenized


token_counts = Counter()
for label, line in train_dataset:
    tokens = tokenizer(line)
    token_counts.update(tokens)
print('Vocab-size:', len(token_counts))

Vocab-size: 69344


In [7]:
# token_counts

In [8]:
## step 3: encoding each uniqe token into integers

In [9]:
from torchtext.vocab import vocab

sorted_by_freq_tuples = sorted(
    token_counts.items(), key=lambda x: x[1], reverse=True)

ordered_dict = OrderedDict(sorted_by_freq_tuples)
vocab = vocab(ordered_dict)
vocab.insert_token('<pad>', 0)
vocab.insert_token('<unk>', 1)
vocab.set_default_index(1)

In [10]:
# ordered_dict

In [11]:
print([vocab[token] for token in ['this', 'is', 'an', 'example']])

[11, 7, 35, 458]


In [12]:
## Step 3-A: define the functions for transfoemation

In [13]:
text_pipeline = lambda x: [vocab[token] for token in tokenizer(x)]
label_pipeline = lambda x: 1. if x == 'pos' else 0

In [14]:
## Step 3-B: wrap the encode and transformation function

In [15]:
def collate_batch(batch):
    label_list, text_list, lengths = [], [], []
    for _label, _text in batch:
        label_list.append(label_pipeline(_label))
        processed_text = torch.tensor(text_pipeline(_text),
                                      dtype=torch.int64)
        text_list.append(processed_text)
        lengths.append(processed_text.size(0))
    label_list = torch.tensor(label_list)
    lengths = torch.tensor(lengths)
    padded_text_list = nn.utils.rnn.pad_sequence(
        text_list, batch_first=True)
    return padded_text_list, label_list, lengths

## Take a small batch
from torch.utils.data import DataLoader

dataloader = DataLoader(train_dataset, batch_size=4,
                        shuffle=False, collate_fn=collate_batch)


In [16]:
text_batch, label_batch, length_batch = next(iter(dataloader))
print(text_batch)

tensor([[ 163,   10,  880,  ...,    6,    2,  872],
        [  40,   45,   77,  ...,    0,    0,    0],
        [4894,   10,  243,  ...,    0,    0,    0],
        [ 322,   85,  511,  ...,    0,    0,    0]])


In [17]:
print(label_batch)

tensor([0., 0., 0., 1.])


In [18]:
print(length_batch)

tensor([925, 249,  85, 125])


In [19]:
print(text_batch.shape)

torch.Size([4, 925])


In [20]:
batch_size = 32
train_dl = DataLoader(train_dataset, batch_size=batch_size,
                      shuffle=True, collate_fn=collate_batch)
valid_dl = DataLoader(valid_dataset, batch_size=batch_size,
                      shuffle=False, collate_fn=collate_batch)
test_dl = DataLoader(test_dataset, batch_size=batch_size, 
                     shuffle=False, collate_fn=collate_batch)

In [22]:
embedding = nn.Embedding(
    num_embeddings=10,
    embedding_dim=3, 
    padding_idx=0)

# a batch of 2 samples of 4 indices each
text_encoded_input = torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 0]])
print(embedding(text_encoded_input))


tensor([[[ 0.7039, -0.8321, -0.4651],
         [-0.3203,  2.2408,  0.5566],
         [-0.4643,  0.3046,  0.7046],
         [-0.7106, -0.2959,  0.8356]],

        [[-0.4643,  0.3046,  0.7046],
         [ 0.0946, -0.3531,  0.9124],
         [-0.3203,  2.2408,  0.5566],
         [ 0.0000,  0.0000,  0.0000]]], grad_fn=<EmbeddingBackward0>)


In [23]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=2,
                          batch_first=True)
        # self.rnn = nn.GRU(input_size, hidden_size, num_layers,
        #                   batch_first=True)
        # self.rnn = nn.LSTM(input_size, hidden_size, num_layers,
        #                    batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        _, hidden = self.rnn(x)
        out = hidden[-1, :, :] # we use the final hidden state
                               # from the last hidden layer as
                               # the input to the fully connected
                               # layer
        out = self.fc(out)
        return out

    
model = RNN(64, 32)
print(model)
model(torch.randn(5, 3, 64))

RNN(
  (rnn): RNN(64, 32, num_layers=2, batch_first=True)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)


tensor([[ 0.3183],
        [ 0.1230],
        [ 0.1772],
        [-0.1052],
        [-0.1259]], grad_fn=<AddmmBackward0>)

# Building an RNN model for the sentiment analysis task

In [26]:
class RNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, rnn_hidden_size,
                 fc_hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size,
                                      embed_dim,
                                      padding_idx=0)
        self.rnn = nn.LSTM(embed_dim, rnn_hidden_size,
                           batch_first=True)
        self.fc1 = nn.Linear(rnn_hidden_size, fc_hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(fc_hidden_size, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, text, lenghts):
        out = self.embedding(text)
        out = nn.utils.rnn.pack_padded_sequence(
            out, lenghts.cpu().numpy(), enforce_sorted=False, batch_first=True)
        out, (hidden, cell) = self.rnn(out)
        out = hidden[-1, :, :]
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out
    

vocab_size =len(vocab)
embed_dim = 20
rnn_hidden_size = 64
fc_hidden_size = 64
torch.manual_seed(1)
model = RNN(vocab_size, embed_dim,
            rnn_hidden_size, fc_hidden_size)
model

RNN(
  (embedding): Embedding(69346, 20, padding_idx=0)
  (rnn): LSTM(20, 64, batch_first=True)
  (fc1): Linear(in_features=64, out_features=64, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=64, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [37]:
mod = model.parameters()

for p in mod:
    print(p)

Parameter containing:
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.8657,  0.2444, -0.6629,  ...,  0.0457,  0.1530, -0.4757],
        [-0.1110,  0.2927, -0.1578,  ...,  0.9386, -0.1860, -0.6446],
        ...,
        [-0.1404, -0.1202,  0.8836,  ..., -0.0382, -0.0254,  0.5872],
        [ 1.4415, -1.3369, -0.1999,  ...,  1.2954, -1.2241, -0.9001],
        [ 0.3040, -1.1861,  0.5460,  ...,  1.1956,  0.2319,  0.6994]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0742,  0.0847,  0.0768,  ..., -0.0319, -0.0131, -0.0751],
        [-0.0221,  0.0828, -0.1203,  ...,  0.0752,  0.0840, -0.0027],
        [ 0.1068, -0.0608, -0.0829,  ..., -0.0267,  0.0347, -0.0030],
        ...,
        [ 0.1153, -0.1099,  0.0404,  ...,  0.0444, -0.0708,  0.0810],
        [ 0.0772, -0.0308,  0.0414,  ..., -0.1104,  0.0609,  0.0099],
        [-0.0186, -0.0078, -0.1045,  ..., -0.1064,  0.0693, -0.0206]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.

In [29]:
def train(dataloader):
    model.train()
    total_acc, total_loss = 0, 0
    for text_batch, label_batch, lenghts in dataloader:
        optimizer.zero_grad()
        pred = model(text_batch, lenghts)[:, 0]
        loss = loss_fn(pred, label_batch)
        loss.backward()
        optimizer.step()
        total_acc += (
            (pred >= 0.5).float() == label_batch).float().sum().item()
        total_loss += loss.item() * label_batch.size(0)
        
    return total_acc/len(dataloader.dataset), \
           total_loss/len(dataloader.dataset)
            
            

In [30]:
def evaluate(dataloader):
    model.eval()
    total_acc, total_loss = 0, 0
    with torch.no_grad():
        for text_batch, label_batch, lengths in dataloader:
            pred = model(text_batch, lengths)[:, 0]
            loss = loss_fn(pred, label_batch)
            total_acc += (
                (pred >= 0.5).float() == label_batch
            ).float().sum().item()
            total_loss += loss.item() * label_batch.size(0)
    return total_acc / len(dataloader.dataset), \
           total_loss / len(dataloader.dataset)

In [27]:
# The next step is to create a loss function and optimizer (Adam optimizer). 

In [31]:
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train

num_epochs = 10
torch.manual_seed(1)
for epoch in range(num_epochs):
    acc_train, loss_train = train(train_dl)
    acc_valid, loss_valid = evaluate(valid_dl)
    print(f'Epoch {epoch} accuracy: {acc_train:.4f}'
          f'val_accuracy: {acc_valid:.4f}')
    

Epoch 0 accuracy: 0.6010val_accuracy: 0.6608
Epoch 1 accuracy: 0.7359val_accuracy: 0.7332
Epoch 2 accuracy: 0.7707val_accuracy: 0.7946
Epoch 3 accuracy: 0.8387val_accuracy: 0.8146
Epoch 4 accuracy: 0.8925val_accuracy: 0.8524
Epoch 5 accuracy: 0.9183val_accuracy: 0.8636
Epoch 6 accuracy: 0.9375val_accuracy: 0.8602
Epoch 7 accuracy: 0.9536val_accuracy: 0.8626
Epoch 8 accuracy: 0.9666val_accuracy: 0.8638
Epoch 9 accuracy: 0.9762val_accuracy: 0.8678


In [35]:
acc_test, _ = evaluate(test_dl)
print(f'test_accuracy: {acc_test:.4f}')

Exception: OnDiskCache Exception: C:\Users\PipBoy3000/.cache\torch\text\datasets\IMDB\aclImdb_v1\test\pos expected to be written by different process, but file is not ready in 300 seconds.
This exception is thrown by __iter__ of MapperIterDataPipe(datapipe=UnBatcherIterDataPipe, fn=functools.partial(<function _wait_promise_fn at 0x000001CEE37153A0>, 300), input_col=None, output_col=None)

In [33]:
class RNN(nn.Module):
    def __init__(self, vocab_size, embed_dim,
                 rnn_hidden_size, fc_hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(
            vocab_size, embed_dim, padding_idx=0)
        self.rnn = nn.LSTM(embed_dim, rnn_hidden_size,
                           batch_first=True, bidirectional=True)
        self.fc1 = nn.Linear(rnn_hidden_size*2, fc_hidden_size)
        self.relu= nn.ReLU()
        self.fc2 = nn.Linear(fc_hidden_size, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, text, lenghts):
        out = self.embedding(text)
        out = nn.utils.rnn.pack_padded_sequence(
            out, lengths.cpu().numpy(), enforce_sorted=False, batch_first=True)
        _, (hidden, cell) = self.rnn(out)
        out = torch.cat((hidden[-2, :, :],
                         hidden[-1, :, :]), dim=1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out
    
torch.manual_seed(1)
model = RNN(vocab_size, embed_dim, 
            rnn_hidden_size, fc_hidden_size)
model

RNN(
  (embedding): Embedding(69346, 20, padding_idx=0)
  (rnn): LSTM(20, 64, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=64, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

# Project two – character-level language modeling in PyTorch