In [1]:
# !python -m spacy download en_core_web_sm
# import spacy
# spacy.load('en_core_web_sm')

In [2]:
import spacy
import torch
import torchtext
from torchtext.legacy import datasets, data

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Field objects describe how each column of the dataset is processed.
# Review text is tokenised with the spaCy "en_core_web_sm" pipeline.
text_field = data.Field(
    tokenize="spacy",
    tokenizer_language="en_core_web_sm",
)
# Labels are kept as floats, which is what the BCEWithLogitsLoss criterion
# used for training further down expects as its target.
label_field = data.LabelField(dtype=torch.float)

# Load the IMDB movie-review dataset as its train and test parts.
train, test = datasets.IMDB.splits(
    text_field=text_field,
    label_field=label_field,
)

In [4]:
# Hold out 20% of the original training data for validation.
# The IMDB training part has 25k reviews, so this gives 20k / 5k examples.
train_set, valid_set = train.split(0.8)
# Enforce the expected sizes instead of evaluating them and dropping the result.
assert (len(train_set), len(valid_set)) == (20_000, 5_000)

# Build both vocabularies from the training portion only, so nothing from the
# validation data influences the token/label mappings.
text_field.build_vocab(train_set, max_size=25_000)
label_field.build_vocab(train_set)

# 25_000 most frequent tokens plus the two specials '<unk>' and '<pad>'.
assert len(text_field.vocab) == 25_002

In [5]:
# Map int to string and string to int
# text_field.vocab.itos[186] -> 'though'
# text_field.vocab.stoi['though'] -> 186

In [6]:
text_field.vocab.itos[:10]

['<unk>', '<pad>', 'the', ',', '.', 'a', 'and', 'of', 'to', 'is']

In [8]:
# Token count of the longest training review. Kept under a name so it can be
# inspected; previously the value was computed and silently discarded because
# it was not the last expression of the cell.
longest_review_len = max(len(example.text) for example in train_set)

# Padding every review to that length would be wasteful, so use BucketIterator
# (presumably batching reviews of similar length together, per the torchtext
# docs) to keep the padding per batch small.
train_buckets, valid_buckets, test_buckets = data.BucketIterator.splits(
    (train_set, valid_set, test), batch_size=64, device=device
)

In [9]:
from torch import nn


class NLPModule(nn.Module):
    """Embedding -> single-layer RNN -> linear head.

    Args:
        num_embedding: number of rows in the embedding table (vocabulary size).
        embedding_dim: size of each token embedding vector.
        hidden_size: number of features in the RNN hidden state.
        out_features: number of values the linear head produces per sequence.
    """

    def __init__(self, num_embedding, embedding_dim, hidden_size, out_features):
        # Sub-modules can only be registered after nn.Module.__init__ has run.
        super().__init__()
        # Embedding layer: maps token ids to vectors in a learned semantic
        # space of words.
        self.embedding = nn.Embedding(
            num_embeddings=num_embedding,
            embedding_dim=embedding_dim,
        )
        self.rnn = nn.RNN(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            num_layers=1,
        )
        self.linear = nn.Linear(in_features=hidden_size, out_features=out_features)

    def forward(self, input):
        """Map a batch of token-id sequences to one output vector per sequence.

        Args:
            input: integer tensor of token ids. The RNN is built without
                ``batch_first``, so the layout is assumed to be
                (seq_len, batch) -- TODO confirm against the iterator.

        Returns:
            The linear head applied to the final hidden state, i.e. a tensor
            shaped (1, batch, out_features) for the layout above.
        """
        vectors = self.embedding(input)
        # nn.RNN returns (output for every time step, final hidden state).
        # With this single, unidirectional layer the final hidden state is the
        # same as the last time step of the per-step output.
        _, final_hidden = self.rnn(vectors)
        return self.linear(final_hidden)

In [53]:
# Hyper-parameters of the classifier.
num_embedding = len(text_field.vocab)  # one embedding row per vocabulary entry
embedding_dim = 100
hidden_size = 256
out_features = 1  # one logit per review (trained with BCEWithLogitsLoss below)

model = NLPModule(
    num_embedding=num_embedding,
    embedding_dim=embedding_dim,
    hidden_size=hidden_size,
    out_features=out_features,
)

In [54]:
def policz(mod):
    """Count ("policz" is Polish for "count") the scalar parameters of ``mod``.

    Args:
        mod: any object exposing ``parameters()`` that yields tensors.

    Returns:
        The sum of ``numel()`` over all parameters, as an int.
    """
    total = 0
    for param in mod.parameters():
        total += param.numel()
    return total


# Total number of parameters in the network (embedding + RNN + linear head).
policz(mod=model)

2592105

In [55]:
# Stochastic gradient descent (SGD): iteratively minimise the cost function
# (i.e. search for its minimum).

import torch.optim as optim

# The optimiser must be given the parameters of `model`, the network that is
# actually trained below. The previous `module` is not defined anywhere in this
# notebook, and an optimiser bound to another object never updates `model`.
optimiser = optim.SGD(model.parameters(), lr=1e-3)

# Binary cross-entropy that takes raw logits as input.
criterion = nn.BCEWithLogitsLoss()

In [74]:
# Move the loss module and the network to the selected device.
# (Fixes the `ciretrion` typo, which created a stray, never-used name.)
criterion = criterion.to(device)
model = model.to(device)

def binary_accuracy(prediction, target):
    """Fraction of predictions that match the binary targets.

    Args:
        prediction: tensor of raw logits (a sigmoid is applied here).
        target: tensor of 0/1 labels, comparable element-wise with
            ``prediction``.

    Returns:
        A scalar tensor holding the mean of the element-wise matches.
    """
    # torch.sigmoid replaces the deprecated F.sigmoid and does not rely on the
    # `F` alias, which this notebook only imports in a later cell.
    probabilities = torch.sigmoid(prediction)
    # Round to the nearest class: 0 or 1.
    predicted_labels = torch.round(probabilities)

    matches = (predicted_labels == target).float()
    return torch.mean(matches)


# Quick sanity check. The first argument is treated as logits: every positive
# logit maps to class 1 and the zero logit rounds to class 0, so all five
# predictions match their targets.
T = torch.tensor
sample_logits = T([0, 0.5, .2, 0.001, 0.8])
sample_labels = T([0, 1, 1, 1, 1])
binary_accuracy(sample_logits, sample_labels)

tensor(1.)

In [89]:
import numpy as np

def train(mod, data, optimiser, criterion):
    """Run one pass over ``data``, updating ``mod`` after every batch.

    Args:
        mod: network called as ``mod(batch.text)``. Its output is squeezed on
            dims 0 and 1, so it is expected to be shaped (1, batch, 1).
        data: iterable of batches exposing ``.text`` and ``.label``.
        optimiser: optimiser whose ``zero_grad()``/``step()`` are called once
            per batch.
        criterion: loss callable taking ``(output, label)``.

    Returns:
        ``(mean_loss, mean_accuracy)`` as Python floats, averaged over the
        batches seen; ``(nan, nan)`` when ``data`` yields no batches.
    """
    # Make sure layers with train/eval-specific behaviour are in training mode.
    mod.train()

    losses = []
    metrics = []
    for bucket in data:
        optimiser.zero_grad()
        # Drop the leading and trailing singleton dims so the output lines up
        # with the 1-D label tensor.
        output = mod(bucket.text).squeeze(0).squeeze(1)
        loss = criterion(output, bucket.label)
        metric = binary_accuracy(output, bucket.label)
        loss.backward()
        optimiser.step()

        losses.append(loss.item())
        metrics.append(metric.item())
        # Running mean and latest value for loss and accuracy.
        print(np.mean(losses), losses[-1], np.mean(metrics), metrics[-1])

    if not losses:
        return float("nan"), float("nan")
    # Return the epoch summary instead of the former `...` placeholder.
    return float(np.mean(losses)), float(np.mean(metrics))
    
# One training pass over the bucketed training batches.
train(mod=model, data=train_buckets, optimiser=optimiser, criterion=criterion)

0.6925106644630432 0.6925106644630432 0.515625 0.515625
0.6913136541843414 0.6901166439056396 0.5546875 0.59375
0.692987302939097 0.6963346004486084 0.5208333333333334 0.453125
0.6937338262796402 0.6959733963012695 0.52734375 0.546875
0.6938868165016174 0.6944987773895264 0.521875 0.5
0.6937883794307709 0.6932961940765381 0.5182291666666666 0.5
0.6934229561260769 0.6912304162979126 0.5245535714285714 0.5625
0.6934435442090034 0.6935876607894897 0.525390625 0.53125
0.6932776901457045 0.6919508576393127 0.5260416666666666 0.53125
0.6932907223701477 0.6934080123901367 0.5203125 0.46875
0.6932447986169294 0.6927855610847473 0.5198863636363636 0.515625
0.6928315808375677 0.6882861852645874 0.5260416666666666 0.59375
0.6927297252875108 0.6915074586868286 0.5300480769230769 0.578125
0.6927759732518878 0.6933771967887878 0.53125 0.546875
0.6926431099573771 0.6907830238342285 0.53125 0.53125
0.6927378997206688 0.694159746170044 0.52734375 0.46875
0.6928126671734978 0.6940089464187622 0.52665441

KeyboardInterrupt: 

In [51]:
import torch.nn.functional as F

# Cost function demo: with the target fixed at 1, the loss gets smaller as the
# predicted logit grows (i.e. as sigmoid(logit) moves towards the target).
for logit in (0.1, 0.4, 0.7, 0.9):
    target = torch.ones([1, 1], dtype=torch.float32)  # a single positive label
    input_ = torch.full([1, 1], logit)  # a prediction (raw logit)

    print(F.binary_cross_entropy_with_logits(input_, target))

# Show the tensors used in the last iteration.
target, input_

tensor(0.6444)
tensor(0.5130)
tensor(0.4032)
tensor(0.3412)


(tensor([[1.]]), tensor([[0.9000]]))

In [10]:
# Minimal nn.RNN example: input_size=3, hidden_size=2, num_layers=1.
# (Doctest `>>>` prompts removed so the cell is plain, valid Python.)
rnn = nn.RNN(3, 2, 1)
# NOTE: `input` shadows the builtin; the name is kept from the original example.
input = torch.randn(5, 3, 3)  # (seq_len=5, batch=3, input_size=3)
h0 = torch.randn(1, 3, 2)  # initial hidden state: (num_layers=1, batch=3, hidden_size=2)
output, hn = rnn(input, h0)
# `output` holds the hidden state of every time step and `hn` the final one,
# so output[-1] equals hn[0] for this single-layer RNN.
output, hn

(tensor([[[-0.8500,  0.2006],
          [-0.4605, -0.6672],
          [-0.4719, -0.0198]],
 
         [[ 0.6130, -0.8700],
          [-0.6430,  0.6836],
          [-0.8753, -0.3436]],
 
         [[-0.9134,  0.4095],
          [-0.8148, -0.9893],
          [ 0.3423,  0.2943]],
 
         [[-0.8185, -0.2287],
          [-0.7922, -0.1060],
          [-0.7463,  0.4932]],
 
         [[ 0.0135,  0.7411],
          [-0.3716, -0.1680],
          [-0.7156, -0.5003]]], grad_fn=<StackBackward>),
 tensor([[[ 0.0135,  0.7411],
          [-0.3716, -0.1680],
          [-0.7156, -0.5003]]], grad_fn=<StackBackward>))

In [13]:
# An Embedding module holding 100 vectors of size 19.
# (Doctest `>>>` prompts removed so the cell is plain, valid Python.)
embedding = nn.Embedding(100, 19)
# A batch of 2 samples with 8 indices each.
input = torch.LongTensor([[1,98,1,0, 4,3,2,9],[4,3,2,9, 4,3,2,9]])
# The lookup yields one 19-dimensional vector per index: shape (2, 8, 19).
embedding(input)

tensor([[[ 9.4736e-01,  7.0916e-01,  4.4942e-01,  3.3067e-01,  2.6436e+00,
           4.6544e-01,  2.0053e+00,  8.8530e-01, -1.4538e+00, -1.3253e-01,
           3.2943e-01,  3.3413e-01, -2.3301e-01, -1.5218e+00,  3.8434e-01,
           9.0579e-01, -2.8931e-01, -8.0191e-02, -1.7367e+00],
         [-1.0324e+00, -1.0683e+00,  1.1531e-01, -1.7621e+00, -5.2225e-01,
           1.5228e-01, -1.3620e-01,  9.9777e-01, -1.6727e+00, -4.4506e-01,
           1.4775e+00, -1.1431e+00, -3.9191e-01, -8.5234e-01,  9.5088e-01,
           3.7345e-01,  6.9964e-01,  8.2460e-02, -4.1564e-01],
         [ 9.4736e-01,  7.0916e-01,  4.4942e-01,  3.3067e-01,  2.6436e+00,
           4.6544e-01,  2.0053e+00,  8.8530e-01, -1.4538e+00, -1.3253e-01,
           3.2943e-01,  3.3413e-01, -2.3301e-01, -1.5218e+00,  3.8434e-01,
           9.0579e-01, -2.8931e-01, -8.0191e-02, -1.7367e+00],
         [ 5.7972e-01, -1.4694e+00,  3.6318e-01,  1.4122e+00, -1.2245e+00,
          -4.7360e-01,  3.6868e-01,  1.2903e+00, -1.0298e+00,