# Technical cells

In [16]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
!pip install --upgrade torch
!pip install --upgrade wandb
!pip install --upgrade catalyst
!pip install --upgrade torchtext
!wandb login c54b2fcb6b8ca2808f5be303a8a3b6e464f52cca

Requirement already up-to-date: torch in /usr/local/lib/python3.6/dist-packages (1.4.0)
Requirement already up-to-date: wandb in /usr/local/lib/python3.6/dist-packages (0.8.32)
Requirement already up-to-date: catalyst in /usr/local/lib/python3.6/dist-packages (20.4.1)
Requirement already up-to-date: torchtext in /usr/local/lib/python3.6/dist-packages (0.5.0)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[32mSuccessfully logged in to Weights & Biases![0m


In [18]:
import torch
import torchtext
import wandb
import catalyst
# Report the library versions in use so that recorded results can be reproduced.
for label, module in (
    ("Torchtext", torchtext),
    ("PyTorch", torch),
    ("Catalyst", catalyst),
    ("Wandb", wandb),
):
    print(label + " Version:", module.__version__)

Torchtext Version: 0.5.0
PyTorch Version: 1.4.0
Catalyst Version: 20.04.1
Wandb Version: 0.8.32


In [0]:
import os
# CUDA_LAUNCH_BLOCKING=1 forces CUDA kernel launches to run synchronously, so a
# device-side error is reported at the call that caused it.
# NOTE(review): this slows GPU training and looks like a debugging leftover —
# confirm it is still wanted before long training runs.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [0]:
def get_device():
    """Return the first CUDA device if one is available, otherwise the CPU device."""
    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

device = get_device()


# Pipeline Example for SNLI

In [0]:
import pdb
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchtext import data
from torchtext import datasets
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm
from catalyst import dl
import wandb

# Device string handed to the torchtext iterators and the catalyst runner.
# Fall back to the CPU instead of hard-coding 'cuda', so the notebook still runs
# on a runtime without a GPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [0]:
# Directory name used both as wandb's local `dir` and as the catalyst `logdir`
# (checkpoints end up under `<logdir>/checkpoints`, see the training output).
# Note: this cell only names the directory; nothing here creates it.
logdir = 'wandb-log-directory'

In [23]:
# Start a Weights & Biases run in the "text-augmentation" project, using `logdir`
# as wandb's local directory.
wandb.init(project="text-augmentation", dir=logdir)

W&B Run: https://app.wandb.ai/msaidov/text-augmentation/runs/2ljefscp


unclosed <ssl.SSLSocket fd=92, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6, laddr=('172.28.0.2', 57600), raddr=('35.186.228.49', 443)>



In [0]:
class BiLstm(nn.Module):
    """Siamese bidirectional-LSTM classifier for (premise, hypothesis) pairs.

    Both sentences share one embedding table and one multi-layer bidirectional
    LSTM. For each sentence the final hidden and cell states of every layer and
    direction are flattened into one vector; the two sentence vectors are
    concatenated and passed through an MLP head that emits 3 class logits.

    Args:
        vocab_size: number of rows of the embedding table.
        h_size: embedding width and LSTM hidden size (one value is used for both).
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability between LSTM layers and inside the MLP head.
        padding_idx: index of the padding token; its embedding row is kept at zero.
    """

    def __init__(self, vocab_size, h_size, n_layers, dropout, padding_idx):
        super(BiLstm, self).__init__()
        self.embedding = nn.Embedding(vocab_size, h_size, padding_idx=padding_idx, scale_grad_by_freq=True)
        self.lstm = nn.LSTM(h_size, h_size, n_layers, bidirectional=True, dropout=dropout)
        # Per sentence: (h_n, c_n) x 2 directions x n_layers x h_size = 4 * n_layers * h_size
        # features; premise and hypothesis are concatenated, hence the leading 2.
        self.out = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(2 * 4 * n_layers * h_size, h_size * 3),
            nn.BatchNorm1d(h_size * 3),
            nn.PReLU(),
            nn.Dropout(dropout),
            nn.Linear(h_size * 3, h_size * 3),
            nn.BatchNorm1d(h_size * 3),
            nn.PReLU(),
            nn.Dropout(dropout),
            nn.Linear(h_size * 3, 3)
        )

        self.init_weights()

    def _encode(self, tokens, lengths):
        """Encode one padded batch of sentences into a (batch, 4 * n_layers * h_size) tensor."""
        embedded = self.embedding(tokens)

        # Optimize performance of the RNN by omitting padding tokens.
        # Recent PyTorch releases require `lengths` to live on the CPU even when
        # the inputs are on the GPU; as_tensor also accepts a plain list.
        lengths = torch.as_tensor(lengths).cpu()
        packed = nn.utils.rnn.pack_padded_sequence(input=embedded, lengths=lengths, enforce_sorted=False)
        _, final_states = self.lstm(packed)

        # final_states is (h_n, c_n), each (n_layers * 2, batch, h_size)
        state = torch.cat(final_states, dim=-1)
        # move the batch axis first, then flatten everything else per example
        state = state.permute(1, 0, 2).contiguous()
        return state.view(state.size(0), -1)

    def forward(self, batch):
        """Compute class logits for a batch.

        Args:
            batch: pair ``(premise, hypothesis)`` where each element is itself a
                pair ``(token_ids, lengths)``. Token ids are time-major
                (seq_len, batch), since neither the packing call nor the LSTM
                uses ``batch_first``.

        Returns:
            Tensor of shape (batch, 3) with unnormalized class scores.
        """
        prem, hyp = batch
        prem, prem_len = prem
        hyp, hyp_len = hyp

        h_prem = self._encode(prem, prem_len)
        h_hyp = self._encode(hyp, hyp_len)

        return self.out(torch.cat([h_prem, h_hyp], dim=-1))

    def init_weights(self):
        """Initialize embeddings uniformly in [-1/sqrt(d), 1/sqrt(d)], keeping the padding row at zero."""
        d = self.embedding.weight.size(1)
        bound = 1.0 / np.sqrt(d)
        nn.init.uniform_(self.embedding.weight, -bound, bound)

        # uniform_ overwrites every row, including the padding row that
        # nn.Embedding zeroed at construction time; restore it.
        pad = self.embedding.padding_idx
        if pad is not None:
            with torch.no_grad():
                self.embedding.weight[pad].fill_(0)

In [25]:
%%time

# Shared text field for premise and hypothesis: lower-cased and spaCy-tokenized.
# include_lengths=True makes each batch field a (tokens, lengths) pair, which is
# what BiLstm.forward unpacks.
TEXT = data.Field(lower=True, tokenize='spacy', include_lengths=True)
# Target field holding the class label.
LABEL = data.LabelField(sequential=False, is_target=True)

# Load the SNLI train / validation / test splits (slow: about 1.5 minutes in the recorded run).
train, valid, test = datasets.SNLI.splits(TEXT, LABEL)

CPU times: user 1min 25s, sys: 1.9 s, total: 1min 27s
Wall time: 1min 27s


In [0]:
# Build the token vocabulary, keeping only tokens that occur at least 5 times.
# NOTE(review): the validation split also contributes tokens here; building the
# vocabulary from `train` alone would keep validation data fully held out — confirm
# this is intended.
TEXT.build_vocab(train, valid, min_freq=5)
# Build the label vocabulary from the training split.
LABEL.build_vocab(train)

In [0]:
# Simple wrapper to join torchtext and catalyst API

class IteratorWrapper(DataLoader):
    """Expose a torchtext iterator through the DataLoader interface.

    ``DataLoader.__init__`` is not called; the attributes are filled in by hand
    from the wrapped iterator. Each torchtext batch is converted into a dict with
    a ``'features'`` entry (premise, hypothesis) and a ``'targets'`` entry (label).
    """
    __initialized__ = False

    def __init__(self, iter: iter):
        self.batch_size = iter.batch_size
        # fixed loader settings
        for attr_name, attr_value in (
            ('num_workers', 1),
            ('collate_fn', None),
            ('pin_memory', False),
            ('drop_last', False),
            ('timeout', 0),
            ('worker_init_fn', None),
        ):
            setattr(self, attr_name, attr_value)
        # the wrapped iterator plays the role of both sampler and batch sampler
        self.sampler = self.batch_sampler = iter
        self.__initialized__ = True

    def __iter__(self):
        # one dict per torchtext batch, produced lazily
        return (
            {
                'features': (batch.premise, batch.hypothesis),
                'targets': batch.label,
            }
            for batch in self.batch_sampler.__iter__()
        )

    def __len__(self):
        # number of batches reported by the wrapped iterator
        return len(self.batch_sampler)

In [0]:
# Hyperparameters: kept as module-level globals; the training cell also logs
# them to W&B as the run config.
n_layers = 4
h_size = 128
# BiLstm has no separate embedding-size argument: its embedding table is h_size
# wide. Tie embed_dim to h_size so the value logged to W&B describes the real
# model (it used to be an unused 512).
embed_dim = h_size
num_epochs = 10
dropout = 0.1
batch_size = 512 # BatchNorm does not work properly with small batch sizes
vocab_size = len(TEXT.vocab)

# Bucketed iterators for the three splits, wrapped so catalyst can consume them.
train_iter, valid_iter, test_iter = data.BucketIterator.splits((train, valid, test), batch_size=batch_size, device=DEVICE)
train_iter = IteratorWrapper(train_iter)
valid_iter = IteratorWrapper(valid_iter)
test_iter = IteratorWrapper(test_iter)

# Only train/valid go to the runner; test_iter is used by the evaluation cell.
loaders = {'train': train_iter, 'valid': valid_iter}

model = BiLstm(
    vocab_size=vocab_size,
    h_size=h_size,
    n_layers=n_layers,
    dropout=dropout,
    padding_idx=TEXT.vocab.stoi[TEXT.pad_token]
)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Halve the learning rate after 2 epochs without improvement of the monitored metric.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, patience=2, factor=0.5)

In [14]:
# Pass the device by keyword: the first positional parameter of SupervisedRunner
# is `model`, so `dl.SupervisedRunner(DEVICE)` stored the device string as the
# model and left the runner's device unset.
runner = dl.SupervisedRunner(device=DEVICE)
runner.train(model=model,
             loaders=loaders,
             num_epochs=num_epochs,
             logdir=logdir,
             criterion=nn.CrossEntropyLoss(),
             optimizer=optimizer,
             scheduler=scheduler,
             callbacks=[
                dl.callbacks.CheckpointCallback(2),
                dl.callbacks.AccuracyCallback(),
                dl.callbacks.EarlyStoppingCallback(3), # stop training, if valid loss does not improve last 3 epochs
                dl.callbacks.WandbLogger(
                    project="text-augmentation"
                )
             ],
             # run metadata and hyperparameters recorded with the W&B run
             monitoring_params={
                 'entity': 'msaidov',
                 'project': 'text-augmentation',
                 'name': 'snli-dimension-fix',
                 'group': 'examples',
                 'config': {
                     'model': 'bilstm',
                     'optimizer': str(optimizer),
                     'scheduler': 'plateau',
                     'early_stop': 3,
                     'vocab_size': vocab_size,
                     'h_size': h_size,
                     'n_layers': n_layers,
                     'dropout': dropout,
                     'batch_size': batch_size,
                     'embed_dim': embed_dim,
                 },
             },
#              check=True, # set if you want to check pipeline for correctness, without actual training
             verbose=True)

3/10 * Epoch (train): 100% 1073/1073 [07:11<00:00,  2.49it/s, accuracy01=0.807, loss=0.515]
3/10 * Epoch (valid): 100% 20/20 [00:02<00:00,  7.90it/s, accuracy01=0.614, loss=0.870]
[2020-04-18 14:30:50,985] 
3/10 * Epoch 3 (_base): lr=0.0010 | momentum=0.9000
3/10 * Epoch 3 (train): accuracy01=0.7668 | loss=0.5722
3/10 * Epoch 3 (valid): accuracy01=0.7570 | loss=0.5984


INFO:metrics_logger:
3/10 * Epoch 3 (_base): lr=0.0010 | momentum=0.9000
3/10 * Epoch 3 (train): accuracy01=0.7668 | loss=0.5722
3/10 * Epoch 3 (valid): accuracy01=0.7570 | loss=0.5984


4/10 * Epoch (train): 100% 1073/1073 [07:10<00:00,  2.49it/s, accuracy01=0.816, loss=0.499]
4/10 * Epoch (valid): 100% 20/20 [00:02<00:00,  7.87it/s, accuracy01=0.632, loss=0.853]
[2020-04-18 14:38:04,541] 
4/10 * Epoch 4 (_base): lr=0.0010 | momentum=0.9000
4/10 * Epoch 4 (train): accuracy01=0.7891 | loss=0.5253
4/10 * Epoch 4 (valid): accuracy01=0.7659 | loss=0.5858


INFO:metrics_logger:
4/10 * Epoch 4 (_base): lr=0.0010 | momentum=0.9000
4/10 * Epoch 4 (train): accuracy01=0.7891 | loss=0.5253
4/10 * Epoch 4 (valid): accuracy01=0.7659 | loss=0.5858


5/10 * Epoch (train): 100% 1073/1073 [07:15<00:00,  2.46it/s, accuracy01=0.824, loss=0.473]
5/10 * Epoch (valid): 100% 20/20 [00:02<00:00,  7.92it/s, accuracy01=0.693, loss=0.745]
[2020-04-18 14:45:23,416] 
5/10 * Epoch 5 (_base): lr=0.0010 | momentum=0.9000
5/10 * Epoch 5 (train): accuracy01=0.8060 | loss=0.4887
5/10 * Epoch 5 (valid): accuracy01=0.7707 | loss=0.5823


INFO:metrics_logger:
5/10 * Epoch 5 (_base): lr=0.0010 | momentum=0.9000
5/10 * Epoch 5 (train): accuracy01=0.8060 | loss=0.4887
5/10 * Epoch 5 (valid): accuracy01=0.7707 | loss=0.5823


6/10 * Epoch (train): 100% 1073/1073 [07:15<00:00,  2.46it/s, accuracy01=0.826, loss=0.465]
6/10 * Epoch (valid): 100% 20/20 [00:02<00:00,  7.73it/s, accuracy01=0.649, loss=0.770]
[2020-04-18 14:52:41,877] 
6/10 * Epoch 6 (_base): lr=0.0010 | momentum=0.9000
6/10 * Epoch 6 (train): accuracy01=0.8210 | loss=0.4551
6/10 * Epoch 6 (valid): accuracy01=0.7704 | loss=0.5833


INFO:metrics_logger:
6/10 * Epoch 6 (_base): lr=0.0010 | momentum=0.9000
6/10 * Epoch 6 (train): accuracy01=0.8210 | loss=0.4551
6/10 * Epoch 6 (valid): accuracy01=0.7704 | loss=0.5833


7/10 * Epoch (train): 100% 1073/1073 [07:16<00:00,  2.46it/s, accuracy01=0.848, loss=0.397]
7/10 * Epoch (valid): 100% 20/20 [00:02<00:00,  7.87it/s, accuracy01=0.675, loss=0.771]
[2020-04-18 15:00:00,862] 
7/10 * Epoch 7 (_base): lr=0.0010 | momentum=0.9000
7/10 * Epoch 7 (train): accuracy01=0.8341 | loss=0.4247
7/10 * Epoch 7 (valid): accuracy01=0.7772 | loss=0.5795


INFO:metrics_logger:
7/10 * Epoch 7 (_base): lr=0.0010 | momentum=0.9000
7/10 * Epoch 7 (train): accuracy01=0.8341 | loss=0.4247
7/10 * Epoch 7 (valid): accuracy01=0.7772 | loss=0.5795


8/10 * Epoch (train): 100% 1073/1073 [07:17<00:00,  2.45it/s, accuracy01=0.836, loss=0.433]
8/10 * Epoch (valid): 100% 20/20 [00:02<00:00,  7.57it/s, accuracy01=0.693, loss=0.838]
[2020-04-18 15:07:21,423] 
8/10 * Epoch 8 (_base): lr=0.0010 | momentum=0.9000
8/10 * Epoch 8 (train): accuracy01=0.8470 | loss=0.3957
8/10 * Epoch 8 (valid): accuracy01=0.7833 | loss=0.5913


INFO:metrics_logger:
8/10 * Epoch 8 (_base): lr=0.0010 | momentum=0.9000
8/10 * Epoch 8 (train): accuracy01=0.8470 | loss=0.3957
8/10 * Epoch 8 (valid): accuracy01=0.7833 | loss=0.5913


9/10 * Epoch (train): 100% 1073/1073 [07:21<00:00,  2.43it/s, accuracy01=0.846, loss=0.371]
9/10 * Epoch (valid): 100% 20/20 [00:03<00:00,  5.14it/s, accuracy01=0.684, loss=0.742]
[2020-04-18 15:14:46,899] 
9/10 * Epoch 9 (_base): lr=0.0010 | momentum=0.9000
9/10 * Epoch 9 (train): accuracy01=0.8584 | loss=0.3685
9/10 * Epoch 9 (valid): accuracy01=0.7788 | loss=0.6068


INFO:metrics_logger:
9/10 * Epoch 9 (_base): lr=0.0010 | momentum=0.9000
9/10 * Epoch 9 (train): accuracy01=0.8584 | loss=0.3685
9/10 * Epoch 9 (valid): accuracy01=0.7788 | loss=0.6068


10/10 * Epoch (train): 100% 1073/1073 [07:19<00:00,  2.44it/s, accuracy01=0.857, loss=0.343]
10/10 * Epoch (valid): 100% 20/20 [00:02<00:00,  7.62it/s, accuracy01=0.675, loss=0.793]
Epoch    10: reducing learning rate of group 0 to 5.0000e-04.
[2020-04-18 15:22:09,430] 
10/10 * Epoch 10 (_base): lr=0.0005 | momentum=0.9000
10/10 * Epoch 10 (train): accuracy01=0.8689 | loss=0.3431
10/10 * Epoch 10 (valid): accuracy01=0.7806 | loss=0.6288


INFO:metrics_logger:
10/10 * Epoch 10 (_base): lr=0.0005 | momentum=0.9000
10/10 * Epoch 10 (train): accuracy01=0.8689 | loss=0.3431
10/10 * Epoch 10 (valid): accuracy01=0.7806 | loss=0.6288


Early stop at 10 epoch
Top best models:
wandb-log-directory/checkpoints/train.7.pth	0.5795
wandb-log-directory/checkpoints/train.5.pth	0.5823



unclosed <ssl.SSLSocket fd=94, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6, laddr=('172.28.0.2', 54248), raddr=('35.186.228.49', 443)>



1/10 * Epoch (train):  34% 368/1073 [02:30<04:47,  2.45it/s, accuracy01=0.645, loss=0.791]

In [15]:
# Evaluate on the test set

runner = dl.SupervisedRunner()
# The network outputs class logits; the predicted class is the arg-max over axis 1.
logits = runner.predict_loader(model, test_iter, verbose=True)
y_pred = np.argmax(logits, axis=1)

# Second pass over the loader to collect the gold labels.
# Assumes the loader yields batches in the same order on both passes.
target_chunks = [batch['targets'].cpu().numpy() for batch in test_iter]
y_true = np.concatenate(target_chunks)

test_score = accuracy_score(y_true, y_pred)
print(test_score)

# send test score to wandb
wandb.log({'acc/test': test_score})

1/1 * Epoch (infer): 100% 20/20 [00:03<00:00,  6.57it/s]
0.782064332247557



unclosed file <_io.TextIOWrapper name='/tmp/wandb/run-20200418_140859-1677cths/wandb-metadata.json' mode='r' encoding='UTF-8'>


unclosed <socket.socket fd=86, family=AddressFamily.AF_INET, type=2049, proto=0, laddr=('0.0.0.0', 0)>


unclosed <ssl.SSLSocket fd=86, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6, laddr=('172.28.0.2', 57464), raddr=('35.186.228.49', 443)>

