# Prepare the test bench

In [1]:
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_VISIBLE_DEVICES=1


In [2]:
# basic imports
import re
import gc
import ast
import copy
import time
import math
import glob
import os,sys
import shutil
import pickle
import argparse
import warnings
import numpy as np
import pandas as pd
from tqdm import tqdm

# torch imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.nn.functional as F
from torchtext import data, datasets
from torch.optim.lr_scheduler import ReduceLROnPlateau

# task specific custom modules
from pytorch.encoder_decoder import make_model
from pytorch.transformer_train_utils import str2bool
from pytorch.encoder_decoder_utils import (Batch,rebatch,
                                           SimpleLossCompute,
                                           cleaner,
                                           tokenize,
                                           greedy_decode_batch,
                                           beam_decode_batch,
                                           lookup_words,
                                           run_epoch
                                           )

from pytorch.metrics import (score_task1,
                             score_task2)

# utils
from tensorboardX import SummaryWriter

In [3]:
# Stand-in for the argparse namespace of the training script: an instance of
# an ad-hoc empty class whose attributes are filled in one go below.
args = type('test', (object,), {})()

vars(args).update({
    # checkpoint to restore and model geometry
    'resume': 'weights/encdec_savva_l4_heavy_h512e512_1e4_resume1e5_best.pth.tar',
    'batch_size': 512,
    'hidden_size': 512,
    'num_layers': 4,
    'num_classes': 3,
    'emb_size': 512,
    'cn_emb_size': 0,
    'num_cn': 0,
    'tb_name': 'beam_test',
    # processed data splits
    'train_df_path': '../data/proc_train.csv',
    'trn_df_path': '../data/proc_trn.csv',
    'val_df_path': '../data/proc_val.csv',
    'test_df_path': '../data/proc_test.csv',
    # vocabulary / regularisation / architecture switches
    'min_freq': 0,
    'dropout': 0.2,
    'heavy_decoder': True,
    'add_input_skip': True,
})

In [4]:
# Notebook-wide state: best validation metric seen so far and a counter of
# validation mini-batches (both start from zero here).
best_met = 0
valid_minib_counter = 0

# args = parser.parse_args()

# Use the GPU only when one is actually available; otherwise fall back to the
# CPU so the notebook does not fail on the first `.to(DEVICE)` call.
# 'cuda:0' is the first *visible* device, i.e. whatever CUDA_VISIBLE_DEVICES
# (set in the first cell) exposes.
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda:0' if USE_CUDA else 'cpu')

print("CUDA:", USE_CUDA)
print(DEVICE)

# Seed every random number generator used below for reproducibility.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
# Exported as an environment variable; no reader of it is visible in this notebook.
os.environ["USE_CUDA"] = str(USE_CUDA)

tb_name = args.tb_name

CUDA: True
cuda:0


In [5]:
# Collection of per-label writers; it starts out empty.
label_writers = []

# Reserved symbols of the source / target vocabularies.
UNK_TOKEN, PAD_TOKEN = "!", "_"
SOS_TOKEN, EOS_TOKEN = "[", "]"
LOWER = False


def _scalar_field():
    """Field for a raw, non-sequential column that needs no vocabulary."""
    return data.Field(sequential=False,
                      use_vocab=False)


def _text_field(init_token):
    """Batch-first text field (tokenised with `tokenize`) that also returns lengths."""
    return data.Field(tokenize=tokenize,
                      batch_first=True,
                      lower=LOWER,
                      include_lengths=True,
                      unk_token=UNK_TOKEN,
                      pad_token=PAD_TOKEN,
                      init_token=init_token,
                      eos_token=EOS_TOKEN)


def _load_split(csv_path):
    """Read one processed CSV (header skipped) using the column layout in `datafields`."""
    return data.TabularDataset(path=csv_path,
                               format='csv',
                               skip_header=True,
                               fields=datafields)


ID = _scalar_field()
NAMES = _text_field(init_token=None)           # source side: EOS only, no SOS
TRG_NAMES = _text_field(init_token=SOS_TOKEN)  # target side: SOS ... EOS
LBL = _scalar_field()
CNT = _scalar_field()

# CSV column order -> (attribute name on each example, field that parses it)
datafields = list(zip(("id", "src", "trg", "clf", "cn"),
                      (ID, NAMES, TRG_NAMES, LBL, CNT)))

# The full training file is loaded only to build the vocabularies; the
# train split (args.trn_df_path) and the test split (args.test_df_path) are
# deliberately not loaded in this notebook.
trainval_data = _load_split(args.train_df_path)
val_data = _load_split(args.val_df_path)

# print('Train length {}, val length {}'.format(len(train_data),len(val_data)))
print('Val length {}'.format(len(val_data)))

MIN_FREQ = args.min_freq  # NOTE: we limit the vocabulary to frequent words for speed
NAMES.build_vocab(trainval_data.src, min_freq=MIN_FREQ)
TRG_NAMES.build_vocab(trainval_data.trg, min_freq=MIN_FREQ)
PAD_INDEX = TRG_NAMES.vocab.stoi[PAD_TOKEN]

# The vocabularies are built, so the full dataset can be released.
del trainval_data
gc.collect()

Val length 199111


0

In [6]:
"""
train_iter = data.BucketIterator(train_data,
                                 batch_size=args.batch_size,
                                 train=True, 
                                 sort_within_batch=True, 
                                 sort_key=lambda x: (len(x.src), len(x.trg)),
                                 repeat=False,
                                 device=DEVICE,
                                 shuffle=True)
"""
# Deterministic pass over the validation split: no shuffling, no repeating,
# examples inside each batch sorted by (source length, target length).
valid_iter_batch = data.Iterator(val_data,
                           batch_size=args.batch_size,
                           train=False,
                           sort_within_batch=True,
                           sort_key=lambda x: (len(x.src), len(x.trg)),
                           repeat=False, 
                           device=DEVICE,
                           shuffle=False)

# (The test-split iterator and its ids are intentionally not built here,
#  because the test dataset is not loaded in this notebook.)

# Record the example ids in exactly the order the iterator yields them, so
# predictions produced batch by batch can be matched to their rows later.
val_ids = []
for b in valid_iter_batch:
    val_ids.extend(list(b.id.cpu().numpy()))

print('Preparing data for validation')

# Read the same file the vocabularies were built from; the path comes from
# `args` instead of being spelled out a second time.
train_df = pd.read_csv(args.train_df_path)
train_df = train_df.set_index('id')

# val_gts = train_df.loc[val_ids,'fullname_true'].values
# val_ors = train_df.loc[val_ids,'fullname'].values
# incorrect_idx = list(train_df[train_df.target==1].index.values)
# incorrect_val_ids = list(set(val_ids).intersection(set(incorrect_idx)))
# correct_val_ids = list(set(val_ids)-set(incorrect_val_ids))

print('Making dictionaries')

# id -> ground-truth full name / ground-truth class, then aligned with val_ids
id2gt = dict(train_df['fullname_true'])
id2clf_gt = dict(train_df['target'])
val_gts = [id2gt[_] for _ in val_ids]
val_clf_gts = [id2clf_gt[_] for _ in val_ids]    
del train_df
gc.collect()

Preparing data for validation
Making dictionaries


21

In [7]:
# Build the encoder-decoder with the geometry described by `args`.
model = make_model(len(NAMES.vocab),
                   len(TRG_NAMES.vocab),
                   device=DEVICE,
                   emb_size=args.emb_size,
                   hidden_size=args.hidden_size,
                   num_layers=args.num_layers,
                   dropout=args.dropout,
                   num_classes=args.num_classes,
                   num_cn=args.num_cn,
                   cn_emb_size=args.cn_emb_size,
                   heavy_decoder=args.heavy_decoder)

loaded_from_checkpoint = False
# Default that is overwritten on a successful restore. Setting it up front
# guarantees the attribute exists even when `args.resume` names a missing file.
args.start_epoch = 0

if args.resume:
    if os.path.isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        # Load onto the CPU first so restoring does not depend on the device
        # the checkpoint was saved from.
        checkpoint = torch.load(args.resume,
                                map_location='cpu')
        args.start_epoch = checkpoint['epoch']
        best_met = checkpoint['best_met']
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        loaded_from_checkpoint = True
        del checkpoint
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))

# criterion = nn.CrossEntropyLoss(reduce=False).to(DEVICE)

# TensorBoard logging is disabled in this notebook:
# if args.tensorboard:
#     writer = SummaryWriter('runs_encdec/{}'.format(tb_name))

=> loading checkpoint 'weights/encdec_savva_l4_heavy_h512e512_1e4_resume1e5_best.pth.tar'
=> loaded checkpoint (epoch 37)


"\nif args.tensorboard:\n    writer = SummaryWriter('runs_encdec/{}'.format(tb_name))\n"

In [8]:
model

EncoderDecoder(
  (encoder): Encoder(
    (rnn): GRU(512, 512, num_layers=4, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (decoder): Decoder(
    (attention): BahdanauAttention(
      (key_layer): Linear(in_features=1024, out_features=512, bias=False)
      (query_layer): Linear(in_features=1024, out_features=512, bias=False)
      (energy_layer): Linear(in_features=512, out_features=1, bias=False)
    )
    (rnn): GRU(1536, 1024, num_layers=4, batch_first=True, dropout=0.2)
    (bridge): Linear(in_features=1024, out_features=1024, bias=True)
    (dropout_layer): Dropout(p=0.2)
    (pre_output_layer): Linear(in_features=2560, out_features=512, bias=False)
  )
  (src_embed): Embedding(69, 512)
  (trg_embed): Embedding(70, 512)
  (generator): Generator(
    (proj): Linear(in_features=512, out_features=70, bias=False)
  )
  (classifier): Classifier(
    (classifier): Sequential(
      (0): Linear(in_features=1024, out_features=300, bias=True)
      (1): Dropout(p=0.2)
      (2

In [14]:
# Re-create the validation iterator (same settings as the first definition).
# The batch size is taken from `args` so both definitions cannot drift apart.
valid_iter_batch = data.Iterator(val_data,
                           batch_size=args.batch_size,
                           train=False,
                           sort_within_batch=True,
                           sort_key=lambda x: (len(x.src), len(x.trg)),
                           repeat=False, 
                           device=DEVICE,
                           shuffle=False)

In [15]:
# Example ids in iteration order, one batch at a time.
val_ids = []
for b in valid_iter_batch:
    val_ids += list(b.id.cpu().numpy())

In [16]:
# Scratch version of `predict`: greedily decode ONE validation batch (the one
# at index 200) and leave it bound to `batch` for the cells that follow.
example_iter = (rebatch(PAD_INDEX, x) for x in valid_iter_batch)
max_len = 70
src_vocab, trg_vocab = NAMES.vocab, TRG_NAMES.vocab
num_batches = len(valid_iter_batch)
return_logits = True

model.eval()
count = 0
print()

# Special-token indices come from the vocabularies when both are available.
have_vocabs = src_vocab is not None and trg_vocab is not None
src_eos_index = src_vocab.stoi[EOS_TOKEN] if have_vocabs else None
trg_sos_index = trg_vocab.stoi[SOS_TOKEN] if have_vocabs else 1
trg_eos_index = trg_vocab.stoi[EOS_TOKEN] if have_vocabs else None

preds = []
clf_preds = []

with tqdm(total=num_batches) as pbar:
    for i, batch in enumerate(example_iter):
        # skip everything up to the batch of interest
        if i != 200:
            continue

        output, pred_classes = greedy_decode_batch(
            model, batch.src, batch.src_mask, batch.src_lengths,
            max_len=max_len, sos_index=trg_sos_index, eos_index=trg_eos_index,
            return_logits=return_logits, cn=batch.cn
        )
        clf_preds.extend(list(pred_classes))

        # Strings are produced only when an EOS index is known; each
        # prediction is cut right before its first EOS (kept whole if none).
        if trg_eos_index is not None:
            for pred in output:
                eos_positions = np.where(pred == trg_eos_index)[0]
                kept = pred[:eos_positions[0]] if len(eos_positions) > 0 else pred[:]
                preds.append("".join(lookup_words(kept,
                                                  vocab=TRG_NAMES.vocab)))
        pbar.update(1)

        break

  0%|          | 0/389 [00:00<?, ?it/s]




  0%|          | 1/389 [00:02<15:57,  2.47s/it]


In [12]:
from pytorch.encoder_decoder_utils import beam_decode_batch

In [282]:
seq_tensor[0,0,:]

tensor([ 56,  44,  39,  43,  43,  43,  43,  44,  44,  44,  43,  43,
         43,  43,  43,  43,  43,  43,  43,  43,  43,  43,  43,  43,
         43,  43,  43,  43,  43,  43,  43,  43,  43,  43,  43,  43,
         43,  43,  43,  43,  43,  43,  43,  43,  43,  43,  43,  43,
         43,  43,  43,  43,  43,  43,  43,  43,  43,  43,  43,  43,
         43,  43,  43,  43,  43,  43,  43,  43,  43,  43])

In [28]:
seq_tensor.shape

(10, 15)

In [20]:
seq_tensor.shape

(512, 70)

In [97]:
idx = 4

pred = seq_tensor[idx,:]
print("".join(lookup_words(pred,
                     vocab=TRG_NAMES.vocab)))

"""
pred = seq_tensor[idx,1,:]
print("".join(lookup_words(pred,
                     vocab=TRG_NAMES.vocab)))

pred = seq_tensor[idx,2,:]
print("".join(lookup_words(pred,
                     vocab=TRG_NAMES.vocab)))

pred = seq_tensor[idx,3,:]
print("".join(lookup_words(pred,
                     vocab=TRG_NAMES.vocab)))

pred = seq_tensor[idx,4,:]
print("".join(lookup_words(pred,
                     vocab=TRG_NAMES.vocab)))
                     
"""                  

МАМАДИЕВА САБОХОН ОДИЛОВНА]]А]НА]


'\npred = seq_tensor[idx,1,:]\nprint("".join(lookup_words(pred,\n                     vocab=TRG_NAMES.vocab)))\n\npred = seq_tensor[idx,2,:]\nprint("".join(lookup_words(pred,\n                     vocab=TRG_NAMES.vocab)))\n\npred = seq_tensor[idx,3,:]\nprint("".join(lookup_words(pred,\n                     vocab=TRG_NAMES.vocab)))\n\npred = seq_tensor[idx,4,:]\nprint("".join(lookup_words(pred,\n                     vocab=TRG_NAMES.vocab)))\n                     \n'

In [13]:
import time

In [52]:
torch.zeros(10).byte()

tensor([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0], dtype=torch.uint8)

In [89]:
def get_beam_probs(encoder_hidden, encoder_final, src_mask,
                   prev_y, trg_mask, hidden,
                   cn=None, model=None):
    """Run one decoder step and return the next-token distribution.

    All positional arguments and `cn` are forwarded unchanged to
    `model.decode`. The last time step of the returned pre-output is passed
    through `model.generator` and normalised with a softmax over dim 1.

    Returns:
        (prob, hidden): the softmax-normalised generator output for the last
        step, and the hidden state returned by `model.decode`.
    """
    with torch.no_grad():
        _, next_hidden, pre_output = model.decode(
            encoder_hidden, encoder_final, src_mask,
            prev_y, trg_mask, hidden,
            cn=cn)

        # only the most recent step matters for choosing the next token
        last_step_logits = model.generator(pre_output[:, -1])
        return F.softmax(last_step_logits, dim=1), next_hidden
        
def beam_decode_batch(model,
                      src, src_mask, src_lengths,
                      max_len=100,
                      sos_index=1, eos_index=None,
                      return_logits=False,
                      cn=None,
                      beam_width=3, device=None, values_to_return=1,
                      debug=False):
    """Decode a whole batch with beam search.

    Every step expands each surviving sequence with its `beam_width` most
    probable next tokens (via `get_beam_probs`), scores the children with the
    accumulated negative log-probability and keeps the `beam_width` lowest
    scoring ones per batch element.

    Args:
        model: object exposing `encode`, `classifier` and what
            `get_beam_probs` needs (`decode`, `generator`).
        src, src_mask, src_lengths, cn: forwarded to `model.encode`
            (`src_mask` and `cn` also to the decoder).
        max_len: maximum number of decoding steps.
        sos_index: token index fed to the decoder at the first step.
        eos_index: end-of-sequence token index. When given, decoding stops as
            soon as the best sequence of every batch element contains it;
            when None, decoding always runs for `max_len` steps.
        return_logits: return the raw classifier output instead of the argmax.
        beam_width: number of sequences kept per batch element.
        device: device for helper tensors; defaults to `src.device`.
        values_to_return: how many of the best sequences to return per element.
        debug: print intermediate information.

    Returns:
        (seq_tensor, pred_classes) as numpy arrays. `seq_tensor` has shape
        (batch, steps) when `values_to_return == 1`, otherwise
        (batch, values_to_return, steps). Sequences are NOT truncated at EOS
        and keep being extended after it; callers cut at the first EOS.

    Note:
        Timing information is printed for the encoder, every iteration and
        the post-processing, as before.
    """
    batch_size = src.size(0)
    if device is None:
        # keep the helper tensors next to the input batch
        device = src.device
    end = time.time()

    with torch.no_grad():
        encoder_hidden, encoder_final = model.encode(src, src_mask, src_lengths, cn)
        clf_logits = model.classifier(encoder_hidden)
        if return_logits:
            pred_classes = clf_logits
        else:
            _, pred_classes = torch.max(clf_logits, dim=1)
        pred_classes = pred_classes.data.cpu().numpy()
        prev_y = torch.ones(batch_size, 1).fill_(sos_index).type_as(src)
        trg_mask = torch.ones_like(prev_y)

    print('Encoder - {}'.format(time.time() - end))
    end = time.time()

    # tensors used for batch-based beam search
    hidden_tensor = None  # (num_layers * num_directions, batch, hidden_size, sequences)
    score_tensor = None   # (batch, num_sequences), accumulated -log(prob), lower is better
    seq_tensor = None     # (batch, num_sequences, steps so far)

    for i in range(max_len):
        if debug:
            print('i = {}'.format(i))
        # on the first iteration there is a single (empty) parent sequence
        seq_len = 1 if seq_tensor is None else seq_tensor.size(1)

        # Expand every parent sequence. Children are appended behind the
        # parents, which are dropped once all of them have been expanded.
        for j in range(seq_len):
            if debug:
                print('j = {}'.format(j))
            if hidden_tensor is None:
                # very first step: no hidden state yet, prev_y is the SOS column
                hidden = None
            else:
                # we store only the current hidden state of each sequence
                hidden = hidden_tensor[:, :, :, j].contiguous()
                if debug:
                    print(hidden_tensor.shape)
                # feed the last symbol of parent sequence j
                prev_y = seq_tensor[:, j, i-1:i].contiguous()

            # use wrapper for readability
            prob, hidden = get_beam_probs(encoder_hidden, encoder_final, src_mask,
                                          prev_y, trg_mask, hidden,
                                          cn=cn, model=model)

            # these tensors are (batch, beam_width)
            scores, indices = torch.topk(input=prob,
                                         k=beam_width,
                                         dim=1)
            if debug:
                print(scores[0])
                print("".join(lookup_words(list(indices[0]),
                                           vocab=TRG_NAMES.vocab)))

            # work with negative log-probabilities so that scores add up
            scores = -torch.log(scores)

            assert scores.size(0) == batch_size
            assert indices.size(0) == batch_size
            assert scores.size(1) == beam_width
            assert indices.size(1) == beam_width

            # collect hidden states: all children of one parent share the
            # hidden state produced by this step
            child_hidden = beam_width * [hidden.unsqueeze(dim=3)]
            if hidden_tensor is not None:
                hidden_tensor = torch.cat([hidden_tensor] + child_hidden,
                                          dim=3).contiguous()
            else:
                hidden_tensor = torch.cat(child_hidden,
                                          dim=3).contiguous()

            # collect indexes
            # (batch, beam_width) => (batch, num_sequences, steps)
            if seq_tensor is not None:
                # grow the time axis by one column of -1 placeholders; this
                # happens for j == 0 only, when exactly beam_width parents exist
                if seq_tensor.size(2) == i:
                    seq_tensor = torch.cat([seq_tensor,
                                            torch.ones(batch_size,
                                                       beam_width,
                                                       1, dtype=torch.long).to(device)*(-1)],
                                           dim=2)

                # copy the parent beam_width times and write the new tokens
                old_seq_indices = seq_tensor[:, j:j+1, :].clone()
                new_indices = torch.cat(beam_width*[old_seq_indices],
                                        dim=1)
                new_indices[:, :, i] = indices
                seq_tensor = torch.cat([seq_tensor, new_indices],
                                       dim=1)
            else:
                # convert first beam into sequences
                seq_tensor = indices.unsqueeze(dim=2)
                assert seq_tensor.size() == (batch_size, beam_width, 1)

            # update scores
            # (batch, beam_width) => (batch, num_sequences)
            if score_tensor is not None:
                # child score = parent score + step score (log domain)
                parent_score = score_tensor[:, j:j+1].clone()
                parent_score = torch.cat(beam_width*[parent_score],
                                         dim=1)
                children_scores = parent_score + scores
                score_tensor = torch.cat([score_tensor,
                                          children_scores],
                                         dim=1)
            else:
                score_tensor = scores

        # drop the parents (the first beam_width entries); on the first pass
        # there are no parents to drop
        if i > 0:
            hidden_tensor = hidden_tensor[:, :, :, beam_width:]
            score_tensor = score_tensor[:, beam_width:]
            seq_tensor = seq_tensor[:, beam_width:, :]

        # select the best sequences to survive; topk returns them sorted, so
        # index 0 is the best sequence of each batch element afterwards
        _, seq_indices = torch.topk(input=score_tensor,
                                    k=beam_width,
                                    dim=1,
                                    largest=False)

        # batch and sequence dimension are equal
        assert hidden_tensor.size(1) == score_tensor.size(0)
        assert hidden_tensor.size(3) == score_tensor.size(1)
        assert score_tensor.size()[0:2] == seq_tensor.size()[0:2]

        # torch gather is not applicable due to different shape
        # for simplicity re-index manually, one batch element at a time
        hidden_tensor = torch.cat([hidden_tensor[:,
                                                 b:b+1,
                                                 :,
                                                 seq_indices[b]]
                                   for b in range(batch_size)],
                                  dim=1)

        score_tensor = torch.gather(input=score_tensor,
                                    dim=1,
                                    index=seq_indices)

        seq_tensor = torch.cat([seq_tensor[b:b+1,
                                           seq_indices[b],
                                           :]
                                for b in range(batch_size)],
                               dim=0).long()

        # Stop early once the best sequence of EVERY batch element contains
        # EOS. The check uses the real batch size and the tokens produced so
        # far, and is skipped altogether when no EOS index was given.
        if eos_index is not None:
            best_has_eos = (seq_tensor[:, 0, :] == eos_index).sum(dim=1) > 0
            if int(best_has_eos.sum()) == batch_size:
                if debug:
                    print('Breaking out of cycle early')
                break

        print('Iteration {} - {}'.format(i, time.time() - end))
        end = time.time()

    # select only the best of the best
    _, seq_indices = torch.topk(input=score_tensor,
                                k=values_to_return,
                                dim=1,
                                largest=False)

    seq_tensor = torch.cat([seq_tensor[b:b+1,
                                       seq_indices[b],
                                       :]
                            for b in range(batch_size)],
                           dim=0).long()

    # shed extra dimension for compatibility with the greedy decoder output
    if values_to_return == 1:
        assert seq_tensor.size(1) == 1
        seq_tensor = seq_tensor.squeeze(dim=1)

    seq_tensor = seq_tensor.cpu().detach().numpy()

    print('Postprocessing - {}'.format(time.time() - end))
    end = time.time()

    return seq_tensor, pred_classes

In [34]:
True+False

1

In [34]:
model = model.to(DEVICE)

In [98]:
# Beam-decode the batch left over from the single-batch cell; the decoded
# sequences are kept under both names used by later cells.
output, pred_classes = beam_decode_batch(
    model, batch.src, batch.src_mask, batch.src_lengths,
    max_len=max_len, sos_index=trg_sos_index, eos_index=trg_eos_index,
    return_logits=return_logits, cn=batch.cn,
    device=DEVICE, beam_width=5, values_to_return=1
)
seq_tensor = output

Encoder - 0.0729529857635498
Iteration 0 - 0.06865739822387695
Iteration 1 - 0.12813973426818848
Iteration 2 - 0.10826849937438965
Iteration 3 - 0.10655760765075684
Iteration 4 - 0.10595202445983887
Iteration 5 - 0.10528230667114258
Iteration 6 - 0.12033820152282715
Iteration 7 - 0.1322803497314453
Iteration 8 - 0.1336677074432373
Iteration 9 - 0.1289348602294922
Iteration 10 - 0.10628771781921387
Iteration 11 - 0.1063697338104248
Iteration 12 - 0.10542798042297363
Iteration 13 - 0.10401535034179688
Iteration 14 - 0.10492110252380371
Iteration 15 - 0.10354018211364746
Iteration 16 - 0.10497212409973145
Iteration 17 - 0.1037449836730957
Iteration 18 - 0.1054391860961914
Iteration 19 - 0.10451650619506836
Iteration 20 - 0.10475993156433105
Iteration 21 - 0.10585904121398926
Iteration 22 - 0.10683202743530273
Iteration 23 - 0.10654282569885254
Iteration 24 - 0.11208748817443848
Iteration 25 - 0.10623836517333984
Iteration 26 - 0.10736441612243652
Iteration 27 - 0.10605406761169434
Iterati

In [20]:
a = torch.zeros(5,5)
b = torch.zeros(5,5)

In [21]:
a.equal(b)

True

In [20]:
val_df = pd.read_csv(args.val_df_path)

In [25]:
val_df = val_df.set_index('id')

In [258]:
val_df.fullname[list(batch.id.numpy())[:10]]

id
324699    MADALIEV BABURJON
324673    ФАЙЗУЛЛОЕВ РУСТАМ
323187    CIUMACENCO ALIONA
321651    ОРОЗБЕКУУЛУ БАКЫТ
321329    ASKAROV BAKHTIYOR
320997    ЗАПОРОЖАН АЛЕКСЕЙ
320825    ДЖШРНГО ДЖЛО ЕКНЕ
320764    БЗИКАДЗЕ СВЕТЬАНА
320743    BARATOV FAHRIDDIN
320274    БЕРДИБЕКОВ ЭЛАМАН
Name: fullname, dtype: object

In [259]:
preds[0:10]

['MADALIEV BABURJON',
 'ФАЙЗУЛЛОЕВ РУСТАМ',
 'CIUMACENCO ALIONA',
 'ОРОЗБЕКУУЛУ БАКЫТ',
 'ASKAROV BAKHTIYOR',
 'ЗАПОРОЖАН АЛЕКСЕЙ',
 'ДЖШРНГО ДЖЛО ЕКНЕ',
 'БЗИКАДЗЕ СВЕТЛАНА',
 'BARATOV FAHRIDDIN',
 'БЕРДИБЕКОВ ЭЛАМАН']

In [99]:
torch.ones(2,2)*(-1)

tensor([[-1., -1.],
        [-1., -1.]])

In [101]:
a = torch.randn(512,9)

In [110]:
scores, indices = torch.topk(input=a,
                 k=3,
                 dim=1)

In [133]:
a.size()

torch.Size([512, 9])

In [120]:
a[indices].shape

torch.Size([512, 3, 9])

In [124]:
indices.shape

torch.Size([512, 3])

In [117]:
a.shape

torch.Size([512, 9])

In [125]:
torch.gather(input=a,
             dim=1,
             index=indices).shape

torch.Size([512, 3])

In [123]:
torch.index_select(input=a,
                   dim=1,
                   index=indices).shape

RuntimeError: invalid argument 3: Index is supposed to be an empty tensor or a vector at /opt/conda/conda-bld/pytorch_1524586445097/work/aten/src/TH/generic/THTensorMath.c:314

In [113]:
a[indices].shape

torch.Size([512, 3, 9])

In [116]:
indices.shape

torch.Size([512, 3])

In [109]:
indices[0]

tensor([  16,  455,  431,   88,  271,  207,  492,  405,  427])

In [97]:
torch.cat([torch.randn(2,2),torch.randn(2,1)],dim=1).shape

torch.Size([2, 3])

In [None]:
preds,clf_preds = predict((rebatch(PAD_INDEX, x) for x in valid_iter_batch),
                          model, max_len=70, src_vocab=NAMES.vocab, trg_vocab=TRG_NAMES.vocab,
                          num_batches=len(valid_iter_batch),return_logits=True)

In [None]:
def predict(example_iter, model, max_len=100, 
            sos_index=1, 
            src_eos_index=None, 
            trg_eos_index=None, 
            src_vocab=None, trg_vocab=None,
            num_batches=100,
            return_logits=False):
    """Greedily decode every batch of `example_iter` into strings.

    Args:
        example_iter: iterable of batches exposing `src`, `src_mask`,
            `src_lengths` and `cn`.
        model: model to decode with; switched to eval mode.
        max_len: maximum number of decoding steps.
        sos_index: start token index, used when no vocabularies are given.
        src_eos_index: accepted for backward compatibility; not used.
        trg_eos_index: end token index, used when no vocabularies are given.
        src_vocab, trg_vocab: when BOTH are given, the start/end indices are
            looked up in `trg_vocab` (overriding `sos_index`/`trg_eos_index`)
            and `trg_vocab` is used to turn indices back into symbols.
            Otherwise the global `TRG_NAMES.vocab` is used for that.
        num_batches: total shown by the progress bar.
        return_logits: forwarded to `greedy_decode_batch`.

    Returns:
        (preds, clf_preds): one decoded string per example, cut right before
        the first end token when an end index is known (otherwise the full
        decoded sequence), and the classifier outputs in the same order.
    """
    model.eval()
    print()

    if src_vocab is not None and trg_vocab is not None:
        trg_sos_index = trg_vocab.stoi[SOS_TOKEN]
        trg_eos_index = trg_vocab.stoi[EOS_TOKEN]
        decode_vocab = trg_vocab
    else:
        # honour the explicitly passed indices instead of discarding them
        trg_sos_index = sos_index
        decode_vocab = TRG_NAMES.vocab

    preds = []
    clf_preds = []

    with tqdm(total=num_batches) as pbar:
        for batch in example_iter:

            output, pred_classes = greedy_decode_batch(
                model, batch.src, batch.src_mask, batch.src_lengths,
                max_len=max_len, sos_index=trg_sos_index, eos_index=trg_eos_index,
                return_logits=return_logits, cn=batch.cn
            )

            clf_preds.extend(list(pred_classes))

            # Every prediction yields a string, so `preds` always lines up
            # with `clf_preds`; truncation happens only when EOS is known.
            for pred in output:
                if trg_eos_index is not None:
                    eos_positions = np.where(pred == trg_eos_index)[0]
                    if len(eos_positions) > 0:
                        pred = pred[:eos_positions[0]]
                preds.append("".join(lookup_words(pred,
                                                  vocab=decode_vocab)))
            pbar.update(1)
    return preds, clf_preds

In [11]:
a = torch.randint(0,100,(512,100))

In [13]:
scores, indices = torch.topk(a,k=3,dim=1)

In [16]:
scores.shape, indices.shape

(torch.Size([512, 3]), torch.Size([512, 3]))

In [20]:
scores.shape

torch.Size([512, 3])

In [25]:
scores[:,0].unsqueeze(dim=1).expand_as(scores)

torch.Size([512, 3])

In [18]:
(scores * scores).shape

torch.Size([512, 3])

In [None]:
# One row per validation example: id, classifier output and decoded name.
predict_df = pd.DataFrame(
    {'id': val_ids,
     'target': clf_preds,
     'fullname_true': preds
    })

# Make sure the output folder exists; to_csv does not create it.
os.makedirs('eval', exist_ok=True)
predict_df.set_index('id').to_csv('eval/{}.csv'.format(args.tb_name)) 

# Add skip connection from the input

In [7]:
%load_ext autoreload

In [8]:
val_iter = (rebatch(PAD_INDEX, b) for b in valid_iter_batch)

In [9]:
for batch in val_iter:
    break

In [20]:
from pytorch.encoder_decoder import make_model
# Use these commands in the same cell.
%autoreload 2

# Release cached GPU memory before building a fresh model.
torch.cuda.empty_cache()

# Same geometry as before, plus the `add_input_skip` switch from `args`.
model_options = dict(device=DEVICE,
                     emb_size=args.emb_size,
                     hidden_size=args.hidden_size,
                     num_layers=args.num_layers,
                     dropout=args.dropout,
                     num_classes=args.num_classes,
                     num_cn=args.num_cn,
                     cn_emb_size=args.cn_emb_size,
                     heavy_decoder=args.heavy_decoder,
                     add_input_skip=args.add_input_skip)

model = make_model(len(NAMES.vocab),
                   len(TRG_NAMES.vocab),
                   **model_options)

In [22]:
# Smoke test of the freshly built model: run one full forward pass on the
# batch fetched above and unpack the decoder outputs and classifier logits.
(out, _, pre_output),clf_logits = model.forward(batch.src, batch.trg,
                                                batch.src_mask, batch.trg_mask,
                                                batch.src_lengths, batch.trg_lengths,
                                                batch.cn)