In [None]:
import torch
print(torch.__version__)
#!pip install torchtext==0.5
#!pip install -U torch==1.5
#!pip install tokenizers
!pip install nbresuse

In [None]:
#!ls ../data/ncs_preprocessed_data/train-ncs/
#!pip install tokenizers
#tokenizer.decode(tokenizer.encode("hello there").ids)

!git clone https://github.com/NVIDIA/apex
!cd apex

In [None]:
#ls ../data/ncs_preprocessed_data/train-ncs/
!cd apex;pip install -v --no-cache-dir ./
#!print(src_tokenizer)

In [None]:
print(src_tokenizer.encode('<s> </s> <pad>').ids)
print(src_tokenizer.encode(' hi there ').ids)
print(tgt_tokenizer.decode(src_tokenizer.encode(' hi there ').ids))


In [None]:
from apex import amp
from tokenizers import ByteLevelBPETokenizer
src_tokenizer = ByteLevelBPETokenizer()


src_tokenizer.train(["../data/ncs_preprocessed_data/train-ncs/code.original_subtoken"], vocab_size=30000, special_tokens=[
    "<s>",
    "<pad>",
    "</s>",
    "<unk>",
    "<mask>"])

tgt_tokenizer = ByteLevelBPETokenizer()
tgt_tokenizer.train(["../data/ncs_preprocessed_data/train-ncs/javadoc.original"], vocab_size=20000, special_tokens=[
    "<s>",
    "<pad>",
    "</s>",
    "<unk>",
    "<mask>"])

In [None]:
vars(src_tokenizer)

In [None]:
print(src_tokenizer.encode('<s> </s> <pad> <unk>').ids) #[0, 225, 2, 225, 1]
print(src_tokenizer.encode(' hi there ').ids)

print(tgt_tokenizer.encode('<s> </s> <pad> <unk>').ids) #[0, 225, 2, 225, 1]
print(tgt_tokenizer.encode(' hi there ').ids)

print(src_tokenizer.decode([13000]))
print(src_tokenizer)
print(tgt_tokenizer)


In [None]:
from torch.utils.data import Dataset, DataLoader
from joblib import Parallel, delayed
import threading
import linecache
linecache.clearcache()
import subprocess
import os
from tqdm import tqdm
import torch

MAX_SRC_LEN = 200
MAX_TGT_LEN = 50


def pad_sequences(x, max_len):
    padded = torch.ones((max_len), dtype=torch.long)
    if len(x) > max_len: padded[:] = torch.tensor(x[:max_len] , dtype=torch.long)
    else: padded[:len(x)] = torch.tensor(x, dtype=torch.long)
    return padded

class LazyDataset(Dataset):
    def __init__(self, src_tokenizer,tgt_tokenizer, src_path, tgt_path, max_len_src = MAX_SRC_LEN,max_len_tgt=MAX_TGT_LEN):
        self.src_path = src_path
        self.tgt_path = tgt_path
        self.max_len_src = max_len_src
        self.max_len_tgt = max_len_tgt
        self.src_tokenizer = src_tokenizer
        self.tgt_tokenizer = tgt_tokenizer
        self.num_entries = sum(1 for line in open(src_path))
        self.target_entries = sum(1 for line in open(tgt_path))
        print(self.num_entries, self.target_entries)
        assert self.num_entries == self.target_entries
        
            
    def __getitem__(self, idx):
        x = self.src_tokenizer.encode("<s>"+linecache.getline(self.src_path, idx + 1).strip()+"</s>").ids
        y = self.tgt_tokenizer.encode("<s>"+linecache.getline(self.tgt_path, idx + 1).strip()+"</s>").ids
        
        return torch.tensor(pad_sequences(x,self.max_len_src), dtype=torch.long),torch.tensor(pad_sequences(y,self.max_len_tgt), dtype=torch.long) 
    
    def __len__(self):
        return self.num_entries

train_dataset = LazyDataset(src_tokenizer, tgt_tokenizer, '../data/ncs_preprocessed_data/train-ncs/code.original_subtoken',
                            '../data/ncs_preprocessed_data/train-ncs/javadoc.original')



In [None]:
test_dataset = LazyDataset(src_tokenizer, tgt_tokenizer, '../data/ncs_preprocessed_data/test/code.original_subtoken',
                            '../data/ncs_preprocessed_data/test/javadoc.original')


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import spacy
import numpy as np

import random
import math
import time

SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [None]:
#!conda install spacy
#!python -m spacy.en.download
#!spacy download en
#spacy_en = spacy.load('en')

#!pip install spacy

In [None]:
'''
def tokenize_en(text):
    """
    Tokenizes English text from a string into a list of strings
    """
    return [tok.text for tok in spacy_en.tokenizer(text)]
'''

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class Encoder(nn.Module):
    def __init__(self, 
                 input_dim, 
                 hid_dim, 
                 n_layers, 
                 n_heads, 
                 pf_dim,
                 dropout, 
                 device,
                 max_length = MAX_SRC_LEN):
        super().__init__()

        self.device = device
        
        self.tok_embedding = nn.Embedding(input_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        
        self.layers = nn.ModuleList([EncoderLayer(hid_dim, 
                                                  n_heads, 
                                                  pf_dim,
                                                  dropout, 
                                                  device) 
                                     for _ in range(n_layers)])
        
        self.dropout = nn.Dropout(dropout)
        
        self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)
        
    def forward(self, src, src_mask):
        
        #src = [batch size, src len]
        #src_mask = [batch size, src len]
        
        batch_size = src.shape[0]
        src_len = src.shape[1]
        
        pos = torch.arange(0, src_len).unsqueeze(0).repeat(batch_size, 1).to(self.device)
        
        #pos = [batch size, src len]
        
        src = self.dropout((self.tok_embedding(src) * self.scale) + self.pos_embedding(pos))
        
        #src = [batch size, src len, hid dim]
        
        for layer in self.layers:
            src = layer(src, src_mask)
            
        #src = [batch size, src len, hid dim]
            
        return src

class EncoderLayer(nn.Module):
    def __init__(self, 
                 hid_dim, 
                 n_heads, 
                 pf_dim,  
                 dropout, 
                 device):
        super().__init__()
        
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, 
                                                                     pf_dim, 
                                                                     dropout)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, src, src_mask):
        
        #src = [batch size, src len, hid dim]
        #src_mask = [batch size, src len]
                
        #self attention
        _src, _ = self.self_attention(src, src, src, src_mask)
        
        #dropout, residual connection and layer norm
        src = self.self_attn_layer_norm(src + self.dropout(_src))
        
        #src = [batch size, src len, hid dim]
        
        #positionwise feedforward
        _src = self.positionwise_feedforward(src)
        
        #dropout, residual and layer norm
        src = self.ff_layer_norm(src + self.dropout(_src))
        
        #src = [batch size, src len, hid dim]
        
        return src
class MultiHeadAttentionLayer(nn.Module):
    def __init__(self, hid_dim, n_heads, dropout, device):
        super().__init__()
        
        assert hid_dim % n_heads == 0
        
        self.hid_dim = hid_dim
        self.n_heads = n_heads
        self.head_dim = hid_dim // n_heads
        
        self.fc_q = nn.Linear(hid_dim, hid_dim)
        self.fc_k = nn.Linear(hid_dim, hid_dim)
        self.fc_v = nn.Linear(hid_dim, hid_dim)
        
        self.fc_o = nn.Linear(hid_dim, hid_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        self.scale = torch.sqrt(torch.FloatTensor([self.head_dim])).to(device)
        
    def forward(self, query, key, value, mask = None):
        
        batch_size = query.shape[0]
        
        #query = [batch size, query len, hid dim]
        #key = [batch size, key len, hid dim]
        #value = [batch size, value len, hid dim]
                
        Q = self.fc_q(query)
        K = self.fc_k(key)
        V = self.fc_v(value)
        
        #Q = [batch size, query len, hid dim]
        #K = [batch size, key len, hid dim]
        #V = [batch size, value len, hid dim]
                
        Q = Q.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        K = K.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        V = V.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        
        #Q = [batch size, n heads, query len, head dim]
        #K = [batch size, n heads, key len, head dim]
        #V = [batch size, n heads, value len, head dim]
                
        energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale
        
        #energy = [batch size, n heads, query len, key len]
        
        if mask is not None:
            energy = energy.masked_fill(mask == 0, -1e10)
        
        attention = torch.softmax(energy, dim = -1)
                
        #attention = [batch size, n heads, query len, key len]
                
        x = torch.matmul(self.dropout(attention), V)
        
        #x = [batch size, n heads, query len, head dim]
        
        x = x.permute(0, 2, 1, 3).contiguous()
        
        #x = [batch size, query len, n heads, head dim]
        
        x = x.view(batch_size, -1, self.hid_dim)
        
        #x = [batch size, query len, hid dim]
        
        x = self.fc_o(x)
        
        #x = [batch size, query len, hid dim]
        
        return x, attention
    
class PositionwiseFeedforwardLayer(nn.Module):
    def __init__(self, hid_dim, pf_dim, dropout):
        super().__init__()
        
        self.fc_1 = nn.Linear(hid_dim, pf_dim)
        self.fc_2 = nn.Linear(pf_dim, hid_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        
        #x = [batch size, seq len, hid dim]
        
        x = self.dropout(torch.relu(self.fc_1(x)))
        
        #x = [batch size, seq len, pf dim]
        
        x = self.fc_2(x)
        
        #x = [batch size, seq len, hid dim]
        
        return x
class Decoder(nn.Module):
    def __init__(self, 
                 output_dim, 
                 hid_dim, 
                 n_layers, 
                 n_heads, 
                 pf_dim, 
                 dropout, 
                 device,
                 max_length = MAX_TGT_LEN):
        super().__init__()
        
        self.device = device
        
        self.tok_embedding = nn.Embedding(output_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        
        self.layers = nn.ModuleList([DecoderLayer(hid_dim, 
                                                  n_heads, 
                                                  pf_dim, 
                                                  dropout, 
                                                  device)
                                     for _ in range(n_layers)])
        
        self.fc_out = nn.Linear(hid_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)
        
    def forward(self, trg, enc_src, trg_mask, src_mask):
        
        #trg = [batch size, trg len]
        #enc_src = [batch size, src len, hid dim]
        #trg_mask = [batch size, trg len]
        #src_mask = [batch size, src len]
                
        batch_size = trg.shape[0]
        trg_len = trg.shape[1]
        
        pos = torch.arange(0, trg_len).unsqueeze(0).repeat(batch_size, 1).to(self.device)
                            
        #pos = [batch size, trg len]
            
        trg = self.dropout((self.tok_embedding(trg) * self.scale) + self.pos_embedding(pos))
                
        #trg = [batch size, trg len, hid dim]
        
        for layer in self.layers:
            trg, attention = layer(trg, enc_src, trg_mask, src_mask)
        
        #trg = [batch size, trg len, hid dim]
        #attention = [batch size, n heads, trg len, src len]
        
        output = self.fc_out(trg)
        
        #output = [batch size, trg len, output dim]
            
        return output, attention
class DecoderLayer(nn.Module):
    def __init__(self, 
                 hid_dim, 
                 n_heads, 
                 pf_dim, 
                 dropout, 
                 device):
        super().__init__()
        
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.enc_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.encoder_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, 
                                                                     pf_dim, 
                                                                     dropout)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, trg, enc_src, trg_mask, src_mask):
        
        #trg = [batch size, trg len, hid dim]
        #enc_src = [batch size, src len, hid dim]
        #trg_mask = [batch size, trg len]
        #src_mask = [batch size, src len]
        
        #self attention
        _trg, _ = self.self_attention(trg, trg, trg, trg_mask)
        
        #dropout, residual connection and layer norm
        trg = self.self_attn_layer_norm(trg + self.dropout(_trg))
            
        #trg = [batch size, trg len, hid dim]
            
        #encoder attention
        _trg, attention = self.encoder_attention(trg, enc_src, enc_src, src_mask)
        
        #dropout, residual connection and layer norm
        trg = self.enc_attn_layer_norm(trg + self.dropout(_trg))
                    
        #trg = [batch size, trg len, hid dim]
        
        #positionwise feedforward
        _trg = self.positionwise_feedforward(trg)
        
        #dropout, residual and layer norm
        trg = self.ff_layer_norm(trg + self.dropout(_trg))
        
        #trg = [batch size, trg len, hid dim]
        #attention = [batch size, n heads, trg len, src len]
        
        return trg, attention
    
class Seq2Seq(nn.Module):
    def __init__(self, 
                 encoder, 
                 decoder, 
                 src_pad_idx, 
                 trg_pad_idx, 
                 device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device
        
    def make_src_mask(self, src):
        
        #src = [batch size, src len]
        
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)

        #src_mask = [batch size, 1, 1, src len]

        return src_mask
    
    def make_trg_mask(self, trg):
        
        #trg = [batch size, trg len]
        
        trg_pad_mask = (trg != self.trg_pad_idx).unsqueeze(1).unsqueeze(2)
        
        #trg_pad_mask = [batch size, 1, 1, trg len]
        
        trg_len = trg.shape[1]
        
        trg_sub_mask = torch.tril(torch.ones((trg_len, trg_len), device = self.device)).bool()
        
        #trg_sub_mask = [trg len, trg len]
            
        trg_mask = trg_pad_mask & trg_sub_mask
        
        #trg_mask = [batch size, 1, trg len, trg len]
        
        return trg_mask

    def forward(self, src, trg):
        
        #src = [batch size, src len]
        #trg = [batch size, trg len]
                
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        
        #src_mask = [batch size, 1, 1, src len]
        #trg_mask = [batch size, 1, trg len, trg len]
        
        enc_src = self.encoder(src, src_mask)
        
        #enc_src = [batch size, src len, hid dim]
                
        output, attention = self.decoder(trg, enc_src, trg_mask, src_mask)
        
        #output = [batch size, trg len, output dim]
        #attention = [batch size, n heads, trg len, src len]
        
        return output, attention

In [None]:

INPUT_DIM = 30000
OUTPUT_DIM = 20000



HID_DIM = 256
ENC_LAYERS = 6
DEC_LAYERS = 6
ENC_HEADS = 8
DEC_HEADS = 8
ENC_PF_DIM = 512
DEC_PF_DIM = 512
ENC_DROPOUT = 0.1
DEC_DROPOUT = 0.1

enc = Encoder(INPUT_DIM, 
              HID_DIM, 
              ENC_LAYERS, 
              ENC_HEADS, 
              ENC_PF_DIM, 
              ENC_DROPOUT, 
              device)

dec = Decoder(OUTPUT_DIM, 
              HID_DIM, 
              DEC_LAYERS, 
              DEC_HEADS, 
              DEC_PF_DIM, 
              DEC_DROPOUT, 
              device)

In [None]:
model = Seq2Seq(enc, dec, 1, 1, device).to(device)

In [None]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

In [None]:
def initialize_weights(m):
    if hasattr(m, 'weight') and m.weight.dim() > 1:
        nn.init.xavier_uniform_(m.weight.data)

In [None]:
model.apply(initialize_weights)
#LEARNING_RATE = 0.0005
LEARNING_RATE = 0.00005

optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)
criterion = nn.CrossEntropyLoss(ignore_index = 1)


In [None]:
from tqdm import tqdm
def train(model, dataset, optimizer, criterion, clip):
    
    model.train()
    
    epoch_loss = 0
    for (src_, trg_) in tqdm(dataset):
        
        optimizer.zero_grad()

        src, trg = src_.to(device), trg_.to(device)
        output, _ = model(src, trg[:,:-1])

        #output = [batch size, trg len - 1, output dim]
        #trg = [batch size, trg len]
        #print(trg.shape)
        #print(output.shape)
        output_dim = output.shape[-1]

        output = output.contiguous().view(-1, output_dim) #[:, :-1, :]
        trg = trg[:,1:].contiguous().view(-1)

        #output = [batch size * trg len - 1, output dim]
        #trg = [batch size * trg len - 1]
        #print(trg.shape)
        #print(output.shape)
        loss = criterion(output, trg)
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        #loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        epoch_loss += loss.item()
        #print(f'\tTrain Loss: {loss.item():.3f} | Train PPL: {math.exp(loss.item()):7.3f}')

        
    return epoch_loss / len(dataset)

In [None]:
def evaluate(model,dataset,criterion):
    
    model.eval()
    epoch_loss = 0
    with torch.no_grad():
        for (src_, trg_) in tqdm(dataset):
            src, trg = src_.to(device), trg_.to(device)
            output, _ = model(src, trg[:,:-1])

            output_dim = output.shape[-1]

            output = output.contiguous().view(-1, output_dim) #[:, :-1, :]
            trg = trg[:,1:].contiguous().view(-1)
            loss = criterion(output, trg)
            epoch_loss += loss.item()
        
    return epoch_loss / len(dataset)

def give_pred(model, tokens, device, max_len=50):
    model.eval()
    
    src_indexes = tokens
    #print(tokens.size())
    src_tensor = torch.LongTensor(src_indexes).to(device)
    
    src_mask = model.make_src_mask(src_tensor)
    with torch.no_grad():
        enc_src = model.encoder(src_tensor, src_mask)

    trg_indexes = torch.zeros((tokens.size()[0], 1), dtype = torch.long)
    trg_indexes = trg_indexes.tolist()
    #print(trg_indexes)
    #print(trg_indexes.size())
    
    for i in range(max_len):
        #print(len(trg_indexes),len(trg_indexes[0]))
        #trg_tensor = torch.LongTensor(pad_sequences(trg_indexes, 50)).unsqueeze(0).to(device)
        trg_tensor = torch.LongTensor(trg_indexes).to(device)
        #print("in loop", trg_tensor.size())
        trg_mask = model.make_trg_mask(trg_tensor)
        #print(trg_mask)
        with torch.no_grad():
            output, attention = model.decoder(trg_tensor, enc_src, trg_mask, src_mask)
        
        #print("out shape", output.size())
        
        
        
        for idx in range(len(trg_indexes)):
            #print("shape here = ", output[i][-1].size())
            #print(output[idx][-1].argmax().item())
            trg_indexes[idx].append(output[idx][-1].argmax().item())
            
        
        #pred_token = output.argmax(2)[:,-1].item()
        #print(output[0][-1].argmax())
        #print(pred_token)
        #trg_indexes.append(pred_token)

        #if pred_token == 2:
        #    break
    #print(len(trg_indexes),len(trg_indexes[0]))
    return trg_indexes

def calculate_blue(model,test_ds, device, max_len = 50):
    model.eval()
    sum_blue = 0
    data_count = 0
    for (src_, trg_) in tqdm(test_ds):
        hypothesis = give_pred(model,src_, device, max_len = 50)
        reference = trg_.tolist()
        #there may be several references
        for i in range(src_.size()[0]):
            sum_blue += nltk.translate.bleu_score.sentence_bleu([reference[i]], hypothesis[i])
            data_count+=1
        #print(sum_blue/data_count)
        #print(data_count)
    return sum_blue/data_count

'''
import warnings
import nltk
warnings.filterwarnings('ignore')

N_EPOCHS = 20
CLIP = 1

BATCH_SIZE = 280
EVAL_BATCH_SIZE = 500

train_dataset = DataLoader(train_dataset, batch_size = BATCH_SIZE, num_workers=5,
                     drop_last=True,
                     shuffle=True)

test_dataset = DataLoader(test_dataset, batch_size = EVAL_BATCH_SIZE, num_workers=5,
                     drop_last=True,
                     shuffle=False)


calculate_blue(model,test_dataset, device, max_len = 50)
'''

In [None]:
'''
for (src_, trg_) in test_dataset:
    for i in range(src_.size()[0]):
        print(src_.size())
        hypothesis = give_pred(model,src_[i], device, max_len = 50)
        reference = trg_[i].tolist()
        #there may be several references
        sum_blue = nltk.translate.bleu_score.sentence_bleu([reference], hypothesis)
        #data_count+=1
'''

In [None]:
import warnings
import nltk
warnings.filterwarnings('ignore')

N_EPOCHS = 300
CLIP = 1

BATCH_SIZE = 215
EVAL_BATCH_SIZE = 850

train_dataset = DataLoader(train_dataset, batch_size = BATCH_SIZE, num_workers=5,
                     drop_last=True,
                     shuffle=True)

test_dataset = DataLoader(test_dataset, batch_size = EVAL_BATCH_SIZE, num_workers=5,
                     drop_last=True,
                     shuffle=False)


#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_dataset))
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)



def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

best_valid_loss = float('inf')

opt_level = 'O1'
model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level)
    

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    train_loss = train(model, train_dataset, optimizer, criterion, CLIP)
    #scheduler.step()
    scheduler.step(train_loss)
    valid_loss = evaluate(model, test_dataset, criterion)
    
    blue_score = calculate_blue(model,test_dataset, device, max_len = 50)
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut6-model.pt')
    
    
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')
    print("validation blue score = ",blue_score*100)
    '''
    annealing
    Epoch: 14 | Time: 1m 43s
    Train Loss: 4.060 | Train PPL:  57.988
    
    Epoch: 07 | Time: 1m 58s
	Train Loss: 4.715 | Train PPL: 111.645
	 Val. Loss: 4.773 |  Val. PPL: 118.230
    '''

In [None]:
train_dataset = LazyDataset(src_tokenizer, tgt_tokenizer, '../data/ncs_preprocessed_data/train-ncs/code.original_subtoken',
                            '../data/ncs_preprocessed_data/train-ncs/javadoc.original')


train_dataset = DataLoader(train_dataset, batch_size = 101, num_workers=10,
                     drop_last=True,
                     shuffle=True)
for (src_, trg_) in tqdm(train_dataset):
    src, trg = src_.to(device), trg_.to(device)
    print(src[0].size())

In [None]:
#load and save here

# Save checkpoint
checkpoint = {
    'model': model.state_dict(),
    'optimizer': optimizer.state_dict(),
    'amp': amp.state_dict()
}
torch.save(checkpoint, '../models/big_checkpoint_corrected_600_loss2.pt')



In [None]:

# Restore
opt_level = 'O1'
model = Seq2Seq(enc, dec, 1, 1, device).to(device)
LEARNING_RATE = 0.0005

optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

checkpoint = torch.load('../models/big_checkpoint_corrected_600_loss1.pt')

model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level)
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
amp.load_state_dict(checkpoint['amp'])


import warnings
import nltk
warnings.filterwarnings('ignore')

N_EPOCHS = 20
CLIP = 1

BATCH_SIZE = 280
EVAL_BATCH_SIZE = 500

train_dataset = DataLoader(train_dataset, batch_size = BATCH_SIZE, num_workers=5,
                     drop_last=True,
                     shuffle=True)

test_dataset = DataLoader(test_dataset, batch_size = EVAL_BATCH_SIZE, num_workers=5,
                     drop_last=True,
                     shuffle=False)


In [None]:

def give_pred(model, tokens, device, max_len=50):
    model.eval()
    
    src_indexes = tokens
    #print(tokens.size())
    src_tensor = torch.LongTensor(src_indexes).to(device)
    
    src_mask = model.make_src_mask(src_tensor)
    with torch.no_grad():
        enc_src = model.encoder(src_tensor, src_mask)

    trg_indexes = torch.zeros((tokens.size()[0], 1), dtype = torch.long)
    trg_indexes = trg_indexes.tolist()
    #print(trg_indexes)
    #print(trg_indexes.size())
    
    for i in range(max_len):
        #print(len(trg_indexes),len(trg_indexes[0]))
        #trg_tensor = torch.LongTensor(pad_sequences(trg_indexes, 50)).unsqueeze(0).to(device)
        trg_tensor = torch.LongTensor(trg_indexes).to(device)
        #print("in loop", trg_tensor.size())
        trg_mask = model.make_trg_mask(trg_tensor)
        #print(trg_mask)
        with torch.no_grad():
            output, attention = model.decoder(trg_tensor, enc_src, trg_mask, src_mask)
        
        #print("out shape", output.size())
        
        
        
        for idx in range(len(trg_indexes)):
            #print("shape here = ", output[i][-1].size())
            #print(output[idx][-1].argmax().item())
            trg_indexes[idx].append(output[idx][-1].argmax().item())
            
        
        #pred_token = output.argmax(2)[:,-1].item()
        #print(output[0][-1].argmax())
        #print(pred_token)
        #trg_indexes.append(pred_token)

        #if pred_token == 2:
        #    break
    #print(len(trg_indexes),len(trg_indexes[0]))
    return trg_indexes

def calculate_blue(model,test_ds, device, max_len = 50):
    model.eval()
    sum_blue = 0
    data_count = 0
    for (src_, trg_) in tqdm(test_ds):
        hypothesis = give_pred(model,src_, device, max_len = 50)
        reference = trg_.tolist()
        #there may be several references
        for i in range(src_.size()[0]):
            sum_blue += nltk.translate.bleu_score.sentence_bleu([reference[i]], hypothesis[i])
            data_count+=1
        print(sum_blue/data_count)
        print(data_count)
    return sum_blue/data_count

calculate_blue(model,test_dataset, device, max_len = 50)



In [None]:

def translate_sentence(sentence, model, device, max_len = 50):    
    model.eval()
        # add <s> </s>
    #    "<s> "+linecache.getline(self.src_path, idx + 1).strip()+" </s>"
    src_indexes = pad_sequences(src_tokenizer.encode("<s>"+sentence.strip()+ "</s>").ids, 150)
    #print(src_indexes)
    #torch.tensor(pad_sequences(x,self.max_len_src), dtype=torch.long)
    src_tensor = torch.LongTensor(src_indexes).unsqueeze(0).to(device)
    #src_tensor = torch.LongTensor(src_indexes).unsqueeze(0).to(device)
    
    src_mask = model.make_src_mask(src_tensor)
    #print(src_mask)
    with torch.no_grad():
        enc_src = model.encoder(src_tensor, src_mask)

    trg_indexes = [0]
    
    for i in range(max_len):

        #trg_tensor = torch.LongTensor(pad_sequences(trg_indexes, 50)).unsqueeze(0).to(device)
        trg_tensor = torch.LongTensor(trg_indexes).unsqueeze(0).to(device)
        #print(trg_tensor)
        trg_mask = model.make_trg_mask(trg_tensor)
        #print(trg_mask)
        with torch.no_grad():
            output, attention = model.decoder(trg_tensor, enc_src, trg_mask, src_mask)
        
        pred_token = output.argmax(2)[:,-1].item()
        #print(output[0][-1].argmax())
        #print(pred_token)
        trg_indexes.append(pred_token)

        if pred_token == 2:
            break
    print(trg_indexes)
    trg_tokens = [tgt_tokenizer.decode([i]) for i in trg_indexes]
    
    return trg_tokens, attention

In [None]:
s  = "public static List < String > read Lines ( File file ) throws IO Exception { return read Lines ( file , Charset . default Charset ( ) ) ; }"
#s = "private void update Rating Choice ( ) { int current = m ch Rating . get Selected Index ( ) ; m ch Rating . remove All Items ( ) ; Faction Record f Rec = ( Faction Record ) m ch Subfaction . get Selected Item ( ) ; if ( f Rec == null ) { f Rec = ( Faction Record ) m ch Faction . get Selected Item ( ) ; } Array List < String > rating Levels = f Rec . get Rating Levels ( ) ; if ( rating Levels . is Empty ( ) ) { rating Levels = f Rec . get Rating Level System ( ) ; } if ( rating Levels . size ( ) > NUM ) { for ( int i = rating Levels . size ( ) - NUM ; i >= NUM ; i -- ) { m ch Rating . add Item ( rating Levels . get ( i ) ) ; } } if ( current < NUM && m ch Rating . get Item Count ( ) > NUM ) { m ch Rating . set Selected Index ( NUM ) ; } else { m ch Rating . set Selected Index ( Math . min ( current , m ch Rating . get Item Count ( ) - NUM ) ) ; } }"
#s = "public static boolean memory Is Low ( ) { return available Memory ( ) * NUM < RUNTIME . total Memory ( ) * NUM ; }"
out, _ = translate_sentence(s, model, 'cuda')
print(out)

src_tokenizer.decode([352])

In [None]:
%matplotlib inline
def display_attention(sentence, translation, attention, n_heads = 8, n_rows = 4, n_cols = 2):
    
    assert n_rows * n_cols == n_heads
    
    fig = plt.figure(figsize=(15,25))
    
    for i in range(n_heads):
        
        ax = fig.add_subplot(n_rows, n_cols, i+1)
        
        _attention = attention.squeeze(0)[i].cpu().detach().numpy()

        cax = ax.matshow(_attention, cmap='bone')

        ax.tick_params(labelsize=12)
        ax.set_xticklabels(['']+['<sos>']+[t.lower() for t in sentence]+['<eos>'], 
                           rotation=45)
        ax.set_yticklabels(['']+translation)

        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()
    plt.close()
    
translation, attention = translate_sentence(s, model, 'cuda')
display_attention(s, translation, attention)

In [None]:
import nltk

hypothesis = ['It', 'is', 'a', 'cat', 'at', 'room']
reference = ['It', 'is', 'a', 'cat', 'inside', 'the', 'room']
#there may be several references
BLEUscore = nltk.translate.bleu_score.sentence_bleu([reference], hypothesis)
print(BLEUscore)

In [None]:
print(tgt_tokenizer.decode([225, 2, 225, 1]))
print(tgt_tokenizer.decode(tgt_tokenizer.encode("hello there").ids))
print(tgt_tokenizer.encode("<s> hello there <pad> <pad>").ids)
print(tgt_tokenizer.encode('<pad>').ids)
tgt_tokenizer.decode([10304])

In [None]:
!pip install tqdm

In [None]:
!head -3 ../data/ncs_preprocessed_data/train-ncs/code.original_subtoken

In [None]:
!head -3 ../data/ncs_preprocessed_data/train-ncs/javadoc.original

In [None]:
??tgt_tokenizer

In [None]:
x = torch.Tensor.new_full(50,20000, dtype=torch.long)

In [None]:
src_tokenizer

In [None]:
torch.tensor.new_full((3, 4), 3.141592)

In [None]:
import gc
gc.collect()

In [None]:
!nvidia-smi

In [None]:
len(tgt_tokenizer.encode("reads the contents of a file line by line to a list of strings using the default encoding for the vm . the file is always closed .").ids)

In [None]:
dataset = DataLoader(train_dataset, batch_size = 10, num_workers=10,
                     drop_last=True,
                     shuffle=True)

for (src_, trg_) in dataset:
    print(src_[0])
    break

In [None]:
wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1B8EI5pQUswtUuiYwiInxCb02d5t1NHKH' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1B8EI5pQUswtUuiYwiInxCb02d5t1NHKH" -O ncs_big.7z && rm -rf /tmp/cookies.txt




In [None]:
sum_blue +=compute_bleu([reference], hypothesis, smooth=True)[0]

In [1]:
from google_bleu import compute_bleu

In [4]:
import pickle
ref = pickle.load(open('ref.pkl', 'rb'))

In [6]:
from pprint import pprint
pprint(ref)

{0: ['returns a 0 - based depth within the object graph of the current object '
     'being serialized .'],
 1: ['checks whether the scheme alters the training dataset during building . '
     'if the scheme needs to modify the data it should take a copy of the '
     'training data . currently checks for changes to header structure , '
     'number of instances , order of instances , instance weights .'],
 2: ['compute the union size of two bit sets .'],
 3: ['tests local candidates with remote version in the middle on key 2 .'],
 4: ['actions to perform when the user has closed the dialog with the ok '
     'button .'],
 5: ['prints out the classifier .'],
 6: ['returns a new sector whose angles are the extre mes of the this sector '
     "and another . the new sector ' s minimum latitude and longitude will be "
     "the minimum of the two sectors . the new sector ' s maximum latitude and "
     'longitude will be the maximum of the two sectors . the sectors are '
     'assumed to b

 553: ['deselects the item at the given zero - relative index in the receiver '
       '. if the item at the index was already deselected , it remains '
       'deselected . indices that are out of range are ignored .'],
 554: ['gets the leaf attribute of the dom treemodel object'],
 555: ['add a listener all callbacks of the interface listener will be called '
       'within this function should be called in the main thread'],
 556: ['checks if this date - time is equal to another date - time , including '
       'the chronology . compares this date - time with another ensuring that '
       'the date - time and chronology are the same .'],
 557: ['parse a string into a vector . todo : move this into utility package '
       '?'],
 558: ['create an empty file and write some data on it . then update its '
       'metadata , verify the new metadata and the updated version , and then '
       'write some data on the new version . verify the both file versions can '
       'be read .'],
 

 1258: ['indicates if a group is a child group , a non - top - level data '
        'group in a set of nested data groups ( e . g . , the node or edge '
        'table of a graph or tree ) .'],
 1259: ['throw array index out of bounds exception'],
 1260: ['create the next relevant zero - coverage loc us info'],
 1261: ["this method will be invoked when a service ' s global config uation "
        'data has been changed . the parameter groupname denote the name of '
        'the configuration grouping ( e . g . default ) and service component '
        "denotes the service ' s sub - component that changed"],
 1262: ['launch an email intent if the device is capable .'],
 1263: ['returns the number of data values .'],
 1264: ['insert a item associated with the specified position of adapter'],
 1265: ['adds a translator , which is called whenever a class is loaded .'],
 1266: ['indicates that the main content has scrolled ( for the purposes of '
        'showing / hiding the action bar for

        'found .'],
 1908: ['creates a repository location for a given repository and a set of '
        'path components which will be concatenated by a / .'],
 1909: ['this method will be used to add a new property'],
 1910: ['creates the nested delta deltas based on the affected element its '
        'delta , and the root of this delta tree . returns the root of the '
        'created delta tree .'],
 1911: ['tostring . used for jcombobox in movie editor'],
 1912: ['return the size of the embedded map'],
 1913: ['replace all occurrences of a substring within a string with another '
        'string .'],
 1914: ['determines if a given coordinate lies within a selected zone . the '
        'shape of the zone is defined by the respective zone geometry in the '
        'shape file'],
 1915: ['returns the tip text for this property'],
 1916: ['read data from a label file .'],
 1917: ['parses a double safely , returning a default if nothing can be san '
        'ely parsed from it .'],
 19

 2567: ['the other party has bre ached the contract and submitted an old '
        'channel transaction .'],
 2568: ['construct fan cy loader , grabbing a reference to the dex file class '
        "if we ' re running under dal vi k ."],
 2569: ['how to keep track of all the modified times across the paths . note '
        'that a file might have appeared in a directory which is earlier in '
        'the path ; so we should search the path and see if the file we find '
        'that way is the same as the one that we have cached .'],
 2570: ['flipping moves the contents of the back buffer to the front buffer , '
        'either by copying or by moving the video pointer .'],
 2571: ['load value from property string value'],
 2572: ['returns a list of all namespaces below an element ( recursive )'],
 2573: ["there is some bug and it ' s not perfect . so we should improve this "
        'path for our projection'],
 2574: ['compress the bitmap to the byte array as the specified format and '

        'characters to log and everything else is interpreted as elasticsearch '
        'interprets booleans which is then converted to 0 for false and '
        'integer . max value for true .'],
 3211: ['deletes current version of license file in / tmp / . license'],
 3212: ['alias for test net 3 params . get ( ) , use that instead .'],
 3213: ['encoding loop on private thread'],
 3214: ['de - serialize from input stream .'],
 3215: ['updates the bounds for the fast scroller .'],
 3216: ['creates a new d net scap ec apo licy url dialog .'],
 3217: ['call this method from your gu is cre en to process the keys into the '
        'textbox'],
 3218: ["locate the best instance of ' pattern ' in ' text ' near ' loc ' "
        'using the bit ap algorithm . returns - 1 if no match found .'],
 3219: ['creates a new directory with the given parent folder and folder name '
        '. the newly created folder will be deleted on graceful vm shutdown .'],
 3220: ['is operating system windows 8 o

 3829: ['add a new server to the load snapshot .'],
 3830: ['append leaf to the end of the leaves list'],
 3831: ['convenience method for converting component time longs to strings'],
 3832: ["set the ' content - length ' request header to the given value"],
 3833: ['draw into the provided canvas . assumes that the canvas has been '
        'rotated accordingly and the size has been set . the effect will be '
        'drawn the full width of x = 0 to x = width , beginning from y = 0 and '
        'extending to some factor < 1 . f of height .'],
 3834: ['modify lun ( export / unexport / expand etc ) in async mode'],
 3835: ['copies this string replacing occurrences of the specified target '
        'sequence with another sequence . the string is processed from the '
        'beginning to the end .'],
 3836: ['closes the cache and deletes all of its stored values . this will '
        "delete all files in the cache directory including files that weren ' "
        't created by the cache 

        'will become the intersecting area of the two regions .'],
 4455: ['indicates if convenience get method can be called without an '
        'exception being thrown for the int type .'],
 4456: ['adds a new waiter to wait queue .'],
 4457: ['clears the ring buffer but moving the read position to the write '
        'position'],
 4458: ['returns the portion of its input that consists of xml safe chars .'],
 4459: ['compares a string line wise , i . e . ignores different line break '
        'characters . does this by incrementally reading all expected an '
        'actual lines and comparing them line wise .'],
        'in case an exception was thrown'],
 4461: ['adds fill components to empty cells in the first row and first column '
        'of the grid . this ensures that the grid spacing will be the same as '
        'shown in the designer .'],
 4462: ['prefix one of the registered no uns with an expression like " piece '
        'of " .'],
 4463: ['clears the password expirati

        'then use this bounds object to snap the full selection on screen .'],
 4852: ['records the size of successive runs of white and black pixels in a '
        'row , starting at a given point . the values are recorded in the '
        'given array , and the number of runs recorded is equal to the size of '
        'the array . if the row starts on a white pixel at the given start '
        'point , then the first count recorded is the run of white pixels '
        'starting from that point ; likewise it is the count of a run of black '
        'pixels if the row begin on a black pixels at that point .'],
 4853: ['returns ( method , params ) for the given service or ( null , null ) '
        'if no method was found .'],
 4854: ['utility method to find the first invalid character as per rfc 2396 '
        'section 2 . this helps us prevent creating excessive objects in the '
        'encode ( ) methods since most strings will not be encoded .'],
 4855: ['decodes contents of the byt

        "topology changes . the method returns true if there ' s any change to "
        'the list of attachment points - - which indicates a possible device '
        'move .'],
 5352: ['removes soft wraps with offsets equal or larger than a given offset '
        'from storage .'],
 5353: ['formats given d ps ( = damage per second ) to a readable string . '
        'with braces .'],
 5354: ['sets the maximum length of the string . longer strings will be simply '
        'truncated .'],
 5355: ['escape a string to create a valid json string'],
 5356: ['answers a sortedset of the specified portion of this treeset which '
        'contains elements greater or equal to the start element but less than '
        'the end element . the returned sortedset is backed by this treeset so '
        'changes to one are reflected by the other .'],
 5357: ['method that checks if a legion is dis ban ding'],
 5358: ['indexes the given field of the provided tuple instance .'],
 5359: ['iterates from th

 5974: ['adds a mapping from the specified key to the specified value , '
        'replacing the previous mapping from the specified key if there was '
        'one .'],
 5975: ['readdata - - read the reference word .'],
 5976: ['return the dot product to a dense feature vector'],
 5977: ['creates the new instructions , inlining each instantiation of each '
        'subroutine until the code is fully elabor ated .'],
 5978: ['gets the standard deviation from a list of numbers .'],
 5979: ["trims the capacity of this instance to be the list ' s current size . "
        'an application can use this operation to minimize the storage of some '
        'instance .'],
 5980: ['setbit ( int n ) outside zero'],
 5981: ['compares a date with a boolean'],
 5982: ['read ahead and decode the next chunk of solutions .'],
 5983: ['create a new udp client socket with the specified logger .'],
 5984: ['will multiply all values by a given double . can be used to divide '
        'all numbers , if given

 6549: ['throws an exception if the format is not syntactically valid .'],
 6550: ['parse the uppercase mapping attribute for a unicode character . if '
        'there is a uppercase mapping attribute and the parse succeeds , then '
        'the has upper map field is set to true , the upper map field of this '
        'unicode spec object is updated , and false is returned . if the '
        'uppercase mapping attribute is an empty string , the parse succeeds '
        'but the has upper map field is set to false . ( and false is returned '
        ') . the uppercase mapping attribute should be a four to six digit '
        'hexadecimal integer .'],
 6551: ['block on the queue until an entry is been cleaned by the gc'],
 6552: ['compresses the given number of files , each of the given size , into '
        'a . zip archive .'],
 6553: ['ensures that a string for a playlist is valid , checking if it is '
        'empty , or another playlist has the same name .'],
 6554: ['uses pack in 

 7218: ["determines if the prompt contains ' select account ' ."],
 7219: ['sorts the instances according to the given attribute / dimension . '
        'the sorting is done on the master index array and not on the actual '
        'instances object .'],
 7220: ['flips the given image on the horizontal axis'],
 7221: ['whether audit is enable ? defaults to true .'],
 7222: ['is a terms & conditions request'],
 7223: ['reads the segment metadata , the sequence and table key .'],
 7224: ['if time is > 60 60 24 [ seconds ] , it will be projected into next '
        'day , e . g . time = 60 60 24 + 1 = 1 even if time is negative , it '
        'is turned into a positive time by adding number of seconds of day '
        'into it consecutively'],
 7225: ['below method will be used to update the min or max value by removing '
        'the length from it'],
 7226: ['construct a node in the result tree . this method is overloaded by '
        'xsl : attribute . at this class level , this method

        'note that the user setting may be empty , defaulting to the running '
        'system locale which may be other than english . here we check the '
        'effective locale seen in the messages bundle .'],
 7645: ['call this method from your gu is cre en to process the keys into the '
        'textbox'],
 7646: ['parse a full image name ( my host : 300 / namespace / repo : tag ) '
        'into its components'],
 7647: ['adds the set of channel names to the set of listened channels .'],
 7648: ['deletes a directory recursively .'],
 7649: ['output property summary table .'],
 7650: ['deletes any empty folder from the db .'],
 7651: ['perform an asynchronous sum operation'],
 7652: ['write ( byte [ ] b , int off , int len ) method testing .'],
 7653: ['converts the loggingevent data in xml string format into an actual '
        'xml document class instance .'],
 7654: ['checks to see if the volume is a production journal . we check to see '
        "if the volume ' s rp copy na

 8401: ['generates activity scope graph using activity module plus additional '
        'modules provided by inheritance .'],
 8402: ['request the write lock . block until a write operation can be '
        'performed safely . write requests are guaranteed to be executed in '
        'the order received . pending read requests take precedence over all '
        'write requests . this call must be followed by a call to writ ed one '
        '( ) when the write operation completes .'],
 8403: ['reads a date value from the input stream .'],
 8404: ['removes the listener from the collection of listeners who will be '
        "notified when the user changes the receiver ' s value ."],
 8405: ['equivalent to , but che aper than writing integer . tohexstring ( ) . '
        'getbytes ( ) followed by crlf .'],
 8406: ['read an imp ut stream reader'],
 8407: ['create a list of maps from the list of type parameters .'],
 8408: ["find free space on the nix platform using the ' df ' command ."],
 

In [7]:
import pickle
hyp = pickle.load(open('hyp.pkl', 'rb'))
pprint(hyp)

{0: ['returns a 0 - based depth within the object graph of the current object '
     'being serialized .'],
 1: ['checks whether the scheme alters the training dataset during training . '
     'if the scheme needs to modify the training data it should take a copy of '
     'the training data . currently checks for changes to header structure , '
     'number of instances , order of instances , instance weights .'],
 2: ['compute the union size of two bit sets .'],
 3: ['test 2 keys with candidates in reverse order .'],
 4: ['responds to a closing event under an ok condition .'],
 5: ['outputs a description of the rule .'],
 6: ['returns a new sector whose angles are the extre mes of the this sector '
     "and another . the new sector ' s minimum latitude and longitude will be "
     "the minimum of the two sectors . the new sector ' s maximum latitude and "
     'longitude will be the maximum of the'],
 7: ['check if the value is equal to the given double value , e . g . if the '
    

       'folds .'],
 757: ['adds the specified segment to the list .'],
 758: ['delete functionality test test delete 6 ( ) . todo foreign key '
       'functionality is not supported'],
 759: ['called by a websocket thread ; please use this method only in tests , '
       'since it is an asynchronous manner .'],
 760: ['changes the size of this vector so that the size is the same as the '
       'size .'],
 761: ['inserts the row at the specified row and column index .'],
 762: ['adds the default uri .'],
 763: ['method called to transfer sequences from source . note that string '
       'parameter can not be parsed as a valid hex number . not expected , but '
       'the " percent " must be passed as an argument .'],
 764: ['computes the distance from a point p to a line segment ab note : non - '
       'robust !'],
 765: ['returns a copy of the current instance to achieve immutability'],
 766: ['runs the test case .'],
 767: ['returns the fix ids for the directory server .'],
 768: [

 1452: ['run the thread size loop in a separate thread .'],
 1453: ['reads the occurrence of the variable - length tuple'],
 1454: ['lock api for locking of the file channel of the lock file .'],
 1455: ['open the datagram connection'],
 1456: ['returns a string describing this result listener'],
 1457: ['conversion of the output script using a explain script executor , '
        'specifically for manual queries and queries . must be called before '
        'any other calls to this method or the super class that executed the '
        'output script .'],
 1458: ['disable a specific feature on this rich formatter .'],
 1459: ['remove a listener for z wave events'],
 1460: ['returns a value in python time'],
 1461: ['adds a new items at the end of the existing list .'],
 1462: ['quote a java keyword ( will throw an exception ) if the given name '
        'contains a java keyword .'],
 1463: ['static helper method for populating attributes from a database cursor '
        '.'],
 1464: ['e

 2238: ['gets the primary keys of the current row from the tre st and returns '
        'them as a set .'],
 2239: ['reads long as little endian .'],
 2240: ['returns a prefix token for the given leaf node from it .'],
 2241: ['adds a new csv record to the set of output records .'],
 2242: ['called by the native code internally to decrement the send queue and '
        'notify the listener .'],
 2243: ['spins / yields / blocks until node s is matched or caller gives up .'],
 2244: ['concatenates the string representation of each items in this array , '
        'with the given string as a separator between each item .'],
 2245: ['find a producer for the given target type .'],
 2246: ['initialize the compute shader .'],
 2247: ['the method to compare the response body against the failure or a '
        "successful response body if it ' s the case . the method assumes my "
        'response body is complete and returns true if the response body is '
        "completed or if it ' s the cas

        'main whitebox user interface .'],
 3043: ['accepts a sql select , if any .'],
 3044: ['executes the script at the specified location and returns the result '
        '.'],
 3045: ['creates a implementation of dom test documentbuilderfactory using org '
        '. apache . html . dom . html builder'],
 3046: ['format the sample rate to a string'],
 3047: ['checks whether this iterator has been closed .'],
 3048: ['start our processor thread .'],
 3049: ['makes a new simpli fier for the given package and set of types .'],
 3050: ['append an array of bytes back into a byte array , using the '
        'hexadecimal encoding .'],
 3051: ['returns the native name of this element type .'],
 3052: ['the graphical representation of the series values as text .'],
 3053: ['returns the xml string for the given host document .'],
 3054: ['apply any client addresses to seeds in the cluster .'],
 3055: ['cast a value to a boolean value ( primitive value type )'],
 3056: ['are there more work 

 3746: ['re - measure the loading views height , and adjust internal padding '
        'as necessary'],
 3747: ['return flags as a string , separated by " " .'],
 3748: ['format the sql string .'],
 3749: ['indent text by two spaces . after calling indent ( ) , two spaces '
        'will be inserted at the beginning of each line of text . indent ( ) '
        'may be called multiple times to produce deeper indents .'],
 3750: ['add a fake view to the end of the list .'],
 3751: ['compile a list of projects .'],
 3752: ['internal function used for the contact info'],
 3753: ['used to communicate a progress update between a plugin tool and the '
        'main whitebox user interface .'],
 3754: ['dispatches the result of this promise'],
 3755: ['adjusts the name to avoid name conflicts in the new session and , if '
        'the name is adjusted , adjusts the position so the user can see the '
        'two nodes .'],
 3756: ['create the snapshot .'],
 3757: ['save the current properties t

        'command - line testing .'],
 4606: ['setbit ( int n ) outside zero'],
 4607: ['this will convert an array of boolean datatypes to string . this will '
        'be in fact that it will only be used for get boolean converter .'],
 4608: ['draws an axis line at the current cursor position and edge .'],
 4609: ['save a model in x - stream .'],
 4610: ['sorts the span based on their accessions .'],
 4611: ['split the data set in the data set .'],
 4612: ['drops a prefix from a path if it exists or returns original path if '
        'prefix does not match .'],
 4613: ['publish a message to a specific url'],
 4614: ['removes the given adaptable object from this list .'],
 4615: ['creates a lazily concatenated stream whose elements are all the '
        'elements of the first stream followed by all the elements of the '
        'second stream . the resulting stream is ordered if both of the input '
        'streams are ordered , and parallel if either of the input streams is '
       

 5396: ['the graphical representation of the labels on the x axis .'],
 5397: ['encode the byte array as a base 64 string .'],
 5398: ['decode the base 64 - encoded data in input and return the data in a '
        "new byte array . the padding ' = ' characters at the end are "
        'considered optional , but if any are present , there must be the '
        'correct number of them .'],
 5399: ['returns whether the specified object equals to this finite field .'],
 5400: ['0 . 6 . 5 . 5 defines chunk distances of a chunk of error state .'],
 5401: ['this method creates an el sa serializer pojo object .'],
 5402: ['loads the binary from the given fileinputstream .'],
 5403: ['return if the given class node is an [ to , k ] , or void . signature '
        '( ) .'],
 5404: ['adds the provided parameter sections .'],
 5405: ['consume the declaration of a fake declaration .'],
 5406: ['processes the logout request and returns the response .'],
 5407: ['override the equals method . two cove

 6234: ['inserts the string representation of the string argument into this '
        'string buffer . the second argument is converted to a string as if by '
        'the method string . valueof , and the characters of that string are '
        'then inserted into this string buffer at the position indicated by '
        'index .'],
 6235: ['mkdir the request was signed with the key .'],
 6236: ['calculates the log value using the natural log of the log method .'],
 6237: ['rounds the x , y , and z values of the given vector 3 to the nearest '
        'integer value .'],
 6238: ['used to receive ss dp datagram packet'],
 6239: ['tear down after testing .'],
 6240: ['flushes all pending data .'],
 6241: ['convert a number of degrees to rgb color space'],
 6242: ['sets the advanced state map .'],
 6243: ['called by the network listener interface . receives the message '
        'string as a decimal integer and reflects it as per the " address " '
        'attribute .'],
 6244: ['returns

        "'"],
 7015: ['removes cluster node map that have the given name or method from '
        'cache .'],
 7016: ['write the given device to storage if we are master . use this method '
        'if the device has significantly changed ( e . g . , new ap , new ip , '
        'entities removed ) .'],
 7017: ['runs the test case .'],
 7018: ['internal helper method to adjust a absolute value of magnitude '
        'magnitude to relative to the classic vector vector .'],
 7019: ['removes the audio processor at the provided index .'],
 7020: ['this method builds the suffix chain .'],
 7021: ['replaces the by - line links in the supplied locator , using the '
        'partial protocol meta - inf / utility method . for example , if the '
        "xpath expression contains ' foo / bar ' , ' foo ' , ' bar ' and ' d ' "
        '.'],
 7022: ['notify listeners that a task has started .'],
 7023: ['this method reports that a comment is preceding to the xml element . '
        'currently we do 

        'encoded public key .'],
 7689: ['convert a user preference genericvalue to a user pref map .'],
 7690: ['clamp the magnitude of value for absmin and absmax . if the value is '
        'below the minimum , it will be clamped to zero . if the value is '
        'above the maximum , it will be clamped to the maximum .'],
 7691: ['append an object as a string .'],
 7692: ['delete a user specified authentication information from a properties '
        'file on the same password as the authentication server .'],
 7693: ['writes error occurred in the component into a log .'],
 7694: ['removes an audio processor from the list of listeners .'],
 7695: ['clamp the magnitude of value for absmin and absmax . if the value is '
        'below the minimum , it will be clamped to zero . if the value is '
        'above the maximum , it will be clamped to the maximum .'],
 7696: ['returns the animation type or 0 if cannot be found .'],
 7697: ['append a string onto the vector .'],
 7698: ['che

 8626: ['this is the default implementation of writeobject . customize if '
        'necessary .'],
 8627: ['compares two money objects for equality with the specified object'],
 8628: ['this method was generated by mybatis generator . this method '
        'corresponds to the database table address'],
 8629: ['obtains an instance due to a given field and field name .'],
 8630: ['processes an event to the event domain .'],
 8631: ['unregisters a membership listener'],
 8632: ['add a message to the given resolver .'],
 8633: ['present the final image on the screen / viewport .'],
 8634: ['decrement a calendar by one day .'],
 8635: ['angular object client bind method .'],
 8636: ['removes all objects in the given list from the given list . '
        'afterwards , if the list is modified modified to the remove ( ) '
        'method .'],
 8637: ['creates a new project tree object .'],
 8638: ['determine if the expression is an object .'],
 8639: ['use interpolator to get t'],
 8640: ['aut