# Experiment Motivation

The baseline AST encoder embeds each node only by its node-type ID, which carries little semantic information. Here we use a CodeBERT-family encoder (GraphCodeBERT) to initialize the node-type embeddings and check whether this augmentation improves performance. To do so, ASTNodeEncoder is extensively modified (see SemanticNodeEncoder below).


In [None]:
# Hyper-parameters and run settings shared by the cells below.
args = dict(
    num_layers=3,             # number of GCNConv layers in the model
    hidden_dim=768,           # width of node embeddings / hidden GCN layers
    output_dim=768,           # width of the pooled graph embedding
    dropout=0.05,
    lr=0.001,
    epochs=25,
    num_vocab=5000,           # most frequent label sub-tokens kept in the vocabulary
    warmup=2,                 # only referenced by the commented-out LR schedule
    weight_decay=0.000001,
    words_per_method_name=5,  # number of sub-tokens predicted per graph
    max_steps=99999,          # lower this to shorten train/eval loops when debugging
    batch_size=32,
)

# Setup

In [None]:
!pip install ogb
!pip install torch_geometric
!python -c "import ogb; print(ogb.__version__)"

import os
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from torch_geometric.loader import DataLoader
import torch
import pandas as pd
import torch.nn.functional as F
from tqdm.notebook import tqdm
print(torch.__version__)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device: {}'.format(device))

# The PyG built-in GCNConv
from torch_geometric.nn import GCNConv

import torch_geometric.transforms as T
from torch_geometric.nn import global_add_pool, global_mean_pool
from numpy import float32
from transformers import AutoTokenizer, AutoModel


import copy
import numpy as np



Collecting ogb
  Downloading ogb-1.3.6-py3-none-any.whl (78 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/78.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━[0m [32m71.7/78.8 kB[0m [31m2.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting outdated>=0.2.0 (from ogb)
  Downloading outdated-0.2.2-py2.py3-none-any.whl (7.5 kB)
Collecting littleutils (from outdated>=0.2.0->ogb)
  Downloading littleutils-0.2.2.tar.gz (6.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: littleutils
  Building wheel for littleutils (setup.py) ... [?25l[?25hdone
  Created wheel for littleutils: filename=littleutils-0.2.2-py3-none-any.whl size=7026 sha256=558387fa455ca1632e0604b85d250fbc74e29ccd308737c73a4bc9d259dc9b42
  Stored in directory: /root/.cache/pip

# Dataset

In [None]:
# Load the ogbg-code2 dataset (downloaded and processed on first use) together
# with its matching evaluator.
dataset = PygGraphPropPredDataset(name="ogbg-code2")
print(dataset)
evaluator = Evaluator(name='ogbg-code2')

# The notes below are kept as comments (not a bare string literal) so the cell
# does not echo them back as its output value.
#
# ==== Expected input format of Evaluator for ogbg-code2
# {'seq_ref': seq_ref, 'seq_pred': seq_pred}
# - seq_ref: a list of lists of strings
# - seq_pred: a list of lists of strings
# where seq_ref stores the reference sequences of sub-tokens, and
# seq_pred stores the predicted sequences of sub-tokens.
#
# ==== Expected output format of Evaluator for ogbg-code2
# {'F1': F1}
# - F1 (float): F1 score averaged over samples.

# print(dataset.num_classes)
# print(dataset.num_node_features)
# print(dataset[0])
# print(len(dataset.y))

Downloading http://snap.stanford.edu/ogb/data/graphproppred/code2.zip


Downloaded 0.91 GB: 100%|██████████| 934/934 [03:22<00:00,  4.61it/s]


Extracting dataset/code2.zip


Processing...


Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 452741/452741 [00:01<00:00, 377499.93it/s]


Converting graphs into PyG objects...


100%|██████████| 452741/452741 [00:18<00:00, 24633.39it/s]


Saving...


Done!


PygGraphPropPredDataset(452741)


"\n==== Expected input format of Evaluator for ogbg-code2\n{'seq_ref': seq_ref, 'seq_pred': seq_pred}\n- seq_ref: a list of lists of strings\n- seq_pred: a list of lists of strings\nwhere seq_ref stores the reference sequences of sub-tokens, and\nseq_pred stores the predicted sequences of sub-tokens.\n\n==== Expected output format of Evaluator for ogbg-code2\n{'F1': F1}\n- F1 (float): F1 score averaged over samples.\n"

# GCN Model
This GCN model uses multiple GCN layers and a global_add_pool pooling method for graph level aggregation.

In [None]:
from torch_geometric.nn.models.tgn import Linear
class GCN(torch.nn.Module):
    """
    Graph-level classifier that predicts a fixed-length sequence of vocabulary
    tokens (a method name) for every AST in a batch.

    Pipeline: node_embedder -> (num_layers - 1) x [GCNConv, BatchNorm1d, ReLU,
    dropout] -> final GCNConv -> global_add_pool -> one linear head that is
    reshaped into pred_list_len independent token classifiers.

    NOTE(review): an earlier version of this docstring said no self-loops are
    added, but GCNConv is constructed with its default arguments here —
    confirm against PyG's default for `add_self_loops`.

    Args:
        node_embedder: module called as node_embedder(x, depth) that returns
            one hidden_dim-wide embedding per node.
        hidden_dim: width of the node embeddings and hidden GCN layers.
        output_dim: width of the last GCN layer / pooled graph embedding.
        num_layers: total number of GCNConv layers.
        dropout: dropout probability applied after each hidden layer.
        pred_list_len: number of tokens predicted per graph.
        num_classes: vocabulary size of each token prediction.

    Input
        A batched graph object exposing x, edge_index, edge_attr, node_depth
        and batch.
    Output
        Un-normalized scores of shape batch_size x pred_list_len x num_classes.
    """
    def __init__(self, node_embedder, hidden_dim, output_dim, num_layers,
                 dropout, pred_list_len, num_classes):
        super().__init__()

        self.node_embedder = node_embedder
        self.hidden_dim = hidden_dim
        self.pred_list_len = pred_list_len
        self.num_classes = num_classes

        # GCN layers: (num_layers - 1) hidden layers plus one output layer
        self.convs = torch.nn.ModuleList()
        self.convs.extend([GCNConv(hidden_dim, hidden_dim) for _ in range(num_layers-1)])
        self.convs.append(GCNConv(hidden_dim, output_dim))

        # Batch norm, one per hidden GCN layer
        self.bns = torch.nn.ModuleList()
        self.bns.extend([torch.nn.BatchNorm1d(hidden_dim) for _ in range(num_layers-1)])

        # Global pool
        self.pool_method = global_add_pool

        # Probability of an element getting zeroed
        self.dropout = dropout

        # Post-processing layer that transforms the graph embedding into
        # pred_list_len token predictions for each graph
        self.post_mps = Linear(output_dim, num_classes*pred_list_len, bias=True)

    def reset_parameters(self):
        """
        Re-initialize the GCN layers, batch norms and the prediction head.

        The node embedder is deliberately left untouched: it may carry
        pretrained weights that a reset would throw away.
        """
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()
        # The head was previously skipped, so a "reset" model kept trained
        # output weights.
        self.post_mps.reset_parameters()

    def forward(self, batched_data):
        """
        Compute token scores for every graph in the batch.

        Returns a tensor of shape batch_size x pred_list_len x num_classes.
        """
        x, edge_index, node_depth, batch = (
            batched_data.x,
            batched_data.edge_index,
            batched_data.node_depth,
            batched_data.batch,
        )

        # Pre-processing. Each node has two integer features (node type id and
        # node attribute id, per the encoder docstrings); the node embedder
        # turns them, together with the node depth, into one dense vector.
        x = self.node_embedder(x, node_depth.view(-1,))

        # Hidden layers: conv -> batch norm -> ReLU -> dropout
        for i in range(len(self.convs)-1):
            x = self.convs[i](x, edge_index)
            x = self.bns[i](x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        out = self.convs[-1](x, edge_index) # num_nodes x output_dim

        out = self.pool_method(out, batch) # batch_size x output_dim

        # Turn the single embedding per graph into pred_list_len token
        # predictions: batch_size x pred_list_len x num_classes
        out = self.post_mps(out).view(out.shape[0], self.pred_list_len, self.num_classes)

        return out

# Train

In [None]:
def train(model, device, data_loader, optimizer, epoch, args):
    """
    Run one training epoch and print the mean per-sample loss.

    Args:
        model: network called as model(batch); returns scores of shape
            batch_size x words_per_method_name x num_classes.
        device: device every batch is moved to.
        data_loader: iterable of batched graphs.
        optimizer: optimizer stepped once per processed batch.
        epoch: current epoch number (unused; kept for the disabled warm-up).
        args: settings dict; reads 'max_steps', 'num_classes' and
            'words_per_method_name'.

    Returns:
        The printed epoch loss as a float (NaN if the loader was empty).
    """
    model.train()

    # Accumulated as a plain Python float so that no autograd history is kept
    # alive across steps.
    loss_accum = 0.0
    steps_seen = 0

    # Used to shorten training for debugging
    max_steps = args['max_steps']

    loss_fn = torch.nn.CrossEntropyLoss()

    for step, batch in enumerate(tqdm(data_loader, desc="Iteration")):
        steps_seen = step + 1
        batch = batch.to(device)

        # Skip batches that hold a single node or a single graph.
        if not (batch.x.shape[0] == 1 or batch.batch[-1] == 0):
            optimizer.zero_grad()

            # Scores: batch_size x words per graph x num_classes
            pred_list = model(batch)

            # y comes in as a list of words per graph. One-hot encode it to
            # batch_size x words_per_method_name x num_classes
            y = one_hot_y(batch.y, args['num_classes'], args['words_per_method_name'])

            # Sum of per-graph losses; each term averages over that graph's
            # word positions.
            loss = 0
            for i in range(pred_list.shape[0]):
                loss += loss_fn(pred_list[i].to(torch.float32), y[i].to(torch.float32))

            loss.backward()
            optimizer.step()

            loss_accum += loss.item() / pred_list.shape[0]

        # Used to shorten training for debugging. Checked for every batch
        # (also skipped ones), matching eval().
        if step > max_steps:
            break

    # Guard against an empty loader, where no step was ever taken.
    epoch_loss = loss_accum / steps_seen if steps_seen else float('nan')
    print("Epoch loss {}".format(epoch_loss))
    return epoch_loss


# Eval

In [None]:
def eval(model, device, loader, evaluator, args):
    """
    Score the model on every batch of `loader` and return the evaluator result.

    Reference sequences come from batch.y; predicted sequences are produced by
    decode_to_word from the model scores. Batches with a single node are
    skipped. The first five reference/predicted sequences are printed as a
    sanity check.
    """
    model.eval()

    # Used to shorten evaluation for debugging
    step_limit = args['max_steps']

    references, predictions = [], []

    for step, batch in enumerate(tqdm(loader, desc="Iteration")):
        batch = batch.to(device)

        if batch.x.shape[0] != 1:
            with torch.no_grad():
                scores = model(batch)  # batch_size x pred_list_len x num_classes

            references.extend(batch.y)
            predictions.extend(decode_to_word(scores))

        if step > step_limit:
            break

    print("seq_ref_list {}".format(references[:5]))
    print("seq_pred_list {}".format(predictions[:5]))

    return evaluator.eval({"seq_ref": references, "seq_pred": predictions})

# Helper methods
These helpers may later be moved into a separate module.

In [None]:
class SemanticNodeEncoder(torch.nn.Module):
    '''
    Node encoder whose node-type embedding table is initialized from a
    CodeBERT-style encoder applied to the node-type strings.

    The node embedding is the sum of three lookups: node type (initialized
    from the encoder), node attribute (randomly initialized; the encoder-based
    initialization is currently disabled) and node depth.

        Input:
            codeBert: callable encoder; codeBert(ids)[0] must hold one vector
                per input token for a 1 x num_tokens id tensor.
            tokenizer: object providing tokenize() and convert_tokens_to_ids().
            emb_dim: embedding width. The type table takes the width of the
                encoder output, so emb_dim must equal that width for the three
                embeddings to be summable.
            node_type_mapping: list of node type strings
            node_attributes_mapping: list of node attribute strings
        Output (forward):
            One emb_dim-wide vector per node.
    '''
    def __init__(self, codeBert, tokenizer, emb_dim, node_type_mapping, node_attributes_mapping):
        super().__init__()

        self.max_depth = 20
        self.codeBert = codeBert
        self.tokenizer = tokenizer
        self.emb_dim = emb_dim
        self.node_type_mapping = node_type_mapping
        self.node_attributes_mapping = node_attributes_mapping

        self.type_encoder = torch.nn.Embedding(len(node_type_mapping), emb_dim)
        self.attribute_encoder = torch.nn.Embedding(len(node_attributes_mapping), emb_dim)
        self.depth_encoder = torch.nn.Embedding(self.max_depth+1, emb_dim)

        # Replace the random type table with encoder-derived vectors; it stays
        # a trainable Parameter.
        self.type_encoder.weight = torch.nn.Parameter(self.get_embedding(node_type_mapping))
        #self.attribute_encoder.weight = torch.nn.Parameter(self.get_embedding(node_attributes_mapping))

    def _encoder_device(self):
        '''Device of the encoder's first parameter, or CPU if it has none.'''
        parameters = getattr(self.codeBert, 'parameters', None)
        first = next(parameters(), None) if callable(parameters) else None
        return first.device if first is not None else torch.device('cpu')

    def get_embedding(self, mapping):
        '''
        Input:
            mapping: list of feature strings (node types or node attributes)
        Output:
            Tensor with one row per entry of `mapping`: the sum of the encoder
            vectors of the string's tokens. Repeated strings are encoded once.
            A string that yields no tokens maps to a zero row of width emb_dim.
        '''
        target_device = self._encoder_device()
        node_embeddings = []
        feature_embedded = {}

        # The result only seeds an embedding table, so no autograd graph is
        # needed for the encoder forward passes.
        with torch.no_grad():
            for feature_string in mapping:
                if feature_string in feature_embedded:
                    node_embedding = feature_embedded[feature_string]
                else:
                    tokens = self.tokenizer.tokenize(feature_string)
                    tokens_ids = self.tokenizer.convert_tokens_to_ids(tokens)
                    if len(tokens_ids) == 0:
                        # Nothing to encode; avoid feeding an empty sequence.
                        node_embedding = torch.zeros(1, self.emb_dim, device=target_device)
                    else:
                        tokens_tensor = torch.tensor(tokens_ids, dtype=torch.long, device=target_device)[None, :]
                        node_embedding = self.codeBert(tokens_tensor)[0].view(tokens_tensor.shape[1], -1)
                        # sum embeddings if >1 token in the feature string
                        node_embedding = node_embedding.sum(dim=0, keepdim=True)
                    feature_embedded[feature_string] = node_embedding
                node_embeddings.append(node_embedding)

        return torch.cat(node_embeddings, dim=0)

    def forward(self, x, depth):
        '''
            Input:
                'x': default node feature. the first and second column represents node type and node attributes.
                'depth': The depth of the node in the AST.
            Output:
                Sum of the type, attribute and depth embeddings, one row per node.
        '''
        # Cap depths at max_depth without writing into the caller's tensor.
        depth = torch.clamp(depth, max=self.max_depth)

        return self.type_encoder(x[:,0]) + self.attribute_encoder(x[:,1]) + self.depth_encoder(depth)

class ASTNodeEncoder(torch.nn.Module):
    '''
    Baseline node encoder: sums learned embeddings of node type, node
    attribute and (capped) node depth.

        Input:
            x: default node feature. the first and second column represents node type and node attributes.
            depth: The depth of the node in the AST.

        Output:
            emb_dim-dimensional vector

    '''
    def __init__(self, emb_dim, num_nodetypes, num_nodeattributes, max_depth):
        super().__init__()

        self.max_depth = max_depth

        self.type_encoder = torch.nn.Embedding(num_nodetypes, emb_dim)
        self.attribute_encoder = torch.nn.Embedding(num_nodeattributes, emb_dim)
        # One extra row so that depth == max_depth is a valid index.
        self.depth_encoder = torch.nn.Embedding(self.max_depth + 1, emb_dim)


    def forward(self, x, depth):
        '''Return one emb_dim-wide embedding per node (row of x).'''
        # Cap depths at max_depth without writing into the caller's tensor
        # (the previous masked assignment modified the batch's depth data).
        depth = torch.clamp(depth, max=self.max_depth)

        return self.type_encoder(x[:,0]) + self.attribute_encoder(x[:,1]) + self.depth_encoder(depth)


def get_vocab_mapping(seq_list, num_vocab):
    '''
        Build the label vocabulary from the most frequent words.

        Input:
            seq_list: a list of sequences
            num_vocab: maximum number of words kept in the vocabulary
        Output:
            vocab2idx:
                A dictionary that maps vocabulary into integer index.
                Additionally, we also index '__UNK__' and '__EOS__'
                '__UNK__' : out-of-vocabulary term
                '__EOS__' : end-of-sentence
            idx2vocab:
                A list that maps idx to actual vocabulary.

        Words are ranked by frequency; ties keep first-seen order. The two
        special tokens always take the last two indices, also when fewer than
        num_vocab distinct words exist.
    '''

    vocab_cnt = {}
    for seq in seq_list:
        for w in seq:
            vocab_cnt[w] = vocab_cnt.get(w, 0) + 1
    # dicts preserve insertion order, so this is the first-seen order
    vocab_list = list(vocab_cnt)

    cnt_list = np.array([vocab_cnt[w] for w in vocab_list], dtype=np.int64)
    topvocab = np.argsort(-cnt_list, kind = 'stable')[:num_vocab]

    # Fraction of all word occurrences covered by the kept vocabulary
    # (0.0 when there are no words at all, instead of dividing 0 by 0).
    total_cnt = np.sum(cnt_list)
    coverage = float(np.sum(cnt_list[topvocab]))/total_cnt if total_cnt > 0 else 0.0
    print('Coverage of top {} vocabulary:'.format(num_vocab))
    print(coverage)

    vocab2idx = {vocab_list[vocab_idx]: idx for idx, vocab_idx in enumerate(topvocab)}
    idx2vocab = [vocab_list[vocab_idx] for vocab_idx in topvocab]

    # Index the special tokens right after the kept words. Using
    # len(idx2vocab) rather than num_vocab keeps both mappings aligned when
    # fewer than num_vocab distinct words are available.
    vocab2idx['__UNK__'] = len(idx2vocab)
    idx2vocab.append('__UNK__')

    vocab2idx['__EOS__'] = len(idx2vocab)
    idx2vocab.append('__EOS__')

    # test the correspondence between vocab2idx and idx2vocab
    for idx, vocab in enumerate(idx2vocab):
        assert(idx == vocab2idx[vocab])

    # test that the idx of '__EOS__' is len(idx2vocab) - 1.
    # Decoding relies on the special tokens being the last two entries.
    assert(vocab2idx['__EOS__'] == len(idx2vocab) - 1)

    return vocab2idx, idx2vocab

def one_hot_y(batch_y, num_classes, max_seq_len):
    '''
    One-hot encode the target word sequences of a batch.

    Input:
        batch_y: a list of word lists, one per graph
        num_classes: size of the one-hot dimension
        max_seq_len: number of word positions per graph; longer sequences are
            truncated, shorter ones are padded with '__EOS__'
    Output:
        one hot encoding of dimensions batch_size x max_seq_len x num_classes,
        on the global `device`. Words missing from the global `vocab2idx` are
        encoded as '__UNK__'.
    '''
    unk_index = vocab2idx['__UNK__']
    eos_index = vocab2idx['__EOS__']

    # Build the index table on the host and move it to the device once,
    # instead of writing every element into a device tensor one at a time.
    rows = []
    for y in batch_y:
        indices = [vocab2idx.get(w, unk_index) for w in y[:max_seq_len]]
        indices.extend([eos_index] * (max_seq_len - len(indices)))
        rows.append(indices)

    # The explicit reshape keeps the (batch, max_seq_len) shape for an empty batch.
    labels = torch.tensor(rows, dtype=torch.long, device=device).reshape(len(rows), max_seq_len)

    return F.one_hot(labels, num_classes) # batch x word len x num_class

def decode_to_word(pred_list):
    '''
        Convert per-position class scores back into words by taking the
        argmax over the class dimension.

        Indices of the last two vocabulary entries (the special tokens) are
        dropped from the output. Positions after a dropped token are still
        decoded, i.e. the sequence is not cut at the first end-of-sentence.

        Input:  scores, batch x word len x num class
        Output: list of lists, each a sequence of words. batch x (<= word len)
    '''
    best_indices = torch.argmax(pred_list, dim=-1).tolist()
    num_real_words = len(vocab2idx) - 2

    return [
        [idx2vocab[word_index] for word_index in graph_indices if word_index < num_real_words]
        for graph_indices in best_indices
    ]

# Run Epochs
Train the GCN model and, after every epoch, evaluate it (F1) on the train, validation and test splits.

In [None]:
def run_epochs(model, evaluator, args, dataset):
    """
    Train `model` for args['epochs'] epochs, evaluating after each one.

    After every epoch the model is scored on the train, validation and test
    splits. At the end, the best validation score and the test score from the
    same epoch are printed.

    Args:
        model: network to train.
        evaluator: evaluator passed through to eval().
        args: settings dict; reads 'batch_size', 'lr', 'weight_decay' and
            'epochs' here (train/eval read further keys).
        dataset: dataset providing get_idx_split() and eval_metric.

    Returns:
        Dict with the per-epoch 'train', 'valid' and 'test' metric curves and
        'best_epoch' (1-based, None when no epoch was run).
    """
    split_idx = dataset.get_idx_split()
    train_loader = DataLoader(dataset[split_idx["train"]], batch_size=args['batch_size'], shuffle=True)
    valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=args['batch_size'], shuffle=False)
    test_loader = DataLoader(dataset[split_idx["test"]], batch_size=args['batch_size'], shuffle=False)

    valid_curve = []
    test_curve = []
    train_curve = []

    optimizer = torch.optim.AdamW(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

    for epoch in range(1, args['epochs'] + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train(model, device, train_loader, optimizer, epoch, args)

        print('Evaluating...')
        train_perf = eval(model, device, train_loader, evaluator, args)
        valid_perf = eval(model, device, valid_loader, evaluator, args)
        test_perf = eval(model, device, test_loader, evaluator, args)

        print({'Train': train_perf, 'Validation': valid_perf, 'Test': test_perf})

        train_curve.append(train_perf[dataset.eval_metric])
        valid_curve.append(valid_perf[dataset.eval_metric])
        test_curve.append(test_perf[dataset.eval_metric])

    curves = {'train': train_curve, 'valid': valid_curve, 'test': test_curve, 'best_epoch': None}

    # With zero epochs there is no curve to take an argmax over.
    if not valid_curve:
        print('No epochs were run; nothing to report.')
        return curves

    print('F1')
    # Model selection: report the test score at the best validation epoch.
    best_val_epoch = int(np.argmax(np.array(valid_curve)))
    print('Finished training!')
    print('Best validation score: {}'.format(valid_curve[best_val_epoch]))
    print('Test score: {}'.format(test_curve[best_val_epoch]))

    curves['best_epoch'] = best_val_epoch + 1
    return curves

In [None]:
# Build the label vocabulary from the target word sequences in dataset.y.
# Only the args['num_vocab'] most frequent words are kept; get_vocab_mapping
# appends '__UNK__' and '__EOS__' and prints the fraction of word occurrences
# covered by the kept words.
vocab2idx, idx2vocab = get_vocab_mapping(dataset.y, args['num_vocab'])
# Show the size and a small sample rather than dumping the whole mapping.
print("Vocab size {} (first entries: {})".format(len(vocab2idx), idx2vocab[:10]))
args['num_classes'] = len(vocab2idx)

Coverage of top 5000 vocabulary:
0.901200162173439


# Main
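Load the ogbg-code2 node-type and node-attribute string mappings, build the GraphCodeBERT-initialized node encoder and the GCN, then run training and evaluation. (The dictionary of y labels is created in the cell above.)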

In [None]:
if __name__ == "__main__":

    # Human-readable strings for every node type / node attribute id.
    mapping_dir = os.path.join(dataset.root, 'mapping')
    nodetypes_mapping = pd.read_csv(os.path.join(mapping_dir, 'typeidx2type.csv.gz'))['type'].tolist()
    nodeattributes_mapping = pd.read_csv(os.path.join(mapping_dir, 'attridx2attr.csv.gz'))['attr'].tolist()

    # Pretrained encoder used to initialize the node-type embeddings.
    tokenizer = AutoTokenizer.from_pretrained("microsoft/graphcodebert-base")
    graphBert = AutoModel.from_pretrained("microsoft/graphcodebert-base")
    graphBert.to(device)
    node_encoder = SemanticNodeEncoder(
        graphBert,
        tokenizer,
        args['hidden_dim'],
        nodetypes_mapping,
        nodeattributes_mapping,
    )

    gcn = GCN(
        node_embedder=node_encoder,
        hidden_dim=args['hidden_dim'],
        output_dim=args['output_dim'],
        num_layers=args['num_layers'],
        dropout=args['dropout'],
        pred_list_len=args['words_per_method_name'],
        num_classes=args['num_classes'],
    )
    gcn = gcn.to(device)

    # param_size = 0
    # for param in gcn.parameters():
    #     param_size += param.nelement() * param.element_size()
    # print(param_size)

    run_epochs(gcn, evaluator, args, dataset)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/539 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at microsoft/graphcodebert-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


=====Epoch 1
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 3.9824025630950928
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['normalized'], ['create', 'one'], ['clear'], ['set', 'tcp'], ['field', 'singleton', 'sub', 'fields', 'schema']]
seq_pred_list [[], ['get'], ['add'], ['set'], ['create']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['grid', 'deferred', 'container'], ['load', 'file'], [], [], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['get'], ['get'], ['get'], ['get'], ['create']]
{'Train': {'precision': 0.12180230373028476, 'recall': 0.054500304832125826, 'F1': 0.07189709443905626}, 'Validation': {'precision': 0.10241997925523366, 'recall': 0.04401645320149067, 'F1': 0.058992250974106604}, 'Test': {'precision': 0.10359865743271976, 'recall': 0.04594150748811765, 'F1': 0.06104208399178327}}
=====Epoch 2
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 3.1019575595855713
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['get', 'joining', 'group', 'property'], ['shellsort'], ['filter'], ['adjust', 'rowcol'], ['get', 'frontend', 'data', 'dict', 'for', 'placeholders']]
seq_pred_list [['get'], [], ['set'], ['add'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['download', 'chain'], ['download'], [], [], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], ['create'], ['create']]
{'Train': {'precision': 0.16212996189318327, 'recall': 0.07771583090278307, 'F1': 0.0997982035085383}, 'Validation': {'precision': 0.13533330411535258, 'recall': 0.060500820538292595, 'F1': 0.07983103659134688}, 'Test': {'precision': 0.13459844480894234, 'recall': 0.0626042145793376, 'F1': 0.08153876301087892}}
=====Epoch 3
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.999541997909546
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['init', 'config'], ['notify', 'attached'], ['delete', 'pull', 'request', 'labels'], ['calc', 'finished'], ['boolean']]
seq_pred_list [['set'], ['on'], ['delete', 'pull'], ['is'], ['from']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['thin', 'emitter', 'core'], ['load'], [], [], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], ['create'], ['create']]
{'Train': {'precision': 0.18592936022380396, 'recall': 0.09679084993313035, 'F1': 0.12031729212550223}, 'Validation': {'precision': 0.1554082482359644, 'recall': 0.07501860907106996, 'F1': 0.09564095553927696}, 'Test': {'precision': 0.156845726262074, 'recall': 0.07919093036643611, 'F1': 0.09985283002642213}}
=====Epoch 4
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.9055325984954834
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['instant'], ['flags'], ['setup', 'redis'], ['request', 'permission'], ['predict']]
seq_pred_list [['get'], ['get'], ['init'], ['get'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['static', 'symbol'], ['write'], [], ['get'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], ['get'], ['generate']]
{'Train': {'precision': 0.20387073422619625, 'recall': 0.11071555179535797, 'F1': 0.13519331348555036}, 'Validation': {'precision': 0.16465720004090517, 'recall': 0.08219200596046808, 'F1': 0.10315077976909066}, 'Test': {'precision': 0.16467255938278352, 'recall': 0.08746503279034716, 'F1': 0.10744183549678354}}
=====Epoch 5
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.8319084644317627
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['get', 'duration', 'measures'], ['list', 'key', 'policies'], ['integer'], ['run', 'strelka', 'with', 'merge'], ['zinnia', 'statistics']]
seq_pred_list [[], ['delete', 'client'], [], ['run'], ['keywords']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['residuals', 'psf'], ['write'], [], [], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], [], [], ['generate']]
{'Train': {'precision': 0.19011436947271407, 'recall': 0.11155971674586743, 'F1': 0.1323071155659235}, 'Validation': {'precision': 0.13910023228294693, 'recall': 0.07368286038466167, 'F1': 0.09041552838283345}, 'Test': {'precision': 0.14603988214567767, 'recall': 0.08125625576760075, 'F1': 0.09837346755061352}}
=====Epoch 6
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.775906562805176
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['validate', 'timeout', 'or', 'zero'], ['parse', 'function', 'signature'], ['zinb', 'll'], ['parse'], ['issuperset']]
seq_pred_list [[], ['get'], [], ['find', 'rev', 'location'], ['approx', 'equal']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['eeg', 'interpreter', 'fn', 'pars'], ['save', 'file'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], [], ['get']]
{'Train': {'precision': 0.2181758649201587, 'recall': 0.12983135737948523, 'F1': 0.1530148871712609}, 'Validation': {'precision': 0.16359220464273713, 'recall': 0.08683916058132651, 'F1': 0.10609818468432658}, 'Test': {'precision': 0.17101937913856993, 'recall': 0.09694230374246776, 'F1': 0.11608014325346198}}
=====Epoch 7
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.725048542022705
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['obfuscatable', 'variable'], ['delete'], ['depaginate', 'all'], ['process', 'message'], ['add', 'work', 'item']]
seq_pred_list [['guess', 'variable'], ['delete'], ['get'], ['authenticate'], ['set']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['telnet', 'r', 'criteria'], ['save'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], [], []]
{'Train': {'precision': 0.22385499800641867, 'recall': 0.13067686502061318, 'F1': 0.15552269457159223}, 'Validation': {'precision': 0.16655344699127844, 'recall': 0.08753475722292832, 'F1': 0.10751783071544653}, 'Test': {'precision': 0.16744426219549238, 'recall': 0.09303349764667423, 'F1': 0.11275544110426558}}
=====Epoch 8
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.680633544921875
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['send', 'location'], ['edit', 'distance'], ['transfer', 'learning', 'tuner'], ['bulk', 'add', 'dimensions'], ['load3', 'ds']]
seq_pred_list [['send', 'location'], [], ['get'], [], ['load']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['pg', 'noise'], ['save'], [], [], ['to']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], [], [], ['download', 'article']]
{'Train': {'precision': 0.2237218202377918, 'recall': 0.13906105349572614, 'F1': 0.16142008940671282}, 'Validation': {'precision': 0.163736103197908, 'recall': 0.09029528247874218, 'F1': 0.10876487247394905}, 'Test': {'precision': 0.168032774436547, 'recall': 0.09983105811427297, 'F1': 0.11747572512334678}}
=====Epoch 9
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.650353193283081
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['group'], ['get', 'importable', 'files'], ['contributor', 'director'], ['attachments'], ['user', 'input']]
seq_pred_list [['save', 'retinotopy', 'tsv'], ['get'], [], ['get'], ['confirm', 'choice', 'choices']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train', 'retinotopy', 'descent'], ['save'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], [], ['download']]
{'Train': {'precision': 0.2511889588276434, 'recall': 0.1634174868977284, 'F1': 0.18576210755253975}, 'Validation': {'precision': 0.1765664489927101, 'recall': 0.10078222447618075, 'F1': 0.11918230951188832}, 'Test': {'precision': 0.18155488730939795, 'recall': 0.11051692167055753, 'F1': 0.12815959985178135}}
=====Epoch 10
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.6060867309570312
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['svg', 'shapes', 'to', 'df'], ['add', 'element'], ['source', 'start'], ['jms', 'to', 'form', 'flavor', 'chrom'], ['decode', 'response']]
seq_pred_list [['xpath', 'pdb'], ['add'], ['get'], ['jms', 'i', 'decimal', 'chrom'], ['encode', 'request']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['mcmc', 'descent'], ['write'], [], [], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], [], [], ['from']]
{'Train': {'precision': 0.23442808727310754, 'recall': 0.14008915138264655, 'F1': 0.1657197324586297}, 'Validation': {'precision': 0.15633957137222246, 'recall': 0.08478357128595986, 'F1': 0.1030147833496214}, 'Test': {'precision': 0.16144751230180426, 'recall': 0.09286421223613679, 'F1': 0.1107992300310507}}
=====Epoch 11
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.572418212890625
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['validate', 'classpath', 'tuples'], ['copy', 'action', 'callback'], ['annotate'], ['get', 'all', 'files'], ['add', 'sni', 'cert']]
seq_pred_list [['set'], ['on'], ['prepare', 'features'], ['files'], ['set']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['fit'], ['save'], [], ['get', 'canvas'], ['make']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], [], ['create']]
{'Train': {'precision': 0.27106120621474467, 'recall': 0.17288392377515266, 'F1': 0.19928284816505565}, 'Validation': {'precision': 0.18616674701611372, 'recall': 0.10441794999690428, 'F1': 0.12493372356413071}, 'Test': {'precision': 0.18777109531620192, 'recall': 0.11286945189350875, 'F1': 0.13215063165172514}}
=====Epoch 12
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.5876176357269287
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['output'], ['is', 'running'], ['render', 'import', 'image'], ['probe', 'services', 'finished'], ['read', 'cumulative', 'iss', 'index']]
seq_pred_list [['post'], ['is'], ['set'], ['on', 'tracing'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['instantiate', 'cc', 'function', 'training'], ['export'], [], ['get'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'valid'], ['get'], ['get'], [], ['get']]
{'Train': {'precision': 0.2698419759985881, 'recall': 0.18136009017623372, 'F1': 0.2044484948005185}, 'Validation': {'precision': 0.17835751121240012, 'recall': 0.10518791335616509, 'F1': 0.1228247450494021}, 'Test': {'precision': 0.18268559018285643, 'recall': 0.11455601407077622, 'F1': 0.1314763861374031}}
=====Epoch 13
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.5293240547180176
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['sanitize', 'next', 'page', 'link'], ['verify', 'claims'], ['archives'], ['append', 'slash', 'if', 'dir', 'path'], ['check', 'in', 'out', 'dates']]
seq_pred_list [['get'], ['validate'], ['get'], ['get'], ['validate']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['mcmc', 'polling', 'solver', 'obs'], ['save'], [], ['get', 'rnn'], ['generate']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['download']]
{'Train': {'precision': 0.2657262110189488, 'recall': 0.17138703522626844, 'F1': 0.1966054166378011}, 'Validation': {'precision': 0.1727330499189201, 'recall': 0.09764292636164486, 'F1': 0.11615560181103418}, 'Test': {'precision': 0.1770662474940769, 'recall': 0.10529180082214527, 'F1': 0.12357520701561162}}
=====Epoch 14
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.483442544937134
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['plot', 'histogram'], ['put'], ['default', 'value', 'cell', 'data', 'func'], ['get', 'remote', 'console', 'url'], ['json']]
seq_pred_list [['plot', 'plot'], ['add'], ['set', 'cell', 'changed'], ['get'], ['to']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run', 'client', 'with'], ['save'], [], ['rnn'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['from', 'image']]
{'Train': {'precision': 0.2812111088233949, 'recall': 0.18551151571274074, 'F1': 0.21092663300754563}, 'Validation': {'precision': 0.18621422623482492, 'recall': 0.10668599575226213, 'F1': 0.12621489723650395}, 'Test': {'precision': 0.18852894720855354, 'recall': 0.11449233541907133, 'F1': 0.13344897792000032}}
=====Epoch 15
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.46309494972229
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['get', 'setup', 'version'], ['validate'], ['check', 'module', 'usage'], ['remove', 'annotation', 'value'], ['get', 'current', 'activities']]
seq_pred_list [['get'], ['validate'], ['check', 'stable', 'submission', 'step'], ['add'], ['login']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train', 'submission'], ['save'], [], ['profile', 'observation'], ['generate']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['make']]
{'Train': {'precision': 0.2916776558751822, 'recall': 0.20422840917244992, 'F1': 0.22637622705921148}, 'Validation': {'precision': 0.18133263210179545, 'recall': 0.11060767486787561, 'F1': 0.12698138226472364}, 'Test': {'precision': 0.18734964461454345, 'recall': 0.12092420468582304, 'F1': 0.13687132001101385}}
=====Epoch 16
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.4466664791107178
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['to', 'sections'], ['start', 'reporter'], ['validate', 'language', 'code'], ['decorate', 'routes'], ['consecutive']]
seq_pred_list [['get'], ['start'], ['validate', 'error'], ['register', 'blueprint'], []]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['show', 'spectrum'], ['write'], [], ['rnn', 'layer', 'observation'], ['generate']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['show']]
{'Train': {'precision': 0.28470849592459685, 'recall': 0.19341607051692938, 'F1': 0.21779867373462716}, 'Validation': {'precision': 0.17522534367649853, 'recall': 0.10231858590538521, 'F1': 0.12006736135894219}, 'Test': {'precision': 0.18124582346151508, 'recall': 0.11382940485592206, 'F1': 0.13101352836864047}}
=====Epoch 17
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.424572229385376
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['memorized', 'timedelta'], ['add', 'metadata', 'props'], ['get', 'auth'], ['do', 'execute'], ['lilypond']]
seq_pred_list [['get', 'seconds'], ['set'], ['get'], ['do', 'execute'], ['print']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['cube', 'track'], ['save'], [], ['rnn', 'rnn', 'observation'], ['generate']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['hash'], [], ['generate']]
{'Train': {'precision': 0.29508643482296343, 'recall': 0.20910752750603626, 'F1': 0.23179684278117357}, 'Validation': {'precision': 0.17502958320550469, 'recall': 0.10677048441325078, 'F1': 0.12319164573185698}, 'Test': {'precision': 0.18020092946965555, 'recall': 0.11640052533983643, 'F1': 0.13241532984561963}}
=====Epoch 18
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.396989107131958
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['matches', 'pattern'], ['tempfile'], ['get', 'cmdclass'], ['stream', 'media'], ['set', 'context']]
seq_pred_list [['matches'], [], ['report'], ['consume', 'file'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['simulate', 'fit', 'cross'], ['write'], [], ['get'], ['generate']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['generate']]
{'Train': {'precision': 0.3003003853167834, 'recall': 0.20457413652564838, 'F1': 0.2301042313531386}, 'Validation': {'precision': 0.1808045170998232, 'recall': 0.10674517955492667, 'F1': 0.12479372390184502}, 'Test': {'precision': 0.1858893748860944, 'recall': 0.11534266780644582, 'F1': 0.13333163707685303}}
=====Epoch 19
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.37216854095459
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['make', 'mysql', 'url'], ['recomb', 'probability'], ['save', 'xml'], ['get', 'relationship', 'mdata'], ['common', 'start']]
seq_pred_list [['get', 'mysql'], [], ['write', 'xml'], ['get', 'assessment', 'mdata', 'mdata'], []]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['dynamic', 'mcmc'], ['embed'], [], ['rnn', 'transformer'], ['generate']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['generate']]
{'Train': {'precision': 0.3008603038740841, 'recall': 0.21156930660412487, 'F1': 0.23550915966754607}, 'Validation': {'precision': 0.16734379337044014, 'recall': 0.10206301553213942, 'F1': 0.11773792938494697}, 'Test': {'precision': 0.17495671587388373, 'recall': 0.11369185028971798, 'F1': 0.12867470626491948}}
=====Epoch 20
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.3527345657348633
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['cmd', 'genobstacles'], ['ref', 'frequency'], ['get', 'band', 'qpoints'], ['default', 'capacity'], ['parse']]
seq_pred_list [['cmd', 'full'], [], ['peak', 'magnitude', 'full'], ['get'], ['parse', 'kv']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train', 'gradients', 'full', 'bottleneck'], ['save', 'epub', 'file'], [], ['rnn', 'relu', 'load'], ['generate', 'gradients', 'load']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['generate']]
{'Train': {'precision': 0.3111597659339438, 'recall': 0.23884771487976078, 'F1': 0.2552217777374854}, 'Validation': {'precision': 0.17651750887496165, 'recall': 0.11494762329118291, 'F1': 0.12849408629966985}, 'Test': {'precision': 0.18304553186319178, 'recall': 0.1254588406652376, 'F1': 0.13843529807171143}}
=====Epoch 21
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.3455874919891357
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['selectnone'], ['get', 'authentication', 'info'], ['background'], ['as', 'pull', 'request'], ['unpack']]
seq_pred_list [[], ['get', 'password'], ['from'], ['get'], ['unpack']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['dynamic', 'avg'], ['save'], [], ['get'], ['generate', 'serving']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get', 'id'], [], ['build']]
{'Train': {'precision': 0.31511898575733216, 'recall': 0.22973266203492984, 'F1': 0.25199930140981125}, 'Validation': {'precision': 0.17551606258491476, 'recall': 0.10898433777098807, 'F1': 0.12489619553431623}, 'Test': {'precision': 0.18648699957475243, 'recall': 0.12373295393730058, 'F1': 0.138801817676886}}
=====Epoch 22
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.3088176250457764
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['enzyme'], ['find', 'snapshot'], ['is', 'package', 'installed'], ['coherence'], ['actions', 'freqs', 'angles']]
seq_pred_list [[], ['get'], ['is', 'all', 'installed'], [], ['ecc', 'freqs']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train', 'noise'], ['write'], [], ['get', 'estimate'], ['generate']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], [], []]
{'Train': {'precision': 0.32322955271878734, 'recall': 0.21626752937289587, 'F1': 0.2460431119532761}, 'Validation': {'precision': 0.17868913529385982, 'recall': 0.10289424969268944, 'F1': 0.12184601542317279}, 'Test': {'precision': 0.18297263228236438, 'recall': 0.11137240693008872, 'F1': 0.1301786940455612}}
=====Epoch 23
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.2904770374298096
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['read'], ['assert', 'file', 'size', 'not', 'equal'], ['get'], ['get', 'diff'], ['api', 'value']]
seq_pred_list [['read'], ['assert', 'file', 'size', 'less', 'equal'], ['get'], ['get', 'file', 'spec'], ['put', 'item']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['init', 'samples'], ['save'], [], ['get', 'rnn'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['get', 'url'], ['get'], ['get'], [], ['report']]
{'Train': {'precision': 0.318758292971482, 'recall': 0.21827825904407616, 'F1': 0.24572586826262524}, 'Validation': {'precision': 0.1759448364523528, 'recall': 0.10462551940178272, 'F1': 0.12213923556781951}, 'Test': {'precision': 0.1807947573051455, 'recall': 0.11348345729154913, 'F1': 0.1307093823496229}}
=====Epoch 24
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.277272939682007
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['model'], ['divide', 'heigths'], ['setup', 'pod'], ['get', 'parser', 'compound'], ['handle']]
seq_pred_list [['get'], ['find', 'controller'], [], ['parse'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['estimate', 'predictions'], ['write', 'predictions'], [], ['get'], ['generate']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['get']]
{'Train': {'precision': 0.32691306187945685, 'recall': 0.22912718896416465, 'F1': 0.2557024977114584}, 'Validation': {'precision': 0.17544009583497683, 'recall': 0.10482724521051205, 'F1': 0.12190486878438844}, 'Test': {'precision': 0.181516918777717, 'recall': 0.11457087603889134, 'F1': 0.1315651996283489}}
=====Epoch 25
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.2555692195892334
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['splitport'], ['request', 'url'], ['ret8'], ['parse', 'place'], ['node2geoff']]
seq_pred_list [['get'], ['get', 'proxy'], [], ['loc', 'place', 'possible'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['crawl', 'network'], ['load'], [], ['observation'], ['minimal']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['load']]
{'Train': {'precision': 0.3271388104529025, 'recall': 0.241718454209814, 'F1': 0.26401959777867084}, 'Validation': {'precision': 0.1722487618880659, 'recall': 0.10710381287692064, 'F1': 0.12260898733704098}, 'Test': {'precision': 0.17993363100662169, 'recall': 0.1188554621168071, 'F1': 0.1337115031961943}}
F1
Finished training!
Best validation score: 0.12849408629966985
Test score: 0.13843529807171143
