# Experiment Motivation

GraphSAGE with biased pooling achieved a high test score but also overfit heavily. This experiment tries using fewer layers to reduce the overfitting.

In [2]:
# Hyper-parameters and run settings shared by the cells below.
args = dict(
    num_layers=3,             # number of SAGEConv layers stacked in GraphSage
    hidden_dim=768,           # node-embedding / hidden width (SemanticNodeEncoder documents N x 768 embeddings)
    output_dim=768,           # width of the last SAGEConv layer, i.e. of the pooled graph embedding
    dropout=0,                # dropout probability applied after each hidden layer
    lr=0.001,                 # AdamW learning rate
    epochs=25,                # number of training epochs
    num_vocab=10000,          # most frequent target sub-tokens kept as classes
    warmup=2,                 # only referenced by the commented-out LR warm-up code
    weight_decay=0.000001,    # AdamW weight decay
    words_per_method_name=5,  # sub-token positions predicted per graph
    max_steps=99999,          # batch cap per loop, used to shorten debugging runs
    batch_size=32,            # graphs per mini-batch
    heads=3,                  # passed to GraphSage, which stores it but never reads it
)

# Setup

In [3]:
!pip install ogb
!pip install torch_geometric
!python -c "import ogb; print(ogb.__version__)"

import os
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from torch_geometric.loader import DataLoader
import torch
import pandas as pd
import torch.nn.functional as F
from tqdm.notebook import tqdm
print(torch.__version__)

# The PyG built-in SAGEConv (GraphSAGE convolution layer)
from torch_geometric.nn import SAGEConv

import torch_geometric.transforms as T
from torch_geometric.nn import global_add_pool, global_mean_pool
from numpy import float32
from transformers import AutoTokenizer, AutoModel

import copy
import numpy as np
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device: {}'.format(device))

Collecting ogb
  Downloading ogb-1.3.6-py3-none-any.whl (78 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/78.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting outdated>=0.2.0 (from ogb)
  Downloading outdated-0.2.2-py2.py3-none-any.whl (7.5 kB)
Collecting littleutils (from outdated>=0.2.0->ogb)
  Downloading littleutils-0.2.2.tar.gz (6.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: littleutils
  Building wheel for littleutils (setup.py) ... [?25l[?25hdone
  Created wheel for littleutils: filename=littleutils-0.2.2-py3-none-any.whl size=7026 sha256=7915f749056724e03470590284b79d8b8ee7037a663017ea487e1d0063e8ee84
  Stored in directory: /root/.cache/pip/wheels/3d/fe/b0/27a9892da57472e538c7452a721a9cf463cc03cf7379889266
Successfully built littleutils
Installing collected packages: littleut

# Dataset

In [4]:
# Load the ogbg-code2 graph-property-prediction dataset. The recorded output of
# this cell shows it being downloaded (0.91 GB), extracted and converted to PyG
# objects before use.
dataset = PygGraphPropPredDataset(name = "ogbg-code2")
print(dataset)
# Evaluator paired with this dataset; its expected input/output format is quoted
# in the string below. The recorded training output shows it returning
# 'precision' and 'recall' alongside 'F1'.
evaluator = Evaluator(name='ogbg-code2')
# NOTE: the triple-quoted string below is a bare expression, not a comment. As
# the last expression of the cell it is echoed as the cell's output.
'''
==== Expected input format of Evaluator for ogbg-code2
{'seq_ref': seq_ref, 'seq_pred': seq_pred}
- seq_ref: a list of lists of strings
- seq_pred: a list of lists of strings
where seq_ref stores the reference sequences of sub-tokens, and
seq_pred stores the predicted sequences of sub-tokens.

==== Expected output format of Evaluator for ogbg-code2
{'F1': F1}
- F1 (float): F1 score averaged over samples.
'''
# Leftover inspection helpers, kept disabled.
# print(dataset.num_classes)
# print(dataset.num_node_features)
# print(dataset[0])
# print(len(dataset.y))

Downloading http://snap.stanford.edu/ogb/data/graphproppred/code2.zip


Downloaded 0.91 GB: 100%|██████████| 934/934 [00:27<00:00, 33.94it/s]


Extracting dataset/code2.zip


Processing...


Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 452741/452741 [00:01<00:00, 369661.07it/s]


Converting graphs into PyG objects...


100%|██████████| 452741/452741 [00:18<00:00, 24715.61it/s]


Saving...


Done!


PygGraphPropPredDataset(452741)


"\n==== Expected input format of Evaluator for ogbg-code2\n{'seq_ref': seq_ref, 'seq_pred': seq_pred}\n- seq_ref: a list of lists of strings\n- seq_pred: a list of lists of strings\nwhere seq_ref stores the reference sequences of sub-tokens, and\nseq_pred stores the predicted sequences of sub-tokens.\n\n==== Expected output format of Evaluator for ogbg-code2\n{'F1': F1}\n- F1 (float): F1 score averaged over samples.\n"

# SAGEConv Model
This SAGEConv model uses multiple GraphSage layers and a global_add_pool pooling method for graph level aggregation.

In [5]:
from torch_geometric.nn.models.tgn import Linear
class GraphSage(torch.nn.Module):
    """
    GraphSAGE model that predicts a fixed-length sequence of sub-tokens per graph.

    Pipeline (see ``forward``):
      1. ``node_embedder`` turns the raw node features and the node depths into
         ``hidden_dim``-wide node embeddings.
      2. ``num_layers`` SAGEConv layers with LSTM aggregation are applied; every
         layer except the last is followed by BatchNorm, ReLU and dropout.
      3. Every node embedding is scaled by ``0.5 ** depth`` so that shallow nodes
         dominate the graph embedding (presumably the "biased pooling" of the
         experiment notes).
      4. ``global_add_pool`` sums the node embeddings of each graph.
      5. A single linear layer maps each graph embedding to
         ``pred_list_len * num_classes`` logits.

    Input
        A batched graph object exposing ``x``, ``edge_index``, ``node_depth`` and
        ``batch``.
    Output
        Un-normalised logits of shape batch_size x pred_list_len x num_classes.
    """
    def __init__(self, node_embedder, hidden_dim, output_dim, num_layers,
                 dropout, pred_list_len, num_classes, heads):
        super(GraphSage, self).__init__()

        self.node_embedder = node_embedder
        self.hidden_dim = hidden_dim
        self.pred_list_len = pred_list_len
        self.num_classes = num_classes
        # Stored for interface compatibility only; nothing in this class reads it.
        self.heads = heads

        # SAGEConv layers: (num_layers - 1) hidden layers plus one output layer
        self.convs = torch.nn.ModuleList()
        self.convs.extend([SAGEConv(hidden_dim, hidden_dim, aggr="lstm") for i in range(num_layers-1)])
        self.convs.append(SAGEConv(hidden_dim, output_dim, aggr='lstm'))

        # Batch norm, one per hidden layer (the output layer has none)
        self.bns = torch.nn.ModuleList()
        self.bns.extend([torch.nn.BatchNorm1d(hidden_dim) for i in range(num_layers-1)])

        # Global pool
        self.pool_method = global_add_pool

        # Probability of an element getting zeroed
        self.dropout = dropout

        # Post-processing layer that transforms the graph embedding into
        # pred_list_len token predictions for each graph. torch.nn.Linear is
        # referenced directly instead of through a re-export of another module.
        self.post_mps = torch.nn.Linear(output_dim, num_classes*pred_list_len, bias=True)

    def reset_parameters(self):
        """
        Re-initialise the convolution, batch-norm and prediction-head weights.

        ``node_embedder`` is left untouched on purpose: it may carry embeddings
        initialised from a pretrained model that a reset would discard.
        """
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()
        self.post_mps.reset_parameters()

    def forward(self, batched_data):
        """
        Compute per-graph token logits for a batch of graphs.

        Input
            batched_data: object with ``x`` (raw node features), ``edge_index``
                (2 x E), ``node_depth`` (one depth per node, shaped N or N x 1)
                and ``batch`` (graph id of every node).
        Output
            Tensor of shape batch_size x pred_list_len x num_classes.
        """
        x = batched_data.x
        edge_index = batched_data.edge_index
        node_depth = batched_data.node_depth
        batch = batched_data.batch

        # Pre-processing: turn the raw node features and depths into
        # hidden_dim-wide embeddings. Both encoders defined in this notebook
        # clamp the depth tensor in place, and the view shares memory with
        # node_depth, so the depth weighting below sees the clamped values.
        x = self.node_embedder(x, node_depth.view(-1,))

        # Sort edges by target node; the LSTM aggregation is order-sensitive and
        # is expected to receive edges grouped by destination.
        edge_index = edge_index[:, edge_index[1].argsort()]

        for i in range(len(self.convs)-1):
            x = self.convs[i](x, edge_index)
            x = self.bns[i](x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        out = self.convs[-1](x, edge_index) # batch * N x outdim

        # Halve a node's contribution for every level of depth. Reshaping to a
        # column makes the broadcast explicit and also accepts 1-D depth input.
        out = out * (0.5 ** node_depth.view(-1, 1))

        out = self.pool_method(out, batch) # batch x outdim

        # One graph embedding per graph is turned into pred_list_len word
        # predictions: batch_size x word_len x num_classes
        out = self.post_mps(out).view(out.shape[0], self.pred_list_len, self.num_classes)

        return out

# Train

In [6]:
def train(model, device, data_loader, optimizer, epoch, args):
    """
    Train ``model`` for one pass over ``data_loader``.

    Input
        model: network returning batch_size x words_per_method_name x num_classes logits
        device: device every batch is moved to
        data_loader: iterable of batched graphs
        optimizer: optimizer stepping the model parameters
        epoch: current epoch number (only needed by the disabled LR warm-up)
        args: dict providing 'max_steps', 'num_classes' and 'words_per_method_name'
    Output
        Mean per-graph loss over the batches that were actually trained on
        (0.0 when none were). The same value is printed.
    """
    model.train()

    # Plain Python floats: accumulating the loss tensor itself would keep a
    # reference to every step's autograd graph for the whole epoch.
    loss_accum = 0.0
    batches_used = 0

    # Used to shorten training for debugging
    max_steps = args['max_steps']

    loss_fn = torch.nn.CrossEntropyLoss()

    for step, batch in enumerate(tqdm(data_loader, desc="Iteration")):
        batch = batch.to(device)

        # Skip batches holding a single node or a single graph (last node still
        # belongs to graph 0). NOTE(review): presumably guards BatchNorm in
        # training mode - confirm.
        if not (batch.x.shape[0] == 1 or batch.batch[-1] == 0):
            optimizer.zero_grad()

            # Each prediction holds the class logits for one word position
            pred_list = model(batch) # batch_size x words per graph x num_classes

            # y comes in as a list of words per graph and is one-hot encoded
            y = one_hot_y(batch.y, args['num_classes'], args['words_per_method_name']) # batch_size x words_per_method_name x num_classes

            num_graphs = pred_list.shape[0]

            # Sum over graphs of the mean cross entropy over each graph's word
            # positions. Every graph has the same number of positions, so this
            # equals num_graphs times the mean over all flattened positions and
            # keeps the gradient scale of the former per-graph loop.
            loss = num_graphs * loss_fn(
                pred_list.flatten(0, 1).to(torch.float32),
                y.flatten(0, 1).to(torch.float32),
            )

            loss.backward()
            optimizer.step()

            loss_accum += loss.item() / num_graphs
            batches_used += 1

        # Used to shorten training for debugging; checked for every batch,
        # including skipped ones, like the evaluation loop does.
        if step > max_steps:
            break

    epoch_loss = loss_accum / max(batches_used, 1)
    print("Epoch loss {}".format(epoch_loss))
    return epoch_loss


# Eval

In [7]:
def eval(model, device, loader, evaluator, args):
    """
    Score ``model`` on ``loader`` with the dataset evaluator.

    Every batch is run without gradient tracking, the logits are decoded into
    word lists and collected next to the reference word lists. The first five
    references and predictions are printed as a sanity check.

    Input
        model: network returning batch_size x pred_list_len x num_classes logits
        device: device every batch is moved to
        loader: iterable of batched graphs
        evaluator: object whose ``eval`` takes {'seq_ref': ..., 'seq_pred': ...}
        args: dict providing 'max_steps'
    Output
        Whatever ``evaluator.eval`` returns for the collected sequences.
    """
    model.eval()

    references, predictions = [], []

    # Used to shorten evaluation for debugging
    step_cap = args['max_steps']

    progress = tqdm(loader, desc="Iteration")
    for step, batch in enumerate(progress):
        batch = batch.to(device)

        # Batches consisting of a single node are left out of the evaluation
        if batch.x.shape[0] != 1:
            with torch.no_grad():
                logits = model(batch) # batch_size x pred_list_len x num_classes

            references.extend(batch.y)
            predictions.extend(decode_to_word(logits))

        # Used to shorten for debugging
        if step > step_cap:
            break

    print("seq_ref_list {}".format(references[:5]))
    print("seq_pred_list {}".format(predictions[:5]))

    return evaluator.eval({"seq_ref": references, "seq_pred": predictions})

# Helper methods
Might modularize away in a different file

In [8]:
class SemanticNodeEncoder(torch.nn.Module):
    '''
    Node encoder whose attribute embeddings are initialised from a CodeBERT-style model.

    A node embedding is the sum of three embedding lookups: node type, node
    attribute and node depth (clamped to max_depth). The type and depth tables
    are ordinary randomly initialised embeddings; the attribute table is
    replaced by one row per attribute string, computed once in the constructor
    by summing the model's hidden states over the string's tokens.

        Input:
            codeBert: callable mapping a 1 x T tensor of token ids to a sequence
                whose first element holds the per-token hidden states
            tokenizer: object providing tokenize() and convert_tokens_to_ids()
            emb_dim: width of the type and depth embeddings; it has to equal the
                hidden width of codeBert (768 in this notebook) for the sum in
                forward() to work
            node_type_mapping: list of node type strings [98 type_string]
            node_attributes_mapping: list of node attribute strings [10029 attribute_string]
        Output (of forward):
            Node embeddings. Dim: N x emb_dim
    '''
    # Attribute indices that are forced to the unknown token
    # ("some node attr are not recognized").
    UNRECOGNIZED_ATTRIBUTE_IDS = frozenset(
        [0, 1, 6, 10021, 10022, 10023, 10024, 10027, 10028, 10029])
    UNK_TOKEN = '[UNK]'

    def __init__(self, codeBert, tokenizer, emb_dim, node_type_mapping, node_attributes_mapping):
        super(SemanticNodeEncoder, self).__init__()

        self.max_depth = 20
        # NOTE: if codeBert is a torch Module, this assignment registers it as a
        # sub-module, so its parameters are part of self.parameters().
        self.codeBert = codeBert
        self.tokenizer = tokenizer
        self.emb_dim = emb_dim
        self.node_type_mapping = node_type_mapping
        self.node_attributes_mapping = node_attributes_mapping

        self.type_encoder = torch.nn.Embedding(len(node_type_mapping), emb_dim)
        self.attribute_encoder = torch.nn.Embedding(len(node_attributes_mapping), emb_dim) # 10029 x 768
        self.depth_encoder = torch.nn.Embedding(self.max_depth+1, emb_dim)

        #self.type_encoder.weight = torch.nn.Parameter(self.get_embedding(node_type_mapping))
        # Replace the random attribute table by the model-derived one; it stays trainable.
        self.attribute_encoder.weight = torch.nn.Parameter(self.get_embedding(node_attributes_mapping))

    def _encoder_device(self):
        '''Device of the embedding model's parameters (CPU when it has none).'''
        parameters = getattr(self.codeBert, 'parameters', None)
        if parameters is not None:
            first = next(iter(parameters()), None)
            if first is not None:
                return first.device
        return torch.device('cpu')

    def _embed_string(self, feature_string):
        '''Return the 1 x hidden embedding of one string: the sum of its token states.'''
        tokens = self.tokenizer.tokenize(feature_string)
        tokens_ids = self.tokenizer.convert_tokens_to_ids(tokens)
        tokens_tensor = torch.tensor(tokens_ids).to(self._encoder_device())[None,:]
        token_states = self.codeBert(tokens_tensor)[0].view(tokens_tensor.shape[1], -1)
        return token_states.sum(dim=0, keepdim=True) # sum embeddings if >1 token in the string

    def get_embedding(self, mapping):
        '''
        Input:
            mapping: Either list of node type strings [98 type_string] or list of node attribute strings [10029 attribute_string]
        Output:
            Model-based embedding of every entry, in mapping order. Dim: N x hidden

        Entries listed in UNRECOGNIZED_ATTRIBUTE_IDS, entries that are not
        strings (e.g. a NaN produced when the mapping file was parsed) and
        entries whose tokenisation fails all receive the embedding of '[UNK]'.
        Previously a failure silently reused the embedding of the preceding row.
        '''
        node_embeddings = []
        feature_embedded = {}

        # The result is only used as initial weights, so no autograd graph is needed.
        with torch.no_grad():
            for i, feature_string in enumerate(mapping):
                if not isinstance(feature_string, str):
                    print("Error tokenizing {} {}".format(feature_string, i))
                    feature_string = self.UNK_TOKEN
                elif i in self.UNRECOGNIZED_ATTRIBUTE_IDS:
                    feature_string = self.UNK_TOKEN

                if feature_string not in feature_embedded:
                    try:
                        feature_embedded[feature_string] = self._embed_string(feature_string)
                    except Exception:
                        # Nothing to fall back to if the unknown token itself fails.
                        if feature_string == self.UNK_TOKEN:
                            raise
                        print("Error tokenizing {} {}".format(feature_string, i))
                        if self.UNK_TOKEN not in feature_embedded:
                            feature_embedded[self.UNK_TOKEN] = self._embed_string(self.UNK_TOKEN)
                        feature_embedded[feature_string] = feature_embedded[self.UNK_TOKEN]

                node_embeddings.append(feature_embedded[feature_string])

        node_embeddings = torch.cat(node_embeddings, dim=0) # N x hidden
        return node_embeddings

    def forward(self, x, depth):
        '''
            Input:
                'x': default node feature. the first and second column represents node type and node attributes.
                'depth': The depth of the node in the AST. Clamped to max_depth IN PLACE,
                         so the caller's tensor is modified.
            Output:
                Sum of the type, attribute and depth embeddings. Dim: N x emb_dim
        '''
        depth[depth > self.max_depth] = self.max_depth # N depths, each at most max_depth
        return self.type_encoder(x[:,0]) + self.attribute_encoder(x[:,1]) + self.depth_encoder(depth)



class ASTNodeEncoder(torch.nn.Module):
    '''
    Purely learned node encoder: the sum of a type, an attribute and a depth embedding.

        Input:
            x: default node feature. Column 0 holds the node type id, column 1 the node attribute id.
            depth: The depth of every node in the AST. Values above max_depth are
                   clamped to max_depth in place.

        Output:
            One emb_dim-dimensional vector per node.
    '''
    def __init__(self, emb_dim, num_nodetypes, num_nodeattributes, max_depth):
        super().__init__()

        self.max_depth = max_depth

        # One lookup table per feature; the depth table covers depths 0..max_depth.
        self.type_encoder = torch.nn.Embedding(num_nodetypes, emb_dim)
        self.attribute_encoder = torch.nn.Embedding(num_nodeattributes, emb_dim)
        self.depth_encoder = torch.nn.Embedding(max_depth + 1, emb_dim)

    def forward(self, x, depth):
        # Clamp overly deep nodes; this writes into the caller's tensor.
        too_deep = depth > self.max_depth
        depth[too_deep] = self.max_depth

        type_ids, attribute_ids = x[:, 0], x[:, 1]
        type_emb = self.type_encoder(type_ids)
        attribute_emb = self.attribute_encoder(attribute_ids)
        depth_emb = self.depth_encoder(depth)
        return type_emb + attribute_emb + depth_emb


def get_vocab_mapping(seq_list, num_vocab):
    '''
        Build the index <-> word mappings for the num_vocab most frequent words.

        Input:
            seq_list: a list of sequences (each a list of words)
            num_vocab: maximum number of words to keep
        Output:
            vocab2idx:
                A dictionary that maps vocabulary into integer index.
                Additionally, we also index '__UNK__' and '__EOS__'
                '__UNK__' : out-of-vocabulary term
                '__EOS__' : end-of-sentence
            idx2vocab:
                A list that maps idx to actual vocabulary.

        Words are ranked by descending count; ties keep first-appearance order.
        '__UNK__' and '__EOS__' always take the two indices right after the kept
        words, so the mapping is also valid when seq_list contains fewer than
        num_vocab distinct words (the special indices used to be hard-wired to
        num_vocab and num_vocab + 1, which tripped the consistency assert).
        The coverage of the kept words is printed.
    '''
    from collections import Counter

    # Counter keeps insertion order, i.e. the order of first appearance
    vocab_cnt = Counter()
    for seq in seq_list:
        vocab_cnt.update(seq)

    # sorted() is stable, so equally frequent words stay in first-appearance order
    ranked = sorted(vocab_cnt.items(), key=lambda item: -item[1])
    top = ranked[:num_vocab]

    total_count = sum(vocab_cnt.values())
    covered_count = sum(count for _, count in top)

    print('Coverage of top {} vocabulary:'.format(num_vocab))
    print(covered_count / total_count if total_count else 0.0)

    idx2vocab = [word for word, _ in top]
    vocab2idx = {word: idx for idx, word in enumerate(idx2vocab)}

    vocab2idx['__UNK__'] = len(idx2vocab)
    idx2vocab.append('__UNK__')

    vocab2idx['__EOS__'] = len(idx2vocab)
    idx2vocab.append('__EOS__')

    # test the correspondence between vocab2idx and idx2vocab
    for idx, vocab in enumerate(idx2vocab):
        assert(idx == vocab2idx[vocab])

    # test that the idx of '__EOS__' is len(idx2vocab) - 1.
    # Decoding relies on the two special tokens being the last two indices.
    assert(vocab2idx['__EOS__'] == len(idx2vocab) - 1)

    return vocab2idx, idx2vocab

def one_hot_y(batch_y, num_classes, max_seq_len):
    '''
    One-hot encode the target word sequences of a batch.

    Every sequence is cut or padded with '__EOS__' to exactly max_seq_len words;
    words missing from the global vocab2idx map to '__UNK__'.

    Input:
        batch_y: a list of word lists, one per graph. batch_size x (variable) word len
        num_classes: size of the one-hot dimension
        max_seq_len: number of word positions per graph
    Output:
        Integer one-hot tensor on the global device. batch_size x max_seq_len x num_classes
    '''
    unk_index = vocab2idx['__UNK__']

    # Collect the indices as plain Python lists and create the tensor once;
    # writing element by element into a device tensor costs one tiny device
    # operation per word.
    index_rows = []
    for y in batch_y:
        # if the number of words in y is less than the max length, fill blanks with end of word token
        padded = y[:max_seq_len] + ['__EOS__'] * max(0, max_seq_len - len(y))
        index_rows.append([vocab2idx.get(w, unk_index) for w in padded])

    # view() keeps the (batch, max_seq_len) shape even for an empty batch
    labels = torch.tensor(index_rows, dtype=torch.long, device=device).view(len(batch_y), max_seq_len)

    return F.one_hot(labels, num_classes) # batch x word len x num_class

def decode_to_word(pred_list):
    '''
        Turn class scores back into words by taking the argmax at every word
        position and looking the index up in the global idx2vocab.

        Positions whose argmax is one of the two special tokens (the last two
        vocabulary indices, '__UNK__' and '__EOS__') are dropped; decoding does
        not stop at the first '__EOS__'.

        Input:  scores, batch x word len x num class
        Output: list of lists, each a sequence of words. batch x (up to) word len
    '''
    first_special_index = len(vocab2idx) - 2
    best_indices = torch.argmax(pred_list, dim=-1).tolist()

    return [
        [idx2vocab[word_index] for word_index in graph if word_index < first_special_index]
        for graph in best_indices
    ]

# Run Epochs
Train the GraphSAGE model and, after every epoch, evaluate its F1 score on the train, validation and test splits.

In [9]:
def run_epochs(model, evaluator, args, dataset):
    """
    Train ``model`` for args['epochs'] epochs and evaluate it after each one.

    Input
        model: network to train
        evaluator: dataset evaluator handed to ``eval``
        args: dict providing 'batch_size', 'lr', 'weight_decay' and 'epochs'
            (plus the keys read by ``train`` and ``eval``)
        dataset: dataset offering ``get_idx_split()`` and ``eval_metric``
    Output
        Dict with the per-epoch 'train_curve', 'valid_curve' and 'test_curve',
        the 0-based 'best_val_epoch', and the 'best_valid' / 'test_at_best_valid'
        scores. The same scores are printed.
    """
    split_idx = dataset.get_idx_split()
    train_loader = DataLoader(dataset[split_idx["train"]], batch_size=args['batch_size'], shuffle=True)
    valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=args['batch_size'], shuffle=False)
    test_loader = DataLoader(dataset[split_idx["test"]], batch_size=args['batch_size'], shuffle=False)

    valid_curve = []
    test_curve = []
    train_curve = []

    optimizer = torch.optim.AdamW(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

    # # We want to decrease lr as training goes progresses.
    # # lr_scheduler method decays lr with a cosine annealing according to https://arxiv.org/pdf/1608.03983.pdf
    # lr_modifier = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args['epochs'])
    # lr_steps = args['lr'] / (args['warmup'] * len(train_loader))
    # def lr_scheduler(s):
    #     lr = s * lr_steps
    #     return lr

    for epoch in range(1, args['epochs'] + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train(model, device, train_loader, optimizer, epoch, args)

        print('Evaluating...')
        train_perf = eval(model, device, train_loader, evaluator, args)
        valid_perf = eval(model, device, valid_loader, evaluator, args)
        test_perf = eval(model, device, test_loader, evaluator, args)

        # if epoch >= args['warmup'] and lr_modifier is not None:
        #     lr_modifier.step()

        print({'Train': train_perf, 'Validation': valid_perf, 'Test': test_perf})

        train_curve.append(train_perf[dataset.eval_metric])
        valid_curve.append(valid_perf[dataset.eval_metric])
        test_curve.append(test_perf[dataset.eval_metric])

    print('F1')
    # Model selection: report the test score of the epoch with the best validation score
    best_val_epoch = int(np.argmax(np.array(valid_curve)))
    print('Finished training!')
    print('Best validation score: {}'.format(valid_curve[best_val_epoch]))
    print('Test score: {}'.format(test_curve[best_val_epoch]))

    return {
        'train_curve': train_curve,
        'valid_curve': valid_curve,
        'test_curve': test_curve,
        'best_val_epoch': best_val_epoch,
        'best_valid': valid_curve[best_val_epoch],
        'test_at_best_valid': test_curve[best_val_epoch],
    }

In [10]:
# Build the target vocabulary from the method names of the whole dataset (dataset.y).
# Only the args['num_vocab'] most frequent sub-tokens are kept; get_vocab_mapping
# prints the share of all sub-token occurrences they cover (about 94.3% for the
# 10000 kept in the recorded run) and appends '__UNK__' and '__EOS__'.
vocab2idx, idx2vocab = get_vocab_mapping(dataset.y, args['num_vocab'])
print("Vocab list {}".format(vocab2idx))
# Number of output classes = kept sub-tokens + '__UNK__' + '__EOS__'.
args['num_classes'] = len(vocab2idx)

Coverage of top 10000 vocabulary:
0.9431979657467654


# Main
Load the ogbg-code2 node type and node attribute mappings, build the GraphCodeBERT-initialised node encoder and the GraphSAGE model, then train and evaluate it.

In [None]:
# Entry point of the experiment: build the node encoder and the model, then run
# training. Names assigned here live in the notebook's global namespace.
if __name__ == "__main__":


    # Index -> string tables for node types and node attributes, read from the
    # dataset's 'mapping' directory. The recorded output ("Error tokenizing nan
    # 763") shows that entry 763 of the attribute table is read as NaN.
    nodetypes_mapping = pd.read_csv(os.path.join(dataset.root, 'mapping', 'typeidx2type.csv.gz'))['type'].tolist()
    nodeattributes_mapping = pd.read_csv(os.path.join(dataset.root, 'mapping', 'attridx2attr.csv.gz'))['attr'].tolist()

    # Pretrained GraphCodeBERT tokenizer and model, used by SemanticNodeEncoder
    # to initialise the node attribute embeddings.
    tokenizer = AutoTokenizer.from_pretrained("microsoft/graphcodebert-base")
    graphBert = AutoModel.from_pretrained("microsoft/graphcodebert-base")
    graphBert.to(device)
    node_encoder = SemanticNodeEncoder(graphBert, tokenizer, args['hidden_dim'], nodetypes_mapping, nodeattributes_mapping)

    # Positional arguments follow GraphSage.__init__: node_embedder, hidden_dim,
    # output_dim, num_layers, dropout, pred_list_len, num_classes, heads.
    gnn = GraphSage(
        node_encoder,
        args['hidden_dim'],
        args['output_dim'],
        args['num_layers'],
        args['dropout'],
        args['words_per_method_name'],
        args['num_classes'],
        args['heads'],
    ).to(device)

    # Disabled parameter-size probe; note that it refers to `gcn`, which is not
    # defined in this cell (the model here is `gnn`).
    # param_size = 0
    # for param in gcn.parameters():
    #     param_size += param.nelement() * param.element_size()
    # print(param_size)

    # Train and evaluate; per-epoch scores are printed by run_epochs.
    run_epochs(gnn, evaluator, args, dataset)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/539 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at microsoft/graphcodebert-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Error tokenizing nan 763
=====Epoch 1
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 3.33625864982605
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['truncate', 'to'], ['update', 'highlights'], ['text'], ['get', 'context', 'tags'], ['sample', 'discrete', 'from', 'log']]
seq_pred_list [['get'], ['get'], [], ['get'], []]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['create', 'loop'], ['write'], ['get'], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['get'], ['get'], ['get'], ['create'], ['create']]
{'Train': {'precision': 0.12756036302789056, 'recall': 0.05768711010563472, 'F1': 0.07567137860350791}, 'Validation': {'precision': 0.11419263414705409, 'recall': 0.04896618783961555, 'F1': 0.06553495687913008}, 'Test': {'precision': 0.10913446935180122, 'recall': 0.04820448621706139, 'F1': 0.0640902700361421}}
=====Epoch 2
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 3.0760574340820312
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['compile', 'string'], ['init', 'bem', 'obj'], ['set', 'one', 'time', 'boot'], ['create', 'binary'], ['parse', 'string']]
seq_pred_list [[], ['run'], ['set'], ['run'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run'], ['write', 'file'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['get'], ['get'], ['get'], [], ['create']]
{'Train': {'precision': 0.1785526599603898, 'recall': 0.08686027357199191, 'F1': 0.11095107631225694}, 'Validation': {'precision': 0.15350031409329304, 'recall': 0.06943729647415496, 'F1': 0.09095849902047035}, 'Test': {'precision': 0.14867110139116702, 'recall': 0.06907394389625117, 'F1': 0.09015570319561571}}
=====Epoch 3
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.910001516342163
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['truncate'], ['opath', 'from', 'ext'], ['set', 'value'], ['to', 'shapefile'], ['laplacian', 'pca']]
seq_pred_list [[], ['get'], ['set'], [], []]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['call'], ['save'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['save']]
{'Train': {'precision': 0.19454285709616906, 'recall': 0.10115067173258094, 'F1': 0.12579330216396709}, 'Validation': {'precision': 0.1617763071394136, 'recall': 0.07710997300119445, 'F1': 0.09864968026864857}, 'Test': {'precision': 0.15917927221918476, 'recall': 0.07911087100534886, 'F1': 0.10044853820824295}}
=====Epoch 4
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.779207229614258
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['insert'], ['handle', 'shot'], ['add'], ['get', 'networks'], ['post']]
seq_pred_list [['get'], ['get'], ['parse'], ['get'], ['post']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['create'], ['save'], [], ['get'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['update']]
{'Train': {'precision': 0.2231925815897667, 'recall': 0.12708318008703326, 'F1': 0.15270402271675523}, 'Validation': {'precision': 0.17870812698134433, 'recall': 0.091753920600833, 'F1': 0.11411399224433365}, 'Test': {'precision': 0.1788963307210983, 'recall': 0.09633415634645816, 'F1': 0.11821303784808433}}
=====Epoch 5
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.6658921241760254
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['message', 'loop'], ['send'], ['set', 'debugger'], ['nonwhitelisted', 'allowed', 'principals'], ['clear', 'recent', 'files']]
seq_pred_list [['listen'], [], ['set'], ['get'], ['set']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['create'], ['write'], [], ['get'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get', 'md5'], [], ['get']]
{'Train': {'precision': 0.24233635475289406, 'recall': 0.14570360201481883, 'F1': 0.17143770663828106}, 'Validation': {'precision': 0.18463572482505733, 'recall': 0.09864343350231064, 'F1': 0.12051245492320128}, 'Test': {'precision': 0.18539578397424214, 'recall': 0.10208487015485375, 'F1': 0.12423835454179906}}
=====Epoch 6
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.562704086303711
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['sampler'], ['load', 'locale', 'prefixdata', 'file'], ['gray2int'], ['bb', 'range'], ['enable']]
seq_pred_list [['get'], ['read', 'file'], ['get'], ['get'], ['set']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['optimize', 'estimate'], ['write'], [], ['get'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], ['add'], ['download']]
{'Train': {'precision': 0.26325359825087746, 'recall': 0.1660397527856643, 'F1': 0.191868097216824}, 'Validation': {'precision': 0.19386568494251358, 'recall': 0.10759664021048077, 'F1': 0.12937625113195958}, 'Test': {'precision': 0.1965342324281635, 'recall': 0.1143578183354017, 'F1': 0.136252932617066}}
=====Epoch 7
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.4655115604400635
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['setup', 'config', 'from', 'kwargs'], ['iter', 'valid', 'fields'], ['get', 'corner'], ['file', 'hash'], ['get', 'group', 'host']]
seq_pred_list [['get', 'config'], ['get', 'fields'], ['get'], ['file', 'hash'], ['get', 'host']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['create'], ['write'], [], ['get'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'valid'], ['get'], ['get'], [], ['download']]
{'Train': {'precision': 0.28543762378179116, 'recall': 0.1876907001651472, 'F1': 0.21345802191142396}, 'Validation': {'precision': 0.20262669646900705, 'recall': 0.11562680221027068, 'F1': 0.13723168233401833}, 'Test': {'precision': 0.198511633558107, 'recall': 0.11954215013263672, 'F1': 0.14012536124345856}}
=====Epoch 8
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.3713250160217285
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['get', 'subclass', 'tree'], ['get', 'user'], ['get', 'bin'], ['visit', 'name'], ['sql', 'flush']]
seq_pred_list [['get'], ['get'], ['get'], ['visit'], ['add']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['create'], ['write'], [], ['get'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['calculate', 'md5'], [], ['get']]
{'Train': {'precision': 0.2957527321868606, 'recall': 0.1864162258549428, 'F1': 0.21636940355672754}, 'Validation': {'precision': 0.1991103124863041, 'recall': 0.11011204747782738, 'F1': 0.1330705517474155}, 'Test': {'precision': 0.20317037239535873, 'recall': 0.11826521417144707, 'F1': 0.14101360792175444}}
=====Epoch 9
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.280259132385254
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['assert', 'dut', 'trace', 'not', 'contains'], ['head', 'tail', 'middle'], ['get', 'brief', 'module', 'info', 'from', 'file'], ['files'], ['walk']]
seq_pred_list [['on'], ['get', 'path'], ['get', 'file'], ['get'], ['step']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['optimize', 'confidence'], ['load'], [], ['get'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['load']]
{'Train': {'precision': 0.32235482806178145, 'recall': 0.21706463187451042, 'F1': 0.24554291696898442}, 'Validation': {'precision': 0.20669895253539028, 'recall': 0.11724226088735008, 'F1': 0.13990915927410646}, 'Test': {'precision': 0.2062526577972176, 'recall': 0.12411920238601487, 'F1': 0.14590670300074346}}
=====Epoch 10
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.1924357414245605
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['correlated', 'values'], ['toggle', 'use', 'font', 'background', 'sensitivity'], ['create', 'party', 'from', 'name'], ['pixel'], ['fit']]
seq_pred_list [['correlated'], ['on'], ['get', 'users'], ['line'], ['fit']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['optimize'], ['write'], [], ['get'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], [], ['extract']]
{'Train': {'precision': 0.34112807289317676, 'recall': 0.2339732517715414, 'F1': 0.2633745471975593}, 'Validation': {'precision': 0.20246453667587033, 'recall': 0.11820462812815005, 'F1': 0.1395349952040139}, 'Test': {'precision': 0.2052601603790778, 'recall': 0.1253080513537047, 'F1': 0.1466322306877255}}
=====Epoch 11
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.1085662841796875
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['get', 'open', 'file', 'name'], ['start', 'container'], ['clone'], ['print', 'code'], ['section']]
seq_pred_list [['ask', 'file'], ['run', 'container'], ['clone'], ['get', 'code'], ['section']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['optimize'], ['write'], [], ['get', 'layer'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'urls'], ['get'], ['hash'], [], ['upload']]
{'Train': {'precision': 0.36587842585511576, 'recall': 0.2687681177053714, 'F1': 0.294584774163222}, 'Validation': {'precision': 0.19410015923799503, 'recall': 0.11900448774270923, 'F1': 0.13769948307573762}, 'Test': {'precision': 0.20167137476459507, 'recall': 0.12968772148310145, 'F1': 0.14822292807530643}}
=====Epoch 12
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.0272939205169678
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['list', 'nodes', 'min'], ['find', 'pkg', 'dist'], ['run', 'airbnb', 'demo'], ['get'], ['parse', 'params']]
seq_pred_list [['avail', 'locations'], ['pypi', 'pkg', 'dist'], [], ['list'], ['get', 'params']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train', 'multi'], ['write'], [], ['get', 'layer'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], ['download', 'url'], ['get', 'json']]
{'Train': {'precision': 0.3812751648789799, 'recall': 0.28087330004593514, 'F1': 0.30746876184641075}, 'Validation': {'precision': 0.20144555959737623, 'recall': 0.12229227580939023, 'F1': 0.14181542381787812}, 'Test': {'precision': 0.2074486665451674, 'recall': 0.13358996444699014, 'F1': 0.15266897081413208}}
=====Epoch 13
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.9486497640609741
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['parse', 'host'], ['guess', 'image', 'format', 'from', 'http', 'response'], ['any2sql'], ['urlargs', 'replace', 'text', 'in', 'arg'], ['get', 'hostname']]
seq_pred_list [['parse'], ['clean', 'http'], [], ['parse'], ['get', 'hostname']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['minimize'], ['write'], [], ['get', 'layer'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get', 'id'], ['download'], ['get', 'page']]
{'Train': {'precision': 0.40061204580661597, 'recall': 0.29437540908123727, 'F1': 0.3236620880998711}, 'Validation': {'precision': 0.19950694657492218, 'recall': 0.11952256775962783, 'F1': 0.13954028542931554}, 'Test': {'precision': 0.20457065184375187, 'recall': 0.129585911577437, 'F1': 0.14928564919680284}}
=====Epoch 14
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.873936414718628
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['do', 'preprocess'], ['count'], ['start', 'sctp', 'server'], ['get', 'hidden'], ['cell', 'array', 'generator']]
seq_pred_list [['parse'], ['count', 'of'], ['set'], ['get'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run'], ['write', 'binary'], [], ['get', 'layer'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], ['download'], ['load', 'page', 'html']]
{'Train': {'precision': 0.414775011275173, 'recall': 0.3146747090958883, 'F1': 0.341544269295044}, 'Validation': {'precision': 0.1982001723860864, 'recall': 0.12207635841001312, 'F1': 0.1409100869766163}, 'Test': {'precision': 0.20480150051637205, 'recall': 0.1326769839823421, 'F1': 0.151238099577356}}
=====Epoch 15
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.801698923110962
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['use', 'active', 'composition', 'view'], ['meet'], ['param', 'value'], ['find', 'by', 'project'], ['flush']]
seq_pred_list [['use', 'isolated', 'composition', 'view'], [], ['get'], ['get', 'by'], ['remove']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run'], ['write'], [], ['get', 'layer'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], ['download'], ['extract', 'page']]
{'Train': {'precision': 0.44398890294200316, 'recall': 0.33696334937327055, 'F1': 0.36637825202345675}, 'Validation': {'precision': 0.19817971979956467, 'recall': 0.11985260920046388, 'F1': 0.1392000761944225}, 'Test': {'precision': 0.20651008444201446, 'recall': 0.1345590298570069, 'F1': 0.15270950674176478}}
=====Epoch 16
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.7322907447814941
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['copy', 'object'], ['post'], ['parse', 'args'], ['cortex', 'to', 'image', 'interpolation'], ['get', 'size']]
seq_pred_list [['list'], ['create'], ['run'], ['cortex', 'interpolation'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train'], ['create'], [], ['get'], ['inverse']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get', 'key'], ['download'], ['clean']]
{'Train': {'precision': 0.4632005722558845, 'recall': 0.35338499798559947, 'F1': 0.3841948657934663}, 'Validation': {'precision': 0.19481453886721886, 'recall': 0.1192543710447028, 'F1': 0.13818440036829832}, 'Test': {'precision': 0.20120284308365227, 'recall': 0.12900137699208228, 'F1': 0.147466594924222}}
=====Epoch 17
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.665633201599121
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['groups', 'rename'], ['load', 'json', 'file'], ['on', 'message'], ['spa', 'c'], ['reachable', 'nodes']]
seq_pred_list [['channels', 'set'], ['load', 'json', 'file'], ['on', 'message'], ['spa', 'c'], ['reachable', 'stack']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run'], ['write'], [], ['create'], ['decorator', 'templates']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], ['download'], ['html']]
{'Train': {'precision': 0.4843583119268454, 'recall': 0.3859069243192941, 'F1': 0.41253932585412467}, 'Validation': {'precision': 0.19118639610816499, 'recall': 0.12182527508381039, 'F1': 0.13867836764580418}, 'Test': {'precision': 0.20236240204118824, 'recall': 0.13540395624868015, 'F1': 0.15196517887879285}}
=====Epoch 18
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.6038682460784912
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['raw', 'role', 'mentions'], ['compute', 'upper', 'bound'], ['parse', 'isoformat'], ['get', 'arc'], ['get']]
seq_pred_list [['get'], ['get', 'upper'], ['parse', 'isoformat'], ['get'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run'], ['write'], [], ['create', 'layer'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'ca'], ['get'], ['get', 'key'], ['download'], ['download']]
{'Train': {'precision': 0.5009395569020401, 'recall': 0.3908770280284782, 'F1': 0.42235096978182446}, 'Validation': {'precision': 0.1951965639654643, 'recall': 0.1229483238270546, 'F1': 0.1406786799366893}, 'Test': {'precision': 0.20370572869206002, 'recall': 0.1351987815355776, 'F1': 0.1529459964533775}}
=====Epoch 19
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.543622374534607
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['get', 'personal', 'access', 'token'], ['bind', 'global', 'key'], ['assign', 'objective', 'requisite'], ['prod', 'all', 'once'], ['read']]
seq_pred_list [['get', 'personal'], ['bind', 'event', 'key'], ['add', 'objective', 'requisite'], ['get'], ['draw']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['optimize'], ['write'], ['create'], ['create'], ['create', 'image']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get', 'key'], ['download'], ['extract']]
{'Train': {'precision': 0.5088892892392364, 'recall': 0.40602890905298394, 'F1': 0.43475708249762246}, 'Validation': {'precision': 0.18978174168383222, 'recall': 0.12276859846216029, 'F1': 0.13902037612648122}, 'Test': {'precision': 0.19798463033837557, 'recall': 0.13384164157127526, 'F1': 0.15004379366052412}}
=====Epoch 20
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.4860026836395264
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['check', 'jobs', 'status'], ['savefig'], ['get', 'representations', 'of', 'kind'], ['reconnect', 'remote'], ['page', 'set', 'auto', 'attach', 'to', 'created', 'pages']]
seq_pred_list [['status', 'jobs'], ['savefig'], ['get', 'kind'], ['reconnect', 'remote'], ['debugger', 'set', 'skip']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run'], ['latex', 'binary'], [], ['get'], ['get', 'templates']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], ['download'], ['download', 'page']]
{'Train': {'precision': 0.5329262015412671, 'recall': 0.4345098446435291, 'F1': 0.46166154307958596}, 'Validation': {'precision': 0.17651020437977533, 'recall': 0.11499038937133339, 'F1': 0.12953716505015975}, 'Test': {'precision': 0.18795790049207217, 'recall': 0.12866540972746549, 'F1': 0.14286383912545747}}
=====Epoch 21
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.4316390752792358
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['delete'], ['use', 'gradient'], ['tokenize', 'block'], ['get', 'zip', 'content'], ['log', 'progress']]
seq_pred_list [['delete'], ['register', 'operator'], ['tokenize'], ['load'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['create'], ['save'], ['symmetric'], ['create', 'model'], ['default']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'like'], ['get'], ['hash', 'key'], ['download'], ['download']]
{'Train': {'precision': 0.548608659986535, 'recall': 0.45597333480940594, 'F1': 0.4815279785419353}, 'Validation': {'precision': 0.18308936319410962, 'recall': 0.12281354008027101, 'F1': 0.13671828480742887}, 'Test': {'precision': 0.19498131948241296, 'recall': 0.1375206838096395, 'F1': 0.15113027223470088}}
=====Epoch 22
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.3803963661193848
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['parse', 'nestings2'], ['send', 'from', 'directory'], ['create'], ['tz', 'file'], ['get']]
seq_pred_list [['parse'], ['send', 'file', 'directory'], ['message'], ['tz'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run'], ['latex'], [], ['create'], ['get', 'batches']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['available'], ['get'], ['get'], [], ['page', 'page']]
{'Train': {'precision': 0.5645475060624482, 'recall': 0.4664560277498906, 'F1': 0.49414721531858075}, 'Validation': {'precision': 0.18126104804896936, 'recall': 0.11920223086239654, 'F1': 0.13390083569116748}, 'Test': {'precision': 0.19436622926918173, 'recall': 0.1324226671410925, 'F1': 0.14787940655217857}}
=====Epoch 23
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.3302055597305298
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['check', 'timeseries', 'input'], ['load', 'model', 'from', 'path'], ['parse', 'numbering'], ['verify'], ['remove', 'system']]
seq_pred_list [['unpack', 'data'], ['load', 'model'], ['parse', 'numbering'], ['verify'], ['remove']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run'], ['run'], [], ['create', 'layer'], ['get', 'templates']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get', 'key'], ['run'], ['extract', 'page']]
{'Train': {'precision': 0.5739120275048857, 'recall': 0.4985777803401265, 'F1': 0.5170970767314852}, 'Validation': {'precision': 0.1786299688828505, 'recall': 0.12440517060865924, 'F1': 0.13650258936449058}, 'Test': {'precision': 0.1874058380414312, 'recall': 0.13666715121745193, 'F1': 0.14785166815647954}}
=====Epoch 24
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.2844871282577515
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['reroot'], ['sqllab'], ['unpublish'], ['init', 'widget'], ['merge', 'tops', 'same']]
seq_pred_list [['reroot', 'node'], ['index'], ['set'], ['init', 'widget'], ['merge', 'tops']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run'], ['latex'], ['rgb'], ['create', 'layer'], ['get', 'batches']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'admin'], ['get'], ['get'], ['download'], ['download', 'page']]
{'Train': {'precision': 0.5930840294527128, 'recall': 0.5081045006253844, 'F1': 0.5312235728701421}, 'Validation': {'precision': 0.17850140976757098, 'recall': 0.1224965234081244, 'F1': 0.13505755151677049}, 'Test': {'precision': 0.18603973027155096, 'recall': 0.1329760675304688, 'F1': 0.14518122462354283}}
=====Epoch 25
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.2393715381622314
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['synchronous'], ['read', 'very', 'lazy'], ['read', 'starlog'], ['get', 'all', 'commands'], ['get', 'field', 'info']]
seq_pred_list [['synchronous'], ['read'], [], ['get', 'names'], ['get', 'field']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run'], ['run'], [], ['create', 'server', 'layer'], ['get', 'batches']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['available', 'share'], ['get'], ['get', 'id'], [], ['download']]
{'Train': {'precision': 0.6012061984038277, 'recall': 0.5234239269180663, 'F1': 0.5436920240134134}, 'Validation': {'precision': 0.16908883727045626, 'recall': 0.11681256526218615, 'F1': 0.12837412182418098}, 'Test': {'precision': 0.17861991981046108, 'recall': 0.12961937812607577, 'F1': 0.14055556936229435}}
F1
Finished training!
Best validation score: 0.14181542381787812
Test score: 0.15266897081413208
