# Experiment Motivation

GraphSAGE with biased pooling achieved a high test score but also overfit heavily. Try using fewer layers to reduce overfitting.

In [None]:
# Hyper-parameters and run settings shared by the cells below.
args = dict(
    num_layers=3,              # number of SAGEConv layers in the model
    hidden_dim=768,            # width of node embeddings and hidden layers
    output_dim=768,            # width of the last SAGEConv layer / graph embedding
    dropout=0,                 # dropout probability applied between layers
    lr=0.001,                  # AdamW learning rate
    epochs=25,                 # number of train + evaluate rounds
    num_vocab=5000,            # most frequent label sub-tokens kept in the vocabulary
    warmup=2,                  # only referenced by the disabled warm-up code
    weight_decay=0.000001,     # AdamW weight decay
    words_per_method_name=5,   # sub-tokens predicted per graph
    max_steps=99999,           # per-loop batch cut-off, used to shorten runs when debugging
    batch_size=32,             # graphs per mini-batch
    heads=3,                   # passed to the model, which stores but does not use it
)

# Setup

In [None]:
# Install the graph-learning dependencies, then print the installed OGB version
# (the recorded run installed ogb 1.3.6).
!pip install ogb
!pip install torch_geometric
!python -c "import ogb; print(ogb.__version__)"

import os
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from torch_geometric.loader import DataLoader
import torch
import pandas as pd
import torch.nn.functional as F
from tqdm.notebook import tqdm
print(torch.__version__)

from torch_geometric.nn import SAGEConv

import torch_geometric.transforms as T
from torch_geometric.nn import global_add_pool, global_mean_pool
from numpy import float32
from transformers import AutoTokenizer, AutoModel

import copy
import numpy as np
# Run on the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device: {}'.format(device))

Collecting ogb
  Downloading ogb-1.3.6-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Collecting outdated>=0.2.0 (from ogb)
  Downloading outdated-0.2.2-py2.py3-none-any.whl (7.5 kB)
Collecting littleutils (from outdated>=0.2.0->ogb)
  Downloading littleutils-0.2.2.tar.gz (6.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: littleutils
  Building wheel for littleutils (setup.py) ... [?25l[?25hdone
  Created wheel for littleutils: filename=littleutils-0.2.2-py3-none-any.whl size=7026 sha256=e67f41bd6a31729135d103865cef24362d9e506272dd07afbda4f1b0ccdf9a22
  Stored in directory: /root/.cache/pip/wheels/3d/fe/b0/27a9892da57472e538c7452a721a9cf463cc03cf7379889266
Successfully built littleutils
Installing collected packages: littleutils, outdated, ogb
Successfully installed littleutils-0.2.2 ogb-1.3.6 outdated-0.2.2
Collecting torch_geometric
  

# Dataset

In [None]:
# Load the ogbg-code2 graph property prediction dataset (the recorded run shows
# it being downloaded and processed on first use) and its matching evaluator.
dataset = PygGraphPropPredDataset(name = "ogbg-code2")
print(dataset)
evaluator = Evaluator(name='ogbg-code2')
# Reference note on the evaluator's input/output contract.
# NOTE: this is a bare string expression, so the notebook echoes it back as
# the cell's output value.
'''
==== Expected input format of Evaluator for ogbg-code2
{'seq_ref': seq_ref, 'seq_pred': seq_pred}
- seq_ref: a list of lists of strings
- seq_pred: a list of lists of strings
where seq_ref stores the reference sequences of sub-tokens, and
seq_pred stores the predicted sequences of sub-tokens.

==== Expected output format of Evaluator for ogbg-code2
{'F1': F1}
- F1 (float): F1 score averaged over samples.
'''
# Optional dataset inspection, left disabled:
# print(dataset.num_classes)
# print(dataset.num_node_features)
# print(dataset[0])
# print(len(dataset.y))

Downloading http://snap.stanford.edu/ogb/data/graphproppred/code2.zip


Downloaded 0.91 GB: 100%|██████████| 934/934 [00:59<00:00, 15.58it/s]


Extracting dataset/code2.zip


Processing...


Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 452741/452741 [00:01<00:00, 337726.91it/s]


Converting graphs into PyG objects...


100%|██████████| 452741/452741 [00:21<00:00, 21284.00it/s]


Saving...


Done!


PygGraphPropPredDataset(452741)


"\n==== Expected input format of Evaluator for ogbg-code2\n{'seq_ref': seq_ref, 'seq_pred': seq_pred}\n- seq_ref: a list of lists of strings\n- seq_pred: a list of lists of strings\nwhere seq_ref stores the reference sequences of sub-tokens, and\nseq_pred stores the predicted sequences of sub-tokens.\n\n==== Expected output format of Evaluator for ogbg-code2\n{'F1': F1}\n- F1 (float): F1 score averaged over samples.\n"

# SAGEConv Model
This model stacks multiple SAGEConv (GraphSAGE) layers and uses `global_add_pool` for graph-level aggregation.

In [None]:
from torch_geometric.nn.models.tgn import Linear
class GraphSage(torch.nn.Module):
    """
    GraphSAGE model that predicts a fixed-length sequence of label tokens per graph.

    Pipeline (see ``forward``):
        1. ``node_embedder`` maps raw node features and node depth to
           ``hidden_dim``-wide node embeddings.
        2. The embeddings pass through ``num_layers`` SAGEConv layers with LSTM
           aggregation; every layer except the last is followed by BatchNorm,
           ReLU and dropout.
        3. Node outputs are scaled by ``0.5 ** node_depth`` (shallower nodes
           weigh more) and sum-pooled per graph with ``global_add_pool``.
        4. One linear head maps each graph embedding to
           ``pred_list_len * num_classes`` logits.

    Args:
        node_embedder: module called as ``node_embedder(x, depth)`` that returns
            node embeddings of width ``hidden_dim``.
        hidden_dim: width of the node embeddings and of the hidden SAGEConv layers.
        output_dim: width of the last SAGEConv layer, i.e. of the graph embedding.
        num_layers: total number of SAGEConv layers.
        dropout: probability of zeroing an element after each hidden layer.
        pred_list_len: number of tokens predicted per graph.
        num_classes: size of the output vocabulary.
        heads: stored for signature compatibility; not used by this architecture.

    Input
        A batched graph object exposing ``x``, ``edge_index``, ``node_depth``
        and ``batch``.
    Output
        Logits of shape (num_graphs, pred_list_len, num_classes).
    """
    def __init__(self, node_embedder, hidden_dim, output_dim, num_layers,
                 dropout, pred_list_len, num_classes, heads):
        super().__init__()

        self.node_embedder = node_embedder
        self.hidden_dim = hidden_dim
        self.pred_list_len = pred_list_len
        self.num_classes = num_classes
        self.heads = heads

        # SAGEConv layers: (num_layers - 1) hidden layers plus one output layer.
        self.convs = torch.nn.ModuleList()
        self.convs.extend([SAGEConv(hidden_dim, hidden_dim, aggr="lstm") for _ in range(num_layers - 1)])
        self.convs.append(SAGEConv(hidden_dim, output_dim, aggr='lstm'))

        # One batch norm per hidden layer (the output layer is not normalised).
        self.bns = torch.nn.ModuleList()
        self.bns.extend([torch.nn.BatchNorm1d(hidden_dim) for _ in range(num_layers - 1)])

        # Graph-level readout.
        self.pool_method = global_add_pool

        # Probability of an element getting zeroed.
        self.dropout = dropout

        # Post-processing layer that turns a graph embedding into
        # pred_list_len token predictions, emitted as one flat vector.
        self.post_mps = torch.nn.Linear(output_dim, num_classes * pred_list_len, bias=True)

    def reset_parameters(self):
        """
        Re-initialise the convolution, batch-norm and output-head weights.

        ``node_embedder`` is left untouched: it is supplied by the caller and
        may carry pretrained weights.
        """
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()
        self.post_mps.reset_parameters()

    def forward(self, batched_data):
        """
        Compute per-graph token logits for a batch of graphs.

        Returns a tensor of shape (num_graphs, pred_list_len, num_classes).
        """
        x = batched_data.x
        edge_index = batched_data.edge_index
        node_depth = batched_data.node_depth
        batch = batched_data.batch

        # Embed node features; the embedder receives depth as a flat vector.
        x = self.node_embedder(x, node_depth.view(-1,))

        # Reorder edges by destination node before the LSTM-aggregating layers.
        edge_index = edge_index[:, edge_index[1].argsort()]

        for conv, bn in zip(self.convs[:-1], self.bns):
            x = conv(x, edge_index)
            x = bn(x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        out = self.convs[-1](x, edge_index)  # num_nodes x output_dim

        # Depth-biased pooling: halve a node's contribution per level of depth.
        # node_depth is read after node_embedder ran; the encoders in this
        # notebook clamp depth in place through the flattened view above, so
        # the exponent is bounded by their max_depth.
        # NOTE(review): assumes node_depth is (num_nodes, 1) so that it
        # broadcasts over the feature columns - confirm against the dataset.
        out = out * (0.5 ** node_depth)

        pooled = self.pool_method(out, batch)  # num_graphs x output_dim

        # One graph embedding -> pred_list_len predictions over num_classes.
        logits = self.post_mps(pooled)
        return logits.view(pooled.shape[0], self.pred_list_len, self.num_classes)

# Train

In [None]:
def train(model, device, data_loader, optimizer, epoch, args):
    """
    Run one training epoch and print the average per-graph loss.

    Args:
        model: network called as ``model(batch)``; expected to return logits of
            shape (num_graphs, words_per_method_name, num_classes).
        device: device every batch is moved to.
        data_loader: iterable of batched graphs.
        optimizer: optimizer stepped once per processed batch.
        epoch: current epoch number. Unused here; it only fed the learning-rate
            warm-up, which is disabled.
        args: settings dict; reads 'max_steps', 'num_classes' and
            'words_per_method_name'.

    Returns:
        The average per-graph loss over the processed batches as a float
        (NaN when no batch was processed).
    """
    model.train()

    # Used to shorten training for debugging
    max_steps = args['max_steps']

    # The optimised quantity is, as before, the SUM over graphs of each graph's
    # mean cross-entropy across its predicted words. Summing every row in one
    # call and dividing by the number of words gives exactly that value without
    # a Python loop over the batch.
    loss_fn = torch.nn.CrossEntropyLoss(reduction='sum')

    loss_accum = 0.0
    batches_used = 0

    for step, batch in enumerate(tqdm(data_loader, desc="Iteration")):
        batch = batch.to(device)

        # Batches made of a single node or a single graph are skipped.
        if not (batch.x.shape[0] == 1 or batch.batch[-1] == 0):
            optimizer.zero_grad()

            pred_list = model(batch)  # batch_size x words per graph x num_classes

            # y comes in as a list of words per graph; one-hot encode it to
            # batch_size x words_per_method_name x num_classes.
            y = one_hot_y(batch.y, args['num_classes'], args['words_per_method_name'])

            num_graphs, num_words = pred_list.shape[0], pred_list.shape[1]
            loss = loss_fn(
                pred_list.reshape(-1, pred_list.shape[-1]).to(torch.float32),
                y.reshape(-1, y.shape[-1]).to(torch.float32),
            ) / num_words

            loss.backward()
            optimizer.step()

            # Detach before accumulating so the running total does not keep
            # every step's autograd graph alive.
            loss_accum += loss.detach() / num_graphs
            batches_used += 1

        # Used to shorten training for debugging. Checked for skipped batches
        # too, so the cut-off cannot be missed.
        if step > max_steps:
            break

    # Average over the batches that actually contributed a loss.
    epoch_loss = float(loss_accum) / batches_used if batches_used else float('nan')
    print("Epoch loss {}".format(epoch_loss))
    return epoch_loss


# Eval

In [None]:
def eval(model, device, loader, evaluator, args):
    """
    Score the model on every batch of ``loader`` with the given evaluator.

    Predicted logits are decoded to word lists with ``decode_to_word`` and
    compared with the reference word lists in ``batch.y``. The first five
    references and predictions are printed for a quick sanity check.

    Args:
        model: network called as ``model(batch)``.
        device: device every batch is moved to.
        loader: iterable of batched graphs.
        evaluator: object whose ``eval`` accepts
            ``{"seq_ref": ..., "seq_pred": ...}``.
        args: settings dict; reads 'max_steps'.

    Returns:
        Whatever ``evaluator.eval`` returns for the collected sequences.
    """
    model.eval()

    # Used to shorten evaluation for debugging
    step_limit = args['max_steps']

    references = []
    predictions = []

    for step, batch in enumerate(tqdm(loader, desc="Iteration")):
        batch = batch.to(device)

        # Batches holding a single node are skipped.
        if batch.x.shape[0] != 1:
            with torch.no_grad():
                logits = model(batch)  # batch_size x pred_list_len x num_classes

            references.extend(batch.y)
            predictions.extend(decode_to_word(logits))

        if step > step_limit:
            break

    print("seq_ref_list {}".format(references[:5]))
    print("seq_pred_list {}".format(predictions[:5]))

    return evaluator.eval({"seq_ref": references, "seq_pred": predictions})

# Helper methods
These helpers may later be moved into a separate module.

In [None]:
class SemanticNodeEncoder(torch.nn.Module):
    '''
    Node encoder whose attribute embeddings are initialised from a CodeBERT-style model.

    A node embedding is the sum of three learned embeddings: node type, node
    attribute and node depth (see ``forward``). Only the attribute table is
    initialised from the language model; the type and depth tables keep
    their default initialisation.

        Input:
            codeBert: model called as ``codeBert(token_ids)``; element 0 of its
                output holds the per-token embeddings.
            tokenizer: object providing ``tokenize`` and ``convert_tokens_to_ids``.
            emb_dim: embedding width; must equal the language model's output width.
            node_type_mapping: list of node type strings.
            node_attributes_mapping: list of node attribute strings.
        Output (of ``forward``):
            Node embeddings of shape N x emb_dim.
    '''

    # Positions whose string is replaced by '[UNK]' before embedding
    # (original note: "some node attr are not recognized").
    _UNK_INDICES = frozenset([0, 1, 6, 10021, 10022, 10023, 10024, 10027, 10028, 10029])
    _UNK_STRING = '[UNK]'

    def __init__(self, codeBert, tokenizer, emb_dim, node_type_mapping, node_attributes_mapping):
        super(SemanticNodeEncoder, self).__init__()

        self.max_depth = 20
        self.codeBert = codeBert
        self.tokenizer = tokenizer
        self.emb_dim = emb_dim
        self.node_type_mapping = node_type_mapping
        self.node_attributes_mapping = node_attributes_mapping

        self.type_encoder = torch.nn.Embedding(len(node_type_mapping), emb_dim)
        self.attribute_encoder = torch.nn.Embedding(len(node_attributes_mapping), emb_dim)
        self.depth_encoder = torch.nn.Embedding(self.max_depth+1, emb_dim)

        # Replace the random attribute table with language-model embeddings.
        pretrained = self.get_embedding(node_attributes_mapping)
        if tuple(pretrained.shape) != tuple(self.attribute_encoder.weight.shape):
            raise ValueError(
                "pretrained attribute embeddings have shape {} but the embedding table expects {}".format(
                    tuple(pretrained.shape), tuple(self.attribute_encoder.weight.shape)))
        self.attribute_encoder.weight = torch.nn.Parameter(pretrained)

    def _encoder_device(self):
        '''Device the language model lives on (CPU if it exposes no parameters).'''
        try:
            return next(self.codeBert.parameters()).device
        except (AttributeError, StopIteration, TypeError):
            return torch.device('cpu')

    def _embed_string(self, text):
        '''
        Embed one string as the sum of its token embeddings. Returns a 1 x D tensor.
        '''
        tokens = self.tokenizer.tokenize(text)
        token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
        if len(token_ids) == 0:
            raise ValueError("tokenizer produced no tokens")
        ids = torch.tensor(token_ids, dtype=torch.long, device=self._encoder_device())[None, :]
        # The language model is only used as a fixed feature extractor here:
        # no_grad keeps its autograd graph from being stored for every string.
        with torch.no_grad():
            token_embeddings = self.codeBert(ids)[0]
        token_embeddings = token_embeddings.reshape(ids.shape[1], -1)
        # Sum the token embeddings when the string has more than one token.
        return token_embeddings.sum(dim=0, keepdim=True).detach()

    def get_embedding(self, mapping):
        '''
        Input:
            mapping: list of node type strings or list of node attribute strings.
        Output:
            Language-model embedding of every entry, one row each. Dim: N x D.

        Entries at ``_UNK_INDICES``, entries that are not strings (e.g. NaN) and
        entries that cannot be embedded all receive the '[UNK]' embedding. A
        failure to embed '[UNK]' itself is not caught.
        '''
        unk_embedding = self._embed_string(self._UNK_STRING)
        cache = {self._UNK_STRING: unk_embedding}

        rows = []
        for i, feature_string in enumerate(mapping):
            if i in self._UNK_INDICES or not isinstance(feature_string, str):
                feature_string = self._UNK_STRING

            if feature_string not in cache:
                try:
                    cache[feature_string] = self._embed_string(feature_string)
                except Exception as err:
                    # Best effort: report the failure and fall back to the
                    # '[UNK]' embedding rather than reusing whatever embedding
                    # the previous entry happened to produce.
                    print("Error tokenizing {} {}: {}".format(feature_string, i, err))
                    cache[feature_string] = unk_embedding

            rows.append(cache[feature_string])

        if not rows:
            return unk_embedding.new_zeros((0, unk_embedding.shape[1]))
        return torch.cat(rows, dim=0)  # N x D

    def forward(self, x, depth):
        '''
            Input:
                'x': default node feature. The first and second columns hold the
                     node type index and the node attribute index.
                'depth': the depth of each node in the AST.
            Output:
                Sum of the type, attribute and depth embeddings, N x emb_dim.

            Side effect: ``depth`` is clamped to ``max_depth`` IN PLACE.
            GraphSage.forward reads node_depth again after this call, so the
            in-place update is kept as is.
        '''
        depth[depth > self.max_depth] = self.max_depth
        return self.type_encoder(x[:,0]) + self.attribute_encoder(x[:,1]) + self.depth_encoder(depth)



class ASTNodeEncoder(torch.nn.Module):
    '''
    Learned AST node embedding: type embedding + attribute embedding + depth embedding.

        Constructor arguments:
            emb_dim: width of every embedding table.
            num_nodetypes: number of rows in the node type table.
            num_nodeattributes: number of rows in the node attribute table.
            max_depth: largest depth with its own embedding; deeper nodes share it.

        Input (forward):
            x: default node feature. Column 0 is the node type index and
               column 1 the node attribute index.
            depth: the depth of each node in the AST. Clamped to max_depth in place.

        Output:
            emb_dim-dimensional vector per node.
    '''
    def __init__(self, emb_dim, num_nodetypes, num_nodeattributes, max_depth):
        super().__init__()

        self.max_depth = max_depth

        # Tables are created in the order type, attribute, depth.
        self.type_encoder = torch.nn.Embedding(num_nodetypes, emb_dim)
        self.attribute_encoder = torch.nn.Embedding(num_nodeattributes, emb_dim)
        self.depth_encoder = torch.nn.Embedding(self.max_depth + 1, emb_dim)

    def forward(self, x, depth):
        # Cap the depth in place so it always indexes a valid row.
        depth.clamp_(max=self.max_depth)

        type_ids = x[:, 0]
        attribute_ids = x[:, 1]

        embedded = self.type_encoder(type_ids)
        embedded = embedded + self.attribute_encoder(attribute_ids)
        return embedded + self.depth_encoder(depth)


def get_vocab_mapping(seq_list, num_vocab):
    '''
    Build the label vocabulary from the most frequent words in ``seq_list``.

        Input:
            seq_list: a list of sequences (each a list of words)
            num_vocab: maximum number of distinct words to keep
        Output:
            vocab2idx:
                A dictionary that maps vocabulary into integer index.
                Additionally, we also index '__UNK__' and '__EOS__'
                '__UNK__' : out-of-vocabulary term
                '__EOS__' : end-of-sentence
            idx2vocab:
                A list that maps idx to actual vocabulary.

        Words are ranked by frequency; ties keep first-seen order. The two
        special tokens always take the last two indices, so when ``seq_list``
        holds fewer than ``num_vocab`` distinct words they directly follow the
        words that do exist. Prints the share of all word occurrences covered
        by the kept words.
    '''
    from collections import Counter

    # Counter keeps first-seen order, which the stable sort below relies on.
    vocab_cnt = Counter()
    for seq in seq_list:
        vocab_cnt.update(seq)

    vocab_list = list(vocab_cnt)
    cnt_list = np.array([vocab_cnt[w] for w in vocab_list], dtype=np.int64)
    topvocab = np.argsort(-cnt_list, kind = 'stable')[:num_vocab]

    total_count = np.sum(cnt_list)
    print('Coverage of top {} vocabulary:'.format(num_vocab))
    if total_count > 0:
        print(float(np.sum(cnt_list[topvocab]))/total_count)
    else:
        # No words at all: report zero coverage instead of dividing 0 by 0.
        print(0.0)

    idx2vocab = [vocab_list[vocab_idx] for vocab_idx in topvocab]
    vocab2idx = {vocab: idx for idx, vocab in enumerate(idx2vocab)}

    # Index the special tokens from the actual vocabulary size, which equals
    # num_vocab whenever enough distinct words exist.
    vocab2idx['__UNK__'] = len(idx2vocab)
    idx2vocab.append('__UNK__')

    vocab2idx['__EOS__'] = len(idx2vocab)
    idx2vocab.append('__EOS__')

    # test the correspondence between vocab2idx and idx2vocab
    for idx, vocab in enumerate(idx2vocab):
        assert(idx == vocab2idx[vocab])

    # test that the idx of '__EOS__' is len(idx2vocab) - 1, i.e. that the
    # special tokens occupy the final positions.
    assert(vocab2idx['__EOS__'] == len(idx2vocab) - 1)

    return vocab2idx, idx2vocab

def one_hot_y(batch_y, num_classes, max_seq_len, vocab=None, target_device=None):
    '''
    One-hot encode a batch of label word sequences.

    Each sequence is truncated to ``max_seq_len`` words and, when shorter,
    padded with the end-of-sequence token. Words missing from the vocabulary
    map to '__UNK__'.

    Input:
        batch_y: a list of word lists, one per graph.
        num_classes: size of the one-hot dimension.
        max_seq_len: number of words kept per graph.
        vocab: word -> index mapping containing '__UNK__' and '__EOS__'.
            Defaults to the notebook-level ``vocab2idx``.
        target_device: device of the returned tensor. Defaults to the
            notebook-level ``device``.
    Output:
        Integer one-hot tensor of shape batch_size x max_seq_len x num_classes.
    '''
    if vocab is None:
        vocab = vocab2idx
    if target_device is None:
        target_device = device

    unk_index = vocab['__UNK__']
    eos_index = vocab['__EOS__']

    # Collect the indices in plain Python lists and build the tensor once,
    # instead of writing element by element into a device tensor.
    rows = []
    for y in batch_y:
        row = [vocab.get(w, unk_index) for w in y[:max_seq_len]]
        # if the number of words in y is less than the max length, fill the
        # blanks with the end-of-sequence token
        row.extend([eos_index] * (max_seq_len - len(row)))
        rows.append(row)

    labels = torch.tensor(rows, dtype=torch.long, device=target_device)
    labels = labels.reshape(len(rows), max_seq_len)  # keeps the shape when the batch is empty

    return F.one_hot(labels, num_classes)  # batch x word len x num_class

def decode_to_word(pred_list, idx_to_vocab=None):
    '''
        Turn per-position class scores back into words by taking the argmax
        over the class dimension.

        Positions predicted as one of the last two vocabulary entries (the
        '__UNK__' and '__EOS__' special tokens) are dropped. Decoding does NOT
        stop at the first '__EOS__': real words predicted after it are kept.

        Input:
            pred_list: scores, batch x word len x num class
            idx_to_vocab: list mapping index -> word whose last two entries are
                the special tokens. Defaults to the notebook-level
                ``idx2vocab``, which ``get_vocab_mapping`` builds with the same
                length as ``vocab2idx``.
        Output: list of lists, each a sequence of words. batch x (<= word len)
    '''
    if idx_to_vocab is None:
        idx_to_vocab = idx2vocab

    # Every index below this bound is a real word.
    num_real_words = len(idx_to_vocab) - 2

    predicted_ids = torch.argmax(pred_list, dim=-1).tolist()
    return [
        [idx_to_vocab[word_index] for word_index in graph if word_index < num_real_words]
        for graph in predicted_ids
    ]

# Run Epochs
Train the GraphSAGE model and evaluate its F1 score on the train, validation and test splits after every epoch.

In [None]:
def run_epochs(model, evaluator, args, dataset):
    """
    Train ``model`` for ``args['epochs']`` epochs, evaluating after each one.

    After every epoch the model is scored on the train, validation and test
    splits (the train split is re-evaluated in full each time). At the end, the
    best validation score and the test score of that same epoch are printed.

    Args:
        model: network to train.
        evaluator: evaluator passed through to ``eval``.
        args: settings dict; reads 'batch_size', 'lr', 'weight_decay' and
            'epochs' (plus whatever ``train`` and ``eval`` read).
        dataset: dataset providing ``get_idx_split()`` and ``eval_metric``.

    Returns:
        Dict with the per-epoch 'train', 'valid' and 'test' score lists and
        'best_epoch', the 1-based epoch with the best validation score
        (None when no epoch was run).
    """
    split_idx = dataset.get_idx_split()
    batch_size = args['batch_size']
    train_loader = DataLoader(dataset[split_idx["train"]], batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(dataset[split_idx["test"]], batch_size=batch_size, shuffle=False)

    valid_curve = []
    test_curve = []
    train_curve = []

    # NOTE: no learning-rate schedule is applied; args['warmup'] is unused.
    optimizer = torch.optim.AdamW(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

    metric = dataset.eval_metric

    for epoch in range(1, args['epochs'] + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train(model, device, train_loader, optimizer, epoch, args)

        print('Evaluating...')
        train_perf = eval(model, device, train_loader, evaluator, args)
        valid_perf = eval(model, device, valid_loader, evaluator, args)
        test_perf = eval(model, device, test_loader, evaluator, args)

        print({'Train': train_perf, 'Validation': valid_perf, 'Test': test_perf})

        train_curve.append(train_perf[metric])
        valid_curve.append(valid_perf[metric])
        test_curve.append(test_perf[metric])

    curves = {'train': train_curve, 'valid': valid_curve, 'test': test_curve, 'best_epoch': None}

    # Nothing to summarise when zero epochs were requested.
    if not valid_curve:
        print('No epochs were run; nothing to report.')
        return curves

    print('F1')
    best_val_epoch = int(np.argmax(np.array(valid_curve)))
    print('Finished training!')
    print('Best validation score: {}'.format(valid_curve[best_val_epoch]))
    # Model selection: report the test score of the best-validation epoch.
    print('Test score: {}'.format(test_curve[best_val_epoch]))

    curves['best_epoch'] = best_val_epoch + 1
    return curves

In [None]:
# Build the label vocabulary from the method-name words in dataset.y, keeping
# the args['num_vocab'] most frequent words plus '__UNK__' and '__EOS__'.
# The recorded run (num_vocab = 5000) reported about 90% coverage.
# NOTE(review): dataset.y is passed without a split index, so the vocabulary
# appears to be built from all splits rather than training labels only -
# confirm this is intended.
vocab2idx, idx2vocab = get_vocab_mapping(seq_list=dataset.y, num_vocab=args['num_vocab'])
print("Vocab list {}".format(vocab2idx))
# The model's output width: every kept word plus the two special tokens.
args.update(num_classes=len(vocab2idx))

Coverage of top 5000 vocabulary:
0.901200162173439


# Main
Load the ogbg-code2 node type and node attribute mappings, build the GraphCodeBERT-initialised node encoder and the GraphSAGE model, then train and evaluate.

In [None]:
if __name__ == "__main__":

    # Index -> string tables for node types and node attributes.
    # keep_default_na=False with a str dtype stops pandas from turning entries
    # that look like missing-value markers into float NaN; the recorded run
    # shows one attribute arriving at the tokenizer as `nan`.
    nodetypes_mapping = pd.read_csv(
        os.path.join(dataset.root, 'mapping', 'typeidx2type.csv.gz'),
        keep_default_na=False,
        dtype={'type': str},
    )['type'].tolist()
    nodeattributes_mapping = pd.read_csv(
        os.path.join(dataset.root, 'mapping', 'attridx2attr.csv.gz'),
        keep_default_na=False,
        dtype={'attr': str},
    )['attr'].tolist()

    # Pretrained language model used to initialise the attribute embeddings.
    # It is moved to the device before the encoder is built because the
    # encoder runs it on every attribute string during construction.
    tokenizer = AutoTokenizer.from_pretrained("microsoft/graphcodebert-base")
    graphBert = AutoModel.from_pretrained("microsoft/graphcodebert-base")
    graphBert.to(device)
    node_encoder = SemanticNodeEncoder(graphBert, tokenizer, args['hidden_dim'], nodetypes_mapping, nodeattributes_mapping)

    gnn = GraphSage(
        node_encoder,
        args['hidden_dim'],
        args['output_dim'],
        args['num_layers'],
        args['dropout'],
        args['words_per_method_name'],
        args['num_classes'],
        args['heads'],
    ).to(device)

    run_epochs(gnn, evaluator, args, dataset)

Some weights of RobertaModel were not initialized from the model checkpoint at microsoft/graphcodebert-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Error tokenizing nan 763
Error tokenizing yaml 9937
Error tokenizing yang_keys 9938
Error tokenizing yang_name 9939
Error tokenizing yang_type 9940
Error tokenizing yaw 9941
Error tokenizing yaxis 9942
Error tokenizing yb 9943
Error tokenizing yc 9944
Error tokenizing yd 9945
Error tokenizing ydata 9946
Error tokenizing year 9947
Error tokenizing years 9948
Error tokenizing yellow 9949
Error tokenizing yerr 9950
Error tokenizing yes 9951
Error tokenizing yesterday 9952
Error tokenizing yi 9953
Error tokenizing ylab 9954
Error tokenizing ylabel 9955
Error tokenizing ylim 9956
Error tokenizing ymax 9957
Error tokenizing ymin 9958
Error tokenizing yn 9959
Error tokenizing yo 9960
Error tokenizing yp 9961
Error tokenizing ypos 9962
Error tokenizing yr 9963
Error tokenizing ys 9964
Error tokenizing yscale 9965
Error tokenizing yt 9966
Error tokenizing yticks 9967
Error tokenizing yvals 9968
Error tokenizing yy 9969
Error tokenizing z 9970
Error tokenizing z0 9971
Error tokenizing z1 9972
Er

Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 3.1034092903137207
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['directories'], ['fulfillment', 'from', 'details'], ['msg', 'curse'], ['move', 'window'], ['net', 'fx', 'sdk', 'includes']]
seq_pred_list [['get'], [], ['get'], [], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['run'], ['load', 'file'], [], [], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['create']]
{'Train': {'precision': 0.14192370793053186, 'recall': 0.06409264712841392, 'F1': 0.08425432847290398}, 'Validation': {'precision': 0.12254897664022439, 'recall': 0.05263579230467953, 'F1': 0.07050803111854083}, 'Test': {'precision': 0.11932750136686714, 'recall': 0.052524545751719075, 'F1': 0.07007736507463135}}
=====Epoch 2
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.8310999870300293
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['tobinary'], ['xep', 'role'], ['get', 'job', 'logs'], ['ensure', 'benchmark', 'data'], ['p', 'statement', 'continue']]
seq_pred_list [['get'], [], [], ['get'], ['p']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [[], ['save', 'file'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['load']]
{'Train': {'precision': 0.18970964795314754, 'recall': 0.09674246126776424, 'F1': 0.1212268556062397}, 'Validation': {'precision': 0.1616294867861682, 'recall': 0.07596000197569203, 'F1': 0.0978411833256905}, 'Test': {'precision': 0.15778127088269245, 'recall': 0.07733904235407789, 'F1': 0.09872624945068904}}
=====Epoch 3
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.6763665676116943
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['height'], ['start'], ['register', 'name'], ['migrate', 'node'], ['update', 'credentials']]
seq_pred_list [[], ['update'], [], [], ['connect', 'session']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['texture', 'cont', 'geojson', 'crop', 'trait'], ['write'], [], [], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['hash'], [], ['write']]
{'Train': {'precision': 0.2042890594871593, 'recall': 0.11213866066467691, 'F1': 0.13648361144059878}, 'Validation': {'precision': 0.16248484317248837, 'recall': 0.08149046137278596, 'F1': 0.10202515706087605}, 'Test': {'precision': 0.1639124293785311, 'recall': 0.08665049930427185, 'F1': 0.10716538842563991}}
=====Epoch 4
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.550065040588379
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['expand'], ['get', 'tournament', 'prize', 'pool'], ['extract', 'subset'], ['scan'], ['index', 'document']]
seq_pred_list [[], ['get'], ['get'], [], ['delete']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [[], ['save'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['parse']]
{'Train': {'precision': 0.2204801262819382, 'recall': 0.12159160530388251, 'F1': 0.14821340204790698}, 'Validation': {'precision': 0.16822617638894977, 'recall': 0.08452363560725747, 'F1': 0.10603911946112934}, 'Test': {'precision': 0.17375159467833062, 'recall': 0.09319025344175699, 'F1': 0.11493696500530835}}
=====Epoch 5
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.443499803543091
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['singleframe', 'to', 'nifti'], ['get', 'cached', 'mine', 'data'], ['find'], ['arp', 'limit', 'exceeded', 'arp', 'limit'], ['release']]
seq_pred_list [[], ['get'], ['search'], ['server', 'originator'], []]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['inception'], ['export'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], [], ['download']]
{'Train': {'precision': 0.2472408099169232, 'recall': 0.14752426215692435, 'F1': 0.17433065165858286}, 'Validation': {'precision': 0.18048165841258712, 'recall': 0.09739079953612978, 'F1': 0.11849627195641133}, 'Test': {'precision': 0.18425596865318025, 'recall': 0.10484279581463836, 'F1': 0.12605502222095988}}
=====Epoch 6
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.3480072021484375
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['set', 'defaults'], ['try', 'connect'], ['ask', 'for', 'confirmation'], ['send', 'photo'], ['remove', 'log', 'action']]
seq_pred_list [['configure', 'defaults'], ['connect'], ['ask'], ['send', 'photo'], []]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['simulate', 'truncate', 'psql', 'align', 'middleware'], ['save'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], []]
{'Train': {'precision': 0.2562851736376649, 'recall': 0.15939361892892245, 'F1': 0.1852768527766295}, 'Validation': {'precision': 0.17258038596952563, 'recall': 0.09528277440031833, 'F1': 0.11506038287823799}, 'Test': {'precision': 0.18176751108681125, 'recall': 0.10762550950153464, 'F1': 0.12739013714956854}}
=====Epoch 7
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.259460210800171
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['to', 'tokens'], ['del', 'running', 'bp', 'from', 'all', 'threads'], ['connect'], ['multipartite'], ['binary', 'arithemtic']]
seq_pred_list [['get'], ['remove'], [], ['make'], ['copy']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['optimize', 'single'], ['save'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['hash'], [], ['get']]
{'Train': {'precision': 0.2828395542875072, 'recall': 0.18201888711661346, 'F1': 0.20924150034267056}, 'Validation': {'precision': 0.1874428423251669, 'recall': 0.10731759110270746, 'F1': 0.1276159689798643}, 'Test': {'precision': 0.1918846060385153, 'recall': 0.11656943106505707, 'F1': 0.13650771954489832}}
=====Epoch 8
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.1773157119750977
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['probe', 'image'], ['join'], ['options'], ['fetch', 'no', 'cache'], ['policy', 'present']]
seq_pred_list [['convert', 'image'], [], ['boolean', 'options', 'option', 'args'], ['get'], ['thing', 'absent', 'absent']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['optimize', 'run', 'based', 'checkpoint'], ['save'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['hash'], [], ['load']]
{'Train': {'precision': 0.3016216640194521, 'recall': 0.20104836324430123, 'F1': 0.22802299946466037}, 'Validation': {'precision': 0.18672627134738717, 'recall': 0.10783379282086386, 'F1': 0.12772685817345503}, 'Test': {'precision': 0.1921397545714112, 'recall': 0.11899941709263742, 'F1': 0.1382958993196828}}
=====Epoch 9
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.0968809127807617
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['get', 'reservations'], ['from', 'rgb'], ['piecewise'], ['break', 'to', 'bytes'], ['from', 'birth', 'year']]
seq_pred_list [['get'], ['from'], [], [], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train', 'opt', 'rate'], ['save'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['detect'], ['get'], ['get'], [], ['load']]
{'Train': {'precision': 0.325992092672118, 'recall': 0.2171595608970601, 'F1': 0.24682410702961025}, 'Validation': {'precision': 0.19486932258111644, 'recall': 0.11201509456538601, 'F1': 0.13290397447385657}, 'Test': {'precision': 0.20036829475730514, 'recall': 0.12162868367078318, 'F1': 0.1423977663589473}}
=====Epoch 10
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 2.022235631942749
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['get', 'model'], ['hash', 'producer'], ['draw', 'legend'], ['log', 'verbose'], ['run', 'tool']]
seq_pred_list [['get'], ['hash'], [], ['warn'], ['run', 'calc']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['optimize', 'opt'], ['save'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['extract', 'file']]
{'Train': {'precision': 0.3394435375283514, 'recall': 0.2315112430352665, 'F1': 0.2609518960808508}, 'Validation': {'precision': 0.1992644373347358, 'recall': 0.11656016017018779, 'F1': 0.1375138761692647}, 'Test': {'precision': 0.2073051454954134, 'recall': 0.13095907064605808, 'F1': 0.15070299969151799}}
=====Epoch 11
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.9478117227554321
Evaluating...


Iteration:   0%|          | 0/12750 [00:01<?, ?it/s]

seq_ref_list [['populate'], ['can', 'expand', 'to'], ['get', 'out', 'subnet', 'id'], ['is', 'list', 'member'], ['transform']]
seq_pred_list [['populate'], ['is'], ['get', 'out', 'subnet', 'id'], ['add', 'list'], []]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [[], ['save'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['hash', 'md5'], [], []]
{'Train': {'precision': 0.36346758142635843, 'recall': 0.26468785182643345, 'F1': 0.2909568743219175}, 'Validation': {'precision': 0.19110823800967114, 'recall': 0.11798780636270227, 'F1': 0.1361009053716246}, 'Test': {'precision': 0.19806892047870728, 'recall': 0.1300778752665029, 'F1': 0.14741073022921025}}
=====Epoch 12
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.8779349327087402
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['get', 'command'], ['merge', 'settings'], ['do', 'reparse'], ['get', 'ordered', 'entries'], ['find', 'hotspot']]
seq_pred_list [['get', 'command'], ['get'], ['do'], ['query', 'queryset'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['optimize'], ['save'], [], ['get'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['hash', 'md5'], [], ['parse']]
{'Train': {'precision': 0.38545024707335734, 'recall': 0.27783411953160914, 'F1': 0.30700699575901746}, 'Validation': {'precision': 0.19284597741450085, 'recall': 0.11851619963115778, 'F1': 0.13684018985110274}, 'Test': {'precision': 0.19794590243606097, 'recall': 0.12804265348688532, 'F1': 0.1459032348794514}}
=====Epoch 13
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.8102747201919556
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['quantstr'], ['eth', 'get', 'transaction', 'by', 'block', 'number', 'and', 'index'], ['delete'], ['get', 'branding', 'ids'], ['read', 'single', 'query', 'result']]
seq_pred_list [[], ['eth', 'get'], ['delete'], ['get', 'gradebook', 'ids'], ['parse']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [[], ['dump'], [], ['get', 'subnet'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get', 'cache'], [], ['extract']]
{'Train': {'precision': 0.40613357158264213, 'recall': 0.3024319779709802, 'F1': 0.33055825161327645}, 'Validation': {'precision': 0.19696059955296485, 'recall': 0.12205908849639402, 'F1': 0.14041268879985627}, 'Test': {'precision': 0.1993917441224713, 'recall': 0.13117131473815455, 'F1': 0.14847822453754655}}
=====Epoch 14
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.7453149557113647
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['make', 'fake', 'data'], ['clean', 'previous', 'run'], ['get', 'file', 'paths', 'with', 'extensions', 'in', 'directory'], ['generators'], ['send', 'contact']]
seq_pred_list [[], ['clear'], ['find', 'files', 'files'], ['get'], ['send']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [[], ['save'], [], ['get', 'default', 'observation', 'encoder'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['hash'], [], ['download']]
{'Train': {'precision': 0.41960315476073756, 'recall': 0.31122704817444213, 'F1': 0.3407602617722046}, 'Validation': {'precision': 0.19288834348658163, 'recall': 0.1186881683178304, 'F1': 0.13668237797176747}, 'Test': {'precision': 0.1983719093615212, 'recall': 0.13042452796074994, 'F1': 0.14750727878119896}}
=====Epoch 15
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.6830086708068848
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['reset'], ['exec', 'info'], ['groups', 'invite'], ['get', 'screen'], ['refresh']]
seq_pred_list [['set'], ['tags'], ['channels', 'remove'], ['get', 'screen'], ['get']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train'], ['save', 'jar'], [], ['get', 'nn', 'observation'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get', 'key'], [], ['download']]
{'Train': {'precision': 0.4440764489414409, 'recall': 0.33517146851221646, 'F1': 0.36538803332818537}, 'Validation': {'precision': 0.18983871674628566, 'recall': 0.11845995501822296, 'F1': 0.13616101093201502}, 'Test': {'precision': 0.1974113055099933, 'recall': 0.13082475244517344, 'F1': 0.14769353826215498}}
=====Epoch 16
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.623335361480713
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['api', 'representation'], ['process', 'uniprot', 'ids'], ['course', 'feature'], ['check', 'user', 'token'], ['gen', 'random', 'bank', 'card']]
seq_pred_list [['api', 'representation'], ['process', 'genes', 'genotype'], ['course'], ['active', 'user'], ['random', 'random', 'card']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['create'], ['download'], [], ['server', 'nn', 'remove'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'valid', 'url'], ['get'], ['get', 'key'], ['set'], ['download', 'from']]
{'Train': {'precision': 0.458193619232504, 'recall': 0.36023780253018006, 'F1': 0.38614818775828924}, 'Validation': {'precision': 0.19289345663321208, 'recall': 0.12475571681098263, 'F1': 0.1409217330613658}, 'Test': {'precision': 0.20074722070348094, 'recall': 0.13734413013732313, 'F1': 0.1527150392992383}}
=====Epoch 17
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.5672900676727295
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['attention', 'lm', 'moe', 'prepare', 'decoder'], ['argmax'], ['register', 'sds'], ['next'], ['call', 'brightness']]
seq_pred_list [['attention', 'lm', 'moe', 'decoder'], ['argmax'], ['delete'], ['dispatch'], ['call', 'brightness']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['mcmc'], ['save'], ['generic', 'similarity'], ['server', 'encoder'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], ['set'], ['download']]
{'Train': {'precision': 0.4755098339117989, 'recall': 0.37541379742115816, 'F1': 0.4020546351973116}, 'Validation': {'precision': 0.1908759550627456, 'recall': 0.12408488588639353, 'F1': 0.1394156188826829}, 'Test': {'precision': 0.20033336370815868, 'recall': 0.1387869885819585, 'F1': 0.15338788112844973}}
=====Epoch 18
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.5134270191192627
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['coloured', 'network'], ['get', 'language', 'model'], ['do', 'render'], ['untrack', 'tendril'], ['as', 'plural']]
seq_pred_list [['to', 'network'], ['from'], [], ['remove'], []]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [[], ['save'], ['generic'], ['get', 'board', 'stack', 'encoder'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get', 'key'], ['configure'], ['download']]
{'Train': {'precision': 0.4966257002045872, 'recall': 0.3902081564260221, 'F1': 0.41996169549380313}, 'Validation': {'precision': 0.18803304553622302, 'recall': 0.1212788640605619, 'F1': 0.1373151907380772}, 'Test': {'precision': 0.19390225381204057, 'recall': 0.13030422195608088, 'F1': 0.14601290640793102}}
=====Epoch 19
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.4626028537750244
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['frame', 'apply'], ['listen'], ['update'], ['get', 'dim', 'indexers'], ['get', 'stream', 'url']]
seq_pred_list [['from'], ['listen'], ['get'], ['find'], ['get', 'url', 'url']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['initialize'], ['save'], [], ['create', 'stack'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'valid'], ['get'], ['get'], [], ['download', 'content']]
{'Train': {'precision': 0.49969842997300484, 'recall': 0.3892780230899385, 'F1': 0.42064681422210043}, 'Validation': {'precision': 0.19302128529897297, 'recall': 0.12141592769523699, 'F1': 0.13876878528015837}, 'Test': {'precision': 0.19847138691452526, 'recall': 0.13153630080912748, 'F1': 0.1484834678109692}}
=====Epoch 20
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.4139950275421143
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['pop'], ['format', 'payload'], ['run', 'command'], ['enter', 'namespace'], ['rlmb', 'ppo', 'tiny']]
seq_pred_list [['pop'], ['observe', 'payload'], ['handle', 'command'], ['add', 'namespace'], ['rlmb', 'ppo', 'tpu']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train'], ['write'], [], ['create', 'stack'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], ['set'], ['download']]
{'Train': {'precision': 0.5268717195782759, 'recall': 0.4193433696194887, 'F1': 0.4496849467536126}, 'Validation': {'precision': 0.17925012052417055, 'recall': 0.11666376226024733, 'F1': 0.131536642541595}, 'Test': {'precision': 0.18683327258368265, 'recall': 0.12938314153800756, 'F1': 0.1430765680970711}}
=====Epoch 21
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.3670790195465088
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['to', 'realimag'], ['show', 'logs'], ['quadgk', 'int'], ['can', 'create'], ['cluster']]
seq_pred_list [[], ['get'], ['int'], ['find', 'element'], ['generate']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train', 'single'], ['save'], [], ['create', 'nn', 'stack'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is', 'url'], ['get'], ['get'], ['set'], ['download']]
{'Train': {'precision': 0.5385109254792766, 'recall': 0.43901254734833167, 'F1': 0.46635752215634313}, 'Validation': {'precision': 0.17796452937137516, 'recall': 0.1186012622167682, 'F1': 0.13215783452010824}, 'Test': {'precision': 0.18637916894477854, 'recall': 0.13241212635348773, 'F1': 0.14485071102074928}}
=====Epoch 22
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.3231589794158936
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['color', 'image', 'callback'], ['write', 'markdown', 'spec'], ['nanmean'], ['cmd', 'map'], ['log', 'node', 'info']]
seq_pred_list [[], ['output', 'markdown', 'to'], ['fn'], ['cmd'], ['write']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [[], ['dump'], [], ['get', 'resnet', 'stack', 'encoder', 'count'], []]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], ['set'], ['download']]
{'Train': {'precision': 0.5527227091789714, 'recall': 0.4381933392753883, 'F1': 0.47166785277125484}, 'Validation': {'precision': 0.17671546069451138, 'recall': 0.11432408027709777, 'F1': 0.12929673332224062}, 'Test': {'precision': 0.18506242026608347, 'recall': 0.12561308330512813, 'F1': 0.14034496399859986}}
=====Epoch 23
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.2814887762069702
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['is', 'all', 'field', 'none'], ['calculate', 'path', 'integration', 'error'], ['watchdog', 'time'], ['append', 'headers'], ['macs2', 'call', 'peaks']]
seq_pred_list [['is', 'all', 'field', 'none'], ['calculate', 'path', 'integration', 'error'], ['watchdog'], [], ['bam', 'call', 'peaks']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['optimize'], ['dump'], ['source'], ['get', 'managed', 'stack', 'encoder', 'count'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['get'], [], ['download', 'from']]
{'Train': {'precision': 0.5629076220169814, 'recall': 0.4653272992074172, 'F1': 0.4923311981067428}, 'Validation': {'precision': 0.17859344640691882, 'recall': 0.12070151328270273, 'F1': 0.13362360638295248}, 'Test': {'precision': 0.18852666909665267, 'recall': 0.13231241375719233, 'F1': 0.1456388259696078}}
=====Epoch 24
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.241520643234253
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['get', 'geometry', 'from', 'iupac', 'symbol'], ['belspec', 'yaml2json'], ['forward'], ['get', 'input', 'grads'], ['need', 'swap', 'wh']]
seq_pred_list [['get', 'geometry', 'from', 'mp', 'symbol'], [], ['forward'], ['get'], ['is']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train'], ['save'], ['filepath'], ['create', 'backend', 'stack'], ['get']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], [], ['get', 'hash'], ['set'], ['download']]
{'Train': {'precision': 0.5737635710597359, 'recall': 0.48315043665615776, 'F1': 0.507560843280845}, 'Validation': {'precision': 0.17323632963725877, 'recall': 0.11915035155491832, 'F1': 0.1308011494429516}, 'Test': {'precision': 0.18158450276410912, 'recall': 0.13240116972005983, 'F1': 0.14332571398591082}}
=====Epoch 25
Training...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

Epoch loss 1.2038639783859253
Evaluating...


Iteration:   0%|          | 0/12750 [00:00<?, ?it/s]

seq_ref_list [['limit'], ['update', 'board', 'user', 'settings'], ['email', 'report'], ['asluav', 'status', 'encode'], ['section', 'radial', 'distances']]
seq_pred_list [['set'], ['update', 'board', 'days'], ['email'], ['serial', 'udb'], ['section', 'radial', 'distances']]


Iteration:   0%|          | 0/714 [00:00<?, ?it/s]

seq_ref_list [['learn'], ['save', 'act'], ['nature', 'cnn'], ['mlp'], ['lstm']]
seq_pred_list [['train', 'single'], ['create'], ['dependency', 'length'], ['create', 'managed', 'stack', 'encoder', 'count'], ['get', 'bitmap']]


Iteration:   0%|          | 0/686 [00:00<?, ?it/s]

seq_ref_list [['get', 'vid', 'from', 'url'], ['sina', 'xml', 'to', 'url', 'list'], ['make', 'mimi'], ['fc2video', 'download'], ['dailymotion', 'download']]
seq_pred_list [['is'], ['get'], ['generate'], ['set', 'url'], ['download']]
{'Train': {'precision': 0.5792363603087763, 'recall': 0.5045642343455817, 'F1': 0.5230090017119461}, 'Validation': {'precision': 0.16893325152298724, 'recall': 0.12210261980937354, 'F1': 0.13187843283605213}, 'Test': {'precision': 0.17942561205273072, 'recall': 0.13498796578348246, 'F1': 0.14421317170770426}}
F1
Finished training!
Best validation score: 0.1409217330613658
Test score: 0.1527150392992383
