In [1]:
import collections
import cPickle as pickle
import glob
import itertools
import json
import operator
import os
import re
import sys

import matplotlib.pyplot as plt
%matplotlib inline
import editdistance
import numpy as np

from datasets import executor
from datasets.karel.karel_runtime import KarelRuntime

In [97]:
def _load_code_by_guid(path):
    """Read every example pickled back-to-back in ``path``.

    Args:
        path: file containing a sequence of pickled example dicts, each with
            at least the keys ``'guid'`` and ``'code'``.

    Returns:
        dict mapping each example's ``'guid'`` to its ``'code'`` as a tuple
        (tuples compare by value, which the exact-match metric relies on).
    """
    code_by_guid = {}
    # Pickle streams are binary data, so open in 'rb' (text mode only happens
    # to work on platforms that do not translate newlines).
    # NOTE: unpickling can run arbitrary code; only load trusted files.
    with open(path, 'rb') as f:
        while True:
            try:
                example = pickle.load(f)
            except EOFError:
                # pickle.load signals the end of the stream with EOFError.
                break
            code_by_guid[example['guid']] = tuple(example['code'])
    return code_by_guid


# Reference programs of the validation and test splits, keyed by guid.
val_code = _load_code_by_guid('data/karel/val.pkl')
test_code = _load_code_by_guid('data/karel/test.pkl')

def test_top_n(exec_results, n):
    correct, total = 0, 0
    for guid, hypotheses in exec_results.iteritems():
        total += 1
        for i, (_, correctness) in zip(range(n), hypotheses):
            if np.all(correctness[:5]):
                correct += correctness[5]
                break
    return correct, total

def test_top_n_any(exec_results, n):
    correct, total = 0, 0
    for guid, hypotheses in exec_results.iteritems():
        total += 1
        for i, (_, correctness) in zip(range(n), hypotheses):
            if np.all(correctness):
                correct += 1
                break
    return correct, total

def test_top_n_any_exact_match(exec_results, n):
    correct, total = 0, 0
    for guid, hypotheses in exec_results.iteritems():
        orig_code = val_code[guid]
        total += 1
        for i, (hypothesis, _) in zip(range(n), hypotheses):
            if hypothesis == orig_code:
                correct += 1
                break
    return correct, total

In [5]:
# Print the accuracy stored in every dev report of the 20180201 runs.
# sorted() orders the paths lexicographically, so e.g. step 50100 is listed
# after step 456000.
for fn in sorted(glob.glob('logdirs/20180201/*/report-dev-*.jsonl')):
    # Only the first line of the report is parsed; the with-block makes sure
    # the handle is closed instead of being left to the garbage collector.
    with open(fn) as report_file:
        result = json.loads(next(report_file))

    # File names look like report-dev-m<dist>-<step>.jsonl; the dot is
    # escaped so it matches a literal '.' only.
    dist, step = re.search(r'report-dev-m(.*)-(\d+)\.jsonl', fn).groups()
    # The model name is the directory that contains the report.
    model_name = os.path.basename(os.path.dirname(fn))

    print('{} {} step {}: {:.4f} (of {})'.format(
        model_name, dist, step, result['correct'] / float(result['total']), result['total']))

karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 100100: 0.4116 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 150100: 0.4068 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 200100: 0.3960 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 250100: 0.3748 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 300100: 0.3664 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 350100: 0.3640 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 400100: 0.3520 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 450100: 0.3636 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 456000: 0.3496 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 50100: 0.3832 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 100100: 0.7212 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 150100: 0.7008 (of 2500)
karel-lgr

In [10]:
# Best dev accuracy per (model_name, dist) over all 20180201 report files.
# Values are (accuracy, -step): negating the step makes max() prefer the
# earliest checkpoint among equally accurate ones.
model_best = collections.defaultdict(lambda: (0.0, 0))
for fn in sorted(glob.glob('logdirs/20180201/*/report-dev-*.jsonl')):
    # Only the first line of the report is parsed; close the handle promptly.
    with open(fn) as report_file:
        result = json.loads(next(report_file))
    # File names look like report-dev-m<dist>-<step>.jsonl.
    dist, step = re.search(r'report-dev-m(.*)-(\d+)\.jsonl', fn).groups()
    step = int(step)
    model_name = os.path.basename(os.path.dirname(fn))
    accuracy = result['correct'] / float(result['total'])
    print('{} {} step {}: {:.4f} (of {})'.format(model_name, dist, step, accuracy, result['total']))
    # The candidate must be a single (accuracy, -step) tuple so it compares
    # against the stored tuple (the original line had unbalanced parentheses).
    model_best[model_name, dist] = max(model_best[model_name, dist], (accuracy, -step))

[(('karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5', '0,0,1'),
  (0.4116, -100100)),
 (('karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5', '0,1'),
  (0.7212, -100100)),
 (('karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5', '1'), (0.9812, -150100)),
 (('karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5', '0,0,1'),
  (0.8676, -150100)),
 (('karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5', '0,1'),
  (0.9484, -300100)),
 (('karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5', '1'), (0.9724, -200100)),
 (('karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5', '0,0,1'),
  (0.9184, -250100)),
 (('karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5', '0,1'),
  (0.9556, -200100)),
 (('karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5', '1'),
  (0.9736, -300100))]

In [14]:
# Best top-1 score (as computed by test_top_n) per (model_name, dist) over
# all 20180201 exec-results files.  Values are (accuracy, -step) so that
# max() prefers the earliest checkpoint among equally accurate ones.
top1_model_best = collections.defaultdict(lambda: (0.0, 0))
for fn in sorted(glob.glob('logdirs/20180201/*/exec-results-dev-*.pkl')):
    # File names look like exec-results-dev-m<dist>-<step>.pkl.
    dist, step = re.search(r'exec-results-dev-m(.*)-(\d+)\.pkl', fn).groups()
    step = int(step)
    model_name = os.path.basename(os.path.dirname(fn))
    # Pickles are binary; open in 'rb' and close the handle deterministically.
    # NOTE: unpickling can run arbitrary code; only load trusted files.
    with open(fn, 'rb') as results_file:
        exec_results = pickle.load(results_file)
    correct, total = test_top_n(exec_results, 1)
    accuracy = correct / float(total)
    print('{} {} step {}: {:.4f} (of {})'.format(model_name, dist, step, accuracy, total))
    top1_model_best[model_name, dist] = max(top1_model_best[model_name, dist], (accuracy, -step))

karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 100100: 0.1124 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 150100: 0.1268 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 200100: 0.1268 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 250100: 0.1200 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 300100: 0.1184 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 350100: 0.1208 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 400100: 0.1240 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 450100: 0.1220 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 456000: 0.1204 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 50100: 0.0932 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 100100: 0.2248 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 150100: 0.2300 (of 2500)
karel-lgr

In [15]:
# Display the best (accuracy, -step) pair stored per (model_name, dist) key.
top1_model_best

defaultdict(<function __main__.<lambda>>,
            {('karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5', '0'): (0.68125,
              -50100),
             ('karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.1268, -150100),
             ('karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.2364, -200100),
             ('karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5', '1'): (0.79,
              -300100),
             ('karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.5184, -150100),
             ('karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.7076, -300100),
             ('karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5', '1'): (0.8156,
              -200100),
             ('karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.6912, -400100),
             ('karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.7644, -350100),
        

In [19]:
# Best dev accuracy per (model_name, dist) over all 20180207 report files.
# Values are (accuracy, -step): negating the step makes max() prefer the
# earliest checkpoint among equally accurate ones.
model_best = collections.defaultdict(lambda: (0.0, 0))
for fn in sorted(glob.glob('logdirs/20180207/*/report-dev-*.jsonl')):
    # Only the first line of the report is parsed; close the handle promptly.
    with open(fn) as report_file:
        result = json.loads(next(report_file))
    # File names look like report-dev-m<dist>-<step>.jsonl.
    dist, step = re.search(r'report-dev-m(.*)-(\d+)\.jsonl', fn).groups()
    step = int(step)
    model_name = os.path.basename(os.path.dirname(fn))
    accuracy = result['correct'] / float(result['total'])
    print('{} {} step {}: {:.4f} (of {})'.format(model_name, dist, step, accuracy, result['total']))
    model_best[model_name, dist] = max(model_best[model_name, dist], (accuracy, -step))

karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 100100: 0.1256 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 150100: 0.1204 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 200100: 0.1184 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 250100: 0.1156 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 300100: 0.1144 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 350100: 0.1164 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 400100: 0.1180 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 436300: 0.1172 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 50100: 0.1324 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 100100: 0.2484 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 150100: 0.2356 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-

In [20]:
# Display the best (accuracy, -step) pair stored per (model_name, dist) key.
model_best

defaultdict(<function __main__.<lambda>>,
            {('karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.1324, -50100),
             ('karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.2528, -50100),
             ('karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '1'): (0.9816, -100100),
             ('karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.666, -100100),
             ('karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.9036, -100100),
             ('karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '1'): (0.9632, -100100),
             ('karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.904, -350100),
             ('karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.9504, -400100),
             ('karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
  

In [21]:
# Best top-1 score (as computed by test_top_n) per (model_name, dist) over
# all 20180207 exec-results files.  Values are (accuracy, -step) so that
# max() prefers the earliest checkpoint among equally accurate ones.
top1_model_best = collections.defaultdict(lambda: (0.0, 0))
for fn in sorted(glob.glob('logdirs/20180207/*/exec-results-dev-*.pkl')):
    # File names look like exec-results-dev-m<dist>-<step>.pkl.
    dist, step = re.search(r'exec-results-dev-m(.*)-(\d+)\.pkl', fn).groups()
    step = int(step)
    model_name = os.path.basename(os.path.dirname(fn))
    # Pickles are binary; open in 'rb' and close the handle deterministically.
    # NOTE: unpickling can run arbitrary code; only load trusted files.
    with open(fn, 'rb') as results_file:
        exec_results = pickle.load(results_file)
    correct, total = test_top_n(exec_results, 1)
    accuracy = correct / float(total)
    print('{} {} step {}: {:.4f} (of {})'.format(model_name, dist, step, accuracy, total))
    top1_model_best[model_name, dist] = max(top1_model_best[model_name, dist], (accuracy, -step))

karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 100100: 0.0672 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 150100: 0.0640 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 200100: 0.0632 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 250100: 0.0612 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 300100: 0.0656 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 350100: 0.0616 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 400100: 0.0632 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 436300: 0.0620 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 50100: 0.0524 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 100100: 0.1352 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 150100: 0.1312 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-

karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 250100: 0.3972 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 300100: 0.3956 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 350100: 0.4040 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 400100: 0.4008 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 436300: 0.4036 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 50100: 0.2484 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 step 100100: 0.4200 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 step 150100: 0.4384 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 step 200100: 0.4652 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 step 250100: 0.4660 (of 2500)
karel-lgrl-ref-edit-state-mem

In [22]:
# Display the best (accuracy, -step) pair stored per (model_name, dist) key.
top1_model_best

defaultdict(<function __main__.<lambda>>,
            {('karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.0672, -100100),
             ('karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.14, -350100),
             ('karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '1'): (0.7416, -100100),
             ('karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.2444, -100100),
             ('karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.4788, -100100),
             ('karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '1'): (0.6572, -100100),
             ('karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.6096, -400100),
             ('karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.6852, -436300),
             ('karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',


In [23]:
# NOTE(review): this cell scans the same 20180207 files as cell In [21] and
# rebuilds the same dictionary; one of the two can probably be removed.
# Values are (accuracy, -step) so that max() prefers the earliest checkpoint
# among equally accurate ones.
top1_model_best = collections.defaultdict(lambda: (0.0, 0))
for fn in sorted(glob.glob('logdirs/20180207/*/exec-results-dev-*.pkl')):
    # File names look like exec-results-dev-m<dist>-<step>.pkl.
    dist, step = re.search(r'exec-results-dev-m(.*)-(\d+)\.pkl', fn).groups()
    step = int(step)
    model_name = os.path.basename(os.path.dirname(fn))
    # Pickles are binary; open in 'rb' and close the handle deterministically.
    # NOTE: unpickling can run arbitrary code; only load trusted files.
    with open(fn, 'rb') as results_file:
        exec_results = pickle.load(results_file)
    correct, total = test_top_n(exec_results, 1)
    accuracy = correct / float(total)
    print('{} {} step {}: {:.4f} (of {})'.format(model_name, dist, step, accuracy, total))
    top1_model_best[model_name, dist] = max(top1_model_best[model_name, dist], (accuracy, -step))

karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 100100: 0.0672 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 150100: 0.0640 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 200100: 0.0632 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 250100: 0.0612 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 300100: 0.0656 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 350100: 0.0616 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 400100: 0.0632 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 436300: 0.0620 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 50100: 0.0524 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 100100: 0.1352 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 150100: 0.1312 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-

karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 250100: 0.3972 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 300100: 0.3956 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 350100: 0.4040 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 400100: 0.4008 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 436300: 0.4036 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 50100: 0.2484 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 step 100100: 0.4200 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 step 150100: 0.4384 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 step 200100: 0.4652 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 step 250100: 0.4660 (of 2500)
karel-lgrl-ref-edit-state-mem

In [24]:
# Display the best (accuracy, -step) pair stored per (model_name, dist) key.
top1_model_best

defaultdict(<function __main__.<lambda>>,
            {('karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.0672, -100100),
             ('karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.14, -350100),
             ('karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '1'): (0.7416, -100100),
             ('karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.2444, -100100),
             ('karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.4788, -100100),
             ('karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '1'): (0.6572, -100100),
             ('karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.6096, -400100),
             ('karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.6852, -436300),
             ('karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',


In [25]:
# Best top-1 score (as computed by test_top_n) per (model_name, dist) over
# all 20180208 exec-results files.  Values are (accuracy, -step) so that
# max() prefers the earliest checkpoint among equally accurate ones.
top1_model_best = collections.defaultdict(lambda: (0.0, 0))
for fn in sorted(glob.glob('logdirs/20180208/*/exec-results-dev-*.pkl')):
    # File names look like exec-results-dev-m<dist>-<step>.pkl.
    dist, step = re.search(r'exec-results-dev-m(.*)-(\d+)\.pkl', fn).groups()
    step = int(step)
    model_name = os.path.basename(os.path.dirname(fn))
    # Pickles are binary; open in 'rb' and close the handle deterministically.
    # NOTE: unpickling can run arbitrary code; only load trusted files.
    with open(fn, 'rb') as results_file:
        exec_results = pickle.load(results_file)
    correct, total = test_top_n(exec_results, 1)
    accuracy = correct / float(total)
    print('{} {} step {}: {:.4f} (of {})'.format(model_name, dist, step, accuracy, total))
    top1_model_best[model_name, dist] = max(top1_model_best[model_name, dist], (accuracy, -step))

karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 100100: 0.6964 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 150100: 0.6984 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 200100: 0.6924 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 250100: 0.6812 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 300100: 0.6660 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 50100: 0.6268 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 100100: 0.6972 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 150100: 0.6988 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 200100: 0.6916 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 250100: 0.6812 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 300100: 0.6664 (of 2500)
karel-lgrl

In [61]:
# Top-1 scores for the 20180208 run directories matching *ref-edit-state-m123*.
# NOTE(review): this rebinds top1_model_best, replacing the results of the
# broader 20180208 scan; the cell was executed out of order (In [61]).
# Values are (accuracy, -step) so that max() prefers the earliest checkpoint
# among equally accurate ones.
top1_model_best = collections.defaultdict(lambda: (0.0, 0))
for fn in sorted(glob.glob('logdirs/20180208/*ref-edit-state-m123*/exec-results-dev-*.pkl')):
    # File names look like exec-results-dev-m<dist>-<step>.pkl.
    dist, step = re.search(r'exec-results-dev-m(.*)-(\d+)\.pkl', fn).groups()
    step = int(step)
    model_name = os.path.basename(os.path.dirname(fn))
    # Pickles are binary; open in 'rb' and close the handle deterministically.
    # NOTE: unpickling can run arbitrary code; only load trusted files.
    with open(fn, 'rb') as results_file:
        exec_results = pickle.load(results_file)
    correct, total = test_top_n(exec_results, 1)
    accuracy = correct / float(total)
    print('{} {} step {}: {:.4f} (of {})'.format(model_name, dist, step, accuracy, total))
    top1_model_best[model_name, dist] = max(top1_model_best[model_name, dist], (accuracy, -step))

karel-lgrl-ref-edit-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 100100: 0.0048 (of 2500)
karel-lgrl-ref-edit-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 150100: 0.0044 (of 2500)
karel-lgrl-ref-edit-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 200100: 0.0052 (of 2500)
karel-lgrl-ref-edit-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 250100: 0.0044 (of 2500)
karel-lgrl-ref-edit-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 300100: 0.0068 (of 2500)
karel-lgrl-ref-edit-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 350100: 0.0072 (of 2500)
karel-lgrl-ref-edit-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 400100: 0.0052 (of 2500)
karel-lgrl-ref-edit-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 436300: 0.0060 (of 2500)
karel-lgrl-ref-edit-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 50100: 0.0052 (of 2500)
karel-lgrl-ref-edit-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 100100: 0.0112 (of 2500)
karel-lgrl-ref-edit-state-m123-sgd-cl1-lr0.

In [26]:
# Display the best (accuracy, -step) pair stored per (model_name, dist) key.
# NOTE(review): the saved output (execution count 26) lists the ref-state and
# ref-state-mem models, i.e. it reflects the In [25] scan and not the
# In [61] cell that precedes this one in the file.
top1_model_best

defaultdict(<function __main__.<lambda>>,
            {('karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.6984, -150100),
             ('karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.6988, -150100),
             ('karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '1'): (0.698, -150100),
             ('karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,0,1'): (0.6048, -100100),
             ('karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '0,1'): (0.68, -100100),
             ('karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5',
              '1'): (0.7752, -100100)})

In [72]:
# Best top-1 score (as computed by test_top_n) per (model_name, dist) for the
# 20180211 ref-edit-m12 run.  Values are (accuracy, -step) so that max()
# prefers the earliest checkpoint among equally accurate ones.
top1_edit_m12_best = collections.defaultdict(lambda: (0.0, 0))
for fn in sorted(glob.glob('logdirs/20180211/karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-*.pkl')):
    # File names look like exec-results-dev-m<dist>-<step>.pkl.
    dist, step = re.search(r'exec-results-dev-m(.*)-(\d+)\.pkl', fn).groups()
    step = int(step)
    model_name = os.path.basename(os.path.dirname(fn))
    # Pickles are binary; open in 'rb' and close the handle deterministically.
    # NOTE: unpickling can run arbitrary code; only load trusted files.
    with open(fn, 'rb') as results_file:
        exec_results = pickle.load(results_file)
    correct, total = test_top_n(exec_results, 1)
    accuracy = correct / float(total)
    print('{} {} step {}: {:.4f} (of {})'.format(model_name, dist, step, accuracy, total))
    top1_edit_m12_best[model_name, dist] = max(top1_edit_m12_best[model_name, dist], (accuracy, -step))
print(top1_edit_m12_best)

karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 100100: 0.2776 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 150100: 0.2928 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 200100: 0.3112 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 250100: 0.3196 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 300100: 0.3312 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 350100: 0.3224 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 400100: 0.3240 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 436300: 0.3268 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 step 50100: 0.1580 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 100100: 0.5768 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 step 150100: 0.6064 (of 2500)
karel-lgrl-ref-edit-m12-sg

In [63]:
def print_results(fn, ks=(1, 64)):
    """Print 'gen' and 'exact' accuracy for one exec-results pickle.

    For each ``k`` two lines are printed:
      * ``gen``: share of guids where any of the top-``k`` hypotheses passes
        every recorded check (``test_top_n_any``).
      * ``exact``: share of guids where any of the top-``k`` hypotheses
        equals the reference program (``test_top_n_any_exact_match``).

    NOTE(review): the split parsed from the file name (dev/test) is discarded,
    and ``test_top_n_any_exact_match`` looks references up in ``val_code``;
    confirm that this is intended for ``-test-`` files.

    Args:
        fn: path of the form
            ``<dir>/exec-results-(dev|test)-m<dist>-<step>.pkl``; the name of
            the containing directory is reported as the model name.
        ks: the values of ``k`` to report (defaults to the original 1 and 64).
    """
    _, dist, step = re.search(r'exec-results-(dev|test)-m(.*)-(\d+)\.pkl', fn).groups()
    step = int(step)
    model_name = os.path.basename(os.path.dirname(fn))
    # Pickles are binary; open in 'rb' and close the handle deterministically.
    # NOTE: unpickling can run arbitrary code; only load trusted files.
    with open(fn, 'rb') as results_file:
        exec_results = pickle.load(results_file)

    for k in ks:
        correct, total = test_top_n_any(exec_results, k)
        accuracy = correct / float(total)
        print('{} {} k={} gen step {}: {:.4f} (of {})'.format(model_name, dist, k, step, accuracy, total))

        correct, total = test_top_n_any_exact_match(exec_results, k)
        accuracy = correct / float(total)
        print('{} {} k={} exact step {}: {:.4f} (of {})'.format(model_name, dist, k, step, accuracy, total))

In [39]:
# Dev-split report for the ref-m1 run; one checkpoint per dist, the steps
# being the ones recorded as best in top1_model_best for this model.
for results_path in (
    'logdirs/20180201/karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m1-300100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,1-200100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,0,1-150100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 300100: 0.7900 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 300100: 0.7520 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 300100: 0.9812 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 300100: 0.9696 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 200100: 0.2364 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 200100: 0.1652 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 200100: 0.7176 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 200100: 0.5528 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 150100: 0.1268 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 150100: 0.0804 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=64 gen step 150100: 0.4164 (of 2500)
karel-lg

In [66]:
# Test-split report for the ref-m1 run, at the same steps used for the
# dev-split report of this model.
for results_path in (
    'logdirs/20180201/karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m1-300100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m0,1-200100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m0,0,1-150100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 300100: 0.7928 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 300100: 0.7588 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 300100: 0.9868 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 300100: 0.9712 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 200100: 0.2392 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 200100: 0.1656 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 200100: 0.7084 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 200100: 0.5408 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 150100: 0.1252 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 150100: 0.0812 (of 2500)
karel-lgrl-ref-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=64 gen step 150100: 0.4028 (of 2500)
karel-lg

In [40]:
# Dev-split report for the ref-m12 run; one checkpoint per dist, the steps
# being the ones recorded as best in top1_model_best for this model.
for results_path in (
    'logdirs/20180201/karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m1-200100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,1-300100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,0,1-150100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 200100: 0.8156 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 200100: 0.7376 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 200100: 0.9812 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 200100: 0.9640 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 300100: 0.7076 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 300100: 0.6184 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 300100: 0.9576 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 300100: 0.9152 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 150100: 0.5184 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 150100: 0.4128 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=64 gen step 150100: 0.8776 (of 250

In [67]:
# Test-split report for the ref-m12 run, at the same steps used for the
# dev-split report of this model.
for results_path in (
    'logdirs/20180201/karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m1-200100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m0,1-300100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m0,0,1-150100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 200100: 0.8324 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 200100: 0.7616 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 200100: 0.9840 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 200100: 0.9680 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 300100: 0.7268 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 300100: 0.6376 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 300100: 0.9656 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 300100: 0.9252 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 150100: 0.5036 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 150100: 0.4104 (of 2500)
karel-lgrl-ref-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=64 gen step 150100: 0.8912 (of 250

In [42]:
# Dev-split report for the ref-m123 run, one checkpoint per dist.  The steps
# for dists '0,1' and '0,0,1' are the ones recorded as best in
# top1_model_best; presumably the same holds for dist '1' (its entry is cut
# off in the saved output -- TODO confirm).
for results_path in (
    'logdirs/20180201/karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m1-250100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,1-350100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,0,1-400100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 250100: 0.8276 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 250100: 0.7368 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 250100: 0.9836 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 250100: 0.9640 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 350100: 0.7644 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 350100: 0.6532 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 350100: 0.9624 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 350100: 0.9212 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 400100: 0.6912 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 400100: 0.5508 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=64 gen step 400100: 0.9

In [68]:
# Test-split report for the ref-m123 run, at the same steps used for the
# dev-split report of this model.
for results_path in (
    'logdirs/20180201/karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m1-250100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m0,1-350100.pkl',
    'logdirs/20180201/karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m0,0,1-400100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 250100: 0.8332 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 250100: 0.7372 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 250100: 0.9880 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 250100: 0.9656 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 350100: 0.7728 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 350100: 0.6592 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 350100: 0.9712 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 350100: 0.9256 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 400100: 0.6912 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 400100: 0.5704 (of 2500)
karel-lgrl-ref-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=64 gen step 400100: 0.9

In [43]:
# Dev-split report for the 20180207 ref-edit-m1 run; one checkpoint per dist,
# the steps being the ones recorded as best in top1_model_best for this model.
for results_path in (
    'logdirs/20180207/karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m1-100100.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,1-350100.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,0,1-100100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 100100: 0.7416 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 100100: 0.7152 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 100100: 0.9856 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 100100: 0.9760 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 350100: 0.1400 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 350100: 0.0720 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 350100: 0.2396 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 350100: 0.0904 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 100100: 0.0672 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 100100: 0.0356 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.

In [69]:
# Test-split exec-results pickles of the 20180207 ref-edit-m1 run, one per
# `m...` tag in the filename.
for results_path in (
    'logdirs/20180207/karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m1-100100.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m0,1-350100.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m0,0,1-100100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 100100: 0.7392 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 100100: 0.7156 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 100100: 0.9884 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 100100: 0.9788 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 350100: 0.1500 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 350100: 0.0780 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 350100: 0.2468 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 350100: 0.0960 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 100100: 0.0760 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 100100: 0.0364 (of 2500)
karel-lgrl-ref-edit-m1-sgd-cl1-lr0.1-lds100k-ldr0.

In [45]:
# Dev-split exec-results pickles of the 20180207 ref-edit-m12 run, all taken
# at step 100100, one per `m...` tag in the filename.
for results_path in (
    'logdirs/20180207/karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m1-100100.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,1-100100.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,0,1-100100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 100100: 0.6572 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 100100: 0.6164 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 100100: 0.9696 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 100100: 0.9496 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 100100: 0.4788 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 100100: 0.4260 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 100100: 0.9088 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 100100: 0.8588 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 100100: 0.2444 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 100100: 0.1864 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-ld

In [77]:
# Dev-split exec-results pickles of the 20180211 ref-edit-m12 run, one per
# `m...` tag in the filename; each tag points at its own checkpoint step.
for results_path in (
    'logdirs/20180211/karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m1-436300.pkl',
    'logdirs/20180211/karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,1-400100.pkl',
    'logdirs/20180211/karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,0,1-300100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 436300: 0.7904 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 436300: 0.7252 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 436300: 0.9840 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 436300: 0.9696 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 400100: 0.6652 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 400100: 0.5944 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 400100: 0.9588 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 400100: 0.9196 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 300100: 0.3312 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 300100: 0.2472 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-ld

In [80]:
# NOTE(review): these three paths are identical to the ones in the preceding
# cell, yet the saved output below differs (first line 0.7940 here vs 0.7904
# there). Confirm which files / which print_results definition produced this
# output -- possibly the test split was intended; verify before citing.
for results_path in (
    'logdirs/20180211/karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m1-436300.pkl',
    'logdirs/20180211/karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,1-400100.pkl',
    'logdirs/20180211/karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,0,1-300100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 436300: 0.7940 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 436300: 0.7336 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 436300: 0.9888 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 436300: 0.9732 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 400100: 0.6824 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 400100: 0.6100 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 400100: 0.9604 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 400100: 0.9220 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 300100: 0.3232 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 300100: 0.2428 (of 2500)
karel-lgrl-ref-edit-m12-sgd-cl1-lr0.1-ld

In [49]:
# Dev-split exec-results pickles of the 20180207 ref-edit-m123 run, one per
# `m...` tag in the filename.
for results_path in (
    'logdirs/20180207/karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m1-400100.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,1-436300.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,0,1-400100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 400100: 0.7944 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 400100: 0.7128 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 400100: 0.9864 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 400100: 0.9664 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 436300: 0.6852 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 436300: 0.5856 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 436300: 0.9604 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 436300: 0.9168 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 400100: 0.6096 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 400100: 0.4972 (of 2500)
karel-lgrl-ref-edit-m123-sgd-c

In [70]:
# Test-split exec-results pickles of the 20180207 ref-edit-m123 run, one per
# `m...` tag in the filename.
for results_path in (
    'logdirs/20180207/karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m1-400100.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m0,1-436300.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-test-m0,0,1-400100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 400100: 0.7932 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 400100: 0.7128 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 400100: 0.9880 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 400100: 0.9692 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 436300: 0.7088 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 436300: 0.6140 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 436300: 0.9624 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 436300: 0.9148 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 400100: 0.6044 (of 2500)
karel-lgrl-ref-edit-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 400100: 0.5004 (of 2500)
karel-lgrl-ref-edit-m123-sgd-c

In [50]:
# Dev-split exec-results pickles of the 20180208 ref-state-m123 run, all taken
# at step 150100, one per `m...` tag in the filename.
for results_path in (
    'logdirs/20180208/karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m1-150100.pkl',
    'logdirs/20180208/karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,1-150100.pkl',
    'logdirs/20180208/karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,0,1-150100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 150100: 0.6980 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 150100: 0.4020 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 150100: 0.8488 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 150100: 0.5776 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 150100: 0.6988 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 150100: 0.4024 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 150100: 0.8480 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 150100: 0.5768 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 150100: 0.6984 (of 2500)
karel-lgrl-ref-state-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 150100: 0.4024 (of 2500)
karel-lgrl-ref-state

In [51]:
# Dev-split exec-results pickles of the 20180208 ref-state-mem-m123 run, all
# taken at step 100100, one per `m...` tag in the filename.
for results_path in (
    'logdirs/20180208/karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m1-100100.pkl',
    'logdirs/20180208/karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,1-100100.pkl',
    'logdirs/20180208/karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,0,1-100100.pkl',
):
    print_results(results_path)

karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 100100: 0.7752 (of 2500)
karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 100100: 0.6920 (of 2500)
karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 100100: 0.9780 (of 2500)
karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 100100: 0.9572 (of 2500)
karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 100100: 0.6800 (of 2500)
karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 100100: 0.5836 (of 2500)
karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 100100: 0.9452 (of 2500)
karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 100100: 0.8988 (of 2500)
karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 100100: 0.6048 (of 2500)
karel-lgrl-ref-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 exact step 10010

In [53]:
# Dev-split exec-results pickles of the 20180207 ref-edit-state-mem-m123 run,
# one per `m...` tag in the filename; each tag points at its own checkpoint step.
for results_path in (
    'logdirs/20180207/karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m1-300100.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,1-350100.pkl',
    'logdirs/20180207/karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5/exec-results-dev-m0,0,1-436300.pkl',
):
    print_results(results_path)

karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 gen step 300100: 0.4704 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=1 exact step 300100: 0.3348 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 gen step 300100: 0.8792 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 1 k=64 exact step 300100: 0.5704 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 gen step 350100: 0.4040 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=1 exact step 350100: 0.3008 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 gen step 350100: 0.8096 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,1 k=64 exact step 350100: 0.5556 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-cl1-lr0.1-lds100k-ldr0.5 0,0,1 k=1 gen step 436300: 0.3076 (of 2500)
karel-lgrl-ref-edit-state-mem-m123-sgd-c

In [98]:
def _exact_match_top_n(exec_results, n, reference_code):
    """Count examples whose first n hypotheses contain the reference program.

    Same counting procedure as test_top_n_any_exact_match, except that the
    guid -> code mapping is passed in rather than fixed to val_code, so that
    results from either split can be scored against the matching programs.

    Args:
        exec_results: mapping guid -> sequence of (hypothesis, correctness).
        n: how many leading hypotheses to consider per example.
        reference_code: mapping guid -> reference program (tuple of tokens).

    Returns:
        (correct, total) counts.
    """
    correct, total = 0, 0
    for guid, hypotheses in exec_results.items():
        orig_code = reference_code[guid]
        total += 1
        for _, (hypothesis, _) in zip(range(n), hypotheses):
            if hypothesis == orig_code:
                correct += 1
                break
    return correct, total


def print_results2(fn):
    """Print k=1 and k=64 'gen' and 'exact' accuracy for one results pickle.

    'gen' is computed with test_top_n_any (some hypothesis among the first k
    passes every entry of its correctness vector); 'exact' counts examples
    where one of the first k hypotheses equals the reference program.

    Args:
        fn: path to an ``exec-results-<split>-<step>.pkl`` file, where
            <split> is ``dev`` or ``test``. The basename of the containing
            directory is printed as the model name.

    Raises:
        ValueError: if fn does not follow that naming pattern.
    """
    # Accept both splits (the notebook calls this on dev and on test files)
    # and escape the dot so it only matches a literal ".pkl".
    match = re.search(r'exec-results-(dev|test)-(\d+)\.pkl', fn)
    if match is None:
        raise ValueError('unrecognized exec-results filename: {!r}'.format(fn))
    split = match.group(1)
    step = int(match.group(2))
    model_name = os.path.basename(os.path.dirname(fn))

    # Exact match must be judged against the programs of the evaluated split.
    reference_code = val_code if split == 'dev' else test_code

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(fn, 'rb') as f:
        exec_results = pickle.load(f)

    for k in (1, 64):
        correct, total = test_top_n_any(exec_results, k)
        accuracy = correct / float(total)
        print('{} k={} gen step {}: {:.4f} (of {})'.format(model_name, k, step, accuracy, total))

        correct, total = _exact_match_top_n(exec_results, k, reference_code)
        accuracy = correct / float(total)
        print('{} k={} exact step {}: {:.4f} (of {})'.format(model_name, k, step, accuracy, total))
print_results2('../text2code/models/20180115/karel-sgd-cl1-lr1-lds100k-ldr0.5/exec-results-dev-00150100.pkl')


karel-sgd-cl1-lr1-lds100k-ldr0.5 k=1 gen step 150100: 0.7068 (of 2500)
karel-sgd-cl1-lr1-lds100k-ldr0.5 k=1 exact step 150100: 0.4032 (of 2500)
karel-sgd-cl1-lr1-lds100k-ldr0.5 k=64 gen step 150100: 0.8552 (of 2500)
karel-sgd-cl1-lr1-lds100k-ldr0.5 k=64 exact step 150100: 0.5776 (of 2500)


In [92]:
# Baseline run (karel-sgd-cl1-lr1-lds100k-ldr0.5), step 00150100, scored on
# the pickle whose filename carries the `test` split tag.
print_results2(
    fn='../text2code/models/20180115/karel-sgd-cl1-lr1-lds100k-ldr0.5/exec-results-test-00150100.pkl'
)

karel-sgd-cl1-lr1-lds100k-ldr0.5 k=1 gen step 150100: 0.7036 (of 2500)
karel-sgd-cl1-lr1-lds100k-ldr0.5 k=1 exact step 150100: 0.3904 (of 2500)
karel-sgd-cl1-lr1-lds100k-ldr0.5 k=64 gen step 150100: 0.8580 (of 2500)
karel-sgd-cl1-lr1-lds100k-ldr0.5 k=64 exact step 150100: 0.5812 (of 2500)
