# Experiments with I/O -> trace and trace -> code model run consecutively.

## I/O -> trace
`python train.py 
--dataset karel 
--karel-train-shuf
--karel-trace-inc-val
--model_type karel-trace-pred
--karel-trace-grid-enc presnet
--debug_every_n=1000 --eval_every_n=2000 --keep_every_n=10000 --log_interval=100
--batch_size 64 --num_epochs 50 --max_beam_trees 1 --optimizer sgd --gradient-clip 1 --lr 1 --lr_decay_steps 100000 --lr_decay_rate 0.5
--model_dir ../logdirs/20180321/karel-trace-pred-gridpresnet`

Snapshot from iteration 250000 was used, with beam size 1:
`python infer.py --model_type karel-trace-pred --dataset karel-00${i}-of-008 --karel-trace-inc-val --eval-train --max_beam_trees 1 --model_dir ../logdirs/20180321/karel-trace-pred-gridpresnet --step 250100 --max_decoder_length 1000 --infer-output ../data/karel/train-20180321-trace-pred-gridpresnet-bs1-st250100-00${i}-of-008.pkl`

## Trace -> code
Concatenate action and each timestep in trace, no grids in trace:
`python train.py --dataset karel-20180222-trace-pred-gridpresnet-bs1-st250100 --model_type karel-code-trace --karel-train-shuf --batch-create-train 'ConstantBatch(5, 1, True)' --karel-io-enc lgrl --karel-trace-action-enc emb --karel-trace-grid-enc none --karel-trace-cond-enc concat --karel-trace-enc indiv:concat --num_placeholders 0 --debug_every_n=1000 --eval_every_n=2000 --keep_every_n=10000 --log_interval=100 --batch_size 64 --num_epochs 50  --max_beam_trees 1 --optimizer sgd --gradient-clip 1 --lr 1 --lr_decay_steps 100000 --lr_decay_rate 0.5 --model_dir ../logdirs/20180321/karel-code-trace-ioshuf-concat-nogrid`

Interleave actions and trace, no grids:
`python train.py --dataset karel-20180222-trace-pred-gridpresnet-bs1-st250100 --model_type karel-code-trace --karel-train-shuf --batch-create-train 'ConstantBatch(5, 1, True)' --karel-io-enc lgrl --karel-trace-action-enc emb --karel-trace-grid-enc none --karel-trace-cond-enc concat --karel-trace-enc indiv:interleave --num_placeholders 0 --debug_every_n=1000 --eval_every_n=2000 --keep_every_n=10000 --log_interval=100 --batch_size 64 --num_epochs 50  --max_beam_trees 1 --optimizer sgd --gradient-clip 1 --lr 1 --lr_decay_steps 100000 --lr_decay_rate 0.5 --model_dir ../logdirs/20180321/karel-code-trace-ioshuf-interleave-nogrid`

**Best top-1 number**: 0.8148

In [2]:
import collections
import glob
import json
import re

import pandas as pd

In [10]:
rows = []
for path in glob.glob('../logdirs/20180321/karel-code-trace*/report*'):
    trace_type = re.search('(concat|interleave)', path).group(1)
    grid = '-nogrid' not in path
    io = '-noio' not in path
    actions = '-noactions' not in path
    
    bs = int(re.search(r'-bs(\d+)-', path).group(1))
    step = re.search(r'(\d+)\.jsonl', path).group(1)
    
    results = json.load(open(path))
    row = [trace_type, grid, io, actions, bs, step]
    for k in (0, 4, 42):
        if k >= bs:
            row.extend([0, 0, 0])
            continue
        row.append(results['exact'][k] / results['total'])
        row.append(results['semantic'][k] / results['total'])
        row.append(results['generalization'][k] / results['total'])
    rows.append(row)
    
columns = ['trace_type', 'grid', 'io', 'actions', 'bs', 'step']
for k in (0, 4, 42):
    columns.extend(x.format(k+1) for x in ('top{}-exact', 'top{}-sem', 'top{}-gen'))

In [11]:
results = pd.DataFrame(rows, columns=columns)

In [12]:
results.sort_values(by='top1-gen')

Unnamed: 0,trace_type,grid,io,actions,bs,step,top1-exact,top1-sem,top1-gen,top5-exact,top5-sem,top5-gen,top43-exact,top43-sem,top43-gen
48,concat,False,True,True,1,50100,0.3964,0.7568,0.7232,0.0,0.0,0.0,0.0,0.0,0.0
15,interleave,False,True,True,1,50100,0.4044,0.7668,0.7304,0.0,0.0,0.0,0.0,0.0,0.0
51,concat,False,True,True,1,750100,0.4052,0.7896,0.7516,0.0,0.0,0.0,0.0,0.0,0.0
43,concat,False,True,True,1,872500,0.4048,0.7908,0.752,0.0,0.0,0.0,0.0,0.0,0.0
52,concat,False,True,True,1,800100,0.4048,0.7916,0.7532,0.0,0.0,0.0,0.0,0.0,0.0
22,concat,False,True,True,1,850100,0.4048,0.7916,0.7536,0.0,0.0,0.0,0.0,0.0,0.0
28,concat,False,True,True,1,700100,0.404,0.792,0.7536,0.0,0.0,0.0,0.0,0.0,0.0
39,concat,False,True,True,1,650100,0.4064,0.7936,0.7552,0.0,0.0,0.0,0.0,0.0,0.0
9,interleave,False,True,True,1,150100,0.4108,0.7948,0.7564,0.0,0.0,0.0,0.0,0.0,0.0
6,interleave,False,True,True,1,100100,0.406,0.7964,0.7576,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
results.groupby(['trace_type', 'bs']).agg({'top1-gen': 'max', 'step': 'first'})

Unnamed: 0_level_0,Unnamed: 1_level_0,step,top1-gen
trace_type,bs,Unnamed: 2_level_1,Unnamed: 3_level_1
concat,1,450100,0.7688
concat,43,650100,0.8044
interleave,1,200100,0.7716
interleave,43,400100,0.8148


In [16]:
results.groupby(['trace_type', 'bs']).agg({'top43-gen': 'max', 'step': 'first'})

Unnamed: 0_level_0,Unnamed: 1_level_0,step,top43-gen
trace_type,bs,Unnamed: 2_level_1,Unnamed: 3_level_1
concat,1,450100,0.0
concat,43,650100,0.8964
interleave,1,200100,0.0
interleave,43,400100,0.8936


In [4]:
rows = []
for path in glob.glob('../logdirs/20180321/karel-code-trace*/report-20180321*'):
    trace_type = re.search('(concat|interleave)', path).group(1)
    grid = '-nogrid' not in path
    io = '-noio' not in path
    actions = '-noactions' not in path
    
    bs = int(re.search(r'-bs(\d+)-', path).group(1))
    step = re.search(r'(\d+)\.jsonl', path).group(1)
    
    results = json.load(open(path))
    row = [trace_type, grid, io, actions, bs, step]
    for k in (0, 4, 42):
        if k >= bs:
            row.extend([0, 0, 0])
            continue
        row.append(results['exact'][k] / results['total'])
        row.append(results['semantic'][k] / results['total'])
        row.append(results['generalization'][k] / results['total'])
    rows.append(row)
    
columns = ['trace_type', 'grid', 'io', 'actions', 'bs', 'step']
for k in (0, 4, 42):
    columns.extend(x.format(k+1) for x in ('top{}-exact', 'top{}-sem', 'top{}-gen'))
results = pd.DataFrame(rows, columns=columns)

In [5]:
results.groupby(['trace_type', 'bs']).agg({'top1-gen': 'max', 'step': 'first'})

Unnamed: 0_level_0,Unnamed: 1_level_0,step,top1-gen
trace_type,bs,Unnamed: 2_level_1,Unnamed: 3_level_1
concat,1,650100,0.7764
concat,43,600100,0.81
interleave,1,433500,0.7728
interleave,43,100100,0.8144


In [6]:
results.groupby(['trace_type', 'bs']).agg({'top43-gen': 'max', 'step': 'first'})

Unnamed: 0_level_0,Unnamed: 1_level_0,step,top43-gen
trace_type,bs,Unnamed: 2_level_1,Unnamed: 3_level_1
concat,1,650100,0.0
concat,43,600100,0.9
interleave,1,433500,0.0
interleave,43,100100,0.9008
