In [1]:
import collections
import glob
import itertools
import json
import operator
import os
import pprint
import re

import natsort
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline
plt.rcParams['figure.figsize'] = [20, 20]

from seq2struct.utils import evaluation

In [2]:
# Work from the parent of the notebook's directory so that the relative
# 'logdirs/', 'configs/' and 'data/' paths used below resolve (presumably the
# repository root -- confirm). The starting directory is remembered on first
# execution, so re-running this cell no longer climbs one level higher each time.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

In [45]:
# Evaluate every saved validation inference file of every run in the sweep and
# tabulate each run's best checkpoint according to `metric_name`.
#
# Names left in the notebook namespace:
#   accuracy_per_run  exp_name -> {'x': steps, 'all': scores}, ordered by step
#   all_metrics       (exp_name, step, metrics) for every evaluated checkpoint
#   df                one row per run: sweep variables, number of evaluated
#                     checkpoints, and the best (step, score) pair
#   flat_df           one row per (run, checkpoint) with every reported metric
#
# The metric is named once here so that the score lookup and the DataFrame
# column header cannot drift apart.
metric_name = 'exact match'

accuracy_per_run = collections.defaultdict(dict)
all_metrics = []
metric_types = set()
rows = []
for d in sorted(glob.glob('logdirs/20190201-hs-allmatches-anysplit-multimean/*')):
    exp_name = os.path.basename(d)
    # Directory names encode the sweep variables: filt-<...>_st-<...>_nt-<...>
    exp_vars = re.match(r'filt-([^_]+)_st-([^_]+)_nt-([^_]+)', exp_name).groups()

    all_scores = []
    for infer_path in glob.glob(os.path.join(d, 'infer-val-step*-bs1.jsonl')):
        # Raw-string pattern ('\d' in a plain string is an invalid escape
        # sequence), applied to the file name only so that the directory part
        # of the path can never supply the match.
        step = int(re.search(r'step(\d+)', os.path.basename(infer_path)).group(1))
        _, metrics = evaluation.compute_metrics(
            'configs/hearthstone/nl2code.jsonnet', '', 'val', infer_path)
        all_scores.append((step, metrics[metric_name]))
        all_metrics.append((exp_name, step, metrics))
        metric_types.update(metrics.keys())

    all_scores.sort(key=operator.itemgetter(0))
    # max() returns the first maximal item, i.e. the earliest step among ties
    # (same pick as a stable descending sort); (-1, -1) marks a run that has
    # no evaluated checkpoints.
    best_step_and_score = max(all_scores, key=operator.itemgetter(1), default=(-1, -1))
    rows.append(exp_vars + (len(all_scores),) + best_step_and_score)
    accuracy_per_run[exp_name] = {
        'x': [s[0] for s in all_scores],
        'all': [s[1] for s in all_scores],
    }
    print(d)

metric_types = tuple(sorted(metric_types))
df = pd.DataFrame(rows, columns=('filt', 'cov', 'nt', 'num steps eval', 'step', metric_name))
flat_df = pd.DataFrame(
    [(exp_name, step) + tuple(metrics.get(t) for t in metric_types) for exp_name, step, metrics in all_metrics],
    columns=('exp_name', 'step') + metric_types)

logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-10
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-20
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-40
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-80
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-10
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-20
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-40
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-80
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-10
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-20
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-40
logdirs/20190201-hs-allmatches-anysplit-mu

In [46]:
# Best validation checkpoint of each run in the sweep, selected by exact match.
df

Unnamed: 0,filt,cov,nt,num steps eval,step,exact match
0,contains-hole,cov-examples,10,25,1400,0.151515
1,contains-hole,cov-examples,20,25,2400,0.090909
2,contains-hole,cov-examples,40,25,2300,0.166667
3,contains-hole,cov-examples,80,25,2200,0.19697
4,contains-hole,cov-xent,10,25,2100,0.151515
5,contains-hole,cov-xent,20,25,2100,0.166667
6,contains-hole,cov-xent,40,25,2100,0.181818
7,contains-hole,cov-xent,80,25,1900,0.151515
8,none,cov-examples,10,25,2300,0.151515
9,none,cov-examples,20,25,2400,0.181818


In [48]:
# Evaluate every saved validation inference file of every run in the sweep and
# tabulate each run's best checkpoint according to `metric_name`.
# This cell recomputes all metrics from the inference files; it rebinds
# accuracy_per_run, all_metrics, metric_types, rows, df and flat_df.
#
# The metric is named once here so that the score lookup and the DataFrame
# column header cannot drift apart.
metric_name = 'corpus BLEU'

accuracy_per_run = collections.defaultdict(dict)
all_metrics = []
metric_types = set()
rows = []
for d in sorted(glob.glob('logdirs/20190201-hs-allmatches-anysplit-multimean/*')):
    exp_name = os.path.basename(d)
    # Directory names encode the sweep variables: filt-<...>_st-<...>_nt-<...>
    exp_vars = re.match(r'filt-([^_]+)_st-([^_]+)_nt-([^_]+)', exp_name).groups()

    all_scores = []
    for infer_path in glob.glob(os.path.join(d, 'infer-val-step*-bs1.jsonl')):
        # Raw-string pattern ('\d' in a plain string is an invalid escape
        # sequence), applied to the file name only so that the directory part
        # of the path can never supply the match.
        step = int(re.search(r'step(\d+)', os.path.basename(infer_path)).group(1))
        _, metrics = evaluation.compute_metrics(
            'configs/hearthstone/nl2code.jsonnet', '', 'val', infer_path)
        all_scores.append((step, metrics[metric_name]))
        all_metrics.append((exp_name, step, metrics))
        metric_types.update(metrics.keys())

    all_scores.sort(key=operator.itemgetter(0))
    # max() returns the first maximal item, i.e. the earliest step among ties
    # (same pick as a stable descending sort); (-1, -1) marks a run that has
    # no evaluated checkpoints.
    best_step_and_score = max(all_scores, key=operator.itemgetter(1), default=(-1, -1))
    rows.append(exp_vars + (len(all_scores),) + best_step_and_score)
    accuracy_per_run[exp_name] = {
        'x': [s[0] for s in all_scores],
        'all': [s[1] for s in all_scores],
    }
    print(d)

metric_types = tuple(sorted(metric_types))
df = pd.DataFrame(rows, columns=('filt', 'cov', 'nt', 'num steps eval', 'step', metric_name))
flat_df = pd.DataFrame(
    [(exp_name, step) + tuple(metrics.get(t) for t in metric_types) for exp_name, step, metrics in all_metrics],
    columns=('exp_name', 'step') + metric_types)

logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-10
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-20
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-40
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-80
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-10
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-20
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-40
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-80
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-10
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-20
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-40
logdirs/20190201-hs-allmatches-anysplit-mu

In [49]:
# Best validation checkpoint of each run in the sweep, selected by corpus BLEU.
df

Unnamed: 0,filt,cov,nt,num steps eval,step,corpus BLEU
0,contains-hole,cov-examples,10,25,2500,0.785207
1,contains-hole,cov-examples,20,25,1900,0.745324
2,contains-hole,cov-examples,40,25,2100,0.76429
3,contains-hole,cov-examples,80,25,2500,0.773765
4,contains-hole,cov-xent,10,25,2300,0.78327
5,contains-hole,cov-xent,20,25,2200,0.782329
6,contains-hole,cov-xent,40,25,2100,0.76961
7,contains-hole,cov-xent,80,25,2500,0.76748
8,none,cov-examples,10,25,2200,0.762103
9,none,cov-examples,20,25,2500,0.782529


In [50]:
# Evaluate every saved validation inference file of every run in the sweep and
# tabulate each run's best checkpoint according to `metric_name`.
# This cell recomputes all metrics from the inference files; it rebinds
# accuracy_per_run, all_metrics, metric_types, rows, df and flat_df.
#
# The metric is named once here so that the score lookup and the DataFrame
# column header cannot drift apart.
metric_name = 'sentence BLEU'

accuracy_per_run = collections.defaultdict(dict)
all_metrics = []
metric_types = set()
rows = []
for d in sorted(glob.glob('logdirs/20190201-hs-allmatches-anysplit-multimean/*')):
    exp_name = os.path.basename(d)
    # Directory names encode the sweep variables: filt-<...>_st-<...>_nt-<...>
    exp_vars = re.match(r'filt-([^_]+)_st-([^_]+)_nt-([^_]+)', exp_name).groups()

    all_scores = []
    for infer_path in glob.glob(os.path.join(d, 'infer-val-step*-bs1.jsonl')):
        # Raw-string pattern ('\d' in a plain string is an invalid escape
        # sequence), applied to the file name only so that the directory part
        # of the path can never supply the match.
        step = int(re.search(r'step(\d+)', os.path.basename(infer_path)).group(1))
        _, metrics = evaluation.compute_metrics(
            'configs/hearthstone/nl2code.jsonnet', '', 'val', infer_path)
        all_scores.append((step, metrics[metric_name]))
        all_metrics.append((exp_name, step, metrics))
        metric_types.update(metrics.keys())

    all_scores.sort(key=operator.itemgetter(0))
    # max() returns the first maximal item, i.e. the earliest step among ties
    # (same pick as a stable descending sort); (-1, -1) marks a run that has
    # no evaluated checkpoints.
    best_step_and_score = max(all_scores, key=operator.itemgetter(1), default=(-1, -1))
    rows.append(exp_vars + (len(all_scores),) + best_step_and_score)
    accuracy_per_run[exp_name] = {
        'x': [s[0] for s in all_scores],
        'all': [s[1] for s in all_scores],
    }
    print(d)

metric_types = tuple(sorted(metric_types))
df = pd.DataFrame(rows, columns=('filt', 'cov', 'nt', 'num steps eval', 'step', metric_name))
flat_df = pd.DataFrame(
    [(exp_name, step) + tuple(metrics.get(t) for t in metric_types) for exp_name, step, metrics in all_metrics],
    columns=('exp_name', 'step') + metric_types)

logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-10
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-20
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-40
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-80
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-10
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-20
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-40
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-xent_nt-80
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-10
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-20
logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-40
logdirs/20190201-hs-allmatches-anysplit-mu

In [51]:
# Best validation checkpoint of each run in the sweep, selected by sentence BLEU.
df

Unnamed: 0,filt,cov,nt,num steps eval,step,sentence BLEU
0,contains-hole,cov-examples,10,25,2500,0.780821
1,contains-hole,cov-examples,20,25,2200,0.744844
2,contains-hole,cov-examples,40,25,2200,0.765076
3,contains-hole,cov-examples,80,25,2500,0.779958
4,contains-hole,cov-xent,10,25,2100,0.780367
5,contains-hole,cov-xent,20,25,2000,0.787002
6,contains-hole,cov-xent,40,25,2400,0.772903
7,contains-hole,cov-xent,80,25,2500,0.770651
8,none,cov-examples,10,25,2400,0.753953
9,none,cov-examples,20,25,2200,0.787985


# Corpus BLEU

In [17]:
# Pick the (run, checkpoint) row of flat_df with the highest corpus BLEU.
best_corpus_bleu_label = flat_df['corpus BLEU'].idxmax()
flat_df.loc[best_corpus_bleu_label]

exp_name         filt-none_st-cov-xent_nt-10
step                                    2300
corpus BLEU                         0.785476
exact match                         0.151515
sentence BLEU                       0.773789
Name: 312, dtype: object

In [19]:
# Beam size 1: decode the test section with checkpoint 2300 of the run
# filt-none_st-cov-xent_nt-10 (the best corpus-BLEU row shown above).
# CUDA_VISIBLE_DEVICES is set to empty -- presumably to run on CPU; confirm.
# The predictions are read back from the run's log directory by the next cell.
!CUDA_VISIBLE_DEVICES= python infer.py --config configs/hearthstone-idioms/nl2code-0201-allmatches-anysplit-multimean.jsonnet --logdir logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-xent_nt-10 --config-args "{filt: 'none', st: 'cov-xent', nt: 10}" --output __LOGDIR__/infer-test-step02300-bs1.jsonl --step 2300 --section test --beam-size 1

Loading model from logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-xent_nt-10/model_checkpoint-00002300
100%|███████████████████████████████████████████| 66/66 [00:20<00:00,  3.33it/s]


In [23]:
# Score the beam-size-1 test predictions of filt-none_st-cov-xent_nt-10 at
# step 2300; only the metrics (second return value) are kept and displayed.
test_predictions = 'logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-xent_nt-10/infer-test-step02300-bs1.jsonl'
_, metrics = evaluation.compute_metrics(
    'configs/hearthstone/nl2code.jsonnet', '', 'test', test_predictions)
metrics

OrderedDict([('exact match', 0.10606060606060606),
             ('sentence BLEU', 0.7434579505376531),
             ('corpus BLEU', 0.7480294254902261)])

In [None]:
# Beam size 15: same run and checkpoint as the beam-size-1 command
# (filt-none_st-cov-xent_nt-10, step 2300), writing to a separate -bs15 file.
# NOTE(review): this cell has no execution count or recorded output, so the
# -bs15 predictions may not exist yet.
!CUDA_VISIBLE_DEVICES= python infer.py --config configs/hearthstone-idioms/nl2code-0201-allmatches-anysplit-multimean.jsonnet --logdir logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-xent_nt-10 --config-args "{filt: 'none', st: 'cov-xent', nt: 10}" --output __LOGDIR__/infer-test-step02300-bs15.jsonl --step 2300 --section test --beam-size 15

In [None]:
# Score the beam-size-15 test predictions of filt-none_st-cov-xent_nt-10 at
# step 2300; only the metrics (second return value) are kept and displayed.
test_predictions = 'logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-xent_nt-10/infer-test-step02300-bs15.jsonl'
_, metrics = evaluation.compute_metrics(
    'configs/hearthstone/nl2code.jsonnet', '', 'test', test_predictions)
metrics

# Sentence BLEU

In [14]:
# Pick the (run, checkpoint) row of flat_df with the highest sentence BLEU.
best_sentence_bleu_label = flat_df['sentence BLEU'].idxmax()
flat_df.loc[best_sentence_bleu_label]

exp_name         filt-none_st-cov-examples_nt-20
step                                        2200
corpus BLEU                             0.780836
exact match                             0.166667
sentence BLEU                           0.787985
Name: 248, dtype: object

In [None]:
# Beam size 1: decode the test section with checkpoint 2200 of the run
# filt-none_st-cov-examples_nt-20 (the best sentence-BLEU row shown above).
# CUDA_VISIBLE_DEVICES is set to empty -- presumably to run on CPU; confirm.
# NOTE(review): no execution count or output is recorded for this cell, although
# the next cell reads the file it writes.
!CUDA_VISIBLE_DEVICES= python infer.py --config configs/hearthstone-idioms/nl2code-0201-allmatches-anysplit-multimean.jsonnet --logdir logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-20 --config-args "{filt: 'none', st: 'cov-examples', nt: 20}" --output __LOGDIR__/infer-test-step02200-bs1.jsonl --step 2200 --section test --beam-size 1

In [25]:
# Score the beam-size-1 test predictions of filt-none_st-cov-examples_nt-20 at
# step 2200; only the metrics (second return value) are kept and displayed.
test_predictions = 'logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-20/infer-test-step02200-bs1.jsonl'
_, metrics = evaluation.compute_metrics(
    'configs/hearthstone/nl2code.jsonnet', '', 'test', test_predictions)
metrics

OrderedDict([('exact match', 0.16666666666666666),
             ('sentence BLEU', 0.7716206213695468),
             ('corpus BLEU', 0.7507985265452364)])

In [26]:
# Beam size 15: same run and checkpoint as the beam-size-1 command
# (filt-none_st-cov-examples_nt-20, step 2200), writing to a separate -bs15 file.
# The recorded progress bar shows this taking about six minutes for 66 examples.
!CUDA_VISIBLE_DEVICES= python infer.py --config configs/hearthstone-idioms/nl2code-0201-allmatches-anysplit-multimean.jsonnet --logdir logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-20 --config-args "{filt: 'none', st: 'cov-examples', nt: 20}" --output __LOGDIR__/infer-test-step02200-bs15.jsonl --step 2200 --section test --beam-size 15

Loading model from logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-20/model_checkpoint-00002200
100%|███████████████████████████████████████████| 66/66 [05:56<00:00,  4.90s/it]


In [28]:
# Score the beam-size-15 test predictions of filt-none_st-cov-examples_nt-20 at
# step 2200; only the metrics (second return value) are kept and displayed.
test_predictions = 'logdirs/20190201-hs-allmatches-anysplit-multimean/filt-none_st-cov-examples_nt-20/infer-test-step02200-bs15.jsonl'
_, metrics = evaluation.compute_metrics(
    'configs/hearthstone/nl2code.jsonnet', '', 'test', test_predictions)
metrics

OrderedDict([('exact match', 0.15151515151515152),
             ('sentence BLEU', 0.7700509157673502),
             ('corpus BLEU', 0.7411605536438948)])

# Exact match

In [15]:
# Pick the (run, checkpoint) row of flat_df with the highest exact match.
best_exact_match_label = flat_df['exact match'].idxmax()
flat_df.loc[best_exact_match_label]

exp_name         filt-contains-hole_st-cov-examples_nt-80
step                                                 2200
corpus BLEU                                      0.760019
exact match                                       0.19697
sentence BLEU                                    0.777438
Name: 98, dtype: object

In [29]:
# Beam size 1: decode the test section with checkpoint 2200 of the run
# filt-contains-hole_st-cov-examples_nt-80 (the best exact-match row shown above).
# CUDA_VISIBLE_DEVICES is set to empty -- presumably to run on CPU; confirm.
# The predictions are read back from the run's log directory by the next cell.
!CUDA_VISIBLE_DEVICES= python infer.py --config configs/hearthstone-idioms/nl2code-0201-allmatches-anysplit-multimean.jsonnet --logdir logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-80 --config-args "{filt: 'contains-hole', st: 'cov-examples', nt: 80}" --output __LOGDIR__/infer-test-step02200-bs1.jsonl --step 2200 --section test --beam-size 1

Loading model from logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-80/model_checkpoint-00002200
100%|███████████████████████████████████████████| 66/66 [00:20<00:00,  2.93it/s]


In [30]:
# Score the beam-size-1 test predictions of
# filt-contains-hole_st-cov-examples_nt-80 at step 2200; only the metrics
# (second return value) are kept and displayed.
test_predictions = 'logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-80/infer-test-step02200-bs1.jsonl'
_, metrics = evaluation.compute_metrics(
    'configs/hearthstone/nl2code.jsonnet', '', 'test', test_predictions)
metrics

OrderedDict([('exact match', 0.19696969696969696),
             ('sentence BLEU', 0.779738300040056),
             ('corpus BLEU', 0.7661866828940765)])

# Compare against baseline

In [33]:
# Same evaluation as for the idiom sweep, but over the baseline runs (att0,
# att1, ...): evaluate every saved validation inference file and tabulate each
# run's best checkpoint according to `metric_name`.
# NOTE: this rebinds df, flat_df, all_metrics, etc.; from here on they describe
# the baseline runs, not the idiom sweep.
#
# The metric is named once here so that the score lookup and the DataFrame
# column header cannot drift apart.
metric_name = 'exact match'

accuracy_per_run = collections.defaultdict(dict)
all_metrics = []
metric_types = set()
rows = []
# (The original pattern had a stray double slash before '*'; glob returned the
# same single-slash paths, so the result set is unchanged.)
for d in sorted(glob.glob('logdirs/20181231-nl2code-hearthstone-fef2c5b/*')):
    exp_name = os.path.basename(d)
    # Directory names look like att<...>; the captured part is the 'att' column.
    exp_vars = re.match(r'att([^_]+)', exp_name).groups()

    all_scores = []
    for infer_path in glob.glob(os.path.join(d, 'infer-val-step*-bs1.jsonl')):
        # Raw-string pattern ('\d' in a plain string is an invalid escape
        # sequence), applied to the file name only so that the directory part
        # of the path can never supply the match.
        step = int(re.search(r'step(\d+)', os.path.basename(infer_path)).group(1))
        _, metrics = evaluation.compute_metrics(
            'configs/hearthstone/nl2code.jsonnet', '', 'val', infer_path)
        all_scores.append((step, metrics[metric_name]))
        all_metrics.append((exp_name, step, metrics))
        metric_types.update(metrics.keys())

    all_scores.sort(key=operator.itemgetter(0))
    # max() returns the first maximal item, i.e. the earliest step among ties
    # (same pick as a stable descending sort); (-1, -1) marks a run that has
    # no evaluated checkpoints.
    best_step_and_score = max(all_scores, key=operator.itemgetter(1), default=(-1, -1))
    rows.append(exp_vars + (len(all_scores),) + best_step_and_score)
    accuracy_per_run[exp_name] = {
        'x': [s[0] for s in all_scores],
        'all': [s[1] for s in all_scores],
    }
    print(d)

metric_types = tuple(sorted(metric_types))
df = pd.DataFrame(rows, columns=('att', 'num steps eval', 'step', metric_name))
flat_df = pd.DataFrame(
    [(exp_name, step) + tuple(metrics.get(t) for t in metric_types) for exp_name, step, metrics in all_metrics],
    columns=('exp_name', 'step') + metric_types)

logdirs/20181231-nl2code-hearthstone-fef2c5b/att0
logdirs/20181231-nl2code-hearthstone-fef2c5b/att1


In [38]:
# Best validation checkpoint of each baseline run, selected by exact match.
df

Unnamed: 0,att,num steps eval,step,exact match
0,0,25,1900,0.166667
1,1,25,2300,0.19697


## Corpus BLEU

In [35]:
# Baseline: pick the (run, checkpoint) row of flat_df with the highest corpus BLEU.
best_corpus_bleu_label = flat_df['corpus BLEU'].idxmax()
flat_df.loc[best_corpus_bleu_label]

exp_name             att1
step                 1900
corpus BLEU      0.789029
exact match      0.151515
sentence BLEU    0.780316
Name: 44, dtype: object

In [36]:
# Baseline, beam size 1: decode the test section with checkpoint 1900 of att1
# (the best corpus-BLEU row shown above).
# CUDA_VISIBLE_DEVICES is set to empty -- presumably to run on CPU; confirm.
!CUDA_VISIBLE_DEVICES= python infer.py --config configs/hearthstone/nl2code.jsonnet --logdir logdirs/20181231-nl2code-hearthstone-fef2c5b/att1 --output __LOGDIR__/infer-test-step01900-bs1.jsonl --step 1900 --section test --beam-size 1

Loading model from logdirs/20181231-nl2code-hearthstone-fef2c5b/att1/model_checkpoint-00001900
100%|███████████████████████████████████████████| 66/66 [00:21<00:00,  3.23it/s]


In [37]:
# Score the beam-size-1 test predictions of baseline run att1 at step 1900;
# only the metrics (second return value) are kept and displayed.
test_predictions = 'logdirs/20181231-nl2code-hearthstone-fef2c5b/att1/infer-test-step01900-bs1.jsonl'
_, metrics = evaluation.compute_metrics(
    'configs/hearthstone/nl2code.jsonnet', '', 'test', test_predictions)
metrics

OrderedDict([('exact match', 0.12121212121212122),
             ('sentence BLEU', 0.7242459632152278),
             ('corpus BLEU', 0.7335344583509769)])

## Sentence BLEU

In [39]:
# Baseline: pick the (run, checkpoint) row of flat_df with the highest sentence BLEU.
best_sentence_bleu_label = flat_df['sentence BLEU'].idxmax()
flat_df.loc[best_sentence_bleu_label]

exp_name              att0
step                  2400
corpus BLEU       0.777414
exact match      0.0909091
sentence BLEU     0.780721
Name: 24, dtype: object

In [40]:
# Baseline, beam size 1: decode the test section with checkpoint 2400 of att0
# (the best sentence-BLEU row shown above).
# CUDA_VISIBLE_DEVICES is set to empty -- presumably to run on CPU; confirm.
!CUDA_VISIBLE_DEVICES= python infer.py --config configs/hearthstone/nl2code.jsonnet --logdir logdirs/20181231-nl2code-hearthstone-fef2c5b/att0 --output __LOGDIR__/infer-test-step02400-bs1.jsonl --step 2400 --section test --beam-size 1

Loading model from logdirs/20181231-nl2code-hearthstone-fef2c5b/att0/model_checkpoint-00002400
100%|███████████████████████████████████████████| 66/66 [00:19<00:00,  3.16it/s]


In [41]:
# Score the beam-size-1 test predictions of baseline run att0 at step 2400;
# only the metrics (second return value) are kept and displayed.
test_predictions = 'logdirs/20181231-nl2code-hearthstone-fef2c5b/att0/infer-test-step02400-bs1.jsonl'
_, metrics = evaluation.compute_metrics(
    'configs/hearthstone/nl2code.jsonnet', '', 'test', test_predictions)
metrics

OrderedDict([('exact match', 0.19696969696969696),
             ('sentence BLEU', 0.7791194850374037),
             ('corpus BLEU', 0.7638435491009413)])

## Exact match

In [42]:
# Baseline: pick the (run, checkpoint) row of flat_df with the highest exact match.
best_exact_match_label = flat_df['exact match'].idxmax()
flat_df.loc[best_exact_match_label]

exp_name             att1
step                 2300
corpus BLEU      0.762964
exact match       0.19697
sentence BLEU    0.766753
Name: 37, dtype: object

In [43]:
# Baseline, beam size 1: decode the test section with checkpoint 2300 of att1
# (the best exact-match row shown above).
# CUDA_VISIBLE_DEVICES is set to empty -- presumably to run on CPU; confirm.
!CUDA_VISIBLE_DEVICES= python infer.py --config configs/hearthstone/nl2code.jsonnet --logdir logdirs/20181231-nl2code-hearthstone-fef2c5b/att1 --output __LOGDIR__/infer-test-step02300-bs1.jsonl --step 2300 --section test --beam-size 1

Loading model from logdirs/20181231-nl2code-hearthstone-fef2c5b/att1/model_checkpoint-00002300
100%|███████████████████████████████████████████| 66/66 [00:19<00:00,  3.89it/s]


In [44]:
# Score the beam-size-1 test predictions of baseline run att1 at step 2300;
# only the metrics (second return value) are kept and displayed.
test_predictions = 'logdirs/20181231-nl2code-hearthstone-fef2c5b/att1/infer-test-step02300-bs1.jsonl'
_, metrics = evaluation.compute_metrics(
    'configs/hearthstone/nl2code.jsonnet', '', 'test', test_predictions)
metrics

OrderedDict([('exact match', 0.15151515151515152),
             ('sentence BLEU', 0.7430320121685502),
             ('corpus BLEU', 0.7229233074922048)])

# Measure idiom popularity

In [53]:
# Re-run beam-size-1 test inference for filt-contains-hole_st-cov-examples_nt-80
# at step 2200, this time with --output-history, writing to a separate
# '-with-history' file that the following cells parse.
# NOTE(review): presumably --output-history is what adds the per-beam
# 'choice_history' field read below -- confirm in infer.py.
!CUDA_VISIBLE_DEVICES= python infer.py \
    --config configs/hearthstone-idioms/nl2code-0201-allmatches-anysplit-multimean.jsonnet \
    --logdir logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-80 \
    --config-args "{filt: 'contains-hole', st: 'cov-examples', nt: 80}" \
    --output-history \
    --output __LOGDIR__/infer-test-step02200-bs1-with-history.jsonl --step 2200 --section test --beam-size 1

Loading model from logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-80/model_checkpoint-00002200
100%|███████████████████████████████████████████| 66/66 [00:19<00:00,  3.17it/s]


In [55]:
# Load the inference records, one JSON object per line. The file is opened in a
# `with` block so the handle is closed as soon as parsing finishes (the bare
# open() inside the comprehension never closed it explicitly).
with open('logdirs/20190201-hs-allmatches-anysplit-multimean/filt-contains-hole_st-cov-examples_nt-80/infer-test-step02200-bs1-with-history.jsonl') as inferred_file:
    inferred = [json.loads(line) for line in inferred_file]

In [63]:
# Load the grammar's rule list; entries of a beam's 'choice_history' that are
# integers are used below as indices into it. A `with` block closes the file
# handle, which the bare open() call left open.
with open('data/hearthstone-idioms-20190201/all-matches-trees-anysplit/filt-contains-hole_st-cov-examples_nt-80/nl2code/grammar_rules.json') as rules_file:
    all_rules = json.load(rules_file)['all_rules']

In [72]:
# For every inferred example, count how many times each idiom ("TemplateN")
# rule was chosen along the first beam's choice history. The result,
# all_template_counts, holds one Counter per example keyed by the rule tuple.
all_template_counts = []
for example in inferred:
    template_counts = collections.Counter()
    for choice in example['beams'][0]['choice_history']:
        # Only integer choices are indices into all_rules; skip everything else.
        if not isinstance(choice, int):
            continue
        rule = all_rules[choice]
        # rule[1] is the right-hand side; it names an idiom when it starts with
        # 'Template<digits>'. Raw string: '\d' in a plain string literal is an
        # invalid escape sequence.
        if isinstance(rule[1], str) and re.match(r'Template\d+', rule[1]):
            template_counts[tuple(rule)] += 1
    all_template_counts.append(template_counts)
            

## Most frequently selected idioms

In [86]:
# Index the idiom definitions by the name used on grammar-rule right-hand sides
# ('Template<id>'). A `with` block closes the file handle, which the bare open()
# call left open.
with open('data/hearthstone-idioms-20190201/all-matches-trees-anysplit/filt-contains-hole_st-cov-examples_nt-80/templates.json') as templates_file:
    all_templates = {
        'Template{}'.format(template['id']): template
        for template in json.load(templates_file)
    }

In [92]:
# Add up the per-example counts of every idiom rule and keep the five rules
# selected most often overall.
total_template_counts = collections.Counter()
for per_example_counts in all_template_counts:
    total_template_counts.update(per_example_counts)
most_frequent = total_template_counts.most_common(5)
most_frequent

[(('mod', 'Template4'), 62),
 (('arguments_plus_templates', 'Template33'), 56),
 (('stmt_seq_elem', 'Template24'), 56),
 (('stmt_seq_elem', 'Template75'), 49),
 (('stmt_seq_elem', 'Template13'), 45)]

In [93]:
# Print the definition of each of the most frequently selected idioms.
# (pprint is imported with the other modules at the top of the notebook rather
# than in the middle of the analysis.)
for (_lhs, rhs), _count in most_frequent:
    print(rhs)
    pprint.pprint(all_templates[rhs])
    print()

Template4
{'holes': [{'id': 0, 'type': 'AddChild'},
           {'id': 1, 'type': 'AddChild'},
           {'id': 2, 'type': 'ReplaceSelf'},
           {'id': 3, 'type': 'AddChild'},
           {'id': 4, 'type': 'AddChild'},
           {'id': 5, 'type': 'ReplaceSelf'},
           {'id': 6, 'type': 'ReplaceSelf'},
           {'id': 7, 'type': 'ReplaceSelf'}],
 'id': 4,
 'idiom': ['Module',
           [],
           None,
           [['ClassDef',
             [],
             None,
             [['ClassDef-name', [], 0, []],
              ['ClassDef-bases',
               [],
               None,
               [['Name', [], 1, []],
                ['ClassDef-bases', [], None, [['End', [], None, []]]]]],
              ['ClassDef-keywords', [], None, [['End', [], None, []]]],
              ['ClassDef-body',
               [],
               None,
               [['FunctionDef',
                 [],
                 None,
                 [['FunctionDef-name', [], None, [['__init__', [], Non

## Idioms that appear in the greatest number of instances

In [97]:
# Count, for each idiom rule, the number of examples in which it was selected
# at least once (each example contributes its distinct rules only), and keep
# the top five.
examples_per_template = collections.Counter()
for per_example_counts in all_template_counts:
    examples_per_template.update(per_example_counts.keys())
most_common = examples_per_template.most_common(5)
most_common

[(('mod', 'Template4'), 62),
 (('stmt_seq_elem', 'Template24'), 56),
 (('arguments_plus_templates', 'Template33'), 55),
 (('stmt_seq_elem', 'Template13'), 45),
 (('arguments_plus_templates', 'Template46'), 43)]

In [101]:
# Print the definitions of the idioms listed in `most_common` whose names did
# not already appear in `most_frequent`.
already_printed = {entry[0][1] for entry in most_frequent}
not_yet_printed = [rhs for (_, rhs), _ in most_common if rhs not in already_printed]
for template_name in not_yet_printed:
    print(template_name)
    pprint.pprint(all_templates[template_name])
    print()

Template46
{'holes': [{'id': 0, 'type': 'AddChild'}],
 'id': 46,
 'idiom': ['arguments',
           [],
           None,
           [['arguments-args',
             [],
             None,
             [['arg',
               [],
               None,
               [['arg-arg', [], 0, []],
                ['arg-annotation', [], None, [['Null', [], None, []]]]]],
              ['arguments-args', [], None, [['End', [], None, []]]]]],
            ['arguments-vararg', [], None, [['Null', [], None, []]]],
            ['arguments-kwonlyargs', [], None, [['End', [], None, []]]],
            ['arguments-kw_defaults', [], None, [['End', [], None, []]]],
            ['arguments-kwarg', [], None, [['Null', [], None, []]]],
            ['arguments-defaults', [], None, [['End', [], None, []]]]]]}

