In [1]:
import collections
import itertools
import json
import re

import pandas as pd
import numpy as np
import IPython.display

In [2]:
# Matches choice strings of the form 'Template<N>...' and captures N.
# Raw string: '\d' in a plain string literal is an invalid escape sequence.
_TEMPLATE_CHOICE_RE = re.compile(r'Template(\d+).*')


def _template_id(choice):
    """Return the integer id encoded in a 'Template<N>...' choice, or None otherwise."""
    m = _TEMPLATE_CHOICE_RE.match(choice)
    return int(m.group(1)) if m else None


def analyze(report, templates, accuracy_ks=(1, 2), precision_ks=(1, 2), recall_ks=(1, 2)):
    """Compute top-k accuracy and per-template precision/recall from a debug report.

    Ranks are 0-based positions in ``entry['choices']``; "rank within top k"
    is therefore tested as ``rank < k``.  Two rank flavours are tracked:

    * ``'all'``: position of the choice among all choices.
    * ``'templates only'``: number of non-template choices that precede the
      choice, i.e. its rank once the other template choices are left out.

    Args:
        report: iterable of items; each item has a ``'history'`` list of
            entries, each with ``'choices'`` (ranked list) and
            ``'valid_choices'``.  Entries whose choice list is empty or whose
            first choice is not a string are skipped.
        templates: iterable of dicts with ``'id'`` and ``'idiom'``; the first
            element of ``'idiom'`` is shown in the ``'Head'`` column.
        accuracy_ks: values of k for the accuracy table.
        precision_ks: values of k for the precision columns.
        recall_ks: values of k for the recall columns.

    Returns:
        ``(accuracy_df, pr_df)``: a one-row DataFrame with one
        ``'Accuracy @ k'`` column per k, and a per-template DataFrame with
        ``'Head'``, ``'Matches'``, ``'Precision @ k <type>'`` and
        ``'Recall @ k <type>'`` columns.  Ratios with a zero denominator
        are NaN.

    Raises:
        ValueError: if a processed entry has no valid choices.
        KeyError: if a valid choice does not appear in the entry's choices.
    """
    # Count how often each template appears among the valid choices.
    template_match_counts = collections.defaultdict(int)
    # Ranks at which each template was offered, whether or not it was valid.
    template_choice_ranks = {
        'all': collections.defaultdict(list),
        'templates only': collections.defaultdict(list),
    }
    # Ranks at which each template was offered on the occasions it was valid.
    template_valid_choice_ranks = {
        'all': collections.defaultdict(list),
        'templates only': collections.defaultdict(list),
    }
    min_valid_ranks = []

    for item in report:
        for entry in item['history']:
            choices = entry['choices']
            # An empty choice list has nothing to rank; guard before indexing [0].
            if not choices or not isinstance(choices[0], str):
                continue

            all_ranks = {}
            template_only_ranks = {}
            non_templates_seen = 0
            for i, choice in enumerate(choices):
                all_ranks[choice] = i
                template_only_ranks[choice] = non_templates_seen
                if _template_id(choice) is None:
                    non_templates_seen += 1

            # For overall top-k accuracy
            min_valid_ranks.append(
                min(all_ranks[choice] for choice in entry['valid_choices']))

            # For precision
            #   times that choice appeared at rank k, when it was valid (template_valid_choice_ranks)
            # / times that choice appeared at rank k (tp + fp), whether or not valid (template_choice_ranks)
            for choice in choices:
                template_id = _template_id(choice)
                if template_id is None:
                    continue
                template_choice_ranks['all'][template_id].append(all_ranks[choice])
                template_choice_ranks['templates only'][template_id].append(
                    template_only_ranks[choice])

            # For recall
            #   times that choice appeared at rank k, when it was valid (template_valid_choice_ranks)
            # / times that choice was valid (tp + fn)
            for choice in entry['valid_choices']:
                template_id = _template_id(choice)
                if template_id is None:
                    continue
                template_match_counts[template_id] += 1

                # Its rank among all choices
                template_valid_choice_ranks['all'][template_id].append(all_ranks[choice])
                # Its rank, excluding other templates
                template_valid_choice_ranks['templates only'][template_id].append(
                    template_only_ranks[choice])

    min_valid_ranks = np.array(min_valid_ranks)

    # A zero denominator (template never offered within the top k, or an empty
    # report) intentionally yields NaN; silence numpy's warning for that case.
    with np.errstate(divide='ignore', invalid='ignore'):
        # Top-k accuracy: there exists a valid choice whose 0-based rank is < k
        top_k_accuracy = {
            k: np.sum(min_valid_ranks < k) / len(min_valid_ranks) for k in accuracy_ks
        }

        # Precision:
        #   Among the # of times that a given template has 0-based rank < k,
        #   how often it is a valid choice
        top_k_precision = {
            type_name: {
                k: {i: np.sum(np.array(template_valid_choice_ranks[type_name][i]) < k) /
                       np.sum(np.array(template_choice_ranks[type_name][i]) < k)
                    for i in template_match_counts.keys()} for k in precision_ks
            } for type_name in template_valid_choice_ranks
        }

        # Recall:
        #   Among the # of times that a given template is a valid choice,
        #   how often it is a choice with 0-based rank < k
        top_k_recall = {
            type_name: {
                k: {i: np.sum(np.array(ranks) < k) / len(ranks)
                    for i, ranks in ranks_of_type.items()} for k in recall_ks
            } for type_name, ranks_of_type in template_valid_choice_ranks.items()}

    accuracy_df = pd.DataFrame({
        'Accuracy @ {}'.format(k): [top_k_accuracy[k]]
        for k in accuracy_ks
    })
    pr_df = pd.DataFrame({
        'Head': {t['id']: t['idiom'][0] for t in templates},
        'Matches': template_match_counts,
        **{
            'Precision @ {} {}'.format(k, type_name): top_k_precision[type_name][k]
            for type_name in top_k_precision.keys()
            for k in precision_ks
        },
        **{
            'Recall @ {} {}'.format(k, type_name): top_k_recall[type_name][k]
            for type_name in top_k_recall.keys()
            for k in recall_ks
        }
    })
    return accuracy_df, pr_df


In [3]:
def analyze_anysplit_one(name, section):
    """Load the debug report and templates of one run configuration and analyze them.

    Args:
        name: configuration directory name, substituted into both the log
            directory path and the templates path.
        section: substituted into the debug file name
            (``debug-<section>-step2600.jsonl``).

    Returns:
        The ``(accuracy_df, pr_df)`` pair returned by ``analyze``.
    """
    report_path = '../logdirs/20190201-hs-allmatches-anysplit/{}/debug-{}-step2600.jsonl'.format(name, section)
    templates_path = '../data/hearthstone-idioms-20190201/all-matches-trees-anysplit/{}/templates.json'.format(name)

    # Context managers close the files deterministically; this function is
    # called once per configuration in a loop, so bare open() would leave
    # handles open until garbage collection.
    with open(report_path) as report_file:
        # JSON Lines: one JSON document per line.
        report = [json.loads(line) for line in report_file]
    with open(templates_path) as templates_file:
        templates = json.load(templates_file)
    return analyze(report, templates)

def analyze_anysplit(section):
    """Print a short template summary for every run configuration.

    For each combination of the ``filt``/``st``/``nt`` settings below, the
    configuration is analyzed with ``analyze_anysplit_one`` and three things
    are printed: the configuration name, how many templates have a positive
    'Matches' count, and how many have a positive 'Recall @ 1 templates only'.

    Args:
        section: passed through to ``analyze_anysplit_one``.
    """
    filt_values = ('none', 'contains-hole')
    st_values = ('cov-xent', 'cov-examples')
    nt_values = ('10', '20', '40', '80')

    for combo in itertools.product(filt_values, st_values, nt_values):
        name = 'filt-{}_st-{}_nt-{}'.format(*combo)
        # Only the per-template frame is needed for this summary.
        _, pr_df = analyze_anysplit_one(name, section)

        has_matches = pr_df['Matches'] > 0
        recalled_at_1 = pr_df['Recall @ 1 templates only'] > 0

        print(name)
        print('Templates containing matches:        {}'.format(sum(has_matches)))
        print('Templates with non-zero rank 1 freq: {}'.format(sum(recalled_at_1)))
        print()

In [4]:
# Print the per-configuration template summary for the 'train' section.
analyze_anysplit('train')



filt-none_st-cov-xent_nt-10
Templates containing matches:        8
Templates with non-zero rank 1 freq: 1

filt-none_st-cov-xent_nt-20
Templates containing matches:        17
Templates with non-zero rank 1 freq: 0

filt-none_st-cov-xent_nt-40
Templates containing matches:        34
Templates with non-zero rank 1 freq: 3

filt-none_st-cov-xent_nt-80
Templates containing matches:        71
Templates with non-zero rank 1 freq: 2

filt-none_st-cov-examples_nt-10
Templates containing matches:        8
Templates with non-zero rank 1 freq: 0

filt-none_st-cov-examples_nt-20
Templates containing matches:        17
Templates with non-zero rank 1 freq: 2

filt-none_st-cov-examples_nt-40
Templates containing matches:        30
Templates with non-zero rank 1 freq: 5

filt-none_st-cov-examples_nt-80
Templates containing matches:        66
Templates with non-zero rank 1 freq: 3

filt-contains-hole_st-cov-xent_nt-10
Templates containing matches:        9
Templates with non-zero rank 1 freq: 3

filt-c

In [5]:
# Print the per-configuration template summary for the 'val' section.
analyze_anysplit('val')

filt-none_st-cov-xent_nt-10
Templates containing matches:        8
Templates with non-zero rank 1 freq: 1





filt-none_st-cov-xent_nt-20
Templates containing matches:        17
Templates with non-zero rank 1 freq: 0

filt-none_st-cov-xent_nt-40
Templates containing matches:        34
Templates with non-zero rank 1 freq: 2

filt-none_st-cov-xent_nt-80
Templates containing matches:        71
Templates with non-zero rank 1 freq: 2

filt-none_st-cov-examples_nt-10
Templates containing matches:        8
Templates with non-zero rank 1 freq: 0

filt-none_st-cov-examples_nt-20
Templates containing matches:        17
Templates with non-zero rank 1 freq: 2

filt-none_st-cov-examples_nt-40
Templates containing matches:        30
Templates with non-zero rank 1 freq: 3

filt-none_st-cov-examples_nt-80
Templates containing matches:        66
Templates with non-zero rank 1 freq: 3

filt-contains-hole_st-cov-xent_nt-10
Templates containing matches:        9
Templates with non-zero rank 1 freq: 3

filt-contains-hole_st-cov-xent_nt-20
Templates containing matches:        19
Templates with non-zero rank 1 freq:

In [6]:
# Show both frames returned for one configuration on the 'train' section:
# the accuracy table first, then the per-template precision/recall table.
for df in analyze_anysplit_one('filt-none_st-cov-examples_nt-40', 'train'):
    IPython.display.display(df)



Unnamed: 0,Accuracy @ 1,Accuracy @ 2
0,0.992878,0.99992


Unnamed: 0,Head,Matches,Precision @ 1 all,Precision @ 1 templates only,Precision @ 2 all,Precision @ 2 templates only,Recall @ 1 all,Recall @ 1 templates only,Recall @ 2 all,Recall @ 2 templates only
0,ClassDef,345.0,,,,,0.0,0.0,0.0,0.0
1,Module,532.0,,,1.0,1.0,0.0,0.0,1.0,1.0
2,ClassDef-body,,,,,,,,,
3,FunctionDef,131.0,,,,,0.0,0.0,0.0,0.0
4,FunctionDef,533.0,,,,,0.0,0.0,0.0,0.0
5,FunctionDef,154.0,,,,,0.0,0.0,0.0,0.0
6,FunctionDef,550.0,,,,,0.0,0.0,0.0,0.0
7,FunctionDef,712.0,,,,,0.0,0.0,0.0,0.0
8,Expr,538.0,,,,,0.0,0.0,0.0,0.0
9,Call,253.0,,,,,0.0,0.0,0.0,0.0


In [7]:
# Show both frames returned for one configuration on the 'val' section:
# the accuracy table first, then the per-template precision/recall table.
for df in analyze_anysplit_one('filt-none_st-cov-examples_nt-40', 'val'):
    IPython.display.display(df)



Unnamed: 0,Accuracy @ 1,Accuracy @ 2
0,0.966188,0.987879


Unnamed: 0,Head,Matches,Precision @ 1 all,Precision @ 1 templates only,Precision @ 2 all,Precision @ 2 templates only,Recall @ 1 all,Recall @ 1 templates only,Recall @ 2 all,Recall @ 2 templates only
0,ClassDef,41.0,,,,,0.0,0.0,0.0,0.0
1,Module,66.0,,,1.0,1.0,0.0,0.0,1.0,1.0
2,ClassDef-body,,,,,,,,,
3,FunctionDef,12.0,,,,,0.0,0.0,0.0,0.0
4,FunctionDef,67.0,,,,,0.0,0.0,0.0,0.0
5,FunctionDef,18.0,,,,,0.0,0.0,0.0,0.0
6,FunctionDef,74.0,,,,,0.0,0.0,0.0,0.0
7,FunctionDef,92.0,,,,,0.0,0.0,0.0,0.0
8,Expr,68.0,,,,,0.0,0.0,0.0,0.0
9,Call,32.0,,,,,0.0,0.0,0.0,0.0


In [8]:
def analyze_anysplit_multimean_one(name, section):
    """Load the multimean debug report and templates of one configuration and analyze them.

    The report is read from the ``...-anysplit-multimean`` log directory; the
    templates are read from the ``all-matches-trees-anysplit`` data directory.

    Args:
        name: configuration directory name, substituted into both paths.
        section: substituted into the debug file name
            (``debug-<section>-step2600.jsonl``).

    Returns:
        The ``(accuracy_df, pr_df)`` pair returned by ``analyze``.
    """
    report_path = '../logdirs/20190201-hs-allmatches-anysplit-multimean/{}/debug-{}-step2600.jsonl'.format(name, section)
    templates_path = '../data/hearthstone-idioms-20190201/all-matches-trees-anysplit/{}/templates.json'.format(name)

    # Context managers close the files deterministically instead of leaving
    # the handles open until garbage collection.
    with open(report_path) as report_file:
        # JSON Lines: one JSON document per line.
        report = [json.loads(line) for line in report_file]
    with open(templates_path) as templates_file:
        templates = json.load(templates_file)
    return analyze(report, templates)


def analyze_anysplit_multimean(section):
    """Print a short template summary for every multimean run configuration.

    Each ``filt``/``st``/``nt`` combination is analyzed with
    ``analyze_anysplit_multimean_one``; the configuration name, the number of
    templates with a positive 'Matches' count, and the number with a positive
    'Recall @ 1 templates only' are printed, followed by a blank line.

    Args:
        section: passed through to ``analyze_anysplit_multimean_one``.
    """
    grid = itertools.product(
        ('none', 'contains-hole'),
        ('cov-xent', 'cov-examples'),
        ('10', '20', '40', '80'),
    )
    for settings in grid:
        name = 'filt-{}_st-{}_nt-{}'.format(*settings)
        # The accuracy frame is not used by this summary.
        _, pr_df = analyze_anysplit_multimean_one(name, section)

        matched_count = sum(pr_df['Matches'] > 0)
        top_ranked_count = sum(pr_df['Recall @ 1 templates only'] > 0)

        print(name)
        print('Templates containing matches:        {}'.format(matched_count))
        print('Templates with non-zero rank 1 freq: {}'.format(top_ranked_count))
        print()

In [9]:
# Print the per-configuration template summary of the multimean runs for the 'train' section.
analyze_anysplit_multimean('train')



filt-none_st-cov-xent_nt-10
Templates containing matches:        8
Templates with non-zero rank 1 freq: 7

filt-none_st-cov-xent_nt-20
Templates containing matches:        17
Templates with non-zero rank 1 freq: 14

filt-none_st-cov-xent_nt-40
Templates containing matches:        34
Templates with non-zero rank 1 freq: 34

filt-none_st-cov-xent_nt-80
Templates containing matches:        71
Templates with non-zero rank 1 freq: 62

filt-none_st-cov-examples_nt-10
Templates containing matches:        8
Templates with non-zero rank 1 freq: 8

filt-none_st-cov-examples_nt-20
Templates containing matches:        17
Templates with non-zero rank 1 freq: 17

filt-none_st-cov-examples_nt-40
Templates containing matches:        30
Templates with non-zero rank 1 freq: 29

filt-none_st-cov-examples_nt-80
Templates containing matches:        66
Templates with non-zero rank 1 freq: 62

filt-contains-hole_st-cov-xent_nt-10
Templates containing matches:        9
Templates with non-zero rank 1 freq: 9



In [10]:
# Print the per-configuration template summary of the multimean runs for the 'val' section.
analyze_anysplit_multimean('val')

filt-none_st-cov-xent_nt-10
Templates containing matches:        8
Templates with non-zero rank 1 freq: 6





filt-none_st-cov-xent_nt-20
Templates containing matches:        17
Templates with non-zero rank 1 freq: 11

filt-none_st-cov-xent_nt-40
Templates containing matches:        34
Templates with non-zero rank 1 freq: 26

filt-none_st-cov-xent_nt-80
Templates containing matches:        71
Templates with non-zero rank 1 freq: 45

filt-none_st-cov-examples_nt-10
Templates containing matches:        8
Templates with non-zero rank 1 freq: 7

filt-none_st-cov-examples_nt-20
Templates containing matches:        17
Templates with non-zero rank 1 freq: 12

filt-none_st-cov-examples_nt-40
Templates containing matches:        30
Templates with non-zero rank 1 freq: 25

filt-none_st-cov-examples_nt-80
Templates containing matches:        66
Templates with non-zero rank 1 freq: 55

filt-contains-hole_st-cov-xent_nt-10
Templates containing matches:        9
Templates with non-zero rank 1 freq: 8

filt-contains-hole_st-cov-xent_nt-20
Templates containing matches:        19
Templates with non-zero rank 1

In [11]:
# Show both frames returned for one multimean configuration on the 'train'
# section: the accuracy table first, then the per-template precision/recall table.
for df in analyze_anysplit_multimean_one('filt-none_st-cov-examples_nt-40', 'train'):
    IPython.display.display(df)



Unnamed: 0,Accuracy @ 1,Accuracy @ 2
0,0.99876,0.99992


Unnamed: 0,Head,Matches,Precision @ 1 all,Precision @ 1 templates only,Precision @ 2 all,Precision @ 2 templates only,Recall @ 1 all,Recall @ 1 templates only,Recall @ 2 all,Recall @ 2 templates only
0,ClassDef,345.0,1.0,1.0,0.650943,0.649718,0.066667,0.066667,1.0,1.0
1,Module,532.0,1.0,1.0,1.0,1.0,0.097744,0.097744,1.0,1.0
2,ClassDef-body,,,,,,,,,
3,FunctionDef,131.0,0.972222,0.954198,0.954198,0.231449,0.801527,0.954198,0.954198,1.0
4,FunctionDef,533.0,0.99759,0.998088,0.99811,0.574353,0.776735,0.979362,0.990619,1.0
5,FunctionDef,154.0,,,0.962617,0.147935,0.0,0.0,0.668831,1.0
6,FunctionDef,550.0,1.0,1.0,1.0,0.516432,0.001818,0.005455,0.012727,1.0
7,FunctionDef,712.0,1.0,1.0,0.161426,0.635147,0.008427,0.014045,0.108146,1.0
8,Expr,538.0,1.0,1.0,1.0,0.523856,0.877323,0.990706,0.992565,1.0
9,Call,253.0,1.0,1.0,0.234476,0.155215,0.612648,0.612648,1.0,1.0


In [12]:
# Show both frames returned for one multimean configuration on the 'val'
# section: the accuracy table first, then the per-template precision/recall table.
for df in analyze_anysplit_multimean_one('filt-none_st-cov-examples_nt-40', 'val'):
    IPython.display.display(df)



Unnamed: 0,Accuracy @ 1,Accuracy @ 2
0,0.96555,0.985327


Unnamed: 0,Head,Matches,Precision @ 1 all,Precision @ 1 templates only,Precision @ 2 all,Precision @ 2 templates only,Recall @ 1 all,Recall @ 1 templates only,Recall @ 2 all,Recall @ 2 templates only
0,ClassDef,41.0,,,0.621212,0.621212,0.0,0.0,1.0,1.0
1,Module,66.0,1.0,1.0,1.0,1.0,0.075758,0.075758,1.0,1.0
2,ClassDef-body,,,,,,,,,
3,FunctionDef,12.0,0.818182,0.6875,0.6875,0.169014,0.75,0.916667,0.916667,1.0
4,FunctionDef,67.0,0.981481,0.984615,0.984615,0.544715,0.791045,0.955224,0.955224,1.0
5,FunctionDef,18.0,,,0.875,0.138462,0.0,0.0,0.777778,1.0
6,FunctionDef,74.0,,,0.0,0.552239,0.0,0.0,0.0,1.0
7,FunctionDef,92.0,,,0.237288,0.652482,0.0,0.0,0.152174,1.0
8,Expr,68.0,0.983051,0.985294,0.985294,0.544715,0.852941,0.985294,0.985294,0.985294
9,Call,32.0,1.0,1.0,0.230216,0.156098,0.625,0.625,1.0,1.0
