In [1]:
import pathlib

import numpy as np
import pandas as pd

import hier
import metrics

In [2]:
# Common parent directory of the runs compared in this notebook.
_TRUNC_ROOT = '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc/'

# Experiment directory for each depth key. Keys 3-6 follow the
# 'flat-depth-<key>/' naming scheme; key 7 is the run stored under 'flat/'.
# lr = 0.1
exp_dirs = {depth: _TRUNC_ROOT + f'flat-depth-{depth}/' for depth in range(3, 7)}
exp_dirs[7] = _TRUNC_ROOT + 'flat/'

# Alternative set of runs (lr = 0.01), currently disabled:
#     3: '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc/flat-depth-3-lr-0.01/',
#     4: '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc/flat-depth-4-lr-0.01/',
#     5: '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc/flat-depth-5-lr-0.01/',
#     6: '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc/flat-depth-6-lr-0.01/',
#     7: '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-03-31-inat21mini/flat_softmax-lr-0.01-b-64-wd-0.0003-ep-20',

In [3]:
# Build the full hierarchy and its node names from the edge list on disk.
# The hierarchy is constructed while the file is still open, in case
# `hier.load_edges` reads lazily.
with open('resources/hierarchy/inat21.csv') as edges_file:
    edges = hier.load_edges(edges_file)
    tree, names = hier.make_hierarchy_from_edges(edges)

In [4]:
# Per-depth lookup tables, keyed like `exp_dirs`:
#   subtrees[d]     - hierarchy used at depth d
#   node_subsets[d] - index array into the full tree for that hierarchy's nodes
#   projections[d]  - array indexed by full-tree node, as returned by
#                     `hier.find_projection` (identity for the full tree)
subtrees, node_subsets, projections = {}, {}, {}

for depth in exp_dirs:
    if depth != 7:
        # Truncated hierarchy: read the sub-tree and relate its nodes to the
        # full tree via the node names.
        with open(f'resources/subtree/inat21_max_depth_{depth}.csv') as f:
            subtree, subtree_names = hier.make_hierarchy_from_edges(hier.load_edges(f))
        node_subset = hier.find_subset_index(names, subtree_names)
        projection = hier.find_projection(tree, node_subset)
    else:
        # Depth 7 stands for the untruncated tree, so both maps are the identity.
        num_nodes = tree.num_nodes()
        subtree = tree
        node_subset, projection = np.arange(num_nodes), np.arange(num_nodes)

    subtrees[depth], node_subsets[depth], projections[depth] = (
        subtree, node_subset, projection)

In [5]:
# acc[train_depth, test_depth]: fraction of examples whose prediction and label
# coincide after both are projected to `test_depth`. Only pairs with
# test_depth <= train_depth are evaluated.
acc = {}

for train_depth, exp_dir in exp_dirs.items():
    output_file = pathlib.Path(exp_dir) / 'predictions/output-epoch-0020.pkl'
    # NOTE: allow_pickle=True unpickles the file contents; only point this at
    # trusted experiment outputs.
    outputs = np.load(output_file, allow_pickle=True)

    # To use 'leaf' predictions, we should load the full probability vector
    # and do max-leaf inference within the sub-tree.
    # On the other hand, 'majority' inference can be projected post-hoc.
    # Stored indices refer to the training sub-tree; map them to full-tree nodes.
    train_node_subset = node_subsets[train_depth]
    pred = train_node_subset[outputs['pred']['majority']]
    gt = train_node_subset[outputs['gt']]

    for test_depth in (d for d in exp_dirs if d <= train_depth):
        # Project both labels and predictions to the test hierarchy.
        project = projections[test_depth]
        acc[train_depth, test_depth] = np.mean(project[gt] == project[pred])

In [6]:
# Pivot the (train_depth, test_depth) keys into a table: one row per training
# depth, one column per test depth (the last key level moves to the columns).
pd.Series(acc).unstack(level=-1)

Unnamed: 0,3,4,5,6,7
3,0.78844,,,,
4,0.81713,0.48301,,,
5,0.83108,0.52361,0.35589,,
6,0.8327,0.55167,0.40487,0.26763,
7,0.83447,0.57058,0.4296,0.306,0.22784


In [7]:
# Check whether any classes are much better or worse.
# per_class[train_depth, test_depth] holds one accuracy per entry of the test
# hierarchy's `leaf_subset()`, computed over the examples whose projected
# ground truth equals that entry.

per_class = {}

for train_depth, exp_dir in exp_dirs.items():
    output_file = pathlib.Path(exp_dir) / 'predictions/output-epoch-0020.pkl'
    # NOTE: allow_pickle=True unpickles the file contents; only point this at
    # trusted experiment outputs.
    outputs = np.load(output_file, allow_pickle=True)

    # To use 'leaf' predictions, we should load the full probability vector
    # and do max-leaf inference within the sub-tree.
    # On the other hand, 'majority' inference can be projected post-hoc.
    # Stored indices refer to the training sub-tree; map them to full-tree nodes.
    train_node_subset = node_subsets[train_depth]
    pred = train_node_subset[outputs['pred']['majority']]
    gt = train_node_subset[outputs['gt']]

    for test_depth in (d for d in exp_dirs if d <= train_depth):
        # Project both labels and predictions to the test hierarchy.
        project = projections[test_depth]
        gt_proj, pred_proj = project[gt], project[pred]
        is_correct = (gt_proj == pred_proj)

        # Mean correctness among the examples of each class in turn.
        class_accuracies = []
        for y in subtrees[test_depth].leaf_subset():
            class_accuracies.append(np.mean(is_correct[gt_proj == y]))
        per_class[train_depth, test_depth] = np.array(class_accuracies)

In [8]:
# Summary statistics of the per-class accuracy difference between two training
# depths (a minus b, with a < b), for every test depth that both models can be
# evaluated at (test <= a).
depths = list(exp_dirs)
percentiles = [0.5, 0.85, 0.95]

rows = {
    (test_depth, depth_a, depth_b): pd.DataFrame(
        [per_class[depth_a, test_depth] - per_class[depth_b, test_depth]]
    ).T.describe(percentiles).T
    for test_depth in depths
    for depth_a in depths if depth_a >= test_depth
    for depth_b in depths if depth_b > depth_a
}

# Stack the one-row summaries, drop the innermost index level left over from
# each one-row frame, name the remaining levels and discard the 'count' column.
table = (
    pd.concat(rows)
    .droplevel(3)
    .rename_axis(index=['test', 'a', 'b'])
    .drop(columns='count')
)
# table.format('{:+.1%}')

def highlight(s):
    """Return one CSS declaration per element of ``s`` for use with ``Styler.apply``.

    Args:
        s: A named pandas Series (one column of the styled table).

    Returns:
        A list with ``len(s)`` CSS strings. Strictly positive values get
        ``'color: green;'`` and all other values ``'color:black'``. The column
        named ``'std'`` is left unstyled: every entry is the empty string.
    """
    if s.name == 'std':
        # Styler.apply expects a list-like of the same length as its input.
        # Empty strings mean "no style"; the previous `None` return only
        # worked when another column happened to be processed first.
        return [''] * len(s)
    return ['color: green;' if x > 0 else 'color:black' for x in s]

# Colour the cells via `highlight`, then render every value as a signed
# percentage with one decimal place.
styler = table.style.apply(highlight)
styler.format('{:+.1%}')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std,min,50%,85%,95%,max
test,a,b,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3,3,4,-5.8%,+9.1%,-40.0%,-3.3%,+0.0%,+1.5%,+16.0%
3,3,5,-9.1%,+11.5%,-45.0%,-8.0%,+0.0%,+2.8%,+18.0%
3,3,6,-15.7%,+15.7%,-60.0%,-10.0%,-2.8%,+0.0%,+16.0%
3,3,7,-18.8%,+16.4%,-65.0%,-15.0%,-4.5%,-1.6%,+12.0%
3,4,5,-3.3%,+7.3%,-22.0%,-1.7%,+1.4%,+5.3%,+16.0%
3,4,6,-9.9%,+12.3%,-50.0%,-7.2%,+0.5%,+4.8%,+6.0%
3,4,7,-13.0%,+13.1%,-46.7%,-10.0%,-0.2%,+2.7%,+3.6%
3,5,6,-6.6%,+9.0%,-40.0%,-5.7%,+0.5%,+4.0%,+10.0%
3,5,7,-9.7%,+10.2%,-40.0%,-10.0%,+0.0%,+4.1%,+10.0%
3,6,7,-3.1%,+10.5%,-30.0%,-2.1%,+0.7%,+8.8%,+50.0%
