In [1]:
import pathlib

import numpy as np
import pandas as pd

import hier
import metrics

In [10]:
# Experiment output directories keyed by depth. All active runs live under one
# common root and differ only in the `flat-depth-<depth>` suffix.
_trunc_root = '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc'
exp_dirs = {depth: f'{_trunc_root}/flat-depth-{depth}/' for depth in (3, 4, 5, 6)}
# Depth 7 is currently disabled; candidate directories that were tried:
# exp_dirs[7] = '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc/flat/'
# exp_dirs[7] = '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-03-31-inat21mini/flat_softmax-lr-0.01-b-64-wd-0.0003-ep-20'

In [11]:
# Load the iNat21 edge file and build the full hierarchy from it.
# `names` is the second value returned alongside the tree (presumably the node
# names, in node order -- confirm against `hier`).
with open('resources/hierarchy/inat21.csv') as f:
    edges = hier.load_edges(f)
    tree, names = hier.make_hierarchy_from_edges(edges)

In [12]:
# For every depth that has an experiment directory, collect (keyed by depth):
#   subtrees[depth]     - the hierarchy object used at that depth
#   node_subsets[depth] - result of hier.find_subset_index(names, subtree_names)
#   projections[depth]  - result of hier.find_projection(tree, node_subset)
# NOTE(review): presumably node_subsets maps sub-tree node index -> full-tree
# node index and projections maps full-tree node index -> sub-tree node index;
# confirm against the `hier` module.
subtrees, node_subsets, projections = {}, {}, {}

for depth in exp_dirs:
    if depth != 7:
        # Truncated hierarchy: read its edge file and relate it to the full tree.
        with open(f'resources/subtree/inat21_max_depth_{depth}.csv') as f:
            subtree, subtree_names = hier.make_hierarchy_from_edges(hier.load_edges(f))
        node_subset = hier.find_subset_index(names, subtree_names)
        projection = hier.find_projection(tree, node_subset)
    else:
        # Depth 7 is handled as the full tree itself, so both index arrays are
        # simply 0..num_nodes-1.
        subtree = tree
        node_subset = np.arange(tree.num_nodes())
        projection = np.arange(tree.num_nodes())

    subtrees[depth], node_subsets[depth], projections[depth] = subtree, node_subset, projection

In [13]:
# NOTE: despite its name, `err` stores the fraction of examples whose projected
# prediction equals the projected label (a match rate, not an error rate),
# keyed by (train_depth, test_depth).
err = {}

for train_depth, exp_dir in exp_dirs.items():
    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
    # prediction files that come from trusted experiment directories.
    outputs = np.load(pathlib.Path(exp_dir) / 'predictions/output-epoch-0020.pkl', allow_pickle=True)

    # 'majority' predictions can be projected to another tree after the fact.
    # Using 'leaf' predictions instead would require loading the full
    # probability vector and running max-leaf inference within the sub-tree.
    subset_index = node_subsets[train_depth]
    # Translate sub-tree indices from the training run through the subset index
    # so that they can be fed to any depth's projection below.
    pred = subset_index[outputs['pred']['majority']]
    gt = subset_index[outputs['gt']]

    for test_depth in exp_dirs:
        # Only evaluate at depths no deeper than the one the model was trained on.
        if test_depth <= train_depth:
            # Project both labels and predictions to the test depth, then compare.
            project = projections[test_depth]
            is_match = project[gt] == project[pred]
            err[train_depth, test_depth] = np.mean(is_match)

In [14]:
# Results table: rows are the training depth, columns the evaluation depth.
# The index levels are named so the two axes cannot be confused when reading
# the rendered table.
# NOTE: the values are match rates (fraction where projected prediction ==
# projected label) even though the variable is called `err`.
pd.Series(err).rename_axis(['train_depth', 'test_depth']).unstack()

Unnamed: 0,3,4,5,6,7
3,0.78844,,,,
4,0.81713,0.48301,,,
5,0.83108,0.52361,0.35589,,
6,0.8327,0.55167,0.40487,0.26763,
7,0.88844,0.68037,0.55616,0.42105,0.31846
