In [1]:
import pathlib

import numpy as np
import pandas as pd

import hier
import metrics

In [2]:
# Experiment directories, keyed by the depth at which the hierarchy was
# truncated for training. Key 7 points at the plain 'flat' run, which the
# notebook treats as the full (untruncated) hierarchy.
_trunc_root = '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc/'

# lr = 0.1
exp_dirs = {depth: f'{_trunc_root}flat-depth-{depth}/' for depth in range(3, 7)}
exp_dirs[7] = f'{_trunc_root}flat/'

# Alternative runs with lr = 0.01 (disabled; kept for reference):
# exp_dirs = {
#     3: '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc/flat-depth-3-lr-0.01/',
#     4: '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc/flat-depth-4-lr-0.01/',
#     5: '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc/flat-depth-5-lr-0.01/',
#     6: '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-04-07-inat21-trunc/flat-depth-6-lr-0.01/',
#     7: '/mnt/ssd1/projects/2022-01-hierarchical/experiments/2022-03-31-inat21mini/flat_softmax-lr-0.01-b-64-wd-0.0003-ep-20',
# }

In [3]:
# Convert from single array to shards that can be streamed.
# For full hierarchy, the probability array is [100000, 10000] > 3GiB.

def convert(pkl_file, npz_file, shard_bytes=2 ** 27):
    """Re-save the 'prob' array of a pickled output as a sharded .npz file.

    The array is split along its first axis with `np.array_split` and each
    piece is stored as a separate positional entry (`arr_0`, `arr_1`, ...),
    so that a reader can load one shard at a time.

    Args:
        pkl_file: Path of the pickled output; it must unpickle to a mapping
            with a 'prob' entry holding a numpy array.
        npz_file: Destination path passed to `np.savez`.
        shard_bytes: Target size of one shard in bytes (default 128 MiB).
    """
    # NOTE: allow_pickle=True can execute arbitrary code while loading;
    # only use this on outputs produced by our own experiments.
    outputs = np.load(pkl_file, allow_pickle=True)
    x = outputs['prob']
    # np.ceil returns a float and gives 0 for an empty array, which
    # np.array_split rejects. Use an integer count and always write at
    # least one shard so that an empty array still round-trips.
    num_shards = max(1, int(np.ceil(x.nbytes / shard_bytes)))
    np.savez(npz_file, *np.array_split(x, num_shards))

# Shard the full output of every experiment. Runs without a pickled full
# output, or with an existing .npz, are skipped so the cell can be re-run.
for exp_dir in map(pathlib.Path, exp_dirs.values()):
    pkl_file = exp_dir / 'predictions/full-output-epoch-0020.pkl'
    npz_file = exp_dir / 'predictions/prob-epoch-0020.npz'
    if not pkl_file.exists() or npz_file.exists():
        continue
    convert(pkl_file=pkl_file, npz_file=npz_file)

In [4]:
# Build the full hierarchy (`tree`) and the accompanying `names` from the
# edge list stored in the CSV file.
with open('resources/hierarchy/inat21.csv') as f:
    edges = hier.load_edges(f)
    tree, names = hier.make_hierarchy_from_edges(edges)

In [5]:
# Per-depth lookup tables:
#   subtrees[depth]     - hierarchy used at that depth
#   node_subsets[depth] - index into the full tree for each sub-tree node
#                         (as returned by hier.find_subset_index)
#   projections[depth]  - result of hier.find_projection for that subset;
#                         later cells index it with full-tree node ids
subtrees, node_subsets, projections = {}, {}, {}

for depth in exp_dirs:
    if depth != 7:
        # Truncated hierarchy: read the sub-tree and relate it to the full tree.
        with open(f'resources/subtree/inat21_max_depth_{depth}.csv') as f:
            subtree, subtree_names = hier.make_hierarchy_from_edges(hier.load_edges(f))
        node_subset = hier.find_subset_index(names, subtree_names)
        projection = hier.find_projection(tree, node_subset)
    else:
        # Depth 7 uses the full tree itself, so both maps are the identity.
        subtree = tree
        node_subset = np.arange(tree.num_nodes())
        projection = np.arange(tree.num_nodes())

    subtrees[depth] = subtree
    node_subsets[depth] = node_subset
    projections[depth] = projection

In [6]:
acc = {}

for train_depth, exp_dir in exp_dirs.items():
    outputs = np.load(
        pathlib.Path(exp_dir) / 'predictions/output-epoch-0020.pkl',
        allow_pickle=True)

    # 'majority' predictions can be projected to a coarser level after the
    # fact. 'leaf' predictions cannot: those need the full probability vector
    # and max-leaf inference inside the sub-tree.
    # Map sub-tree indices of the training hierarchy to full-tree indices.
    to_full = node_subsets[train_depth]
    gt = to_full[outputs['gt']]
    pred = to_full[outputs['pred']['majority']]

    for test_depth in exp_dirs:
        # Only evaluate at levels no deeper than the training level.
        if test_depth <= train_depth:
            project = projections[test_depth]
            acc[train_depth, test_depth] = np.mean(project[gt] == project[pred])

In [7]:
# Rows are training depths, columns are evaluation depths.
acc_series = pd.Series(acc)
acc_series.unstack()

Unnamed: 0,3,4,5,6,7
3,0.78844,,,,
4,0.81713,0.48301,,,
5,0.83108,0.52361,0.35589,,
6,0.8327,0.55167,0.40487,0.26763,
7,0.83447,0.57058,0.4296,0.306,0.22784


In [8]:
# Obtain *per-class* accuracy at each depth.

per_class = {}

for train_depth, exp_dir in exp_dirs.items():
    outputs = np.load(
        pathlib.Path(exp_dir) / 'predictions/output-epoch-0020.pkl',
        allow_pickle=True)

    # 'majority' predictions can be projected to a coarser level after the
    # fact. 'leaf' predictions cannot: those need the full probability vector
    # and max-leaf inference inside the sub-tree.
    # Map sub-tree indices of the training hierarchy to full-tree indices.
    to_full = node_subsets[train_depth]
    gt = to_full[outputs['gt']]
    pred = to_full[outputs['pred']['majority']]
    # The indexed copies above are all that is needed from the pickle.
    del outputs

    for test_depth in exp_dirs:
        # Only evaluate at levels no deeper than the training level.
        if test_depth <= train_depth:
            project = projections[test_depth]
            gt_proj, pred_proj = project[gt], project[pred]
            is_correct = (gt_proj == pred_proj)

            # Accuracy over the examples of each leaf of the test hierarchy.
            leaves = subtrees[test_depth].leaf_subset()
            per_class[train_depth, test_depth] = np.array(
                [np.mean(is_correct[gt_proj == y]) for y in leaves])

In [9]:
# Check whether it is ever advantageous to train at a *coarser* level.
# Consider training at two different levels A < B (i.e. B is deeper).
# Compare the per-class accuracy at some level <= A.

# Compute (level A) - (level B).
# Greater positive numbers mean that coarse training is better.
percentiles = [0.5, 0.85, 0.95]

# One summary row per (test, a, b) with a < b and test <= a, describing the
# per-class accuracy difference (trained at a) - (trained at b).
rows = {
    (test_depth, depth_a, depth_b): pd.DataFrame(
        [per_class[depth_a, test_depth] - per_class[depth_b, test_depth]]
    ).T.describe(percentiles).T
    for depth_a in exp_dirs
    for depth_b in exp_dirs
    if depth_a < depth_b
    for test_depth in exp_dirs
    if test_depth <= depth_a
}

# Sort by (test, a, b), then drop the innermost index level, which only holds
# the label of the single-column frame that was passed to describe().
table = pd.concat(rows).sort_index().droplevel(3)
table.index = table.index.set_names(['test', 'a', 'b'])
del table['count']

def highlight(s):
    """Column-wise style function for `Styler.apply`.

    Args:
        s: One column of the table, as a pandas Series.

    Returns:
        A list with one CSS string per entry: green text where the value is
        positive, black text otherwise. The 'std' column gets empty styles.
    """
    if s.name == 'std':
        # Styler.apply expects a list-like of the same length as the column,
        # so return empty styles rather than a bare None.
        return ['' for _ in s]
    return ['color: green;' if x > 0 else 'color: black;' for x in s]

# Render as signed percentages, with the colours chosen by `highlight`.
styled = table.style.apply(highlight)
styled.format('{:+.1%}')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std,min,50%,85%,95%,max
test,a,b,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3,3,4,-5.8%,+9.1%,-40.0%,-3.3%,+0.0%,+1.5%,+16.0%
3,3,5,-9.1%,+11.5%,-45.0%,-8.0%,+0.0%,+2.8%,+18.0%
3,3,6,-15.7%,+15.7%,-60.0%,-10.0%,-2.8%,+0.0%,+16.0%
3,3,7,-18.8%,+16.4%,-65.0%,-15.0%,-4.5%,-1.6%,+12.0%
3,4,5,-3.3%,+7.3%,-22.0%,-1.7%,+1.4%,+5.3%,+16.0%
3,4,6,-9.9%,+12.3%,-50.0%,-7.2%,+0.5%,+4.8%,+6.0%
3,4,7,-13.0%,+13.1%,-46.7%,-10.0%,-0.2%,+2.7%,+3.6%
3,5,6,-6.6%,+9.0%,-40.0%,-5.7%,+0.5%,+4.0%,+10.0%
3,5,7,-9.7%,+10.2%,-40.0%,-10.0%,+0.0%,+4.1%,+10.0%
3,6,7,-3.1%,+10.5%,-30.0%,-2.1%,+0.7%,+8.8%,+50.0%


In [10]:
# Same thing but always evaluate at coarse level.
# (This is just less information to interpret.)
# Greater positive numbers mean coarser is better.

percentiles = [0.5, 0.85, 0.95]

# One summary row per (test, train) with train deeper than test, describing
# the per-class accuracy difference (trained at test depth) - (trained deeper).
rows = {
    (test_depth, train_depth): pd.DataFrame(
        [per_class[test_depth, test_depth] - per_class[train_depth, test_depth]]
    ).T.describe(percentiles).T
    for test_depth in exp_dirs
    for train_depth in exp_dirs
    if train_depth > test_depth
}

# Drop the innermost index level, which only holds the label of the
# single-column frame that was passed to describe().
table = pd.concat(rows).droplevel(2)
table.index = table.index.set_names(['test', 'train'])
del table['count']

def highlight(s):
    """Column-wise style function for `Styler.apply`.

    Args:
        s: One column of the table, as a pandas Series.

    Returns:
        A list with one CSS string per entry: green text where the value is
        positive, black text otherwise. The 'std' column gets empty styles.
    """
    if s.name == 'std':
        # Styler.apply expects a list-like of the same length as the column,
        # so return empty styles rather than a bare None.
        return ['' for _ in s]
    return ['color: green;' if x > 0 else 'color: black;' for x in s]

# Render as signed percentages, with the colours chosen by `highlight`.
styled = table.style.apply(highlight)
styled.format('{:+.1%}')

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,min,50%,85%,95%,max
test,train,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,4,-5.8%,+9.1%,-40.0%,-3.3%,+0.0%,+1.5%,+16.0%
3,5,-9.1%,+11.5%,-45.0%,-8.0%,+0.0%,+2.8%,+18.0%
3,6,-15.7%,+15.7%,-60.0%,-10.0%,-2.8%,+0.0%,+16.0%
3,7,-18.8%,+16.4%,-65.0%,-15.0%,-4.5%,-1.6%,+12.0%
4,5,-6.3%,+10.2%,-50.0%,-4.1%,+0.0%,+3.0%,+32.5%
4,6,-13.3%,+15.1%,-80.0%,-10.0%,+0.0%,+3.3%,+12.5%
4,7,-16.9%,+16.3%,-90.0%,-15.0%,+0.0%,+1.8%,+12.5%
5,6,-8.6%,+12.0%,-70.0%,-5.9%,+0.0%,+5.0%,+25.0%
5,7,-12.6%,+14.1%,-80.0%,-10.0%,+0.0%,+5.0%,+25.0%
6,7,-5.1%,+11.2%,-70.0%,+0.0%,+3.3%,+10.0%,+50.0%


In [11]:
import infer

In [12]:
# Accuracy with max-leaf inference: for every training depth, restrict the
# predicted probabilities to the nodes of each (coarser or equal) test
# hierarchy and take the argmax over its leaves.
acc = {}

for train_depth in exp_dirs:
    print(train_depth)
    exp_dir = pathlib.Path(exp_dirs[train_depth])
    outputs = np.load(exp_dir / 'predictions/output-epoch-0020.pkl', allow_pickle=True)
    prob_file = exp_dir / 'predictions/prob-epoch-0020.npz'

    train_subset = node_subsets[train_depth]
    train_projection = projections[train_depth]
    # Ground truth expressed as nodes of the full tree.
    gt_train = train_subset[outputs['gt']]

    # Evaluate at every depth that is no deeper than the training depth.
    test_depths = [d for d in exp_dirs if d <= train_depth]
    # Per test depth: the columns of the training output selected for the
    # nodes of the test sub-tree, and that sub-tree's leaf mask.
    columns = {d: train_projection[node_subsets[d]] for d in test_depths}
    leaf_masks = {d: subtrees[d].leaf_mask() for d in test_depths}
    shard_preds = {d: [] for d in test_depths}

    # Read every shard from disk once and score it at all test depths,
    # instead of re-reading the whole archive for each test depth. The
    # `with` block closes the .npz file handle when done.
    with np.load(prob_file, allow_pickle=False) as prob:
        for prob_i in prob.values():
            for d in test_depths:
                shard_preds[d].append(
                    infer.argmax_where(prob_i[:, columns[d]], leaf_masks[d]))

    for d in test_depths:
        pred = np.concatenate(shard_preds[d], axis=0)
        # Project the ground truth into the test hierarchy.
        gt = projections[d][gt_train]
        acc[train_depth, d] = np.mean(gt == pred)

3
4
5
6
7


In [13]:
# Rows are training depths, columns are evaluation depths.
acc_series = pd.Series(acc)
acc_series.unstack()

Unnamed: 0,3,4,5,6,7
3,0.82732,,,,
4,0.85242,0.60735,,,
5,0.86614,0.64661,0.50681,,
6,0.86995,0.68298,0.56823,0.44626,
7,0.87368,0.70484,0.598,0.49295,0.40945


In [14]:
# Obtain *per-class* accuracy at each depth.

# Per-class accuracy with max-leaf inference: for every training depth,
# restrict the predicted probabilities to the nodes of each (coarser or
# equal) test hierarchy and take the argmax over its leaves.
per_class = {}

for train_depth in exp_dirs:
    print(train_depth)
    exp_dir = pathlib.Path(exp_dirs[train_depth])
    outputs = np.load(exp_dir / 'predictions/output-epoch-0020.pkl', allow_pickle=True)
    prob_file = exp_dir / 'predictions/prob-epoch-0020.npz'

    train_subset = node_subsets[train_depth]
    train_projection = projections[train_depth]
    # Ground truth expressed as nodes of the full tree.
    gt_train = train_subset[outputs['gt']]

    # Evaluate at every depth that is no deeper than the training depth.
    test_depths = [d for d in exp_dirs if d <= train_depth]
    # Per test depth: the columns of the training output selected for the
    # nodes of the test sub-tree, and that sub-tree's leaf mask.
    columns = {d: train_projection[node_subsets[d]] for d in test_depths}
    leaf_masks = {d: subtrees[d].leaf_mask() for d in test_depths}
    shard_preds = {d: [] for d in test_depths}

    # Read every shard from disk once and score it at all test depths,
    # instead of re-reading the whole archive for each test depth. The
    # `with` block closes the .npz file handle when done.
    with np.load(prob_file, allow_pickle=False) as prob:
        for prob_i in prob.values():
            for d in test_depths:
                shard_preds[d].append(
                    infer.argmax_where(prob_i[:, columns[d]], leaf_masks[d]))

    for d in test_depths:
        pred = np.concatenate(shard_preds[d], axis=0)
        # Project the ground truth into the test hierarchy.
        gt = projections[d][gt_train]

        is_correct = (gt == pred)
        # Accuracy over the examples of each leaf of the test hierarchy.
        per_class[train_depth, d] = np.array([
            np.mean(is_correct[gt == y])
            for y in subtrees[d].leaf_subset()
        ])

3
4
5
6
7


In [15]:
# Display per-class accuracy statistics.
#
# Note that there are 10 examples per leaf node.
# This makes the numbers round at deep levels.

# One summary row per (test, train) with train at least as deep as test,
# describing the distribution of per-class accuracy.
rows = {
    (test_depth, train_depth): pd.DataFrame(
        [per_class[train_depth, test_depth]]
    ).T.describe().T
    for test_depth in exp_dirs
    for train_depth in exp_dirs
    if train_depth >= test_depth
}

# Drop the innermost index level, which only holds the label of the
# single-column frame that was passed to describe().
table = pd.concat(rows).droplevel(2)
table.index = table.index.set_names(['test', 'train'])
del table['count']

table.style.format('{:.1%}')

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,min,25%,50%,75%,max
test,train,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,3,32.7%,30.3%,0.0%,0.0%,31.1%,54.4%,92.2%
3,4,39.8%,29.0%,0.0%,11.2%,42.0%,61.1%,93.4%
3,5,43.8%,27.5%,0.0%,25.0%,47.5%,63.8%,94.8%
3,6,52.2%,22.2%,0.0%,35.8%,56.7%,64.9%,95.1%
3,7,54.6%,22.0%,5.0%,40.0%,58.8%,66.3%,95.1%
4,4,26.4%,25.1%,0.0%,0.0%,20.0%,47.5%,92.2%
4,5,35.1%,24.4%,0.0%,15.0%,36.5%,53.3%,93.2%
4,6,43.3%,22.4%,0.0%,29.3%,45.9%,60.0%,93.3%
4,7,47.0%,21.8%,0.0%,30.0%,50.0%,62.0%,92.9%
5,5,26.9%,23.7%,0.0%,5.0%,23.3%,45.0%,93.3%


In [16]:
# Look at difference in per-class accuracy between coarse and fine levels.

percentiles = [0.5, 0.85, 0.95]

# One summary row per (test, train) with train deeper than test, describing
# the per-class accuracy difference (trained at test depth) - (trained deeper).
rows = {
    (test_depth, train_depth): pd.DataFrame(
        [per_class[test_depth, test_depth] - per_class[train_depth, test_depth]]
    ).T.describe(percentiles).T
    for test_depth in exp_dirs
    for train_depth in exp_dirs
    if train_depth > test_depth
}

# Drop the innermost index level, which only holds the label of the
# single-column frame that was passed to describe().
table = pd.concat(rows).droplevel(2)
table.index = table.index.set_names(['test', 'train'])
del table['count']

def highlight(s):
    """Column-wise style function for `Styler.apply`.

    Args:
        s: One column of the table, as a pandas Series.

    Returns:
        A list with one CSS string per entry: green text where the value is
        positive, black text otherwise. The 'std' column gets empty styles.
    """
    if s.name == 'std':
        # Styler.apply expects a list-like of the same length as the column,
        # so return empty styles rather than a bare None.
        return ['' for _ in s]
    return ['color: green;' if x > 0 else 'color: black;' for x in s]

# Render as signed percentages, with the colours chosen by `highlight`.
styled = table.style.apply(highlight)
styled.format('{:+.1%}')

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,min,50%,85%,95%,max
test,train,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,4,-7.1%,+10.1%,-45.0%,-4.3%,+0.0%,+1.7%,+8.0%
3,5,-11.1%,+12.8%,-50.0%,-8.0%,+0.0%,+2.2%,+12.0%
3,6,-19.5%,+16.9%,-60.0%,-16.7%,-4.0%,-1.7%,+10.0%
3,7,-21.9%,+17.6%,-66.7%,-20.0%,-4.1%,-1.8%,+10.0%
4,5,-8.7%,+12.7%,-70.0%,-5.3%,+0.0%,+4.1%,+25.0%
4,6,-16.9%,+17.4%,-80.0%,-13.6%,-0.1%,+2.1%,+15.0%
4,7,-20.5%,+18.4%,-90.0%,-18.9%,-2.9%,+0.0%,+20.0%
5,6,-11.5%,+14.0%,-90.0%,-10.0%,+0.0%,+7.1%,+30.0%
5,7,-16.2%,+16.1%,-90.0%,-14.7%,+0.0%,+5.0%,+25.0%
6,7,-5.6%,+13.2%,-80.0%,-5.0%,+10.0%,+10.0%,+50.0%
