In [1]:
import csv
import pathlib

import networkx as nx
import numpy as np

import util

In [2]:
# Root of the resources tree; a relative path, so it resolves against the
# working directory the notebook is run from.
RESOURCES_DIR = pathlib.Path('..') / 'resources'

In [3]:
# Load FiveAI hierarchy for ImageNet.
# Every CSV row is added to `g` as one directed edge.
# NOTE(review): rows are presumably (parent, child) pairs -- confirm against the CSV.
# newline='' is what the csv module requires for files handed to csv.reader,
# so that line endings are interpreted by the reader rather than by open().
with open(RESOURCES_DIR / 'hierarchy/imagenet_fiveai.csv', newline='') as f:
    super_edges = list(csv.reader(f))
g = nx.DiGraph()
g.add_edges_from(super_edges)

In [4]:
# Get label order using order of appearance.
# The split files are read in the order listed below (train, val, test) and
# their rows are concatenated, so a label's position is fixed by the first
# row in which it appears.

paths = [
    RESOURCES_DIR / 'hierarchy_raw/mini_imagenet/train.csv',
    RESOURCES_DIR / 'hierarchy_raw/mini_imagenet/val.csv',
    RESOURCES_DIR / 'hierarchy_raw/mini_imagenet/test.csv',
]

examples = []
for path in paths:
    # newline='' is what the csv module requires for files handed to
    # csv.reader, so that the reader itself interprets line endings.
    with open(path, newline='') as f:
        r = csv.reader(f)
        next(r)  # Skip header.
        examples.extend(r)

# Each row is unpacked as exactly two fields; the second one is the label.
# NOTE(review): util.unique_in_order is defined outside this notebook;
# presumably it drops repeats while keeping first-seen order -- confirm.
label_order = list(util.unique_in_order(label for _, label in examples))
len(label_order)

100

In [5]:
# Derive the edge list for the current `label_order` from the hierarchy `g`.
# NOTE(review): util.dfs_edges_with_order is defined outside this notebook;
# presumably it returns the edges of the part of `g` that reaches the given
# labels, ordered to follow `label_order` -- confirm against util.
edges = util.dfs_edges_with_order(g, label_order)
len(edges)

259

In [6]:
# Write the current `edges` list to the Mini-ImageNet hierarchy CSV, one edge
# per row.
# newline='' keeps open() from translating the '\r\n' row terminator that
# csv.writer emits; without it, platforms that write '\n' as '\r\n' produce
# '\r\r\n' and the file shows a blank line after every row.
with open(RESOURCES_DIR / 'hierarchy/mini_imagenet_fiveai.csv', 'w', newline='') as f:
    w = csv.writer(f)
    w.writerows(edges)

In [7]:
# Load label order for Tiny ImageNet.
# The order is taken directly from the line order of wnids.txt.

wnids_path = RESOURCES_DIR / 'hierarchy_raw/tiny_imagenet/wnids.txt'
label_order = wnids_path.read_text().splitlines()
len(label_order)

200

In [8]:
# Derive the edge list for the current `label_order` from the hierarchy `g`.
# This rebinds `edges`, replacing the list computed for the previous label set.
# NOTE(review): util.dfs_edges_with_order is defined outside this notebook;
# presumably it returns the edges of the part of `g` that reaches the given
# labels, ordered to follow `label_order` -- confirm against util.
edges = util.dfs_edges_with_order(g, label_order)
len(edges)

412

In [9]:
# Write the current `edges` list to the Tiny ImageNet hierarchy CSV, one edge
# per row.
# newline='' keeps open() from translating the '\r\n' row terminator that
# csv.writer emits; without it, platforms that write '\n' as '\r\n' produce
# '\r\r\n' and the file shows a blank line after every row.
with open(RESOURCES_DIR / 'hierarchy/tiny_imagenet_fiveai.csv', 'w', newline='') as f:
    w = csv.writer(f)
    w.writerows(edges)

In [10]:
# Check number of non-trivial internal nodes.
# The statistics describe whatever `edges` currently holds (the Tiny ImageNet
# edge list when the notebook is run top to bottom).
# The graph gets its own name on purpose: rebinding `g` here would replace the
# full ImageNet hierarchy, and re-running the edge-extraction cells afterwards
# would then silently operate on this subset instead.
subset_g = nx.DiGraph()
subset_g.add_edges_from(edges)

# Out-degree of every node, in the graph's node iteration order.
degree = np.array([deg for _, deg in subset_g.out_degree()])
# Nodes with more than one outgoing edge are the ones that actually branch.
branching = degree[degree > 1]

print('internal nodes:', np.sum(degree > 0))
print('non-trivial internal nodes:', np.sum(degree > 1))
print('branch factor: mean {:.3g}, median {:.3g}'.format(
    np.mean(branching), np.median(branching)))

internal nodes: 213
non-trivial internal nodes: 98
branch factor: mean 3.03, median 2
