In [1]:
import csv
import json
import pathlib

import networkx as nx
import numpy as np
import scipy.io

import util

In [2]:
# Root directory for input and output resource files. This is a relative path, so it is
# resolved against the kernel's current working directory when files are opened.
RESOURCES_DIR = pathlib.Path('..') / 'resources'

In [3]:
# Load graph from FiveAI paper.
# Each line of imagenet_isa.txt is parsed as space-separated fields; every row is expected
# to hold exactly two node identifiers (u, v), which become a directed edge u -> v.
# newline='' is what the csv module asks for on any file object handed to csv.reader, so
# that the reader (not the text layer) is responsible for interpreting line endings.
with open(RESOURCES_DIR / 'hierarchy_raw/fiveai/imagenet_isa.txt', newline='') as f:
    r = csv.reader(f, delimiter=' ')
    edges = list(r)

# Build the directed graph in one call.
g = nx.DiGraph()
g.add_edges_from(edges)

In [4]:
# Read the ILSVRC devkit metadata and extract the WNIDs in the order they are stored.
meta = scipy.io.loadmat(RESOURCES_DIR / 'hierarchy_raw/imagenet/meta.mat')
synsets = meta['synsets'].squeeze()
# Only the first 1000 records are used.
# NOTE(review): presumably these are the 1000 classification classes and the remaining
# records are higher-level synsets - confirm against the devkit readme.
# Each 'WNID' entry is an array wrapper, so .item() unwraps it to a plain Python value.
label_order = [wnid.item() for wnid in synsets['WNID'][:1000]]

In [5]:
# The nodes with out-degree 0 in the graph should be exactly the devkit labels.
{node for node, out_deg in g.out_degree() if out_deg == 0} == set(label_order)

True

In [6]:
# Put the labels in ascending order; rebinding (rather than sorting in place) keeps this
# cell safe to re-run and leaves the list loaded from the devkit untouched.
label_order = sorted(label_order)

In [7]:
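# Optional sanity check (requires a local copy of the ImageNet dataset): the sorted
# label order should equal the `wnids` attribute of torchvision's ImageNet dataset.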

# import torchvision
# dataset = torchvision.datasets.ImageNet('/home/jack/data/torchvision/imagenet/', 'train')
# assert label_order == dataset.wnids

In [8]:
# Rebind `edges` (previously the raw rows read from imagenet_isa.txt) to the edge
# collection produced by the project helper from the graph and the sorted label order.
# NOTE(review): presumably a depth-first edge traversal whose visiting order is driven by
# `label_order` - confirm against util.dfs_edges_with_order.
edges = util.dfs_edges_with_order(g, label_order)

In [9]:
# Write the ordered edges to CSV, one row per element of `edges`.
# newline='' is required for files handed to csv.writer: the writer emits '\r\n' itself,
# and without it the text layer on Windows would translate that into '\r\r\n', which
# shows up as a blank line after every row.
with open(RESOURCES_DIR / 'hierarchy/imagenet_fiveai.csv', 'w', newline='') as f:
    w = csv.writer(f)
    w.writerows(edges)

In [10]:
# Rebuild the graph from the ordered edge list and report how much it branches.
g = nx.DiGraph()
g.add_edges_from(edges)

# Out-degree of every node, in the graph's node iteration order.
degree = np.array([out_deg for _, out_deg in g.out_degree()])

is_internal = degree > 0    # at least one outgoing edge
is_branching = degree > 1   # more than one outgoing edge ("non-trivial")
branch_factors = degree[is_branching]

print('internal nodes:', np.sum(is_internal))
print('non-trivial internal nodes:', np.sum(is_branching))
print('branch factor: mean {:.3g}, median {:.3g}'.format(
    np.mean(branch_factors), np.median(branch_factors)))

internal nodes: 372
non-trivial internal nodes: 372
branch factor: mean 3.69, median 2


In [11]:
# Look up a WordNet synset name for every node in the graph and save the mapping as JSON.

import nltk
from nltk.corpus import wordnet as wn


def _synset_name(key):
    """Return the WordNet synset name for a node key.

    The first character of `key` is passed as the part of speech and the remaining
    characters are parsed as the integer synset offset.
    """
    pos, offset = key[0], int(key[1:])
    return wn.synset_from_pos_and_offset(pos, offset).name()


key_to_name = {key: _synset_name(key) for key in g}

with open(RESOURCES_DIR / 'class_names/imagenet_fiveai.json', 'w') as f:
    json.dump(key_to_name, f)