In [1]:
import json

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
import torchvision

import datasets
import hier

In [2]:
# Build the class hierarchy from its edge list. The hierarchy is constructed
# while the file is still open in case `hier.load_edges` reads lazily.
with open('resources/hierarchy/tiny_imagenet_fiveai.csv') as edges_file:
    edges = hier.load_edges(edges_file)
    tree, node_keys = hier.make_hierarchy_from_edges(edges)

# Lookup table from node key to the name printed in the summaries below.
with open('resources/class_names/imagenet_fiveai.json') as names_file:
    key_to_name = json.load(names_file)

In [3]:
# For each size threshold: select a node subset, save its keys to a text file
# and print the leaves of the resulting subtree.
for max_leaf_size in [10, 20, 50]:
    # A node may act as a leaf when it has at most `max_leaf_size` leaf
    # descendants. A node is retained only if its parent may NOT act as a
    # leaf, which truncates the tree below every node that is small enough.
    # NOTE(review): root_loop=True presumably makes the root its own parent,
    # so the root is dropped if it qualifies as a leaf itself -- confirm.
    node_size = tree.num_leaf_descendants()
    can_be_leaf = node_size <= max_leaf_size
    parent = tree.parents(root_loop=True)
    keep = ~can_be_leaf[parent]

    # Persist the keys of the retained nodes, one key per line.
    key_subset = [key for key, is_kept in zip(node_keys, keep) if is_kept]
    subset_path = f'resources/class_subset/tiny_imagenet_fiveai_max_leaf_size_{max_leaf_size}.txt'
    with open(subset_path, 'w') as subset_file:
        subset_file.write(''.join(key + '\n' for key in key_subset))

    kept_nodes = np.flatnonzero(keep)
    subtree, node_subset, project = hier.subtree(tree, kept_nodes)

    # Sanity check: every leaf of the full tree must land on a leaf of the
    # truncated tree after projection.
    is_subtree_leaf = subtree.leaf_mask()
    projected_leaves = project[tree.leaf_subset()]
    assert np.all(is_subtree_leaf[projected_leaves])

    print(f'max leaf size {max_leaf_size}: leaf nodes {subtree.num_leaf_nodes()}')
    # Translate subtree leaf indices back to original node keys, then names.
    leaf_keys = [node_keys[node_subset[leaf]] for leaf in subtree.leaf_subset()]
    print([key_to_name[key] for key in leaf_keys], end='\n\n')

max leaf size 10: leaf nodes 72
['feline.n.01', 'mechanism.n.05', 'equipment.n.01', 'furnishing.n.02', 'natural_object.n.01', 'amphibian.n.03', 'geological_formation.n.01', 'substance.n.01', 'implement.n.01', 'instrument.n.01', 'canine.n.02', 'insect.n.01', 'bear.n.01', 'public_transport.n.01', 'food.n.01', 'rodent.n.01', 'covering.n.02', 'musical_instrument.n.01', 'support.n.10', 'fish.n.01', 'ungulate.n.01', 'restraint.n.06', 'heater.n.01', 'garment.n.01', 'primate.n.02', 'mollusk.n.01', 'bird.n.01', 'trap.n.01', 'echinoderm.n.01', 'bag.n.01', 'arachnid.n.01', 'procyonid.n.01', 'vessel.n.03', 'hosiery.n.01', 'wheeled_vehicle.n.01', 'remote_control.n.01', 'source_of_illumination.n.01', 'machine.n.01', 'marsupial.n.01', 'gown.n.05', 'triumphal_arch.n.01', 'headdress.n.01', 'barrier.n.01', 'crustacean.n.01', 'beacon.n.03', 'coelenterate.n.01', 'establishment.n.04', 'crane.n.04', 'vegetable.n.01', 'box.n.01', 'breathing_device.n.01', 'aquatic_mammal.n.01', 'bridge.n.01', 'tableware.n.01'

In [4]:
# For each depth limit: keep the nodes whose depth does not exceed the limit,
# save their keys to a text file and print the leaves of the resulting subtree.
for max_depth in [3, 4, 5]:
    node_depth = tree.depths()
    keep = node_depth <= max_depth

    # Persist the keys of the retained nodes, one key per line.
    key_subset = [key for key, is_kept in zip(node_keys, keep) if is_kept]
    subset_path = f'resources/class_subset/tiny_imagenet_fiveai_max_depth_{max_depth}.txt'
    with open(subset_path, 'w') as subset_file:
        subset_file.write(''.join(key + '\n' for key in key_subset))

    kept_nodes = np.flatnonzero(keep)
    subtree, node_subset, project = hier.subtree(tree, kept_nodes)

    # Sanity check: every leaf of the full tree must land on a leaf of the
    # truncated tree after projection.
    is_subtree_leaf = subtree.leaf_mask()
    projected_leaves = project[tree.leaf_subset()]
    assert np.all(is_subtree_leaf[projected_leaves])

    print(f'max depth {max_depth}: leaf nodes {subtree.num_leaf_nodes()}')
    # Translate subtree leaf indices back to original node keys, then names.
    leaf_keys = [node_keys[node_subset[leaf]] for leaf in subtree.leaf_subset()]
    print([key_to_name[key] for key in leaf_keys], end='\n\n')

max depth 3: leaf nodes 13
['organism.n.01', 'artifact.n.01', 'natural_object.n.01', 'cliff.n.01', 'beverage.n.01', 'nutriment.n.01', 'shore.n.01', 'natural_elevation.n.01', 'foodstuff.n.02', 'mushroom.n.05', 'mashed_potato.n.01', 'bell_pepper.n.02', 'cruciferous_vegetable.n.01']

max depth 4: leaf nodes 23
['animal.n.01', 'instrumentality.n.03', 'fruit.n.01', 'cliff.n.01', 'espresso.n.01', 'dish.n.02', 'covering.n.02', 'seashore.n.01', 'course.n.07', 'commodity.n.01', 'coral_reef.n.01', 'bread.n.01', 'condiment.n.01', 'lakeside.n.01', 'mountain.n.01', 'structure.n.01', 'mushroom.n.05', 'tableware.n.01', 'teddy.n.01', 'mashed_potato.n.01', 'comic_book.n.01', 'bell_pepper.n.02', 'cauliflower.n.02']

max depth 5: leaf nodes 46
['vertebrate.n.01', 'device.n.01', 'equipment.n.01', 'furnishing.n.02', 'edible_fruit.n.01', 'cliff.n.01', 'espresso.n.01', 'implement.n.01', 'invertebrate.n.01', 'conveyance.n.03', 'pizza.n.01', 'protective_covering.n.01', 'potpie.n.01', 'seashore.n.01', 'dessert.