In [1]:
import re
import ipywidgets as widgets
from IPython.display import display, clear_output

In [2]:
# Vehicle taxonomy that drives the cascading dropdowns.
# A dict maps a class name to its sub-level; a list holds leaf class names.
# Key/list order is the order in which the options are offered.
classes_hierarchy = {
    "ground": {
        "wheels": ["truck", "van", "car"],
        "track": {
            "industrial": ["bulldozer", "combine harvester"],
            "military": ["tank", "artillery"],
        },
    },
    "water": ["wind-powered", "human-powered", "engine-powered"],
    "aerial": ["engine-powered", "other"],
}

In [3]:
# Placeholder documents to label; each one gets its own row of dropdowns.
docs = ["vehicle 1", "vehicle 2", "vehicle 3"]

In [4]:
def get_labels(docs, hierarchy, level_names=None):
    """Show one row of cascading dropdowns per document and collect the picks.

    Each row starts with a single dropdown for the top level of ``hierarchy``.
    Picking a value reveals a dropdown for the next level (if any); picking
    the blank entry, or a different value, discards every deeper selection
    of that document.

    Parameters
    ----------
    docs : iterable of str
        Texts to label; each is shown in a ``Label`` at the start of its row.
    hierarchy : dict or list
        Class taxonomy. A dict maps a class name to its sub-level (another
        dict or a list); a list holds leaf class names.
    level_names : sequence of str, optional
        Dropdown captions, one per depth. Depths without a caption fall back
        to ``"Level <n>"``.

    Returns
    -------
    list of (doc, list of str)
        One pair per document. The inner lists start empty and are updated
        in place by the dropdown callbacks, so the returned object reflects
        the current selections whenever it is inspected.
    """
    docs = list(docs)  # materialise once so one-shot iterables can be reused
    level_names = [] if level_names is None else list(level_names)
    labels = [[] for _ in docs]

    # A single persistent container: callbacks swap its children instead of
    # calling clear_output()/display(), whose output is not reliably attached
    # to the originating cell when emitted from a widget callback.
    container = widgets.VBox()

    def level_name(class_level):
        """Caption for a depth, with a generic fallback past ``level_names``."""
        if class_level < len(level_names):
            return level_names[class_level]
        return f'Level {class_level + 1}'

    def to_options(item):
        """Selectable names at a hierarchy node (dict keys or list items)."""
        return list(item)

    def on_single_change(doc_index, class_level, change):
        """Apply a dropdown change to ``labels`` and rebuild the rows."""
        if change['type'] != 'change' or change['name'] != 'value':
            return
        new_value = change['new']
        # Anything chosen below the edited level no longer applies.
        kept = labels[doc_index][:class_level]
        labels[doc_index][:] = kept + [new_value] if new_value else kept
        update_widgets()

    def make_dropdown(doc_index, class_level, options, value=''):
        """Build one observed dropdown; the blank entry means "not chosen"."""
        control = widgets.Dropdown(
            options=['', *options],
            description=level_name(class_level),
            value=value,
        )

        def observer(change):
            return on_single_change(doc_index, class_level, change)

        # Only value changes matter; skip notifications for other traits.
        control.observe(observer, names='value')
        return control

    def update_widgets():
        """Rebuild every row from the current ``labels`` state."""
        rows = []
        for doc_index, doc in enumerate(docs):
            doc_labels = labels[doc_index]
            doc_controls = []
            current_level = hierarchy

            # One pre-selected dropdown per level already chosen.
            for class_level, label in enumerate(doc_labels):
                doc_controls.append(
                    make_dropdown(doc_index, class_level,
                                  to_options(current_level), label)
                )
                # Lists are leaves: nothing to descend into after them.
                current_level = (
                    current_level.get(label)
                    if isinstance(current_level, dict) else None
                )

            # One more, unselected dropdown while deeper levels remain.
            if current_level:
                doc_controls.append(
                    make_dropdown(doc_index, len(doc_labels),
                                  to_options(current_level))
                )

            rows.append(widgets.HBox([widgets.Label(value=doc), *doc_controls]))

        container.children = rows

    update_widgets()
    clear_output()
    display(container)

    return list(zip(docs, labels))

# Render the labeling UI. `labels` pairs each doc with a list that the
# dropdown callbacks update in place, so inspect it after making selections.
labels = get_labels(
    docs,
    classes_hierarchy,
    level_names=['Kind', 'Type', 'Subtype', 'Sub-subtype'],
)

VBox(children=(HBox(children=(Label(value='vehicle 1'), Dropdown(description='Kind', options=('ground', 'water…

In [5]:
labels

[('vehicle 1', ['ground', 'wheels', 'van']),
 ('vehicle 2', ['water', 'human-powered']),
 ('vehicle 3', ['aerial', 'other'])]