# NABirds Class Hierarchy
This notebook explores the class hierarchy of the NABirds dataset to understand how its bird classes are categorized.

In [1]:
# Load necessary packages

import pandas as pd
from anytree import Node, RenderTree

In [2]:
# Read the child/parent class-id pairs from the 'hierarchy.txt' worksheet
# of the dataset workbook.
data = pd.read_excel(
    'dataset_information.xlsx',
    sheet_name='hierarchy.txt',
)

In [3]:
# Inspect the hierarchy frame: each row pairs a child_class_id with its parent_class_id

data

Unnamed: 0,child_class_id,parent_class_id
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0
...,...,...
1005,1006,591
1006,1007,259
1007,1008,704
1008,1009,691


In [4]:
# Rename the sheet's columns and put 'Parent' first for the tree-building code.
#
# The rename is done by label rather than by assigning `data.columns`
# positionally, so this cell is idempotent: once the columns have been reordered,
# a positional assignment on re-run would silently swap the 'Parent' and 'Child'
# labels.
data = data.rename(columns={'child_class_id': 'Child', 'parent_class_id': 'Parent'})
# Label-based selection raises KeyError if a column is missing, whereas
# `reindex(columns=...)` would quietly create an all-NaN column instead.
data = data[['Parent', 'Child']]

In [5]:
# Create tree of classes

def add_nodes(nodes, parent, child):
    """Record a parent -> child edge in the ``nodes`` lookup.

    Args:
        nodes: dict mapping a node name to its ``Node``; updated in place.
        parent: name of the parent node.
        child: name of the child node.

    A ``Node`` is created for either name the first time it is seen, and the
    child's node is then attached beneath the parent's node.
    """
    for name in (parent, child):  # parent first, so it is registered before the child
        if name not in nodes:
            nodes[name] = Node(name)
    nodes[child].parent = nodes[parent]


# Build the id-only tree: `nodes` maps each class id to its Node
nodes = {}
for parent, child in zip(data["Parent"], data["Child"]):
    add_nodes(nodes, parent, child)

# A root is any id that appears as a parent but never as a child
is_root_row = ~data["Parent"].isin(data["Child"])
roots = list(data.loc[is_root_row, "Parent"].unique())

# Render each root's subtree; looping over all roots also handles a forest
for root in roots:
    for row in RenderTree(nodes[root]):
        print("%s%s" % (row.pre, row.node.name))

0
├── 1
│   ├── 24
│   │   └── 610
│   ├── 27
│   │   └── 454
│   ├── 33
│   │   ├── 313
│   │   └── 611
│   ├── 36
│   │   └── 612
│   ├── 42
│   │   └── 455
│   ├── 51
│   │   └── 456
│   ├── 57
│   │   └── 457
│   ├── 60
│   │   └── 458
│   ├── 67
│   │   └── 459
│   ├── 72
│   │   └── 460
│   ├── 81
│   │   ├── 314
│   │   └── 613
│   ├── 87
│   │   ├── 315
│   │   └── 614
│   ├── 92
│   │   ├── 316
│   │   └── 615
│   ├── 98
│   │   └── 461
│   ├── 102
│   │   ├── 317
│   │   └── 616
│   ├── 109
│   │   └── 462
│   ├── 116
│   │   ├── 318
│   │   └── 617
│   ├── 122
│   │   ├── 319
│   │   └── 618
│   ├── 128
│   │   ├── 320
│   │   └── 619
│   ├── 130
│   │   ├── 321
│   │   └── 620
│   ├── 147
│   │   ├── 322
│   │   └── 621
│   ├── 153
│   │   ├── 323
│   │   └── 622
│   ├── 159
│   │   ├── 324
│   │   └── 623
│   ├── 161
│   │   ├── 325
│   │   └── 624
│   ├── 168
│   │   ├── 326
│   │   └── 625
│   ├── 174
│   │   ├── 327
│   │   └── 626
│   ├── 179
│   │   ├── 295
│   │   ├─

## Create version 2 of the tree, with text descriptions of the classes



In [6]:
# Read the class-id -> description table from the 'classes.txt' worksheet
# of the dataset workbook.
classes = pd.read_excel(
    'dataset_information.xlsx',
    sheet_name='classes.txt',
)

In [7]:
# Inspect the classes.txt sheet: each row pairs a child_class_id with its class_description

classes

Unnamed: 0,child_class_id,class_description
0,1,"Ducks, Geese, and Swans"
1,2,"Grouse, Quail, and Allies"
2,3,Loons
3,4,Grebes
4,5,Storks
...,...,...
1005,1006,Fox Sparrow (Thick-billed/Slate-colored)
1006,1007,Summer Tanager (Immature Male)
1007,1008,Orchard Oriole (Female/Juvenile)
1008,1009,Yellow-rumped Warbler (Winter/juvenile Audubon's)


In [8]:
# Prefix every description with its class id, e.g. '(3) Loons'. Tree nodes are
# looked up by their label, so the id prefix keeps labels distinct even if two
# classes were to share the same description.
id_prefix = '(' + classes['child_class_id'].astype(str) + ') '
classes['tree_description'] = id_prefix + classes['class_description']

In [9]:
# Re-inspect classes to confirm the new tree_description column reads '(<id>) <description>'

classes

Unnamed: 0,child_class_id,class_description,tree_description
0,1,"Ducks, Geese, and Swans","(1) Ducks, Geese, and Swans"
1,2,"Grouse, Quail, and Allies","(2) Grouse, Quail, and Allies"
2,3,Loons,(3) Loons
3,4,Grebes,(4) Grebes
4,5,Storks,(5) Storks
...,...,...,...
1005,1006,Fox Sparrow (Thick-billed/Slate-colored),(1006) Fox Sparrow (Thick-billed/Slate-colored)
1006,1007,Summer Tanager (Immature Male),(1007) Summer Tanager (Immature Male)
1007,1008,Orchard Oriole (Female/Juvenile),(1008) Orchard Oriole (Female/Juvenile)
1008,1009,Yellow-rumped Warbler (Winter/juvenile Audubon's),(1009) Yellow-rumped Warbler (Winter/juvenile ...


In [10]:
# Build the lookup {class id -> '(<id>) <description>' label} used to relabel the hierarchy
class_dict = classes.set_index('child_class_id')['tree_description'].to_dict()

In [11]:
# Work on an independent copy so the id-only `data` frame stays untouched
data_text = data.copy(deep=True)

In [12]:
# Map descriptions to classes
#
# The result is assigned back rather than calling `.replace(..., inplace=True)`
# on a column selection. That form is a chained in-place update: pandas 2.2
# flags it with a FutureWarning, and under Copy-on-Write (the default from
# pandas 3.0) it no longer modifies `data_text` at all.
# `replace` leaves any id that is missing from `class_dict` unchanged.
data_text['Parent'] = data_text['Parent'].replace(class_dict)
data_text['Child'] = data_text['Child'].replace(class_dict)

In [20]:
# Create tree with descriptions and write to 'tree.txt'

# Start from a fresh lookup so nodes from the id-only tree are not reused
nodes = {}
for parent, child in zip(data_text["Parent"], data_text["Child"]):
    add_nodes(nodes, parent, child)

# A root is any label that appears as a parent but never as a child
is_root_row = ~data_text["Parent"].isin(data_text["Child"])
roots = list(data_text.loc[is_root_row, "Parent"].unique())

# Write every root's rendered subtree to the file; UTF-8 is needed for the
# box-drawing characters RenderTree uses for the branches
with open('tree.txt', 'w', encoding='utf-8') as f:
    for root in roots:
        for row in RenderTree(nodes[root]):
            print("%s%s" % (row.pre, row.node.name), file=f)