In [1]:
import os
import sys
# Put the parent of the working directory on the import path (once), presumably so the
# `src` package imported further down resolves from this notebook's location.
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

In [2]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

In [3]:
from datetime import datetime, timedelta
from time import perf_counter

from sklearn.base import clone
from sklearn.multiclass import OneVsRestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn_hierarchical_classification.classifier import HierarchicalClassifier

from src.data_processing import (hierarchy_to_dict, read_data, read_hierarchy,
                                 split_data)
from src.metrics import get_comparision
from src.models import LCN, LCPN, LCL

# Experiments

In [4]:
# Seed used for the base estimator's random_state.
RANDOM_SEED = 123456
# Unfitted decision tree that every experiment below receives as its base estimator
# (directly, wrapped in OneVsRestClassifier, or via clone()).
BASE_CLASSIFIER = DecisionTreeClassifier(random_state=RANDOM_SEED)

In [5]:
# Load the class hierarchy and the train/test tables from the raw data folder.
H = read_hierarchy('./data/raw/imclef07a.hf')
train_data = read_data('./data/raw/imclef07a_train')
test_data = read_data('./data/raw/imclef07a_test')

# Dict form of the hierarchy, as passed to the library classifier's `class_hierarchy`.
class_hierarchy = hierarchy_to_dict(H)

# Feature matrices are everything except the 'label' and 'path' columns; the target is
# the 'label' column.
non_feature_columns = ['label', 'path']
X_train, y_train = train_data.drop(columns=non_feature_columns), train_data['label']
X_test, y_test = test_data.drop(columns=non_feature_columns), test_data['label']

## LCPN - library

In [6]:
# Library baseline for the LCPN section: HierarchicalClassifier driven by the shared
# base tree and the dict form of the class hierarchy.
lcpn_lib = HierarchicalClassifier(
    base_estimator=BASE_CLASSIFIER,
    class_hierarchy=class_hierarchy,
)

# Durations are taken from the monotonic, high-resolution perf_counter clock rather
# than datetime.now(), which follows the system wall clock and can jump if that clock
# is adjusted mid-run. Wrapping the seconds in a timedelta keeps the printed
# H:MM:SS.ffffff format.
start_time = perf_counter()
lcpn_lib.fit(X_train, y_train)
print('Training time: ', timedelta(seconds=perf_counter() - start_time))

start_time = perf_counter()
lcpn_lib_pred = lcpn_lib.predict(X_test)
print('Predicting time: ', timedelta(seconds=perf_counter() - start_time))

Training time:  0:00:06.153024
Predicting time:  0:00:00.394704


## LCPN - own implementation

In [7]:
# Hand the model its own unfitted copy of the shared base estimator, the same way the
# FLAT baseline does, so fitting here cannot leak state into the other experiments.
# NOTE(review): whether LCPN clones the estimator internally is not visible from this
# notebook; the extra clone is harmless if it does.
lcpn = LCPN(
    clone(BASE_CLASSIFIER),
    H
)

In [8]:
# Train on the full training frame (not X_train/y_train): the table returned by
# read_data, including its 'label' and 'path' columns, is passed as-is.
lcpn.fit(train_data)

100%|██████████| 3/3 [00:01<00:00,  1.69it/s]


In [9]:
# Predict on the full test frame; the comparison cell reads the 'predicted_label'
# column of the returned object.
df_predicted_lcpn = lcpn.predict(test_data)

100%|██████████| 34/34 [00:00<00:00, 310.19it/s]
100%|██████████| 1006/1006 [00:01<00:00, 634.03it/s]


## LCN - library

In [10]:
# Library baseline for the LCN section: the same HierarchicalClassifier, but with the
# base tree wrapped in OneVsRestClassifier.
lcn_lib = HierarchicalClassifier(
    base_estimator=OneVsRestClassifier(BASE_CLASSIFIER),
    class_hierarchy=class_hierarchy,
)

# Durations are taken from the monotonic, high-resolution perf_counter clock rather
# than datetime.now(), which follows the system wall clock and can jump if that clock
# is adjusted mid-run. Wrapping the seconds in a timedelta keeps the printed
# H:MM:SS.ffffff format.
start_time = perf_counter()
lcn_lib.fit(X_train, y_train)
print('Training time: ', timedelta(seconds=perf_counter() - start_time))

start_time = perf_counter()
lcn_lib_pred = lcn_lib.predict(X_test)
print('Predicting time: ', timedelta(seconds=perf_counter() - start_time))

Training time:  0:00:10.792067
Predicting time:  0:00:01.464500


## LCN - own implementation

In [11]:
# Hand the model its own unfitted copy of the shared base estimator, the same way the
# FLAT baseline does, so fitting here cannot leak state into the other experiments.
# NOTE(review): whether LCN clones the estimator internally is not visible from this
# notebook; the extra clone is harmless if it does.
lcn = LCN(
    clone(BASE_CLASSIFIER),
    H
)

In [12]:
# Train on the full training frame (not X_train/y_train): the table returned by
# read_data, including its 'label' and 'path' columns, is passed as-is.
lcn.fit(train_data)

100%|██████████| 3/3 [00:03<00:00,  1.23s/it]


In [13]:
# Predict on the full test frame; the comparison cell reads the 'predicted_label'
# column of the returned object.
df_predicted_lcn = lcn.predict(test_data)

100%|██████████| 34/34 [00:00<00:00, 134.50it/s]
100%|██████████| 1006/1006 [00:01<00:00, 637.97it/s]


## LCL - own implementation

In [14]:
# Hand the model its own unfitted copy of the shared base estimator, the same way the
# FLAT baseline does, so fitting here cannot leak state into the other experiments.
# NOTE(review): whether LCL clones the estimator internally is not visible from this
# notebook; the extra clone is harmless if it does.
lcl = LCL(
    clone(BASE_CLASSIFIER),
    H
)

In [15]:
# Train on the full training frame (not X_train/y_train): the table returned by
# read_data, including its 'label' and 'path' columns, is passed as-is.
lcl.fit(train_data)

100%|██████████| 3/3 [00:01<00:00,  2.08it/s]


In [16]:
# Predict on the full test frame; the comparison cell reads the 'predicted_label'
# column of the returned object.
df_predicted_lcl = lcl.predict(test_data)

100%|██████████| 1006/1006 [00:02<00:00, 457.62it/s]


## FLAT

In [17]:
# Flat baseline: a fresh copy of the base tree trained directly on the labels, with
# the class hierarchy not passed in at all. fit() returns the estimator itself in
# scikit-learn, so the clone/fit steps chain.
clf = clone(BASE_CLASSIFIER).fit(X_train, y_train)
flat_pred = clf.predict(X_test)

## Comparison

In [20]:
# [model name, predictions] pairs, in the order they should be indexed in the result.
# The own implementations return an object whose 'predicted_label' column holds the
# predictions; the library and flat models return the predictions directly.
model_predictions = [
    ['lcpn_lib', lcpn_lib_pred],
    ['lcpn_own', df_predicted_lcpn['predicted_label']],
    ['lcn_lib', lcn_lib_pred],
    ['lcn_own', df_predicted_lcn['predicted_label']],
    ['lcl_own', df_predicted_lcl['predicted_label']],
    ['flat', flat_pred],
]

# Score every model against the test labels, passing the graph exposed by the fitted
# library classifier as the third argument.
comparision = get_comparision(model_predictions, y_test, lcpn_lib.graph_)

# Rank by h_f1, best first; the sorted frame is the cell's displayed output.
comparision.sort_values(by='h_f1', ascending=False)

Unnamed: 0,model,h_f1,f1_micro
5,flat,0.792578,0.602386
0,lcpn_lib,0.766907,0.56163
1,lcpn_own,0.766907,0.56163
3,lcn_own,0.746955,0.529821
4,lcl_own,0.744727,0.519881
2,lcn_lib,0.742265,0.513917
