In [1]:
import os, sys, inspect
# Prepend the parent of this notebook's directory to sys.path, presumably so
# that the project-level `settings` module imported below can be found.
# inspect.getfile(inspect.currentframe()) gives the file name of the code that
# is currently executing; its directory is treated as the "current" directory.
# NOTE(review): inside a notebook kernel the executing frame is not a real file
# on disk -- presumably abspath() then resolves against the working directory,
# so this relies on the kernel being started in the notebook's folder; confirm.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
# Position 0 gives this directory priority over every other sys.path entry.
sys.path.insert(0, parentdir)

import pandas as pd
from settings import RESULTS_PATH

In [2]:
# Read the comparison results stored as "compare_results.csv" under RESULTS_PATH.
results_path = os.path.join(RESULTS_PATH, "compare_results.csv")
df_results = pd.read_csv(filepath_or_buffer=results_path)

In [3]:
# Express the score columns as percentages with two decimals.
# Scale first and round last: rounding to 4 decimals *before* multiplying by
# 100 lets binary floating-point noise back into the stored values.
# NOTE: this cell rescales `df_results` every time it runs, so re-run the
# loading cell above before executing it again.
score_columns = ["f1_score", "h_acc", "h_f1_score"]
df_results = df_results.copy()
df_results[score_columns] = df_results[score_columns].mul(100.0).round(2)
df_results

Unnamed: 0,algorithm,inner_clf,f1_score,h_acc,h_f1_score
0,flat,GaussianNB,41.19,57.89,58.17
1,flat,SVC,54.09,84.36,76.97
2,flat,RandomForestClassifier,46.04,79.13,64.86
3,flat,DecisionTreeClassifier,32.12,63.42,48.02
4,one_vs_rest,GaussianNB,41.9,60.37,58.01
5,one_vs_rest,SVC,64.52,84.76,77.83
6,one_vs_rest,RandomForestClassifier,51.42,80.35,71.32
7,one_vs_rest,DecisionTreeClassifier,31.71,57.75,47.19
8,lcn,GaussianNB,33.6,57.39,54.74
9,lcn,SVC,78.62,85.45,83.03


## Best for every algorithm

In [4]:
def best_for_alg(df_results):
    """Return, for every algorithm, the row with the highest ``h_f1_score``.

    Parameters
    ----------
    df_results : pandas.DataFrame
        Must contain the columns ``"algorithm"`` and ``"h_f1_score"``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``"algorithm"`` value, in order of first
        appearance, with the same columns as ``df_results`` and a fresh
        0..n-1 index. When several rows of an algorithm tie for the best
        score, the first one is kept. Rows whose score is missing are never
        selected; an algorithm with no valid score at all is left out.
    """
    # Rows without a score cannot be "best"; dropping them up front also keeps
    # idxmax() from returning NaN for an algorithm that has no valid score.
    scored = df_results.dropna(subset=["h_f1_score"])
    if scored.empty:
        return df_results.iloc[0:0].reset_index(drop=True)

    # idxmax() yields index *labels*, so the lookup has to use .loc (label
    # based); positional .iloc is only correct for a default RangeIndex.
    # sort=False keeps the algorithms in their order of first appearance.
    best_labels = scored.groupby("algorithm", sort=False)["h_f1_score"].idxmax()
    return df_results.loc[best_labels.tolist()].reset_index(drop=True)

In [5]:
# Display, for each algorithm, the row that best_for_alg selects by highest h_f1_score.
best_for_alg(df_results)

Unnamed: 0,algorithm,inner_clf,f1_score,h_acc,h_f1_score
0,flat,SVC,54.09,84.36,76.97
1,one_vs_rest,SVC,64.52,84.76,77.83
2,lcn,SVC,78.62,85.45,83.03
3,lcpn,SVC,56.48,84.86,81.64
4,lcl,SVC,54.09,84.69,78.04


## Mean performance

In [14]:
# Average each score column per algorithm, list the algorithms in a fixed
# presentation order, and turn the "algorithm" index back into a column.
df_mean = (
    df_results
    .groupby("algorithm")[["f1_score", "h_acc", "h_f1_score"]]
    .mean()
    .reindex(["flat", "one_vs_rest", "lcn", "lcpn", "lcl"])
    .reset_index()
)
df_mean

Unnamed: 0,algorithm,f1_score,h_acc,h_f1_score
0,flat,43.36,71.2,62.005
1,one_vs_rest,47.3875,70.8075,63.5875
2,lcn,49.905,72.555,65.8425
3,lcpn,38.662,66.568,59.118
4,lcl,43.5075,70.4525,61.4875


## Pros and cons

### Flat / Big-bang classifier
\+ Very simple. \
\- Bad results.

### One vs rest
It was used because, for the dataset used here, which has instances only in the leaves, it is effectively a true *Local Classifier per Node*.

\+ It gives better results than the simple flat classifier. \
\+ It gives quite good results on average, better than LCPN and LCL. \
\- It is more complicated and training takes more time.

### Local Classifier per Node
Because the dataset used has instances only in the leaves, it was implemented in a special way. Classification is done on every level of the class hierarchy. Then, the probability for every leaf is computed as the average of the probabilities of all nodes on the path from the root to that leaf.

\+ Best results, mainly because it uses the largest number of classifiers. \
\- The largest number of classifiers also means the highest time complexity.

### Local Classifier per Parent Node
\+ Quite good results. \
\+ Fewer classifiers than in LCN. \
\- Quite complicated implementation, mainly because the dataset has to be filtered for every node.

### Local Classifier per Level
\+ Simple. \
\- Problem with inconsistent predictions on different levels. \
\- Results are not so good.