### Hierarchical classification

In [1]:
import pandas as pd
import numpy as np
import ast

# `sklearn.cross_validation` was removed in scikit-learn 0.20; its contents
# live in `sklearn.model_selection` (already used below for GridSearchCV).
# The legacy import is kept only as a fallback for very old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.decomposition import TruncatedSVD
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline

from sklearn.metrics import classification_report

from sklearn.preprocessing import MultiLabelBinarizer

from sklearn_hierarchical_classification.classifier import HierarchicalClassifier
from sklearn_hierarchical_classification.constants import ROOT

from util_koyak import col_selector_hiera, grid_estimator
from tqdm import tqdm_notebook

import warnings; warnings.simplefilter('ignore')



#### Importing features and selecting them.

In [2]:
# Read the pre-computed feature table; its first CSV column is used as the index.
df_genre = pd.read_csv(
    "features_genre_HIERA_min_100.csv",
    index_col=0,
)
# Restrict the table via the project helper, passing the 'mfcc' and 'contrast'
# selectors. NOTE(review): the helper lives in util_koyak; presumably it keeps
# the matching feature columns plus the "genre_selected" target -- confirm.
df_mfcc_cont = col_selector_hiera(["mfcc", "contrast"], df_genre)

In [3]:
# Target vector: the selected genre label of every row.
y = df_mfcc_cont["genre_selected"]
# Feature matrix: everything except the target column.
X = df_mfcc_cont.drop(labels="genre_selected", axis="columns")

In [4]:
# Hold out 10% of the rows for testing. The split is stratified on the label
# and seeded so that it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.10,
    stratify=y,
    random_state=4444,
)

In [5]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to both splits, so no test statistics leak in.
sca = StandardScaler()
sca.fit(X_train)
X_train_sca = sca.transform(X_train)
X_test_sca = sca.transform(X_test)

#### Creating the class hierarchy.

In [6]:
# Genre metadata table; the following cell reads its "genre_id" and "parent"
# columns to build the class hierarchy.
df_genres = pd.read_csv("genres.csv")

In [7]:
# Build the {parent_id: [child_genre_id, ...]} mapping that is handed to
# HierarchicalClassifier as `class_hierarchy`.
class_hierarchy = {}
for parent_id, genre_id in zip(df_genres["parent"].values, df_genres["genre_id"].values):
    # setdefault creates the child list on first sight of a parent. This
    # replaces a bare `except:` that was used as control flow and would have
    # hidden any unrelated error (or a KeyboardInterrupt) raised in the loop.
    class_hierarchy.setdefault(parent_id, []).append(genre_id)

# Genres whose parent id is 0 are re-keyed under the library's ROOT sentinel.
# A KeyError here means no row in genres.csv has parent == 0.
class_hierarchy[ROOT] = class_hierarchy.pop(0)

### Hierarchical classification with SVM

In [8]:
def Hierarchical_fit(estimator, class_hierarchy, X_train_sca, y_train, X_test_sca, y_test,
                     stopping_criteria=0.5, n_components=24, random_state=None):
    """Fit a hierarchical classifier and return its train and test accuracy.

    A pipeline of ``TruncatedSVD`` followed by ``estimator`` is used as the
    base estimator of a ``HierarchicalClassifier`` built with
    ``prediction_depth="nmlnp"``.

    Parameters
    ----------
    estimator : scikit-learn classifier
        Final step of the base pipeline (e.g. ``SVC(probability=True)``).
    class_hierarchy : dict
        Mapping of parent node -> list of child nodes, rooted at ``ROOT``.
    X_train_sca, y_train : array-like
        Training features and labels.
    X_test_sca, y_test : array-like
        Held-out features and labels.
    stopping_criteria : float, default 0.5
        Forwarded to ``HierarchicalClassifier(stopping_criteria=...)``.
        NOTE(review): with "nmlnp" the library documents this as the
        confidence threshold for descending further -- confirm against the
        installed version.
    n_components : int, default 24
        Number of components kept by ``TruncatedSVD``; must be smaller than
        the number of input features.
    random_state : int, RandomState instance or None, default None
        Seed for ``TruncatedSVD``. Its default solver is randomized, so pass
        an integer to make the reported scores reproducible.

    Returns
    -------
    tuple of (float, float)
        ``(train_accuracy, test_accuracy)``.
    """
    base_estimator = make_pipeline(
        TruncatedSVD(n_components=n_components, random_state=random_state),
        estimator,
    )
    clf = HierarchicalClassifier(
        base_estimator=base_estimator,
        class_hierarchy=class_hierarchy,
        prediction_depth="nmlnp",
        stopping_criteria=stopping_criteria,
    )
    clf.fit(X_train_sca, y_train)
    y_pred = clf.predict(X_test_sca)
    y_pred_train = clf.predict(X_train_sca)
    return accuracy_score(y_train, y_pred_train), accuracy_score(y_test, y_pred)

In [9]:
# Sweep the stopping criterion with a default-gamma RBF SVM.
# Each appended row is [stopping_criteria, (train_accuracy, test_accuracy)].
results = []
for stopping_criteria in (0.2, 0.5, 0.8):
    results.append([
        stopping_criteria,
        Hierarchical_fit(
            estimator=SVC(kernel="rbf", probability=True),
            class_hierarchy=class_hierarchy,
            X_train_sca=X_train_sca,
            y_train=y_train,
            X_test_sca=X_test_sca,
            y_test=y_test,
            stopping_criteria=stopping_criteria,
        ),
    ])

_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 46
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 79
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 86
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 92
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 102
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 130
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 3
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 4
_train_l

_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 14
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 19
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 181
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 182
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 468
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 65
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 6
_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node 16
_train

In [10]:
# Each row: [stopping_criteria, (train_accuracy, test_accuracy)]
results

[[0.2, (0.5745209764817608, 0.4489693313222725)],
 [0.5, (0.5760292721077035, 0.45098039215686275)],
 [0.8, (0.576532037316351, 0.44419306184012064)]]

In [None]:
# Grid over the RBF kernel width (gamma) and the stopping criterion.
# Each row is [stopping_criteria, (train_accuracy, test_accuracy), gamma].
# gamma is appended as a third element so every row identifies the full
# configuration that produced it; the first two positions keep the layout of
# the earlier stopping-criteria sweep.
results = []
for gamma in [0.001, 0.01, 0.1, 1, 10]:
    print("gamma: " + str(gamma))
    for stopping_criteria in [0.2, 0.5, 0.8]:
        results.append([stopping_criteria,
                        Hierarchical_fit(SVC(gamma=gamma, kernel="rbf", probability=True),
                                         class_hierarchy, X_train_sca, y_train, X_test_sca,
                                         y_test, stopping_criteria=stopping_criteria),
                        gamma])

gamma: 0.001


In [None]:
# Show the rows collected by the gamma / stopping-criteria sweep.
results