In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from scipy.sparse import csr_matrix, lil_matrix
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestRegressor
from skmultilearn.ensemble import RakelD, RakelO, MajorityVotingClassifier
from skmultilearn.cluster import FixedLabelSpaceClusterer
from skmultilearn.embedding import SKLearnEmbedder, EmbeddingClassifier
from sklearn.manifold import SpectralEmbedding
from sklearn.metrics import make_scorer
import skmultilearn.problem_transform as skpt
import pandas as pd
import numpy as np
import skmultilearn.adapt as skadapt
import sklearn.metrics as metrics
from sklearn import preprocessing

def BinaryRelevance(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y, base_classif, title):
    #print(base_classif)
    classifier = skpt.BinaryRelevance(base_classif)
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy(title, predictions ,dataset_test_y)
    
def ClassifierChain(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y, base_classif, title):
    classifier = skpt.ClassifierChain(base_classif)
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy(title, predictions ,dataset_test_y)

def LabelPowerset(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y, base_classif, title):
    classifier = skpt.LabelPowerset(base_classif)
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy(title, predictions ,dataset_test_y)
 
def MLkNN(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,num_neighbours, smoothing_param):
    x_train = lil_matrix(dataset_train_x).toarray()
    y_train = lil_matrix(dataset_train_y).toarray()
    x_test = lil_matrix(dataset_test_x).toarray()
    
    classifier = skadapt.MLkNN(k=num_neighbours,s=smoothing_param)
    %timeit classifier.fit(x_train,y_train)
    predictions = classifier.predict(x_test)
    
    text = "MLkNN w/ k=" + str(num_neighbours) + " s="+str(smoothing_param)
    
    Metrics_Accuracy(text, predictions ,dataset_test_y)
    
def MLARAM(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,num_vigilance, num_threshold):
    x_train = lil_matrix(dataset_train_x).toarray()
    y_train = lil_matrix(dataset_train_y).toarray()
    x_test = lil_matrix(dataset_test_x).toarray()
    
    #Threshold controls number of prototypes to participate; vigilance controls how large hyperbox is
    classifier = skadapt.MLARAM(threshold = num_threshold, vigilance = num_vigilance)
    %timeit classifier.fit(x_train,y_train)
    predictions = classifier.predict(x_test)
    
    text = "MLARAM w/ Threshold = " + str(num_threshold) + ", Vigilance = "+ str(num_vigilance)
    
    Metrics_Accuracy(text, predictions ,dataset_test_y)
        
    
#Random Label Space Partitionining with Label Powerset
def RAkELd(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,base_clasif,num_labels):
    classifier = RakelD(
        base_classifier=base_clasif,
        labelset_size=num_labels
    )

    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("RAkELd", predictions ,dataset_test_y)
    
#random overlapping label space division with Label Powerset
def RAkELO(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,base_clasif,num_labels, num_models):
    classifier = RakelO(
        base_classifier=base_clasif,
        labelset_size=num_labels,
        model_count=num_models
    )

    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("RAkELO", predictions ,dataset_test_y)

def LabelSpacePartitioningClassifier(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = MajorityVotingClassifier(
        clusterer=FixedLabelSpaceClusterer(clusters = [[1,3,4], [0,2,5]]),
        classifier = skpt.ClassifierChain(classifier=SVC())
    )
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("Label Space Partition", predictions ,dataset_test_y)

def BRkNNa(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y, num_neighbours):
    x_train = lil_matrix(dataset_train_x).toarray()
    y_train = lil_matrix(dataset_train_y).toarray()
    x_test = lil_matrix(dataset_test_x).toarray()
    
    classifier = skadapt.BRkNNaClassifier(k=num_neighbours)
    %timeit classifier.fit(x_train,y_train)
    predictions = classifier.predict(x_test)
    
    text = "BRkNNa w/ k=" + str(num_neighbours)
    
    Metrics_Accuracy(text, predictions ,dataset_test_y)
    
def BRkNNb(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y, num_neighbours):
    x_train = lil_matrix(dataset_train_x).toarray()
    y_train = lil_matrix(dataset_train_y).toarray()
    x_test = lil_matrix(dataset_test_x).toarray()
    
    classifier = skadapt.BRkNNbClassifier(k=num_neighbours)
    %timeit classifier.fit(x_train,y_train)
    predictions = classifier.predict(x_test)
    
    text = "BRkNNb w/ k=" + str(num_neighbours)
    
    Metrics_Accuracy(text, predictions ,dataset_test_y)
    
def EmbeddingClassifierMethod(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = EmbeddingClassifier(
        SKLearnEmbedder(SpectralEmbedding(n_components=10)),
        RandomForestRegressor(n_estimators=10),
        skadapt.MLkNN(k=5)
    )
    %timeit classifier.fit(lil_matrix(dataset_train_x).toarray(), lil_matrix(dataset_train_y).toarray())
    predictions = classifier.predict(dataset_test_x)

    Metrics_Accuracy("Embedded Classifier", predictions ,dataset_test_y)

def TwinMLSVM(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,nc_k, omega):
    """Fit skadapt.MLTSVM (c_k=nc_k, sor_omega=omega) and print test metrics as "MLTSVM".

    Train features, train labels and test features are wrapped in csr_matrix
    before being passed to the classifier.
    """
    sparse_train_x = csr_matrix(dataset_train_x)
    sparse_train_y = csr_matrix(dataset_train_y)

    svm_model = skadapt.MLTSVM(c_k=nc_k, sor_omega=omega)
    svm_model.fit(sparse_train_x, sparse_train_y)
    y_pred = svm_model.predict(csr_matrix(dataset_test_x))

    Metrics_Accuracy("MLTSVM", y_pred, dataset_test_y)


    
def Metrics_Accuracy(classifier,predictions,dataset_test_y):
    """Print accuracy, Hamming loss and log loss for one set of predictions.

    classifier: label printed in the "Results for" header line.
    predictions: predicted label matrix; either a numpy array or an object
        exposing toarray() (e.g. a scipy sparse matrix).
    dataset_test_y: ground-truth label matrix for the same rows.
    """
    print("Results for ",classifier)
    print("Accuracy = ",accuracy_score(dataset_test_y,predictions))
    print("Hamming loss = ",metrics.hamming_loss(dataset_test_y,predictions))

    # log_loss is given a dense array. isinstance (rather than an exact type
    # comparison) lets ndarray subclasses pass through instead of being sent
    # to a toarray() method they may not have.
    # NOTE(review): the values scored here come from predict(), i.e. they look
    # like hard labels rather than probabilities -- confirm log loss is meaningful.
    dense_predictions = predictions if isinstance(predictions, np.ndarray) else predictions.toarray()
    print("Log loss = ",metrics.log_loss(dataset_test_y,dense_predictions))

    print("")
    
def Util_ClassifierMethods(dataset_train_x,dataset_train_y,dataset_test_x,dataset_test_y, base_classif=None, title=None):
    """Run the problem-transformation wrappers on one train/test split.

    BinaryRelevance, ClassifierChain and LabelPowerset each require a base
    classifier and a report title; the original 4-argument calls raised
    TypeError. Both are now optional parameters that are forwarded.

    base_classif: base estimator for the wrappers; defaults to GaussianNB().
    title: label for the printed reports; defaults to the estimator's class name.
    """
    if base_classif is None:
        base_classif = GaussianNB()
    if title is None:
        title = type(base_classif).__name__

    BinaryRelevance(dataset_train_x,dataset_train_y,dataset_test_x,dataset_test_y, base_classif, title)
    ClassifierChain(dataset_train_x,dataset_train_y,dataset_test_x,dataset_test_y, base_classif, title)
    # NOTE(review): ClassifierChainCV is not defined in the visible part of
    # this notebook, so its signature is unknown; call left unchanged.
    ClassifierChainCV(dataset_train_x,dataset_train_y,dataset_test_x,dataset_test_y)
    LabelPowerset(dataset_train_x,dataset_train_y,dataset_test_x,dataset_test_y, base_classif, title)

def FindBestMNBParams(classif, dataset_train_x, dataset_train_y):
    """Grid-search MultinomialNB's alpha (0.1 .. 1.0) inside the given wrapper.

    classif is the multi-label estimator whose 'classifier' parameter is
    searched. Candidates are scored with negated Hamming loss. The best
    parameter dict is printed and returned.
    """
    alpha_candidates = [round(step * 0.1, 1) for step in range(1, 11)]
    search_space = {
        'classifier': [MultinomialNB()],
        'classifier__alpha': alpha_candidates,
    }
    hamming_scorer = make_scorer(metrics.hamming_loss, greater_is_better=False)

    search = GridSearchCV(classif, search_space, scoring=hamming_scorer, n_jobs=3)
    search.fit(dataset_train_x, dataset_train_y)

    best = search.best_params_
    print(best)
    return best

def FindBestSVCParams(classif, dataset_train_x, dataset_train_y):
    """Grid-search SVC's kernel and degree inside the given wrapper.

    classif is the multi-label estimator whose 'classifier' parameter is
    searched. Candidates are scored with negated Hamming loss. The best
    parameter dict is printed and returned.
    """
    search_space = {
        'classifier': [SVC()],
        'classifier__degree': [2,3,4],
        'classifier__kernel': ['linear','poly','rbf'],
    }
    hamming_scorer = make_scorer(metrics.hamming_loss, greater_is_better=False)

    search = GridSearchCV(classif, search_space, scoring=hamming_scorer, n_jobs=3)
    search.fit(dataset_train_x, dataset_train_y)

    best = search.best_params_
    print(best)
    return best
    
    
#estimating best params using hamming loss for multi label problems
def FindBestK(classif, dataset_train_x, dataset_train_y):
    """Grid-search k (1..19), and s (0.5..1.0) where applicable, using Hamming loss.

    For BRkNNa/BRkNNb classifiers only 'k' is searched; for any other
    estimator both 'k' and 's' are searched. Training data is converted to
    dense arrays before fitting. The best parameter dict is printed and
    returned.
    """
    # isinstance avoids building two throwaway estimators just to compare
    # types, and also accepts subclasses of the BRkNN classifiers.
    if isinstance(classif, (skadapt.BRkNNaClassifier, skadapt.BRkNNbClassifier)):
        parameters = {'k': range(1,20)}
    else:
        rangefloatv = [round(x * 0.1, 1) for x in range(5, 11)]
        parameters = {'k': range(1,20), 's': rangefloatv}

    clf = GridSearchCV(classif, parameters, scoring=make_scorer(metrics.hamming_loss,greater_is_better=False), n_jobs=2)
    clf.fit(lil_matrix(dataset_train_x).toarray(), lil_matrix(dataset_train_y).toarray())
    print(clf.best_params_)
    return clf.best_params_

def FindBestVT(dataset_train_x, dataset_train_y):
    """Grid-search MLARAM's threshold (0.01..0.10) and vigilance (0.5..1.0).

    Candidates are scored with negated Hamming loss on dense copies of the
    training data. The best parameter dict is printed and returned.
    """
    search_space = {
        # Original author's note: default thres = 0.02, vigi = 0.9
        'threshold': [round(step * 0.01, 2) for step in range(1, 11)],
        'vigilance': [round(step * 0.1, 1) for step in range(5, 11)],
    }
    hamming_scorer = make_scorer(metrics.hamming_loss, greater_is_better=False)

    search = GridSearchCV(skadapt.MLARAM(), search_space, scoring=hamming_scorer, n_jobs=2)
    dense_x = lil_matrix(dataset_train_x).toarray()
    dense_y = lil_matrix(dataset_train_y).toarray()
    search.fit(dense_x, dense_y)

    best = search.best_params_
    print(best)
    return best

def FindCKParam(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    """Grid-search MLTSVM's c_k and sor_omega using Hamming loss.

    c_k candidates are 2**-5, 2**-3, 2**-1, 2**1 plus 0; sor_omega candidates
    are 0.1..1.0. The test-split arguments are accepted but not used. The best
    parameter dict is printed and returned.
    """
    # Powers of two first, then 0 appended (the original comment calls 0 the default).
    c_k_candidates = [2**exponent for exponent in range(-5, 3, 2)] + [0]
    omega_candidates = [round(step * 0.1, 1) for step in range(1, 11)]
    search_space = {'c_k': c_k_candidates, 'sor_omega': omega_candidates}
    hamming_scorer = make_scorer(metrics.hamming_loss, greater_is_better=False)

    search = GridSearchCV(skadapt.MLTSVM(), search_space, scoring=hamming_scorer, n_jobs=4)
    search.fit(csr_matrix(dataset_train_x), csr_matrix(dataset_train_y))

    best = search.best_params_
    print(best)
    return best

def GridSearchCV_base(classif, dataset_train_x, dataset_train_y):
    """Grid-search the base classifier of a RakelD ensemble using Hamming loss.

    Three candidate families are tried: GaussianNB, MultinomialNB with alpha
    in 0.1..1.0, and SVC with rbf/linear/sigmoid kernels. Returns the best
    parameter dict.

    NOTE(review): classif is accepted but never used -- the search always
    runs on a fresh RakelD(). Confirm whether classif was meant to be searched.
    """
    alpha_candidates = [round(step * 0.1, 1) for step in range(1, 11)]

    gaussian_grid = {'base_classifier': [GaussianNB()]}
    multinomial_grid = {
        'base_classifier': [MultinomialNB()],
        # Additive smoothing parameter of the naive Bayes model.
        'base_classifier__alpha': alpha_candidates,
    }
    svc_grid = {
        'base_classifier': [SVC()],
        'base_classifier__kernel': ['rbf','linear','sigmoid'],
    }
    search_space = [gaussian_grid, multinomial_grid, svc_grid]

    hamming_scorer = make_scorer(metrics.hamming_loss, greater_is_better=False)
    search = GridSearchCV(RakelD(), search_space, scoring=hamming_scorer, n_jobs=3)
    search.fit(dataset_train_x, dataset_train_y)
    return search.best_params_
    
def GridSearchCV_baseRakel(classif, dataset_train_x, dataset_train_y):
    """Grid-search base classifier and partition settings of a RAkEL ensemble.

    classif is the ensemble to tune. Candidates are scored with negated
    Hamming loss and the best parameter dict is returned.

    For a RakelO instance, labelset_size ranges from 3 up to (exclusive)
    n_labels // 2 when that is greater than 4, otherwise up to n_labels, and
    model_count is fixed at 2 * n_labels. For anything else, labelset_size
    ranges over 1..10 and model_count is not searched.

    The same three base-classifier families are tried in both cases:
    GaussianNB, MultinomialNB (alpha 0.1..1.0) and SVC (rbf/linear/sigmoid).
    """
    rangefloat = [round(x * 0.1, 1) for x in range(1, 11)]

    # isinstance replaces `type(classif) == type(RakelO())`, which built a
    # throwaway estimator for every comparison.
    is_rakel_o = isinstance(classif, RakelO)
    print(is_rakel_o)

    if is_rakel_o:
        n_labels = dataset_train_y.shape[1]
        end_range = n_labels//2 if n_labels//2 > (3+1) else n_labels
        range_labelset_size = list(range(3, end_range))
        range_model_count = list(range(2*n_labels, 2*n_labels+1))
        print(n_labels)
        print(range_labelset_size)
        print(range_model_count)
        # Settings shared by every base-classifier family below.
        shared = {
            'labelset_size': range_labelset_size,
            'model_count': range_model_count,
        }
    else:
        # labelset_size denotes the desired size of each partition.
        shared = {'labelset_size': list(range(1,11))}

    parameters = [
        {
            'base_classifier': [GaussianNB()],
            **shared,
        },
        {
            'base_classifier': [MultinomialNB()],
            'base_classifier__alpha': rangefloat, # additive smoothing parameter of NB
            **shared,
        },
        {
            'base_classifier': [SVC()],
            'base_classifier__kernel': ['rbf','linear','sigmoid'],
            **shared,
        },
    ]

    classifier = GridSearchCV(classif, parameters, scoring=make_scorer(metrics.hamming_loss,greater_is_better=False), n_jobs=3)
    classifier.fit(dataset_train_x, dataset_train_y)
    return classifier.best_params_

In [2]:
# Load the three multi-label datasets, min-max scale their feature columns,
# and split each into an 80/20 train/test partition.
# NOTE(review): each scaler is fit on all rows before the split, so rows that
# end up in the test partition also influence the scaling -- confirm intended.

#birds
print("Load Birds dataset")
birds1 = pd.read_csv(r"C:/Users/K/Desktop/Assignment1/birds-train.csv")
birds2 = pd.read_csv(r"C:/Users/K/Desktop/Assignment1/birds-test.csv")
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# in the same order and keeps their original index labels.
birds = pd.concat([birds1, birds2])

#scale based on columns before split
mms = preprocessing.MinMaxScaler()

# First 260 columns are treated as features.
birds.iloc[:,0:260] = mms.fit_transform(birds.iloc[:,0:260])

#split dataset
dataset_train_bird, dataset_test_bird = train_test_split(birds,random_state=42, test_size=0.20, shuffle=True)

# Last 19 columns are treated as labels.
dataset_train_x_bird = dataset_train_bird.iloc[:,0:260]
dataset_train_y_bird = dataset_train_bird.iloc[:,-19:]

dataset_test_x_bird = dataset_test_bird.iloc[:,0:260]
dataset_test_y_bird = dataset_test_bird.iloc[:,-19:]

#emotions
print("Load Emotions dataset")
emotions = pd.read_csv(r"C:/Users/K/Desktop/Assignment1/emotions.csv")

#scale based on columns before split
mms = preprocessing.MinMaxScaler()
# First 72 columns are treated as features.
emotions.iloc[:,0:72] = mms.fit_transform(emotions.iloc[:,0:72])

#split dataset
dataset_train_emotions, dataset_test_emotions = train_test_split(emotions,random_state=42, test_size=0.20, shuffle=True)

# Last 6 columns are treated as labels.
dataset_train_x_emotions = dataset_train_emotions.iloc[:,0:72]
dataset_train_y_emotions = dataset_train_emotions.iloc[:,-6:]

dataset_test_x_emotions = dataset_test_emotions.iloc[:,0:72]
dataset_test_y_emotions = dataset_test_emotions.iloc[:,-6:]

#yeast
print("Load Yeast dataset")
yeast = pd.read_csv(r"C:/Users/K/Desktop/Assignment1/yeast.csv")

#scale based on columns before split
mms = preprocessing.MinMaxScaler()
# First 103 columns are treated as features.
yeast.iloc[:,0:103] = mms.fit_transform(yeast.iloc[:,0:103])

#split dataset
dataset_train_yeast, dataset_test_yeast = train_test_split(yeast,random_state=42, test_size=0.20, shuffle=True)

# Last 14 columns are treated as labels.
dataset_train_x_yeast = dataset_train_yeast.iloc[:,0:103]
dataset_train_y_yeast = dataset_train_yeast.iloc[:,-14:]

dataset_test_x_yeast = dataset_test_yeast.iloc[:,0:103]
dataset_test_y_yeast = dataset_test_yeast.iloc[:,-14:]

Load Birds dataset
Load Emotions dataset
Load Yeast dataset


In [70]:
# Binary Relevance with an untuned GaussianNB base classifier, run on each dataset.
print("%Comparison Binary Relevance GaussianNB%")
base_classif = GaussianNB()
for _heading, _split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(_heading)
    BinaryRelevance(*_split, base_classif, "GaussianNB")


%Comparison Binary Relevance GaussianNB%
Bird dataset
111 ms ± 6.94 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  GaussianNB
Accuracy =  0.023255813953488372
Hamming loss =  0.4679722562219502
Log loss =  13.848272932073373

Emotions dataset
19.8 ms ± 4.43 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  GaussianNB
Accuracy =  0.23529411764705882
Hamming loss =  0.242296918767507
Log loss =  17.748562300103703

Yeast dataset
147 ms ± 9.33 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  GaussianNB
Accuracy =  0.09917355371900827
Hamming loss =  0.30283353010625735
Log loss =  65.19914296904747



In [71]:
# Binary Relevance with an untuned SVC base classifier, run on each dataset.
print("%Comparison Binary Relevance SVC%")
base_classif = SVC()
for _heading, _split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(_heading)
    BinaryRelevance(*_split, base_classif, "SVC")



%Comparison Binary Relevance SVC%
Bird dataset
502 ms ± 16.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  SVC
Accuracy =  0.5193798449612403
Hamming loss =  0.04243166054671563
Log loss =  5.06656988488039

Emotions dataset
108 ms ± 2.02 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  SVC
Accuracy =  0.31932773109243695
Hamming loss =  0.17507002801120447
Log loss =  19.842430799140388

Yeast dataset
5.64 s ± 58.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  SVC
Accuracy =  0.18801652892561985
Hamming loss =  0.18654073199527746
Log loss =  67.03054392022239



In [72]:
# Binary Relevance with an SVC whose kernel/degree are grid-searched per dataset.
print("%Comparison Binary Relevance SVC aft tuned%")
for _heading, _x_tr, _y_tr, _x_te, _y_te in (
    ("Bird dataset", dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird),
    ("Emotions dataset", dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions),
    ("Yeast dataset", dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast),
):
    print(_heading)
    # Search on the training split only, then rebuild the SVC from the winners.
    dict_res = FindBestSVCParams(skpt.BinaryRelevance(), _x_tr, _y_tr)
    base_classif = SVC(kernel=dict_res['classifier__kernel'], degree=dict_res['classifier__degree'])
    BinaryRelevance(_x_tr, _y_tr, _x_te, _y_te, base_classif, "SVC tuned")


%Comparison Binary Relevance SVC aft tuned%
Bird dataset
{'classifier': SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=2, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'classifier__degree': 2, 'classifier__kernel': 'linear'}
406 ms ± 8.51 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  SVC tuned
Accuracy =  0.5193798449612403
Hamming loss =  0.04243166054671563
Log loss =  10.303185256978105

Emotions dataset
{'classifier': SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=2, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'classifier__degree': 2, 'classifier__kernel': 'rbf'}
107 ms ± 2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  SVC tuned


In [73]:
# Binary Relevance with an untuned MultinomialNB base classifier, run on each dataset.
print("%Comparison Binary Relevance MNB%")
base_classif = MultinomialNB()
for _heading, _split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(_heading)
    BinaryRelevance(*_split, base_classif, "MNB")



%Comparison Binary Relevance MNB%
Bird dataset
80.7 ms ± 3.21 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB
Accuracy =  0.4806201550387597
Hamming loss =  0.06364749082007344
Log loss =  7.956975083617847

Emotions dataset
16.3 ms ± 327 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  MNB
Accuracy =  0.18487394957983194
Hamming loss =  0.23109243697478993
Log loss =  14.074788028114167

Yeast dataset
91.8 ms ± 1.22 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB
Accuracy =  0.014462809917355372
Hamming loss =  0.22638724911452185
Log loss =  95.30119203275576



In [74]:
# Binary Relevance with a MultinomialNB whose alpha is grid-searched per dataset.
print("%Comparison Binary Relevance MNB aft tuned%")
for _heading, _x_tr, _y_tr, _x_te, _y_te in (
    ("Bird dataset", dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird),
    ("Emotions dataset", dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions),
    ("Yeast dataset", dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast),
):
    print(_heading)
    # Search on the training split only, then rebuild the model from the winner.
    dict_res = FindBestMNBParams(skpt.BinaryRelevance(), _x_tr, _y_tr)
    base_classif = MultinomialNB(alpha=dict_res['classifier__alpha'])
    BinaryRelevance(_x_tr, _y_tr, _x_te, _y_te, base_classif, "MNB tuned")


%Comparison Binary Relevance MNB aft tuned%
Bird dataset
{'classifier': MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True), 'classifier__alpha': 1.0}
78.8 ms ± 2.71 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB tuned
Accuracy =  0.4806201550387597
Hamming loss =  0.06364749082007344
Log loss =  7.956975083617847

Emotions dataset
{'classifier': MultinomialNB(alpha=0.2, class_prior=None, fit_prior=True), 'classifier__alpha': 0.2}
16.1 ms ± 191 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  MNB tuned
Accuracy =  0.18487394957983194
Hamming loss =  0.22969187675070027
Log loss =  13.796195742115925

Yeast dataset
{'classifier': MultinomialNB(alpha=0.1, class_prior=None, fit_prior=True), 'classifier__alpha': 0.1}
90 ms ± 2.79 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB tuned
Accuracy =  0.014462809917355372
Hamming loss =  0.2262396694214876
Log loss =  95.2356950892885



In [75]:
# Classifier Chain with an untuned GaussianNB base classifier, run on each dataset.
print("CC")
base_classif = GaussianNB()
for _heading, _split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(_heading)
    ClassifierChain(*_split, base_classif, "GaussianNB")


CC
Bird dataset
128 ms ± 5.89 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  GaussianNB
Accuracy =  0.023255813953488372
Hamming loss =  0.28845369237046103
Log loss =  16.070205795919176

Emotions dataset
15.5 ms ± 1.8 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  GaussianNB
Accuracy =  0.2184873949579832
Hamming loss =  0.23669467787114845
Log loss =  17.729774152665914

Yeast dataset
164 ms ± 3.59 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  GaussianNB
Accuracy =  0.09090909090909091
Hamming loss =  0.31729634002361273
Log loss =  67.37627753072266



In [76]:
# Classifier Chain with an untuned SVC base classifier, run on each dataset.
print("%Comparison Classifier Chain SVC%")
base_classif = SVC()
for _heading, _split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(_heading)
    ClassifierChain(*_split, base_classif, "SVC")

%Comparison Classifier Chain SVC%
Bird dataset
485 ms ± 12.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  SVC
Accuracy =  0.5193798449612403
Hamming loss =  0.042839657282741736
Log loss =  4.844477648937925

Emotions dataset
108 ms ± 21.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  SVC
Accuracy =  0.35294117647058826
Hamming loss =  0.19187675070028012
Log loss =  21.021862144997353

Yeast dataset
4.68 s ± 269 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  SVC
Accuracy =  0.22107438016528927
Hamming loss =  0.2001180637544274
Log loss =  65.94675102057282



In [77]:
# Classifier Chain with an SVC whose kernel/degree are grid-searched per dataset.
print("%Comparison Classifier Chain SVC aft tuned%")
for _heading, _x_tr, _y_tr, _x_te, _y_te in (
    ("Bird dataset", dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird),
    ("Emotions dataset", dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions),
    ("Yeast dataset", dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast),
):
    print(_heading)
    # Search on the training split only, then rebuild the SVC from the winners.
    dict_res = FindBestSVCParams(skpt.ClassifierChain(), _x_tr, _y_tr)
    base_classif = SVC(kernel=dict_res['classifier__kernel'], degree=dict_res['classifier__degree'])
    ClassifierChain(_x_tr, _y_tr, _x_te, _y_te, base_classif, "SVC tuned")


%Comparison Classifier Chain SVC aft tuned%
Bird dataset
{'classifier': SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=2, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'classifier__degree': 2, 'classifier__kernel': 'linear'}
538 ms ± 44.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  SVC tuned
Accuracy =  0.5271317829457365
Hamming loss =  0.04039167686658507
Log loss =  10.714785994665249

Emotions dataset
{'classifier': SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=2, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'classifier__degree': 2, 'classifier__kernel': 'rbf'}
122 ms ± 3.39 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  SVC tun

In [78]:
# Classifier Chain with an untuned MultinomialNB base classifier, run on each dataset.
print("%Comparison Classifier Chain MNB%")
base_classif = MultinomialNB()
for _heading, _split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(_heading)
    ClassifierChain(*_split, base_classif, "MNB")

%Comparison Classifier Chain MNB%
Bird dataset
123 ms ± 3.96 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB
Accuracy =  0.4728682170542636
Hamming loss =  0.07058343533251733
Log loss =  7.5098591455546035

Emotions dataset
26.3 ms ± 2.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB
Accuracy =  0.24369747899159663
Hamming loss =  0.24369747899159663
Log loss =  25.931911968714516

Yeast dataset
163 ms ± 3.19 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB
Accuracy =  0.06818181818181818
Hamming loss =  0.23878394332939787
Log loss =  85.07557246529349



In [79]:
# Classifier Chain with a MultinomialNB whose alpha is grid-searched per dataset.
print("%Comparison Classifier Chain MNB aft tuned%")
for _heading, _x_tr, _y_tr, _x_te, _y_te in (
    ("Bird dataset", dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird),
    ("Emotions dataset", dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions),
    ("Yeast dataset", dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast),
):
    print(_heading)
    # Search on the training split only, then rebuild the model from the winner.
    dict_res = FindBestMNBParams(skpt.ClassifierChain(), _x_tr, _y_tr)
    base_classif = MultinomialNB(alpha=dict_res['classifier__alpha'])
    ClassifierChain(_x_tr, _y_tr, _x_te, _y_te, base_classif, "MNB tuned")


%Comparison Classifier Chain MNB aft tuned%
Bird dataset
{'classifier': MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True), 'classifier__alpha': 1.0}
129 ms ± 3.71 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB tuned
Accuracy =  0.4728682170542636
Hamming loss =  0.07058343533251733
Log loss =  7.5098591455546035

Emotions dataset
{'classifier': MultinomialNB(alpha=0.1, class_prior=None, fit_prior=True), 'classifier__alpha': 0.1}
31.8 ms ± 1.92 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB tuned
Accuracy =  0.24369747899159663
Hamming loss =  0.24649859943977592
Log loss =  25.950376040792975

Yeast dataset
{'classifier': MultinomialNB(alpha=0.9, class_prior=None, fit_prior=True), 'classifier__alpha': 0.9}
195 ms ± 6.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB tuned
Accuracy =  0.06818181818181818
Hamming loss =  0.23878394332939787
Log loss =  85.07557246529349



In [80]:
# Label Powerset baseline: a single GaussianNB instance is passed to the
# LabelPowerset helper for each benchmark dataset in turn.
print("Comparison LP GaussianNB")
base_classif = GaussianNB()
for ds_label, ds_split in (
    ("Bird dataset",
     (dataset_train_x_bird, dataset_train_y_bird,
      dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset",
     (dataset_train_x_emotions, dataset_train_y_emotions,
      dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset",
     (dataset_train_x_yeast, dataset_train_y_yeast,
      dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(ds_label)
    # The helper times the fit with %timeit, predicts on the test split and
    # reports the metrics under the given title.
    LabelPowerset(*ds_split, base_classif, "GaussianNB")


Comparison LP GaussianNB
Bird dataset
52.2 ms ± 6.14 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  GaussianNB
Accuracy =  0.046511627906976744
Hamming loss =  0.08200734394124846
Log loss =  19.91339372336576

Emotions dataset
16.3 ms ± 500 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  GaussianNB
Accuracy =  0.24369747899159663
Hamming loss =  0.24649859943977592
Log loss =  28.74001269861988

Yeast dataset
102 ms ± 4.46 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  GaussianNB
Accuracy =  0.1756198347107438
Hamming loss =  0.24321133412042503
Log loss =  65.31607274155078



In [81]:
# Label Powerset with an SVC left at its default settings, evaluated on the
# three benchmark datasets.
print("%Comparison Label Powerset SVC%")
base_classif = SVC()
for ds_label, ds_split in (
    ("Bird dataset",
     (dataset_train_x_bird, dataset_train_y_bird,
      dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset",
     (dataset_train_x_emotions, dataset_train_y_emotions,
      dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset",
     (dataset_train_x_yeast, dataset_train_y_yeast,
      dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(ds_label)
    # Same base_classif object is reused for every dataset, as before.
    LabelPowerset(*ds_split, base_classif, "SVC")

%Comparison Label Powerset SVC%
Bird dataset
601 ms ± 32.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  SVC
Accuracy =  0.5116279069767442
Hamming loss =  0.044063647490820076
Log loss =  7.517075500495655

Emotions dataset
94.1 ms ± 2.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  SVC
Accuracy =  0.37815126050420167
Hamming loss =  0.18207282913165265
Log loss =  16.337664355216496

Yeast dataset
3.06 s ± 80.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  SVC
Accuracy =  0.2665289256198347
Hamming loss =  0.20056080283353012
Log loss =  61.3614143059135



In [82]:
# Label Powerset + SVC after tuning. For each dataset: ask FindBestSVCParams
# (defined elsewhere in the notebook) for a parameter dict, rebuild an SVC
# from its kernel/degree entries, then evaluate through the LabelPowerset
# helper.
print("%Comparison Label Powerset SVC aft tuned%")
for ds_label, ds_train_x, ds_train_y, ds_test_x, ds_test_y in (
    ("Bird dataset",
     dataset_train_x_bird, dataset_train_y_bird,
     dataset_test_x_bird, dataset_test_y_bird),
    ("Emotions dataset",
     dataset_train_x_emotions, dataset_train_y_emotions,
     dataset_test_x_emotions, dataset_test_y_emotions),
    ("Yeast dataset",
     dataset_train_x_yeast, dataset_train_y_yeast,
     dataset_test_x_yeast, dataset_test_y_yeast),
):
    print(ds_label)
    # Only the training split is passed to the parameter search.
    dict_res = FindBestSVCParams(skpt.LabelPowerset(), ds_train_x, ds_train_y)
    base_classif = SVC(
        kernel=dict_res['classifier__kernel'],
        degree=dict_res['classifier__degree'],
    )
    LabelPowerset(ds_train_x, ds_train_y, ds_test_x, ds_test_y,
                  base_classif, "SVC tuned")


%Comparison Label Powerset SVC aft tuned%
Bird dataset
{'classifier': SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=2, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'classifier__degree': 2, 'classifier__kernel': 'linear'}
504 ms ± 28.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  SVC tuned
Accuracy =  0.5348837209302325
Hamming loss =  0.041207670338637294
Log loss =  11.004515304212044

Emotions dataset
{'classifier': SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=2, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'classifier__degree': 2, 'classifier__kernel': 'rbf'}
93.1 ms ± 2.91 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  SVC tun

In [83]:
# Label Powerset with a MultinomialNB left at its default settings, evaluated
# on the three benchmark datasets.
print("%Comparison Label Powerset MNB%")
base_classif = MultinomialNB()
for ds_label, ds_split in (
    ("Bird dataset",
     (dataset_train_x_bird, dataset_train_y_bird,
      dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset",
     (dataset_train_x_emotions, dataset_train_y_emotions,
      dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset",
     (dataset_train_x_yeast, dataset_train_y_yeast,
      dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(ds_label)
    LabelPowerset(*ds_split, base_classif, "MNB")

%Comparison Label Powerset MNB%
Bird dataset
24.6 ms ± 1.22 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB
Accuracy =  0.5038759689922481
Hamming loss =  0.049775601795185635
Log loss =  15.730139665142733

Emotions dataset
11.6 ms ± 193 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  MNB
Accuracy =  0.33613445378151263
Hamming loss =  0.22268907563025211
Log loss =  21.24474485870703

Yeast dataset
64.7 ms ± 1.48 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB
Accuracy =  0.17768595041322313
Hamming loss =  0.2411452184179457
Log loss =  68.62675792362288



In [84]:
# Label Powerset + MultinomialNB after tuning. For each dataset:
# FindBestMNBParams (defined elsewhere in the notebook) supplies a dict whose
# 'classifier__alpha' entry configures a fresh MultinomialNB, which is then
# evaluated through the LabelPowerset helper.
print("%Comparison Label Powerset MNB aft tuned%")
for ds_label, ds_train_x, ds_train_y, ds_test_x, ds_test_y in (
    ("Bird dataset",
     dataset_train_x_bird, dataset_train_y_bird,
     dataset_test_x_bird, dataset_test_y_bird),
    ("Emotions dataset",
     dataset_train_x_emotions, dataset_train_y_emotions,
     dataset_test_x_emotions, dataset_test_y_emotions),
    ("Yeast dataset",
     dataset_train_x_yeast, dataset_train_y_yeast,
     dataset_test_x_yeast, dataset_test_y_yeast),
):
    print(ds_label)
    # Only the training split is passed to the parameter search.
    dict_res = FindBestMNBParams(skpt.LabelPowerset(), ds_train_x, ds_train_y)
    base_classif = MultinomialNB(alpha=dict_res['classifier__alpha'])
    LabelPowerset(ds_train_x, ds_train_y, ds_test_x, ds_test_y,
                  base_classif, "MNB tuned")


%Comparison Label Powerset MNB aft tuned%
Bird dataset
{'classifier': MultinomialNB(alpha=0.7, class_prior=None, fit_prior=True), 'classifier__alpha': 0.7}
32.5 ms ± 10.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB tuned
Accuracy =  0.5271317829457365
Hamming loss =  0.04732762137902897
Log loss =  17.045241206389715

Emotions dataset
{'classifier': MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True), 'classifier__alpha': 1.0}
13.3 ms ± 471 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  MNB tuned
Accuracy =  0.33613445378151263
Hamming loss =  0.22268907563025211
Log loss =  21.24474485870703

Yeast dataset
{'classifier': MultinomialNB(alpha=0.1, class_prior=None, fit_prior=True), 'classifier__alpha': 0.1}
73.2 ms ± 4.93 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MNB tuned
Accuracy =  0.1797520661157025
Hamming loss =  0.23996458087367178
Log loss =  68.34131349060709



In [85]:
# RAkELd with a searched base classifier and a fixed third argument of 3.
# TODO: the original note here read "loop to find" and stopped - presumably a
# search over that fixed value was planned; confirm before relying on it.
for ds_train_x, ds_train_y, ds_test_x, ds_test_y in (
    (dataset_train_x_bird, dataset_train_y_bird,
     dataset_test_x_bird, dataset_test_y_bird),
    (dataset_train_x_emotions, dataset_train_y_emotions,
     dataset_test_x_emotions, dataset_test_y_emotions),
    (dataset_train_x_yeast, dataset_train_y_yeast,
     dataset_test_x_yeast, dataset_test_y_yeast),
):
    # GridSearchCV_base is defined elsewhere in the notebook; its result is
    # echoed and its 'base_classifier' entry is forwarded to the RAkELd helper.
    dict_res = GridSearchCV_base(RakelD(), ds_train_x, ds_train_y)
    print(dict_res)
    RAkELd(ds_train_x, ds_train_y, ds_test_x, ds_test_y,
           dict_res['base_classifier'], 3)

{'base_classifier': SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'base_classifier__kernel': 'linear'}
378 ms ± 10.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  RAkELd
Accuracy =  0.5426356589147286
Hamming loss =  0.03916768665850673
Log loss =  9.77537847446885

{'base_classifier': SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'base_classifier__kernel': 'rbf'}
79.3 ms ± 2.36 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  RAkELd
Accuracy =  0.3025210084033613
Hamming loss =  0.18627450980392157
Log loss =  18.05296075499493

{'base_cla

In [86]:
# RAkELd where both the base classifier and the labelset size come from the
# search result returned by GridSearchCV_baseRakel (defined elsewhere).
# TODO: the original note here read "loop to find" and stopped - intent
# unclear; confirm with the author.
for ds_train_x, ds_train_y, ds_test_x, ds_test_y in (
    (dataset_train_x_bird, dataset_train_y_bird,
     dataset_test_x_bird, dataset_test_y_bird),
    (dataset_train_x_emotions, dataset_train_y_emotions,
     dataset_test_x_emotions, dataset_test_y_emotions),
    (dataset_train_x_yeast, dataset_train_y_yeast,
     dataset_test_x_yeast, dataset_test_y_yeast),
):
    dict_res = GridSearchCV_baseRakel(RakelD(), ds_train_x, ds_train_y)
    print(dict_res)
    RAkELd(ds_train_x, ds_train_y, ds_test_x, ds_test_y,
           dict_res['base_classifier'], dict_res['labelset_size'])

False
{'base_classifier': SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'base_classifier__kernel': 'linear', 'labelset_size': 1}
542 ms ± 36.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  RAkELd
Accuracy =  0.5193798449612403
Hamming loss =  0.04243166054671563
Log loss =  10.303185256978105

False
{'base_classifier': SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'base_classifier__kernel': 'rbf', 'labelset_size': 1}
169 ms ± 16.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  RAkELd
Accuracy =  0.31932773109243695
Hamming loss =  0.17507

In [87]:
# RAkELo: base classifier, labelset size and model count are all read from
# the dict returned by GridSearchCV_baseRakel (defined elsewhere). The dict
# itself is deliberately not printed in this cell.
# TODO: the original note here read "loop to find" and stopped - intent
# unclear; confirm with the author.
for ds_train_x, ds_train_y, ds_test_x, ds_test_y in (
    (dataset_train_x_bird, dataset_train_y_bird,
     dataset_test_x_bird, dataset_test_y_bird),
    (dataset_train_x_emotions, dataset_train_y_emotions,
     dataset_test_x_emotions, dataset_test_y_emotions),
    (dataset_train_x_yeast, dataset_train_y_yeast,
     dataset_test_x_yeast, dataset_test_y_yeast),
):
    dict_res = GridSearchCV_baseRakel(RakelO(), ds_train_x, ds_train_y)
    RAkELO(ds_train_x, ds_train_y, ds_test_x, ds_test_y,
           dict_res['base_classifier'],
           dict_res['labelset_size'],
           dict_res['model_count'])

True
19
[3, 4, 5, 6, 7, 8]
[38]
2.19 s ± 49.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  RAkELO
Accuracy =  0.5348837209302325
Hamming loss =  0.03916768665850673
Log loss =  11.447218012971714

True
6
[3, 4, 5]
[12]
362 ms ± 7.24 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  RAkELO
Accuracy =  0.40336134453781514
Hamming loss =  0.16666666666666666
Log loss =  17.338945444561645

True
14
[3, 4, 5, 6]
[28]
18.6 s ± 288 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  RAkELO
Accuracy =  0.20867768595041322
Hamming loss =  0.1855076741440378
Log loss =  64.09402421276856



In [88]:
# Adapted algorithms: MLkNN with fixed settings.
# k is passed as the helper's num_neighbours argument and s as its
# smoothing_param argument.
k, s = 10, 1
print("MLkNN")
for ds_label, ds_split in (
    ("Bird dataset",
     (dataset_train_x_bird, dataset_train_y_bird,
      dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset",
     (dataset_train_x_emotions, dataset_train_y_emotions,
      dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset",
     (dataset_train_x_yeast, dataset_train_y_yeast,
      dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(ds_label)
    MLkNN(*ds_split, k, s)

MLkNN
Bird dataset
655 ms ± 22.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=10 s=1
Accuracy =  0.5038759689922481
Hamming loss =  0.04569563443492452
Log loss =  12.39589726977999

Emotions dataset
302 ms ± 16.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=10 s=1
Accuracy =  0.2605042016806723
Hamming loss =  0.20028011204481794
Log loss =  22.745173058945998

Yeast dataset
2.76 s ± 56.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=10 s=1
Accuracy =  0.2024793388429752
Hamming loss =  0.19347697756788665
Log loss =  61.67800027255757



In [89]:
# Adapted algorithms: MLkNN with searched settings.
# FindBestK (defined elsewhere in the notebook) receives an unconfigured
# skadapt.MLkNN and the training split; its 'k' and 's' entries are forwarded
# to the MLkNN helper.
print("MLkNN")
for ds_label, ds_train_x, ds_train_y, ds_test_x, ds_test_y in (
    ("Bird dataset",
     dataset_train_x_bird, dataset_train_y_bird,
     dataset_test_x_bird, dataset_test_y_bird),
    ("Emotions dataset",
     dataset_train_x_emotions, dataset_train_y_emotions,
     dataset_test_x_emotions, dataset_test_y_emotions),
    ("Yeast dataset",
     dataset_train_x_yeast, dataset_train_y_yeast,
     dataset_test_x_yeast, dataset_test_y_yeast),
):
    print(ds_label)
    dict_res = FindBestK(skadapt.MLkNN(), ds_train_x, ds_train_y)
    MLkNN(ds_train_x, ds_train_y, ds_test_x, ds_test_y,
          dict_res['k'], dict_res['s'])

MLkNN
Bird dataset
{'k': 2, 's': 1.0}
581 ms ± 21.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=2 s=1.0
Accuracy =  0.5038759689922481
Hamming loss =  0.046103631170950635
Log loss =  14.33945301555224

Emotions dataset
{'k': 18, 's': 0.5}
361 ms ± 12.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=18 s=0.5
Accuracy =  0.24369747899159663
Hamming loss =  0.20588235294117646
Log loss =  24.94522546540537

Yeast dataset
{'k': 18, 's': 0.7}
3.27 s ± 31 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=18 s=0.7
Accuracy =  0.1859504132231405
Hamming loss =  0.19259149940968123
Log loss =  64.14271555537948



In [90]:
# MLARAM with fixed settings. Going by the titles in the recorded output,
# v is reported as the vigilance and t as the threshold.
v, t = 0.95, 0.05
print("MLARAM")
for ds_label, ds_split in (
    ("Bird dataset",
     (dataset_train_x_bird, dataset_train_y_bird,
      dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset",
     (dataset_train_x_emotions, dataset_train_y_emotions,
      dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset",
     (dataset_train_x_yeast, dataset_train_y_yeast,
      dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(ds_label)
    MLARAM(*ds_split, v, t)



MLARAM
Bird dataset
1.52 s ± 599 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MLARAM w/ Threshold = 0.05, Vigilance = 0.95
Accuracy =  0.06976744186046512
Hamming loss =  0.11587107303141575
Log loss =  19.144448673942428

Emotions dataset
The slowest run took 4.28 times longer than the fastest. This could mean that an intermediate result is being cached.
3.01 s ± 1.25 s per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MLARAM w/ Threshold = 0.05, Vigilance = 0.95
Accuracy =  0.24369747899159663
Hamming loss =  0.242296918767507
Log loss =  25.609121502007017

Yeast dataset
5.62 s ± 1.96 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLARAM w/ Threshold = 0.05, Vigilance = 0.95
Accuracy =  0.2024793388429752
Hamming loss =  0.24217827626918537
Log loss =  58.79743077583615



In [91]:
# MLARAM with searched settings: FindBestVT (defined elsewhere in the
# notebook) is given the training split only, and its 'vigilance' and
# 'threshold' entries are forwarded to the MLARAM helper in that order.
print("MLARAM")
for ds_label, ds_train_x, ds_train_y, ds_test_x, ds_test_y in (
    ("Bird dataset",
     dataset_train_x_bird, dataset_train_y_bird,
     dataset_test_x_bird, dataset_test_y_bird),
    ("Emotions dataset",
     dataset_train_x_emotions, dataset_train_y_emotions,
     dataset_test_x_emotions, dataset_test_y_emotions),
    ("Yeast dataset",
     dataset_train_x_yeast, dataset_train_y_yeast,
     dataset_test_x_yeast, dataset_test_y_yeast),
):
    print(ds_label)
    dict_res = FindBestVT(ds_train_x, ds_train_y)
    MLARAM(ds_train_x, ds_train_y, ds_test_x, ds_test_y,
           dict_res['vigilance'], dict_res['threshold'])

MLARAM
Bird dataset
{'threshold': 0.01, 'vigilance': 0.9}
1.12 s ± 442 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MLARAM w/ Threshold = 0.01, Vigilance = 0.9
Accuracy =  0.10077519379844961
Hamming loss =  0.10567115463076296
Log loss =  17.240775251451335

Emotions dataset
{'threshold': 0.08, 'vigilance': 0.9}
The slowest run took 4.16 times longer than the fastest. This could mean that an intermediate result is being cached.
1.95 s ± 802 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MLARAM w/ Threshold = 0.08, Vigilance = 0.9
Accuracy =  0.2857142857142857
Hamming loss =  0.21008403361344538
Log loss =  19.356077640115846

Yeast dataset
{'threshold': 0.07, 'vigilance': 0.7}
1.29 s ± 426 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLARAM w/ Threshold = 0.07, Vigilance = 0.7
Accuracy =  0.16115702479338842
Hamming loss =  0.20041322314049587
Log loss =  66.48396247530553



In [92]:
# Adapted algorithms: BRkNNa with a fixed neighbour count of 10.
k = 10
print("BRkNNa")
for ds_label, ds_split in (
    ("Bird dataset",
     (dataset_train_x_bird, dataset_train_y_bird,
      dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset",
     (dataset_train_x_emotions, dataset_train_y_emotions,
      dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset",
     (dataset_train_x_yeast, dataset_train_y_yeast,
      dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(ds_label)
    BRkNNa(*ds_split, k)

BRkNNa
Bird dataset
22 ms ± 386 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  BRkNNa w/ k=10
Accuracy =  0.5348837209302325
Hamming loss =  0.04243166054671563
Log loss =  8.33255347581939

Emotions dataset
4.83 ms ± 221 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  BRkNNa w/ k=10
Accuracy =  0.24369747899159663
Hamming loss =  0.19327731092436976
Log loss =  22.16183397606018

Yeast dataset
46.9 ms ± 1.27 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  BRkNNa w/ k=10
Accuracy =  0.19008264462809918
Hamming loss =  0.1897874852420307
Log loss =  66.22835015176206



In [93]:
# Adapted algorithms: BRkNNa with a searched neighbour count.
# FindBestK (defined elsewhere in the notebook) receives an unconfigured
# skadapt.BRkNNaClassifier and the training split; its 'k' entry is forwarded
# to the BRkNNa helper.
print("BRkNNa tuned")
for ds_label, ds_train_x, ds_train_y, ds_test_x, ds_test_y in (
    ("Bird dataset",
     dataset_train_x_bird, dataset_train_y_bird,
     dataset_test_x_bird, dataset_test_y_bird),
    ("Emotions dataset",
     dataset_train_x_emotions, dataset_train_y_emotions,
     dataset_test_x_emotions, dataset_test_y_emotions),
    ("Yeast dataset",
     dataset_train_x_yeast, dataset_train_y_yeast,
     dataset_test_x_yeast, dataset_test_y_yeast),
):
    print(ds_label)
    dict_res = FindBestK(skadapt.BRkNNaClassifier(), ds_train_x, ds_train_y)
    BRkNNa(ds_train_x, ds_train_y, ds_test_x, ds_test_y, dict_res['k'])

BRkNNa tuned
Bird dataset
{'k': 2}
26.9 ms ± 5.08 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  BRkNNa w/ k=2
Accuracy =  0.5116279069767442
Hamming loss =  0.042839657282741736
Log loss =  9.64145865735343

Emotions dataset
{'k': 14}
4.93 ms ± 197 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  BRkNNa w/ k=14
Accuracy =  0.2773109243697479
Hamming loss =  0.19187675070028012
Log loss =  21.614871213511798

Yeast dataset
{'k': 19}
46.3 ms ± 544 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  BRkNNa w/ k=19
Accuracy =  0.1859504132231405
Hamming loss =  0.1878689492325856
Log loss =  63.72172339623755



In [94]:
# Adapted algorithms: BRkNNb with a fixed neighbour count of 10.
k = 10
print("BRkNNb")
for ds_label, ds_split in (
    ("Bird dataset",
     (dataset_train_x_bird, dataset_train_y_bird,
      dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset",
     (dataset_train_x_emotions, dataset_train_y_emotions,
      dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset",
     (dataset_train_x_yeast, dataset_train_y_yeast,
      dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(ds_label)
    BRkNNb(*ds_split, k)

BRkNNb
Bird dataset
22.4 ms ± 551 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  BRkNNb w/ k=10
Accuracy =  0.015503875968992248
Hamming loss =  0.5356997144022848
Log loss =  27.335374353080482

Emotions dataset
4.7 ms ± 74.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  BRkNNb w/ k=10
Accuracy =  0.03361344537815126
Hamming loss =  0.3865546218487395
Log loss =  39.48951258518467

Yeast dataset
46 ms ± 649 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  BRkNNb w/ k=10
Accuracy =  0.002066115702479339
Hamming loss =  0.3367768595041322
Log loss =  87.51865433097085



In [95]:
# Adapted algorithms: BRkNNb with a searched neighbour count.
# FindBestK (defined elsewhere in the notebook) receives an unconfigured
# skadapt.BRkNNbClassifier and the training split; its 'k' entry is forwarded
# to the BRkNNb helper.
print("BRkNNb tuned")
for ds_label, ds_train_x, ds_train_y, ds_test_x, ds_test_y in (
    ("Bird dataset",
     dataset_train_x_bird, dataset_train_y_bird,
     dataset_test_x_bird, dataset_test_y_bird),
    ("Emotions dataset",
     dataset_train_x_emotions, dataset_train_y_emotions,
     dataset_test_x_emotions, dataset_test_y_emotions),
    ("Yeast dataset",
     dataset_train_x_yeast, dataset_train_y_yeast,
     dataset_test_x_yeast, dataset_test_y_yeast),
):
    print(ds_label)
    dict_res = FindBestK(skadapt.BRkNNbClassifier(), ds_train_x, ds_train_y)
    BRkNNb(ds_train_x, ds_train_y, ds_test_x, ds_test_y, dict_res['k'])

BRkNNb tuned
Bird dataset
{'k': 5}
22.1 ms ± 321 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  BRkNNb w/ k=5
Accuracy =  0.015503875968992248
Hamming loss =  0.5246838025295798
Log loss =  27.874092319833792

Emotions dataset
{'k': 5}
4.86 ms ± 89.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  BRkNNb w/ k=5
Accuracy =  0.05042016806722689
Hamming loss =  0.3795518207282913
Log loss =  38.88485398422384

Yeast dataset
{'k': 19}
48.1 ms ± 2.46 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  BRkNNb w/ k=19
Accuracy =  0.002066115702479339
Hamming loss =  0.32615112160566706
Log loss =  84.82771553187013



In [96]:
# Label space partitioning on the three benchmark datasets, via the
# notebook's own LabelSpacePartitioningClassifier helper (defined elsewhere).
# TODO: label relations exploration.
for ds_label, ds_split in (
    ("Bird dataset",
     (dataset_train_x_bird, dataset_train_y_bird,
      dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset",
     (dataset_train_x_emotions, dataset_train_y_emotions,
      dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset",
     (dataset_train_x_yeast, dataset_train_y_yeast,
      dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(ds_label)
    LabelSpacePartitioningClassifier(*ds_split)

Bird dataset
113 ms ± 1.84 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  Label Space Partition
Accuracy =  0.49612403100775193
Hamming loss =  0.044879640962872294
Log loss =  4.937110254934766

Emotions dataset
80.3 ms ± 901 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  Label Space Partition
Accuracy =  0.3277310924369748
Hamming loss =  0.18207282913165265
Log loss =  20.942294964690408

Yeast dataset
1.91 s ± 17.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  Label Space Partition
Accuracy =  0.05371900826446281
Hamming loss =  0.2625442739079103
Log loss =  102.874400008373



In [97]:
# Embedding-based classifier on the three benchmark datasets, via the
# notebook's EmbeddingClassifierMethod helper (defined elsewhere).
for ds_label, ds_split in (
    ("Bird dataset",
     (dataset_train_x_bird, dataset_train_y_bird,
      dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset",
     (dataset_train_x_emotions, dataset_train_y_emotions,
      dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset",
     (dataset_train_x_yeast, dataset_train_y_yeast,
      dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(ds_label)
    EmbeddingClassifierMethod(*ds_split)

Bird dataset
1.72 s ± 63.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  Embedded Classifier
Accuracy =  0.4573643410852713
Hamming loss =  0.04814361485108119
Log loss =  14.659880296162731

Emotions dataset
516 ms ± 15.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  Embedded Classifier
Accuracy =  0.2773109243697479
Hamming loss =  0.2030812324929972
Log loss =  20.28697198538459

Yeast dataset
5.47 s ± 45.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  Embedded Classifier
Accuracy =  0.2128099173553719
Hamming loss =  0.20336481700118064
Log loss =  60.06668473037323



In [12]:
# MLTSVM (via the notebook's TwinMLSVM helper), run twice per dataset:
# first with the fixed pair (0.5, 1), then with the 'c_k' / 'sor_omega'
# values returned by FindCKParam in those same two positions.
# NOTE(review): FindCKParam is handed the test split as well as the training
# split. Its body is not visible here; if it selects parameters by test-set
# score, the "tuned" numbers are optimistic - confirm.
for ds_label, ds_split in (
    ("Bird dataset",
     (dataset_train_x_bird, dataset_train_y_bird,
      dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset",
     (dataset_train_x_emotions, dataset_train_y_emotions,
      dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset",
     (dataset_train_x_yeast, dataset_train_y_yeast,
      dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(ds_label)
    TwinMLSVM(*ds_split, 0.5, 1)
    dict_res = FindCKParam(*ds_split)
    TwinMLSVM(*ds_split, dict_res['c_k'], dict_res['sor_omega'])

Bird dataset
Results for  MLTSVM
Accuracy =  0.4806201550387597
Hamming loss =  0.06405548755609955
Log loss =  10.856367012562288

{'c_k': 0.125, 'sor_omega': 0.1}
Results for  MLTSVM
Accuracy =  0.4728682170542636
Hamming loss =  0.05671154630762954
Log loss =  9.732793676393436

Emotions dataset
Results for  MLTSVM
Accuracy =  0.15966386554621848
Hamming loss =  0.24089635854341737
Log loss =  18.44356171463906

{'c_k': 0.5, 'sor_omega': 0.5}
Results for  MLTSVM
Accuracy =  0.23529411764705882
Hamming loss =  0.22128851540616246
Log loss =  19.654737869842275

Yeast dataset
Results for  MLTSVM
Accuracy =  0.006198347107438017
Hamming loss =  0.32290436835891384
Log loss =  59.732021195750384

{'c_k': 0.5, 'sor_omega': 0.1}
Results for  MLTSVM
Accuracy =  0.03925619834710744
Hamming loss =  0.2767119244391972
Log loss =  71.23620112149082



In [22]:
# Scratch check: powers of two for every second exponent from -5 up to 3.
# Negative exponents yield floats, non-negative ones stay ints.
print([pow(2, exponent) for exponent in range(-5, 4, 2)])

[0.03125, 0.125, 0.5, 2, 8]


In [11]:
# Scratch grid: powers of two for exponents -5, -3, -1, 1, followed by a
# trailing 0 entry.
ranges = [pow(2, exponent) for exponent in range(-5, 3, 2)]
ranges.append(0)
print(ranges)

[0.03125, 0.125, 0.5, 2, 0]
