In [117]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from scipy.sparse import csr_matrix, lil_matrix
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestRegressor
from skmultilearn.ensemble import RakelD, RakelO, MajorityVotingClassifier
from skmultilearn.cluster import FixedLabelSpaceClusterer
from skmultilearn.embedding import SKLearnEmbedder, EmbeddingClassifier
from sklearn.manifold import SpectralEmbedding
from sklearn.metrics import make_scorer
import skmultilearn.problem_transform as skpt
import pandas as pd
import numpy as np
import skmultilearn.adapt as skadapt
import sklearn.metrics as metrics
from sklearn import preprocessing

def BinaryRelevance(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = skpt.BinaryRelevance(GaussianNB())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("Binary Relevance w/ GaussianNB", predictions ,dataset_test_y)
    
def BinaryRelevanceSVC(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):    
    classifier = skpt.BinaryRelevance(SVC())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("Binary Relevance w/ SVC", predictions ,dataset_test_y)
      
def BinaryRelevanceMNB(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):      
    classifier = skpt.BinaryRelevance(MultinomialNB())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("Binary Relevance w/ MNB", predictions ,dataset_test_y)    
    
def ClassifierChain(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = skpt.ClassifierChain(GaussianNB())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("CC w/ GaussianNB", predictions ,dataset_test_y)

def ClassifierChainSVC(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = skpt.ClassifierChain(SVC())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("CC w/ SVC", predictions ,dataset_test_y)

def ClassifierChainMNB(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):    
    classifier = skpt.ClassifierChain(MultinomialNB(alpha = 1.0))
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("CC w/ MNB", predictions ,dataset_test_y)
    
#     rangefloat = [round(x * 0.1, 1) for x in range(1, 10)] #degree of smoothing
#     parameters = {'classifier__alpha' : rangefloat}

#     clf = GridSearchCV(skpt.ClassifierChain(MultinomialNB()), parameters, scoring=make_scorer(metrics.hamming_loss,greater_is_better=False), n_jobs=2)
#     #print(clf.get_params().keys())
#     clf.fit(dataset_train_x, dataset_train_y)
#     print(clf.cv_results_)
#     #return clf.best_params_
    
#     classifier = skpt.ClassifierChain(MultinomialNB(alpha = clf.best_params_['classifier__alpha']))
#     %timeit classifier.fit(dataset_train_x, dataset_train_y)
#     predictions = classifier.predict(dataset_test_x)
    
#     Metrics_Accuracy("CC w/ MNB tuning", predictions ,dataset_test_y)
   
def LabelPowerset(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = skpt.LabelPowerset(GaussianNB())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("LP w/ GaussianNB", predictions ,dataset_test_y)

def LabelPowersetSVC(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):    
    classifier = skpt.LabelPowerset(SVC())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("LP w/ SVC", predictions ,dataset_test_y)

def LabelPowersetMNB(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):    
    classifier = skpt.LabelPowerset(MultinomialNB())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("LP w/ MNB", predictions ,dataset_test_y)

#Choose best classifier between MNB and SVC - combined
def BinaryRelevanceCV(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    """Grid-search the base estimator of a Binary Relevance model and score it.

    Two candidate families are searched with ``scoring='accuracy'``:
    ``MultinomialNB`` over two ``alpha`` values and ``SVC`` over two kernels.
    The search object is printed, fitted on the training split, used to
    predict the test split, and the scores are printed via ``Metrics_Accuracy``.
    """
    # alpha is the additive smoothing parameter of the naive Bayes estimator
    mnb_candidates = {
        'classifier': [MultinomialNB()],
        'classifier__alpha': [0.7, 1.0],
    }
    svc_candidates = {
        'classifier': [SVC()],
        'classifier__kernel': ['rbf','linear'],
    }

    search = GridSearchCV(
        skpt.BinaryRelevance(),
        [mnb_candidates, svc_candidates],
        scoring = 'accuracy',
    )
    print(search)
    search.fit(dataset_train_x, dataset_train_y)

    Metrics_Accuracy(
        "Binary Relevance w/ CV",
        search.predict(dataset_test_x),
        dataset_test_y,
    )
    
def ClassifierChainCV(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    parameters = [
        {
            'classifier': [MultinomialNB()],
            'classifier__alpha': [0.7, 1.0],
        },
        {
            'classifier': [SVC()],
            'classifier__kernel': ['rbf', 'linear'],
        },
    ]
    classifier = GridSearchCV(skpt.ClassifierChain(),parameters,scoring='accuracy',n_jobs=2)
    print(classifier)
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    Metrics_Accuracy("CC Cross Validate", predictions ,dataset_test_y)    
 
def MLkNN(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,num_neighbours, smoothing_param):
    x_train = lil_matrix(dataset_train_x).toarray()
    y_train = lil_matrix(dataset_train_y).toarray()
    x_test = lil_matrix(dataset_test_x).toarray()
    
    classifier = skadapt.MLkNN(k=num_neighbours,s=smoothing_param)
    %timeit classifier.fit(x_train,y_train)
    predictions = classifier.predict(x_test)
    
    text = "MLkNN w/ k=" + str(num_neighbours)
    
    Metrics_Accuracy(text, predictions ,dataset_test_y)
    
def MLARAM(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,num_vigilance, num_threshold):
    x_train = lil_matrix(dataset_train_x).toarray()
    y_train = lil_matrix(dataset_train_y).toarray()
    x_test = lil_matrix(dataset_test_x).toarray()
    
    #Threshold controls number of prototypes to participate; vigilance controls how large hyperbox is
    classifier = skadapt.MLARAM(threshold = num_threshold, vigilance = num_vigilance)
    %timeit classifier.fit(x_train,y_train)
    predictions = classifier.predict(x_test)
    
    text = "MLARAM w/ Threshold = " + str(num_threshold) + ", Vigilance = "+ str(num_vigilance)
    
    Metrics_Accuracy(text, predictions ,dataset_test_y)
        
    
#Random Label Space Partitioning with Label Powerset
def RAkELd(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,num_labels):
    classifier = RakelD(
        base_classifier=GaussianNB(),
        base_classifier_require_dense=[True, True],
        labelset_size=num_labels
    )

    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("RAkELd", predictions ,dataset_test_y)
    
#random overlapping label space division with Label Powerset
def RAkELO(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,num_labels):
    classifier = RakelO(
        base_classifier=GaussianNB(),
        base_classifier_require_dense=[True, True],
        labelset_size=dataset_train_y.shape[1],
        model_count=12
    )

    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("RAkELO", predictions ,dataset_test_y)

def LabelSpacePartitioningClassifier(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = MajorityVotingClassifier(
        clusterer=FixedLabelSpaceClusterer(clusters = [[1,3,4], [0,2,5]]),
        classifier = skpt.ClassifierChain(classifier=SVC())
    )
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("Label Space Partition", predictions ,dataset_test_y)

def EmbeddingClassifierMethod(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = EmbeddingClassifier(
        SKLearnEmbedder(SpectralEmbedding(n_components=10)),
        RandomForestRegressor(n_estimators=10),
        skadapt.MLkNN(k=5)
    )
    %timeit classifier.fit(lil_matrix(dataset_train_x).toarray(), lil_matrix(dataset_train_y).toarray())
    predictions = classifier.predict(dataset_test_x)

    Metrics_Accuracy("Embedded Classifier", predictions ,dataset_test_y)
    
def Metrics_Accuracy(classifier,predictions,dataset_test_y):
    """Print accuracy, Hamming loss and log loss for one set of predictions.

    Parameters
    ----------
    classifier : str
        Label printed in the "Results for" header line.
    predictions : array-like or sparse matrix
        Predicted label matrix as returned by a model's ``predict``.
    dataset_test_y : array-like
        True label matrix for the same rows.

    Returns
    -------
    None
        The three scores are only printed, followed by an empty line.
    """
    #results
    print("Results for ",classifier)
    # accuracy
    print("Accuracy = ",accuracy_score(dataset_test_y,predictions))
    # hamming loss
    print("Hamming loss = ",metrics.hamming_loss(dataset_test_y,predictions))
    # log loss
    # Densify anything that offers toarray() (e.g. scipy sparse matrices) and
    # pass everything else through unchanged. The former exact-type test
    # called .toarray() on every non-ndarray input and raised AttributeError
    # for lists, DataFrames and ndarray subclasses.
    dense_predictions = predictions.toarray() if hasattr(predictions, "toarray") else predictions
    print("Log loss = ",metrics.log_loss(dataset_test_y,dense_predictions))
    # Exact Match Score
    #exact_match_score = np.all(predictions.toarray() == dataset_test_y, axis=1).mean()
    #print('Exact match score (Whole row must match):', exact_match_score)

    print("")
    
def Util_ClassifierMethods(dataset_train_x,dataset_train_y,dataset_test_x,dataset_test_y):
    """Run four problem-transformation experiments on one train/test split.

    Calls, in order: ``BinaryRelevance``, ``ClassifierChain``,
    ``ClassifierChainCV`` and ``LabelPowerset``, each with the same four
    arguments.
    """
    experiments = (BinaryRelevance, ClassifierChain, ClassifierChainCV, LabelPowerset)
    for experiment in experiments:
        experiment(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y)
    
#estimating best params using hamming loss for multi label problems
def FindBestK(dataset_train_x, dataset_train_y):
    """Grid-search ``k`` and ``s`` for MLkNN and return the best combination.

    ``k`` ranges over 1..4 and ``s`` over 0.5..0.9 in steps of 0.1. Candidates
    are scored with Hamming loss (lower is better) using ``n_jobs=2``. The
    training data is converted to dense arrays before fitting. The winning
    parameter dict is printed and returned.
    """
    smoothing_grid = [round(step * 0.1, 1) for step in range(5, 10)]
    search_space = {'k': range(1,5), 's': smoothing_grid}
    hamming_scorer = make_scorer(metrics.hamming_loss,greater_is_better=False)

    search = GridSearchCV(skadapt.MLkNN(), search_space, scoring=hamming_scorer, n_jobs=2)
    search.fit(lil_matrix(dataset_train_x).toarray(), lil_matrix(dataset_train_y).toarray())

    best = search.best_params_
    print(best)
    return best

def FindBestVT(dataset_train_x, dataset_train_y):
    """Grid-search ``threshold`` and ``vigilance`` for MLARAM and return the best pair.

    ``threshold`` ranges over 0.01..0.09 in steps of 0.01 and ``vigilance``
    over 0.5..0.9 in steps of 0.1. Candidates are scored with Hamming loss
    (lower is better) using ``n_jobs=2``. The training data is converted to
    dense arrays before fitting. The winning parameter dict is printed and
    returned.
    """
    threshold_grid = [round(step * 0.01, 2) for step in range(1, 10)]
    vigilance_grid = [round(step * 0.1, 1) for step in range(5, 10)]
    # default thres = 0.02, vigi = 0.9
    search_space = {'threshold': threshold_grid, 'vigilance': vigilance_grid}
    hamming_scorer = make_scorer(metrics.hamming_loss,greater_is_better=False)

    search = GridSearchCV(skadapt.MLARAM(), search_space, scoring=hamming_scorer, n_jobs=2)
    search.fit(lil_matrix(dataset_train_x).toarray(), lil_matrix(dataset_train_y).toarray())

    best = search.best_params_
    print(best)
    return best

In [118]:
# Folder holding the assignment CSV files; edit this one line to relocate the data.
DATA_DIR = r"C:/Users/K/Desktop/Assignment1"

# NOTE(review): in all three sections below the MinMaxScaler is fitted on the
# complete frame before train_test_split, so the test rows contribute to the
# min/max used to scale the training features.

#birds
print("Load Birds dataset")
birds1 = pd.read_csv(DATA_DIR + "/birds-train.csv")
birds2 = pd.read_csv(DATA_DIR + "/birds-test.csv")
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# row-wise in the same way (original indices kept).
birds = pd.concat([birds1, birds2])

#scale based on columns before split (first 260 columns are the features)
mms = preprocessing.MinMaxScaler()

#print(birds.iloc[:,0:260])
birds.iloc[:,0:260] = mms.fit_transform(birds.iloc[:,0:260])

#print(birds.iloc[:,0:260])

#split dataset
dataset_train_bird, dataset_test_bird = train_test_split(birds,random_state=42, test_size=0.20, shuffle=True)

# features = first 260 columns, labels = last 19 columns
dataset_train_x_bird = dataset_train_bird.iloc[:,0:260]
dataset_train_y_bird = dataset_train_bird.iloc[:,-19:]

dataset_test_x_bird = dataset_test_bird.iloc[:,0:260]
dataset_test_y_bird = dataset_test_bird.iloc[:,-19:]

#emotions
print("Load Emotions dataset")
emotions = pd.read_csv(DATA_DIR + "/emotions.csv")

#scale based on columns before split (first 72 columns are the features)
mms = preprocessing.MinMaxScaler()
emotions.iloc[:,0:72] = mms.fit_transform(emotions.iloc[:,0:72])

#split dataset
dataset_train_emotions, dataset_test_emotions = train_test_split(emotions,random_state=42, test_size=0.20, shuffle=True)

# features = first 72 columns, labels = last 6 columns
dataset_train_x_emotions = dataset_train_emotions.iloc[:,0:72]
dataset_train_y_emotions = dataset_train_emotions.iloc[:,-6:]

dataset_test_x_emotions = dataset_test_emotions.iloc[:,0:72]
dataset_test_y_emotions = dataset_test_emotions.iloc[:,-6:]

#yeast
print("Load Yeast dataset")
yeast = pd.read_csv(DATA_DIR + "/yeast.csv")

#scale based on columns before split (first 103 columns are the features)
mms = preprocessing.MinMaxScaler()
yeast.iloc[:,0:103] = mms.fit_transform(yeast.iloc[:,0:103])

#split dataset
dataset_train_yeast, dataset_test_yeast = train_test_split(yeast,random_state=42, test_size=0.20, shuffle=True)

# features = first 103 columns, labels = last 14 columns
dataset_train_x_yeast = dataset_train_yeast.iloc[:,0:103]
dataset_train_y_yeast = dataset_train_yeast.iloc[:,-14:]

dataset_test_x_yeast = dataset_test_yeast.iloc[:,0:103]
dataset_test_y_yeast = dataset_test_yeast.iloc[:,-14:]

Load Birds dataset
Load Emotions dataset
Load Yeast dataset


In [119]:
#Binary Relevance
# Each base-estimator variant is run on the three splits, in the same order.
bird_split = (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)
emotions_split = (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)
yeast_split = (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)

for banner, experiment in (
    ("%Comparison Binary Relevance%", BinaryRelevance),
    ("%Comparison Binary Relevance SVC%", BinaryRelevanceSVC),
    ("%Comparison Binary Relevance MNB%", BinaryRelevanceMNB),
):
    print(banner)
    for dataset_label, split in (
        ("Bird dataset", bird_split),
        ("Emotions dataset", emotions_split),
        ("Yeast dataset", yeast_split),
    ):
        print(dataset_label)
        experiment(*split)

# print("%Comparison Binary Relevance Grid Search CV%")
# print("Bird dataset")
# BinaryRelevanceCV(dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)
# print("Emotions dataset")
# BinaryRelevanceCV(dataset_train_x_emotions,dataset_train_y_emotions,dataset_test_x_emotions,dataset_test_y_emotions)
# print("Yeast dataset")
# BinaryRelevanceCV(dataset_train_x_yeast,dataset_train_y_yeast,dataset_test_x_yeast,dataset_test_y_yeast)

%Comparison Binary Relevance%
Bird dataset
64.7 ms ± 4.08 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  Binary Relevance w/ GaussianNB
Accuracy =  0.023255813953488372
Hamming loss =  0.4679722562219502
Log loss =  13.848272932073373

Emotions dataset
8.63 ms ± 81 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  Binary Relevance w/ GaussianNB
Accuracy =  0.23529411764705882
Hamming loss =  0.242296918767507
Log loss =  17.748562300103703

Yeast dataset
80.6 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  Binary Relevance w/ GaussianNB
Accuracy =  0.09917355371900827
Hamming loss =  0.30283353010625735
Log loss =  65.19914296904747

%Comparison Binary Relevance SVC%
Bird dataset
356 ms ± 6.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  Binary Relevance w/ SVC
Accuracy =  0.5193798449612403
Hamming loss =  0.04243166054671563
Log loss =  5.06656988488039

Emotions dataset
82.5 ms ± 2.1 ms per

In [120]:
#Classifier Chain
# Each base-estimator variant is run on the three splits, in the same order.
bird_split = (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)
emotions_split = (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)
yeast_split = (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)

for banner, experiment in (
    ("Comparison CC", ClassifierChain),
    ("Comparison CC SVC", ClassifierChainSVC),
    ("Comparison CC MNB", ClassifierChainMNB),
):
    print(banner)
    for dataset_label, split in (
        ("Bird dataset", bird_split),
        ("Emotions dataset", emotions_split),
        ("Yeast dataset", yeast_split),
    ):
        print(dataset_label)
        experiment(*split)

Comparison CC
Bird dataset
76 ms ± 582 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  CC w/ GaussianNB
Accuracy =  0.023255813953488372
Hamming loss =  0.28845369237046103
Log loss =  16.070205795919176

Emotions dataset
10.3 ms ± 211 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  CC w/ GaussianNB
Accuracy =  0.2184873949579832
Hamming loss =  0.23669467787114845
Log loss =  17.729774152665914

Yeast dataset
115 ms ± 6.95 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  CC w/ GaussianNB
Accuracy =  0.09090909090909091
Hamming loss =  0.31729634002361273
Log loss =  67.37627753072266

Comparison CC SVC
Bird dataset
436 ms ± 9.68 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  CC w/ SVC
Accuracy =  0.5193798449612403
Hamming loss =  0.042839657282741736
Log loss =  4.844477648937925

Emotions dataset
79.7 ms ± 2.82 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  CC w/ SVC
Accuracy =  

In [121]:
#Label Powerset
# Each base-estimator variant is run on the three splits, in the same order.
bird_split = (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)
emotions_split = (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)
yeast_split = (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)

for banner, experiment in (
    ("Comparison LP", LabelPowerset),
    ("Comparison LP SVC", LabelPowersetSVC),
    ("Comparison LP MNB", LabelPowersetMNB),
):
    print(banner)
    for dataset_label, split in (
        ("Bird dataset", bird_split),
        ("Emotions dataset", emotions_split),
        ("Yeast dataset", yeast_split),
    ):
        print(dataset_label)
        experiment(*split)

Comparison LP
Bird dataset
17.9 ms ± 444 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  LP w/ GaussianNB
Accuracy =  0.046511627906976744
Hamming loss =  0.08200734394124846
Log loss =  19.91339372336576

Emotions dataset
6.58 ms ± 188 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  LP w/ GaussianNB
Accuracy =  0.24369747899159663
Hamming loss =  0.24649859943977592
Log loss =  28.74001269861988

Yeast dataset
40.9 ms ± 2.86 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  LP w/ GaussianNB
Accuracy =  0.1756198347107438
Hamming loss =  0.24321133412042503
Log loss =  65.31607274155078

Comparison LP SVC
Bird dataset
335 ms ± 13.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  LP w/ SVC
Accuracy =  0.5116279069767442
Hamming loss =  0.044063647490820076
Log loss =  7.517075500495655

Emotions dataset
48.6 ms ± 1.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  LP w/ SVC
Accuracy = 

In [122]:
#Test other methods
# RAkELd on each split; the last element is the labelset size passed along.
for split_and_size in (
    (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird, 19),
    (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions, 6),
    (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast, 14),
):
    RAkELd(*split_and_size)

18.5 ms ± 98.8 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  RAkELd
Accuracy =  0.046511627906976744
Hamming loss =  0.08200734394124846
Log loss =  19.91339372336576

7.12 ms ± 293 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  RAkELd
Accuracy =  0.24369747899159663
Hamming loss =  0.24649859943977592
Log loss =  28.74001269861988

39.7 ms ± 2.36 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  RAkELd
Accuracy =  0.1756198347107438
Hamming loss =  0.24321133412042503
Log loss =  65.31607274155078



In [123]:
#Test other methods
# RAkELO on each split; the last element is the num_labels argument.
for split_and_size in (
    (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird, 19),
    (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions, 6),
    (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast, 14),
):
    RAkELO(*split_and_size)

207 ms ± 13.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  RAkELO
Accuracy =  0.046511627906976744
Hamming loss =  0.08200734394124846
Log loss =  19.91339372336576

67.9 ms ± 2.39 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  RAkELO
Accuracy =  0.24369747899159663
Hamming loss =  0.24649859943977592
Log loss =  28.74001269861988

363 ms ± 5.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  RAkELO
Accuracy =  0.1756198347107438
Hamming loss =  0.24321133412042503
Log loss =  65.31607274155078



In [124]:
#Adapted Algorithms
#MLkNN with k =3
print("MLkNN")
for dataset_label, split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(dataset_label)
    # k = 3 neighbours, smoothing s = 1
    MLkNN(*split, 3, 1)

MLkNN
Bird dataset
585 ms ± 33.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=3
Accuracy =  0.4806201550387597
Hamming loss =  0.05018359853121175
Log loss =  14.733384524766644

Emotions dataset
250 ms ± 1.62 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=3
Accuracy =  0.25210084033613445
Hamming loss =  0.21568627450980393
Log loss =  22.801879123949576

Yeast dataset
2.36 s ± 59.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=3
Accuracy =  0.1962809917355372
Hamming loss =  0.21782762691853602
Log loss =  62.65869941412603



In [125]:
#Adapted Algorithms
#MLkNN with Find the best K
print("MLkNN")
for dataset_label, split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(dataset_label)
    # The search only sees the training part of the split.
    dict_res = FindBestK(split[0], split[1])
    MLkNN(*split, dict_res['k'], dict_res['s'])

MLkNN
Bird dataset
{'k': 2, 's': 0.9}
563 ms ± 8.35 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=2
Accuracy =  0.5038759689922481
Hamming loss =  0.046103631170950635
Log loss =  14.33945301555224

Emotions dataset
{'k': 3, 's': 0.5}
261 ms ± 14.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=3
Accuracy =  0.25210084033613445
Hamming loss =  0.21568627450980393
Log loss =  22.801879123949576

Yeast dataset
{'k': 3, 's': 0.5}
2.49 s ± 98.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=3
Accuracy =  0.1962809917355372
Hamming loss =  0.21782762691853602
Log loss =  62.65869941412603



In [126]:
#MLARAM
# Hand-picked vigilance (v) and threshold (t) shared by all three runs.
v = 0.95
t = 0.05
print("MLARAM")
for dataset_label, split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(dataset_label)
    MLARAM(*split, v, t)



MLARAM
Bird dataset
1.5 s ± 588 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MLARAM w/ Threshold = 0.05, Vigilance = 0.95
Accuracy =  0.06976744186046512
Hamming loss =  0.11587107303141575
Log loss =  19.144448673942428

Emotions dataset
3.07 s ± 1.18 s per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MLARAM w/ Threshold = 0.05, Vigilance = 0.95
Accuracy =  0.24369747899159663
Hamming loss =  0.242296918767507
Log loss =  25.609121502007017

Yeast dataset
5.5 s ± 1.99 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLARAM w/ Threshold = 0.05, Vigilance = 0.95
Accuracy =  0.2024793388429752
Hamming loss =  0.24217827626918537
Log loss =  58.79743077583615



In [127]:
#MLARAM with tuning
print("MLARAM")
for dataset_label, split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(dataset_label)
    # The search only sees the training part of the split.
    dict_res = FindBestVT(split[0], split[1])
    MLARAM(*split, dict_res['vigilance'], dict_res['threshold'])

MLARAM
Bird dataset
{'threshold': 0.01, 'vigilance': 0.9}
The slowest run took 4.23 times longer than the fastest. This could mean that an intermediate result is being cached.
1.17 s ± 487 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MLARAM w/ Threshold = 0.01, Vigilance = 0.9
Accuracy =  0.10077519379844961
Hamming loss =  0.10567115463076296
Log loss =  17.240775251451335

Emotions dataset
{'threshold': 0.08, 'vigilance': 0.9}
The slowest run took 4.13 times longer than the fastest. This could mean that an intermediate result is being cached.
1.87 s ± 754 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MLARAM w/ Threshold = 0.08, Vigilance = 0.9
Accuracy =  0.2857142857142857
Hamming loss =  0.21008403361344538
Log loss =  19.356077640115846

Yeast dataset
{'threshold': 0.07, 'vigilance': 0.7}
1.26 s ± 436 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLARAM w/ Threshold = 0.07, Vigilance = 0.7
Accuracy =  0.1611570247

In [128]:
#todo label relations exploration
for dataset_label, split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(dataset_label)
    LabelSpacePartitioningClassifier(*split)

Bird dataset
107 ms ± 326 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  Label Space Partition
Accuracy =  0.49612403100775193
Hamming loss =  0.044879640962872294
Log loss =  4.937110254934766

Emotions dataset
77.4 ms ± 51.1 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  Label Space Partition
Accuracy =  0.3277310924369748
Hamming loss =  0.18207282913165265
Log loss =  20.942294964690408

Yeast dataset
1.97 s ± 82.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  Label Space Partition
Accuracy =  0.05371900826446281
Hamming loss =  0.2625442739079103
Log loss =  102.874400008373



In [129]:
#Embedded Classifier
for dataset_label, split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(dataset_label)
    EmbeddingClassifierMethod(*split)

Bird dataset
1.62 s ± 35.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  Embedded Classifier
Accuracy =  0.4496124031007752
Hamming loss =  0.0485516115871073
Log loss =  14.682705404528363

Emotions dataset
487 ms ± 4.48 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  Embedded Classifier
Accuracy =  0.2773109243697479
Hamming loss =  0.2030812324929972
Log loss =  20.28697198538459

Yeast dataset
5.28 s ± 87.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  Embedded Classifier
Accuracy =  0.2128099173553719
Hamming loss =  0.20336481700118064
Log loss =  60.06668473037323



In [130]:
# Scratch check of the stepped float grid: 0.01 .. 0.04 in steps of 0.01.
rangefloat = [
    round(step * 0.01, 2)
    for step in range(1, 5)
]
print(rangefloat)

[0.01, 0.02, 0.03, 0.04]
