In [59]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from scipy.sparse import csr_matrix, lil_matrix
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestRegressor
from skmultilearn.ensemble import RakelD, RakelO, MajorityVotingClassifier
from skmultilearn.cluster import FixedLabelSpaceClusterer
#from skmultilearn.embedding import SKLearnEmbedder, EmbeddingClassifier
from sklearn.manifold import SpectralEmbedding
import skmultilearn.problem_transform as skpt
import pandas as pd
import numpy as np
import skmultilearn.adapt as skadapt
import sklearn.metrics as metrics
from sklearn import preprocessing

""" Brief description of each base classifier
Base classifiers: MultinomialNB, C-Support Vector Classification (SVC), Logistic Regression, GaussianNB
MultinomialNB:



"""

def BinaryRelevance(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = skpt.BinaryRelevance(GaussianNB())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("Binary Relevance w/ GaussianNB", predictions ,dataset_test_y)
    
def BinaryRelevanceSVC(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):    
    classifier = skpt.BinaryRelevance(SVC())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("Binary Relevance w/ SVC", predictions ,dataset_test_y)
      
def BinaryRelevanceMNB(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):      
    classifier = skpt.BinaryRelevance(MultinomialNB())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("Binary Relevance w/ MNB", predictions ,dataset_test_y)
    
def ClassifierChain(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = skpt.ClassifierChain(LogisticRegression(max_iter=120000))
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("CC w/ Logistic Regression (iter=120000)", predictions ,dataset_test_y)

def ClassifierChainSVC(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = skpt.ClassifierChain(SVC())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("CC w/ SVC", predictions ,dataset_test_y)

def ClassifierChainMNB(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = skpt.ClassifierChain(MultinomialNB())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("CC w/ MNB", predictions ,dataset_test_y)
   
def LabelPowerset(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = skpt.LabelPowerset(LogisticRegression(max_iter=120000))
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("LP w/ Logistic Regression (iter=120000)", predictions ,dataset_test_y)

def LabelPowersetSVC(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):    
    classifier = skpt.LabelPowerset(SVC())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("LP w/ SVC", predictions ,dataset_test_y)

def LabelPowersetMNB(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):    
    classifier = skpt.LabelPowerset(MultinomialNB())
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("LP w/ MNB", predictions ,dataset_test_y)

#Choose best classifier between MNB and SVC - combined
def BinaryRelevanceCV(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    """Grid-search the Binary Relevance base estimator and print test metrics.

    Two candidate families are searched with ``scoring='accuracy'``:
    MultinomialNB over two smoothing values and SVC over two kernels.
    Predictions come from the fitted ``GridSearchCV`` object. Unlike the other
    wrappers in this notebook, the fit is not timed. Returns nothing.
    """
    # alpha is the additive smoothing parameter of the Naive Bayes estimator.
    naive_bayes_grid = {
        'classifier': [MultinomialNB()],
        'classifier__alpha': [0.7, 1.0],
    }
    svc_grid = {
        'classifier': [SVC()],
        'classifier__kernel': ['rbf','linear'],
    }

    search = GridSearchCV(
        skpt.BinaryRelevance(),
        [naive_bayes_grid, svc_grid],
        scoring = 'accuracy',
    )
    search.fit(dataset_train_x, dataset_train_y)

    Metrics_Accuracy(
        "Binary Relevance w/ CV",
        search.predict(dataset_test_x),
        dataset_test_y,
    )
    
def ClassifierChainCV(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    parameters = [
        {
            'classifier': [MultinomialNB()],
            'classifier__alpha': [0.7, 1.0],
        },
        {
            'classifier': [SVC()],
            'classifier__kernel': ['rbf', 'linear'],
        },
    ]
    classifier = GridSearchCV(skpt.ClassifierChain(LogisticRegression(max_iter=120000)),parameters,scoring='accuracy',n_jobs=2)
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("CC Cross Validate w/ Logistic Regression (iter=120000)", predictions ,dataset_test_y)    
 
def MLkNN(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,num_neighbours):
    x_train = lil_matrix(dataset_train_x).toarray()
    y_train = lil_matrix(dataset_train_y).toarray()
    x_test = lil_matrix(dataset_test_x).toarray()
    
    classifier = skadapt.MLkNN(k=num_neighbours)
    %timeit classifier.fit(x_train,y_train)
    predictions = classifier.predict(x_test)
    
    text = "MLkNN w/ k=" + str(num_neighbours)
    
    Metrics_Accuracy(text, predictions ,dataset_test_y)
    
def MLARAM(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,num_vigilance, num_threshold):
    x_train = lil_matrix(dataset_train_x).toarray()
    y_train = lil_matrix(dataset_train_y).toarray()
    x_test = lil_matrix(dataset_test_x).toarray()
    
    classifier = skadapt.MLARAM(threshold = num_threshold, vigilance = num_vigilance)
    %timeit classifier.fit(x_train,y_train)
    predictions = classifier.predict(x_test)
    
    text = "MLARAM w/ Threshold = " + str(num_threshold) + ", Vigilance = "+ str(num_vigilance)
    
    Metrics_Accuracy(text, predictions ,dataset_test_y)
        
    
#Random Label Space Partitioning with Label Powerset
def RAkELd(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,num_labels):
    classifier = RakelD(
        base_classifier=GaussianNB(),
        base_classifier_require_dense=[True, True],
        labelset_size=num_labels
    )

    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("RAkELd", predictions ,dataset_test_y)
    
#random overlapping label space division with Label Powerset
def RAkELO(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,num_labels):
    classifier = RakelO(
        base_classifier=GaussianNB(),
        base_classifier_require_dense=[True, True],
        labelset_size=dataset_train_y.shape[1],
        model_count=12
    )

    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("RAkELO", predictions ,dataset_test_y)

def LabelSpacePartitioningClassifier(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = MajorityVotingClassifier(
        clusterer=FixedLabelSpaceClusterer(clusters = [[1,3,4], [0,2,5]]),
        classifier = skpt.ClassifierChain(classifier=SVC())
    )
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)
    
    Metrics_Accuracy("Label Space Partition", predictions ,dataset_test_y)

def EmbeddingClassifier(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y):
    classifier = EmbeddingClassifier(
        SKLearnEmbedder(SpectralEmbedding(n_components=10)),
        RandomForestRegressor(n_estimators=10),
        MLkNN(k=5)
    )
    %timeit classifier.fit(dataset_train_x, dataset_train_y)
    predictions = classifier.predict(dataset_test_x)

    Metrics_Accuracy("Embedded Classifier", predictions ,dataset_test_y)
    
def Metrics_Accuracy(classifier,predictions,dataset_test_y):
    """Print accuracy, Hamming loss and log loss for one set of predictions.

    Parameters
    ----------
    classifier : value printed in the "Results for" heading; every call site
        in this notebook passes a descriptive string.
    predictions : predicted label matrix, either a dense array or an object
        exposing ``toarray()`` (such as a scipy sparse matrix).
    dataset_test_y : true label matrix for the same rows.

    Returns nothing; all results go to standard output, followed by a blank
    line.
    """
    print("Results for ",classifier)

    # For multilabel input scikit-learn's accuracy_score is the exact-match
    # ratio (a row counts only when every label matches).
    print("Accuracy = ",accuracy_score(dataset_test_y,predictions))

    print("Hamming loss = ",metrics.hamming_loss(dataset_test_y,predictions))

    # Densify only objects that can be densified. The previous
    # `type(predictions) != np.ndarray` test called .toarray() on anything
    # that was not exactly an ndarray, including objects without that method.
    dense_predictions = predictions.toarray() if hasattr(predictions, "toarray") else predictions
    # NOTE(review): log loss is computed on whatever predict() returned; if
    # those are hard 0/1 labels rather than probabilities the value is hard
    # to interpret - confirm this is intended.
    print("Log loss = ",metrics.log_loss(dataset_test_y,dense_predictions))

    print("")
    
def Util_ClassifierMethods(dataset_train_x,dataset_train_y,dataset_test_x,dataset_test_y):
    """Run four wrappers on one train/test split, in a fixed order.

    The order is BinaryRelevance, ClassifierChain, ClassifierChainCV,
    LabelPowerset; each prints its own timing and metrics.
    """
    split = (dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y)
    for run_method in (BinaryRelevance, ClassifierChain, ClassifierChainCV, LabelPowerset):
        run_method(*split)

In [60]:
# Directory that holds the CSV files of the three datasets.
DATA_DIR = r"C:/Users/K/Desktop/Assignment1"


def _scale_and_split(frame, n_features, n_labels):
    """Min-max scale the feature columns of ``frame`` in place and split it 80/20.

    Parameters
    ----------
    frame : DataFrame whose first ``n_features`` columns are features and
        whose last ``n_labels`` columns are labels.
    n_features : number of leading feature columns.
    n_labels : number of trailing label columns.

    Returns
    -------
    tuple
        ``(train, test, train_x, train_y, test_x, test_y)`` where ``train`` /
        ``test`` are the shuffled row splits (``random_state=42``,
        ``test_size=0.20``) and the ``*_x`` / ``*_y`` frames are their feature
        and label column slices.
    """
    # NOTE(review): the scaler is fitted on all rows before the split, so the
    # test rows influence the scaling of the training rows. Kept as-is to
    # leave the reported numbers unchanged.
    scaler = preprocessing.MinMaxScaler()
    frame.iloc[:, 0:n_features] = scaler.fit_transform(frame.iloc[:, 0:n_features])

    train, test = train_test_split(frame, random_state=42, test_size=0.20, shuffle=True)

    return (
        train,
        test,
        train.iloc[:, 0:n_features],
        train.iloc[:, -n_labels:],
        test.iloc[:, 0:n_features],
        test.iloc[:, -n_labels:],
    )


#birds
print("Load Birds dataset")
birds1 = pd.read_csv(DATA_DIR + "/birds-train.csv")
birds2 = pd.read_csv(DATA_DIR + "/birds-test.csv")
# DataFrame.append is deprecated and removed in pandas 2.0; pd.concat stacks
# the two frames the same way (original row indices are kept).
birds = pd.concat([birds1, birds2])

(
    dataset_train_bird,
    dataset_test_bird,
    dataset_train_x_bird,
    dataset_train_y_bird,
    dataset_test_x_bird,
    dataset_test_y_bird,
) = _scale_and_split(birds, 260, 19)

#emotions
print("Load Emotions dataset")
emotions = pd.read_csv(DATA_DIR + "/emotions.csv")

(
    dataset_train_emotions,
    dataset_test_emotions,
    dataset_train_x_emotions,
    dataset_train_y_emotions,
    dataset_test_x_emotions,
    dataset_test_y_emotions,
) = _scale_and_split(emotions, 72, 6)

#yeast
print("Load Yeast dataset")
yeast = pd.read_csv(DATA_DIR + "/yeast.csv")

(
    dataset_train_yeast,
    dataset_test_yeast,
    dataset_train_x_yeast,
    dataset_train_y_yeast,
    dataset_test_x_yeast,
    dataset_test_y_yeast,
) = _scale_and_split(yeast, 103, 14)

Load Birds dataset
Load Emotions dataset
Load Yeast dataset


In [48]:
# Binary Relevance: run every base-estimator variant on every dataset.
splits_by_dataset = {
    "Bird dataset": (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird),
    "Emotions dataset": (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions),
    "Yeast dataset": (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast),
}

for heading, run_method in (
    ("%Comparison Binary Relevance%", BinaryRelevance),
    ("%Comparison Binary Relevance SVC%", BinaryRelevanceSVC),
    ("%Comparison Binary Relevance MNB%", BinaryRelevanceMNB),
):
    print(heading)
    for dataset_name, split in splits_by_dataset.items():
        print(dataset_name)
        run_method(*split)

# Grid-search variant, currently disabled:
# print("%Comparison Binary Relevance Grid Search CV%")
# print("Bird dataset")
# BinaryRelevanceCV(dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)
# print("Emotions dataset")
# BinaryRelevanceCV(dataset_train_x_emotions,dataset_train_y_emotions,dataset_test_x_emotions,dataset_test_y_emotions)
# print("Yeast dataset")
# BinaryRelevanceCV(dataset_train_x_yeast,dataset_train_y_yeast,dataset_test_x_yeast,dataset_test_y_yeast)

%Comparison Binary Relevance%
Bird dataset
63.7 ms ± 916 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  Binary Relevance w/ GaussianNB
Accuracy =  0.023255813953488372
Hamming loss =  0.4679722562219502
Log loss =  13.848272932073373

Emotions dataset
8.89 ms ± 198 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  Binary Relevance w/ GaussianNB
Accuracy =  0.23529411764705882
Hamming loss =  0.242296918767507
Log loss =  17.748562300103703

Yeast dataset
83.3 ms ± 2.46 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  Binary Relevance w/ GaussianNB
Accuracy =  0.09917355371900827
Hamming loss =  0.30283353010625735
Log loss =  65.19914296904747

%Comparison Binary Relevance SVC%
Bird dataset
367 ms ± 13.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  Binary Relevance w/ SVC
Accuracy =  0.5193798449612403
Hamming loss =  0.04243166054671563
Log loss =  5.06656988488039

Emotions dataset
80.6 ms ± 1.15 ms p

In [49]:
# Classifier Chain: run every base-estimator variant on every dataset.
splits_by_dataset = {
    "Bird dataset": (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird),
    "Emotions dataset": (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions),
    "Yeast dataset": (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast),
}

for heading, run_method in (
    ("Comparison CC", ClassifierChain),
    ("Comparison CC SVC", ClassifierChainSVC),
    ("Comparison CC MNB", ClassifierChainMNB),
):
    print(heading)
    for dataset_name, split in splits_by_dataset.items():
        print(dataset_name)
        run_method(*split)

Comparison CC
Bird dataset
277 ms ± 9.26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  CC w/ Logistic Regression (iter=120000)
Accuracy =  0.5271317829457365
Hamming loss =  0.04079967360261118
Log loss =  6.3651325985076035

Emotions dataset
119 ms ± 3.07 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  CC w/ Logistic Regression (iter=120000)
Accuracy =  0.31932773109243695
Hamming loss =  0.19887955182072828
Log loss =  22.408560859175967

Yeast dataset
968 ms ± 12.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  CC w/ Logistic Regression (iter=120000)
Accuracy =  0.19834710743801653
Hamming loss =  0.21487603305785125
Log loss =  63.07648711082171

Comparison CC SVC
Bird dataset
422 ms ± 13.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  CC w/ SVC
Accuracy =  0.5193798449612403
Hamming loss =  0.042839657282741736
Log loss =  4.844477648937925

Emotions dataset
77.9 ms ± 1.53 ms per loop (mean ± std. d

In [50]:
# Label Powerset: run every base-estimator variant on every dataset.
splits_by_dataset = {
    "Bird dataset": (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird),
    "Emotions dataset": (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions),
    "Yeast dataset": (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast),
}

for heading, run_method in (
    ("Comparison LP", LabelPowerset),
    ("Comparison LP SVC", LabelPowersetSVC),
    ("Comparison LP MNB", LabelPowersetMNB),
):
    print(heading)
    for dataset_name, split in splits_by_dataset.items():
        print(dataset_name)
        run_method(*split)

Comparison LP
Bird dataset
1.5 s ± 37.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  LP w/ Logistic Regression (iter=120000)
Accuracy =  0.5348837209302325
Hamming loss =  0.0416156670746634
Log loss =  10.80469458603544

Emotions dataset
307 ms ± 8.32 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  LP w/ Logistic Regression (iter=120000)
Accuracy =  0.31932773109243695
Hamming loss =  0.21988795518207283
Log loss =  21.52267144665573

Yeast dataset
17.4 s ± 270 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  LP w/ Logistic Regression (iter=120000)
Accuracy =  0.2727272727272727
Hamming loss =  0.2024793388429752
Log loss =  61.579227927103624

Comparison LP SVC
Bird dataset
322 ms ± 2.82 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  LP w/ SVC
Accuracy =  0.5116279069767442
Hamming loss =  0.044063647490820076
Log loss =  7.517075500495655

Emotions dataset
47.7 ms ± 783 µs per loop (mean ± std. dev. of 7 

In [51]:
# Grid search over the Classifier Chain base estimator, on every dataset.
print("Comparison GridSearchCV for CC")
for dataset_name, split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(dataset_name)
    ClassifierChainCV(*split)

Comparison GridSearchCV for CC
Bird dataset
2.69 s ± 47.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  CC Cross Validate w/ Logistic Regression (iter=120000)
Accuracy =  0.5271317829457365
Hamming loss =  0.04039167686658507
Log loss =  10.714785994665249

Emotions dataset
607 ms ± 8.68 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  CC Cross Validate w/ Logistic Regression (iter=120000)
Accuracy =  0.35294117647058826
Hamming loss =  0.19187675070028012
Log loss =  21.021862144997353

Yeast dataset
15.3 s ± 566 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  CC Cross Validate w/ Logistic Regression (iter=120000)
Accuracy =  0.22107438016528927
Hamming loss =  0.2001180637544274
Log loss =  65.94675102057282



In [52]:
# RAkELd on birds, emotions and yeast; the last value of each entry is the
# labelset size handed to the wrapper.
for split, labelset_size in (
    ((dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird), 19),
    ((dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions), 6),
    ((dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast), 14),
):
    RAkELd(*split, labelset_size)

21.8 ms ± 2.45 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  RAkELd
Accuracy =  0.046511627906976744
Hamming loss =  0.08200734394124846
Log loss =  19.91339372336576

7.51 ms ± 629 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Results for  RAkELd
Accuracy =  0.24369747899159663
Hamming loss =  0.24649859943977592
Log loss =  28.74001269861988

43 ms ± 1.79 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  RAkELd
Accuracy =  0.1756198347107438
Hamming loss =  0.24321133412042503
Log loss =  65.31607274155078



In [53]:
# RAkELO on birds, emotions and yeast; the last value of each entry is the
# num_labels argument handed to the wrapper.
for split, labelset_size in (
    ((dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird), 19),
    ((dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions), 6),
    ((dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast), 14),
):
    RAkELO(*split, labelset_size)

202 ms ± 4.23 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  RAkELO
Accuracy =  0.046511627906976744
Hamming loss =  0.08200734394124846
Log loss =  19.91339372336576

74.1 ms ± 4.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  RAkELO
Accuracy =  0.24369747899159663
Hamming loss =  0.24649859943977592
Log loss =  28.74001269861988

617 ms ± 250 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  RAkELO
Accuracy =  0.1756198347107438
Hamming loss =  0.24321133412042503
Log loss =  65.31607274155078



In [54]:
# Adapted algorithms: MLkNN with k=2 on every dataset.
print("MLkNN")
for dataset_name, split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(dataset_name)
    MLkNN(*split, 2)

MLkNN
Bird dataset
625 ms ± 9.18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=2
Accuracy =  0.5038759689922481
Hamming loss =  0.046103631170950635
Log loss =  14.33945301555224

Emotions dataset
271 ms ± 6.76 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=2
Accuracy =  0.21008403361344538
Hamming loss =  0.23809523809523808
Log loss =  24.290454309919024

Yeast dataset
2.35 s ± 72.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLkNN w/ k=2
Accuracy =  0.1487603305785124
Hamming loss =  0.2473435655253837
Log loss =  70.47514298171542



In [55]:
# Adapted algorithms: MLARAM with one vigilance/threshold setting on every dataset.
v = 0.95  # vigilance
t = 0.05  # threshold
print("MLARAM")
for dataset_name, split in (
    ("Bird dataset", (dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)),
    ("Emotions dataset", (dataset_train_x_emotions, dataset_train_y_emotions, dataset_test_x_emotions, dataset_test_y_emotions)),
    ("Yeast dataset", (dataset_train_x_yeast, dataset_train_y_yeast, dataset_test_x_yeast, dataset_test_y_yeast)),
):
    print(dataset_name)
    MLARAM(*split, v, t)

MLARAM
Bird dataset
1.51 s ± 570 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MLARAM w/ Threshold = 0.05, Vigilance = 0.95
Accuracy =  0.06976744186046512
Hamming loss =  0.11587107303141575
Log loss =  19.144448673942428

Emotions dataset
The slowest run took 4.40 times longer than the fastest. This could mean that an intermediate result is being cached.
2.95 s ± 1.24 s per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  MLARAM w/ Threshold = 0.05, Vigilance = 0.95
Accuracy =  0.24369747899159663
Hamming loss =  0.242296918767507
Log loss =  25.609121502007017

Yeast dataset
5.47 s ± 1.93 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  MLARAM w/ Threshold = 0.05, Vigilance = 0.95
Accuracy =  0.2024793388429752
Hamming loss =  0.24217827626918537
Log loss =  58.79743077583615



In [58]:
#todo label relations exploration
# The bird run was missing its heading, unlike the other two datasets here
# and every other comparison cell, leaving its results unlabelled.
print("Bird dataset")
LabelSpacePartitioningClassifier(dataset_train_x_bird, dataset_train_y_bird, dataset_test_x_bird, dataset_test_y_bird)
print("Emotions dataset")
LabelSpacePartitioningClassifier(dataset_train_x_emotions,dataset_train_y_emotions,dataset_test_x_emotions,dataset_test_y_emotions)
print("Yeast dataset")
LabelSpacePartitioningClassifier(dataset_train_x_yeast,dataset_train_y_yeast,dataset_test_x_yeast,dataset_test_y_yeast)

120 ms ± 10.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  Label Space Partition
Accuracy =  0.49612403100775193
Hamming loss =  0.044879640962872294
Log loss =  4.937110254934766

Emotions dataset
88.1 ms ± 2.37 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Results for  Label Space Partition
Accuracy =  0.3277310924369748
Hamming loss =  0.18207282913165265
Log loss =  20.942294964690408

Yeast dataset
2.06 s ± 43.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Results for  Label Space Partition
Accuracy =  0.05371900826446281
Hamming loss =  0.2625442739079103
Log loss =  102.874400008373

