In [1]:
import numpy as np
from collections import Counter
from scipy.io import arff
import pandas as pd
from skmultilearn.dataset import load_dataset
from skmultilearn.dataset import available_data_sets
from skmultilearn.problem_transform import LabelPowerset,BinaryRelevance
from sklearn.neighbors import KNeighborsClassifier
import time
import sklearn.metrics as metrics
from skmultilearn.adapt import MLkNN
from sklearn.model_selection import GridSearchCV, cross_validate

In [17]:
#Load dataset

X, y, feature_names, label_names = load_dataset('yeast', 'undivided')

def custom_ce(yt, y_hat):
    return metrics.coverage_error(yt.toarray(),y_hat.toarray())

def custom_rl(yt, y_hat):
    return metrics.label_ranking_loss(yt.toarray(),y_hat.toarray())

def custom_lr(yt, y_hat):
    return metrics.label_ranking_average_precision_score(yt.toarray(),y_hat.toarray())

hl_scorer = metrics.make_scorer(metrics.hamming_loss, greater_is_better=False)
ce_scorer = metrics.make_scorer(custom_ce, greater_is_better=False)
rl_scorer = metrics.make_scorer(custom_rl, greater_is_better=False)
lr_scorer = metrics.make_scorer(custom_lr, greater_is_better=True)

scores = {
  "hl": hl_scorer,
  "ce": ce_scorer,
  "lr": lr_scorer,
  "rl": rl_scorer
}

yeast:undivided - exists, not redownloading


In [25]:
#ML-kNN k-fold

classifier = MLkNN(k=12)
cv_results = cross_validate(classifier, X, y, scoring = scores,cv=10)

KeyboardInterrupt: 

In [24]:
print 'ML-kNN 10-fold Hamming loss: ', -np.mean(cv_results['test_hl'])
print 'ML-kNN 10-fold Coverage error: ', -np.mean(cv_results['test_ce'])
print 'ML-kNN 10-fold Ranking Loss: ', -np.mean(cv_results['test_rl'])
print 'ML-kNN 10-fold Label Ranking Average Precision: ', np.mean(cv_results['test_lr'])

ML-kNN 10-fold Hamming loss:  0.19888917193707836
ML-kNN 10-fold Coverage error:  11.236375638695517
ML-kNN 10-fold Ranking Loss:  0.43854323221612945
ML-kNN 10-fold Label Ranking Average Preicision:  0.6159937225542723


In [27]:
#Binary Relevance k-fold

classifier2 = BinaryRelevance(
    classifier = KNeighborsClassifier(n_neighbors=12)
)
cv_results = cross_validate(classifier2, X, y, scoring = scores,cv=10)

In [28]:
print 'BinaryRelevance 10-fold Hamming loss: ', -np.mean(cv_results['test_hl'])
print 'BinaryRelevance 10-fold Coverage error: ', -np.mean(cv_results['test_ce'])
print 'BinaryRelevance 10-fold Ranking Loss: ', -np.mean(cv_results['test_rl'])
print 'BinaryRelevance 10-fold Label Ranking Average Precision: ', np.mean(cv_results['test_lr'])

BinaryRelevance 10-fold Hamming loss:  0.19616978645647073
BinaryRelevance 10-fold Coverage error:  11.524282432015362
BinaryRelevance 10-fold Ranking Loss:  0.4658913471510225
BinaryRelevance 10-fold Label Ranking Average Preicision:  0.6150200020833977


In [29]:
#Label Power-set k-fold

classifier3 = LabelPowerset(
    classifier = KNeighborsClassifier(n_neighbors=12)
)
cv_results = cross_validate(classifier3, X, y, scoring = scores,cv=10)

In [30]:
print 'Label Power-set 10-fold Hamming loss: ', -np.mean(cv_results['test_hl'])
print 'Label Power-set 10-fold Coverage error: ', -np.mean(cv_results['test_ce'])
print 'Label Power-set 10-fold Ranking Loss: ', -np.mean(cv_results['test_rl'])
print 'Label Power-set 10-fold Label Ranking Average Precision: ', np.mean(cv_results['test_lr'])

Label Power-set 10-fold Hamming loss:  0.21129982804822486
Label Power-set 10-fold Coverage error:  10.626149651932376
Label Power-set 10-fold Ranking Loss:  0.42591493637901934
Label Power-set 10-fold Label Ranking Average Preicision:  0.6140993890215607
