In [1]:
import os

# Switch the working directory to the parent of the directory the kernel was
# started in (presumably the project root -- confirm against the repo layout).
# The starting directory is remembered the first time this cell runs, so
# re-running the cell does not climb one more level on every execution.
if "_NOTEBOOK_DIR" not in globals():
    _NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.dirname(_NOTEBOOK_DIR))

In [17]:
import shutil

import numpy as np

from skmultilearn.ext import Meka,download_meka
from skmultilearn.dataset import load_dataset
from skmultilearn.adapt import MLkNN

from sklearn.model_selection import train_test_split
from sklearn.metrics import hamming_loss,accuracy_score,f1_score

In [3]:
# Locate the local MEKA release; per the cell output it is reused when already
# present rather than downloaded again. The returned path is later passed to
# Meka as its classpath.
path_to_meka=download_meka()

MEKA 1.9.2 found, not downloading


In [4]:
# Load the 'undivided' variant of the 'scene' multi-label dataset (presumably
# the full set without a predefined train/test split -- the split is done
# below with train_test_split). X holds the features, Y the label matrix.
X, Y, feature_names, label_names = load_dataset('scene', 'undivided')

scene:undivided - exists, not redownloading


In [5]:
# Hold out 33% of the samples for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)

In [6]:
# Sanity check: shape of each split, X (train, test) followed by Y (train, test).
tuple(split.shape for split in (X_train, X_test, Y_train, Y_test))

((1612, 294), (795, 294), (1612, 6), (795, 6))

## train Meka classifier

> Note that Meka is also a wrapper for all [Mulan](http://mulan.sourceforge.net/) classifiers.

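> The default number of neighbours for Mulan MLkNN is 10 ([source](https://github.com/tsoumakas/mulan/blob/master/mulan/src/main/java/mulan/classifier/lazy/MultiLabelKNN.java#L80)), so the skmultilearn `MLkNN` further down is also built with `k=10` to keep the comparison like-for-like.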

In [7]:
# Run Mulan's MLkNN through MEKA's MULAN meta-classifier.
# The Java executable is resolved from PATH so the notebook is not tied to one
# machine's layout; the original '/usr/bin/java' is kept as a fallback when
# no 'java' is found on PATH.
meka = Meka(
    meka_classifier="meka.classifiers.multilabel.MULAN -S MLkNN",
    weka_classifier="weka.classifiers.bayes.NaiveBayes",
    meka_classpath=path_to_meka,
    java_command=shutil.which("java") or "/usr/bin/java",
)

In [8]:
%%time
# Train the MEKA-wrapped Mulan MLkNN on the training split; %%time reports the
# cost of the call.
meka.fit(X_train, Y_train)

CPU times: user 2.83 s, sys: 96 ms, total: 2.92 s
Wall time: 8.69 s


Meka(java_command='/usr/bin/java',
   meka_classifier='meka.classifiers.multilabel.MULAN -S MLkNN',
   meka_classpath='/home/felipe/scikit_ml_learn_data/meka/meka-release-1.9.2/lib/',
   weka_classifier='weka.classifiers.bayes.NaiveBayes')

In [9]:
%%time
# Predict label assignments for the held-out test samples with the MEKA model.
Y_preds_meka = meka.predict(X_test)

CPU times: user 1.41 s, sys: 48 ms, total: 1.46 s
Wall time: 6.44 s


## train skmultilearn classifier

In [10]:
%%time
# k=10 mirrors the Mulan MLkNN default mentioned in the MEKA section, so both
# implementations use the same number of neighbours.
skml = MLkNN(k=10)
skml.fit(X_train, Y_train)

CPU times: user 2.43 s, sys: 8 ms, total: 2.44 s
Wall time: 2.43 s


In [11]:
%%time
# Predict label assignments for the same test samples with the skmultilearn
# model, so the two outputs can be compared row by row.
Y_preds_skml = skml.predict(X_test)

CPU times: user 1.34 s, sys: 12 ms, total: 1.36 s
Wall time: 1.32 s


## compare both outputs

In [12]:
# Both prediction matrices must share the same shape before they can be
# compared row by row.
tuple(preds.shape for preds in (Y_preds_meka, Y_preds_skml))

((795, 6), (795, 6))

In [13]:
# Count the test samples for which both classifiers predict the same label
# vector. Each prediction matrix is densified once instead of slicing and
# converting one sparse row per loop iteration.
dense_meka = Y_preds_meka.toarray()
dense_skml = Y_preds_skml.toarray()

# isclose(...).all(axis=1) is the row-wise equivalent of calling np.allclose
# on every pair of rows.
rows_match = np.isclose(dense_meka, dense_skml).all(axis=1)
number_of_matches = int(rows_match.sum())
number_of_matches

676

In [16]:
# Summary of how closely the two MLkNN implementations agree, followed by
# their scores against the ground-truth test labels.
print("### Predictions equality ###")
# The predictions are sparse matrices (they expose .toarray()); np.array_equal
# does not compare sparse matrices element-wise, so densify both first to get
# a meaningful answer.
print(np.array_equal(Y_preds_meka.toarray(), Y_preds_skml.toarray()))
print('')
print("### Pctg of exact matches ###")
# Share of test samples whose full label vector is identical in both outputs.
print("{:.3f}%".format(number_of_matches/Y_preds_meka.shape[0]*100))
print('')
print("### Hamming loss ###")
print("SKML: %f" % hamming_loss(Y_test, Y_preds_skml))
print("MEKA: %f" % hamming_loss(Y_test, Y_preds_meka))
print('')
print("### Accuracy score ###")
print("SKML: %f" % accuracy_score(Y_test, Y_preds_skml))
print("MEKA: %f " % accuracy_score(Y_test, Y_preds_meka))
print('')
print("### Micro-F1 score ###")
print("SKML: %f" % f1_score(Y_test, Y_preds_skml,average='micro'))
print("MEKA: %f" % f1_score(Y_test, Y_preds_meka,average='micro'))
print('')

### Predictions equality ###
False

### Pctg of exact matches ###
85.031%

### Hamming loss ###
SKML: 0.089518
MEKA: 0.092872

### Accuracy score ###
SKML: 0.644025
MEKA: 0.633962 

### Micro-F1 score ###
SKML: 0.744158
MEKA: 0.742292

