In [1]:
import numpy as np
from collections import Counter
from scipy.io import arff
import pandas as pd
from skmultilearn.dataset import load_dataset
from skmultilearn.dataset import available_data_sets
from skmultilearn.problem_transform import LabelPowerset
from sklearn.neighbors import KNeighborsClassifier
import time
import sklearn.metrics as metrics
from skmultilearn.adapt import MLkNN
from sklearn.model_selection import GridSearchCV

In [10]:
# Load the predefined 'train' and 'test' variants of the 'yeast' dataset.
# Each result is unpacked into four names: features, labels, feature names
# and label names. The two name lists are rebound by the second unpacking.
train_split = load_dataset('yeast', 'train')
X, y, feature_names, label_names = train_split
test_split = load_dataset('yeast', 'test')
Xt, yt, feature_names, label_names = test_split

yeast:train - exists, not redownloading
yeast:test - exists, not redownloading


In [3]:
# Problem transformation: Label Powerset wrapped around a k-nearest-neighbours
# base estimator configured with 12 neighbours.
base_knn = KNeighborsClassifier(n_neighbors=12)
classifier = LabelPowerset(classifier=base_knn, require_dense=True)

# Time the fit on the training split; the elapsed wall-clock time is rounded
# to whole seconds before it is printed.
fit_started = time.time()
classifier.fit(X, y)
fit_elapsed = round(time.time() - fit_started, 0)
print('training time taken: ', fit_elapsed, 'seconds')

('training time taken: ', 0.0, 'seconds')


In [4]:
# Predict on the test split with the current `classifier` and report the
# elapsed wall-clock time, rounded to whole seconds.
predict_started = time.time()
y_hat = classifier.predict(Xt)
predict_elapsed = round(time.time() - predict_started, 0)
print('prediction time taken: ', predict_elapsed, 'seconds')

('prediction time taken: ', 0.0, 'seconds')


In [5]:
# Evaluate the predictions against the test labels: micro-averaged F1 and
# Hamming loss, each printed rounded to three decimals.
lp_f1 = metrics.f1_score(yt, y_hat, average='micro')
lp_hamm = metrics.hamming_loss(yt, y_hat)
for metric_label, metric_value in (
    ('Label Powerset F1-score:', lp_f1),
    ('Label Powerset Hamming Loss:', lp_hamm),
):
    print(metric_label, round(metric_value, 3))

('Label Powerset F1-score:', 0.621)
('Label Powerset Hamming Loss:', 0.219)


In [6]:
# ML-kNN (Multi-Label k-Nearest-Neighbors) with k=12.
# NOTE: this rebinds `classifier`, replacing whatever model the name held
# before this cell ran.
# The timer starts before the estimator is constructed, so the reported time
# covers both construction and fitting.
fit_started = time.time()

classifier = MLkNN(k=12)
classifier.fit(X, y)

fit_elapsed = round(time.time() - fit_started, 0)
print('training time taken: ', fit_elapsed, 'seconds')

  result = np.vstack(result)


('training time taken: ', 2.0, 'seconds')


In [7]:
# Apply the current `classifier` to the test split and report the elapsed
# wall-clock time, rounded to whole seconds.
predict_started = time.time()
y_hat = classifier.predict(Xt)
predict_elapsed = round(time.time() - predict_started, 0)
print('prediction time taken: ', predict_elapsed, 'seconds')

('prediction time taken: ', 1.0, 'seconds')


In [8]:
# Evaluate the ML-kNN predictions against the test labels: micro-averaged F1
# and Hamming loss, each printed rounded to three decimals.
# The printed labels previously said "Label Powerset" (copy-paste leftover),
# which mislabelled these ML-kNN results.
# NOTE(review): the `lp_` variable names are kept so any later cell that reads
# them keeps working; they overwrite the Label Powerset scores computed in an
# earlier cell. Consider dedicated names (e.g. `mlknn_f1`) once usages are
# confirmed.
lp_f1 = metrics.f1_score(yt, y_hat, average='micro')
lp_hamm = metrics.hamming_loss(yt, y_hat)
print('ML-kNN F1-score:', round(lp_f1, 3))
print('ML-kNN Hamming Loss:', round(lp_hamm, 3))

('Label Powerset F1-score:', 0.625)
('Label Powerset Hamming Loss:', 0.204)


In [9]:
from skmultilearn.problem_transform import BinaryRelevance

# Problem transformation: Binary Relevance wrapped around a k-nearest-neighbours
# base estimator configured with 12 neighbours.
# (The header comment and metric labels in this cell previously said
# "Label Powerset", a copy-paste leftover that mislabelled the results.)
classifier2 = BinaryRelevance(
    classifier=KNeighborsClassifier(n_neighbors=12),
    require_dense=True
)

# Time the fit on the training split; elapsed time is rounded to whole seconds.
start = time.time()
classifier2.fit(X, y)
print('training time taken: ', round(time.time() - start, 0), 'seconds')

# Predict on the test split and time it the same way.
start = time.time()
y_hat = classifier2.predict(Xt)
print('prediction time taken: ', round(time.time() - start, 0), 'seconds')

# Evaluate: micro-averaged F1 and Hamming loss, printed to three decimals.
# NOTE(review): the `lp_` variable names are kept so any later cell that reads
# them keeps working; they overwrite scores computed in earlier cells.
lp_f1 = metrics.f1_score(yt, y_hat, average='micro')
lp_hamm = metrics.hamming_loss(yt, y_hat)
print('Binary Relevance F1-score:', round(lp_f1, 3))
print('Binary Relevance Hamming Loss:', round(lp_hamm, 3))

('training time taken: ', 0.0, 'seconds')
('prediction time taken: ', 3.0, 'seconds')
('Label Powerset F1-score:', 0.618)
('Label Powerset Hamming Loss:', 0.202)
