# Multi-Label Classification of music into emotions

In [2]:
# Fetch the train and test splits of the 'emotions' dataset
# (load_dataset reuses a local copy when one already exists).
from skmultilearn.dataset import load_dataset

X_train, y_train, feature_names, label_names = load_dataset('emotions', 'train')
X_test, y_test, _, _ = load_dataset('emotions', 'test')

# convert both label matrices to arrays via .toarray()
y_train, y_test = y_train.toarray(), y_test.toarray()

emotions:train - exists, not redownloading
emotions:test - exists, not redownloading


In [3]:
# how many feature names came back with the training split
len(feature_names)

72

In [4]:
# (n_samples, n_features) of the train and test feature matrices
X_train.shape, X_test.shape

((391, 72), (202, 72))

In [5]:
# existing labels: one (name, possible values) pair per emotion label
label_names

[('amazed-suprised', ['0', '1']),
 ('happy-pleased', ['0', '1']),
 ('relaxing-calm', ['0', '1']),
 ('quiet-still', ['0', '1']),
 ('sad-lonely', ['0', '1']),
 ('angry-aggresive', ['0', '1'])]

In [6]:
# How many distinct label combinations occur in the training labels?
# Each row of y_train is one sample's label vector, so count unique rows.
import numpy as np
np.unique(y_train, axis=0).shape[0]

26

In [7]:
# Approach 1: KNeighborsClassifier also works for multi-label classification
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# k-nearest neighbours is a supervised classifier (it is fitted on the
# labels in y_train): the labels of a test sample are predicted from the
# labels of its 'n_neighbors' closest training samples.
classifier = KNeighborsClassifier(n_neighbors=3)

# train
classifier.fit(X_train, y_train)

# predict
y_pred = classifier.predict(X_test)

# For multi-label targets accuracy_score is the subset (exact-match)
# accuracy: a sample only counts as correct when every one of its labels
# is predicted correctly, which is why the score looks low.
accuracy_score(y_test,y_pred)

0.19306930693069307

In [8]:
# Approach 2: Multi output classification.
#
# This strategy consists of fitting one classifier per target column.
# This is a simple strategy for extending classifiers that do not
# natively support multi-output classification.

# Note: although every label gets its own classifier, accuracy_score on
# multi-label targets is NOT the average of the per-label accuracies.
# It is the subset (exact-match) accuracy: a sample only counts as
# correct when all of its labels are predicted correctly.
# (The mean per-label accuracy would be (y_pred == y_test).mean().)

from sklearn.multioutput import MultiOutputClassifier
from sklearn.svm import SVC

classifier = MultiOutputClassifier(SVC(kernel='linear'))
classifier.fit(X_train, y_train)

# predict
y_pred = classifier.predict(X_test)

accuracy_score(y_test,y_pred)

0.2722772277227723

In [None]:
# Approach 3: Label powerset
# Label powerset is a problem transformation method: every distinct
# combination of labels seen in training is treated as one class, which
# turns the multi-label problem into an ordinary multi-class problem.
# A multi-class classifier can then predict whole label combinations
# directly.

from sklearn.svm import SVC
from skmultilearn.problem_transform import LabelPowerset

# wrap a linear-kernel SVM in the Label Powerset transformation
classifier = LabelPowerset(classifier=SVC(kernel='linear'))

# fit on the training split
classifier.fit(X_train, y_train)

# sanity check: should match the 26 unique label combinations counted earlier
print(len(classifier.unique_combinations_))

# predict label sets for the test split
predictions = classifier.predict(X_test)

# accuracy_score was imported in the Approach 1 cell
accuracy_score(y_test, predictions)

26


0.3564356435643564