In [None]:
# https://github.com/catboost/tutorials/blob/master/classification/multilabel_classification_tutorial.ipynb

In [1]:
from catboost import CatBoostClassifier, Pool
from catboost.utils import eval_metric
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split

In [2]:
# Generate synthetic dataset

X, Y = make_multilabel_classification(n_samples=500, n_features=20, n_classes=5, random_state=0)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
train_pool = Pool(X_train, Y_train)
test_pool = Pool(X_test, Y_test)

In [7]:
X

array([[3., 1., 4., ..., 4., 1., 3.],
       [5., 0., 6., ..., 0., 0., 3.],
       [3., 4., 1., ..., 3., 2., 5.],
       ...,
       [3., 1., 3., ..., 4., 3., 3.],
       [5., 2., 4., ..., 2., 1., 4.],
       [3., 2., 3., ..., 3., 2., 1.]])

In [8]:
Y

array([[0, 0, 1, 1, 1],
       [0, 0, 1, 0, 0],
       [1, 1, 0, 1, 0],
       ...,
       [0, 0, 0, 0, 0],
       [1, 0, 0, 0, 1],
       [0, 1, 1, 0, 1]])

In [4]:
# Train model

clf = CatBoostClassifier(
    loss_function='MultiLogloss',
    eval_metric='HammingLoss',
    iterations=500,
    class_names=['A', 'B', 'C', 'D', 'E']
)
clf.fit(train_pool, eval_set=test_pool, metric_period=10, plot=True, verbose=50)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.033623
0:	learn: 0.2858667	test: 0.3328000	best: 0.3328000 (0)	total: 183ms	remaining: 1m 31s
50:	learn: 0.0784000	test: 0.1920000	best: 0.1920000 (50)	total: 309ms	remaining: 2.72s
100:	learn: 0.0362667	test: 0.2080000	best: 0.1920000 (50)	total: 442ms	remaining: 1.74s
150:	learn: 0.0149333	test: 0.2016000	best: 0.1920000 (50)	total: 615ms	remaining: 1.42s
200:	learn: 0.0074667	test: 0.2032000	best: 0.1920000 (50)	total: 740ms	remaining: 1.1s
250:	learn: 0.0026667	test: 0.2048000	best: 0.1920000 (50)	total: 857ms	remaining: 850ms
300:	learn: 0.0005333	test: 0.2048000	best: 0.1920000 (50)	total: 973ms	remaining: 643ms
350:	learn: 0.0000000	test: 0.2128000	best: 0.1920000 (50)	total: 1.12s	remaining: 475ms
400:	learn: 0.0000000	test: 0.2144000	best: 0.1920000 (50)	total: 1.29s	remaining: 319ms
450:	learn: 0.0000000	test: 0.2176000	best: 0.1920000 (50)	total: 1.45s	remaining: 157ms
499:	learn: 0.0000000	test: 0.2208000	best: 0.1920000 (50)	total: 1.59s	remaining: 0

<catboost.core.CatBoostClassifier at 0x1d13a626800>

In [5]:
# Predict for test data


test_predict = clf.predict(X_test)

In [6]:
test_predict

array([[0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0],
       [1, 1, 0, 0, 1],
       [1, 1, 0, 1, 0],
       [0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 0, 1, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0],
       [1, 0, 0, 1, 0],
       [1, 1, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [1, 0, 1, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 1, 1],
       [0, 0, 0, 0, 0],
       [0, 1, 0, 1, 0],
       [0, 1, 1, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 1, 1, 0, 0],
       [0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 1, 0, 1, 0],
       [1, 1, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [1, 0, 0,

In [10]:
# Precision, Recall, F1
# These metrics are calculated for each class individually:


for metric in ('Precision', 'Recall', 'F1'):
    print(metric)
    values = eval_metric(Y_test, test_predict, metric)
    for cls, value in zip(clf.classes_, values):
        print(f'class={cls}: {value:.4f}')
    print()

Precision
class=A: 0.8333
class=B: 0.7273
class=C: 0.7600
class=D: 0.7714
class=E: 0.9231

Recall
class=A: 0.4412
class=B: 0.6780
class=C: 0.5588
class=D: 0.5510
class=E: 0.6857

F1
class=A: 0.5769
class=B: 0.7018
class=C: 0.6441
class=D: 0.6429
class=E: 0.7869

