diff --git a/doc/cookbook/source/examples/classifier/multiclass_logisticregression.rst b/doc/cookbook/source/examples/classifier/multiclass_logisticregression.rst new file mode 100644 index 00000000000..c8402c074d3 --- /dev/null +++ b/doc/cookbook/source/examples/classifier/multiclass_logisticregression.rst @@ -0,0 +1,50 @@ +=============================== +Multi-class Logistic Regression +=============================== + +Multinomial logistic regression assigns the sample :math:`\mathbf{x}_i` to class :math:`c` +based on the probability for sample :math:`\mathbf{x}_i` to be in class :math:`c`: + +.. math:: + + P(Y_i = c | \mathbf{x}_i) = \frac{\exp(\mathbf{\theta}^\top_c\mathbf{x}_i)}{\sum_{k=1}^{K}\exp(\mathbf{\theta}^\top_k\mathbf{x}_i)} + +in which :math:`K` is the number of classes. + +The loss function that needs to be minimized is: + +.. math:: + + {\min_{\mathbf{\theta}}}\sum_{k=1}^{K}\sum_{i=1}^{m}w_{ik}\log(1+\exp(-y_{ik}(\mathbf{\theta}_k^\top\mathbf{a}_{ik} + c_k))) + \lambda\left \| \mathbf{\theta} \right \|_{l_1/l_q} + +where :math:`\mathbf{\theta}_k` is the weight vector of the :math:`k`-th class, :math:`\mathbf{a}_{ik}` denotes the :math:`i`-th sample for the :math:`k`-th class, :math:`w_{ik}` is the weight for :math:`\mathbf{a}_{ik}`, +:math:`y_{ik}` is the response of :math:`\mathbf{a}_{ik}`, and :math:`c_k` is the intercept (scalar) for the :math:`k`-th class. +:math:`\lambda` is the :math:`l_1/l_q`-norm regularization parameter. + +------- +Example +------- + +Imagine we have files with training and test data. We create :sgclass:`CDenseFeatures` (here 64 bit floats aka RealFeatures) and :sgclass:`CMulticlassLabels` as + +.. sgexample:: multiclass_logisticregression.sg:create_features + +We create an instance of the :sgclass:`CMulticlassLogisticRegression` classifier by passing it the dataset, labels, and specifying the regularization constant :math:`\lambda` for each machine + +.. sgexample:: multiclass_logisticregression.sg:create_instance + +Then we train and apply it to test data, which here gives :sgclass:`CMulticlassLabels`. 
+ +.. sgexample:: multiclass_logisticregression.sg:train_and_apply + +We can evaluate test performance via e.g. :sgclass:`CMulticlassAccuracy`. + +.. sgexample:: multiclass_logisticregression.sg:evaluate_accuracy + +---------- +References +---------- + +:wiki:`Multinomial_logistic_regression` + +:wiki:`Multiclass_classification` diff --git a/examples/meta/src/classifier/multiclass_logisticregression.sg b/examples/meta/src/classifier/multiclass_logisticregression.sg new file mode 100644 index 00000000000..ef8a88413da --- /dev/null +++ b/examples/meta/src/classifier/multiclass_logisticregression.sg @@ -0,0 +1,29 @@ +CSVFile f_feats_train("../../data/classifier_4class_2d_linear_features_train.dat") +CSVFile f_feats_test("../../data/classifier_4class_2d_linear_features_test.dat") +CSVFile f_labels_train("../../data/classifier_4class_2d_linear_labels_train.dat") +CSVFile f_labels_test("../../data/classifier_4class_2d_linear_labels_test.dat") + +#![create_features] +RealFeatures features_train(f_feats_train) +RealFeatures features_test(f_feats_test) +MulticlassLabels labels_train(f_labels_train) +MulticlassLabels labels_test(f_labels_test) +#![create_features] + + +#![create_instance] +MulticlassLogisticRegression classifier(1, features_train, labels_train) +#![create_instance] + +#![train_and_apply] +classifier.train() +MulticlassLabels labels_predict = classifier.apply_multiclass(features_test) +#![train_and_apply] + +#![evaluate_accuracy] +MulticlassAccuracy eval() +real accuracy = eval.evaluate(labels_predict, labels_test) +#![evaluate_accuracy] + +# integration testing variables +RealVector output = labels_predict.get_labels() diff --git a/examples/undocumented/python_modular/classifier_multiclasslogisticregression_modular.py b/examples/undocumented/python_modular/classifier_multiclasslogisticregression_modular.py deleted file mode 100644 index ef0af89c33c..00000000000 --- a/examples/undocumented/python_modular/classifier_multiclasslogisticregression_modular.py +++ 
/dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -from tools.multiclass_shared import prepare_data - - - -[traindat, label_traindat, testdat, label_testdat] = prepare_data(False) - -parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1e-5]] - -def classifier_multiclasslogisticregression_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,z=1,epsilon=1e-5): - from modshogun import RealFeatures, MulticlassLabels - try: - from modshogun import MulticlassLogisticRegression - except ImportError: - print("recompile shogun with Eigen3 support") - return - - feats_train=RealFeatures(fm_train_real) - feats_test=RealFeatures(fm_test_real) - - labels=MulticlassLabels(label_train_multiclass) - - classifier = MulticlassLogisticRegression(z,feats_train,labels) - classifier.train() - - label_pred = classifier.apply(feats_test) - out = label_pred.get_labels() - - if label_test_multiclass is not None: - from modshogun import MulticlassAccuracy - labels_test = MulticlassLabels(label_test_multiclass) - evaluator = MulticlassAccuracy() - acc = evaluator.evaluate(label_pred, labels_test) - print('Accuracy = %.4f' % acc) - - return out - -if __name__=='__main__': - print('MulticlassLogisticRegression') - classifier_multiclasslogisticregression_modular(*parameter_list[0])