diff --git a/data b/data index 7b23694342c..b9b2d9058c2 160000 --- a/data +++ b/data @@ -1 +1 @@ -Subproject commit 7b23694342c8954f470da3a861511ed4465a6ced +Subproject commit b9b2d9058c22dae86a4181db7271dfa02485af7d diff --git a/doc/cookbook/source/examples/multiclass_classifier/shareboost.rst b/doc/cookbook/source/examples/multiclass_classifier/shareboost.rst new file mode 100644 index 00000000000..001d42857a1 --- /dev/null +++ b/doc/cookbook/source/examples/multiclass_classifier/shareboost.rst @@ -0,0 +1,33 @@ +========== +ShareBoost +========== + +The ShareBoost algorithm learns a multiclass predictor from a subset of shared features of the samples using a forward greedy selection approach. + +See :cite:`shalev2011shareboost` for a detailed introduction. + +------- +Example +------- +Imagine we have files with training and test data. We create :sgclass:`CDenseFeatures` (here 64 bit floats aka RealFeatures) and :sgclass:`CMulticlassLabels` as + +.. sgexample:: shareboost.sg:create_features + +We create an instance of the :sgclass:`CShareBoost` classifier by setting the number of features expected to be used for learning. + +.. sgexample:: shareboost.sg:create_instance + +Then we train and apply it to test data, which gives :sgclass:`CMulticlassLabels`. + +.. sgexample:: shareboost.sg:train_and_apply + +We can evaluate test performance via e.g. :sgclass:`CMulticlassAccuracy`. + +.. sgexample:: shareboost.sg:evaluate_accuracy + +---------- +References +---------- + +.. 
bibliography:: ../../references.bib + :filter: docname in docnames diff --git a/doc/cookbook/source/references.bib b/doc/cookbook/source/references.bib index 91f242f88f3..3322b8cf452 100644 --- a/doc/cookbook/source/references.bib +++ b/doc/cookbook/source/references.bib @@ -79,3 +79,10 @@ @inproceedings{gao2011discriminative year={2011}, organization={IEEE} } +@inproceedings{shalev2011shareboost, + title={Shareboost: Efficient multiclass learning with feature sharing}, + author={Shalev-Shwartz, Shai and Wexler, Yonatan and Shashua, Amnon}, + booktitle={Advances in Neural Information Processing Systems}, + pages={1179--1187}, + year={2011} +} diff --git a/examples/meta/generator/targets/cpp.json b/examples/meta/generator/targets/cpp.json index fa4b8583944..03496a0cb94 100644 --- a/examples/meta/generator/targets/cpp.json +++ b/examples/meta/generator/targets/cpp.json @@ -15,6 +15,7 @@ "Assign": "$lhs = $expr", "Type": { "RealFeatures": "DenseFeatures", + "RealSubsetFeatures": "DenseSubsetFeatures", "StringCharFeatures": "CStringFeatures", "Default": "$type", "bool": "bool", diff --git a/examples/meta/src/multiclass_classifier/shareboost.sg b/examples/meta/src/multiclass_classifier/shareboost.sg new file mode 100644 index 00000000000..94bf52614db --- /dev/null +++ b/examples/meta/src/multiclass_classifier/shareboost.sg @@ -0,0 +1,29 @@ +CSVFile f_feats_train("../../data/classifier_4class_2d_linear_features_train.dat") +CSVFile f_feats_test("../../data/classifier_4class_2d_linear_features_test.dat") +CSVFile f_labels_train("../../data/classifier_4class_2d_linear_labels_train.dat") +CSVFile f_labels_test("../../data/classifier_4class_2d_linear_labels_test.dat") + +#![create_features] +RealFeatures features_train(f_feats_train) +RealFeatures features_test(f_feats_test) +MulticlassLabels labels_train(f_labels_train) +MulticlassLabels labels_test(f_labels_test) +#![create_features] + +#![create_instance] +ShareBoost shareboost(features_train, labels_train, 2) 
+#![create_instance] + +#![train_and_apply] +shareboost.train() +RealSubsetFeatures features_test_sub(features_test, shareboost.get_activeset()) +MulticlassLabels labels_predict = shareboost.apply_multiclass(features_test_sub) +#![train_and_apply] + +#![evaluate_accuracy] +MulticlassAccuracy eval() +real accuracy = eval.evaluate(labels_predict, labels_test) +#![evaluate_accuracy] + +# additional integration testing variables +RealVector output = labels_predict.get_labels() diff --git a/examples/undocumented/python_modular/classifier_multiclass_shareboost.py b/examples/undocumented/python_modular/classifier_multiclass_shareboost.py deleted file mode 100644 index b2c96c0080e..00000000000 --- a/examples/undocumented/python_modular/classifier_multiclass_shareboost.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python -from tools.multiclass_shared import prepare_data - -[traindat, label_traindat, testdat, label_testdat] = prepare_data(False) - -parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]] - -def classifier_multiclass_shareboost (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5): - from modshogun import RealFeatures, RealSubsetFeatures, MulticlassLabels - from modshogun import ShareBoost - - #print('Working on a problem of %d features and %d samples' % fm_train_real.shape) - - feats_train = RealFeatures(fm_train_real) - - labels = MulticlassLabels(label_train_multiclass) - - shareboost = ShareBoost(feats_train, labels, min(fm_train_real.shape[0]-1, 30)) - shareboost.train(); - #print(shareboost.get_activeset()) - - feats_test = RealSubsetFeatures(RealFeatures(fm_test_real), shareboost.get_activeset()) - label_pred = shareboost.apply(feats_test) - - out = label_pred.get_labels() - - if label_test_multiclass is not None: - from modshogun import MulticlassAccuracy - labels_test = 
MulticlassLabels(label_test_multiclass) - evaluator = MulticlassAccuracy() - acc = evaluator.evaluate(label_pred, labels_test) - #print('Accuracy = %.4f' % acc) - - return out - -if __name__=='__main__': - print('MulticlassMachine') - classifier_multiclass_shareboost(*parameter_list[0]) -