From 962f8ca7be0448e32af570410077008e1daf0f69 Mon Sep 17 00:00:00 2001
From: Sanuj
Date: Sat, 13 Aug 2016 11:01:48 +0530
Subject: [PATCH] add cookbook for mkl binary classification

---
 .../source/examples/binary_classifier/mkl.rst | 59 +++++++++++++
 .../descriptions/modular/mkl_binclass.txt     | 13 ---
 examples/meta/src/binary_classifier/mkl.sg    | 55 ++++++++++++
 .../python_modular/mkl_binclass_modular.py    | 88 -------------------
 4 files changed, 114 insertions(+), 101 deletions(-)
 create mode 100644 doc/cookbook/source/examples/binary_classifier/mkl.rst
 delete mode 100644 examples/descriptions/modular/mkl_binclass.txt
 create mode 100644 examples/meta/src/binary_classifier/mkl.sg
 delete mode 100644 examples/undocumented/python_modular/mkl_binclass_modular.py

diff --git a/doc/cookbook/source/examples/binary_classifier/mkl.rst b/doc/cookbook/source/examples/binary_classifier/mkl.rst
new file mode 100644
index 00000000000..52f1186a6a0
--- /dev/null
+++ b/doc/cookbook/source/examples/binary_classifier/mkl.rst
@@ -0,0 +1,59 @@
+========================
+Multiple Kernel Learning
+========================
+
+Multiple kernel learning (MKL) is based on convex combinations of arbitrary kernels over potentially different domains.
+
+.. math::
+
+    {\bf k}(x_i,x_j)=\sum_{k=1}^{K} \beta_k {\bf k}_k(x_i, x_j)
+
+where :math:`\beta_k > 0`, :math:`\sum_{k=1}^{K} \beta_k = 1`, :math:`K` is the number of sub-kernels, :math:`{\bf k}` is the combined kernel, :math:`{\bf k}_k` is an individual sub-kernel and :math:`\{x_i\}` are the training data.
+
+Classification is done with a Support Vector Machine (SVM); see :doc:`linear_svm` for more details. The optimal SVM coefficients :math:`\alpha`, the bias :math:`b` and the kernel weights :math:`\beta` are all determined during training.
+
+See :cite:`sonnenburg2006large` for more details.
+
+-------
+Example
+-------
+
+Imagine we have files with training and test data. We create :sgclass:`CDenseFeatures` (here 64 bit floats aka RealFeatures) and :sgclass:`CBinaryLabels` as
+
+.. sgexample:: mkl.sg:create_features
+
+Then we create individual kernels such as :sgclass:`CPolyKernel` and :sgclass:`CGaussianKernel`, which will later be combined into one :sgclass:`CCombinedKernel`.
+
+.. sgexample:: mkl.sg:create_kernel
+
+We create an instance of :sgclass:`CCombinedKernel` and append the :sgclass:`CKernel` objects.
+
+.. sgexample:: mkl.sg:create_combined_train
+
+We create an instance of :sgclass:`CMKLClassification` and provide it with the combined kernel and the labels before training.
+
+.. sgexample:: mkl.sg:train_mkl
+
+After training, we can extract the kernel weights :math:`\beta` as well as the SVM coefficients :math:`\alpha` and bias :math:`b`.
+
+.. sgexample:: mkl.sg:extract_weights
+
+We update the :sgclass:`CCombinedKernel` object to use the test data.
+
+.. sgexample:: mkl.sg:create_combined_test
+
+We set the updated kernel and predict :sgclass:`CBinaryLabels` for the test data.
+
+.. sgexample:: mkl.sg:mkl_apply
+
+Finally, we can evaluate test performance via e.g. :sgclass:`CAccuracyMeasure`.
+
+.. sgexample:: mkl.sg:evaluate_accuracy
+
+----------
+References
+----------
+:wiki:`Multiple_kernel_learning`
+
+.. bibliography:: ../../references.bib
+    :filter: docname in docnames
diff --git a/examples/descriptions/modular/mkl_binclass.txt b/examples/descriptions/modular/mkl_binclass.txt
deleted file mode 100644
index 8a5873f7daf..00000000000
--- a/examples/descriptions/modular/mkl_binclass.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-In this example we show how to perform Multiple Kernel Learning (MKL)
-with the modular interface. First, we create a number of base kernels.
-These kernels can capture different views of the same features, or actually
-consider entirely different features associated with the same example
-(e.g. DNA sequences = strings AND gene expression data = real values of the same tissue sample).
-The base kernels are then subsequently added to a CombinedKernel, which
-contains a weight for each kernel and encapsulates the base kernels
-from the training procedure. When the CombinedKernel between two examples is
-evaluated it computes the corresponding linear combination of kernels according to their weights.
-We then show how to create an MKLClassifier that trains an SVM and learns the optimal
-weighting of kernels (w.r.t. a given norm q) at the same time.
-Finally, the example shows how to classify with a trained MKLClassifier.
-
diff --git a/examples/meta/src/binary_classifier/mkl.sg b/examples/meta/src/binary_classifier/mkl.sg
new file mode 100644
index 00000000000..d5cbe9b3015
--- /dev/null
+++ b/examples/meta/src/binary_classifier/mkl.sg
@@ -0,0 +1,55 @@
+CSVFile f_feats_train("../../data/classifier_binary_2d_nonlinear_features_train.dat")
+CSVFile f_feats_test("../../data/classifier_binary_2d_nonlinear_features_test.dat")
+CSVFile f_labels_train("../../data/classifier_binary_2d_nonlinear_labels_train.dat")
+CSVFile f_labels_test("../../data/classifier_binary_2d_nonlinear_labels_test.dat")
+
+#![create_features]
+RealFeatures features_train(f_feats_train)
+RealFeatures features_test(f_feats_test)
+BinaryLabels labels_train(f_labels_train)
+BinaryLabels labels_test(f_labels_test)
+#![create_features]
+
+#![create_kernel]
+PolyKernel poly_kernel(10,2)
+GaussianKernel gauss_kernel_1(2.0)
+GaussianKernel gauss_kernel_2(3.0)
+#![create_kernel]
+
+#![create_combined_train]
+CombinedKernel combined_kernel()
+combined_kernel.append_kernel(poly_kernel)
+combined_kernel.append_kernel(gauss_kernel_1)
+combined_kernel.append_kernel(gauss_kernel_2)
+combined_kernel.init(features_train, features_train)
+#![create_combined_train]
+
+#![train_mkl]
+MKLClassification mkl()
+mkl.set_kernel(combined_kernel)
+mkl.set_labels(labels_train)
+mkl.train()
+#![train_mkl]
+
+#![extract_weights]
+RealVector beta = combined_kernel.get_subkernel_weights()
+RealVector alpha = mkl.get_alphas()
+real bias = mkl.get_bias()
+#![extract_weights]
+
+#![create_combined_test]
+combined_kernel.init(features_train, features_test)
+#![create_combined_test]
+
+#![mkl_apply]
+mkl.set_kernel(combined_kernel)
+BinaryLabels labels_predict = mkl.apply_binary()
+#![mkl_apply]
+
+#![evaluate_accuracy]
+AccuracyMeasure eval()
+real accuracy = eval.evaluate(labels_predict, labels_test)
+#![evaluate_accuracy]
+
+# additional integration testing variables
+RealVector output = labels_predict.get_labels()
diff --git a/examples/undocumented/python_modular/mkl_binclass_modular.py b/examples/undocumented/python_modular/mkl_binclass_modular.py
deleted file mode 100644
index 90ab575b541..00000000000
--- a/examples/undocumented/python_modular/mkl_binclass_modular.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-from modshogun import CombinedFeatures, RealFeatures, BinaryLabels
-from modshogun import CombinedKernel, PolyKernel, CustomKernel
-from modshogun import MKLClassification
-from tools.load import LoadMatrix
-lm=LoadMatrix()
-
-#only run example if SVMLight is included as LibSVM solver crashes in MKLClassification
-try:
-    from modshogun import SVMLight
-except ImportError:
-    print("SVMLight not available")
-    exit(0)
-
-traindat = lm.load_numbers('../data/fm_train_real.dat')
-testdat = lm.load_numbers('../data/fm_test_real.dat') -label_traindat = lm.load_labels('../data/label_train_twoclass.dat') - -parameter_list = [[traindat,testdat,label_traindat],[traindat,testdat,label_traindat]] -# fm_train_real.shape -# fm_test_real.shape -# combined_custom() - -def mkl_binclass_modular (fm_train_real=traindat,fm_test_real=testdat,fm_label_twoclass = label_traindat): - - ################################## - # set up and train - - # create some poly train/test matrix - tfeats = RealFeatures(fm_train_real) - tkernel = PolyKernel(10,3) - tkernel.init(tfeats, tfeats) - K_train = tkernel.get_kernel_matrix() - - pfeats = RealFeatures(fm_test_real) - tkernel.init(tfeats, pfeats) - K_test = tkernel.get_kernel_matrix() - - # create combined train features - feats_train = CombinedFeatures() - feats_train.append_feature_obj(RealFeatures(fm_train_real)) - - # and corresponding combined kernel - kernel = CombinedKernel() - kernel.append_kernel(CustomKernel(K_train)) - kernel.append_kernel(PolyKernel(10,2)) - kernel.init(feats_train, feats_train) - - # train mkl - labels = BinaryLabels(fm_label_twoclass) - mkl = MKLClassification() - - # which norm to use for MKL - mkl.set_mkl_norm(1) #2,3 - - # set cost (neg, pos) - mkl.set_C(1, 1) - - # set kernel and labels - mkl.set_kernel(kernel) - mkl.set_labels(labels) - - # train - mkl.train() - #w=kernel.get_subkernel_weights() - #kernel.set_subkernel_weights(w) - - - ################################## - # test - - # create combined test features - feats_pred = CombinedFeatures() - feats_pred.append_feature_obj(RealFeatures(fm_test_real)) - - # and corresponding combined kernel - kernel = CombinedKernel() - kernel.append_kernel(CustomKernel(K_test)) - kernel.append_kernel(PolyKernel(10, 2)) - kernel.init(feats_train, feats_pred) - - # and classify - mkl.set_kernel(kernel) - mkl.apply() - return mkl.apply(),kernel - -if __name__=='__main__': - mkl_binclass_modular (*parameter_list[0])
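
For anyone who still wants to drive the same workflow from Python after this removal, a minimal sketch that mirrors mkl.sg via the modular interface is given below. It is an illustration only, not part of the patch: the class and method names are the ones used in mkl.sg and in the deleted example, the single-argument GaussianKernel and the CSVFile-based feature/label constructors are assumed to be wrapped identically in modshogun, and, as the deleted example notes, MKLClassification is assumed to require a build with the SVMLight solver since the LibSVM solver crashes there.

# Illustrative sketch (not part of this patch): mirrors mkl.sg using the
# Python modular interface. Assumes modshogun is built with SVMLight.
from modshogun import CSVFile, RealFeatures, BinaryLabels
from modshogun import CombinedKernel, PolyKernel, GaussianKernel
from modshogun import MKLClassification, AccuracyMeasure

# load the same train/test features and labels as mkl.sg
features_train = RealFeatures(CSVFile("../../data/classifier_binary_2d_nonlinear_features_train.dat"))
features_test = RealFeatures(CSVFile("../../data/classifier_binary_2d_nonlinear_features_test.dat"))
labels_train = BinaryLabels(CSVFile("../../data/classifier_binary_2d_nonlinear_labels_train.dat"))
labels_test = BinaryLabels(CSVFile("../../data/classifier_binary_2d_nonlinear_labels_test.dat"))

# combine a polynomial kernel with two Gaussian kernels of different widths
combined_kernel = CombinedKernel()
combined_kernel.append_kernel(PolyKernel(10, 2))
combined_kernel.append_kernel(GaussianKernel(2.0))
combined_kernel.append_kernel(GaussianKernel(3.0))
combined_kernel.init(features_train, features_train)

# train MKL: learns the SVM coefficients and the sub-kernel weights jointly
mkl = MKLClassification()
mkl.set_kernel(combined_kernel)
mkl.set_labels(labels_train)
mkl.train()

# learned quantities: beta (kernel weights), alpha and b (SVM)
beta = combined_kernel.get_subkernel_weights()
alpha = mkl.get_alphas()
bias = mkl.get_bias()

# re-initialise the combined kernel on train/test data and predict
combined_kernel.init(features_train, features_test)
mkl.set_kernel(combined_kernel)
labels_predict = mkl.apply_binary()

# evaluate accuracy on the test labels
evaluator = AccuracyMeasure()
accuracy = evaluator.evaluate(labels_predict, labels_test)
print(accuracy)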