From 5131bb50ee2e46d9965f21d94a68c4e6557c3f2c Mon Sep 17 00:00:00 2001 From: Heiko Strathmann Date: Sun, 10 Mar 2013 20:39:54 +0000 Subject: [PATCH 1/5] made example way better (random input data means non-sense output) and prepared for unit-test --- .../libshogun/classifier_larank.cpp | 87 ++++++++++++++----- 1 file changed, 64 insertions(+), 23 deletions(-) diff --git a/examples/undocumented/libshogun/classifier_larank.cpp b/examples/undocumented/libshogun/classifier_larank.cpp index 2a09e0c223b..315f7beb7f0 100644 --- a/examples/undocumented/libshogun/classifier_larank.cpp +++ b/examples/undocumented/libshogun/classifier_larank.cpp @@ -1,3 +1,12 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * Written (W) 2013 Heiko Strathmann and others + */ + #include #include #include @@ -6,28 +15,42 @@ using namespace shogun; -void print_message(FILE* target, const char* str) +void test() { - fprintf(target, "%s", str); -} + index_t num_vec=10; + index_t num_feat=3; + index_t num_class=num_feat; // to make data easy + float64_t distance=15; -int main(int argc, char** argv) -{ - init_shogun(&print_message); - index_t num_vec=3; - index_t num_feat=2; - index_t num_class=2; + // create some linearly seperable data + SGMatrix matrix(num_class, num_vec); + SGMatrix matrix_test(num_class, num_vec); + CMulticlassLabels* labels=new CMulticlassLabels(num_vec); + CMulticlassLabels* labels_test=new CMulticlassLabels(num_vec); + for (index_t i=0; iset_label(i, label); + labels_test->set_label(i, label); + } - // create some data - SGMatrix matrix(num_feat, num_vec); - SGVector::range_fill_vector(matrix.matrix, num_feat*num_vec); + /* make sure data is linearly seperable per class */ + matrix(label,i)+=distance; + matrix_test(label,i)+=distance; + } + matrix.display_matrix("matrix"); + labels->get_int_labels().display_vector("labels"); - // create vectors // shogun will now own the matrix created CDenseFeatures* features=new CDenseFeatures(matrix); + CDenseFeatures* features_test= + new CDenseFeatures(matrix_test); // create three labels - CMulticlassLabels* labels=new CMulticlassLabels(num_vec); for (index_t i=0; iset_label(i, i%num_class); @@ -41,21 +64,39 @@ int main(int argc, char** argv) svm->train(); // classify on training examples - CMulticlassLabels* output=CMulticlassLabels::obtain_from_generic(svm->apply()); - SGVector::display_vector(output->get_labels().vector, output->get_num_labels(), - "batch output"); + CMulticlassLabels* output=(CMulticlassLabels*)svm->apply(); + output->get_labels().display_vector("batch output"); /* assert that batch apply and apply(index_t) give same result */ + SGVector single_outputs(output->get_num_labels()); for (index_t i=0; iget_num_labels(); ++i) - { - float64_t label=svm->apply_one(i); - SG_SPRINT("single output[%d]=%f\n", i, label); - ASSERT(output->get_label(i)==label); - } - SG_UNREF(output); + single_outputs[i]=svm->apply_one(i); + + single_outputs.display_vector("single_outputs"); + + for (index_t i=0; iget_num_labels(); ++i) + ASSERT(output->get_label(i)==single_outputs[i]); + + CMulticlassLabels* output_test= + (CMulticlassLabels*)svm->apply(features_test); + labels_test->get_labels().display_vector("labels_test"); + output_test->get_labels().display_vector("output_test"); + + for (index_t i=0; iget_num_labels(); ++i) + ASSERT(labels_test->get_label(i)==output_test->get_label(i)); // free up memory + SG_UNREF(output); + SG_UNREF(labels_test); + SG_UNREF(output_test); SG_UNREF(svm); +} + +int main(int argc, char** argv) +{ + init_shogun_with_defaults(); + + test(); exit_shogun(); return 0; From 4b7ded480f37fdefe091557d6f264f25757f5c67 Mon Sep 17 00:00:00 2001 From: Heiko Strathmann Date: Sun, 10 Mar 2013 20:49:03 +0000 Subject: [PATCH 2/5] added unit-test for LaRank which asserts basic functionality of classifier --- tests/unit/multiclass/LaRank_unittest.cc | 87 ++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 tests/unit/multiclass/LaRank_unittest.cc diff --git a/tests/unit/multiclass/LaRank_unittest.cc b/tests/unit/multiclass/LaRank_unittest.cc new file mode 100644 index 00000000000..11a44c8d987 --- /dev/null +++ b/tests/unit/multiclass/LaRank_unittest.cc @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include + +using namespace shogun; + +TEST(LaRank,train) +{ + index_t num_vec=10; + index_t num_feat=3; + index_t num_class=num_feat; // to make data easy + float64_t distance=15; + + // create some linearly seperable data + SGMatrix matrix(num_class, num_vec); + SGMatrix matrix_test(num_class, num_vec); + CMulticlassLabels* labels=new CMulticlassLabels(num_vec); + CMulticlassLabels* labels_test=new CMulticlassLabels(num_vec); + for (index_t i=0; iset_label(i, label); + labels_test->set_label(i, label); + } + + /* make sure data is linearly seperable per class */ + matrix(label,i)+=distance; + matrix_test(label,i)+=distance; + } + //matrix.display_matrix("matrix"); + //labels->get_int_labels().display_vector("labels"); + + // shogun will now own the matrix created + CDenseFeatures* features=new CDenseFeatures(matrix); + CDenseFeatures* features_test= + new CDenseFeatures(matrix_test); + + // create three labels + for (index_t i=0; iset_label(i, i%num_class); + + // create gaussian kernel with cache 10MB, width 0.5 + CGaussianKernel* kernel = new CGaussianKernel(10, 0.5); + kernel->init(features, features); + + // create libsvm with C=10 and train + CLaRank* svm = new CLaRank(10, kernel, labels); + svm->train(); + svm->train(); + + // classify on training examples + CMulticlassLabels* output=(CMulticlassLabels*)svm->apply(); + output->get_labels().display_vector("batch output"); + + /* assert that batch apply and apply(index_t) give same result */ + SGVector single_outputs(output->get_num_labels()); + for (index_t i=0; iget_num_labels(); ++i) + single_outputs[i]=svm->apply_one(i); + + //single_outputs.display_vector("single_outputs"); + + for (index_t i=0; iget_num_labels(); ++i) + EXPECT_EQ(output->get_label(i), single_outputs[i]); + + // predict test labels (since data is easy this has to be correct + CMulticlassLabels* output_test= + (CMulticlassLabels*)svm->apply(features_test); + //labels_test->get_labels().display_vector("labels_test"); + //output_test->get_labels().display_vector("output_test"); + + for (index_t i=0; iget_num_labels(); ++i) + EXPECT_EQ(labels_test->get_label(i), output_test->get_label(i)); + + // free up memory + SG_UNREF(output); + SG_UNREF(labels_test); + SG_UNREF(output_test); + SG_UNREF(svm); +} + From d3f1eeefc1d4ac709b8f4879654a77eb3a08f786 Mon Sep 17 00:00:00 2001 From: Heiko Strathmann Date: Sun, 10 Mar 2013 22:51:37 +0000 Subject: [PATCH 3/5] used a more sensible data set to actually see whether the method works --- .../classifier_larank_modular.py | 36 +++++++++++-------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/examples/undocumented/python_modular/classifier_larank_modular.py b/examples/undocumented/python_modular/classifier_larank_modular.py index 019992df49a..fdccfeefbfe 100644 --- a/examples/undocumented/python_modular/classifier_larank_modular.py +++ b/examples/undocumented/python_modular/classifier_larank_modular.py @@ -1,27 +1,33 @@ #!/usr/bin/env python -from tools.load import LoadMatrix -lm=LoadMatrix() +from numpy import * +parameter_list = [[10,3,15,0.9,1,2000,1],[20,4,15,0.9,1,5000,2]] -traindat = lm.load_numbers('../data/fm_train_real.dat') -testdat = lm.load_numbers('../data/fm_test_real.dat') -label_traindat = lm.load_labels('../data/label_train_multiclass.dat') - -parameter_list = [[traindat,testdat,label_traindat,0.9,1,2000],[traindat,testdat,label_traindat,3,1,5000]] - -def classifier_larank_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,C=0.9,num_threads=1,num_iter=5): +def classifier_larank_modular (num_vec,num_class,distance,C=0.9,num_threads=1,num_iter=5,seed=1): from shogun.Features import RealFeatures, MulticlassLabels from shogun.Kernel import GaussianKernel from shogun.Classifier import LaRank from shogun.Mathematics import Math_init_random - Math_init_random(17) - - feats_train=RealFeatures(fm_train_real) - feats_test=RealFeatures(fm_test_real) + + # reproducible results + Math_init_random(seed) + random.seed(seed) + + # generate some training data where each class pair is linearly separable + label_train=array([mod(x,num_class) for x in range(num_vec)],dtype="float64") + label_test=array([mod(x,num_class) for x in range(num_vec)],dtype="float64") + fm_train=array(random.randn(num_class,num_vec)) + fm_test=array(random.randn(num_class,num_vec)) + for i in range(len(label_train)): + fm_train[label_train[i],i]+=distance + fm_test[label_test[i],i]+=distance + + feats_train=RealFeatures(fm_train) + feats_test=RealFeatures(fm_test) width=2.1 kernel=GaussianKernel(feats_train, feats_train, width) epsilon=1e-5 - labels=MulticlassLabels(label_train_multiclass) + labels=MulticlassLabels(label_train) svm=LaRank(C, kernel, labels) #svm.set_tau(1e-3) @@ -29,7 +35,7 @@ def classifier_larank_modular (fm_train_real=traindat,fm_test_real=testdat,label #svm.io.enable_progress() svm.set_epsilon(epsilon) svm.train() - out=svm.apply(feats_train).get_labels() + out=svm.apply(feats_test).get_labels() predictions = svm.apply() return predictions, svm, predictions.get_labels() From 4c9352a018c5c5adfc91c11b02110f5d5ae6b583 Mon Sep 17 00:00:00 2001 From: Heiko Strathmann Date: Sun, 10 Mar 2013 23:01:16 +0000 Subject: [PATCH 4/5] empty line to make things look nicer --- .../undocumented/python_modular/classifier_larank_modular.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/undocumented/python_modular/classifier_larank_modular.py b/examples/undocumented/python_modular/classifier_larank_modular.py index fdccfeefbfe..e5aa1fc6006 100644 --- a/examples/undocumented/python_modular/classifier_larank_modular.py +++ b/examples/undocumented/python_modular/classifier_larank_modular.py @@ -23,6 +23,7 @@ def classifier_larank_modular (num_vec,num_class,distance,C=0.9,num_threads=1,nu feats_train=RealFeatures(fm_train) feats_test=RealFeatures(fm_test) + width=2.1 kernel=GaussianKernel(feats_train, feats_train, width) From 11d764e59461c41a833bb18ea156614519c267e4 Mon Sep 17 00:00:00 2001 From: Heiko Strathmann Date: Sun, 10 Mar 2013 23:01:47 +0000 Subject: [PATCH 5/5] new easy toy data data for example --- .../classifier_multiclassocas_modular.py | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/examples/undocumented/python_modular/classifier_multiclassocas_modular.py b/examples/undocumented/python_modular/classifier_multiclassocas_modular.py index 1120b0e00f7..9ff4f55e169 100644 --- a/examples/undocumented/python_modular/classifier_multiclassocas_modular.py +++ b/examples/undocumented/python_modular/classifier_multiclassocas_modular.py @@ -1,24 +1,37 @@ #!/usr/bin/env python -from tools.multiclass_shared import prepare_data +from numpy import * +parameter_list = [[10,3,15,2.1,1,1e-5,1],[20,4,15,2.2,2,1e-5,2]] -[traindat, label_traindat, testdat, label_testdat] = prepare_data() - -parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]] - -def classifier_multiclassocas_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5): +def classifier_multiclassocas_modular (num_vec=10,num_class=3,distance=15,width=2.1,C=1,epsilon=1e-5,seed=1): from shogun.Features import RealFeatures, MulticlassLabels from shogun.Classifier import MulticlassOCAS + from shogun.Mathematics import Math_init_random + + # reproducible results + random.seed(seed) + Math_init_random(seed) + + # generate some training data where each class pair is linearly separable + label_train=array([mod(x,num_class) for x in range(num_vec)],dtype="float64") + label_test=array([mod(x,num_class) for x in range(num_vec)],dtype="float64") + fm_train=array(random.randn(num_class,num_vec)) + fm_test=array(random.randn(num_class,num_vec)) + for i in range(len(label_train)): + fm_train[label_train[i],i]+=distance + fm_test[label_test[i],i]+=distance - feats_train=RealFeatures(fm_train_real) - feats_test=RealFeatures(fm_test_real) + feats_train=RealFeatures(fm_train) + feats_test=RealFeatures(fm_test) - labels=MulticlassLabels(label_train_multiclass) + labels=MulticlassLabels(label_train) classifier = MulticlassOCAS(C,feats_train,labels) classifier.train() out = classifier.apply(feats_test).get_labels() - return out + #print label_test + #print out + return out,classifier if __name__=='__main__': print('MulticlassOCAS')