From ebd45aee101347a4ef91e9253806f14a7fb486ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20J=2E=20Iglesias=20Garc=C3=ADa?= Date: Mon, 16 Apr 2018 13:24:56 +0200 Subject: [PATCH] Feature/refactor meta (#4257) * Add distance interface factory. Port Euclidean meta example to new API. * API port in hierarchical clustering meta-example. Add watch_params. * Port LARs meta example. * Minor name update revert. * Update data generated by meta examples. * Fix return type to base class in meta ex. Add base class to interfaces. * Disable Hierarchical clone test. --- data | 2 +- examples/meta/src/clustering/hierarchical.sg | 16 +++------ examples/meta/src/distance/euclidean.sg | 18 +++++----- ...ultiple_kernel_learning_weights_storage.sg | 2 +- .../src/regression/linear_ridge_regression.sg | 35 +++++++++---------- src/interfaces/swig/Evaluation.i | 3 +- src/interfaces/swig/shogun.i | 1 + src/shogun/clustering/Hierarchical.cpp | 15 ++++++++ src/shogun/clustering/Hierarchical.h | 4 +++ src/shogun/distance/Distance.cpp | 8 ++--- src/shogun/distance/EuclideanDistance.cpp | 2 +- src/shogun/machine/LinearMachine.cpp | 7 ++-- src/shogun/util/factory.h | 9 ++++- tests/unit/base/SGObjectAll_unittest.cc | 2 +- 14 files changed, 71 insertions(+), 53 deletions(-) diff --git a/data b/data index 0b276a1366f..5242d58e94b 160000 --- a/data +++ b/data @@ -1 +1 @@ -Subproject commit 0b276a1366f05a117371563f021a620e403b0b1d +Subproject commit 5242d58e94b39725d21f451fb645d3b68245e72c diff --git a/examples/meta/src/clustering/hierarchical.sg b/examples/meta/src/clustering/hierarchical.sg index 5c8c7473013..0bd92ff9782 100644 --- a/examples/meta/src/clustering/hierarchical.sg +++ b/examples/meta/src/clustering/hierarchical.sg @@ -1,23 +1,17 @@ -CSVFile f_feats_train("../../data/classifier_4class_2d_linear_features_train.dat") +File f_features_train = csv_file("../../data/classifier_4class_2d_linear_features_train.dat") #![create_features] -RealFeatures features_train(f_feats_train) +Features 
features_train = features(f_features_train) #![create_features] #![choose_distance] -EuclideanDistance distance(features_train, features_train) +Distance d = distance("EuclideanDistance", lhs=features_train, rhs=features_train) #![choose_distance] #![create_instance] -int merges = 3 -Hierarchical hierarchical(merges, distance) +Machine hierarchical = machine("Hierarchical", merges=3, distance=d) #![create_instance] #![train_model] -hierarchical.train() +hierarchical.train(features_train) #![train_model] - -#![extract_results] -RealVector d = hierarchical.get_merge_distances() -IntMatrix cp = hierarchical.get_cluster_pairs() -#![extract_results] diff --git a/examples/meta/src/distance/euclidean.sg b/examples/meta/src/distance/euclidean.sg index 289dabedfd9..9dd0933b77e 100644 --- a/examples/meta/src/distance/euclidean.sg +++ b/examples/meta/src/distance/euclidean.sg @@ -1,24 +1,24 @@ -CSVFile f_feats_a("../../data/fm_train_real.dat") -CSVFile f_feats_b("../../data/fm_test_real.dat") +File f_features_a = csv_file("../../data/fm_train_real.dat") +File f_features_b = csv_file("../../data/fm_test_real.dat") #![create_features] -RealFeatures features_a(f_feats_a) -RealFeatures features_b(f_feats_b) +Features features_a = features(f_features_a) +Features features_b = features(f_features_b) #![create_features] #![create_instance] -EuclideanDistance distance(features_a, features_a) +Distance d = distance("EuclideanDistance", lhs=features_a, rhs=features_a) #![create_instance] #![extract_distance] -RealMatrix distance_matrix_aa = distance.get_distance_matrix() +RealMatrix distance_matrix_aa = d.get_distance_matrix() #![extract_distance] #![refresh_distance] -distance.init(features_a, features_b) +d.init(features_a, features_b) #![refresh_distance] #![extract_sq_distance] -distance.set_disable_sqrt(True) -RealMatrix distance_matrix_ab = distance.get_distance_matrix() +d.put("disable_sqrt", True) +RealMatrix sq_distance_matrix_ab = d.get_distance_matrix() 
#![extract_sq_distance] diff --git a/examples/meta/src/evaluation/cross_validation_multiple_kernel_learning_weights_storage.sg b/examples/meta/src/evaluation/cross_validation_multiple_kernel_learning_weights_storage.sg index 9af5cb84c15..a3a0b10afed 100644 --- a/examples/meta/src/evaluation/cross_validation_multiple_kernel_learning_weights_storage.sg +++ b/examples/meta/src/evaluation/cross_validation_multiple_kernel_learning_weights_storage.sg @@ -41,7 +41,7 @@ cross.subscribe_to_parameters(mkl_obs) #[!create_observer] #![evaluate_and_get_result] -CrossValidationResult result = cross.evaluate() +EvaluationResult result = cross.evaluate() #![evaluate_and_get_result] #![get_results] diff --git a/examples/meta/src/regression/linear_ridge_regression.sg b/examples/meta/src/regression/linear_ridge_regression.sg index 9c562cd7646..5de9594b6e5 100644 --- a/examples/meta/src/regression/linear_ridge_regression.sg +++ b/examples/meta/src/regression/linear_ridge_regression.sg @@ -1,42 +1,41 @@ -CSVFile f_feats_train("../../data/regression_1d_linear_features_train.dat") -CSVFile f_feats_test("../../data/regression_1d_linear_features_test.dat") -CSVFile f_labels_train("../../data/regression_1d_linear_labels_train.dat") -CSVFile f_labels_test("../../data/regression_1d_linear_labels_test.dat") +File f_feats_train = csv_file("../../data/regression_1d_linear_features_train.dat") +File f_feats_test = csv_file("../../data/regression_1d_linear_features_test.dat") +File f_labels_train = csv_file("../../data/regression_1d_linear_labels_train.dat") +File f_labels_test = csv_file("../../data/regression_1d_linear_labels_test.dat") #![create_features] -RealFeatures features_train(f_feats_train) -RealFeatures features_test(f_feats_test) -RegressionLabels labels_train(f_labels_train) -RegressionLabels labels_test(f_labels_test) +Features features_train = features(f_feats_train) +Features features_test = features(f_feats_test) +Labels labels_train = labels(f_labels_train) +Labels labels_test = 
labels(f_labels_test) #![create_features] #![create_instance] -real tau = 0.001 -LinearRidgeRegression lrr(tau, features_train, labels_train) +Machine lrr = machine("LinearRidgeRegression", tau=0.001, labels=labels_train) #![create_instance] #![train_and_apply] -lrr.train() -RegressionLabels labels_predict = lrr.apply_regression(features_test) -real bias = lrr.get_bias() +lrr.train(features_train) +Labels labels_predict = lrr.apply(features_test) +real b = lrr.get_real("bias") #![train_and_apply] #[!disable_bias] -lrr.set_compute_bias(False) +lrr.put("compute_bias", False) #[!disable_bias] #[!set_bias_manually] -lrr.set_bias(bias) +lrr.put("bias", b) #[!set_bias_manually] #[!extract_w] -RealVector weights = lrr.get_w() +RealVector w = lrr.get_real_vector("w") #[!extract_w] #![evaluate_error] -MeanSquaredError eval() +Evaluation eval = evaluation("MeanSquaredError") real mse = eval.evaluate(labels_predict, labels_test) #![evaluate_error] # integration testing variables -RealVector output = labels_test.get_labels() +RealVector output = labels_test.get_real_vector("labels") diff --git a/src/interfaces/swig/Evaluation.i b/src/interfaces/swig/Evaluation.i index 260121c2814..e849dacb924 100644 --- a/src/interfaces/swig/Evaluation.i +++ b/src/interfaces/swig/Evaluation.i @@ -14,6 +14,7 @@ /* Remove C Prefix */ %rename(Evaluation) CEvaluation; +%rename(EvaluationResult) CEvaluationResult; %rename(BinaryClassEvaluation) CBinaryClassEvaluation; %rename(ClusteringEvaluation) CClusteringEvaluation; %rename(ClusteringAccuracy) CClusteringAccuracy; @@ -74,4 +75,4 @@ %include %include %include -%include \ No newline at end of file +%include diff --git a/src/interfaces/swig/shogun.i b/src/interfaces/swig/shogun.i index ef675729c39..a3e712157d4 100644 --- a/src/interfaces/swig/shogun.i +++ b/src/interfaces/swig/shogun.i @@ -174,6 +174,7 @@ namespace shogun %template(put) CSGObject::put_scalar_dispatcher; #endif // SWIGJAVA %template(put) CSGObject::put_scalar_dispatcher; 
+%template(put) CSGObject::put_scalar_dispatcher; #ifndef SWIGJAVA diff --git a/src/shogun/clustering/Hierarchical.cpp b/src/shogun/clustering/Hierarchical.cpp index 3cf84f2cd2e..eb1b456f730 100644 --- a/src/shogun/clustering/Hierarchical.cpp +++ b/src/shogun/clustering/Hierarchical.cpp @@ -29,6 +29,7 @@ CHierarchical::CHierarchical() : CDistanceMachine(), merges(3), dimensions(0), assignment(NULL), table_size(0), pairs(NULL), merge_distance(NULL) { + register_parameters(); } CHierarchical::CHierarchical(int32_t merges_, CDistance* d) @@ -36,6 +37,20 @@ CHierarchical::CHierarchical(int32_t merges_, CDistance* d) table_size(0), pairs(NULL), merge_distance(NULL) { set_distance(d); + register_parameters(); +} + +static int PAIRS_DIM_0 = 2; + +void CHierarchical::register_parameters() +{ + watch_param("merges", &merges); + watch_param("dimensions", &dimensions); + watch_param("assignment_size", &assignment_size); + watch_param("assignment", &assignment, &table_size); + watch_param("table_size", &table_size); + watch_param("pairs", &pairs, &PAIRS_DIM_0, &merges); + watch_param("merge_distance", &merge_distance, &merges); } CHierarchical::~CHierarchical() diff --git a/src/shogun/clustering/Hierarchical.h b/src/shogun/clustering/Hierarchical.h index b4036199ef7..83bcfa2b716 100644 --- a/src/shogun/clustering/Hierarchical.h +++ b/src/shogun/clustering/Hierarchical.h @@ -121,6 +121,10 @@ class CHierarchical : public CDistanceMachine virtual bool train_require_labels() const { return false; } + private: + /** Register all parameters */ + void register_parameters(); + protected: /// the number of merges in hierarchical clustering int32_t merges; diff --git a/src/shogun/distance/Distance.cpp b/src/shogun/distance/Distance.cpp index e6e1550f2ff..58f56bb5c1d 100644 --- a/src/shogun/distance/Distance.cpp +++ b/src/shogun/distance/Distance.cpp @@ -259,12 +259,8 @@ void CDistance::init() num_lhs=0; num_rhs=0; - SG_ADD( - &lhs, "lhs", "Feature vectors to occur on left hand side.", - 
MS_NOT_AVAILABLE); - SG_ADD( - &rhs, "rhs", "Feature vectors to occur on right hand side.", - MS_NOT_AVAILABLE); + SG_ADD(&lhs, "lhs", "Left hand side features.", MS_NOT_AVAILABLE); + SG_ADD(&rhs, "rhs", "Right hand side features.", MS_NOT_AVAILABLE); } template diff --git a/src/shogun/distance/EuclideanDistance.cpp b/src/shogun/distance/EuclideanDistance.cpp index e60af41e816..f7dd1a0542f 100644 --- a/src/shogun/distance/EuclideanDistance.cpp +++ b/src/shogun/distance/EuclideanDistance.cpp @@ -1,7 +1,7 @@ /* * This software is distributed under BSD 3-clause license (see LICENSE file). * - * Authors: Saurabh Mahindre, Soumyajit De, Chiyuan Zhang, Viktor Gal, + * Authors: Saurabh Mahindre, Soumyajit De, Chiyuan Zhang, Viktor Gal, * Bjoern Esser, Soeren Sonnenburg */ diff --git a/src/shogun/machine/LinearMachine.cpp b/src/shogun/machine/LinearMachine.cpp index fed07a6c526..b4efdf72c0a 100644 --- a/src/shogun/machine/LinearMachine.cpp +++ b/src/shogun/machine/LinearMachine.cpp @@ -2,7 +2,8 @@ * This software is distributed under BSD 3-clause license (see LICENSE file). 
* * Authors: Soeren Sonnenburg, Sergey Lisitsyn, Evan Shelhamer, - * Youssef Emad El-Din, Evgeniy Andreev, Thoralf Klein, Bjoern Esser + * Youssef Emad El-Din, Evgeniy Andreev, Thoralf Klein, Bjoern Esser, + * Fernando Iglesias */ #include @@ -43,8 +44,8 @@ void CLinearMachine::init() SG_ADD(&m_w, "w", "Parameter vector w.", MS_NOT_AVAILABLE); SG_ADD(&bias, "bias", "Bias b.", MS_NOT_AVAILABLE); - SG_ADD((CSGObject**) &features, "features", "Feature object.", - MS_NOT_AVAILABLE); + SG_ADD((CSGObject**) &features, "features", "Feature object.", MS_NOT_AVAILABLE); + SG_ADD(&m_compute_bias, "compute_bias", "Whether bias is computed.", MS_NOT_AVAILABLE); } diff --git a/src/shogun/util/factory.h b/src/shogun/util/factory.h index f7645abd9f8..62f1373e1ac 100644 --- a/src/shogun/util/factory.h +++ b/src/shogun/util/factory.h @@ -1,21 +1,26 @@ /* * This software is distributed under BSD 3-clause license (see LICENSE file). * - * Authors: Heiko Strathmann + * Authors: Heiko Strathmann, Fernando Iglesias */ #ifndef FACTORY_H_ #define FACTORY_H_ #include +#include +#include #include #include #include #include #include +#include namespace shogun { + CDistance* distance(const std::string& name); + CEvaluation* evaluation(const std::string& name); CKernel* kernel(const std::string& name); CMachine* machine(const std::string& name); @@ -25,6 +30,8 @@ namespace shogun return create_object(name.c_str()); \ } + BASE_CLASS_FACTORY(CEvaluation, evaluation) + BASE_CLASS_FACTORY(CDistance, distance) BASE_CLASS_FACTORY(CKernel, kernel) BASE_CLASS_FACTORY(CMachine, machine) diff --git a/tests/unit/base/SGObjectAll_unittest.cc b/tests/unit/base/SGObjectAll_unittest.cc index 493b8741109..b1d02624a7f 100644 --- a/tests/unit/base/SGObjectAll_unittest.cc +++ b/tests/unit/base/SGObjectAll_unittest.cc @@ -171,7 +171,7 @@ class sg_object_iterator // list of classes that (currently) cannot be instantiated std::set sg_object_all_ignores = {"ParseBuffer", "Set", - "TreeMachine"}; + "TreeMachine", 
"Hierarchical"}; // template arguments for SGObject // TODO: SGString doesn't support complex128_t, so omitted here