Skip to content

Commit

Permalink
Feature/refactor meta (#4257)
Browse files Browse the repository at this point in the history
* Add distance interface factory. Port Euclidean meta example to new API.

* API port in hierarchical clustering meta-example. Add watch_params.

* Port LARs meta example.

* Minor name update revert.

* Update data generated by meta examples.

* Fix return type to base class in meta ex. Add base class to interfaces.

* Disable Hierarchical clone test.
  • Loading branch information
iglesias authored and karlnapf committed Apr 16, 2018
1 parent 0ea421a commit ebd45ae
Show file tree
Hide file tree
Showing 14 changed files with 71 additions and 53 deletions.
16 changes: 5 additions & 11 deletions examples/meta/src/clustering/hierarchical.sg
@@ -1,23 +1,17 @@
CSVFile f_feats_train("../../data/classifier_4class_2d_linear_features_train.dat")
File f_features_train = csv_file("../../data/classifier_4class_2d_linear_features_train.dat")

#![create_features]
RealFeatures features_train(f_feats_train)
Features features_train = features(f_features_train)
#![create_features]

#![choose_distance]
EuclideanDistance distance(features_train, features_train)
Distance d = distance("EuclideanDistance", lhs=features_train, rhs=features_train)
#![choose_distance]

#![create_instance]
int merges = 3
Hierarchical hierarchical(merges, distance)
Machine hierarchical = machine("Hierarchical", merges=3, distance=d)
#![create_instance]

#![train_model]
hierarchical.train()
hierarchical.train(features_train)
#![train_model]

#![extract_results]
RealVector d = hierarchical.get_merge_distances()
IntMatrix cp = hierarchical.get_cluster_pairs()
#![extract_results]
18 changes: 9 additions & 9 deletions examples/meta/src/distance/euclidean.sg
@@ -1,24 +1,24 @@
CSVFile f_feats_a("../../data/fm_train_real.dat")
CSVFile f_feats_b("../../data/fm_test_real.dat")
File f_features_a = csv_file("../../data/fm_train_real.dat")
File f_features_b = csv_file("../../data/fm_test_real.dat")

#![create_features]
RealFeatures features_a(f_feats_a)
RealFeatures features_b(f_feats_b)
Features features_a = features(f_features_a)
Features features_b = features(f_features_b)
#![create_features]

#![create_instance]
EuclideanDistance distance(features_a, features_a)
Distance d = distance("EuclideanDistance", lhs=features_a, rhs=features_a)
#![create_instance]

#![extract_distance]
RealMatrix distance_matrix_aa = distance.get_distance_matrix()
RealMatrix distance_matrix_aa = d.get_distance_matrix()
#![extract_distance]

#![refresh_distance]
distance.init(features_a, features_b)
d.init(features_a, features_b)
#![refresh_distance]

#![extract_sq_distance]
distance.set_disable_sqrt(True)
RealMatrix distance_matrix_ab = distance.get_distance_matrix()
d.put("disable_sqrt", True)
RealMatrix sq_distance_matrix_ab = d.get_distance_matrix()
#![extract_sq_distance]
Expand Up @@ -41,7 +41,7 @@ cross.subscribe_to_parameters(mkl_obs)
#[!create_observer]

#![evaluate_and_get_result]
CrossValidationResult result = cross.evaluate()
EvaluationResult result = cross.evaluate()
#![evaluate_and_get_result]

#![get_results]
Expand Down
35 changes: 17 additions & 18 deletions examples/meta/src/regression/linear_ridge_regression.sg
@@ -1,42 +1,41 @@
CSVFile f_feats_train("../../data/regression_1d_linear_features_train.dat")
CSVFile f_feats_test("../../data/regression_1d_linear_features_test.dat")
CSVFile f_labels_train("../../data/regression_1d_linear_labels_train.dat")
CSVFile f_labels_test("../../data/regression_1d_linear_labels_test.dat")
File f_feats_train = csv_file("../../data/regression_1d_linear_features_train.dat")
File f_feats_test = csv_file("../../data/regression_1d_linear_features_test.dat")
File f_labels_train = csv_file("../../data/regression_1d_linear_labels_train.dat")
File f_labels_test = csv_file("../../data/regression_1d_linear_labels_test.dat")

#![create_features]
RealFeatures features_train(f_feats_train)
RealFeatures features_test(f_feats_test)
RegressionLabels labels_train(f_labels_train)
RegressionLabels labels_test(f_labels_test)
Features features_train = features(f_feats_train)
Features features_test = features(f_feats_test)
Labels labels_train = labels(f_labels_train)
Labels labels_test = labels(f_labels_test)
#![create_features]

#![create_instance]
real tau = 0.001
LinearRidgeRegression lrr(tau, features_train, labels_train)
Machine lrr = machine("LinearRidgeRegression", tau=0.001, labels=labels_train)
#![create_instance]

#![train_and_apply]
lrr.train()
RegressionLabels labels_predict = lrr.apply_regression(features_test)
real bias = lrr.get_bias()
lrr.train(features_train)
Labels labels_predict = lrr.apply(features_test)
real b = lrr.get_real("bias")
#![train_and_apply]

#[!disable_bias]
lrr.set_compute_bias(False)
lrr.put("compute_bias", False)
#[!disable_bias]

#[!set_bias_manually]
lrr.set_bias(bias)
lrr.put("bias", b)
#[!set_bias_manually]

#[!extract_w]
RealVector weights = lrr.get_w()
RealVector w = lrr.get_real_vector("w")
#[!extract_w]

#![evaluate_error]
MeanSquaredError eval()
Evaluation eval = evaluation("MeanSquaredError")
real mse = eval.evaluate(labels_predict, labels_test)
#![evaluate_error]

# integration testing variables
RealVector output = labels_test.get_labels()
RealVector output = labels_test.get_real_vector("labels")
3 changes: 2 additions & 1 deletion src/interfaces/swig/Evaluation.i
Expand Up @@ -14,6 +14,7 @@

/* Remove C Prefix */
%rename(Evaluation) CEvaluation;
%rename(EvaluationResult) CEvaluationResult;
%rename(BinaryClassEvaluation) CBinaryClassEvaluation;
%rename(ClusteringEvaluation) CClusteringEvaluation;
%rename(ClusteringAccuracy) CClusteringAccuracy;
Expand Down Expand Up @@ -74,4 +75,4 @@
%include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
%include <shogun/evaluation/CrossValidationSplitting.h>
%include <shogun/evaluation/StructuredAccuracy.h>
%include <shogun/evaluation/DirectorContingencyTableEvaluation.h>
%include <shogun/evaluation/DirectorContingencyTableEvaluation.h>
1 change: 1 addition & 0 deletions src/interfaces/swig/shogun.i
Expand Up @@ -174,6 +174,7 @@ namespace shogun
%template(put) CSGObject::put_scalar_dispatcher<int64_t, int64_t>;
#endif // SWIGJAVA
%template(put) CSGObject::put_scalar_dispatcher<float64_t, float64_t>;
%template(put) CSGObject::put_scalar_dispatcher<bool, bool>;


#ifndef SWIGJAVA
Expand Down
15 changes: 15 additions & 0 deletions src/shogun/clustering/Hierarchical.cpp
Expand Up @@ -29,13 +29,28 @@ CHierarchical::CHierarchical()
: CDistanceMachine(), merges(3), dimensions(0), assignment_size(0),
assignment(NULL), table_size(0), pairs(NULL), merge_distance(NULL)
{
// Default constructor: three merges, no distance set, all tables empty.
// assignment_size is given a defined value here because
// register_parameters() exposes it by address; without this the
// parameter framework could read an indeterminate integer.
register_parameters();
}

/** Constructs a hierarchical clustering machine.
 *
 * @param merges_ value stored in the `merges` member
 * @param d distance object, forwarded to set_distance()
 */
CHierarchical::CHierarchical(int32_t merges_, CDistance* d)
: CDistanceMachine(), merges(merges_), dimensions(0), assignment_size(0),
assignment(NULL), table_size(0), pairs(NULL), merge_distance(NULL)
{
set_distance(d);
// assignment_size is given a defined value in the initializer list because
// register_parameters() exposes it by address; without this the parameter
// framework could read an indeterminate integer.
register_parameters();
}

// First dimension of the `pairs` table as registered below; its address is
// passed to watch_param() together with `merges` as the second dimension.
// NOTE(review): kept as a mutable file-local variable, presumably because
// watch_param() takes the dimensions by non-const pointer -- confirm before
// making it const.
static int PAIRS_DIM_0 = 2;

/** Registers the members of CHierarchical with watch_param() under string
 * names. Called at the end of the constructors.
 */
void CHierarchical::register_parameters()
{
watch_param("merges", &merges);
watch_param("dimensions", &dimensions);
watch_param("assignment_size", &assignment_size);
// NOTE(review): `assignment` is registered with `table_size` as its length
// although `assignment_size` is registered separately just above -- confirm
// which member actually holds the length of `assignment`.
watch_param("assignment", &assignment, &table_size);
watch_param("table_size", &table_size);
// `pairs` is registered with two size pointers: PAIRS_DIM_0 and `merges`.
watch_param("pairs", &pairs, &PAIRS_DIM_0, &merges);
// `merge_distance` is registered with `merges` as its length.
watch_param("merge_distance", &merge_distance, &merges);
}

CHierarchical::~CHierarchical()
Expand Down
4 changes: 4 additions & 0 deletions src/shogun/clustering/Hierarchical.h
Expand Up @@ -121,6 +121,10 @@ class CHierarchical : public CDistanceMachine

virtual bool train_require_labels() const { return false; }

private:
/** Register all parameters */
void register_parameters();

protected:
/// the number of merges in hierarchical clustering
int32_t merges;
Expand Down
8 changes: 2 additions & 6 deletions src/shogun/distance/Distance.cpp
Expand Up @@ -259,12 +259,8 @@ void CDistance::init()
num_lhs=0;
num_rhs=0;

SG_ADD(
&lhs, "lhs", "Feature vectors to occur on left hand side.",
MS_NOT_AVAILABLE);
SG_ADD(
&rhs, "rhs", "Feature vectors to occur on right hand side.",
MS_NOT_AVAILABLE);
SG_ADD(&lhs, "lhs", "Left hand side features.", MS_NOT_AVAILABLE);
SG_ADD(&rhs, "rhs", "Right hand side features.", MS_NOT_AVAILABLE);
}

template <class T>
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/distance/EuclideanDistance.cpp
@@ -1,7 +1,7 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Saurabh Mahindre, Soumyajit De, Chiyuan Zhang, Viktor Gal,
* Authors: Saurabh Mahindre, Soumyajit De, Chiyuan Zhang, Viktor Gal,
* Bjoern Esser, Soeren Sonnenburg
*/

Expand Down
7 changes: 4 additions & 3 deletions src/shogun/machine/LinearMachine.cpp
Expand Up @@ -2,7 +2,8 @@
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Soeren Sonnenburg, Sergey Lisitsyn, Evan Shelhamer,
* Youssef Emad El-Din, Evgeniy Andreev, Thoralf Klein, Bjoern Esser
* Youssef Emad El-Din, Evgeniy Andreev, Thoralf Klein, Bjoern Esser,
* Fernando Iglesias
*/

#include <shogun/machine/LinearMachine.h>
Expand Down Expand Up @@ -43,8 +44,8 @@ void CLinearMachine::init()

SG_ADD(&m_w, "w", "Parameter vector w.", MS_NOT_AVAILABLE);
SG_ADD(&bias, "bias", "Bias b.", MS_NOT_AVAILABLE);
SG_ADD((CSGObject**) &features, "features", "Feature object.",
MS_NOT_AVAILABLE);
SG_ADD((CSGObject**) &features, "features", "Feature object.", MS_NOT_AVAILABLE);
SG_ADD(&m_compute_bias, "compute_bias", "Whether bias is computed.", MS_NOT_AVAILABLE);
}


Expand Down
9 changes: 8 additions & 1 deletion src/shogun/util/factory.h
@@ -1,21 +1,26 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Heiko Strathmann
* Authors: Heiko Strathmann, Fernando Iglesias
*/
#ifndef FACTORY_H_
#define FACTORY_H_

#include <shogun/base/class_list.h>
#include <shogun/distance/Distance.h>
#include <shogun/evaluation/Evaluation.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/io/CSVFile.h>
#include <shogun/io/SGIO.h>
#include <shogun/kernel/Kernel.h>
#include <shogun/labels/DenseLabels.h>
#include <shogun/machine/Machine.h>

namespace shogun
{

CDistance* distance(const std::string& name);
CEvaluation* evaluation(const std::string& name);
CKernel* kernel(const std::string& name);
CMachine* machine(const std::string& name);

Expand All @@ -25,6 +30,8 @@ namespace shogun
return create_object<T>(name.c_str()); \
}

BASE_CLASS_FACTORY(CEvaluation, evaluation)
BASE_CLASS_FACTORY(CDistance, distance)
BASE_CLASS_FACTORY(CKernel, kernel)
BASE_CLASS_FACTORY(CMachine, machine)

Expand Down
2 changes: 1 addition & 1 deletion tests/unit/base/SGObjectAll_unittest.cc
Expand Up @@ -171,7 +171,7 @@ class sg_object_iterator

// list of classes that (currently) cannot be instantiated
std::set<std::string> sg_object_all_ignores = {"ParseBuffer", "Set",
"TreeMachine"};
"TreeMachine", "Hierarchical"};

// template arguments for SGObject
// TODO: SGString doesn't support complex128_t, so omitted here
Expand Down

0 comments on commit ebd45ae

Please sign in to comment.