diff --git a/src/interfaces/modular/Converter.i b/src/interfaces/modular/Converter.i
index ece4553446b..edf66629374 100644
--- a/src/interfaces/modular/Converter.i
+++ b/src/interfaces/modular/Converter.i
@@ -23,6 +23,7 @@
 %rename(StochasticProximityEmbedding) CStochasticProximityEmbedding;
 %rename(FactorAnalysis) CFactorAnalysis;
 %rename (TDistributedStochasticNeighborEmbedding) CTDistributedStochasticNeighborEmbedding;
+%rename (ManifoldSculpting) CManifoldSculpting;
 
 %newobject shogun::CEmbeddingConverter::apply;
 %newobject shogun::*::embed_kernel;
@@ -44,3 +45,4 @@
 %include
 %include
 %include
+%include
diff --git a/src/interfaces/modular/Converter_includes.i b/src/interfaces/modular/Converter_includes.i
index b5eed81da7c..39ef5dd4711 100644
--- a/src/interfaces/modular/Converter_includes.i
+++ b/src/interfaces/modular/Converter_includes.i
@@ -15,4 +15,5 @@
 #include
 #include
 #include
+#include
 %}
diff --git a/src/shogun/converter/ManifoldSculpting.cpp b/src/shogun/converter/ManifoldSculpting.cpp
new file mode 100644
index 00000000000..d9d9d855f35
--- /dev/null
+++ b/src/shogun/converter/ManifoldSculpting.cpp
@@ -0,0 +1,103 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Written (W) 2013 Vladyslav S. Gorbatiuk
+ * Copyright (C) 2011-2013 Vladyslav S. Gorbatiuk
+ */
+
+#include
+#ifdef HAVE_EIGEN3
+#include
+#include
+#include
+
+using namespace shogun;
+
+CManifoldSculpting::CManifoldSculpting() :
+		CEmbeddingConverter()
+{
+	// Default values
+	m_k = 10;
+	m_squishing_rate = 0.8;
+	m_max_iteration = 80;
+	init();
+}
+
+void CManifoldSculpting::init()
+{
+	SG_ADD(&m_k, "k", "number of neighbors", MS_NOT_AVAILABLE);
+	SG_ADD(&m_squishing_rate, "squishing_rate",
+		"squishing rate",MS_NOT_AVAILABLE);
+	SG_ADD(&m_max_iteration, "max_iteration",
+		"maximum number of algorithm's iterations", MS_NOT_AVAILABLE);
+}
+
+CManifoldSculpting::~CManifoldSculpting()
+{
+}
+
+const char* CManifoldSculpting::get_name() const
+{
+	return "ManifoldSculpting";
+}
+
+void CManifoldSculpting::set_k(const int32_t k)
+{
+	ASSERT(k>0)
+	m_k = k;
+}
+
+int32_t CManifoldSculpting::get_k() const
+{
+	return m_k;
+}
+
+void CManifoldSculpting::set_squishing_rate(const float64_t squishing_rate)
+{
+	ASSERT(squishing_rate >= 0 && squishing_rate < 1)
+	m_squishing_rate = squishing_rate;
+}
+
+float64_t CManifoldSculpting::get_squishing_rate() const
+{
+	return m_squishing_rate;
+}
+
+void CManifoldSculpting::set_max_iteration(const int32_t max_iteration)
+{
+	ASSERT(max_iteration > 0)
+	m_max_iteration = max_iteration;
+}
+
+int32_t CManifoldSculpting::get_max_iteration() const
+{
+	return m_max_iteration;
+}
+
+CFeatures* CManifoldSculpting::apply(CFeatures* features)
+{
+	CDenseFeatures* feats = (CDenseFeatures*)features;
+	SG_REF(feats);
+	CDistance* euclidean_distance =
+		new CEuclideanDistance(feats, feats);
+
+	TAPKEE_PARAMETERS_FOR_SHOGUN parameters;
+	parameters.n_neighbors = m_k;
+	parameters.squishing_rate = m_squishing_rate;
+	parameters.max_iteration = m_max_iteration;
+	parameters.features = feats;
+	parameters.distance = euclidean_distance;
+
+	parameters.method = SHOGUN_MANIFOLD_SCULPTING;
+	parameters.target_dimension = m_target_dim;
+	CDenseFeatures* embedding = tapkee_embed(parameters);
+
+	SG_UNREF(euclidean_distance);
+
+	return embedding;
+}
+
+#endif /* HAVE_EIGEN3 */
diff --git a/src/shogun/converter/ManifoldSculpting.h b/src/shogun/converter/ManifoldSculpting.h
new file mode 100644
index 00000000000..2219e9dd5e7
--- /dev/null
+++ b/src/shogun/converter/ManifoldSculpting.h
@@ -0,0 +1,106 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Written (W) 2013 Vladyslav S. Gorbatiuk
+ * Copyright (C) 2011-2013 Vladyslav S. Gorbatiuk
+ */
+
+#ifndef MANIFOLDSCULPTING_H_
+#define MANIFOLDSCULPTING_H_
+#include
+#ifdef HAVE_EIGEN3
+#include
+#include
+
+namespace shogun
+{
+
+/** @brief class CManifoldSculpting used to embed
+ * data using manifold sculpting embedding algorithm.
+ *
+ * Uses implementation from the Tapkee library.
+ *
+ */
+class CManifoldSculpting : public CEmbeddingConverter
+{
+public:
+
+	/** constructor */
+	CManifoldSculpting();
+
+	/** destructor */
+	virtual ~CManifoldSculpting();
+
+	/** get name */
+	virtual const char* get_name() const;
+
+	/** apply preprocessor to features
+	 *
+	 * @param features features to embed
+	 * @return embedded features
+	 */
+	virtual CFeatures* apply(CFeatures* features);
+
+	/** setter for the number of neighbors
+	 *
+	 * @param k the number of neighbors (must be positive)
+	 */
+	void set_k(const int32_t k);
+
+	/** getter for the number of neighbors
+	 *
+	 * @return the number of neighbors k
+	 */
+	int32_t get_k() const;
+
+	/** setter for squishing_rate
+	 *
+	 * @param squishing_rate the squishing rate (must be in [0, 1))
+	 */
+	void set_squishing_rate(const float64_t squishing_rate);
+
+	/** getter for squishing_rate
+	 *
+	 * @return squishing_rate
+	 */
+	float64_t get_squishing_rate() const;
+
+	/** setter for the maximum number of iterations
+	 *
+	 * @param max_iteration the maximum number of iterations (must be positive)
+	 */
+	void set_max_iteration(const int32_t max_iteration);
+
+	/** getter for the maximum number of iterations
+	 *
+	 * @return the maximum number of iterations
+	 */
+	int32_t get_max_iteration() const;
+
+private:
+
+	/** default init */
+	void init();
+
+private:
+
+	/** k - number of neighbors */
+	int32_t m_k;
+
+	/** squishing_rate */
+	float64_t m_squishing_rate;
+
+	/** max_iteration - the maximum number of algorithm's
+	 * iterations
+	 */
+	int32_t m_max_iteration;
+
+}; /* class CManifoldSculpting */
+
+} /* namespace shogun */
+
+#endif /* HAVE_EIGEN3 */
+#endif /* MANIFOLDSCULPTING_H_ */
diff --git a/src/shogun/lib/tapkee/routines/manifold_sculpting.hpp b/src/shogun/lib/tapkee/routines/manifold_sculpting.hpp
index 0b795f4860b..b2140787b25 100644
--- a/src/shogun/lib/tapkee/routines/manifold_sculpting.hpp
+++ b/src/shogun/lib/tapkee/routines/manifold_sculpting.hpp
@@ -13,6 +13,7 @@
 
 #include
 #include
+#include
 #include
 #include
 
@@ -21,15 +22,12 @@ namespace tapkee
 namespace tapkee_internal
 {
 
-const ScalarType max_number_of_iterations_without_improvement = 50;
+const ScalarType max_number_of_iterations_without_improvement = 20;
 const ScalarType multiplier_treshold = 0.01;
 const ScalarType weight_for_adjusted_point = 10.0;
 const ScalarType learning_rate_grow_factor = 1.1;
 const ScalarType learning_rate_shrink_factor = 0.9;
 
-using std::deque;
-using std::set;
-
 /** @brief Data needed to compute error function
  */
 struct DataForErrorFunc
@@ -57,7 +55,7 @@ struct DataForErrorFunc
 	/** a set of indices of points, that have been
 	 * already adjusted
 	 */
-	const set& adjusted_points;
+	const std::set& adjusted_points;
 	/** initial average distance between neighbors */
 	const ScalarType average_distance;
 };
@@ -131,8 +129,8 @@ SparseMatrixNeighborsPair angles_matrix_and_neighbors(const Neighbors& neighbors
 			SparseTriplet triplet(i, most_collinear_current_neighbors[j],
 				min_cos_value);
 			sparse_triplets.push_back(triplet);
-			most_collinear_neighbors_of_neighbors.push_back(most_collinear_current_neighbors);
 		}
+		most_collinear_neighbors_of_neighbors.push_back(most_collinear_current_neighbors);
 	}
 	return SparseMatrixNeighborsPair
 		(sparse_matrix_from_triplets(sparse_triplets, n_vectors, n_vectors),
@@ -204,13 +202,16 @@ ScalarType compute_error_for_point(const IndexType index, const DenseMatrix& dat
  * data, needed for error function calculation - such
  * as initial distances between neighbors, initial
  * angles, etc.
+ * @param point_error - will be set to the error function
+ * value, calculated for the point
  * @return a number of steps it took to adjust the
  * point
  */
 IndexType adjust_point_at_index(const IndexType index, DenseMatrix& data,
 		const IndexType target_dimension,
 		const ScalarType learning_rate,
-		const DataForErrorFunc& error_func_data)
+		const DataForErrorFunc& error_func_data,
+		ScalarType& point_error)
 {
 	IndexType n_steps = 0;
 	ScalarType old_error, new_error;
@@ -244,6 +245,7 @@ IndexType adjust_point_at_index(const IndexType index, DenseMatrix& data,
 		}
 		++n_steps;
 	}
+	point_error = compute_error_for_point(index, data, error_func_data);
 	return n_steps;
 }
 
@@ -267,6 +269,7 @@ void manifold_sculpting_embed(DenseMatrix& data, const IndexType target_dimensio
 	ScalarType no_improvement_counter = 0, normal_counter = 0;
 	ScalarType current_multiplier = squishingRate;
 	ScalarType learning_rate = initial_average_distance;
+	ScalarType best_error = DBL_MAX, current_error, point_error;
 	std::srand(static_cast(std::time(NULL)));
 	/* Step 3: Do until no improvement is made for some period
 	 * (or until max_iteration number is reached):
@@ -290,10 +293,11 @@ void manifold_sculpting_embed(DenseMatrix& data, const IndexType target_dimensio
 		 */
 		/* Start adjusting from a random point */
 		IndexType start_point_index = std::rand() % data.cols();
-		deque points_to_adjust;
+		std::deque points_to_adjust;
 		points_to_adjust.push_back(start_point_index);
 		ScalarType steps_made = 0;
-		set adjusted_points;
+		current_error = 0;
+		std::set adjusted_points;
 
 		while (!points_to_adjust.empty())
 		{
@@ -310,7 +314,8 @@ void manifold_sculpting_embed(DenseMatrix& data, const IndexType target_dimensio
 				initial_average_distance
 			};
 			adjust_point_at_index(current_point_index, data, target_dimension,
-					learning_rate, error_func_data);
+					learning_rate, error_func_data, point_error);
+			current_error += point_error;
 			/* Insert all neighbors into deque */
 			std::copy(neighbors[current_point_index].begin(),
 					neighbors[current_point_index].end(),
@@ -323,8 +328,14 @@ void manifold_sculpting_embed(DenseMatrix& data, const IndexType target_dimensio
 			learning_rate *= learning_rate_grow_factor;
 		else
 			learning_rate *= learning_rate_shrink_factor;
+		if (current_error < best_error)
+		{
+			best_error = current_error;
+			no_improvement_counter = 0;
+		}
 	}
 	data.conservativeResize(target_dimension, Eigen::NoChange);
+	data.transposeInPlace();
 }
 
 }
diff --git a/tests/unit/converter/ManifoldSculpting_unittest.cc b/tests/unit/converter/ManifoldSculpting_unittest.cc
new file mode 100644
index 00000000000..612987df374
--- /dev/null
+++ b/tests/unit/converter/ManifoldSculpting_unittest.cc
@@ -0,0 +1,38 @@
+#include
+#include
+#include
+#include
+
+using namespace shogun;
+
+#ifdef HAVE_EIGEN3
+
+/* Basic test for manifold sculpting, that just checks that it works anyhow */
+TEST(ManifoldSculptingTest,basic)
+{
+	const index_t n_samples = 15;
+	const index_t n_dimensions = 3;
+	const index_t n_target_dimensions = 2;
+	CDenseFeatures* high_dimensional_features =
+		new CDenseFeatures(CDataGenerator::generate_gaussians(n_samples, 1, n_dimensions));
+
+	CManifoldSculpting* embedder =
+		new CManifoldSculpting();
+
+	embedder->set_target_dim(n_target_dimensions);
+	EXPECT_EQ(n_target_dimensions, embedder->get_target_dim());
+
+	embedder->set_k(5);
+
+	CDenseFeatures* low_dimensional_features =
+		embedder->embed(high_dimensional_features);
+
+	EXPECT_EQ(n_target_dimensions,low_dimensional_features->get_dim_feature_space());
+	EXPECT_EQ(high_dimensional_features->get_num_vectors(),low_dimensional_features->get_num_vectors());
+
+	SG_UNREF(embedder);
+	SG_UNREF(high_dimensional_features);
+	SG_UNREF(low_dimensional_features);
+}
+
+#endif