Skip to content

Commit

Permalink
Added a wrapper for a manifold sculpting DR method implemented in tap…
Browse files Browse the repository at this point in the history
…kee; added a modular wrapper for the wrapper:); fixed some bugs in the algorithm itself.
  • Loading branch information
vladislav-horbatiuk committed May 16, 2013
1 parent b99f50d commit f6db86c
Show file tree
Hide file tree
Showing 6 changed files with 269 additions and 10 deletions.
2 changes: 2 additions & 0 deletions src/interfaces/modular/Converter.i
Expand Up @@ -23,6 +23,7 @@
%rename(StochasticProximityEmbedding) CStochasticProximityEmbedding;
%rename(FactorAnalysis) CFactorAnalysis;
%rename (TDistributedStochasticNeighborEmbedding) CTDistributedStochasticNeighborEmbedding;
%rename (ManifoldSculpting) CManifoldSculpting;

%newobject shogun::CEmbeddingConverter::apply;
%newobject shogun::*::embed_kernel;
Expand All @@ -44,3 +45,4 @@
%include <shogun/converter/StochasticProximityEmbedding.h>
%include <shogun/converter/FactorAnalysis.h>
%include <shogun/converter/TDistributedStochasticNeighborEmbedding.h>
%include <shogun/converter/ManifoldSculpting.h>
1 change: 1 addition & 0 deletions src/interfaces/modular/Converter_includes.i
Expand Up @@ -15,4 +15,5 @@
#include <shogun/converter/StochasticProximityEmbedding.h>
#include <shogun/converter/FactorAnalysis.h>
#include <shogun/converter/TDistributedStochasticNeighborEmbedding.h>
#include <shogun/converter/ManifoldSculpting.h>
%}
103 changes: 103 additions & 0 deletions src/shogun/converter/ManifoldSculpting.cpp
@@ -0,0 +1,103 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Vladyslav S. Gorbatiuk
* Copyright (C) 2011-2013 Vladyslav S. Gorbatiuk
*/

#include <shogun/converter/ManifoldSculpting.h>
#ifdef HAVE_EIGEN3
#include <shogun/lib/tapkee/tapkee_shogun.hpp>
#include <shogun/features/DenseFeatures.h>
#include <shogun/distance/EuclideanDistance.h>

using namespace shogun;

/** Default constructor: 10 neighbors, squishing rate 0.8 and at most
 * 80 iterations; afterwards the members are handed to init().
 */
CManifoldSculpting::CManifoldSculpting() :
	CEmbeddingConverter(),
	m_k(10),
	m_squishing_rate(0.8),
	m_max_iteration(80)
{
	init();
}

/** Passes every configurable member to SG_ADD together with its
 * parameter name and a short description.
 *
 * The squishing rate is added under the name "squishing_rate"
 * (it used to be misspelled as "quishing_rate").
 */
void CManifoldSculpting::init()
{
	SG_ADD(&m_k, "k", "number of neighbors", MS_NOT_AVAILABLE);
	SG_ADD(&m_squishing_rate, "squishing_rate",
		"squishing rate", MS_NOT_AVAILABLE);
	SG_ADD(&m_max_iteration, "max_iteration",
		"maximum number of algorithm's iterations", MS_NOT_AVAILABLE);
}

/** Destructor; this class has nothing of its own to release. */
CManifoldSculpting::~CManifoldSculpting() {}

/** @return the name of this converter, "ManifoldSculpting" */
const char* CManifoldSculpting::get_name() const
{
	const char* const converter_name = "ManifoldSculpting";
	return converter_name;
}

/** Sets the number of neighbors.
 *
 * @param k number of neighbors; asserted to be greater than zero
 */
void CManifoldSculpting::set_k(const int32_t k)
{
	// Reject non-positive neighbor counts before storing the value.
	ASSERT(k>0)
	this->m_k = k;
}

/** @return the currently stored number of neighbors */
int32_t CManifoldSculpting::get_k() const
{
	return this->m_k;
}

/** Sets the squishing rate.
 *
 * @param squishing_rate new rate; asserted to lie in [0, 1)
 */
void CManifoldSculpting::set_squishing_rate(const float64_t squishing_rate)
{
	// Only rates from zero (inclusive) up to one (exclusive) are accepted.
	ASSERT(squishing_rate >= 0 && squishing_rate < 1)
	this->m_squishing_rate = squishing_rate;
}

/** @return the currently stored squishing rate */
float64_t CManifoldSculpting::get_squishing_rate() const
{
	return this->m_squishing_rate;
}

/** Sets the upper bound on the number of iterations.
 *
 * @param max_iteration iteration limit; asserted to be greater than zero
 */
void CManifoldSculpting::set_max_iteration(const int32_t max_iteration)
{
	// A limit of zero or less is rejected.
	ASSERT(max_iteration > 0)
	this->m_max_iteration = max_iteration;
}

/** @return the currently stored iteration limit */
int32_t CManifoldSculpting::get_max_iteration() const
{
	return this->m_max_iteration;
}

/** Embeds the given features by calling tapkee_embed() with the
 * manifold sculpting method selected.
 *
 * @param features features to embed; cast to CDenseFeatures<float64_t>
 *        without a runtime type check
 * @return the embedding produced by tapkee_embed()
 */
CFeatures* CManifoldSculpting::apply(CFeatures* features)
{
	CDenseFeatures<float64_t>* dense_features =
		(CDenseFeatures<float64_t>*)features;
	// NOTE(review): this reference is not released anywhere in this
	// function - confirm that this is intended.
	SG_REF(dense_features);

	// Euclidean distance of the input features to themselves.
	CDistance* distance =
		new CEuclideanDistance(dense_features, dense_features);

	TAPKEE_PARAMETERS_FOR_SHOGUN tapkee_parameters;
	// What to run and how many output dimensions to produce.
	tapkee_parameters.method = SHOGUN_MANIFOLD_SCULPTING;
	tapkee_parameters.target_dimension = m_target_dim;
	// Algorithm settings taken from this object.
	tapkee_parameters.n_neighbors = m_k;
	tapkee_parameters.squishing_rate = m_squishing_rate;
	tapkee_parameters.max_iteration = m_max_iteration;
	// Input data and the distance computed on it.
	tapkee_parameters.features = dense_features;
	tapkee_parameters.distance = distance;

	CDenseFeatures<float64_t>* result = tapkee_embed(tapkee_parameters);

	// The distance object is only needed while embedding.
	SG_UNREF(distance);

	return result;
}

#endif /* HAVE_EIGEN3 */
105 changes: 105 additions & 0 deletions src/shogun/converter/ManifoldSculpting.h
@@ -0,0 +1,105 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Vladyslav S. Gorbatiuk
* Copyright (C) 2011-2013 Vladyslav S. Gorbatiuk
*/

#ifndef MANIFOLDSCULPTING_H_
#define MANIFOLDSCULPTING_H_
#include <shogun/lib/config.h>
#ifdef HAVE_EIGEN3
#include <shogun/converter/EmbeddingConverter.h>
#include <shogun/features/Features.h>

namespace shogun
{

/** @brief class CManifoldSculpting used to embed
 * data using manifold sculpting embedding algorithm.
 *
 * Uses implementation from the Tapkee library.
 *
 */
class CManifoldSculpting : public CEmbeddingConverter
{
public:

	/** constructor */
	CManifoldSculpting();

	/** destructor */
	virtual ~CManifoldSculpting();

	/** get name
	 *
	 * @return name of the converter
	 */
	virtual const char* get_name() const;

	/** apply the manifold sculpting embedding to features
	 *
	 * @param features features to embed
	 * @return embedding of the given features
	 */
	virtual CFeatures* apply(CFeatures* features);

	/** setter for the number of neighbors k
	 *
	 * @param k the number of neighbors, must be positive
	 */
	void set_k(const int32_t k);

	/** getter for the number of neighbors
	 *
	 * @return the number of neighbors k
	 */
	int32_t get_k() const;

	/** setter for squishing_rate
	 *
	 * @param squishing_rate the squishing rate, must be in [0, 1)
	 */
	void set_squishing_rate(const float64_t squishing_rate);

	/** getter for squishing_rate
	 *
	 * @return squishing_rate
	 */
	float64_t get_squishing_rate() const;

	/** setter for the maximum number of iterations
	 *
	 * @param max_iteration the maximum number of iterations,
	 *        must be positive
	 */
	void set_max_iteration(const int32_t max_iteration);

	/** getter for the maximum number of iterations
	 *
	 * @return the maximum number of iterations
	 */
	int32_t get_max_iteration() const;

private:

	/** default init */
	void init();

private:

	/** k - number of neighbors; an integer count, stored with the
	 * same type the accessors use
	 */
	int32_t m_k;

	/** squishing_rate */
	float64_t m_squishing_rate;

	/** max_iteration - the maximum number of algorithm's
	 * iterations; an integer count, stored with the same type
	 * the accessors use
	 */
	int32_t m_max_iteration;

}; /* class CManifoldSculpting */

} /* namespace shogun */

#endif /* HAVE_EIGEN3 */
#endif /* MANIFOLDSCULPTING_H_ */
31 changes: 21 additions & 10 deletions src/shogun/lib/tapkee/routines/manifold_sculpting.hpp
Expand Up @@ -13,6 +13,7 @@

#include <math.h>
#include <ctime>
#include <cfloat>
#include <deque>
#include <set>

Expand All @@ -21,15 +22,12 @@ namespace tapkee
namespace tapkee_internal
{

const ScalarType max_number_of_iterations_without_improvement = 50;
const ScalarType max_number_of_iterations_without_improvement = 20;
const ScalarType multiplier_treshold = 0.01;
const ScalarType weight_for_adjusted_point = 10.0;
const ScalarType learning_rate_grow_factor = 1.1;
const ScalarType learning_rate_shrink_factor = 0.9;

using std::deque;
using std::set;

/** @brief Data needed to compute error function
*/
struct DataForErrorFunc
Expand Down Expand Up @@ -57,7 +55,7 @@ struct DataForErrorFunc
/** a set of indices of points, that have been
* already adjusted
*/
const set<IndexType>& adjusted_points;
const std::set<IndexType>& adjusted_points;
/** initial average distance between neighbors */
const ScalarType average_distance;
};
Expand Down Expand Up @@ -131,8 +129,8 @@ SparseMatrixNeighborsPair angles_matrix_and_neighbors(const Neighbors& neighbors

SparseTriplet triplet(i, most_collinear_current_neighbors[j], min_cos_value);
sparse_triplets.push_back(triplet);
most_collinear_neighbors_of_neighbors.push_back(most_collinear_current_neighbors);
}
most_collinear_neighbors_of_neighbors.push_back(most_collinear_current_neighbors);
}
return SparseMatrixNeighborsPair
(sparse_matrix_from_triplets(sparse_triplets, n_vectors, n_vectors),
Expand Down Expand Up @@ -204,13 +202,16 @@ ScalarType compute_error_for_point(const IndexType index, const DenseMatrix& dat
* data, needed for error function calculation - such
* as initial distances between neighbors, initial
* angles, etc.
* @param point_error - will be set to the error function
* value, calculated for the point
* @return a number of steps it took to adjust the
* point
*/
IndexType adjust_point_at_index(const IndexType index, DenseMatrix& data,
const IndexType target_dimension,
const ScalarType learning_rate,
const DataForErrorFunc& error_func_data)
const DataForErrorFunc& error_func_data,
ScalarType& point_error)
{
IndexType n_steps = 0;
ScalarType old_error, new_error;
Expand Down Expand Up @@ -244,6 +245,7 @@ IndexType adjust_point_at_index(const IndexType index, DenseMatrix& data,
}
++n_steps;
}
point_error = compute_error_for_point(index, data, error_func_data);
return n_steps;
}

Expand All @@ -267,6 +269,7 @@ void manifold_sculpting_embed(DenseMatrix& data, const IndexType target_dimensio
ScalarType no_improvement_counter = 0, normal_counter = 0;
ScalarType current_multiplier = squishingRate;
ScalarType learning_rate = initial_average_distance;
ScalarType best_error = DBL_MAX, current_error, point_error;
std::srand(static_cast<unsigned int>(std::time(NULL)));
/* Step 3: Do until no improvement is made for some period
* (or until max_iteration number is reached):
Expand All @@ -290,10 +293,11 @@ void manifold_sculpting_embed(DenseMatrix& data, const IndexType target_dimensio
*/
/* Start adjusting from a random point */
IndexType start_point_index = std::rand() % data.cols();
deque<IndexType> points_to_adjust;
std::deque<IndexType> points_to_adjust;
points_to_adjust.push_back(start_point_index);
ScalarType steps_made = 0;
set<IndexType> adjusted_points;
current_error = 0;
std::set<IndexType> adjusted_points;

while (!points_to_adjust.empty())
{
Expand All @@ -310,7 +314,8 @@ void manifold_sculpting_embed(DenseMatrix& data, const IndexType target_dimensio
initial_average_distance
};
adjust_point_at_index(current_point_index, data, target_dimension,
learning_rate, error_func_data);
learning_rate, error_func_data, point_error);
current_error += point_error;
/* Insert all neighbors into deque */
std::copy(neighbors[current_point_index].begin(),
neighbors[current_point_index].end(),
Expand All @@ -323,8 +328,14 @@ void manifold_sculpting_embed(DenseMatrix& data, const IndexType target_dimensio
learning_rate *= learning_rate_grow_factor;
else
learning_rate *= learning_rate_shrink_factor;
if (current_error < best_error)
{
best_error = current_error;
no_improvement_counter = 0;
}
}
data.conservativeResize(target_dimension, Eigen::NoChange);
data.transposeInPlace();
}

}
Expand Down
37 changes: 37 additions & 0 deletions tests/unit/converter/ManifoldSculpting_unittest.cc
@@ -0,0 +1,37 @@
#include <shogun/converter/ManifoldSculpting.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/DataGenerator.h>
#include <gtest/gtest.h>

using namespace shogun;

#ifdef HAVE_EIGEN3

/* Basic test for manifold sculpting: embeds a small generated data set and
 * checks that the result has the requested number of dimensions and the
 * same number of vectors as the input. */
TEST(ManifoldSculptingTest,basic)
{
	const index_t n_samples = 15;
	const index_t n_dimensions = 3;
	const index_t n_target_dimensions = 2;
	CDenseFeatures<float64_t>* high_dimensional_features =
		new CDenseFeatures<float64_t>(CDataGenerator::generate_gaussians(n_samples, 1, n_dimensions));

	CManifoldSculpting* embedder =
		new CManifoldSculpting();

	embedder->set_target_dim(n_target_dimensions);
	EXPECT_EQ(n_target_dimensions, embedder->get_target_dim());

	embedder->set_k(5);

	CDenseFeatures<float64_t>* low_dimensional_features =
		embedder->embed(high_dimensional_features);

	EXPECT_EQ(n_target_dimensions,low_dimensional_features->get_dim_feature_space());
	EXPECT_EQ(high_dimensional_features->get_num_vectors(),low_dimensional_features->get_num_vectors());

	SG_UNREF(embedder);
	SG_UNREF(high_dimensional_features);
	/* the embedding is a separate object handed to the test, so it has
	 * to be released here as well */
	SG_UNREF(low_dimensional_features);
}

#endif

0 comments on commit f6db86c

Please sign in to comment.