Skip to content

Commit

Permalink
Update tapkee with manifold sculpting merge and 'external' randomizat…
Browse files Browse the repository at this point in the history
…ion routines
  • Loading branch information
lisitsyn committed May 23, 2013
1 parent 683cab4 commit 97760bf
Show file tree
Hide file tree
Showing 16 changed files with 172 additions and 125 deletions.
1 change: 1 addition & 0 deletions src/shogun/lib/tapkee/defines.hpp
Expand Up @@ -26,6 +26,7 @@
#include <shogun/lib/tapkee/defines/keywords.hpp>
#include <shogun/lib/tapkee/defines/stdtypes.hpp>
#include <shogun/lib/tapkee/defines/synonyms.hpp>
#include <shogun/lib/tapkee/defines/random.hpp>
#include <shogun/lib/tapkee/projection.hpp>
/* End of Tapkee includes */

Expand Down
66 changes: 66 additions & 0 deletions src/shogun/lib/tapkee/defines/random.hpp
@@ -0,0 +1,66 @@
/* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Copyright (c) 2012-2013 Sergey Lisitsyn
*/

#ifndef TAPKEE_DEFINES_RANDOM_H_
#define TAPKEE_DEFINES_RANDOM_H_

#include <cstdlib>
#include <algorithm>

namespace tapkee
{

IndexType uniform_random_index()
{
#ifdef CUSTOM_UNIFORM_RANDOM_INDEX_FUNCTION
return CUSTOM_UNIFORM_RANDOM_INDEX_FUNCTION;
#else
return rand();
#endif
}

IndexType uniform_random_index_bounded(IndexType upper)
{
return uniform_random_index() % upper;
}

ScalarType uniform_random()
{
#ifdef CUSTOM_UNIFORM_RANDOM_FUNCTION
return CUSTOM_UNIFORM_RANDOM_FUNCTION;
#else
return rand()/static_cast<ScalarType>(RAND_MAX);
#endif
}

ScalarType gaussian_random()
{
#ifdef CUSTOM_GAUSSIAN_RANDOM_FUNCTION
return CUSTOM_GAUSSIAN_RANDOM_FUNCTION;
#else
ScalarType x, y, radius;
do {
x = 2 * (rand() / ((double) RAND_MAX + 1)) - 1;
y = 2 * (rand() / ((double) RAND_MAX + 1)) - 1;
radius = (x * x) + (y * y);
} while ((radius >= 1.0) || (radius == 0.0));
radius = sqrt(-2 * log(radius) / radius);
x *= radius;
y *= radius;
return x;
#endif
}

template <class RAI>
void random_shuffle(RAI first, RAI last)
{
std::random_shuffle(first,last,uniform_random_index_bounded);
}

}

#endif


15 changes: 1 addition & 14 deletions src/shogun/lib/tapkee/external/barnes_hut_sne/tsne.hpp
Expand Up @@ -129,7 +129,7 @@ class TSNE
else { for(int i = 0; i < row_P[N]; i++) val_P[i] *= 12.0; }

// Initialize solution (randomly)
for(int i = 0; i < N * no_dims; i++) Y[i] = randn() * .0001;
for(int i = 0; i < N * no_dims; i++) Y[i] = tapkee::gaussian_random() * .0001;
}

{
Expand Down Expand Up @@ -777,19 +777,6 @@ class TSNE
free(dataSums); dataSums = NULL;
}

double randn()
{
double x, y, radius;
do {
x = 2 * (rand() / ((double) RAND_MAX + 1)) - 1;
y = 2 * (rand() / ((double) RAND_MAX + 1)) - 1;
radius = (x * x) + (y * y);
} while((radius >= 1.0) || (radius == 0.0));
radius = sqrt(-2 * log(radius) / radius);
x *= radius;
y *= radius;
return x;
}
};

}
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/lib/tapkee/external/barnes_hut_sne/vptree.hpp
Expand Up @@ -199,7 +199,7 @@ class VpTree
if (upper - lower > 1) { // if we did not arrive at leaf yet

// Choose an arbitrary point and move it to the start
int i = (int) ((double)rand() / RAND_MAX * (upper - lower - 1)) + lower;
int i = (int) (tapkee::uniform_random() * (upper - lower - 1)) + lower;
std::swap(_items[lower], _items[i]);

// Partition around the median distance
Expand Down
8 changes: 4 additions & 4 deletions src/shogun/lib/tapkee/methods.hpp
Expand Up @@ -497,15 +497,15 @@ class ImplementationBase
TapkeeOutput embedManifoldSculpting()
{
squishing_rate.checked()
.inRange(static_cast<ScalarType>(0.0), static_cast<ScalarType>(1.0));
.inRange(static_cast<ScalarType>(0.0),
static_cast<ScalarType>(1.0));

DenseMatrix embedding =
dense_matrix_from_features(features, begin, end);

Neighbors neighbors = find_neighbors(neighbors_method,begin,end,plain_distance,
n_neighbors,check_connectivity);
Neighbors neighbors = findNeighborsWith(plain_distance);

manifold_sculpting_embed(embedding, target_dimension, neighbors, distance, max_iteration, squishing_rate);
manifold_sculpting_embed(begin, end, embedding, target_dimension, neighbors, distance, max_iteration, squishing_rate);

return TapkeeOutput(embedding, tapkee::ProjectingFunction());
}
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/lib/tapkee/neighbors/vptree.hpp
Expand Up @@ -161,7 +161,7 @@ class VantagePointTree

if (upper - lower > 1)
{
int i = (int) ((double)rand() / RAND_MAX * (upper - lower - 1)) + lower;
int i = (int) (tapkee::uniform_random() * (upper - lower - 1)) + lower;
std::swap(items[lower], items[i]);

int median = (upper + lower) / 2;
Expand Down
2 changes: 2 additions & 0 deletions src/shogun/lib/tapkee/parameters/defaults.hpp
Expand Up @@ -31,7 +31,9 @@ const ParametersSet defaults = (
tapkee::keywords::progress_function = tapkee::keywords::by_default,
tapkee::keywords::cancel_function = tapkee::keywords::by_default,
tapkee::keywords::sne_perplexity = tapkee::keywords::by_default,
tapkee::keywords::squishing_rate = tapkee::keywords::by_default,
tapkee::keywords::sne_theta = tapkee::keywords::by_default);

}

}
Expand Down
17 changes: 3 additions & 14 deletions src/shogun/lib/tapkee/routines/eigendecomposition.hpp
Expand Up @@ -90,21 +90,10 @@ EigendecompositionResult eigendecomposition_impl_randomized(const MatrixType& wm
DenseMatrix O(wm.rows(), target_dimension+skip);
for (IndexType i=0; i<O.rows(); ++i)
{
IndexType j=0;
for ( ; j+1 < O.cols(); j+= 2)
for (IndexType j=0; j+1 < O.cols(); j++)
{
ScalarType v1 = (ScalarType)(rand()+1.f)/((float)RAND_MAX+2.f);
ScalarType v2 = (ScalarType)(rand()+1.f)/((float)RAND_MAX+2.f);
ScalarType len = sqrt(-2.f*log(v1));
O(i,j) = len*cos(2.f*M_PI*v2);
O(i,j+1) = len*sin(2.f*M_PI*v2);
}
for ( ; j < O.cols(); j++)
{
ScalarType v1 = (ScalarType)(rand()+1.f)/((float)RAND_MAX+2.f);
ScalarType v2 = (ScalarType)(rand()+1.f)/((float)RAND_MAX+2.f);
ScalarType len = sqrt(-2.f*log(v1));
O(i,j) = len*cos(2.f*M_PI*v2);
O(i,j) = tapkee::gaussian_random();
O(i,j+1) = tapkee::gaussian_random();
}
}
MatrixOperationType operation(wm);
Expand Down
155 changes: 82 additions & 73 deletions src/shogun/lib/tapkee/routines/manifold_sculpting.hpp
Expand Up @@ -22,11 +22,14 @@ namespace tapkee
namespace tapkee_internal
{

const ScalarType max_number_of_iterations_without_improvement = 20;
const ScalarType multiplier_treshold = 0.01;
const ScalarType weight_for_adjusted_point = 10.0;
const ScalarType learning_rate_grow_factor = 1.1;
const ScalarType learning_rate_shrink_factor = 0.9;
namespace
{
const ScalarType max_number_of_iterations_without_improvement = 20;
const ScalarType multiplier_treshold = 0.01;
const ScalarType weight_for_adjusted_point = 10.0;
const ScalarType learning_rate_grow_factor = 1.1;
const ScalarType learning_rate_shrink_factor = 0.9;
}

/** @brief Data needed to compute error function
*/
Expand Down Expand Up @@ -60,12 +63,14 @@ struct DataForErrorFunc
const ScalarType average_distance;
};

template<class DistanceCallback>
SparseMatrix neighbors_distances_matrix(const Neighbors& neighbors, const DistanceCallback& callback,
ScalarType& average_distance)
template<class RandomAccessIterator, class DistanceCallback>
SparseMatrix neighbors_distances_matrix(RandomAccessIterator begin, RandomAccessIterator end,
const Neighbors& neighbors, DistanceCallback callback,
ScalarType& average_distance)
{
const IndexType k = neighbors[0].size();
const IndexType n = neighbors.size();
assert((end-begin)==n);
SparseTriplets sparse_triplets;
sparse_triplets.reserve(k*n);
average_distance = 0;
Expand All @@ -76,7 +81,7 @@ SparseMatrix neighbors_distances_matrix(const Neighbors& neighbors, const Distan
const LocalNeighbors& current_neighbors = neighbors[i];
for (IndexType j = 0; j < k; ++j)
{
current_distance = callback.distance(i, current_neighbors[j]);
current_distance = callback.distance(begin[i], begin[current_neighbors[j]]);
average_distance += current_distance;
SparseTriplet triplet(i, current_neighbors[j], current_distance);
sparse_triplets.push_back(triplet);
Expand Down Expand Up @@ -249,91 +254,95 @@ IndexType adjust_point_at_index(const IndexType index, DenseMatrix& data,
return n_steps;
}

template <class DistanceCallback>
void manifold_sculpting_embed(DenseMatrix& data, const IndexType target_dimension,
const Neighbors& neighbors, const DistanceCallback& callback,
const IndexType max_iteration, const ScalarType squishingRate)
template <class RandomAccessIterator, class DistanceCallback>
void manifold_sculpting_embed(RandomAccessIterator begin, RandomAccessIterator end,
DenseMatrix& data, IndexType target_dimension,
const Neighbors& neighbors, DistanceCallback callback,
IndexType max_iteration, ScalarType squishing_rate)
{
/* Step 1: Get initial distances to each neighbor and initial
* angles between the point Pi, each neighbor Nij, and the most
* collinear neighbor of Nij.
*/
ScalarType initial_average_distance;
SparseMatrix distances_to_neighbors = neighbors_distances_matrix(neighbors, callback, initial_average_distance);
SparseMatrix distances_to_neighbors =
neighbors_distances_matrix(begin, end, neighbors, callback, initial_average_distance);
SparseMatrixNeighborsPair angles_and_neighbors =
angles_matrix_and_neighbors(neighbors, data);
angles_matrix_and_neighbors(neighbors, data);

/* Step 2: Optionally preprocess the data using PCA
* (skipped for now).
*/

ScalarType no_improvement_counter = 0, normal_counter = 0;
ScalarType current_multiplier = squishingRate;
ScalarType current_multiplier = squishing_rate;
ScalarType learning_rate = initial_average_distance;
ScalarType best_error = DBL_MAX, current_error, point_error;
std::srand(static_cast<unsigned int>(std::time(NULL)));
/* Step 3: Do until no improvement is made for some period
* (or until max_iteration number is reached):
*/
while (((no_improvement_counter++ < max_number_of_iterations_without_improvement)
|| (current_multiplier > multiplier_treshold))
&& (normal_counter++ < max_iteration))
|| (current_multiplier > multiplier_treshold))
&& (normal_counter++ < max_iteration))
{
/* Step 3a: Scale the data in non-preserved dimensions
* by a factor of squishingRate.
*/
data.bottomRows(data.rows() - target_dimension) *= squishingRate;
while ( average_neighbor_distance(data, neighbors) < initial_average_distance)
{
data.topRows(target_dimension) /= squishingRate;
}
current_multiplier *= squishingRate;
/* Step 3b: Restore the previously computed relationships
* (distances to neighbors and angles to ...) by adjusting
* data points in first target_dimension dimensions.
*/
/* Start adjusting from a random point */
IndexType start_point_index = std::rand() % data.cols();
std::deque<IndexType> points_to_adjust;
points_to_adjust.push_back(start_point_index);
ScalarType steps_made = 0;
current_error = 0;
std::set<IndexType> adjusted_points;
/* Step 3a: Scale the data in non-preserved dimensions
* by a factor of squishing_rate.
*/
data.bottomRows(data.rows() - target_dimension) *= squishing_rate;
while (average_neighbor_distance(data, neighbors) < initial_average_distance)
{
data.topRows(target_dimension) /= squishing_rate;
}
current_multiplier *= squishing_rate;

while (!points_to_adjust.empty())
{
IndexType current_point_index = points_to_adjust.front();
points_to_adjust.pop_front();
if (adjusted_points.count(current_point_index) == 0)
{
DataForErrorFunc error_func_data = {
distances_to_neighbors,
angles_and_neighbors.first,
neighbors,
angles_and_neighbors.second,
adjusted_points,
initial_average_distance
};
adjust_point_at_index(current_point_index, data, target_dimension,
learning_rate, error_func_data, point_error);
current_error += point_error;
/* Insert all neighbors into deque */
std::copy(neighbors[current_point_index].begin(),
neighbors[current_point_index].end(),
std::back_inserter(points_to_adjust));
adjusted_points.insert(current_point_index);
}
}
/* Step 3b: Restore the previously computed relationships
* (distances to neighbors and angles to ...) by adjusting
* data points in first target_dimension dimensions.
*/
/* Start adjusting from a random point */
IndexType start_point_index = std::rand() % data.cols();
std::deque<IndexType> points_to_adjust;
points_to_adjust.push_back(start_point_index);
ScalarType steps_made = 0;
current_error = 0;
std::set<IndexType> adjusted_points;

if (steps_made > data.cols())
learning_rate *= learning_rate_grow_factor;
else
learning_rate *= learning_rate_shrink_factor;
if (current_error < best_error)
{
best_error = current_error;
no_improvement_counter = 0;
}
while (!points_to_adjust.empty())
{
IndexType current_point_index = points_to_adjust.front();
points_to_adjust.pop_front();
if (adjusted_points.count(current_point_index) == 0)
{
DataForErrorFunc error_func_data = {
distances_to_neighbors,
angles_and_neighbors.first,
neighbors,
angles_and_neighbors.second,
adjusted_points,
initial_average_distance
};
adjust_point_at_index(current_point_index, data, target_dimension,
learning_rate, error_func_data, point_error);
current_error += point_error;
/* Insert all neighbors into deque */
std::copy(neighbors[current_point_index].begin(),
neighbors[current_point_index].end(),
std::back_inserter(points_to_adjust));
adjusted_points.insert(current_point_index);
}
}

if (steps_made > data.cols())
learning_rate *= learning_rate_grow_factor;
else
learning_rate *= learning_rate_shrink_factor;

if (current_error < best_error)
{
best_error = current_error;
no_improvement_counter = 0;
}
}

data.conservativeResize(target_dimension, Eigen::NoChange);
data.transposeInPlace();
}
Expand Down

0 comments on commit 97760bf

Please sign in to comment.