-
-
Notifications
You must be signed in to change notification settings - Fork 1k
/
MultidimensionalScaling.h
131 lines (109 loc) · 3.71 KB
/
MultidimensionalScaling.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Sergey Lisitsyn, Heiko Strathmann, Soeren Sonnenburg,
* Evan Shelhamer
*/
#ifndef MULTIDIMENSIONALSCALING_H_
#define MULTIDIMENSIONALSCALING_H_
#include <shogun/lib/config.h>
#include <shogun/converter/EmbeddingConverter.h>
#include <shogun/features/Features.h>
#include <shogun/distance/Distance.h>
namespace shogun
{
class CFeatures;
class CDistance;
/** @brief class MultidimensionalScaling is used to perform
* multidimensional scaling (capable of landmark approximation
* if requested).
*
* Description of classical embedding is given on p.261 (Section 12.1) of
* Borg, I., & Groenen, P. J. F. (2005).
* Modern multidimensional scaling: Theory and applications. Springer.
*
* Description of landmark MDS approximation is given in
*
* Sparse multidimensional scaling using landmark points
* V De Silva, J B Tenenbaum (2004) Technology, p. 1-4
*
* Note that the target dimension should be set to a reasonable value
* (using set_target_dim). If it is higher than the intrinsic
* dimensionality of the dataset, the 'extra' features of the output
* might be inconsistent (essentially, they correspond to zero or
* negative eigenvalues). In this case a warning is issued.
*
* It is possible to apply multidimensional scaling to any
* given distance using the embed_distance method.
* By default Euclidean distance is used (with parallel
* instance replaced by preprocessor's one).
*
* The faster landmark approximation is parallelized using POSIX threads.
* The number of landmarks should be at least 3 for proper
* triangulation. For reasonable embedding accuracy, greater
* values (30%-50% of the total number of examples) work well for
* most tasks.
*
* Uses implementation from the Tapkee library.
*
* To use this converter with static interfaces please refer to it by
* sg('create_converter','mds');
*
*/
class CMultidimensionalScaling: public CEmbeddingConverter
{
public:
/** default constructor */
CMultidimensionalScaling();
/** destructor */
virtual ~CMultidimensionalScaling();
/** embed the examples described by a distance
* @param distance distance to embed (should be approximately
* Euclidean for a consistent result)
* @return new features whose pairwise distances are as similar
* as possible to the given ones
*
* NOTE(review): CDenseFeatures is not included directly by this
* header; presumably it is pulled in via EmbeddingConverter.h - confirm.
*/
virtual CDenseFeatures<float64_t>* embed_distance(CDistance* distance);
/** apply the converter to a feature matrix,
* producing a feature matrix of the target dimensionality
* @param features features whose feature matrix should be processed
* @param inplace defaults to true; presumably controls whether the
* given features may be modified in place - TODO confirm against
* the base converter interface
* @return new feature matrix
*/
virtual CFeatures* apply(CFeatures* features, bool inplace = true);
/** get name
* @return name of this object
*/
const char* get_name() const;
/** get the eigenvalues of the last embedding
* @return vector with the eigenvalues of the last embedding
*/
SGVector<float64_t> get_eigenvalues() const;
/** set number of landmarks
* should be less than the number of examples and greater than 3
* for a consistent embedding, as triangulation is used
* (NOTE(review): the class-level notes say "at least 3" -
* confirm the exact bound against the implementation)
* @param num number of landmarks to be set
*/
void set_landmark_number(int32_t num);
/** get number of landmarks
* @return current number of landmarks
*/
int32_t get_landmark_number() const;
/** setter for landmark parameter
* @param landmark true if landmark embedding should be used
*/
void set_landmark(bool landmark);
/** getter for landmark parameter
* @return true if landmark embedding is used
*/
bool get_landmark() const;
/// HELPERS
protected:
/** default initialization */
virtual void init();
/// FIELDS
protected:
/** eigenvalues of the last embedding */
SGVector<float64_t> m_eigenvalues;
/** whether the landmark approximation is used */
bool m_landmark;
/** number of landmarks */
int32_t m_landmark_number;
};
}
#endif /* MULTIDIMENSIONALSCALING_H_ */