diff --git a/data b/data index 3235e132a00..44020d24e38 160000 --- a/data +++ b/data @@ -1 +1 @@ -Subproject commit 3235e132a0037213c876e160451426cbe77ed030 +Subproject commit 44020d24e3801d79c78fa89f759e97363c6c1da8 diff --git a/doc/cookbook/source/examples/converter/diffusion_maps.rst b/doc/cookbook/source/examples/converter/diffusion_maps.rst new file mode 100644 index 00000000000..958badd3119 --- /dev/null +++ b/doc/cookbook/source/examples/converter/diffusion_maps.rst @@ -0,0 +1,43 @@ +================ +Diffusion Maps +================ + +Diffusion Maps is a non-linear dimensionality reduction method that uses +eigenfunctions of Markov matrices to construct diffusion maps for efficient +representations of complex geometric structures. +The diffusion kernel :math:`k` must satisfy +the following properties: + +1. :math:`k` is symmetric: :math:`{\bf k}(x, y) = {\bf k}(y, x)` +2. :math:`k` is positivity preserving: :math:`{\bf k}(x, y) \geq 0` + + +For more information see :cite:`Coifman-Lafon2006Diffusionmaps`. + +------- +Example +------- + +We create CDenseFeatures (RealFeatures, here 64 bit float values). + +.. sgexample:: diffusionmaps.sg:create_features + +We create a :sgclass:`CDiffusionMaps` instance, and set its parameters. + +.. sgexample:: diffusionmaps.sg:set_parameters + +Then we apply diffusion maps, which gives us distance embeddings. + +.. sgexample:: diffusionmaps.sg:apply_convert + +We can also extract the estimated feature_matrix. + +.. sgexample:: diffusionmaps.sg:extract + +---------- +References +---------- +:wiki:`Diffusion_map` + +.. 
bibliography:: ../../references.bib + :filter: docname in docnames diff --git a/examples/meta/src/converter/diffusionmaps.sg b/examples/meta/src/converter/diffusionmaps.sg new file mode 100644 index 00000000000..904f8a0ae81 --- /dev/null +++ b/examples/meta/src/converter/diffusionmaps.sg @@ -0,0 +1,18 @@ +File f_feats = csv_file("../../data/fm_train_real.dat") + +#![create_features] +Features feats = features(f_feats) +#![create_features] + +#![set_parameters] +Kernel k = kernel("GaussianKernel", cache_size=10, log_width=10.0) +Converter diffusion_maps = converter("DiffusionMaps", target_dim=1, kernel=k, t=10) +#![set_parameters] + +#![apply_convert] +Features converted = diffusion_maps.apply(feats) +#![apply_convert] + +#![extract] +RealMatrix feature_matrix = converted.get_real_matrix("feature_matrix") +#![extract] diff --git a/examples/meta/src/converter/independent_component_analysis_fast.sg b/examples/meta/src/converter/independent_component_analysis_fast.sg index 35d3d916073..bf8b978f25a 100644 --- a/examples/meta/src/converter/independent_component_analysis_fast.sg +++ b/examples/meta/src/converter/independent_component_analysis_fast.sg @@ -1,4 +1,4 @@ -CSVFile f_feats("../../data/ica_2_sources.dat") +File f_feats = csv_file("../../data/ica_2_sources.dat") Math:init_random(1) @@ -7,9 +7,7 @@ Features feats = features(f_feats) #![create_features] #![set_parameters] -FastICA ica() -ica.set_max_iter(200) -ica.set_tol(0.00001) +Converter ica = converter("FastICA", max_iter=200, tol=0.00001) #![set_parameters] #![apply_convert] @@ -17,7 +15,6 @@ Features converted = ica.apply(feats) #![apply_convert] #![extract] -RealMatrix mixing_matrix = ica.get_mixing_matrix() +RealMatrix mixing_matrix = ica.get_real_matrix("mixing_matrix") RealMatrix unmixed_signal = converted.get_real_matrix("feature_matrix") #![extract] - diff --git a/examples/meta/src/converter/independent_component_analysis_ff_sep.sg b/examples/meta/src/converter/independent_component_analysis_ff_sep.sg 
index 4a680628447..bc9a91dcba5 100644 --- a/examples/meta/src/converter/independent_component_analysis_ff_sep.sg +++ b/examples/meta/src/converter/independent_component_analysis_ff_sep.sg @@ -1,4 +1,4 @@ -CSVFile f_feats("../../data/ica_2_sources.dat") +File f_feats = csv_file("../../data/ica_2_sources.dat") Math:init_random(1) @@ -7,9 +7,7 @@ Features feats = features(f_feats) #![create_features] #![set_parameters] -FFSep ica() -ica.set_max_iter(200) -ica.set_tol(0.00001) +Converter ica = converter("FFSep", max_iter=200, tol=0.00001) #![set_parameters] #![apply_convert] @@ -17,7 +15,6 @@ Features converted = ica.apply(feats) #![apply_convert] #![extract] -RealMatrix mixing_matrix = ica.get_mixing_matrix() +RealMatrix mixing_matrix = ica.get_real_matrix("mixing_matrix") RealMatrix unmixed_signal = converted.get_real_matrix("feature_matrix") #![extract] - diff --git a/examples/meta/src/converter/independent_component_analysis_jade.sg b/examples/meta/src/converter/independent_component_analysis_jade.sg index 566dece1dfe..41c9a58fb2a 100644 --- a/examples/meta/src/converter/independent_component_analysis_jade.sg +++ b/examples/meta/src/converter/independent_component_analysis_jade.sg @@ -1,4 +1,4 @@ -CSVFile f_feats("../../data/ica_2_sources.dat") +File f_feats = csv_file("../../data/ica_2_sources.dat") Math:init_random(1) @@ -7,9 +7,7 @@ Features feats = features(f_feats) #![create_features] #![set_parameters] -Jade ica() -ica.set_max_iter(200) -ica.set_tol(0.00001) +Converter ica = converter("Jade", max_iter=200, tol=0.00001) #![set_parameters] #![apply_convert] @@ -17,7 +15,6 @@ Features converted = ica.apply(feats) #![apply_convert] #![extract] -RealMatrix mixing_matrix = ica.get_mixing_matrix() +RealMatrix mixing_matrix = ica.get_real_matrix("mixing_matrix") RealMatrix unmixed_signal = converted.get_real_matrix("feature_matrix") #![extract] - diff --git a/examples/meta/src/converter/independent_component_analysis_jedi_sep.sg 
b/examples/meta/src/converter/independent_component_analysis_jedi_sep.sg index 765b79fe50a..272f4fd5d0d 100644 --- a/examples/meta/src/converter/independent_component_analysis_jedi_sep.sg +++ b/examples/meta/src/converter/independent_component_analysis_jedi_sep.sg @@ -1,4 +1,4 @@ -CSVFile f_feats("../../data/ica_2_sources.dat") +File f_feats = csv_file("../../data/ica_2_sources.dat") Math:init_random(1) @@ -7,9 +7,7 @@ Features feats = features(f_feats) #![create_features] #![set_parameters] -JediSep ica() -ica.set_max_iter(200) -ica.set_tol(0.00001) +Converter ica = converter("JediSep", max_iter=200, tol=0.00001) #![set_parameters] #![apply_convert] @@ -17,7 +15,6 @@ Features converted = ica.apply(feats) #![apply_convert] #![extract] -RealMatrix mixing_matrix = ica.get_mixing_matrix() +RealMatrix mixing_matrix = ica.get_real_matrix("mixing_matrix") RealMatrix unmixed_signal = converted.get_real_matrix("feature_matrix") #![extract] - diff --git a/examples/meta/src/converter/independent_component_analysis_sobi.sg b/examples/meta/src/converter/independent_component_analysis_sobi.sg index 86ed83d544f..b735c272a2f 100644 --- a/examples/meta/src/converter/independent_component_analysis_sobi.sg +++ b/examples/meta/src/converter/independent_component_analysis_sobi.sg @@ -1,4 +1,4 @@ -CSVFile f_feats("../../data/ica_2_sources.dat") +File f_feats = csv_file("../../data/ica_2_sources.dat") Math:init_random(1) @@ -7,9 +7,7 @@ Features feats = features(f_feats) #![create_features] #![set_parameters] -SOBI ica() -ica.set_max_iter(200) -ica.set_tol(0.00001) +Converter ica = converter("SOBI", max_iter=200, tol=0.00001) #![set_parameters] #![apply_convert] @@ -17,7 +15,6 @@ Features converted = ica.apply(feats) #![apply_convert] #![extract] -RealMatrix mixing_matrix = ica.get_mixing_matrix() +RealMatrix mixing_matrix = ica.get_real_matrix("mixing_matrix") RealMatrix unmixed_signal = converted.get_real_matrix("feature_matrix") #![extract] - diff --git 
a/examples/undocumented/python/converter_diffusionmaps.py b/examples/undocumented/python/converter_diffusionmaps.py deleted file mode 100644 index ce134aae07f..00000000000 --- a/examples/undocumented/python/converter_diffusionmaps.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python -data = '../data/fm_train_real.dat' -parameter_list = [[data,10],[data,20]] - -def converter_diffusionmaps (data_fname,t): - try: - from shogun import RealFeatures, DiffusionMaps, GaussianKernel, CSVFile - - features = RealFeatures(CSVFile(data_fname)) - - converter = DiffusionMaps() - converter.set_target_dim(1) - converter.set_kernel(GaussianKernel(10,10.0)) - converter.set_t(t) - converter.apply(features) - - return features - except ImportError: - print('No Eigen3 available') - -if __name__=='__main__': - print('DiffusionMaps') - converter_diffusionmaps(*parameter_list[0]) - diff --git a/src/interfaces/swig/Converter.i b/src/interfaces/swig/Converter.i index 95b740d0500..7d5140f1823 100644 --- a/src/interfaces/swig/Converter.i +++ b/src/interfaces/swig/Converter.i @@ -29,6 +29,7 @@ %rename(FFSep) CFFSep; %rename(JediSep) CJediSep; %rename(UWedgeSep) CUWedgeSep; +%rename(Converter) CConverter; %rename(FastICA) CFastICA; %newobject shogun::CEmbeddingConverter::apply; diff --git a/src/shogun/converter/EmbeddingConverter.cpp b/src/shogun/converter/EmbeddingConverter.cpp index 5d9362022ce..eb77fc8d4bb 100644 --- a/src/shogun/converter/EmbeddingConverter.cpp +++ b/src/shogun/converter/EmbeddingConverter.cpp @@ -75,9 +75,10 @@ void CEmbeddingConverter::init() { SG_ADD(&m_target_dim, "target_dim", "target dimensionality of preprocessor", MS_AVAILABLE); - SG_ADD((CSGObject**)&m_distance, "distance", - "distance to be used for embedding", MS_AVAILABLE); - SG_ADD((CSGObject**)&m_kernel, "kernel", "kernel to be used for embedding", - MS_AVAILABLE); + SG_ADD( + &m_distance, "distance", "distance to be used for embedding", + MS_AVAILABLE); + SG_ADD( + &m_kernel, "kernel", "kernel to be used 
for embedding", MS_AVAILABLE); } } diff --git a/src/shogun/util/factory.h b/src/shogun/util/factory.h index 5d6aceace22..f2d20235cba 100644 --- a/src/shogun/util/factory.h +++ b/src/shogun/util/factory.h @@ -7,6 +7,7 @@ #define FACTORY_H_ #include +#include #include #include #include @@ -31,6 +32,7 @@ namespace shogun CMulticlassStrategy* multiclass_strategy(const std::string& name); CECOCEncoder* ecoc_encoder(const std::string& name); CECOCDecoder* ecoc_decoder(const std::string& name); + CConverter* converter(const std::string& name); #define BASE_CLASS_FACTORY(T, factory_name) \ T* factory_name(const std::string& name) \ @@ -49,6 +51,7 @@ namespace shogun BASE_CLASS_FACTORY(CMulticlassStrategy, multiclass_strategy) BASE_CLASS_FACTORY(CECOCEncoder, ecoc_encoder) BASE_CLASS_FACTORY(CECOCDecoder, ecoc_decoder) + BASE_CLASS_FACTORY(CConverter, converter) template CFeatures* features(SGMatrix mat)