Add transformer base class and adapt preprocessor / converter to fit + apply #4285
Conversation
great start!
}

/** Fit transformer to features */
virtual void fit(CFeatures* features)
vigsterkr
May 15, 2018
Member
this could be a pure virtual function in order to be an abstract class, or?
vinx13
May 15, 2018
Author
Member
some transformers actually do not need fitting, so i leave this a no-op
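For context, a minimal sketch of the design being discussed, assuming a simplified shape of the transformer base class (the real class also derives from CSGObject and handles reference counting):

// Sketch: fit() defaults to a no-op so stateless transformers need
// not override it, while apply() stays pure virtual and must be
// implemented by every concrete subclass.
class CTransformer : public CSGObject
{
public:
    /** Fit transformer to features (no-op by default) */
    virtual void fit(CFeatures* features)
    {
    }

    /** Apply transformer to features */
    virtual CFeatures* apply(CFeatures* features, bool inplace = true) = 0;
};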
@@ -163,7 +156,7 @@ void CHomogeneousKernelMap::init()

SGMatrix<float64_t> CHomogeneousKernelMap::apply_to_feature_matrix (CFeatures* features)
{
    CDenseFeatures<float64_t>* simple_features = (CDenseFeatures<float64_t>*)features;
    auto simple_features = features->as<CDenseFeatures<float64_t>>();
vigsterkr
May 15, 2018
Member
needs a quickfix to check whether it has been trained or not... :)
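A hedged sketch of such a check; m_fitted here is a hypothetical flag set at the end of fit(), and the actual state checked in HomogeneousKernelMap may differ:

// At the top of apply_to_feature_matrix(): fail early with a clear
// message instead of using unfitted state. m_fitted is a hypothetical
// member toggled by fit().
REQUIRE(m_fitted, "HomogeneousKernelMap has not been fitted.\n");
auto simple_features = features->as<CDenseFeatures<float64_t>>();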
kernel_matrix, eigenvalues, eigenvectors, m_target_dim);
SGVector<float64_t> bias_tmp = linalg::rowwise_sum(kernel_matrix);
linalg::scale(bias_tmp, bias_tmp, -1.0 / n);
float64_t s = linalg::sum(bias_tmp) / n;
vigsterkr
May 15, 2018
Member
auto
SGMatrix<float64_t> eigenvectors(kernel_matrix.num_rows, m_target_dim);
linalg::eigen_solver_symmetric(
    kernel_matrix, eigenvalues, eigenvectors, m_target_dim);
SGVector<float64_t> bias_tmp = linalg::rowwise_sum(kernel_matrix);
vigsterkr
May 15, 2018
Member
auto
float64_t* var=SG_MALLOC(float64_t, num_features);
int32_t i,j;
m_mean.resize_vector(num_features);
float64_t* var = SG_MALLOC(float64_t, num_features);
vigsterkr
May 15, 2018
Member
sgvector could be used, or?
int32_t num_ok=0;
int32_t* idx_ok=SG_MALLOC(int32_t, num_features);
int32_t num_ok = 0;
int32_t* idx_ok = SG_MALLOC(int32_t, num_features);
vigsterkr
May 15, 2018
Member
sgvector?
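What the reviewer suggests, sketched with SGVector so the buffers are reference-managed and the matching SG_FREE calls disappear (illustrative only, not the final code):

// SGVector owns its memory, replacing the raw SG_MALLOC buffers.
SGVector<float64_t> var(num_features);
m_mean.resize_vector(num_features);

int32_t num_ok = 0;
SGVector<int32_t> idx_ok(num_features);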

for (i=0; i<num_vec; i++)
{
    int32_t len=0;
    bool free_vec;
    uint64_t* vec=((CStringFeatures<uint64_t>*)f)->
        get_feature_vector(i, len, free_vec);
    uint64_t* vec = sf->get_feature_vector(i, len, free_vec);
vigsterkr
May 15, 2018
Member
auto?
@@ -56,14 +47,14 @@ bool CSortUlongString::save(FILE* f)
bool CSortUlongString::apply_to_string_features(CFeatures* f)
{
    int32_t i;
    int32_t num_vec=((CStringFeatures<uint64_t>*)f)->get_num_vectors();
    auto sf = f->as<CStringFeatures<uint64_t>>();
    int32_t num_vec = sf->get_num_vectors();
vigsterkr
May 15, 2018
Member
auto
@@ -56,13 +47,14 @@ bool CSortWordString::save(FILE* f)
bool CSortWordString::apply_to_string_features(CFeatures* f)
{
    int32_t i;
    int32_t num_vec=((CStringFeatures<uint16_t>*)f)->get_num_vectors() ;
    auto sf = f->as<CStringFeatures<uint16_t>>();
    int32_t num_vec = sf->get_num_vectors();
vigsterkr
May 15, 2018
Member
auto

for (i=0; i<num_vec; i++)
{
    int32_t len = 0 ;
    bool free_vec;
    uint16_t* vec = ((CStringFeatures<uint16_t>*)f)->get_feature_vector(i, len, free_vec);
    uint16_t* vec = sf->get_feature_vector(i, len, free_vec);
vigsterkr
May 15, 2018
Member
auto
vigsterkr
commented on examples/undocumented/python/preprocessor_sortulongstring.py
in 5cdd269
May 16, 2018
note the indenting!
Implement apply in PCA and FisherLDA
Inherit directly from dense preproc
karlnapf
commented on examples/undocumented/python/preprocessor_sortwordstring.py
in 5cdd269
May 21, 2018
whitespace
This is a cool push!
initial review
Normalize.apply_to_feature_matrix(features_train)
Normalize.apply_to_feature_matrix(features_test)
SubMean.fit(features_train)
Features features_train1 = SubMean.apply(features_train)
vigsterkr
May 29, 2018
Member
a more 'representative' naming would be more desirable :P than just features_train1
Features features_train1 = SubMean.apply(features_train)
Features features_test1 = SubMean.apply(features_test)
Normalize.fit(features_train)
Features features_train2 = Normalize.apply(features_train1)
vigsterkr
May 29, 2018
Member
maybe a better name would be normalized_features_train/test
{
    ASSERT(features);
    SG_REF(features);

    SGMatrix<float64_t> X = ((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
    auto X = features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();
vigsterkr
May 29, 2018
Member
we could do before this an assertion for get_feature_class() == C_DENSE
just to have a sane error message?
vigsterkr
May 29, 2018
Member
although .as has a quite good message as well...
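For illustration, the explicit pre-check mentioned above would look roughly like this; as the follow-up notes, CFeatures::as already produces a reasonable error, so this is optional:

// Optional explicit check for a tailored error message before the cast.
REQUIRE(
    features->get_feature_class() == C_DENSE,
    "Expected dense features.\n");
auto X = features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();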
@@ -59,3 +61,26 @@ float64_t CICAConverter::get_tol() const
    return tol;
}

CFeatures* CICAConverter::apply(CFeatures* features, bool inplace)
{
    REQUIRE(m_mixing_matrix.matrix, "ICAConverter not fitted.");
vigsterkr
May 29, 2018
Member
ICAConverter has not been fitted.
*/
virtual CFeatures* apply(CFeatures* features) = 0;
virtual void fit(CFeatures* features) = 0;
vigsterkr
May 29, 2018
Member
if we know that this class is an abstract class for other classes that do conversion over CDenseFeatures<float64_t>,
then here it would be good to actually do the type mapping, meaning not to have this function pure virtual but implement something like this:

virtual void fit(CFeatures* features)
{
    REQUIRE(features->get_feature_class() == C_DENSE, "ICA converters only work with dense features")
    fit((CDenseFeatures<float64_t>*)features);
}

and have a pure virtual protected member function that has a declaration:

virtual void fit(CDenseFeatures<float64_t>*) = 0;

this way the casting thing needs to be done in only one place; everywhere else you can already assume CDenseFeatures<float64_t> as input.
namespace shogun
{

template <>
vigsterkr
May 29, 2018
Member
why move this into implementation?
vinx13
May 29, 2018
Author
Member
the header is mixed with declaration and implementation, so i split it into .h and .cpp as is done in other template classes, e.g. DensePreprocessor
vigsterkr
May 29, 2018
Member
gotcha!
[[deprecated]]
#endif
virtual bool
apply_to_string_features(CFeatures* f);
vigsterkr
May 29, 2018
Member
whitespace problem.
* @param inplace whether transform in place
* @return the result feature object after applying the transformer
*/
virtual CFeatures* apply(CFeatures* features, bool inplace = true) = 0;
vigsterkr
May 29, 2018
Member
if this is pure virtual, then surely fit could be as well?
i'm not sure if i follow the logic here why fit has a default implementation and why the apply does not or vice versa.
vinx13
May 29, 2018
Author
Member
there are many transformers that do not need fitting, so we don't need to put an empty fit in every subclass, only in one place here
but as for apply, it should always be implemented (except in abstract classes)
vigsterkr
May 29, 2018
Member
ah yeah i remember now... sorry short memory :(
vigsterkr
May 29, 2018
Member
mmm actually i've realised... dont we actually wanna follow actual transformer api, meaning
.fit and .transform ? :)
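For reference, the sklearn-style naming alluded to would only rename the application step; a hypothetical sketch (the API in this PR keeps fit/apply):

// Hypothetical fit/transform variant of the same interface.
virtual void fit(CFeatures* features)
{
}
virtual CFeatures* transform(CFeatures* features, bool inplace = true) = 0;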
feats_train.apply_preprocessor()
preproc = SortWordString()
preproc.fit(feats_train)
feats_train = preproc.apply(feats_train)
vigsterkr
May 29, 2018
Member
indent
@@ -16,7 +16,6 @@
%rename(PNorm) CPNorm;
%rename(RescaleFeatures) CRescaleFeatures;

%rename(DimensionReductionPreprocessor) CDimensionReductionPreprocessor;
vigsterkr
May 29, 2018
Member
why do we drop CDimensionReductionPreprocessor? :)
vinx13
May 29, 2018
Author
Member
CDimensionReductionPreprocessor is dropped from both cpp and interface.
It has fields like m_converter, m_kernel, m_distance but they are not used by subclasses in most cases (e.g. PCA). So it is not very helpful as a superclass. And we don't need to wrap a converter with it now
vigsterkr
May 29, 2018
Member
okie!
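A sketch of the resulting hierarchy once CDimensionReductionPreprocessor is gone (simplified; the real declarations carry more members):

// Sketch: PCA derives directly from the dense preprocessor base and
// keeps only the state it actually uses (no m_converter / m_kernel /
// m_distance from the removed superclass).
class CPCA : public CDensePreprocessor<float64_t>
{
public:
    // learn the projection from training features
    virtual void fit(CFeatures* features) override;
    // apply() comes from the transformer interface; the dense base
    // routes it to the feature-matrix implementation.
};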
have u seen these @vinx13

merged commit d2baac3 into shogun-toolbox:feature/transformers
@@ -13,7 +13,8 @@ ica.set_tol(0.00001)
#![set_parameters]

#![apply_convert]
Features converted = ica.apply(feats)
ica.fit(feats)
karlnapf
May 30, 2018
Member
I like the new API here!
- Rename `CPreprocessor::init(CFeatures *)` to `fit`, possibly call `cleanup` in `fit` if already fitted to support re-fit
- Use `CFeatures::as` instead in apply phase
- Remove `CDimensionReductionPreprocessor` since it is only a wrapper of converters and provides irrelevant things (kernel, distance, ..) for subtypes
- Subclasses of `CDimensionReductionPreprocessor` inherit directly from `CDensePreprocessor<float64_t>` or `CPreprocessor` since it can work with other types of features
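Putting the pieces together, the user-facing flow after this PR, sketched in C++ with CPruneVarSubMean as an example preprocessor (assuming features_train and features_test are CDenseFeatures<float64_t>*):

// fit() estimates statistics on the training data only;
// apply() then transforms both splits with the fitted state.
auto sub_mean = new CPruneVarSubMean();
sub_mean->fit(features_train);
auto features_train_pruned = sub_mean->apply(features_train);
auto features_test_pruned = sub_mean->apply(features_test);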