
Commit

Merge pull request #3942 from micmn/feature/serialization-tests2
Add Multiclass machines to generated serialization tests
vigsterkr committed Aug 5, 2017
2 parents e2b323a + 37a5e4e commit 832e8fd
Showing 9 changed files with 158 additions and 81 deletions.
11 changes: 7 additions & 4 deletions src/shogun/labels/DenseLabels.cpp
@@ -10,13 +10,14 @@
* Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
*/

#include <shogun/labels/Labels.h>
#include <shogun/labels/DenseLabels.h>
#include <shogun/lib/common.h>
#include <shogun/base/Parameter.h>
#include <shogun/io/File.h>
#include <shogun/io/SGIO.h>
#include <shogun/labels/DenseLabels.h>
#include <shogun/labels/Labels.h>
#include <shogun/lib/common.h>
#include <shogun/mathematics/Math.h>
#include <shogun/base/Parameter.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>

using namespace shogun;

@@ -32,6 +33,8 @@ CDenseLabels::CDenseLabels(int32_t num_lab)
init();
m_labels = SGVector<float64_t>(num_lab);
m_current_values=SGVector<float64_t>(num_lab);
linalg::zero(m_labels);
linalg::zero(m_current_values);
}

CDenseLabels::CDenseLabels(CFile* loader)
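
The two linalg::zero() calls above mean the size-based constructor now hands back zero-initialized label vectors instead of uninitialized memory. Below is a minimal sketch of the observable effect, not part of the commit; it uses CRegressionLabels, a concrete subclass that forwards to this constructor, and the public get_label() accessor.

#include <shogun/labels/RegressionLabels.h>

using namespace shogun;

void zero_initialized_labels_sketch()
{
	// CRegressionLabels(10) calls CDenseLabels(10), which after this
	// change zeroes both m_labels and m_current_values.
	CRegressionLabels* labels = new CRegressionLabels(10);

	for (int32_t i = 0; i < 10; i++)
	{
		// Expected to be 0.0 rather than whatever the allocator left behind.
		float64_t value = labels->get_label(i);
		(void)value;
	}

	SG_UNREF(labels);
}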
8 changes: 8 additions & 0 deletions src/shogun/lib/SGMatrix.cpp
@@ -387,6 +387,14 @@ void SGMatrix<T>::create_diagonal_matrix(T* matrix, T* v,int32_t size)
}
}

template <class T>
SGMatrix<T> SGMatrix<T>::submatrix(index_t col_start, index_t col_end) const
{
assert_on_cpu();
return SGMatrix<T>(
get_column_vector(col_start), num_rows, col_end - col_start, false);
}

template <class T>
SGVector<T> SGMatrix<T>::get_column(index_t col) const
{
9 changes: 9 additions & 0 deletions src/shogun/lib/SGMatrix.h
@@ -144,6 +144,15 @@ template<class T> class SGMatrix : public SGReferencedData
return &matrix[c*num_rows];
}

/** Given a range of columns (col_start, col_end), return a view
* of the matrix spanning columns [col_start, col_end).
* \warning The returned SGMatrix is non-owning!
* @param col_start column index (inclusive)
* @param col_end column index (exclusive)
* @return the submatrix
*/
SGMatrix<T> submatrix(index_t col_start, index_t col_end) const;

/** Map a column to a SGVector
* \warning The returned SGVector is non-owning!
* @param col column index
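
A minimal usage sketch for the new submatrix() view, not part of the commit; it relies only on the SGMatrix API shown in this diff. Because the view is non-owning and aliases the parent's column-major buffer, it must not outlive the parent matrix, and writes through it are visible in the parent.

#include <shogun/lib/SGMatrix.h>

using namespace shogun;

void submatrix_view_sketch()
{
	// 3x4 column-major matrix.
	SGMatrix<float64_t> m(3, 4);
	m.zero();

	// Non-owning view of columns [1, 3): 3 rows, 2 columns.
	SGMatrix<float64_t> view = m.submatrix(1, 3);

	// Writing through the view modifies the parent, since both
	// point into the same buffer.
	view(0, 0) = 42.0;
	// m(0, 1) now also reads 42.0.
}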
3 changes: 3 additions & 0 deletions src/shogun/multiclass/MCLDA.cpp
@@ -64,6 +64,9 @@ void CMCLDA::init()
SG_ADD(&m_xbar, "m_xbar", "total mean", MS_NOT_AVAILABLE);
SG_ADD(&m_scalings, "m_scalings", "scalings", MS_NOT_AVAILABLE);
SG_ADD(&m_rank, "m_rank", "rank", MS_NOT_AVAILABLE);
SG_ADD(&m_dim, "m_dim", "dimension of feature space", MS_NOT_AVAILABLE);
SG_ADD(
&m_num_classes, "m_num_classes", "number of classes", MS_NOT_AVAILABLE);
SG_ADD(&m_coef, "m_coef", "weight vector", MS_NOT_AVAILABLE);
SG_ADD(&m_intercept, "m_intercept", "intercept", MS_NOT_AVAILABLE);

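
The extra SG_ADD calls register m_dim, m_num_classes, m_coef and m_intercept with the parameter framework, which is what lets a trained CMCLDA survive the save/load round trip exercised by the generated tests below. A minimal sketch of the registration pattern follows; it is not part of the commit, and CMyObject and m_my_param are hypothetical names used only for illustration.

#include <shogun/base/SGObject.h>

namespace shogun
{
	// Hypothetical class: any member registered via SG_ADD in the
	// constructor is written by save_serializable() and restored by
	// load_serializable().
	class CMyObject : public CSGObject
	{
	public:
		CMyObject() : CSGObject(), m_my_param(0)
		{
			SG_ADD(&m_my_param, "m_my_param",
			       "example scalar parameter", MS_NOT_AVAILABLE);
		}

		virtual const char* get_name() const { return "MyObject"; }

	private:
		float64_t m_my_param;
	};
}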
16 changes: 7 additions & 9 deletions src/shogun/multiclass/QDA.cpp
@@ -90,8 +90,10 @@ void CQDA::init()
SG_ADD((CSGObject**) &m_features, "m_features", "Feature object.", MS_NOT_AVAILABLE);
SG_ADD(&m_means, "m_means", "Mean vectors list", MS_NOT_AVAILABLE);
SG_ADD(&m_slog, "m_slog", "Vector used in classification", MS_NOT_AVAILABLE);

//TODO include SGNDArray objects for serialization
SG_ADD(&m_dim, "m_dim", "dimension of feature space", MS_NOT_AVAILABLE);
SG_ADD(
&m_num_classes, "m_num_classes", "number of classes", MS_NOT_AVAILABLE);
SG_ADD(&m_M, "m_M", "Matrices used in classification", MS_NOT_AVAILABLE);

m_features = NULL;
}
@@ -144,7 +146,7 @@ CMulticlassLabels* CQDA::apply_multiclass(CFeatures* data)
rf->free_feature_vector(vec, i);
}

Map< MatrixXd > Em_M(m_M.get_matrix(k), m_dim, m_dim);
Map<MatrixXd> Em_M(m_M.submatrix(m_dim * k, m_dim * (k + 1)));
A = X*Em_M;

for (int i = 0; i < num_vecs; i++)
@@ -302,12 +304,8 @@ bool CQDA::train_machine(CFeatures* data)
/* Computation of terms required for classification */
SGVector< float32_t > sinvsqrt(m_dim);

// M_dims will be freed in m_M.destroy_ndarray()
index_t* M_dims = SG_MALLOC(index_t, 3);
M_dims[0] = m_dim;
M_dims[1] = m_dim;
M_dims[2] = m_num_classes;
m_M = SGNDArray< float64_t >(M_dims, 3);
// m_num_classes matrices of dimension (m_dim, m_dim) stacked horizontally
m_M = SGMatrix<float64_t>(m_dim, m_dim * m_num_classes);

m_slog = SGVector< float32_t >(m_num_classes);
m_slog.zero();
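
With the SGNDArray replaced, class k's (m_dim x m_dim) matrix now occupies columns [m_dim*k, m_dim*(k+1)) of m_M, which is exactly the block that the submatrix() view above maps into Eigen. A minimal sketch of that layout, not part of the commit; dim and num_classes stand in for m_dim and m_num_classes.

#include <shogun/lib/SGMatrix.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

void stacked_blocks_sketch()
{
	const index_t dim = 3;
	const index_t num_classes = 4;

	// num_classes matrices of size (dim, dim), stacked horizontally.
	SGMatrix<float64_t> m(dim, dim * num_classes);
	m.zero();

	for (index_t k = 0; k < num_classes; ++k)
	{
		// Non-owning view of block k, mapped into Eigen without a
		// copy, mirroring the call in CQDA::apply_multiclass().
		Map<MatrixXd> block(m.submatrix(dim * k, dim * (k + 1)));
		block.setIdentity();
	}
}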
7 changes: 5 additions & 2 deletions src/shogun/multiclass/QDA.h
@@ -207,8 +207,11 @@ class CQDA : public CNativeMulticlassMachine
/** feature means for each of the classes in the training data */
SGMatrix< float64_t > m_means;

/** matrices computed in training and used in classification */
SGNDArray< float64_t > m_M;
/** Matrices computed in training and used in classification.
* The matrices are stacked horizontally into a single matrix of size
* (m_dim, m_dim*m_num_classes).
*/
SGMatrix<float64_t> m_M;

/** vector computed in training and used in classification */
SGVector< float32_t > m_slog;
95 changes: 55 additions & 40 deletions tests/unit/base/trained_model_serialization_unittest.cc.jinja2
@@ -4,6 +4,7 @@
*/

#include <gtest/gtest.h>
#include <shogun/base/some.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/machine/Machine.h>
#include <shogun/features/DenseFeatures.h>
@@ -14,13 +15,20 @@
#include <shogun/io/SerializableHdf5File.h>
#include <shogun/io/CSVFile.h>
#include <shogun/io/SGIO.h>
#include <shogun/machine/gp/ExactInferenceMethod.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/machine/gp/ProbitLikelihood.h>
#include <shogun/machine/gp/SingleLaplaceInferenceMethod.h>
#include <shogun/machine/gp/ZeroMean.h>
#include "environments/LinearTestEnvironment.h"
#include "environments/RegressionTestEnvironment.h"
#include "environments/MultiLabelTestEnvironment.h"
#include "utils/Utils.h"

using namespace shogun;

extern LinearTestEnvironment* linear_test_env;
extern MultiLabelTestEnvironment* multilabel_test_env;
extern RegressionTestEnvironment* regression_test_env;

class TrainedModelSerializationTest : public ::testing::Test
@@ -40,12 +48,23 @@ protected:
switch (pt)
{
case PT_BINARY:
case PT_CLASS:
{
std::shared_ptr<GaussianCheckerboard> mockData =
std::shared_ptr<GaussianCheckerboard> mock_data =
linear_test_env->getBinaryLabelData();
train_feats = mockData->get_features_train();
test_feats = mockData->get_features_test();
train_labels = (CBinaryLabels *) mockData->get_labels_train();
train_feats = mock_data->get_features_train();
test_feats = mock_data->get_features_test();
train_labels = mock_data->get_labels_train();
break;
}

case PT_MULTICLASS:
{
std::shared_ptr<GaussianCheckerboard> mock_data =
multilabel_test_env->getMulticlassFixture();
train_feats = mock_data->get_features_train();
test_feats = mock_data->get_features_test();
train_labels = mock_data->get_labels_train();
break;
}

@@ -69,13 +88,14 @@ protected:
CLabels *train_labels;
};

bool serialize_machine(CMachine* machine, std::string &filename)
bool serialize_machine(CMachine* machine, std::string &filename, bool store_model_features=false)
{
std::string class_name = machine->get_name();
filename = "shogun-unittest-trained-model-serialization-" + class_name + ".XXXXXX";
generate_temp_filename(const_cast<char*>(filename.c_str()));

CSerializableHdf5File *file=new CSerializableHdf5File(filename.c_str(), 'w');
machine->set_store_model_features(store_model_features);
bool save_success=machine->save_serializable(file);
file->close();
SG_FREE(file);
@@ -95,10 +115,12 @@ bool deserialize_machine(CMachine *machine, std::string filename)
return load_success && (delete_success == 0);
}

{% macro linear_machine_test(class) -%}
TEST_F(TrainedModelSerializationTest, LinearMachine_{{class}})
const float64_t accuracy=1e-7;

{% macro machine_test(class) -%}
TEST_F(TrainedModelSerializationTest, {{class}})
{
auto machine=new {{class}}();
auto machine=some<{{class}}>();
load_data(machine->get_machine_problem_type());

machine->set_features(train_feats);
@@ -111,70 +133,63 @@ TEST_F(TrainedModelSerializationTest, LinearMachine_{{class}})
// machine->set_features(NULL);
// machine->set_labels(NULL);

CLabels* predictions=machine->apply(test_feats);
auto predictions=wrap<CLabels>(machine->apply(test_feats));

std::string filename;
ASSERT_TRUE(serialize_machine(machine, filename));

auto deserialized_machine=new {{class}}();
auto deserialized_machine=some<{{class}}>();
ASSERT_TRUE(deserialize_machine(deserialized_machine, filename));

CLabels* deserialized_predictions=deserialized_machine->apply(test_feats);

float64_t accuracy=1e-13;
auto deserialized_predictions=wrap<CLabels>(deserialized_machine->apply(test_feats));
ASSERT(predictions->equals(deserialized_predictions, accuracy, true))

SG_FREE(machine);
SG_FREE(deserialized_machine);
SG_FREE(predictions);
SG_FREE(deserialized_predictions);
}
{%- endmacro %}

{% macro kernel_machine_test(class) -%}
TEST_F(TrainedModelSerializationTest, KernelMachine_{{class}})
{% for store_model_features in ["true", "false"] -%}
{% if store_model_features == "true" -%}
{% set test_name = class + "_store_model_features" -%}
{% else -%}
{% set test_name = class -%}
{% endif -%}
TEST_F(TrainedModelSerializationTest, {{test_name}})
{
auto machine=new {{class}}();
auto machine=some<{{class}}>();
load_data(machine->get_machine_problem_type());

CGaussianKernel *kernel = new CGaussianKernel(2.0);
CGaussianKernel *kernel=new CGaussianKernel(2.0);
kernel->init(train_feats, train_feats);
machine->set_kernel(kernel);
machine->set_labels(train_labels);

bool train_success=machine->train();
ASSERT_TRUE(train_success);

CLabels* predictions=machine->apply(test_feats);
auto predictions=Some<CLabels>(machine->apply(test_feats));

std::string filename;
ASSERT_TRUE(serialize_machine(machine, filename));
ASSERT_TRUE(serialize_machine(machine, filename, {{store_model_features}}));

auto deserialized_machine=new {{class}}();
auto deserialized_machine=some<{{class}}>();
ASSERT_TRUE(deserialize_machine(deserialized_machine, filename));

CLabels* deserialized_predictions=deserialized_machine->apply(test_feats);

float64_t accuracy=1e-13;
auto deserialized_predictions=Some<CLabels>(deserialized_machine->apply(test_feats));
ASSERT(predictions->equals(deserialized_predictions, accuracy, true))

SG_FREE(machine);
SG_FREE(deserialized_machine);
SG_FREE(predictions);
SG_FREE(deserialized_predictions);
}
{% endfor %}
{%- endmacro %}

{%
set macros = {
'CLinearMachine': linear_machine_test,
'CKernelMachine': kernel_machine_test}
'CLinearMachine': machine_test,
'CNativeMulticlassMachine': machine_test,
'CLinearMulticlassMachine': machine_test,
'CKernelMachine': kernel_machine_test,
'CKernelMulticlassMachine': kernel_machine_test}
%}
{% for name, attrs in machines.items() %}
{% for base in bases %}
{% if base in attrs['ancestors'] and not name in ignores[base] %}
{% for b, m in machines.items() -%}
{% for name, attrs in m.items() -%}
#include <{{attrs['include']}}>
{{ macros[base](name) }}
{% endif %}
{{ macros[b](name) }}
{% endfor %}
{% endfor %}
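
For orientation, this is roughly what machine_test expands to once the generator below renders the template for one of the newly covered classes. It is a hand-expanded sketch, not actual generator output; CQDA (a CNativeMulticlassMachine) is used purely as the illustrative instantiation.

#include <shogun/multiclass/QDA.h>

TEST_F(TrainedModelSerializationTest, CQDA)
{
	auto machine=some<CQDA>();
	load_data(machine->get_machine_problem_type());

	machine->set_features(train_feats);
	machine->set_labels(train_labels);

	bool train_success=machine->train();
	ASSERT_TRUE(train_success);

	auto predictions=wrap<CLabels>(machine->apply(test_feats));

	std::string filename;
	ASSERT_TRUE(serialize_machine(machine, filename));

	auto deserialized_machine=some<CQDA>();
	ASSERT_TRUE(deserialize_machine(deserialized_machine, filename));

	auto deserialized_predictions=wrap<CLabels>(deserialized_machine->apply(test_feats));
	ASSERT(predictions->equals(deserialized_predictions, accuracy, true))
}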
71 changes: 45 additions & 26 deletions tests/unit/base/trained_model_serialization_unittest.cc.py
@@ -6,20 +6,33 @@
# (at your option) any later version.
#

# Classes that need custom initialization
IGNORE_MACHINES = {
'CLinearMachine' : [
'CFeatureBlockLogisticRegression', 'CLibLinearMTL',
'CMultitaskLinearMachine', 'CMultitaskLogisticRegression',
'CMultitaskL12LogisticRegression', 'CMultitaskLeastSquaresRegression',
'CMultitaskTraceLogisticRegression', 'CMultitaskClusteredLogisticRegression',
'CLatentSVM', 'CLatentSOSVM', 'CDomainAdaptationSVMLinear'
],
'CKernelMachine' : [
'CDomainAdaptationSVM', 'CMKLRegression',
'CMKLClassification', 'CMKLOneClass', 'CSVM'
]
}
# Classes to ignore: mostly because default initialization isn't enough
# to set up the machine for training (e.g. the Multitask and DomainAdaptation
# machines); other reasons are noted below.
IGNORE = [
# LinearMachines
'CFeatureBlockLogisticRegression', 'CLibLinearMTL',
'CMultitaskLinearMachine', 'CMultitaskLogisticRegression',
'CMultitaskL12LogisticRegression', 'CMultitaskLeastSquaresRegression',
'CMultitaskTraceLogisticRegression', 'CMultitaskClusteredLogisticRegression',
'CLatentSVM', 'CLatentSOSVM', 'CDomainAdaptationSVMLinear',

# KernelMachines
'CDomainAdaptationSVM', 'CMKLRegression',
'CMKLClassification', 'CMKLOneClass',
'CSVM', # doesn't implement a solver

# LinearMulticlassMachines
'CDomainAdaptationMulticlassLibLinear',
'CMulticlassTreeGuidedLogisticRegression',
'CShareBoost', # apply() takes features subset

# KernelMulticlassMachines
'CMulticlassSVM', # doesn't implement a solver
'CMKLMulticlass',
'CScatterSVM', # error C <= 0
'CMulticlassLibSVM' # error C <= 0
]

def read_defined_guards(config_file):
with open(config_file) as f:
@@ -86,21 +99,27 @@ def entry(templateFile, input_file, config_file):
classes = get_shogun_classes(tags)
guards = read_defined_guards(config_file)

# Get all linear/kernel machines
machines = {}
bases = ['CLinearMachine', 'CKernelMachine']
bases = [
'CLinearMachine', 'CKernelMachine', 'CLinearMulticlassMachine',
'CKernelMulticlassMachine', 'CNativeMulticlassMachine'
]

# Gather all the machines that inherit from the classes in bases
machines = {b: {} for b in bases}

for name, attrs in classes.items():
ancestors = get_ancestors(classes, name)
if any([b in ancestors for b in bases])\
and not is_guarded(attrs['include'], guards)\
and not is_pure_virtual(name, tags)\
and not ignore_in_class_list(attrs['include'])\
and use_gpl(attrs['include'], guards):
machines[name] = attrs
machines[name]['ancestors'] = ancestors

templateVars = {"machines" : machines, "bases" : bases, "ignores" : IGNORE_MACHINES}
header = attrs['include']
for base in bases:
if (base in ancestors)\
and not name in IGNORE\
and not is_guarded(header, guards)\
and not is_pure_virtual(name, tags)\
and not ignore_in_class_list(header)\
and use_gpl(header, guards):
machines[base][name] = attrs

templateVars = {"machines" : machines}

return template.render(templateVars)

