Skip to content

Commit

Permalink
factory methods in lda meta example #4247 (#4236)
Browse files Browse the repository at this point in the history
* factory methods in lda
* some-ize lda unit test
* data update
  • Loading branch information
shubham808 authored and karlnapf committed May 7, 2018
1 parent ee641ce commit 18cf07f
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 86 deletions.
2 changes: 1 addition & 1 deletion data
22 changes: 10 additions & 12 deletions examples/meta/src/binary/linear_discriminant_analysis.sg
@@ -1,13 +1,13 @@
CSVFile f_feats_train("../../data/classifier_binary_2d_linear_features_train.dat")
CSVFile f_feats_test("../../data/classifier_binary_2d_linear_features_test.dat")
CSVFile f_labels_train("../../data/classifier_binary_2d_linear_labels_train.dat")
CSVFile f_labels_test("../../data/classifier_binary_2d_linear_labels_test.dat")
File f_feats_train = csv_file("../../data/classifier_binary_2d_linear_features_train.dat")
File f_feats_test = csv_file("../../data/classifier_binary_2d_linear_features_test.dat")
File f_labels_train = csv_file("../../data/classifier_binary_2d_linear_labels_train.dat")
File f_labels_test = csv_file("../../data/classifier_binary_2d_linear_labels_test.dat")

#![create_features]
Features features_train = features(f_feats_train)
Features features_test = features(f_feats_test)
BinaryLabels labels_train(f_labels_train)
BinaryLabels labels_test(f_labels_test)
Labels labels_train = labels(f_labels_train)
Labels labels_test = labels(f_labels_test)
#![create_features]

#![create_instance]
Expand All @@ -16,17 +16,15 @@ Machine lda = machine("LDA", labels=labels_train)

#![train_and_apply]
lda.train(features_train)
BinaryLabels labels_predict = lda.apply_binary(features_test)
Labels labels_predict = lda.apply(features_test)
RealVector labels = labels_predict.get_real_vector("labels")
#![train_and_apply]

#![extract_weights]
RealVector w = lda.get_real_vector("w")
RealVector weights = lda.get_real_vector("w")
#![extract_weights]

#![evaluate_accuracy]
AccuracyMeasure eval()
Evaluation eval = evaluation("AccuracyMeasure")
real accuracy = eval.evaluate(labels_predict, labels_test)
#![evaluate_accuracy]

#additional integration testing variables
RealVector output = labels_predict.get_labels()
31 changes: 11 additions & 20 deletions src/shogun/classifier/LDA.cpp
Expand Up @@ -49,9 +49,7 @@ void CLDA::init()
SG_ADD(
(machine_int_t*)&m_method, "m_method",
"Method used for LDA calculation", MS_NOT_AVAILABLE);
SG_ADD(
(machine_int_t*)&m_gamma, "m_gamma", "Regularization parameter",
MS_NOT_AVAILABLE);
SG_ADD(&m_gamma, "m_gamma", "Regularization parameter", MS_AVAILABLE);
SG_ADD(&m_bdc_svd, "m_bdc_svd", "Use BDC-SVD algorithm", MS_NOT_AVAILABLE);
}

Expand All @@ -62,10 +60,6 @@ CLDA::~CLDA()
bool CLDA::train_machine(CFeatures *data)
{
REQUIRE(m_labels, "Labels for the given features are not specified!\n")
REQUIRE(
m_labels->get_label_type() == LT_BINARY,
"The labels should of type CBinaryLabels! Provided type is %s \n",
m_labels->get_name())

if(data)
{
Expand All @@ -78,12 +72,6 @@ bool CLDA::train_machine(CFeatures *data)
REQUIRE(data, "Features have not been provided.\n")
}

REQUIRE(
features->get_num_vectors() == m_labels->get_num_labels(),
"Number of training examples(%d) should be equal to number of labels "
"(%d)!\n",
features->get_num_vectors(), m_labels->get_num_labels());

REQUIRE(
features->get_feature_class() == C_DENSE,
"LDA only works with dense features")
Expand Down Expand Up @@ -117,14 +105,16 @@ template <typename ST>
bool CLDA::solver_svd()
{
auto dense_feat = static_cast<CDenseFeatures<ST>*>(features);
auto labels = multiclass_labels(m_labels);
REQUIRE(
labels->get_num_classes() == 2, "Number of classes (%d) must be 2\n",
labels->get_num_classes())

// keep just one dimension to do binary classification
const index_t projection_dim = 1;
auto solver = std::unique_ptr<LDACanVarSolver<ST>>(
new LDACanVarSolver<ST>(
dense_feat,
new CMulticlassLabels(static_cast<CBinaryLabels*>(m_labels)),
projection_dim, m_gamma, m_bdc_svd));
dense_feat, labels, projection_dim, m_gamma, m_bdc_svd));

SGVector<ST> w_st(solver->get_eigenvectors());

Expand All @@ -150,13 +140,14 @@ template <typename ST>
bool CLDA::solver_classic()
{
auto dense_feat = static_cast<CDenseFeatures<ST>*>(features);
auto labels = multiclass_labels(m_labels);
REQUIRE(
labels->get_num_classes() == 2, "Number of classes (%d) must be 2\n",
labels->get_num_classes())
index_t num_feat = dense_feat->get_num_features();

auto solver = std::unique_ptr<LDASolver<ST>>(
new LDASolver<ST>(
dense_feat,
new CMulticlassLabels(static_cast<CBinaryLabels*>(m_labels)),
m_gamma));
new LDASolver<ST>(dense_feat, labels, m_gamma));

auto class_mean = solver->get_class_mean();
auto class_count = solver->get_class_count();
Expand Down
2 changes: 2 additions & 0 deletions src/shogun/labels/MulticlassLabels.cpp
Expand Up @@ -235,6 +235,8 @@ namespace shogun
orig->as<CMulticlassLabels>());
case LT_DENSE_GENERIC:
return to_multiclass(orig->as<CDenseLabels>());
case LT_BINARY:
return to_multiclass(orig->as<CBinaryLabels>());
default:
SG_SNOTIMPLEMENTED
}
Expand Down
80 changes: 27 additions & 53 deletions tests/unit/classifier/LDA_unittest.cc
Expand Up @@ -96,63 +96,23 @@ typedef ::testing::Types<float32_t, float64_t, floatmax_t> FloatTypes;
TYPED_TEST_CASE(LDATest, FloatTypes);

template <typename ST>
void FLD_test(SGVector<ST> &projection_FLD, SGVector<ST> &w_FLD)
void test_with_method(
SGVector<ST>& projection, SGVector<ST>& w, ELDAMethod method)
{
SGVector<float64_t> lab;
SGMatrix<float64_t> feat;

CBinaryLabels* labels;
CDenseFeatures<float64_t>* features;

generate_test_data<float64_t>(lab, feat);

labels = new CBinaryLabels(lab);
features = new CDenseFeatures<float64_t>(feat);

SG_REF(labels);
SG_REF(features);

CLDA lda_FLD(0, features, labels, FLD_LDA);
lda_FLD.train();
auto features = some<CDenseFeatures<float64_t>>(feat);
auto labels = some<CBinaryLabels>(lab);

CRegressionLabels* results_FLD=(lda_FLD.apply_regression(features));
SG_REF(results_FLD);
projection_FLD=results_FLD->get_labels();
w_FLD = lda_FLD.get_w();
SG_UNREF(results_FLD);
auto lda = some<CLDA>(0, features, labels, method);
lda->train();

SG_UNREF(features);
SG_UNREF(labels);
}

template <typename ST>
void SVD_test(SGVector<ST> &projection_SVD, SGVector<ST> &w_SVD)
{
SGVector<float64_t> lab;
SGMatrix<float64_t> feat;

CBinaryLabels* labels;
CDenseFeatures<float64_t>* features;

generate_test_data<float64_t>(lab, feat);

labels = new CBinaryLabels(lab);
features = new CDenseFeatures<float64_t>(feat);

SG_REF(labels);
SG_REF(features);

CLDA lda_SVD(0, features, labels, SVD_LDA);
lda_SVD.train();

CRegressionLabels* results_SVD=lda_SVD.apply_regression(features);
SG_REF(results_SVD);
projection_SVD=results_SVD->get_labels();
w_SVD=lda_SVD.get_w();
SG_UNREF(results_SVD);

SG_UNREF(features);
SG_UNREF(labels);
auto results = lda->apply_regression(features);
projection = results->get_labels();
w = lda->get_w();
}

template <typename ST>
Expand All @@ -161,7 +121,7 @@ void check_eigenvectors_fld()
SGVector<float64_t> projection_FLD;
SGVector<float64_t> w_FLD;

FLD_test<float64_t>(projection_FLD, w_FLD);
test_with_method<float64_t>(projection_FLD, w_FLD, FLD_LDA);

// normalize 'w' since the magnitude is irrelevant
w_FLD = linalg::scale(w_FLD, 1.0 / linalg::norm(w_FLD));
Expand All @@ -179,7 +139,7 @@ TEST(LDA, DISABLED_CheckProjection_FLD)
SGVector<float64_t> projection_FLD;
SGVector<float64_t> w_FLD;

FLD_test<float64_t>(projection_FLD, w_FLD);
test_with_method<float64_t>(projection_FLD, w_FLD, FLD_LDA);

// No need to check the binary labels if the following passes.
float64_t epsilon=0.00000001;
Expand All @@ -201,7 +161,7 @@ void check_eigenvectors_svd()
SGVector<float64_t> projection_SVD;
SGVector<float64_t> w_SVD;

SVD_test<float64_t>(projection_SVD, w_SVD);
test_with_method<float64_t>(projection_SVD, w_SVD, SVD_LDA);

// comparing against the eigenvectors of the CanonVar implementation
// in the brml toolbox, MATLAB.
Expand All @@ -216,7 +176,7 @@ TEST(LDA, DISABLED_CheckProjection_SVD)
SGVector<float64_t> projection_SVD;
SGVector<float64_t> w_SVD;

FLD_test<float64_t>(projection_SVD, w_SVD);
test_with_method<float64_t>(projection_SVD, w_SVD, FLD_LDA);

//comparing against the projections from the CanonVar implementation
//in the brml toolbox, MATLAB.
Expand All @@ -233,6 +193,20 @@ TEST(LDA, DISABLED_CheckProjection_SVD)
EXPECT_NEAR(+7.96084156, projection_SVD[9], epsilon);
}

// Training LDA on labels that contain more than two distinct classes must
// fail: the labels below hold three different values (1, -1, 2).
TEST(LDA, num_classes_in_labels_exception)
{
	// a single-feature, three-sample matrix; its contents are left unset here
	SGMatrix<float64_t> feature_matrix(1, 3);
	auto dense_features = some<CDenseFeatures<float64_t>>(feature_matrix);

	// generic dense labels carrying three different label values
	SGVector<float64_t> label_values{1, -1, 2};
	auto dense_labels = some<CDenseLabels>();
	dense_labels->set_labels(label_values);

	auto classifier = some<CLDA>(0, dense_features, dense_labels, SVD_LDA);

	// train() is expected to raise a ShogunException because the number of
	// classes is not 2
	EXPECT_THROW(classifier->train(), ShogunException);
}

//FLD template testing
TYPED_TEST(LDATest, check_eigenvectors_fld)
{
Expand Down

0 comments on commit 18cf07f

Please sign in to comment.