factory methods in lda meta example #4247 (#4236)

* factory methods in lda * some-ize lda unit test * data update
shogun-toolbox · May 7, 2018 · 18cf07f · 18cf07f
1 parent ee641ce
commit 18cf07f
Show file tree

Hide file tree

Showing 5 changed files with 51 additions and 86 deletions.
diff --git a/data b/data
diff --git a/examples/meta/src/binary/linear_discriminant_analysis.sg b/examples/meta/src/binary/linear_discriminant_analysis.sg
@@ -1,13 +1,13 @@
-CSVFile f_feats_train("../../data/classifier_binary_2d_linear_features_train.dat")
-CSVFile f_feats_test("../../data/classifier_binary_2d_linear_features_test.dat")
-CSVFile f_labels_train("../../data/classifier_binary_2d_linear_labels_train.dat")
-CSVFile f_labels_test("../../data/classifier_binary_2d_linear_labels_test.dat")
+File f_feats_train = csv_file("../../data/classifier_binary_2d_linear_features_train.dat")
+File f_feats_test = csv_file("../../data/classifier_binary_2d_linear_features_test.dat")
+File f_labels_train = csv_file("../../data/classifier_binary_2d_linear_labels_train.dat")
+File f_labels_test = csv_file("../../data/classifier_binary_2d_linear_labels_test.dat")
 
 #![create_features]
 Features features_train = features(f_feats_train)
 Features features_test = features(f_feats_test)
-BinaryLabels labels_train(f_labels_train)
-BinaryLabels labels_test(f_labels_test)
+Labels labels_train = labels(f_labels_train)
+Labels labels_test = labels(f_labels_test)
 #![create_features]
 
 #![create_instance]
@@ -16,17 +16,15 @@ Machine lda = machine("LDA", labels=labels_train)
 
 #![train_and_apply]
 lda.train(features_train)
-BinaryLabels labels_predict = lda.apply_binary(features_test)
+Labels labels_predict = lda.apply(features_test)
+RealVector labels = labels_predict.get_real_vector("labels")
 #![train_and_apply]
 
 #![extract_weights]
-RealVector w = lda.get_real_vector("w")
+RealVector weights = lda.get_real_vector("w")
 #![extract_weights]
 
 #![evaluate_accuracy]
-AccuracyMeasure eval()
+Evaluation eval = evaluation("AccuracyMeasure")
 real accuracy = eval.evaluate(labels_predict, labels_test)
 #![evaluate_accuracy]
-
-#additional integration testing variables
-RealVector output = labels_predict.get_labels()
diff --git a/src/shogun/classifier/LDA.cpp b/src/shogun/classifier/LDA.cpp
@@ -49,9 +49,7 @@ void CLDA::init()
 	SG_ADD(
 	    (machine_int_t*)&m_method, "m_method",
 	    "Method used for LDA calculation", MS_NOT_AVAILABLE);
-	SG_ADD(
-	    (machine_int_t*)&m_gamma, "m_gamma", "Regularization parameter",
-	    MS_NOT_AVAILABLE);
+	SG_ADD(&m_gamma, "m_gamma", "Regularization parameter", MS_AVAILABLE);
 	SG_ADD(&m_bdc_svd, "m_bdc_svd", "Use BDC-SVD algorithm", MS_NOT_AVAILABLE);
 }
 
@@ -62,10 +60,6 @@ CLDA::~CLDA()
 bool CLDA::train_machine(CFeatures *data)
 {
 	REQUIRE(m_labels, "Labels for the given features are not specified!\n")
-	REQUIRE(
-	    m_labels->get_label_type() == LT_BINARY,
-	    "The labels should of type CBinaryLabels! Provided type is %s \n",
-	    m_labels->get_name())
 
 	if(data)
 	{
@@ -78,12 +72,6 @@ bool CLDA::train_machine(CFeatures *data)
 		REQUIRE(data, "Features have not been provided.\n")
 	}
 
-	REQUIRE(
-	    features->get_num_vectors() == m_labels->get_num_labels(),
-	    "Number of training examples(%d) should be equal to number of labels "
-	    "(%d)!\n",
-	    features->get_num_vectors(), m_labels->get_num_labels());
-
 	REQUIRE(
 	    features->get_feature_class() == C_DENSE,
 	    "LDA only works with dense features")
@@ -117,14 +105,16 @@ template <typename ST>
 bool CLDA::solver_svd()
 {
 	auto dense_feat = static_cast<CDenseFeatures<ST>*>(features);
+	auto labels = multiclass_labels(m_labels);
+	REQUIRE(
+	    labels->get_num_classes() == 2, "Number of classes (%d) must be 2\n",
+	    labels->get_num_classes())
 
 	// keep just one dimension to do binary classification
 	const index_t projection_dim = 1;
 	auto solver = std::unique_ptr<LDACanVarSolver<ST>>(
 	    new LDACanVarSolver<ST>(
-	        dense_feat,
-	        new CMulticlassLabels(static_cast<CBinaryLabels*>(m_labels)),
-	        projection_dim, m_gamma, m_bdc_svd));
+	        dense_feat, labels, projection_dim, m_gamma, m_bdc_svd));
 
 	SGVector<ST> w_st(solver->get_eigenvectors());
 
@@ -150,13 +140,14 @@ template <typename ST>
 bool CLDA::solver_classic()
 {
 	auto dense_feat = static_cast<CDenseFeatures<ST>*>(features);
+	auto labels = multiclass_labels(m_labels);
+	REQUIRE(
+	    labels->get_num_classes() == 2, "Number of classes (%d) must be 2\n",
+	    labels->get_num_classes())
 	index_t num_feat = dense_feat->get_num_features();
 
 	auto solver = std::unique_ptr<LDASolver<ST>>(
-	    new LDASolver<ST>(
-	        dense_feat,
-	        new CMulticlassLabels(static_cast<CBinaryLabels*>(m_labels)),
-	        m_gamma));
+	    new LDASolver<ST>(dense_feat, labels, m_gamma));
 
 	auto class_mean = solver->get_class_mean();
 	auto class_count = solver->get_class_count();

diff --git a/src/shogun/labels/MulticlassLabels.cpp b/src/shogun/labels/MulticlassLabels.cpp
@@ -235,6 +235,8 @@ namespace shogun
 				    orig->as<CMulticlassLabels>());
 			case LT_DENSE_GENERIC:
 				return to_multiclass(orig->as<CDenseLabels>());
+			case LT_BINARY:
+				return to_multiclass(orig->as<CBinaryLabels>());
 			default:
 				SG_SNOTIMPLEMENTED
 			}

diff --git a/tests/unit/classifier/LDA_unittest.cc b/tests/unit/classifier/LDA_unittest.cc
@@ -96,63 +96,23 @@ typedef ::testing::Types<float32_t, float64_t, floatmax_t> FloatTypes;
 TYPED_TEST_CASE(LDATest, FloatTypes);
 
 template <typename ST>
-void FLD_test(SGVector<ST> &projection_FLD, SGVector<ST> &w_FLD)
+void test_with_method(
+    SGVector<ST>& projection, SGVector<ST>& w, ELDAMethod method)
 {
 	SGVector<float64_t> lab;
 	SGMatrix<float64_t> feat;
 
-	CBinaryLabels* labels;
-	CDenseFeatures<float64_t>* features;
-
 	generate_test_data<float64_t>(lab, feat);
 
-	labels = new CBinaryLabels(lab);
-	features = new CDenseFeatures<float64_t>(feat);
-
-	SG_REF(labels);
-	SG_REF(features);
-
-	CLDA lda_FLD(0, features, labels, FLD_LDA);
-	lda_FLD.train();
+	auto features = some<CDenseFeatures<float64_t>>(feat);
+	auto labels = some<CBinaryLabels>(lab);
 
-	CRegressionLabels* results_FLD=(lda_FLD.apply_regression(features));
-	SG_REF(results_FLD);
-	projection_FLD=results_FLD->get_labels();
-	w_FLD = lda_FLD.get_w();
-	SG_UNREF(results_FLD);
+	auto lda = some<CLDA>(0, features, labels, method);
+	lda->train();
 
-	SG_UNREF(features);
-	SG_UNREF(labels);
-}
-
-template <typename ST>
-void SVD_test(SGVector<ST> &projection_SVD, SGVector<ST> &w_SVD)
-{
-	SGVector<float64_t> lab;
-	SGMatrix<float64_t> feat;
-
-	CBinaryLabels* labels;
-	CDenseFeatures<float64_t>* features;
-
-	generate_test_data<float64_t>(lab, feat);
-
-	labels = new CBinaryLabels(lab);
-	features = new CDenseFeatures<float64_t>(feat);
-
-	SG_REF(labels);
-	SG_REF(features);
-
-	CLDA lda_SVD(0, features, labels, SVD_LDA);
-	lda_SVD.train();
-
-	CRegressionLabels* results_SVD=lda_SVD.apply_regression(features);
-	SG_REF(results_SVD);
-	projection_SVD=results_SVD->get_labels();
-	w_SVD=lda_SVD.get_w();
-	SG_UNREF(results_SVD);
-
-	SG_UNREF(features);
-	SG_UNREF(labels);
+	auto results = lda->apply_regression(features);
+	projection = results->get_labels();
+	w = lda->get_w();
 }
 
 template <typename ST>
@@ -161,7 +121,7 @@ void check_eigenvectors_fld()
 	SGVector<float64_t> projection_FLD;
 	SGVector<float64_t> w_FLD;
 
-	FLD_test<float64_t>(projection_FLD, w_FLD);
+	test_with_method<float64_t>(projection_FLD, w_FLD, FLD_LDA);
 
 	// normalize 'w' since the magnitude is irrelevant
 	w_FLD = linalg::scale(w_FLD, 1.0 / linalg::norm(w_FLD));
@@ -179,7 +139,7 @@ TEST(LDA, DISABLED_CheckProjection_FLD)
 	SGVector<float64_t> projection_FLD;
 	SGVector<float64_t> w_FLD;
 
-	FLD_test<float64_t>(projection_FLD, w_FLD);
+	test_with_method<float64_t>(projection_FLD, w_FLD, FLD_LDA);
 
 	// No need of checking the binary labels if the following passes.
 	float64_t epsilon=0.00000001;
@@ -201,7 +161,7 @@ void check_eigenvectors_svd()
 	SGVector<float64_t> projection_SVD;
 	SGVector<float64_t> w_SVD;
 
-	SVD_test<float64_t>(projection_SVD, w_SVD);
+	test_with_method<float64_t>(projection_SVD, w_SVD, SVD_LDA);
 
 	// comparing against the eigenvectors of the CanonVar implementation
 	// in the brml toolbox, MATLAB.
@@ -216,7 +176,7 @@ TEST(LDA, DISABLED_CheckProjection_SVD)
 	SGVector<float64_t> projection_SVD;
 	SGVector<float64_t> w_SVD;
 
-	FLD_test<float64_t>(projection_SVD, w_SVD);
+	test_with_method<float64_t>(projection_SVD, w_SVD, FLD_LDA);
 
 	//comparing against the projections from the CanonVar implementation
 	//in the brml toolbox, MATLAB.
@@ -233,6 +193,20 @@ TEST(LDA, DISABLED_CheckProjection_SVD)
 	EXPECT_NEAR(+7.96084156, projection_SVD[9], epsilon);
 }
 
+// number of classes exception test
+TEST(LDA, num_classes_in_labels_exception)
+{
+	SGVector<float64_t> lab{1, -1, 2};
+	SGMatrix<float64_t> feat(1, 3);
+	auto labels = some<CDenseLabels>();
+	labels->set_labels(lab);
+	auto features = some<CDenseFeatures<float64_t>>(feat);
+	auto lda = some<CLDA>(0, features, labels, SVD_LDA);
+	// should throw an incorrect number of classes exception (expected value is
+	// 2)
+	EXPECT_THROW(lda->train(), ShogunException);
+}
+
 //FLD template testing
 TYPED_TEST(LDATest, check_eigenvectors_fld)
 {