Replaced all matrix operations in the regressor-learning with Eigen
- Moving At*A and x=AtAInv*At*b from OpenCV to Eigen is ~9x faster
- Replaced FullPivLU with ColPivHouseholderQR (3-5x faster)
- Updated the unit tests accordingly (relaxed the thresholds a bit)
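In essence, the new learning path maps the OpenCV matrices into Eigen without copying, forms AtA there, decomposes it with ColPivHouseholderQR, and maps the solution back into a cv::Mat. A condensed, self-contained sketch of that pattern (regularisation omitted, continuous CV_32FC1 input assumed; the function and variable names are illustrative, not part of the library):

#include <iostream>
#include <Eigen/Dense>
#include <opencv2/core/core.hpp>

// Sketch of the Eigen-based solve of x = (At*A)^-1 * At * b, mirroring the diff below.
cv::Mat solveNormalEquations(cv::Mat data, cv::Mat labels)
{
    using RowMajorMatrixXf = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
    // Wrap the OpenCV buffers without copying; cv::Mat is row-major, so the maps must be too:
    Eigen::Map<RowMajorMatrixXf> A(data.ptr<float>(), data.rows, data.cols);
    Eigen::Map<RowMajorMatrixXf> b(labels.ptr<float>(), labels.rows, labels.cols);

    RowMajorMatrixXf AtA = A.transpose() * A;
    Eigen::ColPivHouseholderQR<RowMajorMatrixXf> qrOfAtA(AtA);
    if (!qrOfAtA.isInvertible()) {
        std::cout << "AtA is rank-deficient (rank " << qrOfAtA.rank() << " of " << AtA.rows() << "); increase the regularisation." << std::endl;
    }
    RowMajorMatrixXf x = qrOfAtA.inverse() * A.transpose() * b;

    // Copy the result into a cv::Mat, because x goes out of scope when the function returns:
    cv::Mat x_cv(static_cast<int>(x.rows()), static_cast<int>(x.cols()), CV_32FC1, x.data());
    return x_cv.clone();
}

Calling qrOfAtA.solve(A.transpose() * b) would avoid forming the explicit inverse, but the sketch keeps the commit's structure, which computes the inverse and then multiplies.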
patrikhuber committed Jan 23, 2015
1 parent 0a1492e commit e4b43fb
Showing 3 changed files with 56 additions and 49 deletions.
45 changes: 26 additions & 19 deletions include/superviseddescent/regressors.hpp
@@ -209,31 +209,38 @@ class LinearRegressor : public Regressor
bool learn(cv::Mat data, cv::Mat labels)
{
using cv::Mat;
// Map the cv::Mat data and labels to Eigen matrices:
Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> A_Eigen(data.ptr<float>(), data.rows, data.cols);
Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> labels_Eigen(labels.ptr<float>(), labels.rows, labels.cols);

Mat AtA = data.t() * data;

Mat regularisationMatrix = regulariser.getMatrix(AtA, data.rows);
Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> AtA_Eigen = A_Eigen.transpose() * A_Eigen;

AtA = AtA + regularisationMatrix;
assert(AtA.isContinuous());
// Map the cv::Mat data to an Eigen::Matrix and perform a FullPivLU:
Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> AtA_Eigen(AtA.ptr<float>(), AtA.rows, AtA.cols);
Eigen::FullPivLU<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> luOfAtA(AtA_Eigen);
auto rankOfAtA = luOfAtA.rank();
if (!luOfAtA.isInvertible()) {
// Eigen will most likely return garbage here (according to their docu anyway). We have a few options:
// - Increase lambda
// - Calculate the pseudo-inverse. See: http://eigen.tuxfamily.org/index.php?title=FAQ#Is_there_a_method_to_compute_the_.28Moore-Penrose.29_pseudo_inverse_.3F
std::cout << "The regularised AtA is not invertible. We continued learning, but Eigen calculates garbage in this case according to their documentation. (The rank is " << std::to_string(rankOfAtA) << ", full rank would be " << std::to_string(AtA_Eigen.rows()) << "). Increase lambda (or use the pseudo-inverse, which is not implemented yet)." << std::endl;
// Note: This is a bit of unnecessary back-and-forth mapping, just for the regularisation:
Mat AtA_Map(static_cast<int>(AtA_Eigen.rows()), static_cast<int>(AtA_Eigen.cols()), CV_32FC1, AtA_Eigen.data());
Mat regularisationMatrix = regulariser.getMatrix(AtA_Map, data.rows);
Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> reg_Eigen(regularisationMatrix.ptr<float>(), regularisationMatrix.rows, regularisationMatrix.cols);

AtA_Eigen = AtA_Eigen + reg_Eigen;

// Perform a ColPivHouseholderQR (faster than FullPivLU) that allows to check for invertibility:
Eigen::ColPivHouseholderQR<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> qrOfAtA(AtA_Eigen);
auto rankOfAtA = qrOfAtA.rank();
if (!qrOfAtA.isInvertible()) {
// Eigen may return garbage (their docu is not very specific). Best option is to increase regularisation.
std::cout << "The regularised AtA is not invertible. We continued learning, but Eigen may return garbage (their docu is not very specific). (The rank is " << std::to_string(rankOfAtA) << ", full rank would be " << std::to_string(AtA_Eigen.rows()) << "). Increase lambda." << std::endl;
}
Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> AtAInv_Eigen = luOfAtA.inverse();
// Map the inverted matrix back to a cv::Mat by creating a Mat header:
Mat AtAInv(static_cast<int>(AtAInv_Eigen.rows()), static_cast<int>(AtAInv_Eigen.cols()), CV_32FC1, AtAInv_Eigen.data());
Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> AtAInv_Eigen = qrOfAtA.inverse();

// x = (AtAReg)^-1 * At * b:
x = AtAInv * data.t() * labels; // store the result in the member variable
Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> x_Eigen = AtAInv_Eigen * A_Eigen.transpose() * labels_Eigen;

// Map the resulting x back to a cv::Mat by creating a Mat header:
Mat x(static_cast<int>(x_Eigen.rows()), static_cast<int>(x_Eigen.cols()), CV_32FC1, x_Eigen.data());

// We have to copy the data because the underlying data is managed by Eigen::Matrix x_Eigen, which will go out of scope after we leave this function:
this->x = x.clone();

return luOfAtA.isInvertible();
return qrOfAtA.isInvertible();
};

/**
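The old code's comment above points to Eigen's FAQ entry on computing a Moore-Penrose pseudo-inverse as an alternative to increasing lambda; that path is not implemented in this commit. A hypothetical sketch of how it could look with Eigen's JacobiSVD (the helper name and the tolerance are assumptions, not existing library or project code):

#include <Eigen/Dense>

// Hypothetical fallback (not part of this commit): Moore-Penrose pseudo-inverse via SVD.
// Singular values below a relative tolerance are treated as zero and not inverted.
Eigen::MatrixXf pseudoInverse(const Eigen::MatrixXf& m, float relativeTolerance = 1e-6f)
{
    Eigen::JacobiSVD<Eigen::MatrixXf> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
    Eigen::VectorXf singularValues = svd.singularValues();
    float cutoff = relativeTolerance * singularValues(0); // singular values are sorted, largest first
    Eigen::VectorXf inverted = singularValues.unaryExpr(
        [cutoff](float s) { return s > cutoff ? 1.0f / s : 0.0f; });
    return svd.matrixV() * inverted.asDiagonal() * svd.matrixU().transpose();
}

Newer Eigen releases (3.3 and later) also provide CompleteOrthogonalDecomposition::pseudoInverse(), which would make such a helper unnecessary.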
50 changes: 25 additions & 25 deletions test/test_LinearRegressorND.cpp
@@ -82,7 +82,7 @@ TEST(LinearRegressor, NDimTwoExamplesTestingResidual) {
groundtruth.at<float>(1) = 2.0f;
groundtruth.at<float>(2) = -1.0f; // gt = [ 0 (error 0), 2 (error 0), -1 (error -3)]
double residual = lr.test(test, groundtruth);
ASSERT_DOUBLE_EQ(1.3416407864998738, residual) << "Expected the residual to be 1.34164...";
ASSERT_NEAR(1.3416407, residual, 0.0000001);
}

TEST(LinearRegressor, NDimTwoExamplesNDimYLearning) {
@@ -144,7 +144,7 @@ TEST(LinearRegressor, NDimTwoExamplesNDimYTestingResidual) {
groundtruth.at<float>(1, 1) = 4.0f;
groundtruth.at<float>(2, 1) = -2.0f;
double residual = lr.test(test, groundtruth);
ASSERT_DOUBLE_EQ(1.1135528725660042, residual) << "Expected the residual to be 1.11355...";
ASSERT_NEAR(1.11355285, residual, 0.00000004);
}

TEST(LinearRegressor, NDimManyExamplesNDimY) {
Expand All @@ -155,12 +155,12 @@ TEST(LinearRegressor, NDimManyExamplesNDimY) {
LinearRegressor lr;
bool isInvertible = lr.learn(data, labels);
EXPECT_EQ(true, isInvertible);
EXPECT_FLOAT_EQ(0.489539176f, lr.x.at<float>(0, 0)) << "Expected the learned x_0_0 to be different"; // Every col is a learned regressor for a label
EXPECT_FLOAT_EQ(-0.0660829917f, lr.x.at<float>(1, 0)) << "Expected the learned x_1_0 to be different";
EXPECT_FLOAT_EQ(0.339629412f, lr.x.at<float>(2, 0)) << "Expected the learned x_2_0 to be different";
EXPECT_FLOAT_EQ(-0.833899379f, lr.x.at<float>(0, 1)) << "Expected the learned x_0_1 to be different";
EXPECT_FLOAT_EQ(0.626753688f, lr.x.at<float>(1, 1)) << "Expected the learned x_1_1 to be different";
EXPECT_FLOAT_EQ(0.744218946f, lr.x.at<float>(2, 1)) << "Expected the learned x_2_1 to be different";
EXPECT_NEAR(0.489539f, lr.x.at<float>(0, 0), 0.000002); // Every col is a learned regressor for a label
EXPECT_NEAR(-0.06608297f, lr.x.at<float>(1, 0), 0.00000003);
EXPECT_FLOAT_EQ(0.339629412f, lr.x.at<float>(2, 0));
EXPECT_FLOAT_EQ(-0.833899379f, lr.x.at<float>(0, 1));
EXPECT_FLOAT_EQ(0.626753688f, lr.x.at<float>(1, 1));
EXPECT_FLOAT_EQ(0.744218946f, lr.x.at<float>(2, 1));

// Testing:
Mat test = (cv::Mat_<float>(3, 3) << 2.0f, 6.0f, 5.0f, 2.9f, -11.3, 6.0f, -2.0f, -8.438f, 3.3f);
Expand All @@ -181,7 +181,7 @@ TEST(LinearRegressor, NDimManyExamplesNDimYRegularisation) {
EXPECT_FLOAT_EQ(0.282755911f, lr.x.at<float>(0, 0)) << "Expected the learned x_0_0 to be different"; // Every col is a learned regressor for a label
EXPECT_FLOAT_EQ(0.0360795595f, lr.x.at<float>(1, 0)) << "Expected the learned x_1_0 to be different";
EXPECT_FLOAT_EQ(0.291039944f, lr.x.at<float>(2, 0)) << "Expected the learned x_2_0 to be different";
EXPECT_FLOAT_EQ(-0.0989616737f, lr.x.at<float>(0, 1)) << "Expected the learned x_0_1 to be different";
EXPECT_NEAR(-0.0989616f, lr.x.at<float>(0, 1), 0.0000001) << "Expected the learned x_0_1 to be different";
EXPECT_FLOAT_EQ(0.330635577f, lr.x.at<float>(1, 1)) << "Expected the learned x_1_1 to be different";
EXPECT_FLOAT_EQ(0.217046738f, lr.x.at<float>(2, 1)) << "Expected the learned x_2_1 to be different";

@@ -202,22 +202,22 @@ TEST(LinearRegressor, NDimManyExamplesNDimYBias) {
cv::hconcat(data, biasColumn, data);
bool isInvertible = lr.learn(data, labels);
EXPECT_EQ(true, isInvertible);
EXPECT_FLOAT_EQ(0.485008746f, lr.x.at<float>(0, 0)) << "Expected the learned x_0_0 to be different"; // Every col is a learned regressor for a label
EXPECT_FLOAT_EQ(0.0122186244f, lr.x.at<float>(1, 0)) << "Expected the learned x_1_0 to be different";
EXPECT_FLOAT_EQ(0.407824278f, lr.x.at<float>(2, 0)) << "Expected the learned x_2_0 to be different";
EXPECT_FLOAT_EQ(-0.615155935f, lr.x.at<float>(3, 0)) << "Expected the learned x_3_0 to be different";
EXPECT_FLOAT_EQ(-0.894791782f, lr.x.at<float>(0, 1)) << "Expected the learned x_0_1 to be different";
EXPECT_FLOAT_EQ(1.67920494f, lr.x.at<float>(1, 1)) << "Expected the learned x_1_1 to be different";
EXPECT_FLOAT_EQ(1.66081595f, lr.x.at<float>(2, 1)) << "Expected the learned x_2_1 to be different";
EXPECT_FLOAT_EQ(-8.26834011f, lr.x.at<float>(3, 1)) << "Expected the learned x_3_1 to be different";
EXPECT_NEAR(0.485009f, lr.x.at<float>(0, 0), 0.000001) << "Expected the learned x_0_0 to be different"; // Every col is a learned regressor for a label
EXPECT_NEAR(0.012218f, lr.x.at<float>(1, 0), 0.000002) << "Expected the learned x_1_0 to be different";
EXPECT_NEAR(0.407823f, lr.x.at<float>(2, 0), 0.000002) << "Expected the learned x_2_0 to be different";
EXPECT_NEAR(-0.61515f, lr.x.at<float>(3, 0), 0.00001) << "Expected the learned x_3_0 to be different";
EXPECT_NEAR(-0.894791f, lr.x.at<float>(0, 1), 0.000001) << "Expected the learned x_0_1 to be different";
EXPECT_NEAR(1.679203f, lr.x.at<float>(1, 1), 0.000003) << "Expected the learned x_1_1 to be different";
EXPECT_NEAR(1.660814f, lr.x.at<float>(2, 1), 0.000002) << "Expected the learned x_2_1 to be different";
EXPECT_NEAR(-8.26833f, lr.x.at<float>(3, 1), 0.00002) << "Expected the learned x_3_1 to be different";

// Testing:
Mat test = (cv::Mat_<float>(3, 3) << 2.0f, 6.0f, 5.0f, 2.9f, -11.3, 6.0f, -2.0f, -8.438f, 3.3f);
Mat biasColumnTest = Mat::ones(test.rows, 1, CV_32FC1);
cv::hconcat(test, biasColumnTest, test);
Mat groundtruth = (cv::Mat_<float>(3, 2) << 2.4673f, 8.3214f, 3.1002f, -19.8734f, -0.3425f, -15.1672f);
double residual = lr.test(test, groundtruth);
ASSERT_LE(residual, 0.000003) << "Expected the residual to be smaller than "; // we could relax that a bit
ASSERT_LE(residual, 0.000006); // we could relax that a bit
}

// Actually we could test the Regulariser separately, no need to have separate unit tests.
@@ -232,14 +232,14 @@ TEST(LinearRegressor, NDimManyExamplesNDimYBiasRegularisation) {
cv::hconcat(data, biasColumn, data);
bool isInvertible = lr.learn(data, labels);
EXPECT_EQ(true, isInvertible);
EXPECT_FLOAT_EQ(0.281424582f, lr.x.at<float>(0, 0)) << "Expected the learned x_0_0 to be different"; // Every col is a learned regressor for a label
EXPECT_NEAR(0.2814246f, lr.x.at<float>(0, 0), 0.0000002) << "Expected the learned x_0_0 to be different"; // Every col is a learned regressor for a label
EXPECT_FLOAT_EQ(0.0331765190f, lr.x.at<float>(1, 0)) << "Expected the learned x_1_0 to be different";
EXPECT_FLOAT_EQ(0.289116770f, lr.x.at<float>(2, 0)) << "Expected the learned x_2_0 to be different";
EXPECT_FLOAT_EQ(0.0320090912f, lr.x.at<float>(3, 0)) << "Expected the learned x_3_0 to be different";
EXPECT_FLOAT_EQ(-0.100544833f, lr.x.at<float>(0, 1)) << "Expected the learned x_0_1 to be different";
EXPECT_NEAR(-0.1005448f, lr.x.at<float>(0, 1), 0.0000001) << "Expected the learned x_0_1 to be different";
EXPECT_FLOAT_EQ(0.327183396f, lr.x.at<float>(1, 1)) << "Expected the learned x_1_1 to be different";
EXPECT_FLOAT_EQ(0.214759737f, lr.x.at<float>(2, 1)) << "Expected the learned x_2_1 to be different";
EXPECT_FLOAT_EQ(0.0380640067f, lr.x.at<float>(3, 1)) << "Expected the learned x_3_1 to be different";
EXPECT_NEAR(0.03806401f, lr.x.at<float>(3, 1), 0.00000002) << "Expected the learned x_3_1 to be different";

// Testing:
Mat test = (cv::Mat_<float>(3, 3) << 2.0f, 6.0f, 5.0f, 2.9f, -11.3, 6.0f, -2.0f, -8.438f, 3.3f);
Expand All @@ -261,13 +261,13 @@ TEST(LinearRegressor, NDimManyExamplesNDimYBiasRegularisationButNotBias) {
cv::hconcat(data, biasColumn, data);
bool isInvertible = lr.learn(data, labels);
EXPECT_EQ(true, isInvertible);
EXPECT_FLOAT_EQ(0.218878254f, lr.x.at<float>(0, 0)) << "Expected the learned x_0_0 to be different"; // Every col is a learned regressor for a label
EXPECT_FLOAT_EQ(-0.103211358f, lr.x.at<float>(1, 0)) << "Expected the learned x_1_0 to be different";
EXPECT_FLOAT_EQ(0.198760599f, lr.x.at<float>(2, 0)) << "Expected the learned x_2_0 to be different";
EXPECT_NEAR(0.2188783f, lr.x.at<float>(0, 0), 0.0000002) << "Expected the learned x_0_0 to be different"; // Every col is a learned regressor for a label
EXPECT_NEAR(-0.1032114f, lr.x.at<float>(1, 0), 0.0000001) << "Expected the learned x_1_0 to be different";
EXPECT_NEAR(0.1987606f, lr.x.at<float>(2, 0), 0.0000002) << "Expected the learned x_2_0 to be different";
EXPECT_FLOAT_EQ(1.53583705f, lr.x.at<float>(3, 0)) << "Expected the learned x_3_0 to be different";
EXPECT_FLOAT_EQ(-0.174922630f, lr.x.at<float>(0, 1)) << "Expected the learned x_0_1 to be different";
EXPECT_FLOAT_EQ(0.164996058f, lr.x.at<float>(1, 1)) << "Expected the learned x_1_1 to be different";
EXPECT_FLOAT_EQ(0.107311621f, lr.x.at<float>(2, 1)) << "Expected the learned x_2_1 to be different";
EXPECT_NEAR(0.1073116f, lr.x.at<float>(2, 1), 0.0000001) << "Expected the learned x_2_1 to be different";
EXPECT_FLOAT_EQ(1.82635951f, lr.x.at<float>(3, 1)) << "Expected the learned x_3_1 to be different";

// Testing:
10 changes: 5 additions & 5 deletions test/test_SupervisedDescentOptimiser.cpp
@@ -133,7 +133,7 @@ TEST(SupervisedDescentOptimiser, SinConvergenceCascade) {
// Make sure the training converges, i.e. the residual is correct on the training data:
Mat predictions = sdo.test(x0, y_tr, h);
double trainingResidual = normalisedLeastSquaresResidual(predictions, x_tr);
EXPECT_DOUBLE_EQ(0.040279395431369915, trainingResidual);
EXPECT_NEAR(0.040279395, trainingResidual, 0.000000001);

// Test the trained model:
// Test data with finer resolution:
@@ -153,7 +153,7 @@

predictions = sdo.test(x0_ts, y_ts, h);
double testResidual = normalisedLeastSquaresResidual(predictions, x_ts_gt);
ASSERT_NEAR(0.0261567751, testResidual, 0.0000000001);
ASSERT_NEAR(0.026156775, testResidual, 0.000000004);
}

TEST(SupervisedDescentOptimiser, XCubeConvergence) {
@@ -202,7 +202,7 @@ TEST(SupervisedDescentOptimiser, XCubeConvergence) {

predictions = sdo.test(x0_ts, y_ts, h);
double testResidual = normalisedLeastSquaresResidual(predictions, x_ts_gt);
ASSERT_NEAR(0.353428615, testResidual, 0.000000002);
ASSERT_NEAR(0.353428615, testResidual, 0.00002);
}

TEST(SupervisedDescentOptimiser, XCubeConvergenceCascade) {
@@ -311,7 +311,7 @@ TEST(SupervisedDescentOptimiser, ErfConvergence) {

predictions = sdo.test(x0_ts, y_ts, h);
double testResidual = normalisedLeastSquaresResidual(predictions, x_ts_gt);
ASSERT_NEAR(0.25736006, testResidual, 0.00000004);
ASSERT_NEAR(0.25736006, testResidual, 0.0000002);
}

TEST(SupervisedDescentOptimiser, ErfConvergenceCascade) {
@@ -546,7 +546,7 @@ TEST(SupervisedDescentOptimiser, SinErfConvergenceCascadeMultiY) {
sdo.train(x_tr, x0, y_tr, h);
Mat predictions = sdo.test(x0, y_tr, h);
double trainingResidual = cv::norm(predictions, x_tr, cv::NORM_L2) / cv::norm(x_tr, cv::NORM_L2);
EXPECT_NEAR(0.0002677, trainingResidual, 0.0000003);
EXPECT_NEAR(0.0002677, trainingResidual, 0.0000004);

// Test the trained model:
// Test data with finer resolution:
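The relaxed test thresholds above reflect that the QR-based solve does not produce bit-identical results to the previous LU-based one, so the exact comparisons (ASSERT_DOUBLE_EQ and EXPECT_FLOAT_EQ, which in Google Test only tolerate a difference of about 4 ULPs) are replaced by explicit absolute tolerances. A minimal illustration of the pattern, with a hypothetical value that differs from the hard-coded expectation in the sixth decimal place:

#include <gtest/gtest.h>

TEST(Example, RelaxedTolerance) {
    float learnedCoefficient = 0.489540f; // hypothetical result after the solver change
    // EXPECT_FLOAT_EQ(0.489539176f, learnedCoefficient); // would fail: the difference is many ULPs
    EXPECT_NEAR(0.489539f, learnedCoefficient, 0.000002);  // passes for both the old and the new value
}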
