From d101ca546118ae8736920915c6ac0dead448161f Mon Sep 17 00:00:00 2001
From: Patrik Huber
Date: Sat, 18 Feb 2023 12:25:40 +0000
Subject: [PATCH] Removed glm from Ceres fitting, and rewrote it using Eigen.

This means no more messing around with GLM_FORCE_UNRESTRICTED_GENTYPE. Eigen
and Ceres work much more nicely together.

Updated the cost functions in ceres_nonlinear.hpp, and the example
fit-model-ceres:

* Renamed LandmarkCost to PerspectiveProjectionLandmarkCost
* Renamed ImageCost to VertexColorCost
* Removed the use_perspective switch - only perspective projection now.
* The rotation, translation and intrinsics are now passed as separate
  function/template parameters
* VertexColorCost now uses eos::core::Image instead of cv::Mat
* Added Create factory functions to the cost functions to make it easier for
  user code to create them (following the Ceres tutorial)
* The number of shape, blendshape and colour coefficients can now be set,
  though they have to be set at compile time. We could make them selectable
  at runtime, but then we'd need to use Ceres' dynamic cost functions.
* get_shape_at_point() and get_vertex_color_at_point() are now much simpler
  with Eigen
* Added project() and perspective() functions, equivalent to the ones in GLM
* Added eos/core/math.hpp, with functions to convert between degrees and
  radians (inspired by GLM)

I have not measured the performance difference between the GLM and Eigen
implementations, but running the fitting is now quite fast (a few seconds
with VertexColorCost). I wouldn't be surprised if it became faster.

Two notes about fit-model-ceres:

* The weights for the cost functions are not really tuned, see #348
* The contour fitting is commented out at the moment, see #349
---
 CMakeLists.txt                          |   1 +
 examples/fit-model-ceres.cpp            | 387 ++++++++----------
 include/eos/core/math.hpp               |  55 +++
 include/eos/fitting/ceres_nonlinear.hpp | 514 +++++++++++++-----------
 4 files changed, 507 insertions(+), 450 deletions(-)
 create mode 100644 include/eos/core/math.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4e572a97..daad4373 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -71,6 +71,7 @@ set(HEADERS
     ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/read_obj.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/write_obj.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/Rect.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/math.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/morphablemodel/PcaModel.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/morphablemodel/MorphableModel.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/morphablemodel/Blendshape.hpp
diff --git a/examples/fit-model-ceres.cpp b/examples/fit-model-ceres.cpp
index 8907acfb..dfca9d13 100644
--- a/examples/fit-model-ceres.cpp
+++ b/examples/fit-model-ceres.cpp
@@ -3,7 +3,7 @@
  *
  * File: examples/fit-model-ceres.cpp
  *
- * Copyright 2016 Patrik Huber
+ * Copyright 2016-2023 Patrik Huber
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,24 +17,21 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
 */
-#define GLM_FORCE_UNRESTRICTED_GENTYPE
 #include "eos/core/Landmark.hpp"
 #include "eos/core/LandmarkMapper.hpp"
 #include "eos/core/read_pts_landmarks.hpp"
+#include "eos/core/Image.hpp"
+#include "eos/core/image/opencv_interop.hpp"
 #include "eos/core/write_obj.hpp"
+#include "eos/core/math.hpp"
+#include "eos/morphablemodel/MorphableModel.hpp"
+#include "eos/morphablemodel/Blendshape.hpp"
 #include "eos/fitting/ceres_nonlinear.hpp"
 #include "eos/fitting/contour_correspondence.hpp"
-#include "eos/fitting/fitting.hpp"
-#include "eos/morphablemodel/Blendshape.hpp"
 
 #include "Eigen/Core"
 
-#include "glm/ext.hpp"
-#include "glm/glm.hpp"
-
 #include "ceres/ceres.h"
-#include "ceres/cubic_interpolation.h"
-#include "ceres/rotation.h"
 
 #include "opencv2/core/core.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
@@ -48,7 +45,6 @@
 #include <vector>
 
 using namespace eos;
-using namespace glm;
 using namespace ceres;
 namespace po = boost::program_options;
 namespace fs = boost::filesystem;
@@ -168,7 +164,7 @@ int main(int argc, char* argv[])
     std::vector<morphablemodel::Blendshape> blendshapes =
         eos::morphablemodel::load_blendshapes(blendshapesfile.string());
 
-    // Draw the loaded landmarks:
+    // Draw the loaded landmarks (blue):
     Mat outimg = image.clone();
     for (const auto& lm : landmarks)
     {
         cv::rectangle(outimg, cv::Point2f(lm.coordinates[0] - 2.0f, lm.coordinates[1] - 2.0f),
@@ -176,21 +172,19 @@
                       cv::Point2f(lm.coordinates[0] + 2.0f, lm.coordinates[1] + 2.0f), {255, 0, 0});
     }
 
-    constexpr bool use_perspective = false;
-
     // These will be the 2D image points and their corresponding 3D vertex id's used for the fitting:
     vector<Eigen::Vector2f> image_points; // the 2D landmark points
-    vector<int> vertex_indices; // their corresponding vertex indices
+    vector<int> vertex_indices;           // their corresponding vertex indices
 
     // Sub-select all the landmarks which we have a mapping for (i.e. that are defined in the 3DMM):
     for (int i = 0; i < landmarks.size(); ++i)
     {
-        auto converted_name = landmark_mapper.convert(landmarks[i].name);
+        const auto converted_name = landmark_mapper.convert(landmarks[i].name);
         if (!converted_name)
         { // no mapping defined for the current landmark
            continue;
         }
-        int vertex_idx = std::stoi(converted_name.value());
+        const int vertex_idx = std::stoi(converted_name.value());
         vertex_indices.emplace_back(vertex_idx);
         image_points.emplace_back(landmarks[i].coordinates);
     }
@@ -199,66 +193,70 @@
     std::stringstream fitting_log;
     auto start = std::chrono::steady_clock::now();
 
-    std::vector<double> camera_rotation; // Quaternion, [w x y z]. Todo: Actually, use std::array for all of these.
-    camera_rotation.resize(4); // initialises with zeros
-    camera_rotation[0] = 1.0;
-    std::vector<double> camera_translation_and_intrinsics;
-    constexpr int num_cam_trans_intr_params = use_perspective ? 4 : 3;
-    // Parameters for the orthographic projection: [t_x, t_y, frustum_scale]
-    // And perspective projection: [t_x, t_y, t_z, fov].
-    // Origin is assumed at center of image, and no lens distortions.
+    // The following have to be constexpr because they're used in template parameters in this
+    // implementation:
+    constexpr int num_shape_coeffs = 50;
+    constexpr int num_blendshape_coeffs = 6;
+    constexpr int num_color_coeffs = 50;
+
+    // Weights for the different cost function terms:
+    // Note: To find good weights, ceres::ResidualBlockId can be used to store the ID of each added
+    // residual block. Then, one can use ceres::Problem::EvaluateOptions and ceres::Problem::Evaluate(...)
+    // to retrieve the value of each individual term. We don't do that here, for simplicity. See the Ceres
+    // documentation for details, and https://github.com/patrikhuber/eos/issues/348.
+    const double landmark_cost_weight = 100.0;
+    const double shape_prior_cost_weight = 500000.0;
+    const double blendshapes_prior_cost_weight = 25.0;
+    const double color_prior_cost_weight = 500000.0;
+    const double vertex_color_cost_weight = 1.0;
+
+    // The model parameters that we're estimating:
+    using ModelCoefficients = vector<double>;
+    ModelCoefficients shape_coefficients(num_shape_coeffs, 0.0);
+    ModelCoefficients blendshape_coefficients(num_blendshape_coeffs, 0.0);
+    ModelCoefficients color_coefficients(num_color_coeffs, 0.0);
+
+    // Parameters for the perspective projection: A rotation quaternion, [t_x, t_y, t_z], and fov_y (field
+    // of view).
+    // The origin is assumed at center of image, and no lens distortions.
     // Note: Actually, we estimate the model-view matrix and not the camera position. But one defines the
     // other.
-    camera_translation_and_intrinsics.resize(num_cam_trans_intr_params); // initialises with zeros
-    if (use_perspective)
-    {
-        camera_translation_and_intrinsics[2] = -400.0; // Move the model back (along the -z axis)
-        camera_translation_and_intrinsics[3] = glm::radians(45.0f); // fov
-    } else
-    {
-        camera_translation_and_intrinsics[2] = 110.0; // frustum_scale
-    }
-
-    std::vector<double> shape_coefficients;
-    shape_coefficients.resize(10); // Todo: Currently, the value '10' is hard-coded everywhere. Make it dynamic.
-    std::vector<double> blendshape_coefficients;
-    blendshape_coefficients.resize(6);
+    Eigen::Quaterniond camera_rotation(1.0, 0.0, 0.0, 0.0); // The c'tor takes wxyz. Storage is xyzw.
+    Eigen::Vector3d camera_translation(0.0, 0.0, -400.0);   // Move the model back (along the -z axis)
+    double fov_y = core::radians(45.0);
 
+    // Set up just a landmark cost and optimise for rotation and translation, with fov_y and the 3DMM
+    // parameters fixed (i.e. using the mean face):
     Problem camera_costfunction;
     for (int i = 0; i < image_points.size(); ++i)
     {
-        CostFunction* cost_function =
-            new AutoDiffCostFunction<fitting::LandmarkCost, 2 /* residuals */, 4 /* camera rotation */,
-                                     num_cam_trans_intr_params, 10 /* shape coeffs */, 6 /* bs coeffs */>(
-                new fitting::LandmarkCost(morphable_model.get_shape_model(), blendshapes, image_points[i],
-                                          vertex_indices[i], image.cols, image.rows, use_perspective));
-
-        camera_costfunction.AddResidualBlock(cost_function, NULL, &camera_rotation[0],
-                                             &camera_translation_and_intrinsics[0], &shape_coefficients[0],
-                                             &blendshape_coefficients[0]);
+        auto landmark_cost =
+            fitting::PerspectiveProjectionLandmarkCost::Create<num_shape_coeffs, num_blendshape_coeffs>(
+                morphable_model.get_shape_model(), blendshapes, image_points[i], vertex_indices[i],
+                image.cols, image.rows);
+        ScaledLoss* landmark_cost_scaled_loss =
+            new ScaledLoss(nullptr, landmark_cost_weight, ceres::TAKE_OWNERSHIP);
+        camera_costfunction.AddResidualBlock(landmark_cost, landmark_cost_scaled_loss,
+                                             &camera_rotation.coeffs()[0], &camera_translation.data()[0],
+                                             &fov_y, &shape_coefficients[0], &blendshape_coefficients[0]);
     }
-    camera_costfunction.SetParameterBlockConstant(&shape_coefficients[0]); // keep the shape constant
+    // Keep the model coeffs constant (i.e. all at zero):
+    camera_costfunction.SetParameterBlockConstant(&shape_coefficients[0]);
     camera_costfunction.SetParameterBlockConstant(&blendshape_coefficients[0]);
-    if (use_perspective)
-    {
-        camera_costfunction.SetParameterUpperBound(
-            &camera_translation_and_intrinsics[0], 2,
-            -std::numeric_limits<double>::epsilon()); // t_z has to be negative
-        camera_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 3,
-                                                   0.01); // fov in radians, must be > 0
-    } else
-    {
-        camera_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 2,
-                                                   1.0); // frustum_scale must be > 0
-    }
-    QuaternionParameterization* camera_fit_quaternion_parameterisation = new QuaternionParameterization;
-    camera_costfunction.SetParameterization(&camera_rotation[0], camera_fit_quaternion_parameterisation);
-
+    // Keep the fov_y constant too:
+    camera_costfunction.SetParameterBlockConstant(&fov_y);
+    // Keep t_z negative (i.e. we want to look at the model from the front):
+    camera_costfunction.SetParameterUpperBound(&camera_translation[0], 2,
+                                               -std::numeric_limits<double>::epsilon());
+    // Keep the fov_y (in radians) > 0 (but we're not optimising for it now):
+    // camera_costfunction.SetParameterLowerBound(&fov_y, 0, 0.01);
+
+    EigenQuaternionManifold* quaternion_manifold = new EigenQuaternionManifold;
+    camera_costfunction.SetManifold(&camera_rotation.coeffs()[0], quaternion_manifold);
+
+    // Set up the solver, and optimise:
     Solver::Options solver_options;
-    solver_options.linear_solver_type = ITERATIVE_SCHUR;
-    solver_options.num_threads = 8;
+    // solver_options.linear_solver_type = ITERATIVE_SCHUR;
+    // solver_options.num_threads = 8;
     solver_options.minimizer_progress_to_stdout = true;
     // solver_options.max_num_iterations = 100;
     Solver::Summary solver_summary;
@@ -268,63 +266,41 @@
 
     // Draw the mean-face landmarks projected using the estimated camera:
     // Construct the rotation & translation (model-view) matrices, projection matrix, and viewport:
-    glm::dquat estimated_rotation(camera_rotation[0], camera_rotation[1], camera_rotation[2],
-                                  camera_rotation[3]);
-    auto rot_mtx = glm::mat4_cast(estimated_rotation);
+    Eigen::Matrix4d model_view_mtx = Eigen::Matrix4d::Identity();
+    model_view_mtx.block<3, 3>(0, 0) = camera_rotation.toRotationMatrix();
+    model_view_mtx.col(3).head<3>() = camera_translation;
     const double aspect = static_cast<double>(image.cols) / image.rows;
-    auto get_translation_matrix = [](auto&& camera_translation_and_intrinsics, auto&& use_perspective) {
-        if (use_perspective)
-        {
-            return glm::translate(glm::dvec3(camera_translation_and_intrinsics[0],
-                                             camera_translation_and_intrinsics[1],
-                                             camera_translation_and_intrinsics[2]));
-        } else
-        {
-            return glm::translate(
-                glm::dvec3(camera_translation_and_intrinsics[0], camera_translation_and_intrinsics[1], 0.0));
-        }
-    };
-    auto get_projection_matrix = [](auto&& camera_translation_and_intrinsics, auto&& aspect,
-                                    auto&& use_perspective) {
-        if (use_perspective)
-        {
-            const auto& focal = camera_translation_and_intrinsics[3];
-            return glm::perspective(focal, aspect, 0.1, 1000.0);
-        } else
-        {
-            const auto& frustum_scale = camera_translation_and_intrinsics[2];
-            return glm::ortho(-1.0 * aspect * frustum_scale, 1.0 * aspect * frustum_scale,
-                              -1.0 * frustum_scale, 1.0 * frustum_scale);
-        }
-    };
-    auto t_mtx = get_translation_matrix(camera_translation_and_intrinsics, use_perspective);
-    auto projection_mtx = get_projection_matrix(camera_translation_and_intrinsics, aspect, use_perspective);
-    const glm::dvec4 viewport(0, image.rows, image.cols, -image.rows); // OpenCV convention
+    auto projection_mtx = fitting::perspective(fov_y, aspect, 0.1, 1000.0);
+    // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp.
+    const Eigen::Vector4d viewport(0, image.rows, image.cols, -image.rows); // OpenCV convention
 
-    auto mean_mesh = morphable_model.get_mean();
+    const auto& mean_mesh = morphable_model.get_mean();
     for (auto idx : vertex_indices)
     {
-        glm::dvec3 point_3d(mean_mesh.vertices[idx][0], mean_mesh.vertices[idx][1],
-                            mean_mesh.vertices[idx][2]); // The 3D model point
-        glm::dvec3 projected_point = glm::project(point_3d, t_mtx * rot_mtx, projection_mtx, viewport);
-        cv::circle(outimg, cv::Point2f(projected_point.x, projected_point.y), 3, {0.0f, 0.0f, 255.0f}); // red
+        const auto& point_3d = mean_mesh.vertices[idx];
+        const auto projected_point =
+            fitting::project(point_3d.cast<double>(), model_view_mtx, projection_mtx, viewport);
+        cv::circle(outimg, cv::Point2f(projected_point.x(), projected_point.y()), 3,
+                   {0.0f, 0.0f, 255.0f}); // red
     }
+    // Draw the subset of the detected landmarks that we use in the fitting (yellow):
     for (const auto& lm : image_points)
     {
-        cv::circle(outimg, cv::Point2f(lm(0), lm(1)), 3,
-                   {0.0f, 255.0f, 255.0f}); // yellow: subset of the detected LMs that we use
+        cv::circle(outimg, cv::Point2f(lm(0), lm(1)), 3, {0.0f, 255.0f, 255.0f});
     }
-    auto euler_angles = glm::eulerAngles(estimated_rotation); // returns [P, Y, R]
-    fitting_log << "Pose fit with mean shape:\tYaw " << glm::degrees(euler_angles[1]) << ", Pitch "
-                << glm::degrees(euler_angles[0]) << ", Roll " << glm::degrees(euler_angles[2])
-                << "; t & f: " << camera_translation_and_intrinsics << '\n';
-    fitting_log << "Ceres took: "
+
+    fitting_log << "Pose-only fitting took: "
                 << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms.\n";
 
-    // Contour fitting:
+    // Now add contour fitting:
     // These are the additional contour-correspondences we're going to find and then use:
+    // Note: This needs extracting the yaw angle from the rotation quaternion, which Eigen currently doesn't
+    // have a direct function for. Not using contour fitting for now.
+    // euler_angles = glm::eulerAngles(estimated_rotation); // returns [P, Y, R]; where estimated_rotation is
+    // a glm::quat.
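+    // A possible Eigen-only route (untested sketch): convert the quaternion to a rotation matrix and
+    // decompose that, e.g.:
+    //   const Eigen::Vector3d angles = camera_rotation.toRotationMatrix().eulerAngles(2, 1, 0); // [Z Y X]
+    //   const double yaw_angle = core::degrees(angles[1]);
+    // Eigen's eulerAngles() constrains its first angle to [0, pi] though, so the decomposition can differ
+    // from glm::eulerAngles() and would need verifying before it's used for the contour fitting.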
+    /*
     vector<Eigen::Vector2f> image_points_contour; // the 2D landmark points
-    vector<int> vertex_indices_contour; // their corresponding 3D vertex indices
+    vector<int> vertex_indices_contour;           // their corresponding 3D vertex indices
     // For each 2D contour landmark, get the corresponding 3D vertex point and vertex id:
     std::tie(image_points_contour, std::ignore, vertex_indices_contour) =
         fitting::get_contour_correspondences(landmarks, ibug_contour, model_contour,
@@ -333,64 +309,59 @@
     using eos::fitting::concat;
     vertex_indices = concat(vertex_indices, vertex_indices_contour);
     image_points = concat(image_points, image_points_contour);
+    */
 
     // Full fitting - Estimate shape and pose, given the previous pose estimate:
     start = std::chrono::steady_clock::now();
     Problem fitting_costfunction;
-    // Landmark constraint:
+    // Landmark cost:
     for (int i = 0; i < image_points.size(); ++i)
     {
-        CostFunction* cost_function =
-            new AutoDiffCostFunction<fitting::LandmarkCost, 2 /* residuals */, 4 /* camera rotation */,
-                                     num_cam_trans_intr_params, 10 /* shape coeffs */, 6 /* bs coeffs */>(
-                new fitting::LandmarkCost(morphable_model.get_shape_model(), blendshapes, image_points[i],
-                                          vertex_indices[i], image.cols, image.rows, use_perspective));
-        fitting_costfunction.AddResidualBlock(cost_function, NULL, &camera_rotation[0],
-                                              &camera_translation_and_intrinsics[0], &shape_coefficients[0],
-                                              &blendshape_coefficients[0]);
+        auto landmark_cost =
+            fitting::PerspectiveProjectionLandmarkCost::Create<num_shape_coeffs, num_blendshape_coeffs>(
+                morphable_model.get_shape_model(), blendshapes, image_points[i], vertex_indices[i],
+                image.cols, image.rows);
+        ScaledLoss* landmark_cost_scaled_loss =
+            new ScaledLoss(nullptr, landmark_cost_weight, ceres::TAKE_OWNERSHIP);
+        fitting_costfunction.AddResidualBlock(landmark_cost, landmark_cost_scaled_loss,
+                                              &camera_rotation.coeffs()[0], &camera_translation.data()[0],
+                                              &fov_y, &shape_coefficients[0], &blendshape_coefficients[0]);
     }
-    // Shape prior and bounds, for 10 shape coefficients:
-    CostFunction* shape_prior_cost = fitting::NormCost::Create<10>();
+    // Prior and bounds for the shape coefficients:
+    CostFunction* shape_prior_cost = fitting::NormCost::Create<num_shape_coeffs>();
     ScaledLoss* shape_prior_scaled_loss =
-        new ScaledLoss(nullptr, 35.0, Ownership::TAKE_OWNERSHIP);
+        new ScaledLoss(nullptr, shape_prior_cost_weight,
+                       Ownership::TAKE_OWNERSHIP); // weight was 35.0 previously, but it was inside the
+                                                   // residual; now it's outside
     fitting_costfunction.AddResidualBlock(shape_prior_cost, shape_prior_scaled_loss, &shape_coefficients[0]);
-    for (int i = 0; i < 10; ++i)
+    for (int i = 0; i < num_shape_coeffs; ++i)
     {
         fitting_costfunction.SetParameterLowerBound(&shape_coefficients[0], i, -3.0);
         fitting_costfunction.SetParameterUpperBound(&shape_coefficients[0], i, 3.0);
     }
     // Prior and constraints on blendshapes:
-    CostFunction* blendshapes_prior_cost = fitting::NormCost::Create<6>();
+    CostFunction* blendshapes_prior_cost = fitting::NormCost::Create<num_blendshape_coeffs>();
     LossFunction* blendshapes_prior_scaled_loss =
-        new ScaledLoss(new SoftLOneLoss(1.0), 10.0, Ownership::TAKE_OWNERSHIP);
+        new ScaledLoss(new SoftLOneLoss(1.0), blendshapes_prior_cost_weight,
+                       Ownership::TAKE_OWNERSHIP); // weight was 10.0 previously
     fitting_costfunction.AddResidualBlock(blendshapes_prior_cost, blendshapes_prior_scaled_loss,
                                           &blendshape_coefficients[0]);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 0, 0.0);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 1, 0.0);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 2, 0.0);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 3, 0.0);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 4, 0.0);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 5, 0.0);
-    // Some constraints on the camera translation and fov/scale:
-    if (use_perspective)
-    {
-        fitting_costfunction.SetParameterUpperBound(
-            &camera_translation_and_intrinsics[0], 2,
-            -std::numeric_limits<double>::epsilon()); // t_z has to be negative
-        fitting_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 3,
-                                                    0.01); // fov in radians, must be > 0
-    } else
+    for (int i = 0; i < num_blendshape_coeffs; ++i)
     {
-        fitting_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 2,
-                                                    1.0); // frustum_scale must be > 0
+        fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], i, 0.0);
+        fitting_costfunction.SetParameterUpperBound(&blendshape_coefficients[0], i, 1.0);
     }
-
-    QuaternionParameterization* full_fit_quaternion_parameterisation = new QuaternionParameterization;
-    fitting_costfunction.SetParameterization(&camera_rotation[0], full_fit_quaternion_parameterisation);
-
-    // Colour model fitting (this needs a Morphable Model with colour (albedo) model, see note above main()):
+    // Keep t_z negative (i.e. we want to look at the model from the front):
+    fitting_costfunction.SetParameterUpperBound(&camera_translation[0], 2,
+                                                -std::numeric_limits<double>::epsilon());
+    // Keep the fov_y (in radians) > 0:
+    fitting_costfunction.SetParameterLowerBound(&fov_y, 0, 0.01);
+    // Note: We create a new manifold object, since camera_costfunction took ownership of the previous one,
+    // and will delete it upon destruction.
+    EigenQuaternionManifold* quaternion_manifold_full_fitting = new EigenQuaternionManifold;
+    fitting_costfunction.SetManifold(&camera_rotation.coeffs()[0], quaternion_manifold_full_fitting);
+
+    // Colour model cost (this needs a Morphable Model with colour (albedo) model, see note above main()):
     if (!morphable_model.has_color_model())
     {
-        cout << "Error: The MorphableModel used does not contain a colour (albedo) model. ImageCost requires "
-                "a model that contains a colour PCA model. Please use a "
-                "different morphable model, or remove this section.";
+        cout << "Error: The MorphableModel used does not contain a colour (albedo) model. VertexColorCost "
+                "requires a model that contains a colour PCA model. Please use a "
+                "different morphable model, or remove this section.";
        return EXIT_FAILURE;
     }
@@ -398,89 +369,81 @@
-    std::vector<double> colour_coefficients;
-    colour_coefficients.resize(10);
-    // Add a residual for each vertex:
+    // Add a colour residual for each vertex:
+    cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
+    const core::Image3u image_eos_rgb = eos::core::from_mat(image);
+    cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
     for (int i = 0; i < morphable_model.get_shape_model().get_data_dimension() / 3; ++i)
     {
-        CostFunction* cost_function =
-            new AutoDiffCostFunction<fitting::ImageCost, 3 /* residuals */, 4 /* camera rotation */,
-                                     num_cam_trans_intr_params, 10 /* shape coeffs */, 6 /* bs coeffs */,
-                                     10 /* colour coeffs */>(
-                new fitting::ImageCost(morphable_model, blendshapes, image, i, use_perspective));
-        fitting_costfunction.AddResidualBlock(cost_function, NULL, &camera_rotation[0],
-                                              &camera_translation_and_intrinsics[0], &shape_coefficients[0],
-                                              &blendshape_coefficients[0], &colour_coefficients[0]);
+        auto vertex_color_cost =
+            fitting::VertexColorCost::Create<num_shape_coeffs, num_blendshape_coeffs, num_color_coeffs>(
+                morphable_model.get_shape_model(), blendshapes, morphable_model.get_color_model(), i,
+                image_eos_rgb);
+        ScaledLoss* vertex_color_cost_scaled_loss =
+            new ScaledLoss(nullptr, vertex_color_cost_weight, ceres::TAKE_OWNERSHIP);
+        fitting_costfunction.AddResidualBlock(vertex_color_cost, vertex_color_cost_scaled_loss,
+                                              &camera_rotation.coeffs()[0], &camera_translation.data()[0],
+                                              &fov_y, &shape_coefficients[0], &blendshape_coefficients[0],
+                                              &color_coefficients[0]);
     }
-    // Prior and bounds, for 10 colour coefficients:
-    CostFunction* color_prior_cost = fitting::NormCost::Create<10>();
-    ScaledLoss* color_prior_scaled_loss =
-        new ScaledLoss(nullptr, 35.0, Ownership::TAKE_OWNERSHIP);
-    fitting_costfunction.AddResidualBlock(color_prior_cost, color_prior_scaled_loss, &colour_coefficients[0]);
-    for (int i = 0; i < 10; ++i)
+    // Prior and bounds, for the colour coefficients:
+    CostFunction* color_prior_cost = fitting::NormCost::Create<num_color_coeffs>();
+    ScaledLoss* color_prior_scaled_loss = new ScaledLoss(
+        nullptr, color_prior_cost_weight, Ownership::TAKE_OWNERSHIP); // weight was previously 35.0
+    fitting_costfunction.AddResidualBlock(color_prior_cost, color_prior_scaled_loss, &color_coefficients[0]);
+    for (int i = 0; i < num_color_coeffs; ++i)
     {
-        fitting_costfunction.SetParameterLowerBound(&colour_coefficients[0], i, -3.0);
-        fitting_costfunction.SetParameterUpperBound(&colour_coefficients[0], i, 3.0);
+        fitting_costfunction.SetParameterLowerBound(&color_coefficients[0], i, -3.0);
+        fitting_costfunction.SetParameterUpperBound(&color_coefficients[0], i, 3.0);
     }
 
     // Set different options for the full fitting:
-    /* solver_options.linear_solver_type = ceres::ITERATIVE_SCHUR;
-    //solver_options.linear_solver_type = ceres::DENSE_QR;
-    //solver_options.minimizer_type = ceres::TRUST_REGION; // default I think
-    //solver_options.minimizer_type = ceres::LINE_SEARCH;
-    solver_options.num_threads = 8;
-    solver_options.num_linear_solver_threads = 8; // only SPARSE_SCHUR can use this
-    solver_options.minimizer_progress_to_stdout = true;
-    solver_options.max_num_iterations = 100;
-    */
+    // solver_options.linear_solver_type = ceres::ITERATIVE_SCHUR;
+    // solver_options.linear_solver_type = ceres::DENSE_QR;
+    // solver_options.minimizer_type = ceres::TRUST_REGION; // default I think
+    // solver_options.minimizer_type = ceres::LINE_SEARCH;
+    solver_options.num_threads = 16; // Make sure to adjust this if you have fewer (or more) CPU cores.
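+    // An alternative (untested sketch) is to derive the thread count from the machine, e.g.:
+    //   solver_options.num_threads = std::max(1u, std::thread::hardware_concurrency());
+    // This needs <thread> (and <algorithm> for std::max); hardware_concurrency() returns 0 when the core
+    // count cannot be determined, hence the max.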
+    // solver_options.minimizer_progress_to_stdout = true;
+    // solver_options.max_num_iterations = 100;
     Solve(solver_options, &fitting_costfunction, &solver_summary);
     std::cout << solver_summary.BriefReport() << "\n";
     end = std::chrono::steady_clock::now();
 
     // Draw the landmarks projected using all estimated parameters:
     // Construct the rotation & translation (model-view) matrices, projection matrix, and viewport:
-    estimated_rotation =
-        glm::dquat(camera_rotation[0], camera_rotation[1], camera_rotation[2], camera_rotation[3]);
-    rot_mtx = glm::mat4_cast(estimated_rotation);
-    t_mtx = get_translation_matrix(camera_translation_and_intrinsics, use_perspective);
-    projection_mtx = get_projection_matrix(camera_translation_and_intrinsics, aspect, use_perspective);
+    model_view_mtx = Eigen::Matrix4d::Identity();
+    model_view_mtx.block<3, 3>(0, 0) = camera_rotation.toRotationMatrix();
+    model_view_mtx.col(3).head<3>() = camera_translation;
+    projection_mtx = fitting::perspective(fov_y, aspect, 0.1, 1000.0);
 
     auto vectord_to_vectorf = [](const std::vector<double>& vec) {
         return std::vector<float>(std::begin(vec), std::end(vec));
     };
-    auto blendshape_coeffs_float = vectord_to_vectorf(blendshape_coefficients);
-    Eigen::VectorXf shape_ceres =
-        morphable_model.get_shape_model().draw_sample(shape_coefficients) +
-        to_matrix(blendshapes) *
-            Eigen::Map<Eigen::VectorXf>(blendshape_coeffs_float.data(), blendshape_coeffs_float.size());
-    core::Mesh mesh = morphablemodel::sample_to_mesh(
-        shape_ceres, morphable_model.get_color_model().draw_sample(colour_coefficients),
-        morphable_model.get_shape_model().get_triangle_list(),
-        morphable_model.get_color_model().get_triangle_list(), morphable_model.get_texture_coordinates());
+    const auto shape_coeffs_float = vectord_to_vectorf(shape_coefficients);
+    const auto blendshape_coeffs_float = vectord_to_vectorf(blendshape_coefficients);
+    const auto color_coeffs_float = vectord_to_vectorf(color_coefficients);
+
+    morphablemodel::MorphableModel morphable_model_with_expressions(
+        morphable_model.get_shape_model(), blendshapes, morphable_model.get_color_model(),
+        morphable_model.get_landmark_definitions(), morphable_model.get_texture_coordinates(),
+        morphable_model.get_texture_triangle_indices());
+    const core::Mesh mesh = morphable_model_with_expressions.draw_sample(
+        shape_coeffs_float, blendshape_coeffs_float, color_coeffs_float);
+
     for (auto idx : vertex_indices)
     {
-        glm::dvec3 point_3d(mesh.vertices[idx][0], mesh.vertices[idx][1],
-                            mesh.vertices[idx][2]); // The 3D model point
-        glm::dvec3 projected_point = glm::project(point_3d, t_mtx * rot_mtx, projection_mtx, viewport);
-        cv::circle(outimg, cv::Point2f(projected_point.x, projected_point.y), 3,
+        const auto& point_3d = mesh.vertices[idx];
+        const auto projected_point =
+            fitting::project(point_3d.cast<double>(), model_view_mtx, projection_mtx, viewport);
+        cv::circle(outimg, cv::Point2f(projected_point.x(), projected_point.y()), 3,
                    {0.0f, 76.0f, 255.0f}); // orange
     }
-    for (const auto& lm : image_points)
-    {
-        cv::circle(outimg, cv::Point2f(lm(0), lm(1)), 3,
-                   {0.0f, 255.0f,
-                    255.0f}); // yellow: subset of the detected LMs that we use (including contour landmarks)
-    }
-    estimated_rotation =
-        glm::dquat(camera_rotation[0], camera_rotation[1], camera_rotation[2], camera_rotation[3]);
-    euler_angles = glm::eulerAngles(estimated_rotation); // returns [P, Y, R]
-    fitting_log << "Final fit:\t\t\tYaw " << glm::degrees(euler_angles[1]) << ", Pitch "
-                << glm::degrees(euler_angles[0]) << ", Roll " << glm::degrees(euler_angles[2])
-                << "; t & f: " << camera_translation_and_intrinsics << '\n';
-    fitting_log << "Ceres took: "
-                << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms.\n";
+    // Note: We previously used contour landmarks and plotted image_points again here (in yellow), which then
+    // included the contour landmarks.
+
+    fitting_log << "Full fitting took: "
+                << std::chrono::duration_cast<std::chrono::seconds>(end - start).count() << "s.\n";
 
     cout << fitting_log.str();
 
diff --git a/include/eos/core/math.hpp b/include/eos/core/math.hpp
new file mode 100644
index 00000000..f5d75b36
--- /dev/null
+++ b/include/eos/core/math.hpp
@@ -0,0 +1,55 @@
+/*
+ * eos - A 3D Morphable Model fitting library written in modern C++11/14.
+ *
+ * File: include/eos/core/math.hpp
+ *
+ * Copyright 2023 Patrik Huber
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#ifndef EOS_MATH_HPP
+#define EOS_MATH_HPP
+
+#include <limits>
+
+namespace eos {
+namespace core {
+
+/**
+ * @brief Convert given degrees to radians.
+ */
+template <typename T>
+T radians(T degrees)
+{
+    // Note: We may want to remove this assert, to allow ceres::Jet as type.
+    static_assert(std::numeric_limits<T>::is_iec559, "radians() only accepts floating-point inputs.");
+    return degrees * static_cast<T>(0.01745329251994329576923690768489);
+}
+
+/**
+ * @brief Convert given radians to degrees.
+ */
+template <typename T>
+T degrees(T radians)
+{
+    // Note: We may want to remove this assert, to allow ceres::Jet as type.
+    static_assert(std::numeric_limits<T>::is_iec559, "degrees() only accepts floating-point inputs.");
+    return radians * static_cast<T>(57.295779513082320876798154814105);
+}
+
+} /* namespace core */
+} /* namespace eos */
+
+#endif /* EOS_MATH_HPP */
diff --git a/include/eos/fitting/ceres_nonlinear.hpp b/include/eos/fitting/ceres_nonlinear.hpp
index 4cd2d557..ac2be716 100644
--- a/include/eos/fitting/ceres_nonlinear.hpp
+++ b/include/eos/fitting/ceres_nonlinear.hpp
@@ -24,18 +24,13 @@
 #include "eos/morphablemodel/MorphableModel.hpp"
 #include "eos/morphablemodel/Blendshape.hpp"
+#include "eos/core/Image.hpp"
 
 #include "Eigen/Core"
 
-#include "glm/gtc/quaternion.hpp"
-#include "glm/gtx/transform.hpp"
-
 #include "ceres/ceres.h"
 #include "ceres/cubic_interpolation.h"
 
-#include "opencv2/core/core.hpp"
-
-#include <array>
 #include <vector>
 
 namespace eos {
@@ -43,17 +38,25 @@
 namespace fitting {
 
 // Forward declarations:
 template <typename T>
-std::array<T, 3> get_shape_point(const morphablemodel::PcaModel& shape_model,
-                                 const std::vector<morphablemodel::Blendshape>& blendshapes, int vertex_id,
-                                 const T* const shape_coeffs, const T* const blendshape_coeffs);
+Eigen::Vector3<T> get_shape_at_point(const eos::morphablemodel::PcaModel& shape_model,
+                                     const eos::morphablemodel::Blendshapes& blendshapes, int vertex_id,
+                                     Eigen::Map<const Eigen::VectorX<T>> shape_coeffs,
+                                     Eigen::Map<const Eigen::VectorX<T>> blendshape_coeffs);
+
+template <typename T>
+Eigen::Vector3<T> get_vertex_color_at_point(const eos::morphablemodel::PcaModel& color_model, int vertex_id,
+                                            Eigen::Map<const Eigen::VectorX<T>> color_coeffs);
+
+template <typename T>
+Eigen::Matrix4<T> perspective(T fov_y, T aspect, T z_near, T z_far);
 
 template <typename T>
-std::array<T, 3> get_vertex_colour(const morphablemodel::PcaModel& colour_model, int vertex_id,
-                                   const T* const colour_coeffs);
+Eigen::Vector3<T> project(const Eigen::Vector3<T>& point_3d, const Eigen::Matrix4<T>& modelview_matrix,
+                          const Eigen::Matrix4<T>& projection_matrix, const Eigen::Vector4<T>& viewport);
 
 /**
  * Cost function that consists of the parameter values themselves as residual.
- * 
+ *
  * If used with a squared loss, this corresponds to an L2 norm constraint on the parameters.
  * This class is implemented exactly like Ceres' NormalPrior.
  */
@@ -103,14 +106,12 @@ struct NormCost
 };
 
 /**
- * 2D landmark error cost function.
- *
- * Computes the landmark reprojection error in 2D.
- * Models the cost for one landmark. The residual is 2-dim, [x, y].
- * Its input params are camera parameters, shape coefficients and
- * blendshape coefficients.
+ * 2D landmark error cost function, using perspective projection (in OpenGL convention).
+ *
+ * Computes the landmark reprojection error in 2D. Models the cost for one landmark. The residual is 2-dim,
+ * [x, y]. Its input params are camera parameters, shape coefficients and blendshape coefficients.
  */
-struct LandmarkCost
+struct PerspectiveProjectionLandmarkCost
 {
 
     /**
     * Constructs a new landmark cost function object for a particular landmark/vertex id.
      *
      * Warning: Don't put in temporaries for \c shape_model and \c blendshapes! We don't make a copy, we
      * store a reference to what is given to the function!
      *
      * @param[in] shape_model A PCA 3D shape model. Do not use a temporary.
      * @param[in] blendshapes A set of 3D blendshapes. Do not use a temporary.
+     * @param[in] num_shape_coeffs Number of shape coefficients that are being optimised for.
+     * @param[in] num_blendshape_coeffs Number of blendshape coefficients that are being optimised for.
      * @param[in] observed_landmark An observed 2D landmark in an image.
      * @param[in] vertex_id The vertex id that the given observed landmark corresponds to.
      * @param[in] image_width Width of the image that the 2D landmark is from (needed for the model
      * projection).
      * @param[in] image_height Height of the image.
-     * @param[in] use_perspective Whether a perspective or an orthographic projection should be used.
      */
-    LandmarkCost(const morphablemodel::PcaModel& shape_model,
-                 const std::vector<morphablemodel::Blendshape>& blendshapes, Eigen::Vector2f observed_landmark,
-                 int vertex_id, int image_width, int image_height, bool use_perspective)
-        : shape_model(shape_model), blendshapes(blendshapes), observed_landmark(observed_landmark),
+    PerspectiveProjectionLandmarkCost(const morphablemodel::PcaModel& shape_model,
+                                      const std::vector<morphablemodel::Blendshape>& blendshapes,
+                                      int num_shape_coeffs, int num_blendshape_coeffs,
+                                      Eigen::Vector2f observed_landmark, int vertex_id, int image_width,
+                                      int image_height)
+        : shape_model(shape_model), blendshapes(blendshapes), num_shape_coeffs(num_shape_coeffs),
+          num_blendshape_coeffs(num_blendshape_coeffs), observed_landmark(observed_landmark),
           vertex_id(vertex_id), image_width(image_width), image_height(image_height),
-          aspect_ratio(static_cast<double>(image_width) / image_height), use_perspective(use_perspective){};
+          aspect_ratio(static_cast<double>(image_width) / image_height){};
 
     /**
      * Landmark cost function implementation.
      *
@@ -141,119 +146,121 @@
      * Measures the landmark reprojection error of the model with the estimated parameters and the observed 2D
      * landmarks. For one single landmark.
      *
-     * @param[in] camera_rotation A set of camera parameters, parameterised as a quaternion [w x y z].
-     * @param[in] camera_translation_and_intrinsics Camera translation and intrinsic parameters. Ortho: [t_x
-     * t_y frustum_scale]. Perspective: [t_x t_y t_z fov].
+     * @param[in] camera_rotation A set of camera rotation parameters, parameterised as an Eigen::Quaternion.
+     * @param[in] camera_translation Camera translation parameters: [t_x t_y t_z].
+     * @param[in] camera_intrinsics Camera intrinsics, containing the field of view (fov_y).
      * @param[in] shape_coeffs A set of PCA shape coefficients.
      * @param[in] blendshape_coeffs A set of blendshape coefficients.
      * @param[in] residual An array of the resulting residuals.
-     * @return Returns true. The ceres documentation is not clear about that I think.
+     * @return Whether the computation of the residuals was successful. Always returns true.
      */
     template <typename T>
-    bool operator()(const T* const camera_rotation, const T* const camera_translation_and_intrinsics,
-                    const T* const shape_coeffs, const T* const blendshape_coeffs, T* residual) const
+    bool operator()(const T* const camera_rotation, const T* const camera_translation,
+                    const T* const camera_intrinsics, const T* const shape_coeffs,
+                    const T* const blendshape_coeffs, T* residual) const
     {
-        using namespace glm;
-        // Generate shape instance (of only one vertex id!) using current parameters and 10 shape
-        // coefficients: Note: Why are we not returning a glm::tvec3<T>?
-        const auto point_arr =
-            get_shape_point<T>(shape_model, blendshapes, vertex_id, shape_coeffs, blendshape_coeffs);
+        // Generate shape instance (of only one vertex id) using current coefficients:
+        Eigen::Map<const Eigen::VectorX<T>> shape_coeffs_mapped(shape_coeffs, num_shape_coeffs);
+        Eigen::Map<const Eigen::VectorX<T>> blendshape_coeffs_mapped(blendshape_coeffs,
+                                                                     num_blendshape_coeffs);
+        const Eigen::Vector3<T> point_3d = get_shape_at_point(shape_model, blendshapes, vertex_id,
+                                                              shape_coeffs_mapped, blendshape_coeffs_mapped);
 
         // Project the point to 2D:
-        const tvec3<T> point_3d(point_arr[0], point_arr[1], point_arr[2]);
         // I think the quaternion is always normalised, because we run Ceres with a quaternion manifold.
-        const tquat<T> rot_quat(camera_rotation[0], camera_rotation[1], camera_rotation[2],
-                                camera_rotation[3]);
-        // We rotate ZXY*p, which is RPY*p. I'm not sure this matrix still corresponds to RPY - probably if we
-        // use glm::eulerAngles(), these are not RPY anymore and we'd have to adjust if we were to use
-        // rotation matrices.
-        const tmat4x4<T> rot_mtx = glm::mat4_cast(rot_quat);
+        // Previously, we used glm::tquat, which expects w, x, y, z, and called it with [0, 1, 2, 3]. Eigen
+        // stores quaternions as x, y, z, w - which is consistent here, because EigenQuaternionManifold also
+        // uses Eigen's storage order for the parameter block.
+        Eigen::Map<const Eigen::Quaternion<T>> rotation(camera_rotation);
+        Eigen::Map<const Eigen::Vector3<T>> translation(camera_translation);
+
+        Eigen::Matrix4<T> model_view_mtx = Eigen::Matrix4<T>::Identity();
+        model_view_mtx.template block<3, 3>(0, 0) = rotation.toRotationMatrix();
+        model_view_mtx.col(3).template head<3>() = translation;
 
         // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp.
-        const tvec4<T> viewport(0, image_height, image_width, -image_height); // OpenCV convention
+        const Eigen::Vector4<T> viewport(T(0), T(image_height), T(image_width),
+                                         T(-image_height)); // OpenCV convention
+
+        const T& fov = camera_intrinsics[0];
+        const auto projection_mtx = perspective(fov, T(aspect_ratio), T(0.1), T(1000.0));
 
-        tvec3<T> projected_point; // Note: could avoid default construction by using a lambda and immediate
-                                  // invocation
-        if (use_perspective)
-        {
-            const auto t_mtx = glm::translate(tvec3<T>(camera_translation_and_intrinsics[0],
-                                                       camera_translation_and_intrinsics[1],
-                                                       camera_translation_and_intrinsics[2]));
-            const T& fov = camera_translation_and_intrinsics[3];
-            const auto persp_mtx = glm::perspective(fov, T(aspect_ratio), T(0.1), T(1000.0));
-            projected_point = glm::project(point_3d, t_mtx * rot_mtx, persp_mtx, viewport);
-        } else
-        {
-            const T& frustum_scale = camera_translation_and_intrinsics[2];
-            const auto t_mtx = glm::translate(
-                tvec3<T>(camera_translation_and_intrinsics[0], camera_translation_and_intrinsics[1],
-                         0.0)); // we don't have t_z in ortho camera, it doesn't matter where it is
-            const auto ortho_mtx =
-                glm::ortho(-1.0 * aspect_ratio * frustum_scale, 1.0 * aspect_ratio * frustum_scale,
-                           -1.0 * frustum_scale, 1.0 * frustum_scale);
-            projected_point = glm::project(point_3d, t_mtx * rot_mtx, ortho_mtx, viewport);
-        }
+        Eigen::Vector3<T> projected_point = project(point_3d, model_view_mtx, projection_mtx, viewport);
 
         // Residual: Projected point minus the observed 2D landmark point
-        residual[0] = projected_point.x - T(observed_landmark[0]);
-        residual[1] = projected_point.y - T(observed_landmark[1]);
+        residual[0] = projected_point.x() - T(observed_landmark[0]);
+        residual[1] = projected_point.y() - T(observed_landmark[1]);
         return true;
     };
 
+    /**
+     * Factory to hide the construction of the CostFunction object from the client code.
+     *
+     * The number of parameters is given via the template arguments, so that we can use Ceres' fixed-size
+     * constructor.
+     */
+    template <int num_shape_coeffs, int num_blendshape_coeffs>
+    static ceres::CostFunction* Create(const eos::morphablemodel::PcaModel& shape_model,
+                                       const eos::morphablemodel::Blendshapes& blendshapes,
+                                       Eigen::Vector2f observed_landmark, int vertex_id, int image_width,
+                                       int image_height)
+    {
+        // Template arguments: 2 residuals, 4 (camera rotation), 3 (camera translation), 1 (camera
+        // intrinsics), n (shape coeffs), m (blendshape coeffs)
+        return (new ceres::AutoDiffCostFunction<PerspectiveProjectionLandmarkCost, 2, 4, 3, 1,
+                                                num_shape_coeffs, num_blendshape_coeffs>(
+            new PerspectiveProjectionLandmarkCost(shape_model, blendshapes, num_shape_coeffs,
+                                                  num_blendshape_coeffs, observed_landmark, vertex_id,
+                                                  image_width, image_height)));
+    };
+
 private:
     const morphablemodel::PcaModel& shape_model; // Or store as pointer (non-owning) or std::reference_wrapper?
     const std::vector<morphablemodel::Blendshape>& blendshapes;
+    const int num_shape_coeffs;
+    const int num_blendshape_coeffs;
     const Eigen::Vector2f observed_landmark;
     const int vertex_id;
     const int image_width;
     const int image_height;
     const double aspect_ratio;
-    const bool use_perspective;
 };
 
 /**
  * Image error cost function (at vertex locations).
- * 
+ *
  * Measures the RGB image error between a particular vertex point of the 3D
  * model at its projected location and the observed input image.
  * Models the cost for 1 vertex. The residual is 3-dim, [r, g, b].
  * Its input params are cam, shape-coeffs, BS-coeffs and colour coeffs.
  * This projects the vertex locations - so not a full rendering pass.
  */
-struct ImageCost
+struct VertexColorCost
 {
     /**
      * Constructs a new cost function object for a particular vertex id that measures the RGB image error
     * between the estimated model point and the observed input image.
      *
-     * Warning: Don't put in temporaries for \c morphable_model and \c blendshapes! We don't make a copy, we
-     * store a reference to what is given to the function!
+     * Warning: Don't put in temporaries for \c shape_model, \c blendshapes and \c color_model. We don't make
+     * a copy, we store a reference to what is given to the function.
      *
-     * @param[in] morphable_model A 3D Morphable Model. Do not use a temporary.
+     * @param[in] shape_model A PCA 3D shape model. Do not use a temporary.
      * @param[in] blendshapes A set of 3D blendshapes. Do not use a temporary.
-     * @param[in] image The observed image. TODO: We should assert that the image we get is 8UC3!
+     * @param[in] color_model A PCA 3D colour (albedo) model. Do not use a temporary.
+     * @param[in] num_shape_coeffs Number of shape coefficients that are being optimised for.
+     * @param[in] num_blendshape_coeffs Number of blendshape coefficients that are being optimised for.
+     * @param[in] num_color_coeffs Number of colour coefficients that are being optimised for.
      * @param[in] vertex_id Vertex id of the 3D model that should be projected and measured.
-     * @param[in] use_perspective Whether a perspective or an orthographic projection should be used.
-     * @throws std::runtime_error if the given \c image is not of type CV_8UC3.
+     * @param[in] image The observed image.
      */
-    ImageCost(const morphablemodel::MorphableModel& morphable_model,
-              const std::vector<morphablemodel::Blendshape>& blendshapes, cv::Mat image, int vertex_id,
-              bool use_perspective)
-        : morphable_model(morphable_model), blendshapes(blendshapes), image(image),
-          aspect_ratio(static_cast<double>(image.cols) / image.rows), vertex_id(vertex_id),
-          use_perspective(use_perspective)
-    {
-        if (image.type() != CV_8UC3)
-        {
-            throw std::runtime_error("The image given to ImageCost must be of type CV_8UC3.");
-        }
-        if (!morphable_model.has_color_model())
-        {
-            throw std::runtime_error("The MorphableModel used does not contain a colour (albedo) model. "
-                                     "ImageCost requires a model that contains a colour PCA model. You may "
-                                     "want to use the full Surrey Face Model.");
-        }
-    };
+    VertexColorCost(const eos::morphablemodel::PcaModel& shape_model,
+                    const eos::morphablemodel::Blendshapes& blendshapes,
+                    const eos::morphablemodel::PcaModel& color_model, int num_shape_coeffs,
+                    int num_blendshape_coeffs, int num_color_coeffs, int vertex_id,
+                    const eos::core::Image3u& image)
+        : shape_model(shape_model), blendshapes(blendshapes), color_model(color_model),
+          num_shape_coeffs(num_shape_coeffs), num_blendshape_coeffs(num_blendshape_coeffs),
+          num_color_coeffs(num_color_coeffs), vertex_id(vertex_id), image(image){};
 
     /**
     * Image cost function implementation.
@@ -263,110 +270,105 @@ struct ImageCost
      *
      * Todo: We should deal with visibility! Don't evaluate when the vertex is self-occluded.
      *
-     * @param[in] camera_rotation A set of camera parameters, parameterised as a quaternion [w x y z].
-     * @param[in] camera_translation_and_intrinsics Camera translation and intrinsic parameters. Ortho: [t_x
-     * t_y frustum_scale]. Perspective: [t_x t_y t_z fov].
+     * @param[in] camera_rotation A set of camera rotation parameters, parameterised as an Eigen::Quaternion.
+     * @param[in] camera_translation Camera translation parameters: [t_x t_y t_z].
+     * @param[in] camera_intrinsics Camera intrinsics, containing the field of view (fov_y).
      * @param[in] shape_coeffs A set of PCA shape coefficients.
      * @param[in] blendshape_coeffs A set of blendshape coefficients.
      * @param[in] color_coeffs A set of PCA colour (albedo) coefficients.
      * @param[in] residual An array of the resulting residuals.
-     * @return Returns true. The ceres documentation is not clear about that I think.
+     * @return Whether the computation of the residuals was successful. Always returns true.
      */
     template <typename T>
-    bool operator()(const T* const camera_rotation, const T* const camera_translation_and_intrinsics,
-                    const T* const shape_coeffs, const T* const blendshape_coeffs,
-                    const T* const color_coeffs, T* residual) const
+    bool operator()(const T* const camera_rotation, const T* const camera_translation,
+                    const T* const camera_intrinsics, const T* const shape_coeffs,
+                    const T* const blendshape_coeffs, const T* const color_coeffs, T* residual) const
     {
-        using namespace glm;
-        // Note: The following is all duplicated code with LandmarkCost. Fix if possible performance-wise.
-        // Generate 3D shape point using the current parameters:
-        const auto point_arr = get_shape_point<T>(morphable_model.get_shape_model(), blendshapes, vertex_id,
-                                                  shape_coeffs, blendshape_coeffs);
+        // The following code blocks are identical to PerspectiveProjectionLandmarkCost:
+
+        // Generate shape instance (of only one vertex id) using current coefficients:
+        Eigen::Map<const Eigen::VectorX<T>> shape_coeffs_mapped(shape_coeffs, num_shape_coeffs);
+        Eigen::Map<const Eigen::VectorX<T>> blendshape_coeffs_mapped(blendshape_coeffs,
+                                                                     num_blendshape_coeffs);
+        const Eigen::Vector3<T> point_3d = get_shape_at_point(shape_model, blendshapes, vertex_id,
+                                                              shape_coeffs_mapped, blendshape_coeffs_mapped);
 
         // Project the point to 2D:
-        const tvec3<T> point_3d(point_arr[0], point_arr[1], point_arr[2]);
         // I think the quaternion is always normalised, because we run Ceres with a quaternion manifold.
-        const tquat<T> rot_quat(camera_rotation[0], camera_rotation[1], camera_rotation[2],
-                                camera_rotation[3]);
-        // We rotate ZXY*p, which is RPY*p. I'm not sure this matrix still corresponds to RPY - probably if we
-        // use glm::eulerAngles(), these are not RPY anymore and we'd have to adjust if we were to use
-        // rotation matrices.
-        const tmat4x4<T> rot_mtx = glm::mat4_cast(rot_quat);
+        // Previously, we used glm::tquat, which expects w, x, y, z, and called it with [0, 1, 2, 3]. Eigen
+        // stores quaternions as x, y, z, w - which is consistent here, because EigenQuaternionManifold also
+        // uses Eigen's storage order for the parameter block.
+        Eigen::Map<const Eigen::Quaternion<T>> rotation(camera_rotation);
+        Eigen::Map<const Eigen::Vector3<T>> translation(camera_translation);
 
-        // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp.
-        const tvec4<T> viewport(0, image.rows, image.cols, -image.rows); // OpenCV convention
+        Eigen::Matrix4<T> model_view_mtx = Eigen::Matrix4<T>::Identity();
+        model_view_mtx.template block<3, 3>(0, 0) = rotation.toRotationMatrix();
+        model_view_mtx.col(3).template head<3>() = translation;
 
-        tvec3<T> projected_point;
-        if (use_perspective)
-        {
-            const auto t_mtx = glm::translate(tvec3<T>(camera_translation_and_intrinsics[0],
-                                                       camera_translation_and_intrinsics[1],
-                                                       camera_translation_and_intrinsics[2]));
-            const T& focal = camera_translation_and_intrinsics[3];
-            const auto persp_mtx = glm::perspective(focal, T(aspect_ratio), T(0.1), T(1000.0));
-            projected_point = glm::project(point_3d, t_mtx * rot_mtx, persp_mtx, viewport);
-        } else
-        {
-            const T& frustum_scale = camera_translation_and_intrinsics[2];
-            const auto t_mtx = glm::translate(
-                tvec3<T>(camera_translation_and_intrinsics[0], camera_translation_and_intrinsics[1],
-                         0.0)); // we don't have t_z in ortho camera, it doesn't matter where it is
-            const auto ortho_mtx =
-                glm::ortho(-1.0 * aspect_ratio * frustum_scale, 1.0 * aspect_ratio * frustum_scale,
-                           -1.0 * frustum_scale, 1.0 * frustum_scale);
-            projected_point = glm::project(point_3d, t_mtx * rot_mtx, ortho_mtx, viewport);
-        }
+        // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp.
+        const Eigen::Vector4<T> viewport(T(0), T(image.height()), T(image.width()),
+                                         T(-image.height())); // OpenCV convention
+
+        const T& fov = camera_intrinsics[0];
+        const double aspect_ratio = static_cast<double>(image.width()) / image.height();
+        const auto projection_mtx = perspective(fov, T(aspect_ratio), T(0.1), T(1000.0));
+
+        Eigen::Vector3<T> projected_point = project(point_3d, model_view_mtx, projection_mtx, viewport);
+        // End of identical block with PerspectiveProjectionLandmarkCost.
+
+        // Access the image colour value at the projected pixel location, if inside the image - otherwise
+        // Ceres uses the value from the grid edge.
+        // Note: We could store the BiCubicInterpolator as member variable.
+        // The default template arguments for Grid2D are <uchar, 1, true, true>; except for the dimension,
+        // they're the right ones for us.
+        ceres::Grid2D<std::uint8_t, 3> grid(&image(0, 0).data()[0], 0, image.height(), 0, image.width());
+        ceres::BiCubicInterpolator<ceres::Grid2D<std::uint8_t, 3>> interpolator(grid);
+        T observed_colour[3];
+        interpolator.Evaluate(projected_point.y(), projected_point.x(), &observed_colour[0]);
+
+        // Get the vertex's colour value:
+        Eigen::Map<const Eigen::VectorX<T>> color_coeffs_mapped(color_coeffs, num_color_coeffs);
+        const auto model_color = get_vertex_color_at_point(color_model, vertex_id, color_coeffs_mapped);
+        // Returns RGB, between [0, 1].
+
+        // Residual: Vertex colour of model point minus the observed colour in the 2D image.
+        // observed_colour is RGB, model_colour is RGB. Residual will be RGB.
+        residual[0] = model_color[0] * 255.0 - T(observed_colour[0]); // r
+        residual[1] = model_color[1] * 255.0 - T(observed_colour[1]); // g
+        residual[2] = model_color[2] * 255.0 - T(observed_colour[2]); // b
 
-        // Access the image colour value at the projected pixel location, if inside the image - otherwise set
-        // to (127, 127, 127) (maybe not ideal...):
-        if (projected_point.y < T(0) || projected_point.y >= T(image.rows) || projected_point.x < T(0) ||
-            projected_point.x >= T(image.cols))
-        {
-            // The point is outside the image.
-            residual[0] = T(127.0);
-            residual[1] = T(127.0);
-            residual[2] = T(127.0);
-
-            // TODO: What does interpolator.Evaluate() return in this case?
-            /* Grid2D<uchar, 3> grid(image.ptr(0), 0, image.rows, 0, image.cols);
-            BiCubicInterpolator<Grid2D<uchar, 3>> interpolator(grid);
-            T observed_colour[3];
-            interpolator.Evaluate(projected_y, projected_x, &observed_colour[0]); // says it returns false
-            when (r, c) is out of bounds... but it returns void?
-            //std::cout << observed_colour[0] << ", " << observed_colour[1] << ", " << observed_colour[2] << "\n";
-            */
-            // It kind of looks like as if when it's out of bounds, there will be a vector out of bound access
-            // and an assert/crash? No, in debugging, it looks like it just interpolates or something. Not
-            // clear currently.
-        } else
-        {
-            // Note: We could store the BiCubicInterpolator as member variable.
-            // The default template arguments for Grid2D are <uchar, 1, true, true>; except for the dimension,
-            // they're the right ones for us.
-            ceres::Grid2D<uchar, 3> grid(image.ptr(0), 0, image.rows, 0, image.cols);
-            ceres::BiCubicInterpolator<ceres::Grid2D<uchar, 3>> interpolator(grid);
-            T observed_colour[3];
-            interpolator.Evaluate(projected_point.y, projected_point.x, &observed_colour[0]);
-
-            // This probably needs to be modified if we add a light model.
-            auto model_colour = get_vertex_colour(morphable_model.get_color_model(), vertex_id, color_coeffs);
-            // I think this returns RGB, and between [0, 1].
-
-            // Residual: Vertex colour of model point minus the observed colour in the 2D image
-            // observed_colour is BGR, model_colour is RGB. Residual will be RGB.
-            residual[0] = model_colour[0] * 255.0 - T(observed_colour[2]);
-            residual[1] = model_colour[1] * 255.0 - T(observed_colour[1]);
-            residual[2] = model_colour[2] * 255.0 - T(observed_colour[0]);
-        }
         return true;
     };
 
+    /**
+     * Factory to hide the construction of the CostFunction object from the client code.
+     *
+     * The number of parameters is given via the template arguments, so that we can use Ceres' fixed-size
+     * constructor.
+     */
+    template <int num_shape_coeffs, int num_blendshape_coeffs, int num_color_coeffs>
+    static ceres::CostFunction* Create(const eos::morphablemodel::PcaModel& shape_model,
+                                       const eos::morphablemodel::Blendshapes& blendshapes,
+                                       const eos::morphablemodel::PcaModel& color_model, int vertex_id,
+                                       const eos::core::Image3u& image)
+    {
+        // Template arguments: 3 residuals (RGB), 4 (camera rotation), 3 (camera translation), 1 (camera
+        // intrinsics), x (shape coeffs), x (expression coeffs), x (colour coeffs)
+        return (new ceres::AutoDiffCostFunction<VertexColorCost, 3, 4, 3, 1, num_shape_coeffs,
+                                                num_blendshape_coeffs, num_color_coeffs>(
+            new VertexColorCost(shape_model, blendshapes, color_model, num_shape_coeffs,
+                                num_blendshape_coeffs, num_color_coeffs, vertex_id, image)));
+    };
+
 private:
-    const morphablemodel::MorphableModel& morphable_model; // Or store as pointer (non-owning) or std::reference_wrapper?
-    const std::vector<morphablemodel::Blendshape>& blendshapes;
-    const cv::Mat image; // the observed image
-    const double aspect_ratio;
+    const eos::morphablemodel::PcaModel&
+        shape_model; // Or store as pointer (non-owning) or std::reference_wrapper?
+    const eos::morphablemodel::Blendshapes& blendshapes;
+    const eos::morphablemodel::PcaModel& color_model;
+    const int num_shape_coeffs;
+    const int num_blendshape_coeffs;
+    const int num_color_coeffs;
     const int vertex_id;
-    const bool use_perspective;
+    const eos::core::Image3u& image; // the observed image
 };
 
 /**
@@ -380,45 +382,25 @@
 * @return The 3D point.
  */
 template <typename T>
-std::array<T, 3> get_shape_point(const morphablemodel::PcaModel& shape_model,
-                                 const std::vector<morphablemodel::Blendshape>& blendshapes, int vertex_id,
-                                 const T* const shape_coeffs, const T* const blendshape_coeffs)
+Eigen::Vector3<T> get_shape_at_point(const eos::morphablemodel::PcaModel& shape_model,
+                                     const eos::morphablemodel::Blendshapes& blendshapes, int vertex_id,
+                                     Eigen::Map<const Eigen::VectorX<T>> shape_coeffs,
+                                     Eigen::Map<const Eigen::VectorX<T>> blendshape_coeffs)
 {
-    int num_coeffs_fitting = 10; // Todo: Should be inferred or a function parameter!
-    auto mean = shape_model.get_mean_at_point(vertex_id);
-    auto basis = shape_model.get_rescaled_pca_basis_at_point(vertex_id);
-    // Computing Shape = mean + basis * coeffs:
-    // Note: Could use an Eigen matrix with type T to see if it gives a speedup.
-    std::array<T, 3> point{T(mean[0]), T(mean[1]), T(mean[2])};
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[0] += T(basis.row(0).col(i)(0)) * shape_coeffs[i]; // it seems to be ~15% faster when these are
-                                                                 // static_cast<double>() instead of T()?
-    }
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[1] += T(basis.row(1).col(i)(0)) * shape_coeffs[i];
-    }
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[2] += T(basis.row(2).col(i)(0)) * shape_coeffs[i];
-    }
-    // Adding the blendshape offsets:
-    // Shape = mean + basis * coeffs + blendshapes * bs_coeffs:
-    auto num_blendshapes = blendshapes.size();
-    for (int i = 0; i < num_blendshapes; ++i)
-    {
-        point[0] += T(blendshapes[i].deformation(3 * vertex_id + 0)) * blendshape_coeffs[i];
-    }
-    for (int i = 0; i < num_blendshapes; ++i)
-    {
-        point[1] += T(blendshapes[i].deformation(3 * vertex_id + 1)) * blendshape_coeffs[i];
-    }
-    for (int i = 0; i < num_blendshapes; ++i)
-    {
-        point[2] += T(blendshapes[i].deformation(3 * vertex_id + 2)) * blendshape_coeffs[i];
-    }
-    return point;
+    // Computing Shape = mean + shape_basis*shape_coeffs + blendshapes*blendshape_coeffs:
+    const Eigen::Vector3f mean = shape_model.get_mean_at_point(vertex_id);
+    const Eigen::Vector3<T> shape_vector =
+        shape_model.get_rescaled_pca_basis_at_point(vertex_id).leftCols(shape_coeffs.size()).cast<T>() *
+        shape_coeffs;
+    Eigen::Vector3<T> expression_vector(T(0.0), T(0.0), T(0.0));
+    for (std::size_t i = 0; i < blendshape_coeffs.size(); i++)
+    {
+        expression_vector.x() += T(blendshapes[i].deformation(vertex_id * 3 + 0)) * blendshape_coeffs(i);
+        expression_vector.y() += T(blendshapes[i].deformation(vertex_id * 3 + 1)) * blendshape_coeffs(i);
+        expression_vector.z() += T(blendshapes[i].deformation(vertex_id * 3 + 2)) * blendshape_coeffs(i);
+    }
+
+    return Eigen::Vector3<T>(mean.cast<T>() + shape_vector + expression_vector);
 };
 
 /**
@@ -430,31 +412,87 @@
 * @return The colour. As RGB? In [0, 1]?
  */
 template <typename T>
-std::array<T, 3> get_vertex_colour(const morphablemodel::PcaModel& color_model, int vertex_id,
-                                   const T* const color_coeffs)
+Eigen::Vector3<T> get_vertex_color_at_point(const eos::morphablemodel::PcaModel& color_model, int vertex_id,
+                                            Eigen::Map<const Eigen::VectorX<T>> color_coeffs)
 {
-    int num_coeffs_fitting = 10; // Todo: Should be inferred or a function parameter!
-    auto mean = color_model.get_mean_at_point(vertex_id);
-    auto basis = color_model.get_rescaled_pca_basis_at_point(vertex_id);
-    // Computing Colour = mean + basis * coeffs
-    // Note: Could use an Eigen matrix with type T to see if it gives a speedup.
-    std::array<T, 3> point{T(mean[0]), T(mean[1]), T(mean[2])};
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[0] += T(basis.row(0).col(i)(0)) * color_coeffs[i]; // it seems to be ~15% faster when these are
-                                                                 // static_cast<double>() instead of T()?
-    }
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[1] += T(basis.row(1).col(i)(0)) * color_coeffs[i];
-    }
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[2] += T(basis.row(2).col(i)(0)) * color_coeffs[i];
-    }
-    return point;
+    const Eigen::Vector3f mean = color_model.get_mean_at_point(vertex_id);
+    const Eigen::Vector3<T> color_vector =
+        color_model.get_rescaled_pca_basis_at_point(vertex_id).leftCols(color_coeffs.size()).cast<T>() *
+        color_coeffs;
+
+    return Eigen::Vector3<T>(mean.cast<T>() + color_vector);
 };
 
+/**
+ * Creates a matrix for a right-handed, symmetric perspective-view frustum.
+ *
+ * The function follows the OpenGL clip volume definition, which is also the GLM default. The near and far
+ * clip planes correspond to z normalised device coordinates of -1 and +1 respectively.
+ *
+ * This function is equivalent to glm::perspectiveRH_NO(...).
+ *
+ * More details can be found on the gluPerspective man page:
+ * https://www.khronos.org/registry/OpenGL-Refpages/gl2.1/xhtml/gluPerspective.xml.
+ *
+ * @param[in] fov_y Specifies the field of view angle in the y direction. Expressed in radians.
+ * @param[in] aspect Specifies the aspect ratio that determines the field of view in the x direction. The
+ * aspect ratio is the ratio of x (width) to y (height).
+ * @param[in] z_near Specifies the distance from the viewer to the near clipping plane (always positive).
+ * @param[in] z_far Specifies the distance from the viewer to the far clipping plane (always positive).
+ * @tparam T A floating-point scalar type, ceres::Jet, or similar compatible type.
+ * @return The corresponding perspective projection matrix.
+ */
+template <typename T>
+Eigen::Matrix4<T> perspective(T fov_y, T aspect, T z_near, T z_far)
+{
+    // Will this assert work? std::abs probably won't work on T?
+    // assert(abs(aspect - std::numeric_limits<T>::epsilon()) > static_cast<T>(0));
+
+    const T tan_half_fov_y = tan(fov_y / static_cast<T>(2)); // for T = ceres::Jet, ADL finds ceres::tan
+
+    // Maybe construct with static_cast<T>(0)? => No, doesn't have such a c'tor.
+    // Could do Eigen::Matrix4<T> result = {{1, 2, 3, 4}, {1, 2, 3, 4}, ...} I think.
+    Eigen::Matrix4<T> result = Eigen::Matrix4<T>::Zero();
+    result(0, 0) = static_cast<T>(1) / (aspect * tan_half_fov_y);
+    result(1, 1) = static_cast<T>(1) / (tan_half_fov_y);
+    result(2, 2) = -(z_far + z_near) / (z_far - z_near);
+    result(2, 3) = -(static_cast<T>(2) * z_far * z_near) / (z_far - z_near);
+    result(3, 2) = -static_cast<T>(1);
+    return result;
+}
+
+/**
+ * Projects the given point_3d (from object coordinates) into window coordinates.
+ *
+ * The function follows the OpenGL clip volume definition. The near and far clip planes correspond to
+ * z normalised device coordinates of -1 and +1 respectively.
+ * This function is equivalent to glm::projectNO(...).
+ *
+ * More details can be found on the gluProject man page:
+ * https://www.khronos.org/registry/OpenGL-Refpages/gl2.1/xhtml/gluProject.xml.
+ *
+ * @param[in] point_3d A 3D point in object coordinates.
+ * @param[in] modelview_matrix A model-view matrix, transforming the point into view (camera) space.
+ * @param[in] projection_matrix The projection matrix to be used.
+ * @param[in] viewport The viewport transformation to be used.
+ * @tparam T A floating-point scalar type, ceres::Jet, or similar compatible type.
+ * @return The computed window coordinates.
+ */
+template <typename T>
+Eigen::Vector3<T> project(const Eigen::Vector3<T>& point_3d, const Eigen::Matrix4<T>& modelview_matrix,
+                          const Eigen::Matrix4<T>& projection_matrix, const Eigen::Vector4<T>& viewport)
+{
+    Eigen::Vector4<T> projected_point = projection_matrix * modelview_matrix * point_3d.homogeneous();
+    projected_point /= projected_point.w();
+    projected_point =
+        projected_point * static_cast<T>(0.5) +
+        Eigen::Vector4<T>(static_cast<T>(0.5), static_cast<T>(0.5), static_cast<T>(0.5), static_cast<T>(0.5));
+    projected_point.x() = projected_point.x() * T(viewport(2)) + T(viewport(0));
+    projected_point.y() = projected_point.y() * T(viewport(3)) + T(viewport(1));
+
+    return projected_point.template head<3>();
+}
+
 } /* namespace fitting */
 } /* namespace eos */
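--

A note on the claimed GLM equivalence of perspective() and project(): it can be sanity-checked with a small
standalone program along the following lines. This is an untested sketch, not part of the patch; it assumes
GLM is available (glm::perspectiveRH_NO and glm::projectNO from the GLM ext headers) and that the eos
headers above are on the include path.

    #include "eos/core/math.hpp"
    #include "eos/fitting/ceres_nonlinear.hpp"

    #include "glm/glm.hpp"
    #include "glm/ext/matrix_clip_space.hpp" // glm::perspectiveRH_NO
    #include "glm/ext/matrix_projection.hpp" // glm::projectNO

    #include "Eigen/Core"

    #include <iostream>

    int main()
    {
        const double fov_y = eos::core::radians(45.0);
        const double aspect = 640.0 / 480.0;
        const Eigen::Vector3d point(10.0, -20.0, -200.0);

        // Eigen-based versions from this patch:
        const Eigen::Matrix4d modelview = Eigen::Matrix4d::Identity();
        const Eigen::Matrix4d projection = eos::fitting::perspective(fov_y, aspect, 0.1, 1000.0);
        const Eigen::Vector3d p =
            eos::fitting::project(point, modelview, projection, Eigen::Vector4d(0, 480, 640, -480));

        // GLM versions, with the same (OpenCV-convention) viewport:
        const glm::dmat4 glm_projection = glm::perspectiveRH_NO(fov_y, aspect, 0.1, 1000.0);
        const glm::dvec3 q = glm::projectNO(glm::dvec3(10.0, -20.0, -200.0), glm::dmat4(1.0),
                                            glm_projection, glm::dvec4(0, 480, 640, -480));

        std::cout << (p - Eigen::Vector3d(q.x, q.y, q.z)).norm() << "\n"; // should print ~0
        return 0;
    }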