diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e572a97..daad4373 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,7 @@ set(HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/read_obj.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/write_obj.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/Rect.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/math.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/morphablemodel/PcaModel.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/morphablemodel/MorphableModel.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/morphablemodel/Blendshape.hpp diff --git a/examples/fit-model-ceres.cpp b/examples/fit-model-ceres.cpp index 8907acfb..dfca9d13 100644 --- a/examples/fit-model-ceres.cpp +++ b/examples/fit-model-ceres.cpp @@ -3,7 +3,7 @@ * * File: examples/fit-model-ceres.cpp * - * Copyright 2016 Patrik Huber + * Copyright 2016-2023 Patrik Huber * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,24 +17,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#define GLM_FORCE_UNRESTRICTED_GENTYPE #include "eos/core/Landmark.hpp" #include "eos/core/LandmarkMapper.hpp" #include "eos/core/read_pts_landmarks.hpp" +#include "eos/core/Image.hpp" +#include "eos/core/image/opencv_interop.hpp" #include "eos/core/write_obj.hpp" +#include "eos/core/math.hpp" +#include "eos/morphablemodel/MorphableModel.hpp" +#include "eos/morphablemodel/Blendshape.hpp" #include "eos/fitting/ceres_nonlinear.hpp" #include "eos/fitting/contour_correspondence.hpp" -#include "eos/fitting/fitting.hpp" -#include "eos/morphablemodel/Blendshape.hpp" #include "Eigen/Core" -#include "glm/ext.hpp" -#include "glm/glm.hpp" - #include "ceres/ceres.h" -#include "ceres/cubic_interpolation.h" -#include "ceres/rotation.h" #include "opencv2/core/core.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -48,7 +45,6 @@ #include using namespace eos; -using namespace glm; using namespace ceres; namespace po = boost::program_options; namespace fs = boost::filesystem; @@ -168,7 +164,7 @@ int main(int argc, char* argv[]) std::vector blendshapes = eos::morphablemodel::load_blendshapes(blendshapesfile.string()); - // Draw the loaded landmarks: + // Draw the loaded landmarks (blue): Mat outimg = image.clone(); for (const auto& lm : landmarks) { @@ -176,21 +172,19 @@ int main(int argc, char* argv[]) cv::Point2f(lm.coordinates[0] + 2.0f, lm.coordinates[1] + 2.0f), {255, 0, 0}); } - constexpr bool use_perspective = false; - // These will be the 2D image points and their corresponding 3D vertex id's used for the fitting: vector image_points; // the 2D landmark points - vector vertex_indices; // their corresponding vertex indices + vector vertex_indices; // their corresponding vertex indices // Sub-select all the landmarks which we have a mapping for (i.e. that are defined in the 3DMM): for (int i = 0; i < landmarks.size(); ++i) { - auto converted_name = landmark_mapper.convert(landmarks[i].name); + const auto converted_name = landmark_mapper.convert(landmarks[i].name); if (!converted_name) { // no mapping defined for the current landmark continue; } - int vertex_idx = std::stoi(converted_name.value()); + const int vertex_idx = std::stoi(converted_name.value()); vertex_indices.emplace_back(vertex_idx); image_points.emplace_back(landmarks[i].coordinates); } @@ -199,66 +193,70 @@ int main(int argc, char* argv[]) std::stringstream fitting_log; auto start = std::chrono::steady_clock::now(); - std::vector camera_rotation; // Quaternion, [w x y z]. Todo: Actually, use std::array for all of these. - camera_rotation.resize(4); // initialises with zeros - camera_rotation[0] = 1.0; - std::vector camera_translation_and_intrinsics; - constexpr int num_cam_trans_intr_params = use_perspective ? 4 : 3; - // Parameters for the orthographic projection: [t_x, t_y, frustum_scale] - // And perspective projection: [t_x, t_y, t_z, fov]. - // Origin is assumed at center of image, and no lens distortions. + // The following have to be constexpr because they're used in template parameters in this implementation: + constexpr int num_shape_coeffs = 50; + constexpr int num_blendshape_coeffs = 6; + constexpr int num_color_coeffs = 50; + + // Weights for the different cost function terms: + // Note: To find good weights, ceres::ResidualBlockId can be used to store the IDs to each added residual + // blocks. Then, one can use ceres::Problem::EvaluateOptions and ceres::Problem::Evaluate(...) to retrieve + // the values of each individual term. We don't do that here for simplicity reasons. See the Ceres + // documentation for details, and https://github.com/patrikhuber/eos/issues/348. + const double landmark_cost_weight = 100.0; + const double shape_prior_cost_weight = 500000.0; + const double blendshapes_prior_cost_weight = 25.0; + const double color_prior_cost_weight = 500000.0; + const double vertex_color_cost_weight = 1.0; + + // The model parameters that we're estimating: + using ModelCoefficients = vector; + ModelCoefficients shape_coefficients(num_shape_coeffs, 0.0); + ModelCoefficients blendshape_coefficients(num_blendshape_coeffs, 0.0); + ModelCoefficients color_coefficients(num_color_coeffs, 0.0); + + // Parameters for the perspective projection: A rotation quaternion, [t_x, t_y, t_z], and fov_y (field of + // view). + // The origin is assumed at center of image, and no lens distortions. // Note: Actually, we estimate the model-view matrix and not the camera position. But one defines the // other. - camera_translation_and_intrinsics.resize(num_cam_trans_intr_params); // initialises with zeros - if (use_perspective) - { - camera_translation_and_intrinsics[2] = -400.0; // Move the model back (along the -z axis) - camera_translation_and_intrinsics[3] = glm::radians(45.0f); // fov - } else - { - camera_translation_and_intrinsics[2] = 110.0; // frustum_scale - } - - std::vector shape_coefficients; - shape_coefficients.resize(10); // Todo: Currently, the value '10' is hard-coded everywhere. Make it dynamic. - std::vector blendshape_coefficients; - blendshape_coefficients.resize(6); + Eigen::Quaterniond camera_rotation(1.0, 0.0, 0.0, 0.0); // The c'tor takes wxyz. Storage is xyzw. + Eigen::Vector3d camera_translation(0.0, 0.0, -400.0); // Move the model back (along the -z axis) + double fov_y = core::radians(45.0); + // Set up just a landmark cost and optimise for rotation and translation, with fov_y and the 3DMM + // parameters fixed (i.e. using the mean face): Problem camera_costfunction; for (int i = 0; i < image_points.size(); ++i) { - CostFunction* cost_function = - new AutoDiffCostFunction( - new fitting::LandmarkCost(morphable_model.get_shape_model(), blendshapes, image_points[i], - vertex_indices[i], image.cols, image.rows, use_perspective)); - - camera_costfunction.AddResidualBlock(cost_function, NULL, &camera_rotation[0], - &camera_translation_and_intrinsics[0], &shape_coefficients[0], - &blendshape_coefficients[0]); + auto landmark_cost = + fitting::PerspectiveProjectionLandmarkCost::Create( + morphable_model.get_shape_model(), blendshapes, image_points[i], vertex_indices[i], + image.cols, image.rows); + ScaledLoss* landmark_cost_scaled_loss = + new ScaledLoss(nullptr, landmark_cost_weight, ceres::TAKE_OWNERSHIP); + camera_costfunction.AddResidualBlock(landmark_cost, landmark_cost_scaled_loss, + &camera_rotation.coeffs()[0], &camera_translation.data()[0], + &fov_y, &shape_coefficients[0], &blendshape_coefficients[0]); } - camera_costfunction.SetParameterBlockConstant(&shape_coefficients[0]); // keep the shape constant + // Keep the model coeffs constant (i.e. all at zero): + camera_costfunction.SetParameterBlockConstant(&shape_coefficients[0]); camera_costfunction.SetParameterBlockConstant(&blendshape_coefficients[0]); - if (use_perspective) - { - camera_costfunction.SetParameterUpperBound( - &camera_translation_and_intrinsics[0], 2, - -std::numeric_limits::epsilon()); // t_z has to be negative - camera_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 3, - 0.01); // fov in radians, must be > 0 - } else - { - camera_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 2, - 1.0); // frustum_scale must be > 0 - } - QuaternionParameterization* camera_fit_quaternion_parameterisation = new QuaternionParameterization; - camera_costfunction.SetParameterization(&camera_rotation[0], camera_fit_quaternion_parameterisation); - + // Keep the fov_y constant too: + camera_costfunction.SetParameterBlockConstant(&fov_y); + // Keep t_z negative (i.e. we want to look at the model from the front): + camera_costfunction.SetParameterUpperBound(&camera_translation[0], 2, + -std::numeric_limits::epsilon()); + // Keep the fov_y (in radians) > 0 (but we're not optimising for it now): + // camera_costfunction.SetParameterLowerBound(&fov_y, 0, 0.01); + + EigenQuaternionManifold* quaternion_manifold = new EigenQuaternionManifold; + camera_costfunction.SetManifold(&camera_rotation.coeffs()[0], quaternion_manifold); + + // Set up the solver, and optimise: Solver::Options solver_options; - solver_options.linear_solver_type = ITERATIVE_SCHUR; - solver_options.num_threads = 8; + // solver_options.linear_solver_type = ITERATIVE_SCHUR; + // solver_options.num_threads = 8; solver_options.minimizer_progress_to_stdout = true; // solver_options.max_num_iterations = 100; Solver::Summary solver_summary; @@ -268,63 +266,41 @@ int main(int argc, char* argv[]) // Draw the mean-face landmarks projected using the estimated camera: // Construct the rotation & translation (model-view) matrices, projection matrix, and viewport: - glm::dquat estimated_rotation(camera_rotation[0], camera_rotation[1], camera_rotation[2], - camera_rotation[3]); - auto rot_mtx = glm::mat4_cast(estimated_rotation); + Eigen::Matrix4d model_view_mtx = Eigen::Matrix4d::Identity(); + model_view_mtx.block<3, 3>(0, 0) = camera_rotation.toRotationMatrix(); + model_view_mtx.col(3).head<3>() = camera_translation; const double aspect = static_cast(image.cols) / image.rows; - auto get_translation_matrix = [](auto&& camera_translation_and_intrinsics, auto&& use_perspective) { - if (use_perspective) - { - return glm::translate(glm::dvec3(camera_translation_and_intrinsics[0], - camera_translation_and_intrinsics[1], - camera_translation_and_intrinsics[2])); - } else - { - return glm::translate( - glm::dvec3(camera_translation_and_intrinsics[0], camera_translation_and_intrinsics[1], 0.0)); - } - }; - auto get_projection_matrix = [](auto&& camera_translation_and_intrinsics, auto&& aspect, - auto&& use_perspective) { - if (use_perspective) - { - const auto& focal = camera_translation_and_intrinsics[3]; - return glm::perspective(focal, aspect, 0.1, 1000.0); - } else - { - const auto& frustum_scale = camera_translation_and_intrinsics[2]; - return glm::ortho(-1.0 * aspect * frustum_scale, 1.0 * aspect * frustum_scale, - -1.0 * frustum_scale, 1.0 * frustum_scale); - } - }; - auto t_mtx = get_translation_matrix(camera_translation_and_intrinsics, use_perspective); - auto projection_mtx = get_projection_matrix(camera_translation_and_intrinsics, aspect, use_perspective); - const glm::dvec4 viewport(0, image.rows, image.cols, -image.rows); // OpenCV convention + auto projection_mtx = fitting::perspective(fov_y, aspect, 0.1, 1000.0); + // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp. + const Eigen::Vector4d viewport(0, image.rows, image.cols, -image.rows); // OpenCV convention - auto mean_mesh = morphable_model.get_mean(); + const auto& mean_mesh = morphable_model.get_mean(); for (auto idx : vertex_indices) { - glm::dvec3 point_3d(mean_mesh.vertices[idx][0], mean_mesh.vertices[idx][1], - mean_mesh.vertices[idx][2]); // The 3D model point - glm::dvec3 projected_point = glm::project(point_3d, t_mtx * rot_mtx, projection_mtx, viewport); - cv::circle(outimg, cv::Point2f(projected_point.x, projected_point.y), 3, {0.0f, 0.0f, 255.0f}); // red + const auto& point_3d = mean_mesh.vertices[idx]; + const auto projected_point = + fitting::project(point_3d.cast(), model_view_mtx, projection_mtx, viewport); + cv::circle(outimg, cv::Point2f(projected_point.x(), projected_point.y()), 3, + {0.0f, 0.0f, 255.0f}); // red } + // Draw the subset of the detected landmarks that we use in the fitting (yellow): for (const auto& lm : image_points) { - cv::circle(outimg, cv::Point2f(lm(0), lm(1)), 3, - {0.0f, 255.0f, 255.0f}); // yellow: subset of the detected LMs that we use + cv::circle(outimg, cv::Point2f(lm(0), lm(1)), 3, {0.0f, 255.0f, 255.0f}); } - auto euler_angles = glm::eulerAngles(estimated_rotation); // returns [P, Y, R] - fitting_log << "Pose fit with mean shape:\tYaw " << glm::degrees(euler_angles[1]) << ", Pitch " - << glm::degrees(euler_angles[0]) << ", Roll " << glm::degrees(euler_angles[2]) - << "; t & f: " << camera_translation_and_intrinsics << '\n'; - fitting_log << "Ceres took: " + + fitting_log << "Pose-only fitting took: " << std::chrono::duration_cast(end - start).count() << "ms.\n"; - // Contour fitting: + // Now add contour fitting: // These are the additional contour-correspondences we're going to find and then use: + // Note: This needs extracting the yaw angle from the rotation quaternion, which Eigen currently doesn't + // have a function for. Not using contour fitting for now. + // euler_angles = glm::eulerAngles(estimated_rotation); // returns [P, Y, R]; where estimated_rotation is + // a glm::quat. + /* vector image_points_contour; // the 2D landmark points - vector vertex_indices_contour; // their corresponding 3D vertex indices + vector vertex_indices_contour; // their corresponding 3D vertex indices // For each 2D contour landmark, get the corresponding 3D vertex point and vertex id: std::tie(image_points_contour, std::ignore, vertex_indices_contour) = fitting::get_contour_correspondences(landmarks, ibug_contour, model_contour, @@ -333,64 +309,59 @@ int main(int argc, char* argv[]) using eos::fitting::concat; vertex_indices = concat(vertex_indices, vertex_indices_contour); image_points = concat(image_points, image_points_contour); + */ // Full fitting - Estimate shape and pose, given the previous pose estimate: start = std::chrono::steady_clock::now(); Problem fitting_costfunction; - // Landmark constraint: + // Landmark cost: for (int i = 0; i < image_points.size(); ++i) { - CostFunction* cost_function = - new AutoDiffCostFunction( - new fitting::LandmarkCost(morphable_model.get_shape_model(), blendshapes, image_points[i], - vertex_indices[i], image.cols, image.rows, use_perspective)); - fitting_costfunction.AddResidualBlock(cost_function, NULL, &camera_rotation[0], - &camera_translation_and_intrinsics[0], &shape_coefficients[0], - &blendshape_coefficients[0]); + auto landmark_cost = + fitting::PerspectiveProjectionLandmarkCost::Create( + morphable_model.get_shape_model(), blendshapes, image_points[i], vertex_indices[i], + image.cols, image.rows); + ScaledLoss* landmark_cost_scaled_loss = + new ScaledLoss(nullptr, landmark_cost_weight, ceres::TAKE_OWNERSHIP); + fitting_costfunction.AddResidualBlock(landmark_cost, landmark_cost_scaled_loss, + &camera_rotation.coeffs()[0], &camera_translation.data()[0], + &fov_y, &shape_coefficients[0], &blendshape_coefficients[0]); } - // Shape prior and bounds, for 10 shape coefficients: - CostFunction* shape_prior_cost = fitting::NormCost::Create<10>(); + // Prior and bounds for the shape coefficients: + CostFunction* shape_prior_cost = fitting::NormCost::Create(); ScaledLoss* shape_prior_scaled_loss = - new ScaledLoss(nullptr, 35.0, Ownership::TAKE_OWNERSHIP); + new ScaledLoss(nullptr, shape_prior_cost_weight, + Ownership::TAKE_OWNERSHIP); // weight was 35.0 previously, but it was inside the + // residual, now it's outside fitting_costfunction.AddResidualBlock(shape_prior_cost, shape_prior_scaled_loss, &shape_coefficients[0]); - for (int i = 0; i < 10; ++i) + for (int i = 0; i < num_shape_coeffs; ++i) { fitting_costfunction.SetParameterLowerBound(&shape_coefficients[0], i, -3.0); fitting_costfunction.SetParameterUpperBound(&shape_coefficients[0], i, 3.0); } // Prior and constraints on blendshapes: - CostFunction* blendshapes_prior_cost = fitting::NormCost::Create<6>(); + CostFunction* blendshapes_prior_cost = fitting::NormCost::Create(); LossFunction* blendshapes_prior_scaled_loss = - new ScaledLoss(new SoftLOneLoss(1.0), 10.0, Ownership::TAKE_OWNERSHIP); + new ScaledLoss(new SoftLOneLoss(1.0), blendshapes_prior_cost_weight, + Ownership::TAKE_OWNERSHIP); // weight was 10.0 previously fitting_costfunction.AddResidualBlock(blendshapes_prior_cost, blendshapes_prior_scaled_loss, &blendshape_coefficients[0]); - fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 0, 0.0); - fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 1, 0.0); - fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 2, 0.0); - fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 3, 0.0); - fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 4, 0.0); - fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 5, 0.0); - // Some constraints on the camera translation and fov/scale: - if (use_perspective) - { - fitting_costfunction.SetParameterUpperBound( - &camera_translation_and_intrinsics[0], 2, - -std::numeric_limits::epsilon()); // t_z has to be negative - fitting_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 3, - 0.01); // fov in radians, must be > 0 - } else + for (int i = 0; i < num_blendshape_coeffs; ++i) { - fitting_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 2, - 1.0); // frustum_scale must be > 0 + fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], i, 0.0); + fitting_costfunction.SetParameterUpperBound(&blendshape_coefficients[0], i, 1.0); } - - QuaternionParameterization* full_fit_quaternion_parameterisation = new QuaternionParameterization; - fitting_costfunction.SetParameterization(&camera_rotation[0], full_fit_quaternion_parameterisation); - - // Colour model fitting (this needs a Morphable Model with colour (albedo) model, see note above main()): + // Keep t_z negative (i.e. we want to look at the model from the front): + fitting_costfunction.SetParameterUpperBound(&camera_translation[0], 2, + -std::numeric_limits::epsilon()); + // Keep the fov_y (in radians) > 0: + fitting_costfunction.SetParameterLowerBound(&fov_y, 0, 0.01); + // Note: We create a new manifold object, since camera_costfunction took ownership of the previous one, + // and will delete it upon destruction. + EigenQuaternionManifold* quaternion_manifold_full_fitting = new EigenQuaternionManifold; + fitting_costfunction.SetManifold(&camera_rotation.coeffs()[0], quaternion_manifold_full_fitting); + + // Colour model cost (this needs a Morphable Model with colour (albedo) model, see note above main()): if (!morphable_model.has_color_model()) { cout << "Error: The MorphableModel used does not contain a colour (albedo) model. ImageCost requires " @@ -398,89 +369,81 @@ int main(int argc, char* argv[]) "different morphable model, or remove this section."; return EXIT_FAILURE; } - std::vector colour_coefficients; - colour_coefficients.resize(10); - // Add a residual for each vertex: + // Add a colour residual for each vertex: + cv::cvtColor(image, image, cv::COLOR_BGR2RGB); + const core::Image3u image_eos_rgb = eos::core::from_mat(image); + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); for (int i = 0; i < morphable_model.get_shape_model().get_data_dimension() / 3; ++i) { - CostFunction* cost_function = - new AutoDiffCostFunction( - new fitting::ImageCost(morphable_model, blendshapes, image, i, use_perspective)); - fitting_costfunction.AddResidualBlock(cost_function, NULL, &camera_rotation[0], - &camera_translation_and_intrinsics[0], &shape_coefficients[0], - &blendshape_coefficients[0], &colour_coefficients[0]); + auto vertex_color_cost = + fitting::VertexColorCost::Create( + morphable_model.get_shape_model(), blendshapes, morphable_model.get_color_model(), i, + image_eos_rgb); + ScaledLoss* vertex_color_cost_scaled_loss = + new ScaledLoss(nullptr, vertex_color_cost_weight, ceres::TAKE_OWNERSHIP); + fitting_costfunction.AddResidualBlock(vertex_color_cost, vertex_color_cost_scaled_loss, + &camera_rotation.coeffs()[0], &camera_translation.data()[0], + &fov_y, &shape_coefficients[0], &blendshape_coefficients[0], + &color_coefficients[0]); } - // Prior and bounds, for 10 colour coefficients: - CostFunction* color_prior_cost = fitting::NormCost::Create<10>(); - ScaledLoss* color_prior_scaled_loss = - new ScaledLoss(nullptr, 35.0, Ownership::TAKE_OWNERSHIP); - fitting_costfunction.AddResidualBlock(color_prior_cost, color_prior_scaled_loss, &colour_coefficients[0]); - for (int i = 0; i < 10; ++i) + // Prior and bounds, for the colour coefficients: + CostFunction* color_prior_cost = fitting::NormCost::Create(); + ScaledLoss* color_prior_scaled_loss = new ScaledLoss( + nullptr, color_prior_cost_weight, Ownership::TAKE_OWNERSHIP); // weight was previously 35.0 + fitting_costfunction.AddResidualBlock(color_prior_cost, color_prior_scaled_loss, &color_coefficients[0]); + for (int i = 0; i < num_color_coeffs; ++i) { - fitting_costfunction.SetParameterLowerBound(&colour_coefficients[0], i, -3.0); - fitting_costfunction.SetParameterUpperBound(&colour_coefficients[0], i, 3.0); + fitting_costfunction.SetParameterLowerBound(&color_coefficients[0], i, -3.0); + fitting_costfunction.SetParameterUpperBound(&color_coefficients[0], i, 3.0); } // Set different options for the full fitting: - /* solver_options.linear_solver_type = ceres::ITERATIVE_SCHUR; - //solver_options.linear_solver_type = ceres::DENSE_QR; - //solver_options.minimizer_type = ceres::TRUST_REGION; // default I think - //solver_options.minimizer_type = ceres::LINE_SEARCH; - solver_options.num_threads = 8; - solver_options.num_linear_solver_threads = 8; // only SPARSE_SCHUR can use this - solver_options.minimizer_progress_to_stdout = true; - solver_options.max_num_iterations = 100; - */ + // solver_options.linear_solver_type = ceres::ITERATIVE_SCHUR; + // solver_options.linear_solver_type = ceres::DENSE_QR; + // solver_options.minimizer_type = ceres::TRUST_REGION; // default I think + // solver_options.minimizer_type = ceres::LINE_SEARCH; + solver_options.num_threads = 16; // Make sure to adjust this, if you have fewer (or more) CPU cores. + // solver_options.minimizer_progress_to_stdout = true; + // solver_options.max_num_iterations = 100; Solve(solver_options, &fitting_costfunction, &solver_summary); std::cout << solver_summary.BriefReport() << "\n"; end = std::chrono::steady_clock::now(); // Draw the landmarks projected using all estimated parameters: // Construct the rotation & translation (model-view) matrices, projection matrix, and viewport: - estimated_rotation = - glm::dquat(camera_rotation[0], camera_rotation[1], camera_rotation[2], camera_rotation[3]); - rot_mtx = glm::mat4_cast(estimated_rotation); - t_mtx = get_translation_matrix(camera_translation_and_intrinsics, use_perspective); - projection_mtx = get_projection_matrix(camera_translation_and_intrinsics, aspect, use_perspective); + model_view_mtx = Eigen::Matrix4d::Identity(); + model_view_mtx.block<3, 3>(0, 0) = camera_rotation.toRotationMatrix(); + model_view_mtx.col(3).head<3>() = camera_translation; + projection_mtx = fitting::perspective(fov_y, aspect, 0.1, 1000.0); auto vectord_to_vectorf = [](const std::vector& vec) { return std::vector(std::begin(vec), std::end(vec)); }; - auto blendshape_coeffs_float = vectord_to_vectorf(blendshape_coefficients); - Eigen::VectorXf shape_ceres = - morphable_model.get_shape_model().draw_sample(shape_coefficients) + - to_matrix(blendshapes) * - Eigen::Map(blendshape_coeffs_float.data(), blendshape_coeffs_float.size()); - core::Mesh mesh = morphablemodel::sample_to_mesh( - shape_ceres, morphable_model.get_color_model().draw_sample(colour_coefficients), - morphable_model.get_shape_model().get_triangle_list(), - morphable_model.get_color_model().get_triangle_list(), morphable_model.get_texture_coordinates()); + const auto shape_coeffs_float = vectord_to_vectorf(shape_coefficients); + const auto blendshape_coeffs_float = vectord_to_vectorf(blendshape_coefficients); + const auto color_coeffs_float = vectord_to_vectorf(color_coefficients); + + morphablemodel::MorphableModel morphable_model_with_expressions( + morphable_model.get_shape_model(), blendshapes, morphable_model.get_color_model(), + morphable_model.get_landmark_definitions(), morphable_model.get_texture_coordinates(), + morphable_model.get_texture_triangle_indices()); + const core::Mesh mesh = morphable_model_with_expressions.draw_sample( + shape_coeffs_float, blendshape_coeffs_float, color_coeffs_float); + for (auto idx : vertex_indices) { - glm::dvec3 point_3d(mesh.vertices[idx][0], mesh.vertices[idx][1], - mesh.vertices[idx][2]); // The 3D model point - glm::dvec3 projected_point = glm::project(point_3d, t_mtx * rot_mtx, projection_mtx, viewport); - cv::circle(outimg, cv::Point2f(projected_point.x, projected_point.y), 3, + const auto& point_3d = mesh.vertices[idx]; + const auto projected_point = + fitting::project(point_3d.cast(), model_view_mtx, projection_mtx, viewport); + cv::circle(outimg, cv::Point2f(projected_point.x(), projected_point.y()), 3, {0.0f, 76.0f, 255.0f}); // orange } - for (const auto& lm : image_points) - { - cv::circle(outimg, cv::Point2f(lm(0), lm(1)), 3, - {0.0f, 255.0f, - 255.0f}); // yellow: subset of the detected LMs that we use (including contour landmarks) - } - estimated_rotation = - glm::dquat(camera_rotation[0], camera_rotation[1], camera_rotation[2], camera_rotation[3]); - euler_angles = glm::eulerAngles(estimated_rotation); // returns [P, Y, R] - fitting_log << "Final fit:\t\t\tYaw " << glm::degrees(euler_angles[1]) << ", Pitch " - << glm::degrees(euler_angles[0]) << ", Roll " << glm::degrees(euler_angles[2]) - << "; t & f: " << camera_translation_and_intrinsics << '\n'; - fitting_log << "Ceres took: " - << std::chrono::duration_cast(end - start).count() << "ms.\n"; + // Note: We previously used contour landmarks and plotted image_points again here (in yellow), which then + // included the contour landmarks. + + fitting_log << "Full fitting took: " + << std::chrono::duration_cast(end - start).count() << "s.\n"; cout << fitting_log.str(); diff --git a/include/eos/core/math.hpp b/include/eos/core/math.hpp new file mode 100644 index 00000000..f5d75b36 --- /dev/null +++ b/include/eos/core/math.hpp @@ -0,0 +1,55 @@ +/* + * eos - A 3D Morphable Model fitting library written in modern C++11/14. + * + * File: include/eos/core/math.hpp + * + * Copyright 2023 Patrik Huber + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#ifndef EOS_MATH_HPP +#define EOS_MATH_HPP + +#include + +namespace eos { +namespace core { + +/** + * @brief Convert given degrees to radians. + */ +template +T radians(T degrees) +{ + // Note: We may want to remove this assert, to allow ceres::Jet as type. + static_assert(std::numeric_limits::is_iec559, "radians() only accepts floating-point inputs."); + return degrees * static_cast(0.01745329251994329576923690768489); +} + +/** + * @brief Convert given radians to degree. + */ +template +T degrees(T radians) +{ + // Note: We may want to remove this assert, to allow ceres::Jet as type. + static_assert(std::numeric_limits::is_iec559, "radians() only accepts floating-point inputs."); + return radians * static_cast(57.295779513082320876798154814105); +} + +} /* namespace core */ +} /* namespace eos */ + +#endif /* EOS_MATH_HPP */ diff --git a/include/eos/fitting/ceres_nonlinear.hpp b/include/eos/fitting/ceres_nonlinear.hpp index 4cd2d557..ac2be716 100644 --- a/include/eos/fitting/ceres_nonlinear.hpp +++ b/include/eos/fitting/ceres_nonlinear.hpp @@ -24,18 +24,13 @@ #include "eos/morphablemodel/MorphableModel.hpp" #include "eos/morphablemodel/Blendshape.hpp" +#include "eos/core/Image.hpp" #include "Eigen/Core" -#include "glm/gtc/quaternion.hpp" -#include "glm/gtx/transform.hpp" - #include "ceres/ceres.h" #include "ceres/cubic_interpolation.h" -#include "opencv2/core/core.hpp" - -#include #include namespace eos { @@ -43,17 +38,25 @@ namespace fitting { // Forward declarations: template -std::array get_shape_point(const morphablemodel::PcaModel& shape_model, - const std::vector& blendshapes, int vertex_id, - const T* const shape_coeffs, const T* const blendshape_coeffs); +Eigen::Vector3 get_shape_point(const eos::morphablemodel::PcaModel& shape_model, + const eos::morphablemodel::Blendshapes& blendshapes, int vertex_id, + Eigen::Map> shape_coeffs, + Eigen::Map> blendshape_coeffs); + +template +Eigen::Vector3 get_vertex_colour(const eos::morphablemodel::PcaModel& color_model, int vertex_id, + Eigen::Map> color_coeffs); + +template +Eigen::Matrix4 perspective(T fov_y, T aspect, T z_near, T z_far); template -std::array get_vertex_colour(const morphablemodel::PcaModel& colour_model, int vertex_id, - const T* const colour_coeffs); +Eigen::Vector3 project(const Eigen::Vector3& obj, const Eigen::Matrix4& model, + const Eigen::Matrix4& proj, const Eigen::Vector4& viewport); /** * Cost function that consists of the parameter values themselves as residual. - * + * * If used with a squared loss, this corresponds to an L2 norm constraint on the parameters. * This class is implemented exactly like Ceres' NormalPrior. */ @@ -103,14 +106,12 @@ struct NormCost }; /** - * 2D landmark error cost function. - * - * Computes the landmark reprojection error in 2D. - * Models the cost for one landmark. The residual is 2-dim, [x, y]. - * Its input params are camera parameters, shape coefficients and - * blendshape coefficients. + * 2D landmark error cost function, using perspective projection (in OpenGL convention). + * + * Computes the landmark reprojection error in 2D. Models the cost for one landmark. The residual is 2-dim, + * [x, y]. Its input params are camera parameters, shape coefficients and blendshape coefficients. */ -struct LandmarkCost +struct PerspectiveProjectionLandmarkCost { /** @@ -121,19 +122,23 @@ struct LandmarkCost * * @param[in] shape_model A PCA 3D shape model. Do not use a temporary. * @param[in] blendshapes A set of 3D blendshapes. Do not use a temporary. + * @param[in] num_shape_coeffs Number of shape coefficients that are being optimised for. + * @param[in] num_blendshape_coeffs Number of blendshape coefficients that are being optimised for. * @param[in] observed_landmark An observed 2D landmark in an image. * @param[in] vertex_id The vertex id that the given observed landmark corresponds to. * @param[in] image_width Width of the image that the 2D landmark is from (needed for the model * projection). * @param[in] image_height Height of the image. - * @param[in] use_perspective Whether a perspective or an orthographic projection should be used. */ - LandmarkCost(const morphablemodel::PcaModel& shape_model, - const std::vector& blendshapes, Eigen::Vector2f observed_landmark, - int vertex_id, int image_width, int image_height, bool use_perspective) - : shape_model(shape_model), blendshapes(blendshapes), observed_landmark(observed_landmark), + PerspectiveProjectionLandmarkCost(const morphablemodel::PcaModel& shape_model, + const std::vector& blendshapes, + int num_shape_coeffs, int num_blendshape_coeffs, + Eigen::Vector2f observed_landmark, int vertex_id, int image_width, + int image_height) + : shape_model(shape_model), blendshapes(blendshapes), num_shape_coeffs(num_shape_coeffs), + num_blendshape_coeffs(num_blendshape_coeffs), observed_landmark(observed_landmark), vertex_id(vertex_id), image_width(image_width), image_height(image_height), - aspect_ratio(static_cast(image_width) / image_height), use_perspective(use_perspective){}; + aspect_ratio(static_cast(image_width) / image_height){}; /** * Landmark cost function implementation. @@ -141,119 +146,121 @@ struct LandmarkCost * Measures the landmark reprojection error of the model with the estimated parameters and the observed 2D * landmarks. For one single landmark. * - * @param[in] camera_rotation A set of camera parameters, parameterised as a quaternion [w x y z]. - * @param[in] camera_translation_and_intrinsics Camera translation and intrinsic parameters. Ortho: [t_x - * t_y frustum_scale]. Perspective: [t_x t_y t_z fov]. + * @param[in] camera_rotation A set of camera rotation parameters, parameterised as an Eigen::Quaternion. + * @param[in] camera_translation Camera translation parameters: [t_x t_y t_z]. + * @param[in] camera_intrinsics Camera intrinsics, containing the field of view (fov_y). * @param[in] shape_coeffs A set of PCA shape coefficients. * @param[in] blendshape_coeffs A set of blendshape coefficients. * @param[in] residual An array of the resulting residuals. - * @return Returns true. The ceres documentation is not clear about that I think. + * @return whether the computation of the residuals was successful. Always returns true. */ template - bool operator()(const T* const camera_rotation, const T* const camera_translation_and_intrinsics, - const T* const shape_coeffs, const T* const blendshape_coeffs, T* residual) const + bool operator()(const T* const camera_rotation, const T* const camera_translation, + const T* const camera_intrinsics, const T* const shape_coeffs, + const T* const blendshape_coeffs, T* residual) const { - using namespace glm; - // Generate shape instance (of only one vertex id!) using current parameters and 10 shape - // coefficients: Note: Why are we not returning a glm::tvec3? - const auto point_arr = - get_shape_point(shape_model, blendshapes, vertex_id, shape_coeffs, blendshape_coeffs); + // Generate shape instance (of only one vertex id) using current coefficients: + Eigen::Map> shape_coeffs_mapped(shape_coeffs, num_shape_coeffs); + Eigen::Map> blendshape_coeffs_mapped(blendshape_coeffs, + num_blendshape_coeffs); + const Eigen::Vector3 point_3d = get_shape_at_point(shape_model, blendshapes, vertex_id, + shape_coeffs_mapped, blendshape_coeffs_mapped); // Project the point to 2D: - const tvec3 point_3d(point_arr[0], point_arr[1], point_arr[2]); // I think the quaternion is always normalised because we run Ceres with QuaternionParameterization - const tquat rot_quat(camera_rotation[0], camera_rotation[1], camera_rotation[2], - camera_rotation[3]); - // We rotate ZXY*p, which is RPY*p. I'm not sure this matrix still corresponds to RPY - probably if we - // use glm::eulerAngles(), these are not RPY anymore and we'd have to adjust if we were to use - // rotation matrices. - const tmat4x4 rot_mtx = glm::mat4_cast(rot_quat); + // Previously, we used glm::tquat, which expects w, x, y, z, and called it with [0, 1, 2, 3]. + // Eigen stores it as x, y, z, w. So... are we sure this is right? + Eigen::Map> rotation(camera_rotation); + Eigen::Map> translation(camera_translation); + + Eigen::Matrix4 model_view_mtx = Eigen::Matrix4::Identity(); + model_view_mtx.block<3, 3>(0, 0) = rotation.toRotationMatrix(); + model_view_mtx.col(3).head<3>() = translation; // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp. - const tvec4 viewport(0, image_height, image_width, -image_height); // OpenCV convention + const Eigen::Vector4 viewport(T(0), T(image_height), T(image_width), + T(-image_height)); // OpenCV convention + + const T& fov = camera_intrinsics[0]; + const auto projection_mtx = perspective(fov, T(aspect_ratio), T(0.1), T(1000.0)); + + Eigen::Vector3 projected_point = project(point_3d, model_view_mtx, projection_mtx, viewport); - tvec3 projected_point; // Note: could avoid default construction by using a lambda and immediate - // invocation - if (use_perspective) - { - const auto t_mtx = glm::translate(tvec3(camera_translation_and_intrinsics[0], - camera_translation_and_intrinsics[1], - camera_translation_and_intrinsics[2])); - const T& fov = camera_translation_and_intrinsics[3]; - const auto persp_mtx = glm::perspective(fov, T(aspect_ratio), T(0.1), T(1000.0)); - projected_point = glm::project(point_3d, t_mtx * rot_mtx, persp_mtx, viewport); - } else - { - const T& frustum_scale = camera_translation_and_intrinsics[2]; - const auto t_mtx = glm::translate( - tvec3(camera_translation_and_intrinsics[0], camera_translation_and_intrinsics[1], - 0.0)); // we don't have t_z in ortho camera, it doesn't matter where it is - const auto ortho_mtx = - glm::ortho(-1.0 * aspect_ratio * frustum_scale, 1.0 * aspect_ratio * frustum_scale, - -1.0 * frustum_scale, 1.0 * frustum_scale); - projected_point = glm::project(point_3d, t_mtx * rot_mtx, ortho_mtx, viewport); - } // Residual: Projected point minus the observed 2D landmark point - residual[0] = projected_point.x - T(observed_landmark[0]); - residual[1] = projected_point.y - T(observed_landmark[1]); + residual[0] = projected_point.x() - T(observed_landmark[0]); + residual[1] = projected_point.y() - T(observed_landmark[1]); return true; }; + /** + * Factory to hide the construction of the CostFunction object from the client code. + * + * The number of parameters are given as template arguments, so that we can use Ceres' fixed-size + * constructor. + */ + template + static ceres::CostFunction* Create(const eos::morphablemodel::PcaModel& shape_model, + const eos::morphablemodel::Blendshapes& blendshapes, + Eigen::Vector2f observed_landmark, int vertex_id, int image_width, + int image_height) + { + // Template arguments: 2 residuals, 4 (camera rotation), 3 (camera translation), 1 (camera + // intrinsics), n (shape coeffs), m (blendshape coeffs) + return (new ceres::AutoDiffCostFunction( + new PerspectiveProjectionLandmarkCost(shape_model, blendshapes, num_shape_coeffs, + num_blendshape_coeffs, observed_landmark, vertex_id, + image_width, image_height))); + }; + private: const morphablemodel::PcaModel& shape_model; // Or store as pointer (non-owning) or std::reference_wrapper? const std::vector& blendshapes; + const int num_shape_coeffs; + const int num_blendshape_coeffs; const Eigen::Vector2f observed_landmark; const int vertex_id; const int image_width; const int image_height; const double aspect_ratio; - const bool use_perspective; }; /** * Image error cost function (at vertex locations). - * + * * Measures the RGB image error between a particular vertex point of the 3D * model at its projected location and the observed input image. * Models the cost for 1 vertex. The residual is 3-dim, [r, g, b]. * Its input params are cam, shape-coeffs, BS-coeffs and colour coeffs. * This projects the vertex locations - so not a full rendering pass. */ -struct ImageCost +struct VertexColorCost { /** * Constructs a new cost function object for a particular vertex id that measures the RGB image error * between the estimated model point and the observed input image. * - * Warning: Don't put in temporaries for \c morphable_model and \c blendshapes! We don't make a copy, we - * store a reference to what is given to the function! + * Warning: Don't put in temporaries for \c shape_model, \c blendshapes and \c color_model. We don't make + * a copy, we store a reference to what is given to the function. * - * @param[in] morphable_model A 3D Morphable Model. Do not use a temporary. + * @param[in] shape_model A PCA 3D shape model. Do not use a temporary. * @param[in] blendshapes A set of 3D blendshapes. Do not use a temporary. - * @param[in] image The observed image. TODO: We should assert that the image we get is 8UC3! + * @param[in] shape_model A PCA 3D color model. Do not use a temporary. + * @param[in] num_shape_coeffs Number of shape coefficients that are being optimised for. + * @param[in] num_blendshape_coeffs Number of blendshape coefficients that are being optimised for. + * @param[in] num_color_coeffs Number of colour coefficients that are being optimised for. * @param[in] vertex_id Vertex id of the 3D model that should be projected and measured. - * @param[in] use_perspective Whether a perspective or an orthographic projection should be used. - * @throws std::runtime_error if the given \c image is not of type CV_8UC3. + * @param[in] image The observed image. */ - ImageCost(const morphablemodel::MorphableModel& morphable_model, - const std::vector& blendshapes, cv::Mat image, int vertex_id, - bool use_perspective) - : morphable_model(morphable_model), blendshapes(blendshapes), image(image), - aspect_ratio(static_cast(image.cols) / image.rows), vertex_id(vertex_id), - use_perspective(use_perspective) - { - if (image.type() != CV_8UC3) - { - throw std::runtime_error("The image given to ImageCost must be of type CV_8UC3."); - } - if (!morphable_model.has_color_model()) - { - throw std::runtime_error("The MorphableModel used does not contain a colour (albedo) model. " - "ImageCost requires a model that contains a colour PCA model. You may " - "want to use the full Surrey Face Model."); - } - }; + VertexColorCost(const eos::morphablemodel::PcaModel& shape_model, + const eos::morphablemodel::Blendshapes& blendshapes, + const eos::morphablemodel::PcaModel& color_model, int num_shape_coeffs, + int num_blendshape_coeffs, int num_color_coeffs, int vertex_id, + const eos::core::Image3u& image) + : shape_model(shape_model), blendshapes(blendshapes), color_model(color_model), + num_shape_coeffs(num_shape_coeffs), num_blendshape_coeffs(num_blendshape_coeffs), + num_color_coeffs(num_color_coeffs), vertex_id(vertex_id), image(image){}; /** * Image cost function implementation. @@ -263,110 +270,105 @@ struct ImageCost * * Todo: We should deal with visibility! Don't evaluate when the vertex is self-occluded. * - * @param[in] camera_rotation A set of camera parameters, parameterised as a quaternion [w x y z]. - * @param[in] camera_translation_and_intrinsics Camera translation and intrinsic parameters. Ortho: [t_x - * t_y frustum_scale]. Perspective: [t_x t_y t_z fov]. + * @param[in] camera_rotation A set of camera rotation parameters, parameterised as an Eigen::Quaternion. + * @param[in] camera_translation Camera translation parameters: [t_x t_y t_z]. + * @param[in] camera_intrinsics Camera intrinsics, containing the field of view (fov_y). * @param[in] shape_coeffs A set of PCA shape coefficients. * @param[in] blendshape_coeffs A set of blendshape coefficients. * @param[in] color_coeffs A set of PCA colour (albedo) coefficients. * @param[in] residual An array of the resulting residuals. - * @return Returns true. The ceres documentation is not clear about that I think. + * @return whether the computation of the residuals was successful. Always returns true. */ template - bool operator()(const T* const camera_rotation, const T* const camera_translation_and_intrinsics, - const T* const shape_coeffs, const T* const blendshape_coeffs, - const T* const color_coeffs, T* residual) const + bool operator()(const T* const camera_rotation, const T* const camera_translation, + const T* const camera_intrinsics, const T* const shape_coeffs, + const T* const blendshape_coeffs, const T* const color_coeffs, T* residual) const { - using namespace glm; - // Note: The following is all duplicated code with LandmarkCost. Fix if possible performance-wise. - // Generate 3D shape point using the current parameters: - const auto point_arr = get_shape_point(morphable_model.get_shape_model(), blendshapes, vertex_id, - shape_coeffs, blendshape_coeffs); + // The following code blocks are identical to PerspectiveProjectionLandmarkCost: + + // Generate shape instance (of only one vertex id) using current coefficients: + Eigen::Map> shape_coeffs_mapped(shape_coeffs, num_shape_coeffs); + Eigen::Map> blendshape_coeffs_mapped(blendshape_coeffs, + num_blendshape_coeffs); + const Eigen::Vector3 point_3d = get_shape_at_point(shape_model, blendshapes, vertex_id, + shape_coeffs_mapped, blendshape_coeffs_mapped); // Project the point to 2D: - const tvec3 point_3d(point_arr[0], point_arr[1], point_arr[2]); // I think the quaternion is always normalised because we run Ceres with QuaternionParameterization - const tquat rot_quat(camera_rotation[0], camera_rotation[1], camera_rotation[2], - camera_rotation[3]); - // We rotate ZXY*p, which is RPY*p. I'm not sure this matrix still corresponds to RPY - probably if we - // use glm::eulerAngles(), these are not RPY anymore and we'd have to adjust if we were to use - // rotation matrices. - const tmat4x4 rot_mtx = glm::mat4_cast(rot_quat); + // Previously, we used glm::tquat, which expects w, x, y, z, and called it with [0, 1, 2, 3]. + // Eigen stores it as x, y, z, w. So... are we sure this is right? + Eigen::Map> rotation(camera_rotation); + Eigen::Map> translation(camera_translation); - // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp. - const tvec4 viewport(0, image.rows, image.cols, -image.rows); // OpenCV convention + Eigen::Matrix4 model_view_mtx = Eigen::Matrix4::Identity(); + model_view_mtx.block<3, 3>(0, 0) = rotation.toRotationMatrix(); + model_view_mtx.col(3).head<3>() = translation; - tvec3 projected_point; - if (use_perspective) - { - const auto t_mtx = glm::translate(tvec3(camera_translation_and_intrinsics[0], - camera_translation_and_intrinsics[1], - camera_translation_and_intrinsics[2])); - const T& focal = camera_translation_and_intrinsics[3]; - const auto persp_mtx = glm::perspective(focal, T(aspect_ratio), T(0.1), T(1000.0)); - projected_point = glm::project(point_3d, t_mtx * rot_mtx, persp_mtx, viewport); - } else - { - const T& frustum_scale = camera_translation_and_intrinsics[2]; - const auto t_mtx = glm::translate( - tvec3(camera_translation_and_intrinsics[0], camera_translation_and_intrinsics[1], - 0.0)); // we don't have t_z in ortho camera, it doesn't matter where it is - const auto ortho_mtx = - glm::ortho(-1.0 * aspect_ratio * frustum_scale, 1.0 * aspect_ratio * frustum_scale, - -1.0 * frustum_scale, 1.0 * frustum_scale); - projected_point = glm::project(point_3d, t_mtx * rot_mtx, ortho_mtx, viewport); - } + // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp. + const Eigen::Vector4 viewport(T(0), T(image.height()), T(image.width()), + T(-image.height())); // OpenCV convention + + const T& fov = camera_intrinsics[0]; + const double aspect_ratio = static_cast(image.width()) / image.height(); + const auto projection_mtx = perspective(fov, T(aspect_ratio), T(0.1), T(1000.0)); + + Eigen::Vector3 projected_point = project(point_3d, model_view_mtx, projection_mtx, viewport); + // End of identical block with PerspectiveProjectionLandmarkCost. + + // Access the image colour value at the projected pixel location, if inside the image - otherwise + // Ceres uses the value from the grid edge. + // Note: We could store the BiCubicInterpolator as member variable. + // The default template arguments for Grid2D are and (except for the dimension), they're the right ones for us. + ceres::Grid2D grid(&image(0, 0).data()[0], 0, image.height(), 0, image.width()); + ceres::BiCubicInterpolator> interpolator(grid); + T observed_colour[3]; + interpolator.Evaluate(projected_point.y(), projected_point.x(), &observed_colour[0]); + + // Get the vertex's colour value: + Eigen::Map> color_coeffs_mapped(color_coeffs, num_color_coeffs); + const auto model_color = get_vertex_color_at_point(color_model, vertex_id, color_coeffs_mapped); + // Returns RGB, between [0, 1]. + + // Residual: Vertex colour of model point minus the observed colour in the 2D image + // observed_colour is RGB, model_colour is RGB. Residual will be RGB. + residual[0] = model_color[0] * 255.0 - T(observed_colour[0]); // r + residual[1] = model_color[1] * 255.0 - T(observed_colour[1]); // g + residual[2] = model_color[2] * 255.0 - T(observed_colour[2]); // b - // Access the image colour value at the projected pixel location, if inside the image - otherwise set - // to (127, 127, 127) (maybe not ideal...): - if (projected_point.y < T(0) || projected_point.y >= T(image.rows) || projected_point.x < T(0) || - projected_point.x >= T(image.cols)) - { - // The point is outside the image. - residual[0] = T(127.0); - residual[1] = T(127.0); - residual[2] = T(127.0); - - // TODO: What does interpolator.Evaluate() return in this case? - /* Grid2D grid(image.ptr(0), 0, image.rows, 0, image.cols); - BiCubicInterpolator> interpolator(grid); - T observed_colour[3]; - interpolator.Evaluate(projected_y, projected_x, &observed_colour[0]); // says it returns false when (r, c) is out of bounds... but it returns void? - //std::cout << observed_colour[0] << ", " << observed_colour[1] << ", " << observed_colour[2] << "\n"; - */ - // It kind of looks like as if when it's out of bounds, there will be a vector out of bound access - // and an assert/crash? No, in debugging, it looks like it just interpolates or something. Not - // clear currently. - } else - { - // Note: We could store the BiCubicInterpolator as member variable. - // The default template arguments for Grid2D are and (except for the dimension), they're the right ones for us. - ceres::Grid2D grid(image.ptr(0), 0, image.rows, 0, image.cols); - ceres::BiCubicInterpolator> interpolator(grid); - T observed_colour[3]; - interpolator.Evaluate(projected_point.y, projected_point.x, &observed_colour[0]); - - // This probably needs to be modified if we add a light model. - auto model_colour = get_vertex_colour(morphable_model.get_color_model(), vertex_id, color_coeffs); - // I think this returns RGB, and between [0, 1]. - - // Residual: Vertex colour of model point minus the observed colour in the 2D image - // observed_colour is BGR, model_colour is RGB. Residual will be RGB. - residual[0] = model_colour[0] * 255.0 - T(observed_colour[2]); - residual[1] = model_colour[1] * 255.0 - T(observed_colour[1]); - residual[2] = model_colour[2] * 255.0 - T(observed_colour[0]); - } return true; }; + /** + * Factory to hide the construction of the CostFunction object from the client code. + * + * The number of parameters are given as template arguments, so that we can use Ceres' fixed-size + * constructor. + */ + template + static ceres::CostFunction* Create(const eos::morphablemodel::PcaModel& shape_model, + const eos::morphablemodel::Blendshapes& blendshapes, + const eos::morphablemodel::PcaModel& color_model, int vertex_id, + const eos::core::Image3u& image) + { + // Template arguments: 3 residuals (RGB), 4 (camera rotation), 3 (camera translation), 1 (camera + // intrinsics), x (shape coeffs), x (expr coeffs), x (colour coeffs) + return (new ceres::AutoDiffCostFunction( + new VertexColorCost(shape_model, blendshapes, color_model, num_shape_coeffs, + num_blendshape_coeffs, num_color_coeffs, vertex_id, image))); + }; + private: - const morphablemodel::MorphableModel& morphable_model; // Or store as pointer (non-owning) or std::reference_wrapper? - const std::vector& blendshapes; - const cv::Mat image; // the observed image - const double aspect_ratio; + const eos::morphablemodel::PcaModel& + shape_model; // Or store as pointer (non-owning) or std::reference_wrapper? + const eos::morphablemodel::Blendshapes& blendshapes; + const eos::morphablemodel::PcaModel& color_model; + const int num_shape_coeffs; + const int num_blendshape_coeffs; + const int num_color_coeffs; const int vertex_id; - const bool use_perspective; + const eos::core::Image3u& image; // the observed image }; /** @@ -380,45 +382,25 @@ struct ImageCost * @return The 3D point. */ template -std::array get_shape_point(const morphablemodel::PcaModel& shape_model, - const std::vector& blendshapes, int vertex_id, - const T* const shape_coeffs, const T* const blendshape_coeffs) +Eigen::Vector3 get_shape_at_point(const eos::morphablemodel::PcaModel& shape_model, + const eos::morphablemodel::Blendshapes& blendshapes, int vertex_id, + Eigen::Map> shape_coeffs, + Eigen::Map> blendshape_coeffs) { - int num_coeffs_fitting = 10; // Todo: Should be inferred or a function parameter! - auto mean = shape_model.get_mean_at_point(vertex_id); - auto basis = shape_model.get_rescaled_pca_basis_at_point(vertex_id); - // Computing Shape = mean + basis * coeffs: - // Note: Could use an Eigen matrix with type T to see if it gives a speedup. - std::array point{T(mean[0]), T(mean[1]), T(mean[2])}; - for (int i = 0; i < num_coeffs_fitting; ++i) - { - point[0] += T(basis.row(0).col(i)(0)) * shape_coeffs[i]; // it seems to be ~15% faster when these are - // static_cast() instead of T()? - } - for (int i = 0; i < num_coeffs_fitting; ++i) - { - point[1] += T(basis.row(1).col(i)(0)) * shape_coeffs[i]; - } - for (int i = 0; i < num_coeffs_fitting; ++i) - { - point[2] += T(basis.row(2).col(i)(0)) * shape_coeffs[i]; - } - // Adding the blendshape offsets: - // Shape = mean + basis * coeffs + blendshapes * bs_coeffs: - auto num_blendshapes = blendshapes.size(); - for (int i = 0; i < num_blendshapes; ++i) + // Computing Shape = mean + shape_basis*shape_coeffs + blendshapes*blendshape_coeffs: + const Eigen::Vector3f mean = shape_model.get_mean_at_point(vertex_id); + const Eigen::Vector3 shape_vector = + shape_model.get_rescaled_pca_basis_at_point(vertex_id).leftCols(shape_coeffs.size()).cast() * + shape_coeffs; + Eigen::Vector3 expression_vector(T(0.0), T(0.0), T(0.0)); + for (std::size_t i = 0; i < blendshape_coeffs.size(); i++) { - point[0] += T(blendshapes[i].deformation(3 * vertex_id + 0)) * blendshape_coeffs[i]; + expression_vector.x() += T(blendshapes[i].deformation(vertex_id * 3 + 0)) * blendshape_coeffs(i); + expression_vector.y() += T(blendshapes[i].deformation(vertex_id * 3 + 1)) * blendshape_coeffs(i); + expression_vector.z() += T(blendshapes[i].deformation(vertex_id * 3 + 2)) * blendshape_coeffs(i); } - for (int i = 0; i < num_blendshapes; ++i) - { - point[1] += T(blendshapes[i].deformation(3 * vertex_id + 1)) * blendshape_coeffs[i]; - } - for (int i = 0; i < num_blendshapes; ++i) - { - point[2] += T(blendshapes[i].deformation(3 * vertex_id + 2)) * blendshape_coeffs[i]; - } - return point; + + return Eigen::Vector3(mean.cast() + shape_vector + expression_vector); }; /** @@ -430,31 +412,87 @@ std::array get_shape_point(const morphablemodel::PcaModel& shape_model, * @return The colour. As RGB? In [0, 1]? */ template -std::array get_vertex_colour(const morphablemodel::PcaModel& color_model, int vertex_id, - const T* const color_coeffs) +Eigen::Vector3 get_vertex_color_at_point(const eos::morphablemodel::PcaModel& color_model, int vertex_id, + Eigen::Map> color_coeffs) { - int num_coeffs_fitting = 10; // Todo: Should be inferred or a function parameter! - auto mean = color_model.get_mean_at_point(vertex_id); - auto basis = color_model.get_rescaled_pca_basis_at_point(vertex_id); - // Computing Colour = mean + basis * coeffs - // Note: Could use an Eigen matrix with type T to see if it gives a speedup. - std::array point{T(mean[0]), T(mean[1]), T(mean[2])}; - for (int i = 0; i < num_coeffs_fitting; ++i) - { - point[0] += T(basis.row(0).col(i)(0)) * color_coeffs[i]; // it seems to be ~15% faster when these are - // static_cast() instead of T()? - } - for (int i = 0; i < num_coeffs_fitting; ++i) - { - point[1] += T(basis.row(1).col(i)(0)) * color_coeffs[i]; - } - for (int i = 0; i < num_coeffs_fitting; ++i) - { - point[2] += T(basis.row(2).col(i)(0)) * color_coeffs[i]; - } - return point; + const Eigen::Vector3f mean = color_model.get_mean_at_point(vertex_id); + const Eigen::Vector3 color_vector = + color_model.get_rescaled_pca_basis_at_point(vertex_id).leftCols(color_coeffs.size()).cast() * + color_coeffs; + + return Eigen::Vector3(mean.cast() + color_vector); }; +/** + * Creates a matrix for a right-handed, symmetric perspective-view frustrum. + * + * The function follows the OpenGL clip volume definition, which is also the GLM default. The near and far + * clip planes correspond to z normalized device coordinates of -1 and +1 respectively. + * + * This function is equivalent to glm::perspectiveRH_NO(...). + * + * More details can be found on the gluPerspective man page: + * https://www.khronos.org/registry/OpenGL-Refpages/gl2.1/xhtml/gluPerspective.xml. + * + * @param[in] fov_y Specifies the field of view angle in the y direction. Expressed in radians. + * @param[in] aspect Specifies the aspect ratio that determines the field of view in the x direction. The + * aspect ratio is the ratio of x (width) to y (height). + * @param[in] z_near Specifies the distance from the viewer to the near clipping plane (always positive). + * @param[in] z_far Specifies the distance from the viewer to the far clipping plane (always positive). + * @tparam T A floating-point scalar type, ceres::Jet, or similar compatible type. + * @return The corresponding perspective projection matrix. + */ +template +Eigen::Matrix4 perspective(T fov_y, T aspect, T z_near, T z_far) +{ + // Will this assert work? std::abs probably won't work on T? + // assert(abs(aspect - std::numeric_limits::epsilon()) > static_cast(0)); + + const T tan_half_fov_y = tan(fov_y / static_cast(2)); // ceres::tan? + + // Maybe construct with static_cast(0)? => No, doesn't have c'tor. + // Could do Eigen::Matrix4 result = {{1, 2, 3, 4}, {1, 2, 3, 4}...} I think. + Eigen::Matrix4 result = Eigen::Matrix4::Zero(); + result(0, 0) = static_cast(1) / (aspect * tan_half_fov_y); + result(1, 1) = static_cast(1) / (tan_half_fov_y); + result(2, 2) = -(z_far + z_near) / (z_far - z_near); + result(2, 3) = -static_cast(1); + result(3, 2) = -(static_cast(2) * z_far * z_near) / (z_far - z_near); + return result; +} + +/** + * Project the given point_3d (from object coordinates) into window coordinates. + * + * The function follows the OpenGL clip volume definition. The near and far clip planes correspond to + * z normalized device coordinates of -1 and +1 respectively. + * This function is equivalent to glm::projectNO(...). + * + * More details can be found on the gluProject man page: + * https://www.khronos.org/registry/OpenGL-Refpages/gl2.1/xhtml/gluProject.xml. + * + * @param[in] point_3d A 3D point in object coordinates. + * @param[in] modelview_matrix A model-view matrix, transforming the point into view (camera) space. + * @param[in] projection_matrix The projection matrix to be used. + * @param[in] viewport The viewport transformation to be used. + * @tparam T A floating-point scalar type, ceres::Jet, or similar compatible type. + * @return Return the computed window coordinates. + */ +template +Eigen::Vector3 project(const Eigen::Vector3& point_3d, const Eigen::Matrix4& modelview_matrix, + const Eigen::Matrix4& projection_matrix, const Eigen::Vector4& viewport) +{ + Eigen::Vector4 projected_point = projection_matrix * modelview_matrix * point_3d.homogeneous(); + projected_point /= projected_point.w(); + projected_point = + projected_point * static_cast(0.5) + + Eigen::Vector4(static_cast(0.5), static_cast(0.5), static_cast(0.5), static_cast(0.5)); + projected_point.x() = projected_point.x() * T(viewport(2)) + T(viewport(0)); + projected_point.y() = projected_point.y() * T(viewport(3)) + T(viewport(1)); + + return projected_point.head<3>(); +} + } /* namespace fitting */ } /* namespace eos */