diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4e572a97..daad4373 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -71,6 +71,7 @@ set(HEADERS
   ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/read_obj.hpp
   ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/write_obj.hpp
   ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/Rect.hpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/core/math.hpp
   ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/morphablemodel/PcaModel.hpp
   ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/morphablemodel/MorphableModel.hpp
   ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/morphablemodel/Blendshape.hpp
diff --git a/examples/fit-model-ceres.cpp b/examples/fit-model-ceres.cpp
index 8907acfb..dfca9d13 100644
--- a/examples/fit-model-ceres.cpp
+++ b/examples/fit-model-ceres.cpp
@@ -3,7 +3,7 @@
  *
  * File: examples/fit-model-ceres.cpp
  *
- * Copyright 2016 Patrik Huber
+ * Copyright 2016-2023 Patrik Huber
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,24 +17,21 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#define GLM_FORCE_UNRESTRICTED_GENTYPE
 #include "eos/core/Landmark.hpp"
 #include "eos/core/LandmarkMapper.hpp"
 #include "eos/core/read_pts_landmarks.hpp"
+#include "eos/core/Image.hpp"
+#include "eos/core/image/opencv_interop.hpp"
 #include "eos/core/write_obj.hpp"
+#include "eos/core/math.hpp"
+#include "eos/morphablemodel/MorphableModel.hpp"
+#include "eos/morphablemodel/Blendshape.hpp"
 #include "eos/fitting/ceres_nonlinear.hpp"
 #include "eos/fitting/contour_correspondence.hpp"
-#include "eos/fitting/fitting.hpp"
-#include "eos/morphablemodel/Blendshape.hpp"
 
 #include "Eigen/Core"
 
-#include "glm/ext.hpp"
-#include "glm/glm.hpp"
-
 #include "ceres/ceres.h"
-#include "ceres/cubic_interpolation.h"
-#include "ceres/rotation.h"
 
 #include "opencv2/core/core.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
@@ -48,7 +45,6 @@
 #include <vector>
 
 using namespace eos;
-using namespace glm;
 using namespace ceres;
 namespace po = boost::program_options;
 namespace fs = boost::filesystem;
@@ -168,7 +164,7 @@ int main(int argc, char* argv[])
     std::vector<eos::morphablemodel::Blendshape> blendshapes =
         eos::morphablemodel::load_blendshapes(blendshapesfile.string());
 
-    // Draw the loaded landmarks:
+    // Draw the loaded landmarks (blue):
     Mat outimg = image.clone();
     for (const auto& lm : landmarks)
     {
@@ -176,21 +172,19 @@ int main(int argc, char* argv[])
                       cv::Point2f(lm.coordinates[0] + 2.0f, lm.coordinates[1] + 2.0f), {255, 0, 0});
     }
 
-    constexpr bool use_perspective = false;
-
     // These will be the 2D image points and their corresponding 3D vertex id's used for the fitting:
     vector<Vector2f> image_points; // the 2D landmark points
-    vector<int> vertex_indices; // their corresponding vertex indices
+    vector<int> vertex_indices;    // their corresponding vertex indices
 
     // Sub-select all the landmarks which we have a mapping for (i.e. that are defined in the 3DMM):
     for (int i = 0; i < landmarks.size(); ++i)
     {
-        auto converted_name = landmark_mapper.convert(landmarks[i].name);
+        const auto converted_name = landmark_mapper.convert(landmarks[i].name);
         if (!converted_name)
         { // no mapping defined for the current landmark
             continue;
         }
-        int vertex_idx = std::stoi(converted_name.value());
+        const int vertex_idx = std::stoi(converted_name.value());
         vertex_indices.emplace_back(vertex_idx);
         image_points.emplace_back(landmarks[i].coordinates);
     }
@@ -199,66 +193,70 @@ int main(int argc, char* argv[])
     std::stringstream fitting_log;
     auto start = std::chrono::steady_clock::now();
 
-    std::vector<double> camera_rotation; // Quaternion, [w x y z]. Todo: Actually, use std::array for all of these.
-    camera_rotation.resize(4); // initialises with zeros
-    camera_rotation[0] = 1.0;
-    std::vector<double> camera_translation_and_intrinsics;
-    constexpr int num_cam_trans_intr_params = use_perspective ? 4 : 3;
-    // Parameters for the orthographic projection: [t_x, t_y, frustum_scale]
-    // And perspective projection: [t_x, t_y, t_z, fov].
-    // Origin is assumed at center of image, and no lens distortions.
+    // The following have to be constexpr because they're used in template parameters in this implementation:
+    constexpr int num_shape_coeffs = 50;
+    constexpr int num_blendshape_coeffs = 6;
+    constexpr int num_color_coeffs = 50;
+
+    // Weights for the different cost function terms:
+    // Note: To find good weights, ceres::ResidualBlockId can be used to store the IDs to each added residual
+    // blocks. Then, one can use ceres::Problem::EvaluateOptions and ceres::Problem::Evaluate(...) to retrieve
+    // the values of each individual term. We don't do that here for simplicity reasons. See the Ceres
+    // documentation for details, and https://github.com/patrikhuber/eos/issues/348.
+    const double landmark_cost_weight = 100.0;
+    const double shape_prior_cost_weight = 500000.0;
+    const double blendshapes_prior_cost_weight = 25.0;
+    const double color_prior_cost_weight = 500000.0;
+    const double vertex_color_cost_weight = 1.0;
+
+    // The model parameters that we're estimating:
+    using ModelCoefficients = vector<double>;
+    ModelCoefficients shape_coefficients(num_shape_coeffs, 0.0);
+    ModelCoefficients blendshape_coefficients(num_blendshape_coeffs, 0.0);
+    ModelCoefficients color_coefficients(num_color_coeffs, 0.0);
+
+    // Parameters for the perspective projection: A rotation quaternion, [t_x, t_y, t_z], and fov_y (field of
+    // view).
+    // The origin is assumed at center of image, and no lens distortions.
     // Note: Actually, we estimate the model-view matrix and not the camera position. But one defines the
     // other.
-    camera_translation_and_intrinsics.resize(num_cam_trans_intr_params); // initialises with zeros
-    if (use_perspective)
-    {
-        camera_translation_and_intrinsics[2] = -400.0;              // Move the model back (along the -z axis)
-        camera_translation_and_intrinsics[3] = glm::radians(45.0f); // fov
-    } else
-    {
-        camera_translation_and_intrinsics[2] = 110.0; // frustum_scale
-    }
-
-    std::vector<double> shape_coefficients;
-    shape_coefficients.resize(10); // Todo: Currently, the value '10' is hard-coded everywhere. Make it dynamic.
-    std::vector<double> blendshape_coefficients;
-    blendshape_coefficients.resize(6);
+    Eigen::Quaterniond camera_rotation(1.0, 0.0, 0.0, 0.0); // The c'tor takes wxyz. Storage is xyzw.
+    Eigen::Vector3d camera_translation(0.0, 0.0, -400.0);   // Move the model back (along the -z axis)
+    double fov_y = core::radians(45.0);
 
+    // Set up just a landmark cost and optimise for rotation and translation, with fov_y and the 3DMM
+    // parameters fixed (i.e. using the mean face):
     Problem camera_costfunction;
     for (int i = 0; i < image_points.size(); ++i)
     {
-        CostFunction* cost_function =
-            new AutoDiffCostFunction<fitting::LandmarkCost, 2 /* num residuals */,
-                                     4 /* camera rotation (quaternion) */,
-                                     num_cam_trans_intr_params /* camera translation & fov/frustum_scale */,
-                                     10 /* shape-coeffs */, 6 /* bs-coeffs */>(
-                new fitting::LandmarkCost(morphable_model.get_shape_model(), blendshapes, image_points[i],
-                                          vertex_indices[i], image.cols, image.rows, use_perspective));
-
-        camera_costfunction.AddResidualBlock(cost_function, NULL, &camera_rotation[0],
-                                             &camera_translation_and_intrinsics[0], &shape_coefficients[0],
-                                             &blendshape_coefficients[0]);
+        auto landmark_cost =
+            fitting::PerspectiveProjectionLandmarkCost::Create<num_shape_coeffs, num_blendshape_coeffs>(
+                morphable_model.get_shape_model(), blendshapes, image_points[i], vertex_indices[i],
+                image.cols, image.rows);
+        ScaledLoss* landmark_cost_scaled_loss =
+            new ScaledLoss(nullptr, landmark_cost_weight, ceres::TAKE_OWNERSHIP);
+        camera_costfunction.AddResidualBlock(landmark_cost, landmark_cost_scaled_loss,
+                                             &camera_rotation.coeffs()[0], &camera_translation.data()[0],
+                                             &fov_y, &shape_coefficients[0], &blendshape_coefficients[0]);
     }
-    camera_costfunction.SetParameterBlockConstant(&shape_coefficients[0]); // keep the shape constant
+    // Keep the model coeffs constant (i.e. all at zero):
+    camera_costfunction.SetParameterBlockConstant(&shape_coefficients[0]);
     camera_costfunction.SetParameterBlockConstant(&blendshape_coefficients[0]);
-    if (use_perspective)
-    {
-        camera_costfunction.SetParameterUpperBound(
-            &camera_translation_and_intrinsics[0], 2,
-            -std::numeric_limits<double>::epsilon()); // t_z has to be negative
-        camera_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 3,
-                                                   0.01); // fov in radians, must be > 0
-    } else
-    {
-        camera_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 2,
-                                                   1.0); // frustum_scale must be > 0
-    }
-    QuaternionParameterization* camera_fit_quaternion_parameterisation = new QuaternionParameterization;
-    camera_costfunction.SetParameterization(&camera_rotation[0], camera_fit_quaternion_parameterisation);
-
+    // Keep the fov_y constant too:
+    camera_costfunction.SetParameterBlockConstant(&fov_y);
+    // Keep t_z negative (i.e. we want to look at the model from the front):
+    camera_costfunction.SetParameterUpperBound(&camera_translation[0], 2,
+                                               -std::numeric_limits<double>::epsilon());
+    // Keep the fov_y (in radians) > 0 (but we're not optimising for it now):
+    // camera_costfunction.SetParameterLowerBound(&fov_y, 0, 0.01);
+
+    EigenQuaternionManifold* quaternion_manifold = new EigenQuaternionManifold;
+    camera_costfunction.SetManifold(&camera_rotation.coeffs()[0], quaternion_manifold);
+
+    // Set up the solver, and optimise:
     Solver::Options solver_options;
-    solver_options.linear_solver_type = ITERATIVE_SCHUR;
-    solver_options.num_threads = 8;
+    // solver_options.linear_solver_type = ITERATIVE_SCHUR;
+    // solver_options.num_threads = 8;
     solver_options.minimizer_progress_to_stdout = true;
     // solver_options.max_num_iterations = 100;
     Solver::Summary solver_summary;
@@ -268,63 +266,41 @@ int main(int argc, char* argv[])
 
     // Draw the mean-face landmarks projected using the estimated camera:
     // Construct the rotation & translation (model-view) matrices, projection matrix, and viewport:
-    glm::dquat estimated_rotation(camera_rotation[0], camera_rotation[1], camera_rotation[2],
-                                  camera_rotation[3]);
-    auto rot_mtx = glm::mat4_cast(estimated_rotation);
+    Eigen::Matrix4d model_view_mtx = Eigen::Matrix4d::Identity();
+    model_view_mtx.block<3, 3>(0, 0) = camera_rotation.toRotationMatrix();
+    model_view_mtx.col(3).head<3>() = camera_translation;
     const double aspect = static_cast<double>(image.cols) / image.rows;
-    auto get_translation_matrix = [](auto&& camera_translation_and_intrinsics, auto&& use_perspective) {
-        if (use_perspective)
-        {
-            return glm::translate(glm::dvec3(camera_translation_and_intrinsics[0],
-                                             camera_translation_and_intrinsics[1],
-                                             camera_translation_and_intrinsics[2]));
-        } else
-        {
-            return glm::translate(
-                glm::dvec3(camera_translation_and_intrinsics[0], camera_translation_and_intrinsics[1], 0.0));
-        }
-    };
-    auto get_projection_matrix = [](auto&& camera_translation_and_intrinsics, auto&& aspect,
-                                    auto&& use_perspective) {
-        if (use_perspective)
-        {
-            const auto& focal = camera_translation_and_intrinsics[3];
-            return glm::perspective(focal, aspect, 0.1, 1000.0);
-        } else
-        {
-            const auto& frustum_scale = camera_translation_and_intrinsics[2];
-            return glm::ortho(-1.0 * aspect * frustum_scale, 1.0 * aspect * frustum_scale,
-                              -1.0 * frustum_scale, 1.0 * frustum_scale);
-        }
-    };
-    auto t_mtx = get_translation_matrix(camera_translation_and_intrinsics, use_perspective);
-    auto projection_mtx = get_projection_matrix(camera_translation_and_intrinsics, aspect, use_perspective);
-    const glm::dvec4 viewport(0, image.rows, image.cols, -image.rows); // OpenCV convention
+    auto projection_mtx = fitting::perspective(fov_y, aspect, 0.1, 1000.0);
+    // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp.
+    const Eigen::Vector4d viewport(0, image.rows, image.cols, -image.rows); // OpenCV convention
 
-    auto mean_mesh = morphable_model.get_mean();
+    const auto& mean_mesh = morphable_model.get_mean();
     for (auto idx : vertex_indices)
     {
-        glm::dvec3 point_3d(mean_mesh.vertices[idx][0], mean_mesh.vertices[idx][1],
-                            mean_mesh.vertices[idx][2]); // The 3D model point
-        glm::dvec3 projected_point = glm::project(point_3d, t_mtx * rot_mtx, projection_mtx, viewport);
-        cv::circle(outimg, cv::Point2f(projected_point.x, projected_point.y), 3, {0.0f, 0.0f, 255.0f}); // red
+        const auto& point_3d = mean_mesh.vertices[idx];
+        const auto projected_point =
+            fitting::project<double>(point_3d.cast<double>(), model_view_mtx, projection_mtx, viewport);
+        cv::circle(outimg, cv::Point2f(projected_point.x(), projected_point.y()), 3,
+                   {0.0f, 0.0f, 255.0f}); // red
     }
+    // Draw the subset of the detected landmarks that we use in the fitting (yellow):
     for (const auto& lm : image_points)
     {
-        cv::circle(outimg, cv::Point2f(lm(0), lm(1)), 3,
-                   {0.0f, 255.0f, 255.0f}); // yellow: subset of the detected LMs that we use
+        cv::circle(outimg, cv::Point2f(lm(0), lm(1)), 3, {0.0f, 255.0f, 255.0f});
     }
-    auto euler_angles = glm::eulerAngles(estimated_rotation); // returns [P, Y, R]
-    fitting_log << "Pose fit with mean shape:\tYaw " << glm::degrees(euler_angles[1]) << ", Pitch "
-                << glm::degrees(euler_angles[0]) << ", Roll " << glm::degrees(euler_angles[2])
-                << "; t & f: " << camera_translation_and_intrinsics << '\n';
-    fitting_log << "Ceres took: "
+
+    fitting_log << "Pose-only fitting took: "
                 << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms.\n";
 
-    // Contour fitting:
+    // Now add contour fitting:
     // These are the additional contour-correspondences we're going to find and then use:
+    // Note: This needs extracting the yaw angle from the rotation quaternion, which Eigen currently doesn't
+    // have a function for. Not using contour fitting for now.
+    // euler_angles = glm::eulerAngles(estimated_rotation); // returns [P, Y, R]; where estimated_rotation is
+    // a glm::quat.
+    /*
     vector<Vector2f> image_points_contour; // the 2D landmark points
-    vector<int> vertex_indices_contour; // their corresponding 3D vertex indices
+    vector<int> vertex_indices_contour;    // their corresponding 3D vertex indices
     // For each 2D contour landmark, get the corresponding 3D vertex point and vertex id:
     std::tie(image_points_contour, std::ignore, vertex_indices_contour) =
         fitting::get_contour_correspondences(landmarks, ibug_contour, model_contour,
@@ -333,64 +309,59 @@ int main(int argc, char* argv[])
     using eos::fitting::concat;
     vertex_indices = concat(vertex_indices, vertex_indices_contour);
     image_points = concat(image_points, image_points_contour);
+    */
 
     // Full fitting - Estimate shape and pose, given the previous pose estimate:
     start = std::chrono::steady_clock::now();
     Problem fitting_costfunction;
-    // Landmark constraint:
+    // Landmark cost:
     for (int i = 0; i < image_points.size(); ++i)
     {
-        CostFunction* cost_function =
-            new AutoDiffCostFunction<fitting::LandmarkCost, 2 /* num residuals */,
-                                     4 /* camera rotation (quaternion) */,
-                                     num_cam_trans_intr_params /* camera translation & focal length */,
-                                     10 /* shape-coeffs */, 6 /* bs-coeffs */>(
-                new fitting::LandmarkCost(morphable_model.get_shape_model(), blendshapes, image_points[i],
-                                          vertex_indices[i], image.cols, image.rows, use_perspective));
-        fitting_costfunction.AddResidualBlock(cost_function, NULL, &camera_rotation[0],
-                                              &camera_translation_and_intrinsics[0], &shape_coefficients[0],
-                                              &blendshape_coefficients[0]);
+        auto landmark_cost =
+            fitting::PerspectiveProjectionLandmarkCost::Create<num_shape_coeffs, num_blendshape_coeffs>(
+                morphable_model.get_shape_model(), blendshapes, image_points[i], vertex_indices[i],
+                image.cols, image.rows);
+        ScaledLoss* landmark_cost_scaled_loss =
+            new ScaledLoss(nullptr, landmark_cost_weight, ceres::TAKE_OWNERSHIP);
+        fitting_costfunction.AddResidualBlock(landmark_cost, landmark_cost_scaled_loss,
+                                              &camera_rotation.coeffs()[0], &camera_translation.data()[0],
+                                              &fov_y, &shape_coefficients[0], &blendshape_coefficients[0]);
     }
-    // Shape prior and bounds, for 10 shape coefficients:
-    CostFunction* shape_prior_cost = fitting::NormCost::Create<10>();
+    // Prior and bounds for the shape coefficients:
+    CostFunction* shape_prior_cost = fitting::NormCost::Create<num_shape_coeffs>();
     ScaledLoss* shape_prior_scaled_loss =
-        new ScaledLoss(nullptr, 35.0, Ownership::TAKE_OWNERSHIP);
+        new ScaledLoss(nullptr, shape_prior_cost_weight,
+                       Ownership::TAKE_OWNERSHIP); // weight was 35.0 previously, but it was inside the
+                                                   // residual, now it's outside
     fitting_costfunction.AddResidualBlock(shape_prior_cost, shape_prior_scaled_loss, &shape_coefficients[0]);
-    for (int i = 0; i < 10; ++i)
+    for (int i = 0; i < num_shape_coeffs; ++i)
     {
         fitting_costfunction.SetParameterLowerBound(&shape_coefficients[0], i, -3.0);
         fitting_costfunction.SetParameterUpperBound(&shape_coefficients[0], i, 3.0);
     }
     // Prior and constraints on blendshapes:
-    CostFunction* blendshapes_prior_cost = fitting::NormCost::Create<6>();
+    CostFunction* blendshapes_prior_cost = fitting::NormCost::Create<num_blendshape_coeffs>();
     LossFunction* blendshapes_prior_scaled_loss =
-        new ScaledLoss(new SoftLOneLoss(1.0), 10.0, Ownership::TAKE_OWNERSHIP);
+        new ScaledLoss(new SoftLOneLoss(1.0), blendshapes_prior_cost_weight,
+                       Ownership::TAKE_OWNERSHIP); // weight was 10.0 previously
     fitting_costfunction.AddResidualBlock(blendshapes_prior_cost, blendshapes_prior_scaled_loss,
                                           &blendshape_coefficients[0]);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 0, 0.0);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 1, 0.0);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 2, 0.0);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 3, 0.0);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 4, 0.0);
-    fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], 5, 0.0);
-    // Some constraints on the camera translation and fov/scale:
-    if (use_perspective)
-    {
-        fitting_costfunction.SetParameterUpperBound(
-            &camera_translation_and_intrinsics[0], 2,
-            -std::numeric_limits<double>::epsilon()); // t_z has to be negative
-        fitting_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 3,
-                                                    0.01); // fov in radians, must be > 0
-    } else
+    for (int i = 0; i < num_blendshape_coeffs; ++i)
     {
-        fitting_costfunction.SetParameterLowerBound(&camera_translation_and_intrinsics[0], 2,
-                                                    1.0); // frustum_scale must be > 0
+        fitting_costfunction.SetParameterLowerBound(&blendshape_coefficients[0], i, 0.0);
+        fitting_costfunction.SetParameterUpperBound(&blendshape_coefficients[0], i, 1.0);
     }
-
-    QuaternionParameterization* full_fit_quaternion_parameterisation = new QuaternionParameterization;
-    fitting_costfunction.SetParameterization(&camera_rotation[0], full_fit_quaternion_parameterisation);
-
-    // Colour model fitting (this needs a Morphable Model with colour (albedo) model, see note above main()):
+    // Keep t_z negative (i.e. we want to look at the model from the front):
+    fitting_costfunction.SetParameterUpperBound(&camera_translation[0], 2,
+                                                -std::numeric_limits<double>::epsilon());
+    // Keep the fov_y (in radians) > 0:
+    fitting_costfunction.SetParameterLowerBound(&fov_y, 0, 0.01);
+    // Note: We create a new manifold object, since camera_costfunction took ownership of the previous one,
+    // and will delete it upon destruction.
+    EigenQuaternionManifold* quaternion_manifold_full_fitting = new EigenQuaternionManifold;
+    fitting_costfunction.SetManifold(&camera_rotation.coeffs()[0], quaternion_manifold_full_fitting);
+
+    // Colour model cost (this needs a Morphable Model with colour (albedo) model, see note above main()):
     if (!morphable_model.has_color_model())
     {
         cout << "Error: The MorphableModel used does not contain a colour (albedo) model. ImageCost requires "
@@ -398,89 +369,81 @@ int main(int argc, char* argv[])
                 "different morphable model, or remove this section.";
         return EXIT_FAILURE;
     }
-    std::vector<double> colour_coefficients;
-    colour_coefficients.resize(10);
-    // Add a residual for each vertex:
+    // Add a colour residual for each vertex:
+    cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
+    const core::Image3u image_eos_rgb = eos::core::from_mat(image);
+    cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
     for (int i = 0; i < morphable_model.get_shape_model().get_data_dimension() / 3; ++i)
     {
-        CostFunction* cost_function =
-            new AutoDiffCostFunction<fitting::ImageCost, 3 /* Residuals: [R, G, B] */,
-                                     4 /* camera rotation (quaternion) */,
-                                     num_cam_trans_intr_params /* camera translation & focal length */,
-                                     10 /* shape-coeffs */, 6 /* bs-coeffs */, 10 /* colour coeffs */>(
-                new fitting::ImageCost(morphable_model, blendshapes, image, i, use_perspective));
-        fitting_costfunction.AddResidualBlock(cost_function, NULL, &camera_rotation[0],
-                                              &camera_translation_and_intrinsics[0], &shape_coefficients[0],
-                                              &blendshape_coefficients[0], &colour_coefficients[0]);
+        auto vertex_color_cost =
+            fitting::VertexColorCost::Create<num_shape_coeffs, num_blendshape_coeffs, num_color_coeffs>(
+                morphable_model.get_shape_model(), blendshapes, morphable_model.get_color_model(), i,
+                image_eos_rgb);
+        ScaledLoss* vertex_color_cost_scaled_loss =
+            new ScaledLoss(nullptr, vertex_color_cost_weight, ceres::TAKE_OWNERSHIP);
+        fitting_costfunction.AddResidualBlock(vertex_color_cost, vertex_color_cost_scaled_loss,
+                                              &camera_rotation.coeffs()[0], &camera_translation.data()[0],
+                                              &fov_y, &shape_coefficients[0], &blendshape_coefficients[0],
+                                              &color_coefficients[0]);
     }
-    // Prior and bounds, for 10 colour coefficients:
-    CostFunction* color_prior_cost = fitting::NormCost::Create<10>();
-    ScaledLoss* color_prior_scaled_loss =
-        new ScaledLoss(nullptr, 35.0, Ownership::TAKE_OWNERSHIP);
-    fitting_costfunction.AddResidualBlock(color_prior_cost, color_prior_scaled_loss, &colour_coefficients[0]);
-    for (int i = 0; i < 10; ++i)
+    // Prior and bounds, for the colour coefficients:
+    CostFunction* color_prior_cost = fitting::NormCost::Create<num_color_coeffs>();
+    ScaledLoss* color_prior_scaled_loss = new ScaledLoss(
+        nullptr, color_prior_cost_weight, Ownership::TAKE_OWNERSHIP); // weight was previously 35.0
+    fitting_costfunction.AddResidualBlock(color_prior_cost, color_prior_scaled_loss, &color_coefficients[0]);
+    for (int i = 0; i < num_color_coeffs; ++i)
     {
-        fitting_costfunction.SetParameterLowerBound(&colour_coefficients[0], i, -3.0);
-        fitting_costfunction.SetParameterUpperBound(&colour_coefficients[0], i, 3.0);
+        fitting_costfunction.SetParameterLowerBound(&color_coefficients[0], i, -3.0);
+        fitting_costfunction.SetParameterUpperBound(&color_coefficients[0], i, 3.0);
     }
 
     // Set different options for the full fitting:
-    /*	solver_options.linear_solver_type = ceres::ITERATIVE_SCHUR;
-            //solver_options.linear_solver_type = ceres::DENSE_QR;
-            //solver_options.minimizer_type = ceres::TRUST_REGION; // default I think
-            //solver_options.minimizer_type = ceres::LINE_SEARCH;
-            solver_options.num_threads = 8;
-            solver_options.num_linear_solver_threads = 8; // only SPARSE_SCHUR can use this
-            solver_options.minimizer_progress_to_stdout = true;
-            solver_options.max_num_iterations = 100;
-            */
+    // solver_options.linear_solver_type = ceres::ITERATIVE_SCHUR;
+    // solver_options.linear_solver_type = ceres::DENSE_QR;
+    // solver_options.minimizer_type = ceres::TRUST_REGION; // default I think
+    // solver_options.minimizer_type = ceres::LINE_SEARCH;
+    solver_options.num_threads = 16; // Make sure to adjust this, if you have fewer (or more) CPU cores.
+    // solver_options.minimizer_progress_to_stdout = true;
+    // solver_options.max_num_iterations = 100;
     Solve(solver_options, &fitting_costfunction, &solver_summary);
     std::cout << solver_summary.BriefReport() << "\n";
     end = std::chrono::steady_clock::now();
 
     // Draw the landmarks projected using all estimated parameters:
     // Construct the rotation & translation (model-view) matrices, projection matrix, and viewport:
-    estimated_rotation =
-        glm::dquat(camera_rotation[0], camera_rotation[1], camera_rotation[2], camera_rotation[3]);
-    rot_mtx = glm::mat4_cast(estimated_rotation);
-    t_mtx = get_translation_matrix(camera_translation_and_intrinsics, use_perspective);
-    projection_mtx = get_projection_matrix(camera_translation_and_intrinsics, aspect, use_perspective);
+    model_view_mtx = Eigen::Matrix4d::Identity();
+    model_view_mtx.block<3, 3>(0, 0) = camera_rotation.toRotationMatrix();
+    model_view_mtx.col(3).head<3>() = camera_translation;
+    projection_mtx = fitting::perspective(fov_y, aspect, 0.1, 1000.0);
 
     auto vectord_to_vectorf = [](const std::vector<double>& vec) {
         return std::vector<float>(std::begin(vec), std::end(vec));
     };
-    auto blendshape_coeffs_float = vectord_to_vectorf(blendshape_coefficients);
-    Eigen::VectorXf shape_ceres =
-        morphable_model.get_shape_model().draw_sample(shape_coefficients) +
-        to_matrix(blendshapes) *
-            Eigen::Map<const Eigen::VectorXf>(blendshape_coeffs_float.data(), blendshape_coeffs_float.size());
-    core::Mesh mesh = morphablemodel::sample_to_mesh(
-        shape_ceres, morphable_model.get_color_model().draw_sample(colour_coefficients),
-        morphable_model.get_shape_model().get_triangle_list(),
-        morphable_model.get_color_model().get_triangle_list(), morphable_model.get_texture_coordinates());
+    const auto shape_coeffs_float = vectord_to_vectorf(shape_coefficients);
+    const auto blendshape_coeffs_float = vectord_to_vectorf(blendshape_coefficients);
+    const auto color_coeffs_float = vectord_to_vectorf(color_coefficients);
+
+    morphablemodel::MorphableModel morphable_model_with_expressions(
+        morphable_model.get_shape_model(), blendshapes, morphable_model.get_color_model(),
+        morphable_model.get_landmark_definitions(), morphable_model.get_texture_coordinates(),
+        morphable_model.get_texture_triangle_indices());
+    const core::Mesh mesh = morphable_model_with_expressions.draw_sample(
+        shape_coeffs_float, blendshape_coeffs_float, color_coeffs_float);
+
     for (auto idx : vertex_indices)
     {
-        glm::dvec3 point_3d(mesh.vertices[idx][0], mesh.vertices[idx][1],
-                            mesh.vertices[idx][2]); // The 3D model point
-        glm::dvec3 projected_point = glm::project(point_3d, t_mtx * rot_mtx, projection_mtx, viewport);
-        cv::circle(outimg, cv::Point2f(projected_point.x, projected_point.y), 3,
+        const auto& point_3d = mesh.vertices[idx];
+        const auto projected_point =
+            fitting::project<double>(point_3d.cast<double>(), model_view_mtx, projection_mtx, viewport);
+        cv::circle(outimg, cv::Point2f(projected_point.x(), projected_point.y()), 3,
                    {0.0f, 76.0f, 255.0f}); // orange
     }
-    for (const auto& lm : image_points)
-    {
-        cv::circle(outimg, cv::Point2f(lm(0), lm(1)), 3,
-                   {0.0f, 255.0f,
-                    255.0f}); // yellow: subset of the detected LMs that we use (including contour landmarks)
-    }
 
-    estimated_rotation =
-        glm::dquat(camera_rotation[0], camera_rotation[1], camera_rotation[2], camera_rotation[3]);
-    euler_angles = glm::eulerAngles(estimated_rotation); // returns [P, Y, R]
-    fitting_log << "Final fit:\t\t\tYaw " << glm::degrees(euler_angles[1]) << ", Pitch "
-                << glm::degrees(euler_angles[0]) << ", Roll " << glm::degrees(euler_angles[2])
-                << "; t & f: " << camera_translation_and_intrinsics << '\n';
-    fitting_log << "Ceres took: "
-                << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms.\n";
+    // Note: We previously used contour landmarks and plotted image_points again here (in yellow), which then
+    // included the contour landmarks.
+
+    fitting_log << "Full fitting took: "
+                << std::chrono::duration_cast<std::chrono::seconds>(end - start).count() << "s.\n";
 
     cout << fitting_log.str();
 
diff --git a/include/eos/core/math.hpp b/include/eos/core/math.hpp
new file mode 100644
index 00000000..f5d75b36
--- /dev/null
+++ b/include/eos/core/math.hpp
@@ -0,0 +1,55 @@
+/*
+ * eos - A 3D Morphable Model fitting library written in modern C++11/14.
+ *
+ * File: include/eos/core/math.hpp
+ *
+ * Copyright 2023 Patrik Huber
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#ifndef EOS_MATH_HPP
+#define EOS_MATH_HPP
+
+#include <limits>
+
+namespace eos {
+namespace core {
+
+/**
+ * @brief Convert given degrees to radians.
+ */
+template <typename T>
+T radians(T degrees)
+{
+    // Note: We may want to remove this assert, to allow ceres::Jet as type.
+    static_assert(std::numeric_limits<T>::is_iec559, "radians() only accepts floating-point inputs.");
+    return degrees * static_cast<T>(0.01745329251994329576923690768489);
+}
+
+/**
+ * @brief Convert given radians to degree.
+ */
+template <typename T>
+T degrees(T radians)
+{
+    // Note: We may want to remove this assert, to allow ceres::Jet as type.
+    static_assert(std::numeric_limits<T>::is_iec559, "radians() only accepts floating-point inputs.");
+    return radians * static_cast<T>(57.295779513082320876798154814105);
+}
+
+} /* namespace core */
+} /* namespace eos */
+
+#endif /* EOS_MATH_HPP */
diff --git a/include/eos/fitting/ceres_nonlinear.hpp b/include/eos/fitting/ceres_nonlinear.hpp
index 4cd2d557..ac2be716 100644
--- a/include/eos/fitting/ceres_nonlinear.hpp
+++ b/include/eos/fitting/ceres_nonlinear.hpp
@@ -24,18 +24,13 @@
 
 #include "eos/morphablemodel/MorphableModel.hpp"
 #include "eos/morphablemodel/Blendshape.hpp"
+#include "eos/core/Image.hpp"
 
 #include "Eigen/Core"
 
-#include "glm/gtc/quaternion.hpp"
-#include "glm/gtx/transform.hpp"
-
 #include "ceres/ceres.h"
 #include "ceres/cubic_interpolation.h"
 
-#include "opencv2/core/core.hpp"
-
-#include <array>
 #include <vector>
 
 namespace eos {
@@ -43,17 +38,25 @@ namespace fitting {
 
 // Forward declarations:
 template <typename T>
-std::array<T, 3> get_shape_point(const morphablemodel::PcaModel& shape_model,
-                                 const std::vector<morphablemodel::Blendshape>& blendshapes, int vertex_id,
-                                 const T* const shape_coeffs, const T* const blendshape_coeffs);
+Eigen::Vector3<T> get_shape_point(const eos::morphablemodel::PcaModel& shape_model,
+                                  const eos::morphablemodel::Blendshapes& blendshapes, int vertex_id,
+                                  Eigen::Map<const Eigen::VectorX<T>> shape_coeffs,
+                                  Eigen::Map<const Eigen::VectorX<T>> blendshape_coeffs);
+
+template <typename T>
+Eigen::Vector3<T> get_vertex_colour(const eos::morphablemodel::PcaModel& color_model, int vertex_id,
+                                    Eigen::Map<const Eigen::VectorX<T>> color_coeffs);
+
+template <typename T>
+Eigen::Matrix4<T> perspective(T fov_y, T aspect, T z_near, T z_far);
 
 template <typename T>
-std::array<T, 3> get_vertex_colour(const morphablemodel::PcaModel& colour_model, int vertex_id,
-                                   const T* const colour_coeffs);
+Eigen::Vector3<T> project(const Eigen::Vector3<T>& obj, const Eigen::Matrix4<T>& model,
+                          const Eigen::Matrix4<T>& proj, const Eigen::Vector4<T>& viewport);
 
 /**
  * Cost function that consists of the parameter values themselves as residual.
- * 
+ *
  * If used with a squared loss, this corresponds to an L2 norm constraint on the parameters.
  * This class is implemented exactly like Ceres' NormalPrior.
  */
@@ -103,14 +106,12 @@ struct NormCost
 };
 
 /**
- * 2D landmark error cost function.
- * 
- * Computes the landmark reprojection error in 2D.
- * Models the cost for one landmark. The residual is 2-dim, [x, y].
- * Its input params are camera parameters, shape coefficients and
- * blendshape coefficients.
+ * 2D landmark error cost function, using perspective projection (in OpenGL convention).
+ *
+ * Computes the landmark reprojection error in 2D. Models the cost for one landmark. The residual is 2-dim,
+ * [x, y]. Its input params are camera parameters, shape coefficients and blendshape coefficients.
  */
-struct LandmarkCost
+struct PerspectiveProjectionLandmarkCost
 {
 
     /**
@@ -121,19 +122,23 @@ struct LandmarkCost
      *
      * @param[in] shape_model A PCA 3D shape model. Do not use a temporary.
      * @param[in] blendshapes A set of 3D blendshapes. Do not use a temporary.
+     * @param[in] num_shape_coeffs Number of shape coefficients that are being optimised for.
+     * @param[in] num_blendshape_coeffs Number of blendshape coefficients that are being optimised for.
      * @param[in] observed_landmark An observed 2D landmark in an image.
      * @param[in] vertex_id The vertex id that the given observed landmark corresponds to.
      * @param[in] image_width Width of the image that the 2D landmark is from (needed for the model
      * projection).
      * @param[in] image_height Height of the image.
-     * @param[in] use_perspective Whether a perspective or an orthographic projection should be used.
      */
-    LandmarkCost(const morphablemodel::PcaModel& shape_model,
-                 const std::vector<morphablemodel::Blendshape>& blendshapes, Eigen::Vector2f observed_landmark,
-                 int vertex_id, int image_width, int image_height, bool use_perspective)
-        : shape_model(shape_model), blendshapes(blendshapes), observed_landmark(observed_landmark),
+    PerspectiveProjectionLandmarkCost(const morphablemodel::PcaModel& shape_model,
+                                      const std::vector<morphablemodel::Blendshape>& blendshapes,
+                                      int num_shape_coeffs, int num_blendshape_coeffs,
+                                      Eigen::Vector2f observed_landmark, int vertex_id, int image_width,
+                                      int image_height)
+        : shape_model(shape_model), blendshapes(blendshapes), num_shape_coeffs(num_shape_coeffs),
+          num_blendshape_coeffs(num_blendshape_coeffs), observed_landmark(observed_landmark),
           vertex_id(vertex_id), image_width(image_width), image_height(image_height),
-          aspect_ratio(static_cast<double>(image_width) / image_height), use_perspective(use_perspective){};
+          aspect_ratio(static_cast<double>(image_width) / image_height){};
 
     /**
      * Landmark cost function implementation.
@@ -141,119 +146,121 @@ struct LandmarkCost
      * Measures the landmark reprojection error of the model with the estimated parameters and the observed 2D
      * landmarks. For one single landmark.
      *
-     * @param[in] camera_rotation A set of camera parameters, parameterised as a quaternion [w x y z].
-     * @param[in] camera_translation_and_intrinsics Camera translation and intrinsic parameters. Ortho: [t_x
-     * t_y frustum_scale]. Perspective: [t_x t_y t_z fov].
+     * @param[in] camera_rotation A set of camera rotation parameters, parameterised as an Eigen::Quaternion.
+     * @param[in] camera_translation Camera translation parameters: [t_x t_y t_z].
+     * @param[in] camera_intrinsics Camera intrinsics, containing the field of view (fov_y).
      * @param[in] shape_coeffs A set of PCA shape coefficients.
      * @param[in] blendshape_coeffs A set of blendshape coefficients.
      * @param[in] residual An array of the resulting residuals.
-     * @return Returns true. The ceres documentation is not clear about that I think.
+     * @return whether the computation of the residuals was successful. Always returns true.
      */
     template <typename T>
-    bool operator()(const T* const camera_rotation, const T* const camera_translation_and_intrinsics,
-                    const T* const shape_coeffs, const T* const blendshape_coeffs, T* residual) const
+    bool operator()(const T* const camera_rotation, const T* const camera_translation,
+                    const T* const camera_intrinsics, const T* const shape_coeffs,
+                    const T* const blendshape_coeffs, T* residual) const
     {
-        using namespace glm;
-        // Generate shape instance (of only one vertex id!) using current parameters and 10 shape
-        // coefficients: Note: Why are we not returning a glm::tvec3<T>?
-        const auto point_arr =
-            get_shape_point<T>(shape_model, blendshapes, vertex_id, shape_coeffs, blendshape_coeffs);
+        // Generate shape instance (of only one vertex id) using current coefficients:
+        Eigen::Map<const Eigen::VectorX<T>> shape_coeffs_mapped(shape_coeffs, num_shape_coeffs);
+        Eigen::Map<const Eigen::VectorX<T>> blendshape_coeffs_mapped(blendshape_coeffs,
+                                                                     num_blendshape_coeffs);
 
+        const Eigen::Vector3<T> point_3d = get_shape_at_point(shape_model, blendshapes, vertex_id,
+                                                              shape_coeffs_mapped, blendshape_coeffs_mapped);
         // Project the point to 2D:
-        const tvec3<T> point_3d(point_arr[0], point_arr[1], point_arr[2]);
         // I think the quaternion is always normalised because we run Ceres with QuaternionParameterization
-        const tquat<T> rot_quat(camera_rotation[0], camera_rotation[1], camera_rotation[2],
-                                camera_rotation[3]);
-        // We rotate ZXY*p, which is RPY*p. I'm not sure this matrix still corresponds to RPY - probably if we
-        // use glm::eulerAngles(), these are not RPY anymore and we'd have to adjust if we were to use
-        // rotation matrices.
-        const tmat4x4<T> rot_mtx = glm::mat4_cast(rot_quat);
+        // Previously, we used glm::tquat, which expects w, x, y, z, and called it with [0, 1, 2, 3].
+        // Eigen stores it as x, y, z, w. So... are we sure this is right?
+        Eigen::Map<const Eigen::Quaternion<T>> rotation(camera_rotation);
+        Eigen::Map<const Eigen::Vector3<T>> translation(camera_translation);
+
+        Eigen::Matrix4<T> model_view_mtx = Eigen::Matrix4<T>::Identity();
+        model_view_mtx.block<3, 3>(0, 0) = rotation.toRotationMatrix();
+        model_view_mtx.col(3).head<3>() = translation;
 
         // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp.
-        const tvec4<T> viewport(0, image_height, image_width, -image_height); // OpenCV convention
+        const Eigen::Vector4<T> viewport(T(0), T(image_height), T(image_width),
+                                         T(-image_height)); // OpenCV convention
+
+        const T& fov = camera_intrinsics[0];
+        const auto projection_mtx = perspective(fov, T(aspect_ratio), T(0.1), T(1000.0));
+
+        Eigen::Vector3<T> projected_point = project(point_3d, model_view_mtx, projection_mtx, viewport);
 
-        tvec3<T> projected_point; // Note: could avoid default construction by using a lambda and immediate
-                                  // invocation
-        if (use_perspective)
-        {
-            const auto t_mtx = glm::translate(tvec3<T>(camera_translation_and_intrinsics[0],
-                                                       camera_translation_and_intrinsics[1],
-                                                       camera_translation_and_intrinsics[2]));
-            const T& fov = camera_translation_and_intrinsics[3];
-            const auto persp_mtx = glm::perspective(fov, T(aspect_ratio), T(0.1), T(1000.0));
-            projected_point = glm::project(point_3d, t_mtx * rot_mtx, persp_mtx, viewport);
-        } else
-        {
-            const T& frustum_scale = camera_translation_and_intrinsics[2];
-            const auto t_mtx = glm::translate(
-                tvec3<T>(camera_translation_and_intrinsics[0], camera_translation_and_intrinsics[1],
-                         0.0)); // we don't have t_z in ortho camera, it doesn't matter where it is
-            const auto ortho_mtx =
-                glm::ortho(-1.0 * aspect_ratio * frustum_scale, 1.0 * aspect_ratio * frustum_scale,
-                           -1.0 * frustum_scale, 1.0 * frustum_scale);
-            projected_point = glm::project(point_3d, t_mtx * rot_mtx, ortho_mtx, viewport);
-        }
         // Residual: Projected point minus the observed 2D landmark point
-        residual[0] = projected_point.x - T(observed_landmark[0]);
-        residual[1] = projected_point.y - T(observed_landmark[1]);
+        residual[0] = projected_point.x() - T(observed_landmark[0]);
+        residual[1] = projected_point.y() - T(observed_landmark[1]);
         return true;
     };
 
+    /**
+     * Factory to hide the construction of the CostFunction object from the client code.
+     *
+     * The number of parameters are given as template arguments, so that we can use Ceres' fixed-size
+     * constructor.
+     */
+    template <int num_shape_coeffs, int num_blendshape_coeffs>
+    static ceres::CostFunction* Create(const eos::morphablemodel::PcaModel& shape_model,
+                                       const eos::morphablemodel::Blendshapes& blendshapes,
+                                       Eigen::Vector2f observed_landmark, int vertex_id, int image_width,
+                                       int image_height)
+    {
+        // Template arguments: 2 residuals, 4 (camera rotation), 3 (camera translation), 1 (camera
+        // intrinsics), n (shape coeffs), m (blendshape coeffs)
+        return (new ceres::AutoDiffCostFunction<PerspectiveProjectionLandmarkCost, 2, 4, 3, 1,
+                                                num_shape_coeffs, num_blendshape_coeffs>(
+            new PerspectiveProjectionLandmarkCost(shape_model, blendshapes, num_shape_coeffs,
+                                                  num_blendshape_coeffs, observed_landmark, vertex_id,
+                                                  image_width, image_height)));
+    };
+
 private:
     const morphablemodel::PcaModel&
         shape_model; // Or store as pointer (non-owning) or std::reference_wrapper?
     const std::vector<morphablemodel::Blendshape>& blendshapes;
+    const int num_shape_coeffs;
+    const int num_blendshape_coeffs;
     const Eigen::Vector2f observed_landmark;
     const int vertex_id;
     const int image_width;
     const int image_height;
     const double aspect_ratio;
-    const bool use_perspective;
 };
 
 /**
  * Image error cost function (at vertex locations).
- * 
+ *
  * Measures the RGB image error between a particular vertex point of the 3D
  * model at its projected location and the observed input image.
  * Models the cost for 1 vertex. The residual is 3-dim, [r, g, b].
  * Its input params are cam, shape-coeffs, BS-coeffs and colour coeffs.
  * This projects the vertex locations - so not a full rendering pass.
  */
-struct ImageCost
+struct VertexColorCost
 {
     /**
      * Constructs a new cost function object for a particular vertex id that measures the RGB image error
      * between the estimated model point and the observed input image.
      *
-     * Warning: Don't put in temporaries for \c morphable_model and \c blendshapes! We don't make a copy, we
-     * store a reference to what is given to the function!
+     * Warning: Don't put in temporaries for \c shape_model, \c blendshapes and \c color_model. We don't make
+     * a copy, we store a reference to what is given to the function.
      *
-     * @param[in] morphable_model A 3D Morphable Model. Do not use a temporary.
+     * @param[in] shape_model A PCA 3D shape model. Do not use a temporary.
      * @param[in] blendshapes A set of 3D blendshapes. Do not use a temporary.
-     * @param[in] image The observed image. TODO: We should assert that the image we get is 8UC3!
+     * @param[in] shape_model A PCA 3D color model. Do not use a temporary.
+     * @param[in] num_shape_coeffs Number of shape coefficients that are being optimised for.
+     * @param[in] num_blendshape_coeffs Number of blendshape coefficients that are being optimised for.
+     * @param[in] num_color_coeffs Number of colour coefficients that are being optimised for.
      * @param[in] vertex_id Vertex id of the 3D model that should be projected and measured.
-     * @param[in] use_perspective Whether a perspective or an orthographic projection should be used.
-     * @throws std::runtime_error if the given \c image is not of type CV_8UC3.
+     * @param[in] image The observed image.
      */
-    ImageCost(const morphablemodel::MorphableModel& morphable_model,
-              const std::vector<morphablemodel::Blendshape>& blendshapes, cv::Mat image, int vertex_id,
-              bool use_perspective)
-        : morphable_model(morphable_model), blendshapes(blendshapes), image(image),
-          aspect_ratio(static_cast<double>(image.cols) / image.rows), vertex_id(vertex_id),
-          use_perspective(use_perspective)
-    {
-        if (image.type() != CV_8UC3)
-        {
-            throw std::runtime_error("The image given to ImageCost must be of type CV_8UC3.");
-        }
-        if (!morphable_model.has_color_model())
-        {
-            throw std::runtime_error("The MorphableModel used does not contain a colour (albedo) model. "
-                                     "ImageCost requires a model that contains a colour PCA model. You may "
-                                     "want to use the full Surrey Face Model.");
-        }
-    };
+    VertexColorCost(const eos::morphablemodel::PcaModel& shape_model,
+                    const eos::morphablemodel::Blendshapes& blendshapes,
+                    const eos::morphablemodel::PcaModel& color_model, int num_shape_coeffs,
+                    int num_blendshape_coeffs, int num_color_coeffs, int vertex_id,
+                    const eos::core::Image3u& image)
+        : shape_model(shape_model), blendshapes(blendshapes), color_model(color_model),
+          num_shape_coeffs(num_shape_coeffs), num_blendshape_coeffs(num_blendshape_coeffs),
+          num_color_coeffs(num_color_coeffs), vertex_id(vertex_id), image(image){};
 
     /**
      * Image cost function implementation.
@@ -263,110 +270,105 @@ struct ImageCost
      *
      * Todo: We should deal with visibility! Don't evaluate when the vertex is self-occluded.
      *
-     * @param[in] camera_rotation A set of camera parameters, parameterised as a quaternion [w x y z].
-     * @param[in] camera_translation_and_intrinsics Camera translation and intrinsic parameters. Ortho: [t_x
-     * t_y frustum_scale]. Perspective: [t_x t_y t_z fov].
+     * @param[in] camera_rotation A set of camera rotation parameters, parameterised as an Eigen::Quaternion.
+     * @param[in] camera_translation Camera translation parameters: [t_x t_y t_z].
+     * @param[in] camera_intrinsics Camera intrinsics, containing the field of view (fov_y).
      * @param[in] shape_coeffs A set of PCA shape coefficients.
      * @param[in] blendshape_coeffs A set of blendshape coefficients.
      * @param[in] color_coeffs A set of PCA colour (albedo) coefficients.
      * @param[in] residual An array of the resulting residuals.
-     * @return Returns true. The ceres documentation is not clear about that I think.
+     * @return whether the computation of the residuals was successful. Always returns true.
      */
     template <typename T>
-    bool operator()(const T* const camera_rotation, const T* const camera_translation_and_intrinsics,
-                    const T* const shape_coeffs, const T* const blendshape_coeffs,
-                    const T* const color_coeffs, T* residual) const
+    bool operator()(const T* const camera_rotation, const T* const camera_translation,
+                    const T* const camera_intrinsics, const T* const shape_coeffs,
+                    const T* const blendshape_coeffs, const T* const color_coeffs, T* residual) const
     {
-        using namespace glm;
-        // Note: The following is all duplicated code with LandmarkCost. Fix if possible performance-wise.
-        // Generate 3D shape point using the current parameters:
-        const auto point_arr = get_shape_point<T>(morphable_model.get_shape_model(), blendshapes, vertex_id,
-                                                  shape_coeffs, blendshape_coeffs);
+        // The following code blocks are identical to PerspectiveProjectionLandmarkCost:
+
+        // Generate shape instance (of only one vertex id) using current coefficients:
+        Eigen::Map<const Eigen::VectorX<T>> shape_coeffs_mapped(shape_coeffs, num_shape_coeffs);
+        Eigen::Map<const Eigen::VectorX<T>> blendshape_coeffs_mapped(blendshape_coeffs,
+                                                                     num_blendshape_coeffs);
 
+        const Eigen::Vector3<T> point_3d = get_shape_at_point(shape_model, blendshapes, vertex_id,
+                                                              shape_coeffs_mapped, blendshape_coeffs_mapped);
         // Project the point to 2D:
-        const tvec3<T> point_3d(point_arr[0], point_arr[1], point_arr[2]);
         // I think the quaternion is always normalised because we run Ceres with QuaternionParameterization
-        const tquat<T> rot_quat(camera_rotation[0], camera_rotation[1], camera_rotation[2],
-                                camera_rotation[3]);
-        // We rotate ZXY*p, which is RPY*p. I'm not sure this matrix still corresponds to RPY - probably if we
-        // use glm::eulerAngles(), these are not RPY anymore and we'd have to adjust if we were to use
-        // rotation matrices.
-        const tmat4x4<T> rot_mtx = glm::mat4_cast(rot_quat);
+        // Previously, we used glm::tquat, which expects w, x, y, z, and called it with [0, 1, 2, 3].
+        // Eigen stores it as x, y, z, w. So... are we sure this is right?
+        Eigen::Map<const Eigen::Quaternion<T>> rotation(camera_rotation);
+        Eigen::Map<const Eigen::Vector3<T>> translation(camera_translation);
 
-        // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp.
-        const tvec4<T> viewport(0, image.rows, image.cols, -image.rows); // OpenCV convention
+        Eigen::Matrix4<T> model_view_mtx = Eigen::Matrix4<T>::Identity();
+        model_view_mtx.block<3, 3>(0, 0) = rotation.toRotationMatrix();
+        model_view_mtx.col(3).head<3>() = translation;
 
-        tvec3<T> projected_point;
-        if (use_perspective)
-        {
-            const auto t_mtx = glm::translate(tvec3<T>(camera_translation_and_intrinsics[0],
-                                                       camera_translation_and_intrinsics[1],
-                                                       camera_translation_and_intrinsics[2]));
-            const T& focal = camera_translation_and_intrinsics[3];
-            const auto persp_mtx = glm::perspective(focal, T(aspect_ratio), T(0.1), T(1000.0));
-            projected_point = glm::project(point_3d, t_mtx * rot_mtx, persp_mtx, viewport);
-        } else
-        {
-            const T& frustum_scale = camera_translation_and_intrinsics[2];
-            const auto t_mtx = glm::translate(
-                tvec3<T>(camera_translation_and_intrinsics[0], camera_translation_and_intrinsics[1],
-                         0.0)); // we don't have t_z in ortho camera, it doesn't matter where it is
-            const auto ortho_mtx =
-                glm::ortho(-1.0 * aspect_ratio * frustum_scale, 1.0 * aspect_ratio * frustum_scale,
-                           -1.0 * frustum_scale, 1.0 * frustum_scale);
-            projected_point = glm::project(point_3d, t_mtx * rot_mtx, ortho_mtx, viewport);
-        }
+        // Todo: use get_opencv_viewport() from nonlin_cam_esti.hpp.
+        const Eigen::Vector4<T> viewport(T(0), T(image.height()), T(image.width()),
+                                         T(-image.height())); // OpenCV convention
+
+        const T& fov = camera_intrinsics[0];
+        const double aspect_ratio = static_cast<double>(image.width()) / image.height();
+        const auto projection_mtx = perspective(fov, T(aspect_ratio), T(0.1), T(1000.0));
+
+        Eigen::Vector3<T> projected_point = project(point_3d, model_view_mtx, projection_mtx, viewport);
+        // End of identical block with PerspectiveProjectionLandmarkCost.
+
+        // Access the image colour value at the projected pixel location, if inside the image - otherwise
+        // Ceres uses the value from the grid edge.
+        // Note: We could store the BiCubicInterpolator as member variable.
+        // The default template arguments for Grid2D are <T, kDataDim=1, kRowMajor=true,
+        // kInterleaved=true> and (except for the dimension), they're the right ones for us.
+        ceres::Grid2D<unsigned char, 3> grid(&image(0, 0).data()[0], 0, image.height(), 0, image.width());
+        ceres::BiCubicInterpolator<ceres::Grid2D<unsigned char, 3>> interpolator(grid);
+        T observed_colour[3];
+        interpolator.Evaluate(projected_point.y(), projected_point.x(), &observed_colour[0]);
+
+        // Get the vertex's colour value:
+        Eigen::Map<const Eigen::VectorX<T>> color_coeffs_mapped(color_coeffs, num_color_coeffs);
+        const auto model_color = get_vertex_color_at_point(color_model, vertex_id, color_coeffs_mapped);
+        // Returns RGB, between [0, 1].
+
+        // Residual: Vertex colour of model point minus the observed colour in the 2D image
+        // observed_colour is RGB, model_colour is RGB. Residual will be RGB.
+        residual[0] = model_color[0] * 255.0 - T(observed_colour[0]); // r
+        residual[1] = model_color[1] * 255.0 - T(observed_colour[1]); // g
+        residual[2] = model_color[2] * 255.0 - T(observed_colour[2]); // b
 
-        // Access the image colour value at the projected pixel location, if inside the image - otherwise set
-        // to (127, 127, 127) (maybe not ideal...):
-        if (projected_point.y < T(0) || projected_point.y >= T(image.rows) || projected_point.x < T(0) ||
-            projected_point.x >= T(image.cols))
-        {
-            // The point is outside the image.
-            residual[0] = T(127.0);
-            residual[1] = T(127.0);
-            residual[2] = T(127.0);
-
-            // TODO: What does interpolator.Evaluate() return in this case?
-            /*	Grid2D<uchar, 3> grid(image.ptr(0), 0, image.rows, 0, image.cols);
-            BiCubicInterpolator<Grid2D<uchar, 3>> interpolator(grid);
-            T observed_colour[3];
-            interpolator.Evaluate(projected_y, projected_x, &observed_colour[0]); // says it returns false when (r, c) is out of bounds... but it returns void?
-            //std::cout << observed_colour[0] << ", " << observed_colour[1] << ", " << observed_colour[2] << "\n";
-            */
-            // It kind of looks like as if when it's out of bounds, there will be a vector out of bound access
-            // and an assert/crash? No, in debugging, it looks like it just interpolates or something. Not
-            // clear currently.
-        } else
-        {
-            // Note: We could store the BiCubicInterpolator as member variable.
-            // The default template arguments for Grid2D are <T, kDataDim=1, kRowMajor=true,
-            // kInterleaved=true> and (except for the dimension), they're the right ones for us.
-            ceres::Grid2D<uchar, 3> grid(image.ptr(0), 0, image.rows, 0, image.cols);
-            ceres::BiCubicInterpolator<ceres::Grid2D<uchar, 3>> interpolator(grid);
-            T observed_colour[3];
-            interpolator.Evaluate(projected_point.y, projected_point.x, &observed_colour[0]);
-
-            // This probably needs to be modified if we add a light model.
-            auto model_colour = get_vertex_colour(morphable_model.get_color_model(), vertex_id, color_coeffs);
-            // I think this returns RGB, and between [0, 1].
-
-            // Residual: Vertex colour of model point minus the observed colour in the 2D image
-            // observed_colour is BGR, model_colour is RGB. Residual will be RGB.
-            residual[0] = model_colour[0] * 255.0 - T(observed_colour[2]);
-            residual[1] = model_colour[1] * 255.0 - T(observed_colour[1]);
-            residual[2] = model_colour[2] * 255.0 - T(observed_colour[0]);
-        }
         return true;
     };
 
+    /**
+     * Factory to hide the construction of the CostFunction object from the client code.
+     *
+     * The number of parameters are given as template arguments, so that we can use Ceres' fixed-size
+     * constructor.
+     */
+    template <int num_shape_coeffs, int num_blendshape_coeffs, int num_color_coeffs>
+    static ceres::CostFunction* Create(const eos::morphablemodel::PcaModel& shape_model,
+                                       const eos::morphablemodel::Blendshapes& blendshapes,
+                                       const eos::morphablemodel::PcaModel& color_model, int vertex_id,
+                                       const eos::core::Image3u& image)
+    {
+        // Template arguments: 3 residuals (RGB), 4 (camera rotation), 3 (camera translation), 1 (camera
+        // intrinsics), x (shape coeffs), x (expr coeffs), x (colour coeffs)
+        return (new ceres::AutoDiffCostFunction<VertexColorCost, 3, 4, 3, 1, num_shape_coeffs,
+                                                num_blendshape_coeffs, num_color_coeffs>(
+            new VertexColorCost(shape_model, blendshapes, color_model, num_shape_coeffs,
+                                num_blendshape_coeffs, num_color_coeffs, vertex_id, image)));
+    };
+
 private:
-    const morphablemodel::MorphableModel& morphable_model; // Or store as pointer (non-owning) or std::reference_wrapper?
-    const std::vector<morphablemodel::Blendshape>& blendshapes;
-    const cv::Mat image; // the observed image
-    const double aspect_ratio;
+    const eos::morphablemodel::PcaModel&
+        shape_model; // Or store as pointer (non-owning) or std::reference_wrapper?
+    const eos::morphablemodel::Blendshapes& blendshapes;
+    const eos::morphablemodel::PcaModel& color_model;
+    const int num_shape_coeffs;
+    const int num_blendshape_coeffs;
+    const int num_color_coeffs;
     const int vertex_id;
-    const bool use_perspective;
+    const eos::core::Image3u& image; // the observed image
 };
 
 /**
@@ -380,45 +382,25 @@ struct ImageCost
  * @return The 3D point.
  */
 template <typename T>
-std::array<T, 3> get_shape_point(const morphablemodel::PcaModel& shape_model,
-                                 const std::vector<morphablemodel::Blendshape>& blendshapes, int vertex_id,
-                                 const T* const shape_coeffs, const T* const blendshape_coeffs)
+Eigen::Vector3<T> get_shape_at_point(const eos::morphablemodel::PcaModel& shape_model,
+                                     const eos::morphablemodel::Blendshapes& blendshapes, int vertex_id,
+                                     Eigen::Map<const Eigen::VectorX<T>> shape_coeffs,
+                                     Eigen::Map<const Eigen::VectorX<T>> blendshape_coeffs)
 {
-    int num_coeffs_fitting = 10; // Todo: Should be inferred or a function parameter!
-    auto mean = shape_model.get_mean_at_point(vertex_id);
-    auto basis = shape_model.get_rescaled_pca_basis_at_point(vertex_id);
-    // Computing Shape = mean + basis * coeffs:
-    // Note: Could use an Eigen matrix with type T to see if it gives a speedup.
-    std::array<T, 3> point{T(mean[0]), T(mean[1]), T(mean[2])};
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[0] += T(basis.row(0).col(i)(0)) * shape_coeffs[i]; // it seems to be ~15% faster when these are
-                                                                 // static_cast<double>() instead of T()?
-    }
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[1] += T(basis.row(1).col(i)(0)) * shape_coeffs[i];
-    }
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[2] += T(basis.row(2).col(i)(0)) * shape_coeffs[i];
-    }
-    // Adding the blendshape offsets:
-    // Shape = mean + basis * coeffs + blendshapes * bs_coeffs:
-    auto num_blendshapes = blendshapes.size();
-    for (int i = 0; i < num_blendshapes; ++i)
+    // Computing Shape = mean + shape_basis*shape_coeffs + blendshapes*blendshape_coeffs:
+    const Eigen::Vector3f mean = shape_model.get_mean_at_point(vertex_id);
+    const Eigen::Vector3<T> shape_vector =
+        shape_model.get_rescaled_pca_basis_at_point(vertex_id).leftCols(shape_coeffs.size()).cast<T>() *
+        shape_coeffs;
+    Eigen::Vector3<T> expression_vector(T(0.0), T(0.0), T(0.0));
+    for (std::size_t i = 0; i < blendshape_coeffs.size(); i++)
     {
-        point[0] += T(blendshapes[i].deformation(3 * vertex_id + 0)) * blendshape_coeffs[i];
+        expression_vector.x() += T(blendshapes[i].deformation(vertex_id * 3 + 0)) * blendshape_coeffs(i);
+        expression_vector.y() += T(blendshapes[i].deformation(vertex_id * 3 + 1)) * blendshape_coeffs(i);
+        expression_vector.z() += T(blendshapes[i].deformation(vertex_id * 3 + 2)) * blendshape_coeffs(i);
     }
-    for (int i = 0; i < num_blendshapes; ++i)
-    {
-        point[1] += T(blendshapes[i].deformation(3 * vertex_id + 1)) * blendshape_coeffs[i];
-    }
-    for (int i = 0; i < num_blendshapes; ++i)
-    {
-        point[2] += T(blendshapes[i].deformation(3 * vertex_id + 2)) * blendshape_coeffs[i];
-    }
-    return point;
+
+    return Eigen::Vector3<T>(mean.cast<T>() + shape_vector + expression_vector);
 };
 
 /**
@@ -430,31 +412,87 @@ std::array<T, 3> get_shape_point(const morphablemodel::PcaModel& shape_model,
  * @return The colour. As RGB? In [0, 1]?
  */
 template <typename T>
-std::array<T, 3> get_vertex_colour(const morphablemodel::PcaModel& color_model, int vertex_id,
-                                   const T* const color_coeffs)
+Eigen::Vector3<T> get_vertex_color_at_point(const eos::morphablemodel::PcaModel& color_model, int vertex_id,
+                                            Eigen::Map<const Eigen::VectorX<T>> color_coeffs)
 {
-    int num_coeffs_fitting = 10; // Todo: Should be inferred or a function parameter!
-    auto mean = color_model.get_mean_at_point(vertex_id);
-    auto basis = color_model.get_rescaled_pca_basis_at_point(vertex_id);
-    // Computing Colour = mean + basis * coeffs
-    // Note: Could use an Eigen matrix with type T to see if it gives a speedup.
-    std::array<T, 3> point{T(mean[0]), T(mean[1]), T(mean[2])};
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[0] += T(basis.row(0).col(i)(0)) * color_coeffs[i]; // it seems to be ~15% faster when these are
-                                                                 // static_cast<double>() instead of T()?
-    }
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[1] += T(basis.row(1).col(i)(0)) * color_coeffs[i];
-    }
-    for (int i = 0; i < num_coeffs_fitting; ++i)
-    {
-        point[2] += T(basis.row(2).col(i)(0)) * color_coeffs[i];
-    }
-    return point;
+    const Eigen::Vector3f mean = color_model.get_mean_at_point(vertex_id);
+    const Eigen::Vector3<T> color_vector =
+        color_model.get_rescaled_pca_basis_at_point(vertex_id).leftCols(color_coeffs.size()).cast<T>() *
+        color_coeffs;
+
+    return Eigen::Vector3<T>(mean.cast<T>() + color_vector);
 };
 
+/**
+ * Creates a matrix for a right-handed, symmetric perspective-view frustrum.
+ *
+ * The function follows the OpenGL clip volume definition, which is also the GLM default. The near and far
+ * clip planes correspond to z normalized device coordinates of -1 and +1 respectively.
+ *
+ * This function is equivalent to glm::perspectiveRH_NO(...).
+ *
+ * More details can be found on the gluPerspective man page:
+ * https://www.khronos.org/registry/OpenGL-Refpages/gl2.1/xhtml/gluPerspective.xml.
+ *
+ * @param[in] fov_y Specifies the field of view angle in the y direction. Expressed in radians.
+ * @param[in] aspect Specifies the aspect ratio that determines the field of view in the x direction. The
+ * aspect ratio is the ratio of x (width) to y (height).
+ * @param[in] z_near Specifies the distance from the viewer to the near clipping plane (always positive).
+ * @param[in] z_far Specifies the distance from the viewer to the far clipping plane (always positive).
+ * @tparam T A floating-point scalar type, ceres::Jet, or similar compatible type.
+ * @return The corresponding perspective projection matrix.
+ */
+template <typename T>
+Eigen::Matrix4<T> perspective(T fov_y, T aspect, T z_near, T z_far)
+{
+    // Will this assert work? std::abs probably won't work on T?
+    // assert(abs(aspect - std::numeric_limits<T>::epsilon()) > static_cast<T>(0));
+
+    const T tan_half_fov_y = tan(fov_y / static_cast<T>(2)); // ceres::tan?
+
+    // Maybe construct with static_cast<T>(0)? => No, doesn't have c'tor.
+    // Could do Eigen::Matrix4<T> result = {{1, 2, 3, 4}, {1, 2, 3, 4}...} I think.
+    Eigen::Matrix4<T> result = Eigen::Matrix4<T>::Zero();
+    result(0, 0) = static_cast<T>(1) / (aspect * tan_half_fov_y);
+    result(1, 1) = static_cast<T>(1) / (tan_half_fov_y);
+    result(2, 2) = -(z_far + z_near) / (z_far - z_near);
+    result(2, 3) = -static_cast<T>(1);
+    result(3, 2) = -(static_cast<T>(2) * z_far * z_near) / (z_far - z_near);
+    return result;
+}
+
+/**
+ * Project the given point_3d (from object coordinates) into window coordinates.
+ *
+ * The function follows the OpenGL clip volume definition. The near and far clip planes correspond to
+ * z normalized device coordinates of -1 and +1 respectively.
+ * This function is equivalent to glm::projectNO(...).
+ *
+ * More details can be found on the gluProject man page:
+ * https://www.khronos.org/registry/OpenGL-Refpages/gl2.1/xhtml/gluProject.xml.
+ *
+ * @param[in] point_3d A 3D point in object coordinates.
+ * @param[in] modelview_matrix A model-view matrix, transforming the point into view (camera) space.
+ * @param[in] projection_matrix The projection matrix to be used.
+ * @param[in] viewport The viewport transformation to be used.
+ * @tparam T A floating-point scalar type, ceres::Jet, or similar compatible type.
+ * @return Return the computed window coordinates.
+ */
+template <typename T>
+Eigen::Vector3<T> project(const Eigen::Vector3<T>& point_3d, const Eigen::Matrix4<T>& modelview_matrix,
+                          const Eigen::Matrix4<T>& projection_matrix, const Eigen::Vector4<T>& viewport)
+{
+    Eigen::Vector4<T> projected_point = projection_matrix * modelview_matrix * point_3d.homogeneous();
+    projected_point /= projected_point.w();
+    projected_point =
+        projected_point * static_cast<T>(0.5) +
+        Eigen::Vector4<T>(static_cast<T>(0.5), static_cast<T>(0.5), static_cast<T>(0.5), static_cast<T>(0.5));
+    projected_point.x() = projected_point.x() * T(viewport(2)) + T(viewport(0));
+    projected_point.y() = projected_point.y() * T(viewport(3)) + T(viewport(1));
+
+    return projected_point.head<3>();
+}
+
 } /* namespace fitting */
 } /* namespace eos */