tutorial/tracking/model-based/generic-rgbd-blender/tutorial-mb-generic-tracker-rgbd-blender.cpp

//! \example tutorial-mb-generic-tracker-rgbd-blender.cpp
#include <iostream>

#include <visp3/core/vpDisplay.h>
#include <visp3/core/vpIoTools.h>
#include <visp3/core/vpXmlParserCamera.h>
#include <visp3/gui/vpDisplayGDI.h>
#include <visp3/gui/vpDisplayOpenCV.h>
#include <visp3/gui/vpDisplayX.h>
#include <visp3/io/vpImageIo.h>
#include <visp3/mbt/vpMbGenericTracker.h>

#if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS) && defined(VISP_HAVE_PUGIXML)
namespace
{
bool read_data(unsigned int cpt, const std::string &video_color_images, const std::string &video_depth_images,
               bool disable_depth, const std::string &video_ground_truth,
               vpImage<unsigned char> &I, vpImage<uint16_t> &I_depth_raw,
               unsigned int &depth_width, unsigned int &depth_height,
               std::vector<vpColVector> &pointcloud, const vpCameraParameters &cam_depth,
               vpHomogeneousMatrix &cMo_ground_truth)
{
  char buffer[FILENAME_MAX];
  // Read color
  snprintf(buffer, FILENAME_MAX, video_color_images.c_str(), cpt);
  std::string filename_color = buffer;

  if (!vpIoTools::checkFilename(filename_color)) {
    std::cerr << "Cannot read: " << filename_color << std::endl;
    return false;
  }
  vpImageIo::read(I, filename_color);

  if (!disable_depth) {
    // Read depth
    snprintf(buffer, FILENAME_MAX, video_depth_images.c_str(), cpt);
    std::string filename_depth = buffer;

    if (!vpIoTools::checkFilename(filename_depth)) {
      std::cerr << "Cannot read: " << filename_depth << std::endl;
      return false;
    }
    cv::Mat depth_raw = cv::imread(filename_depth, cv::IMREAD_ANYDEPTH | cv::IMREAD_ANYCOLOR);
    if (depth_raw.empty()) {
      std::cerr << "Cannot read: " << filename_depth << std::endl;
      return false;
    }

    depth_width = static_cast<unsigned int>(depth_raw.cols);
    depth_height = static_cast<unsigned int>(depth_raw.rows);
    I_depth_raw.resize(depth_height, depth_width);
    pointcloud.resize(depth_width * depth_height);

    for (int i = 0; i < depth_raw.rows; i++) {
      for (int j = 0; j < depth_raw.cols; j++) {
        I_depth_raw[i][j] = static_cast<uint16_t>(32767.5f * depth_raw.at<cv::Vec3f>(i, j)[0]);
        double x = 0.0, y = 0.0;
        // Manually limit the field of view of the depth camera
        double Z = depth_raw.at<cv::Vec3f>(i, j)[0] > 2.0f ? 0.0 : static_cast<double>(depth_raw.at<cv::Vec3f>(i, j)[0]);
        vpPixelMeterConversion::convertPoint(cam_depth, j, i, x, y);
        size_t idx = static_cast<size_t>(i * depth_raw.cols + j);
        pointcloud[idx].resize(3);
        pointcloud[idx][0] = x * Z;
        pointcloud[idx][1] = y * Z;
        pointcloud[idx][2] = Z;
      }
    }
  }

  // Read ground truth
  snprintf(buffer, FILENAME_MAX, video_ground_truth.c_str(), cpt);
  std::string filename_pose = buffer;

  cMo_ground_truth.load(filename_pose);

  return true;
}
} // namespace

void usage(const char **argv, int error, const std::string &data_path, const std::string &model_path, int first_frame)
{
  std::cout << "Synopsis" << std::endl
    << "  " << argv[0]
    << " [--data-path <path>] [--model-path <path>] [--first-frame <index>] [--disable-depth] "
    << " [--disable-klt] [--step-by-step] [--display-ground-truth] [--help, -h]" << std::endl
    << std::endl;
  std::cout << "Description" << std::endl
    << "  --data-path <path>   Path to the data generated by Blender get_camera_pose_teabox.py" << std::endl
    << "    Python script."
    << "    Default: " << data_path << std::endl
    << std::endl
    << "  --model-path <path>   Path to the cad model and tracker settings." << std::endl
    << "    Default: " << model_path << std::endl
    << std::endl
    << "  --first-frame <index>   First frame number to process." << std::endl
    << "    Default: " << first_frame << std::endl
    << std::endl
    << "  --disable-depth  Flag to turn off tracker depth features." << std::endl
    << std::endl
    << "  --disable-klt  Flag to turn off tracker keypoints features." << std::endl
    << std::endl
    << "  --step-by-step  Flag to enable step by step mode." << std::endl
    << std::endl
    << "  --display-ground-truth  Flag to enable displaying ground truth." << std::endl
    << "    When this flag is enabled, there is no tracking. This flag is useful" << std::endl
    << "    to validate the ground truth over the rendered images." << std::endl
    << std::endl
    << "  --help, -h  Print this helper message." << std::endl
    << std::endl;
  if (error) {
    std::cout << "Error" << std::endl
      << "  "
      << "Unsupported parameter " << argv[error] << std::endl;
  }
}

int main(int argc, const char **argv)
{
  std::string opt_data_path = "data/teabox";
  std::string opt_model_path = "model/teabox";
  unsigned int opt_first_frame = 1;
  bool opt_disable_depth = false;
  bool opt_disable_klt = false;
  bool opt_display_ground_truth = false;
  bool opt_step_by_step = false;

  for (int i = 1; i < argc; i++) {
    if (std::string(argv[i]) == "--data-path" && i + 1 < argc) {
      opt_data_path = std::string(argv[i + 1]);
      i++;
    }
    else if (std::string(argv[i]) == "--model-path" && i + 1 < argc) {
      opt_model_path = std::string(argv[i + 1]);
      i++;
    }
    else if (std::string(argv[i]) == "--disable-depth") {
      opt_disable_depth = true;
    }
    else if (std::string(argv[i]) == "--disable-klt") {
      opt_disable_klt = true;
    }
    else if (std::string(argv[i]) == "--display-ground-truth") {
      opt_display_ground_truth = true;
    }
    else if (std::string(argv[i]) == "--step-by-step") {
      opt_step_by_step = true;
    }
    else if (std::string(argv[i]) == "--first-frame" && i + 1 < argc) {
      opt_first_frame = static_cast<unsigned int>(atoi(argv[i + 1]));
      i++;
    }
    else if (std::string(argv[i]) == "--help" || std::string(argv[i]) == "-h") {
      usage(argv, 0, opt_data_path, opt_model_path, opt_first_frame);
      return EXIT_SUCCESS;
    }
    else {
      usage(argv, i, opt_data_path, opt_model_path, opt_first_frame);
      return EXIT_FAILURE;
    }
  }

  std::string video_color_images = vpIoTools::createFilePath(opt_data_path, "color/%04d_L.jpg");
  std::string video_depth_images = vpIoTools::createFilePath(opt_data_path, "depth/Image%04d_R.exr");
  std::string ground_truth = vpIoTools::createFilePath(opt_data_path, "ground-truth/Camera_L_%04d.txt");
  std::string extrinsic_file = vpIoTools::createFilePath(opt_data_path, "depth_M_color.txt");
  std::string color_camera_name = "Camera_L";
  std::string depth_camera_name = "Camera_R";
  std::string color_intrinsic_file = vpIoTools::createFilePath(opt_data_path, color_camera_name + ".xml");
  std::string depth_intrinsic_file = vpIoTools::createFilePath(opt_data_path, depth_camera_name + ".xml");
  std::string mbt_config_color = vpIoTools::createFilePath(opt_model_path, "teabox_color.xml");
  std::string mbt_config_depth = vpIoTools::createFilePath(opt_model_path, "teabox_depth.xml");
  std::string mbt_cad_model = vpIoTools::createFilePath(opt_model_path, "teabox.cao");
  std::string mbt_init_file = vpIoTools::createFilePath(opt_model_path, "teabox.init");

  std::cout << "Input data" << std::endl;
  std::cout << "  Color images    : " << video_color_images << std::endl;
  std::cout << "  Depth images    : " << (opt_disable_depth ? "Disabled" : video_depth_images) << std::endl;
  std::cout << "  Extrinsics      : " << (opt_disable_depth ? "Disabled" : extrinsic_file) << std::endl;
  std::cout << "  Color intrinsics: " << color_intrinsic_file << std::endl;
  std::cout << "  Depth intrinsics: " << (opt_disable_depth ? "Disabled" : depth_intrinsic_file) << std::endl;
  std::cout << "  Ground truth    : " << ground_truth << std::endl;
  std::cout << "Tracker settings" << std::endl;
  std::cout << "  Color config    : " << mbt_config_color << std::endl;
  std::cout << "  Depth config    : " << mbt_config_depth << std::endl;
  std::cout << "  CAD model       : " << mbt_cad_model << std::endl;
  std::cout << "  First frame     : " << opt_first_frame << std::endl;
  std::cout << "  Step by step    : " << opt_step_by_step << std::endl;
  if (opt_display_ground_truth) {
    std::cout << "  Ground truth is used to project the cad model (no tracking)" << std::endl;
  }
  else {
    std::cout << "  Init file       : " << mbt_init_file << std::endl;
    std::cout << "  Features        : moving-edges " << (opt_disable_klt ? "" : "+ keypoints") << (opt_disable_depth ? "" : " + depth") << std::endl;
  }

  std::vector<int> tracker_types;
  if (opt_disable_klt) {
    tracker_types.push_back(vpMbGenericTracker::EDGE_TRACKER);
  }
  else {
#if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGPROC) && defined(HAVE_OPENCV_VIDEO)
    tracker_types.push_back(vpMbGenericTracker::EDGE_TRACKER | vpMbGenericTracker::KLT_TRACKER);
#else
    std::cout << "Warning: keypoints cannot be used as features since ViSP is not build with OpenCV 3rd party" << std::endl;
#endif
  }
  if (!opt_disable_depth)
    tracker_types.push_back(vpMbGenericTracker::DEPTH_DENSE_TRACKER);

  vpMbGenericTracker tracker(tracker_types);
  if (!opt_disable_depth)
    tracker.loadConfigFile(mbt_config_color, mbt_config_depth);
  else
    tracker.loadConfigFile(mbt_config_color);
  tracker.loadModel(mbt_cad_model);
  vpCameraParameters cam_color, cam_depth;

  // Update intrinsics camera parameters from Blender generated data
  vpXmlParserCamera p;
  if (p.parse(cam_color, color_intrinsic_file, color_camera_name, vpCameraParameters::perspectiveProjWithoutDistortion)
     != vpXmlParserCamera::SEQUENCE_OK) {
    std::cout << "Cannot found intrinsics for camera " << color_camera_name << std::endl;
  }
  if (p.parse(cam_depth, depth_intrinsic_file, depth_camera_name, vpCameraParameters::perspectiveProjWithoutDistortion)
     != vpXmlParserCamera::SEQUENCE_OK) {
    std::cout << "Cannot found intrinsics for camera " << depth_camera_name << std::endl;
  }

  if (!opt_disable_depth)
    tracker.setCameraParameters(cam_color, cam_depth);
  else
    tracker.setCameraParameters(cam_color);

  // Reload intrinsics from tracker (useless)
  if (!opt_disable_depth)
    tracker.getCameraParameters(cam_color, cam_depth);
  else
    tracker.getCameraParameters(cam_color);
  tracker.setDisplayFeatures(true);
  std::cout << "cam_color:\n" << cam_color << std::endl;

  if (!opt_disable_depth)
    std::cout << "cam_depth:\n" << cam_depth << std::endl;

  vpImage<uint16_t> I_depth_raw;
  vpImage<unsigned char> I, I_depth;
  unsigned int depth_width = 0, depth_height = 0;
  std::vector<vpColVector> pointcloud;
  vpHomogeneousMatrix cMo_ground_truth;

  unsigned int frame_cpt = opt_first_frame;
  read_data(frame_cpt, video_color_images, video_depth_images, opt_disable_depth, ground_truth,
            I, I_depth_raw, depth_width, depth_height, pointcloud, cam_depth, cMo_ground_truth);
  vpImageConvert::createDepthHistogram(I_depth_raw, I_depth);

#if defined(VISP_HAVE_X11)
  vpDisplayX d1, d2;
#elif defined(VISP_HAVE_GDI)
  vpDisplayGDI d1, d2;
#elif defined (HAVE_OPENCV_HIGHGUI)
  vpDisplayOpenCV d1, d2;
#endif

  d1.init(I, 0, 0, "Color image");
  if (!opt_disable_depth) {
    d2.init(I_depth, static_cast<int>(I.getWidth()), 0, "Depth image");
  }

  vpHomogeneousMatrix depth_M_color;
  if (!opt_disable_depth) {
    depth_M_color.load(extrinsic_file);
    tracker.setCameraTransformationMatrix("Camera2", depth_M_color);
    std::cout << "depth_M_color:\n" << depth_M_color << std::endl;
  }

  if (opt_display_ground_truth) {
    tracker.initFromPose(I, cMo_ground_truth); // I and I_depth must be the same size when using depth features!
  }
  else {
    tracker.initClick(I, mbt_init_file, true); // I and I_depth must be the same size when using depth features!
  }

  try {
    bool quit = false;
    while (!quit && read_data(frame_cpt, video_color_images, video_depth_images, opt_disable_depth,
                              ground_truth, I, I_depth_raw, depth_width, depth_height, pointcloud, cam_depth,
                              cMo_ground_truth)) {
      vpImageConvert::createDepthHistogram(I_depth_raw, I_depth);
      vpDisplay::display(I);
      vpDisplay::display(I_depth);

      if (opt_display_ground_truth) {
        tracker.initFromPose(I, cMo_ground_truth); // I and I_depth must be the same size when using depth features!
      }
      else {
        if (!opt_disable_depth) {
          std::map<std::string, const vpImage<unsigned char> *> mapOfImages;
          std::map<std::string, const std::vector<vpColVector> *> mapOfPointClouds;
          std::map<std::string, unsigned int> mapOfPointCloudWidths;
          std::map<std::string, unsigned int> mapOfPointCloudHeights;

          mapOfImages["Camera1"] = &I;
          mapOfPointClouds["Camera2"] = &pointcloud;
          mapOfPointCloudWidths["Camera2"] = depth_width;
          mapOfPointCloudHeights["Camera2"] = depth_height;
          tracker.track(mapOfImages, mapOfPointClouds, mapOfPointCloudWidths, mapOfPointCloudHeights);
        }
        else {
          tracker.track(I);
        }
      }

      vpHomogeneousMatrix cMo = tracker.getPose();
      std::cout << "\nFrame: " << frame_cpt << std::endl;
      if (!opt_display_ground_truth)
        std::cout << "cMo:\n" << cMo << std::endl;
      std::cout << "cMo ground truth:\n" << cMo_ground_truth << std::endl;
      if (!opt_disable_depth) {
        tracker.display(I, I_depth, cMo, depth_M_color * cMo, cam_color, cam_depth, vpColor::red, 2);
        vpDisplay::displayFrame(I_depth, depth_M_color * cMo, cam_depth, 0.05, vpColor::none, 2);
      }
      else {
        tracker.display(I, cMo, cam_color, vpColor::red, 2);
      }

      vpDisplay::displayFrame(I, cMo, cam_color, 0.05, vpColor::none, 2);
      std::ostringstream oss;
      oss << "Frame: " << frame_cpt;
      vpDisplay::setTitle(I, oss.str());
      if (opt_step_by_step) {
        vpDisplay::displayText(I, 20, 10, "Left click to trigger next step", vpColor::red);
        vpDisplay::displayText(I, 40, 10, "Right click to quit step-by-step mode", vpColor::red);
      }
      else {
        vpDisplay::displayText(I, 20, 10, "Left click to trigger step-by-step mode", vpColor::red);
        vpDisplay::displayText(I, 40, 10, "Right click to exit...", vpColor::red);
      }
      if (!opt_display_ground_truth) {
        {
          std::stringstream ss;
          ss << "Nb features: " << tracker.getError().size();
          vpDisplay::displayText(I, I.getHeight() - 50, 20, ss.str(), vpColor::red);
        }
        {
          std::stringstream ss;
          ss << "Features: edges " << tracker.getNbFeaturesEdge() << ", klt " << tracker.getNbFeaturesKlt()
            << ", depth " << tracker.getNbFeaturesDepthDense();
          vpDisplay::displayText(I, I.getHeight() - 30, 20, ss.str(), vpColor::red);
        }
      }

      vpDisplay::flush(I);
      vpDisplay::flush(I_depth);

      // Button 1: start step by step if not enabled from command line option
      // Button 2: enables step by step mode
      // Button 3: ends step by step mode if enabled
      //           quit otherwise
      vpMouseButton::vpMouseButtonType button;
      if (vpDisplay::getClick(I, button, opt_step_by_step)) {
        if (button == vpMouseButton::button1 && opt_step_by_step == false) {
          opt_step_by_step = true;
        }
        else if (button == vpMouseButton::button3 && opt_step_by_step == true) {
          opt_step_by_step = false;
        }
        else if (button == vpMouseButton::button3 && opt_step_by_step == false) {
          quit = true;
        }
        else if (button == vpMouseButton::button2) {
          opt_step_by_step = true;
        }
      }

      frame_cpt++;
    }

    vpDisplay::flush(I);
    vpDisplay::getClick(I);
  }
  catch (std::exception &e) {
    std::cerr << "Catch exception: " << e.what() << std::endl;
  }

  return EXIT_SUCCESS;
}
#else
int main()
{
  std::cout << "To run this tutorial, ViSP should be built with OpenCV and pugixml libraries." << std::endl;
  return EXIT_SUCCESS;
}
#endif