diff --git a/modules/tracking/include/opencv2/tracking/tracker.hpp b/modules/tracking/include/opencv2/tracking/tracker.hpp
index 8612cc57a68..9224aa46d11 100644
--- a/modules/tracking/include/opencv2/tracking/tracker.hpp
+++ b/modules/tracking/include/opencv2/tracking/tracker.hpp
@@ -1462,6 +1462,71 @@ class CV_EXPORTS MultiTrackerTLD : public MultiTracker_Alt

 //! @}

+/*********************************** CSRT ************************************/
+/** @brief Discriminative Correlation Filter Tracker with Channel and Spatial Reliability
+*/
+class CV_EXPORTS_W TrackerCSRT : public Tracker
+{
+public:
+  struct CV_EXPORTS Params
+  {
+    /**
+    * \brief Constructor
+    */
+    Params();
+
+    /**
+    * \brief Read parameters from a file
+    */
+    void read(const FileNode& /*fn*/);
+
+    /**
+    * \brief Write parameters to a file
+    */
+    void write(cv::FileStorage& fs) const;
+
+    bool use_hog;
+    bool use_color_names;
+    bool use_gray;
+    bool use_rgb;
+    bool use_channel_weights;
+    bool use_segmentation;
+
+    std::string window_function; //!< Window function: "hann", "cheb", "kaiser"
+    float kaiser_alpha;
+    float cheb_attenuation;
+
+    float template_size;
+    float gsl_sigma;
+    float hog_orientations;
+    float hog_clip;
+    float padding;
+    float filter_lr;
+    float weights_lr;
+    int num_hog_channels_used;
+    int admm_iterations;
+    int histogram_bins;
+    float histogram_lr;
+    int background_ratio;
+    int number_of_scales;
+    float scale_sigma_factor;
+    float scale_model_max_area;
+    float scale_lr;
+    float scale_step;
+  };
+
+  /** @brief Constructor
+  @param parameters CSRT parameters TrackerCSRT::Params
+  */
+  static Ptr<TrackerCSRT> create(const TrackerCSRT::Params &parameters);
+
+  CV_WRAP static Ptr<TrackerCSRT> create();
+
+  virtual void setInitialMask(const Mat mask) = 0;
+
+  virtual ~TrackerCSRT() {}
+};
+
 } /* namespace cv */

 #endif
diff --git a/modules/tracking/samples/csrt.cpp b/modules/tracking/samples/csrt.cpp
new file mode 100644
index 00000000000..1f77e89a8c5
--- /dev/null
+++ b/modules/tracking/samples/csrt.cpp
@@ -0,0 +1,144 @@
+#include <opencv2/core/utility.hpp>
+#include <opencv2/tracking.hpp>
+#include <opencv2/videoio.hpp>
+#include <opencv2/highgui.hpp>
+#include <iostream>
+#include <cstdlib>
+#include <fstream>
+#include <sstream>
+#include "samples_utility.hpp"
+
+using namespace std;
+using namespace cv;
+
+int main(int argc, char** argv)
+{
+    // show help
+    if (argc < 2) {
+        cout <<
+            " Usage: example_tracking_csrt <video_name> [<ground_truth_file>]\n"
+            " examples:\n"
+            " example_tracking_csrt Bolt/img/%04d.jpg\n"
+            " example_tracking_csrt Bolt/img/%04d.jpg Bolt/groundtruth.txt\n"
+            " example_tracking_csrt faceocc2.webm\n"
+            << endl;
+        return 0;
+    }
+
+    // create the tracker
+    Ptr<TrackerCSRT> tracker = TrackerCSRT::create();
+
+    // const char* param_file_path = "/home/amuhic/Workspace/3_dip/params.yml";
+    // FileStorage fs(param_file_path, FileStorage::WRITE);
+    // tracker->write(fs);
+    // FileStorage fs(param_file_path, FileStorage::READ);
+    // tracker->read( fs.root());
+
+    // set input video
+    std::string video = argv[1];
+    VideoCapture cap(video);
+    // and read first frame
+    Mat frame;
+    cap >> frame;
+
+    // target bounding box
+    Rect2d roi;
+    if (argc > 2) {
+        // read first line of ground-truth file
+        std::string groundtruthPath = argv[2];
+        std::ifstream gtIfstream(groundtruthPath.c_str());
+        std::string gtLine;
+        getline(gtIfstream, gtLine);
+        gtIfstream.close();
+
+        // parse the line by elements
+        std::stringstream gtStream(gtLine);
+        std::string element;
+        std::vector<float> elements;
+        while (std::getline(gtStream, element, ','))
+        {
+            elements.push_back(cvRound(std::atof(element.c_str())));
+        }
+
+        if (elements.size() == 4) {
+            // ground-truth is rectangle
+            roi = cv::Rect(elements[0], elements[1], elements[2],
+                elements[3]);
+        }
+        else if (elements.size() == 8) {
+            // ground-truth is polygon
+            int xMin = cvRound(min(elements[0], min(elements[2], min(elements[4], elements[6]))));
+            int yMin = cvRound(min(elements[1], min(elements[3], min(elements[5], elements[7]))));
+            int xMax = cvRound(max(elements[0], max(elements[2], max(elements[4], elements[6]))));
+            int yMax = cvRound(max(elements[1], max(elements[3], max(elements[5], elements[7]))));
+            roi = cv::Rect(xMin, yMin, xMax - xMin, yMax - yMin);
+
+            // create mask from polygon and set it to the tracker
+            cv::Rect aaRect = cv::Rect(xMin, yMin, xMax - xMin, yMax - yMin);
+            cout << aaRect.size() << endl;
+            Mat mask = Mat::zeros(aaRect.size(), CV_8UC1);
+            const int n = 4;
+            std::vector<Point> poly_points(n);
+            // translate x and y to the rect's start position
+            int sx = aaRect.x;
+            int sy = aaRect.y;
+            for (int i = 0; i < n; ++i) {
+                poly_points[i] = Point(elements[2 * i] - sx, elements[2 * i + 1] - sy);
+            }
+            cv::fillConvexPoly(mask, poly_points, Scalar(1.0), 8);
+            mask.convertTo(mask, CV_32FC1);
+            tracker->setInitialMask(mask);
+        }
+        else {
+            std::cout << "Number of ground-truth elements is not 4 or 8." << std::endl;
+        }
+
+    }
+    else {
+        // second argument is not given - user selects target
+        roi = selectROI("tracker", frame, true, false);
+    }
+
+    // quit if ROI was not selected
+    if (roi.width == 0 || roi.height == 0)
+        return 0;
+
+    // initialize the tracker
+    int64 t1 = cv::getTickCount();
+    tracker->init(frame, roi);
+    int64 t2 = cv::getTickCount();
+    int64 tick_counter = t2 - t1;
+
+    // do the tracking
+    printf("Start the tracking process, press ESC to quit.\n");
+    int frame_idx = 1;
+    for (;;) {
+        // get frame from the video
+        cap >> frame;
+
+        // stop the program if no more images
+        if (frame.rows == 0 || frame.cols == 0)
+            break;
+
+        // update the tracking result
+        t1 = cv::getTickCount();
+        bool isfound = tracker->update(frame, roi);
+        t2 = cv::getTickCount();
+        tick_counter += t2 - t1;
+        frame_idx++;
+
+        if (!isfound) {
+            cout << "The target has been lost...\n";
+            waitKey(0);
+            return 0;
+        }
+
+        // draw the tracked object and show the image
+        rectangle(frame, roi, Scalar(255, 0, 0), 2, 1);
+        imshow("tracker", frame);
+
+        // quit on ESC button
+        if (waitKey(1) == 27) break;
+    }
+
+    cout << "Elapsed sec: " << static_cast<double>(tick_counter) / cv::getTickFrequency() << endl;
+    cout << "FPS: " << ((double)(frame_idx)) / (static_cast<double>(tick_counter) / cv::getTickFrequency()) << endl;
+}
diff --git a/modules/tracking/samples/samples_utility.hpp b/modules/tracking/samples/samples_utility.hpp
index e0327ea2521..de3525d507c 100644
--- a/modules/tracking/samples/samples_utility.hpp
+++ b/modules/tracking/samples/samples_utility.hpp
@@ -21,6 +21,8 @@ inline cv::Ptr<cv::Tracker> createTrackerByName(cv::String name)
     tracker = cv::TrackerGOTURN::create();
   else if (name == "MOSSE")
     tracker = cv::TrackerMOSSE::create();
+  else if (name == "CSRT")
+    tracker = cv::TrackerCSRT::create();
   else
     CV_Error(cv::Error::StsBadArg, "Invalid tracking algorithm name\n");
diff --git a/modules/tracking/src/trackerCSRT.cpp b/modules/tracking/src/trackerCSRT.cpp
new file mode 100644
index 00000000000..81edce64932
--- /dev/null
+++ b/modules/tracking/src/trackerCSRT.cpp
@@ -0,0 +1,745 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
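// Overview of the implementation below (Discriminative Correlation Filter with
// Channel and Spatial Reliability):
//  - initImpl(): extracts HOG / ColorNames / gray features from the initial patch,
//    builds a spatial reliability mask via color-histogram segmentation (optionally
//    seeded through setInitialMask), and solves for the correlation filter with a
//    few ADMM iterations (create_csr_filter); a DSST correlation filter is set up
//    for scale estimation.
//  - updateImpl(): localizes the target from the filter response
//    (estimate_new_position), updates the scale (DSST), re-segments the reliability
//    mask, and updates the filter and per-channel weights with the configured
//    learning rates.
// Typical use, as in samples/csrt.cpp above:
//    Ptr<TrackerCSRT> tracker = TrackerCSRT::create();
//    tracker->init(frame, roi);
//    tracker->update(frame, roi);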
+ +#include "precomp.hpp" + +#include "trackerCSRTSegmentation.hpp" +#include "trackerCSRTUtils.hpp" +#include "trackerCSRTScaleEstimation.hpp" + +namespace cv +{ +/** +* \brief Implementation of TrackerModel for CSRT algorithm +*/ +class TrackerCSRTModel : public TrackerModel +{ +public: + TrackerCSRTModel(TrackerCSRT::Params /*params*/){} + ~TrackerCSRTModel(){} +protected: + void modelEstimationImpl(const std::vector& /*responses*/){} + void modelUpdateImpl(){} +}; + + +class TrackerCSRTImpl : public TrackerCSRT +{ +public: + TrackerCSRTImpl(const TrackerCSRT::Params ¶meters = TrackerCSRT::Params()); + void read(const FileNode& fn); + void write(FileStorage& fs) const; + +protected: + TrackerCSRT::Params params; + + bool initImpl(const Mat& image, const Rect2d& boundingBox); + virtual void setInitialMask(const Mat mask); + bool updateImpl(const Mat& image, Rect2d& boundingBox); + void update_csr_filter(const Mat &image, const Mat &my_mask); + void update_histograms(const Mat &image, const Rect ®ion); + void extract_histograms(const Mat &image, cv::Rect region, Histogram &hf, Histogram &hb); + std::vector create_csr_filter(const std::vector + img_features, const cv::Mat Y, const cv::Mat P); + Mat calculate_response(const Mat &image, const std::vector filter); + Mat get_location_prior(const Rect roi, const Size2f target_size, const Size img_sz); + Mat segment_region(const Mat &image, const Point2f &object_center, + const Size2f &template_size, const Size &target_size, float scale_factor); + Point2f estimate_new_position(const Mat &image); + std::vector get_features(const Mat &patch, const Size2i &feature_size); + +private: + bool check_mask_area(const Mat &mat, const double obj_area); + float current_scale_factor; + Mat window; + Mat yf; + Rect2f bounding_box; + std::vector csr_filter; + std::vector filter_weights; + Size2f original_target_size; + Size2i image_size; + Size2f template_size; + Size2i rescaled_template_size; + float rescale_ratio; + Point2f object_center; + DSST dsst; + Histogram hist_foreground; + Histogram hist_background; + double p_b; + Mat erode_element; + Mat filter_mask; + Mat preset_mask; + Mat default_mask; + float default_mask_area; + int cell_size; +}; + +Ptr TrackerCSRT::create(const TrackerCSRT::Params ¶meters) +{ + return Ptr(new TrackerCSRTImpl(parameters)); +} +Ptr TrackerCSRT::create() +{ + return Ptr(new TrackerCSRTImpl()); +} +TrackerCSRTImpl::TrackerCSRTImpl(const TrackerCSRT::Params ¶meters) : + params(parameters) +{ + isInit = false; +} + +void TrackerCSRTImpl::read(const cv::FileNode& fn) +{ + params.read(fn); +} + +void TrackerCSRTImpl::write(cv::FileStorage& fs) const +{ + params.write(fs); +} + +void TrackerCSRTImpl::setInitialMask(const Mat mask) +{ + preset_mask = mask; +} + +bool TrackerCSRTImpl::check_mask_area(const Mat &mat, const double obj_area) +{ + double threshold = 0.05; + double mask_area= sum(mat)[0]; + if(mask_area < threshold*obj_area) { + return false; + } + return true; +} + +Mat TrackerCSRTImpl::calculate_response(const Mat &image, const std::vector filter) +{ + Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width), + cvFloor(current_scale_factor * template_size.height)); + resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC); + + std::vector ftrs = get_features(patch, yf.size()); + std::vector Ffeatures = fourier_transform_features(ftrs); + Mat resp, res; + if(params.use_channel_weights){ + res = Mat::zeros(Ffeatures[0].size(), CV_32FC2); + Mat resp_ch; + Mat mul_mat; + 
for(size_t i = 0; i < Ffeatures.size(); ++i) { + mulSpectrums(Ffeatures[i], filter[i], resp_ch, 0, true); + res += (resp_ch * filter_weights[i]); + } + idft(res, res, DFT_SCALE | DFT_REAL_OUTPUT); + } else { + res = Mat::zeros(Ffeatures[0].size(), CV_32FC2); + Mat resp_ch; + for(size_t i = 0; i < Ffeatures.size(); ++i) { + mulSpectrums(Ffeatures[i], filter[i], resp_ch, 0 , true); + res = res + resp_ch; + } + idft(res, res, DFT_SCALE | DFT_REAL_OUTPUT); + } + return res; +} + +void TrackerCSRTImpl::update_csr_filter(const Mat &image, const Mat &mask) +{ + Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width), + cvFloor(current_scale_factor * template_size.height)); + resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC); + + std::vector ftrs = get_features(patch, yf.size()); + std::vector Fftrs = fourier_transform_features(ftrs); + std::vector new_csr_filter = create_csr_filter(Fftrs, yf, mask); + //calculate per channel weights + if(params.use_channel_weights) { + Mat current_resp; + double max_val; + float sum_weights = 0; + std::vector new_filter_weights = std::vector(new_csr_filter.size()); + for(size_t i = 0; i < new_csr_filter.size(); ++i) { + mulSpectrums(Fftrs[i], new_csr_filter[i], current_resp, 0, true); + idft(current_resp, current_resp, DFT_SCALE | DFT_REAL_OUTPUT); + minMaxLoc(current_resp, NULL, &max_val, NULL, NULL); + sum_weights += static_cast(max_val); + new_filter_weights[i] = static_cast(max_val); + } + //update filter weights with new values + float updated_sum = 0; + for(size_t i = 0; i < filter_weights.size(); ++i) { + filter_weights[i] = filter_weights[i]*(1.0f - params.weights_lr) + + params.weights_lr * (new_filter_weights[i] / sum_weights); + updated_sum += filter_weights[i]; + } + //normalize weights + for(size_t i = 0; i < filter_weights.size(); ++i) { + filter_weights[i] /= updated_sum; + } + } + for(size_t i = 0; i < csr_filter.size(); ++i) { + csr_filter[i] = (1.0f - params.filter_lr)*csr_filter[i] + params.filter_lr * new_csr_filter[i]; + } + std::vector().swap(ftrs); + std::vector().swap(Fftrs); +} + + +std::vector TrackerCSRTImpl::get_features(const Mat &patch, const Size2i &feature_size) +{ + std::vector features; + if (params.use_hog) { + std::vector hog = get_features_hog(patch, cell_size); + features.insert(features.end(), hog.begin(), + hog.begin()+params.num_hog_channels_used); + } + if (params.use_color_names) { + std::vector cn; + cn = get_features_cn(patch, feature_size); + features.insert(features.end(), cn.begin(), cn.end()); + } + if(params.use_gray) { + Mat gray_m; + cvtColor(patch, gray_m, CV_BGR2GRAY); + resize(gray_m, gray_m, feature_size, 0, 0, INTER_CUBIC); + gray_m.convertTo(gray_m, CV_32FC1, 1.0/255.0, -0.5); + features.push_back(gray_m); + } + if(params.use_rgb) { + std::vector rgb_features = get_features_rgb(patch, feature_size); + features.insert(features.end(), rgb_features.begin(), rgb_features.end()); + } + + for (size_t i = 0; i < features.size(); ++i) { + features.at(i) = features.at(i).mul(window); + } + return features; +} + +class ParallelCreateCSRFilter : public ParallelLoopBody { +public: + ParallelCreateCSRFilter( + const std::vector img_features, + const cv::Mat Y, + const cv::Mat P, + int admm_iterations, + std::vector &result_filter_): + result_filter(result_filter_) + { + this->img_features = img_features; + this->Y = Y; + this->P = P; + this->admm_iterations = admm_iterations; + } + virtual void operator ()(const Range& range) const + { + for (int i = 
range.start; i < range.end; i++) { + float mu = 5.0f; + float beta = 3.0f; + float mu_max = 20.0f; + float lambda = mu / 100.0f; + + Mat F = img_features[i]; + + Mat Sxy, Sxx; + mulSpectrums(F, Y, Sxy, 0, true); + mulSpectrums(F, F, Sxx, 0, true); + + Mat H; + H = divide_complex_matrices(Sxy, (Sxx + lambda)); + idft(H, H, DFT_SCALE|DFT_REAL_OUTPUT); + H = H.mul(P); + dft(H, H, DFT_COMPLEX_OUTPUT); + Mat L = Mat::zeros(H.size(), H.type()); //Lagrangian multiplier + Mat G; + for(int iteration = 0; iteration < admm_iterations; ++iteration) { + G = divide_complex_matrices((Sxy + (mu * H) - L) , (Sxx + mu)); + idft((mu * G) + L, H, DFT_SCALE | DFT_REAL_OUTPUT); + float lm = 1.0f / (lambda+mu); + H = H.mul(P*lm); + dft(H, H, DFT_COMPLEX_OUTPUT); + + //Update variables for next iteration + L = L + mu * (G - H); + mu = min(mu_max, beta*mu); + } + result_filter[i] = H; + } + } + + ParallelCreateCSRFilter& operator=(const ParallelCreateCSRFilter &) { + return *this; + } + +private: + int admm_iterations; + Mat Y; + Mat P; + std::vector img_features; + std::vector &result_filter; +}; + + +std::vector TrackerCSRTImpl::create_csr_filter( + const std::vector img_features, + const cv::Mat Y, + const cv::Mat P) +{ + std::vector result_filter; + result_filter.resize(img_features.size()); + ParallelCreateCSRFilter parallelCreateCSRFilter(img_features, Y, P, + params.admm_iterations, result_filter); + parallel_for_(Range(0, static_cast(result_filter.size())), parallelCreateCSRFilter); + + return result_filter; +} + +Mat TrackerCSRTImpl::get_location_prior( + const Rect roi, + const Size2f target_size, + const Size img_sz) +{ + int x1 = cvRound(max(min(roi.x-1, img_sz.width-1) , 0)); + int y1 = cvRound(max(min(roi.y-1, img_sz.height-1) , 0)); + + int x2 = cvRound(min(max(roi.width-1, 0) , img_sz.width-1)); + int y2 = cvRound(min(max(roi.height-1, 0) , img_sz.height-1)); + + Size target_sz; + target_sz.width = target_sz.height = cvFloor(min(target_size.width, target_size.height)); + + double cx = x1 + (x2-x1)/2.; + double cy = y1 + (y2-y1)/2.; + double kernel_size_width = 1.0/(0.5*static_cast(target_sz.width)*1.4142+1); + double kernel_size_height = 1.0/(0.5*static_cast(target_sz.height)*1.4142+1); + + cv::Mat kernel_weight = Mat::zeros(1 + cvFloor(y2 - y1) , 1+cvFloor(-(x1-cx) + (x2-cx)), CV_64FC1); + for (int y = y1; y < y2+1; ++y){ + double * weightPtr = kernel_weight.ptr(y); + double tmp_y = std::pow((cy-y)*kernel_size_height, 2); + for (int x = x1; x < x2+1; ++x){ + weightPtr[x] = kernel_epan(std::pow((cx-x)*kernel_size_width,2) + tmp_y); + } + } + + double max_val; + cv::minMaxLoc(kernel_weight, NULL, &max_val, NULL, NULL); + Mat fg_prior = kernel_weight / max_val; + fg_prior.setTo(0.5, fg_prior < 0.5); + fg_prior.setTo(0.9, fg_prior > 0.9); + return fg_prior; +} + +Mat TrackerCSRTImpl::segment_region( + const Mat &image, + const Point2f &object_center, + const Size2f &template_size, + const Size &target_size, + float scale_factor) +{ + Rect valid_pixels; + Mat patch = get_subwindow(image, object_center, cvFloor(scale_factor * template_size.width), + cvFloor(scale_factor * template_size.height), &valid_pixels); + Size2f scaled_target = Size2f(target_size.width * scale_factor, + target_size.height * scale_factor); + Mat fg_prior = get_location_prior( + Rect(0,0, patch.size().width, patch.size().height), + scaled_target , patch.size()); + + std::vector img_channels; + split(patch, img_channels); + std::pair probs = Segment::computePosteriors2(img_channels, 0, 0, patch.cols, patch.rows, + p_b, fg_prior, 
1.0-fg_prior, hist_foreground, hist_background); + + Mat mask = Mat::zeros(probs.first.size(), probs.first.type()); + probs.first(valid_pixels).copyTo(mask(valid_pixels)); + double max_resp = get_max(mask); + threshold(mask, mask, max_resp / 2.0, 1, THRESH_BINARY); + mask.convertTo(mask, CV_32FC1, 1.0); + return mask; +} + + +void TrackerCSRTImpl::extract_histograms(const Mat &image, cv::Rect region, Histogram &hf, Histogram &hb) +{ + // get coordinates of the region + int x1 = std::min(std::max(0, region.x), image.cols-1); + int y1 = std::min(std::max(0, region.y), image.rows-1); + int x2 = std::min(std::max(0, region.x + region.width), image.cols-1); + int y2 = std::min(std::max(0, region.y + region.height), image.rows-1); + + // calculate coordinates of the background region + int offsetX = (x2-x1+1) / params.background_ratio; + int offsetY = (y2-y1+1) / params.background_ratio; + int outer_y1 = std::max(0, (int)(y1-offsetY)); + int outer_y2 = std::min(image.rows, (int)(y2+offsetY+1)); + int outer_x1 = std::max(0, (int)(x1-offsetX)); + int outer_x2 = std::min(image.cols, (int)(x2+offsetX+1)); + + // calculate probability for the background + p_b = 1.0 - ((x2-x1+1) * (y2-y1+1)) / + ((double) (outer_x2-outer_x1+1) * (outer_y2-outer_y1+1)); + + // split multi-channel image into the std::vector of matrices + std::vector img_channels(image.channels()); + split(image, img_channels); + for(size_t k=0; k().swap(img_channels); +} + +void TrackerCSRTImpl::update_histograms(const Mat &image, const Rect ®ion) +{ + // create temporary histograms + Histogram hf(image.channels(), params.histogram_bins); + Histogram hb(image.channels(), params.histogram_bins); + extract_histograms(image, region, hf, hb); + + // get histogram vectors from temporary histograms + std::vector hf_vect_new = hf.getHistogramVector(); + std::vector hb_vect_new = hb.getHistogramVector(); + // get histogram vectors from learned histograms + std::vector hf_vect = hist_foreground.getHistogramVector(); + std::vector hb_vect = hist_background.getHistogramVector(); + + // update histograms - use learning rate + for(size_t i=0; i().swap(hf_vect); + std::vector().swap(hb_vect); +} + +Point2f TrackerCSRTImpl::estimate_new_position(const Mat &image) +{ + + Mat resp = calculate_response(image, csr_filter); + + Point max_loc; + minMaxLoc(resp, NULL, NULL, NULL, &max_loc); + // take into account also subpixel accuracy + float col = ((float) max_loc.x) + subpixel_peak(resp, "horizontal", max_loc); + float row = ((float) max_loc.y) + subpixel_peak(resp, "vertical", max_loc); + if(row + 1 > (float)resp.rows / 2.0f) { + row = row - resp.rows; + } + if(col + 1 > (float)resp.cols / 2.0f) { + col = col - resp.cols; + } + // calculate x and y displacements + Point2f new_center = object_center + Point2f(current_scale_factor * (1.0f / rescale_ratio) *cell_size*(col), + current_scale_factor * (1.0f / rescale_ratio) *cell_size*(row)); + //sanity checks + if(new_center.x < 0) + new_center.x = 0; + if(new_center.x >= image_size.width) + new_center.x = static_cast(image_size.width - 1); + if(new_center.y < 0) + new_center.y = 0; + if(new_center.y >= image_size.height) + new_center.y = static_cast(image_size.height - 1); + + return new_center; +} + +// ********************************************************************* +// * Update API function * +// ********************************************************************* +bool TrackerCSRTImpl::updateImpl(const Mat& image_, Rect2d& boundingBox) +{ + //treat gray image as color image + Mat image; + 
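// a single-channel input is replicated into a 3-channel image so that the
// HOG / ColorNames / HSV feature extraction and segmentation code runs unchanged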
if(image_.channels() == 1) { + std::vector channels(3); + channels[0] = channels[1] = channels[2] = image_; + merge(channels, image); + } else { + image = image_; + } + + object_center = estimate_new_position(image); + + current_scale_factor = dsst.getScale(image, object_center); + //update bouding_box according to new scale and location + bounding_box.x = object_center.x - current_scale_factor * original_target_size.width / 2.0f; + bounding_box.y = object_center.y - current_scale_factor * original_target_size.height / 2.0f; + bounding_box.width = current_scale_factor * original_target_size.width; + bounding_box.height = current_scale_factor * original_target_size.height; + + //update tracker + if(params.use_segmentation) { + Mat hsv_img = bgr2hsv(image); + update_histograms(hsv_img, bounding_box); + filter_mask = segment_region(hsv_img, object_center, + template_size,original_target_size, current_scale_factor); + resize(filter_mask, filter_mask, yf.size(), 0, 0, INTER_NEAREST); + if(check_mask_area(filter_mask, default_mask_area)) { + dilate(filter_mask , filter_mask, erode_element); + } else { + filter_mask = default_mask; + } + } else { + filter_mask = default_mask; + } + update_csr_filter(image, filter_mask); + dsst.update(image, object_center); + boundingBox = bounding_box; + return true; +} + +// ********************************************************************* +// * Init API function * +// ********************************************************************* +bool TrackerCSRTImpl::initImpl(const Mat& image_, const Rect2d& boundingBox) +{ + cv::setNumThreads(getNumThreads()); + + //treat gray image as color image + Mat image; + if(image_.channels() == 1) { + std::vector channels(3); + channels[0] = channels[1] = channels[2] = image_; + merge(channels, image); + } else { + image = image_; + } + + current_scale_factor = 1.0; + image_size = image.size(); + bounding_box = boundingBox; + cell_size = cvFloor(std::min(4.0, std::max(1.0, static_cast( + cvCeil((bounding_box.width * bounding_box.height)/400.0))))); + original_target_size = Size(bounding_box.size()); + + template_size.width = static_cast(cvFloor(original_target_size.width + params.padding * + sqrt(original_target_size.width * original_target_size.height))); + template_size.height = static_cast(cvFloor(original_target_size.height + params.padding * + sqrt(original_target_size.width * original_target_size.height))); + template_size.width = template_size.height = + (template_size.width + template_size.height) / 2.0f; + rescale_ratio = sqrt(pow(params.template_size,2) / (template_size.width * template_size.height)); + if(rescale_ratio > 1) { + rescale_ratio = 1; + } + rescaled_template_size = Size2i(cvFloor(template_size.width * rescale_ratio), + cvFloor(template_size.height * rescale_ratio)); + object_center = Point2f(static_cast(boundingBox.x) + original_target_size.width / 2.0f, + static_cast(boundingBox.y) + original_target_size.height / 2.0f); + + yf = gaussian_shaped_labels(params.gsl_sigma, + rescaled_template_size.width / cell_size, rescaled_template_size.height / cell_size); + if(params.window_function.compare("hann") == 0) { + window = get_hann_win(Size(yf.cols,yf.rows)); + } else if(params.window_function.compare("cheb") == 0) { + window = get_chebyshev_win(Size(yf.cols,yf.rows), params.cheb_attenuation); + } else if(params.window_function.compare("kaiser") == 0) { + window = get_kaiser_win(Size(yf.cols,yf.rows), params.kaiser_alpha); + } else { + std::cout << "Not a valid window function" << std::endl; + return 
false; + } + + Size2i scaled_obj_size = Size2i(cvFloor(original_target_size.width * rescale_ratio / cell_size), + cvFloor(original_target_size.height * rescale_ratio / cell_size)); + //set dummy mask and area; + int x0 = std::max((yf.size().width - scaled_obj_size.width)/2 - 1, 0); + int y0 = std::max((yf.size().height - scaled_obj_size.height)/2 - 1, 0); + default_mask = Mat::zeros(yf.size(), CV_32FC1); + default_mask(Rect(x0,y0,scaled_obj_size.width, scaled_obj_size.height)) = 1.0f; + default_mask_area = static_cast(sum(default_mask)[0]); + + //initalize segmentation + if(params.use_segmentation) { + Mat hsv_img = bgr2hsv(image); + hist_foreground = Histogram(hsv_img.channels(), params.histogram_bins); + hist_background = Histogram(hsv_img.channels(), params.histogram_bins); + extract_histograms(hsv_img, bounding_box, hist_foreground, hist_background); + filter_mask = segment_region(hsv_img, object_center, template_size, + original_target_size, current_scale_factor); + //update calculated mask with preset mask + if(preset_mask.data){ + Mat preset_mask_padded = Mat::zeros(filter_mask.size(), filter_mask.type()); + int sx = std::max((int)cvFloor(preset_mask_padded.cols / 2.0f - preset_mask.cols / 2.0f) - 1, 0); + int sy = std::max((int)cvFloor(preset_mask_padded.rows / 2.0f - preset_mask.rows / 2.0f) - 1, 0); + preset_mask.copyTo(preset_mask_padded( + Rect(sx, sy, preset_mask.cols, preset_mask.rows))); + filter_mask = filter_mask.mul(preset_mask_padded); + } + erode_element = getStructuringElement(MORPH_ELLIPSE, Size(3,3), Point(1,1)); + resize(filter_mask, filter_mask, yf.size(), 0, 0, INTER_NEAREST); + if(check_mask_area(filter_mask, default_mask_area)) { + dilate(filter_mask , filter_mask, erode_element); + } else { + filter_mask = default_mask; + } + + } else { + filter_mask = default_mask; + } + + //initialize filter + Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width), + cvFloor(current_scale_factor * template_size.height)); + resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC); + std::vector patch_ftrs = get_features(patch, yf.size()); + std::vector Fftrs = fourier_transform_features(patch_ftrs); + csr_filter = create_csr_filter(Fftrs, yf, filter_mask); + + if(params.use_channel_weights) { + Mat current_resp; + filter_weights = std::vector(csr_filter.size()); + float chw_sum = 0; + for (size_t i = 0; i < csr_filter.size(); ++i) { + mulSpectrums(Fftrs[i], csr_filter[i], current_resp, 0, true); + idft(current_resp, current_resp, DFT_SCALE | DFT_REAL_OUTPUT); + double max_val; + minMaxLoc(current_resp, NULL, &max_val, NULL , NULL); + chw_sum += static_cast(max_val); + filter_weights[i] = static_cast(max_val); + } + for (size_t i = 0; i < filter_weights.size(); ++i) { + filter_weights[i] /= chw_sum; + } + } + + //initialize scale search + dsst = DSST(image, bounding_box, template_size, params.number_of_scales, params.scale_step, + params.scale_model_max_area, params.scale_sigma_factor, params.scale_lr); + + model=Ptr(new TrackerCSRTModel(params)); + isInit = true; + return true; +} + +TrackerCSRT::Params::Params() +{ + use_channel_weights = true; + use_segmentation = true; + use_hog = true; + use_color_names = true; + use_gray = true; + use_rgb = false; + window_function = "hann"; + kaiser_alpha = 3.75f; + cheb_attenuation = 45; + padding = 3.0f; + template_size = 200; + gsl_sigma = 1.0f; + hog_orientations = 9; + hog_clip = 0.2f; + num_hog_channels_used = 18; + filter_lr = 0.02f; + weights_lr = 0.02f; + admm_iterations = 4; + 
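// defaults below configure the DSST scale search and the color-histogram segmentation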
number_of_scales = 33; + scale_sigma_factor = 0.250f; + scale_model_max_area = 512.0f; + scale_lr = 0.025f; + scale_step = 1.020f; + histogram_bins = 16; + background_ratio = 2; + histogram_lr = 0.04f; +} + +void TrackerCSRT::Params::read(const FileNode& fn) +{ + *this = TrackerCSRT::Params(); + if(!fn["padding"].empty()) + fn["padding"] >> padding; + if(!fn["template_size"].empty()) + fn["template_size"] >> template_size; + if(!fn["gsl_sigma"].empty()) + fn["gsl_sigma"] >> gsl_sigma; + if(!fn["hog_orientations"].empty()) + fn["hog_orientations"] >> hog_orientations; + if(!fn["num_hog_channels_used"].empty()) + fn["num_hog_channels_used"] >> num_hog_channels_used; + if(!fn["hog_clip"].empty()) + fn["hog_clip"] >> hog_clip; + if(!fn["use_hog"].empty()) + fn["use_hog"] >> use_hog; + if(!fn["use_color_names"].empty()) + fn["use_color_names"] >> use_color_names; + if(!fn["use_gray"].empty()) + fn["use_gray"] >> use_gray; + if(!fn["use_rgb"].empty()) + fn["use_rgb"] >> use_rgb; + if(!fn["window_function"].empty()) + fn["window_function"] >> window_function; + if(!fn["kaiser_alpha"].empty()) + fn["kaiser_alpha"] >> kaiser_alpha; + if(!fn["cheb_attenuation"].empty()) + fn["cheb_attenuation"] >> cheb_attenuation; + if(!fn["filter_lr"].empty()) + fn["filter_lr"] >> filter_lr; + if(!fn["admm_iterations"].empty()) + fn["admm_iterations"] >> admm_iterations; + if(!fn["number_of_scales"].empty()) + fn["number_of_scales"] >> number_of_scales; + if(!fn["scale_sigma_factor"].empty()) + fn["scale_sigma_factor"] >> scale_sigma_factor; + if(!fn["scale_model_max_area"].empty()) + fn["scale_model_max_area"] >> scale_model_max_area; + if(!fn["scale_lr"].empty()) + fn["scale_lr"] >> scale_lr; + if(!fn["scale_step"].empty()) + fn["scale_step"] >> scale_step; + if(!fn["use_channel_weights"].empty()) + fn["use_channel_weights"] >> use_channel_weights; + if(!fn["weights_lr"].empty()) + fn["weights_lr"] >> weights_lr; + if(!fn["use_segmentation"].empty()) + fn["use_segmentation"] >> use_segmentation; + if(!fn["histogram_bins"].empty()) + fn["histogram_bins"] >> histogram_bins; + if(!fn["background_ratio"].empty()) + fn["background_ratio"] >> background_ratio; + if(!fn["histogram_lr"].empty()) + fn["histogram_lr"] >> histogram_lr; + CV_Assert(number_of_scales % 2 == 1); + CV_Assert(use_gray || use_color_names || use_hog || use_rgb); +} +void TrackerCSRT::Params::write(FileStorage& fs) const +{ + fs << "padding" << padding; + fs << "template_size" << template_size; + fs << "gsl_sigma" << gsl_sigma; + fs << "hog_orientations" << hog_orientations; + fs << "num_hog_channels_used" << num_hog_channels_used; + fs << "hog_clip" << hog_clip; + fs << "use_hog" << use_hog; + fs << "use_color_names" << use_color_names; + fs << "use_gray" << use_gray; + fs << "use_rgb" << use_rgb; + fs << "window_function" << window_function; + fs << "kaiser_alpha" << kaiser_alpha; + fs << "cheb_attenuation" << cheb_attenuation; + fs << "filter_lr" << filter_lr; + fs << "admm_iterations" << admm_iterations; + fs << "number_of_scales" << number_of_scales; + fs << "scale_sigma_factor" << scale_sigma_factor; + fs << "scale_model_max_area" << scale_model_max_area; + fs << "scale_lr" << scale_lr; + fs << "scale_step" << scale_step; + fs << "use_channel_weights" << use_channel_weights; + fs << "weights_lr" << weights_lr; + fs << "use_segmentation" << use_segmentation; + fs << "histogram_bins" << histogram_bins; + fs << "background_ratio" << background_ratio; + fs << "histogram_lr" << histogram_lr; +} +} /* namespace cv */ diff --git 
a/modules/tracking/src/trackerCSRTScaleEstimation.cpp b/modules/tracking/src/trackerCSRTScaleEstimation.cpp new file mode 100644 index 00000000000..c0befafa015 --- /dev/null +++ b/modules/tracking/src/trackerCSRTScaleEstimation.cpp @@ -0,0 +1,211 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "trackerCSRTScaleEstimation.hpp" +#include "trackerCSRTUtils.hpp" + +//Discriminative Scale Space Tracking +namespace cv +{ + +class ParallelGetScaleFeatures : public ParallelLoopBody +{ +public: + ParallelGetScaleFeatures( + Mat img, + Point2f pos, + Size2f base_target_sz, + float current_scale, + std::vector &scale_factors, + Mat scale_window, + Size scale_model_sz, + int col_len, + Mat &result) + { + this->img = img; + this->pos = pos; + this->base_target_sz = base_target_sz; + this->current_scale = current_scale; + this->scale_factors = scale_factors; + this->scale_window = scale_window; + this->scale_model_sz = scale_model_sz; + this->col_len = col_len; + this->result = result; + } + virtual void operator ()(const Range& range) const + { + for (int s = range.start; s < range.end; s++) { + Size patch_sz = Size(static_cast(current_scale * scale_factors[s] * base_target_sz.width), + static_cast(current_scale * scale_factors[s] * base_target_sz.height)); + Mat img_patch = get_subwindow(img, pos, patch_sz.width, patch_sz.height); + img_patch.convertTo(img_patch, CV_32FC3); + resize(img_patch, img_patch, Size(scale_model_sz.width, scale_model_sz.height),0,0,INTER_LINEAR); + std::vector hog; + hog = get_features_hog(img_patch, 4); + for (int i = 0; i < static_cast(hog.size()); ++i) { + hog[i] = hog[i].t(); + hog[i] = scale_window.at(0,s) * hog[i].reshape(0, col_len); + hog[i].copyTo(result(Rect(Point(s, i*col_len), hog[i].size()))); + } + } + } + + ParallelGetScaleFeatures& operator=(const ParallelGetScaleFeatures &) { + return *this; + } + +private: + Mat img; + Point2f pos; + Size2f base_target_sz; + float current_scale; + std::vector scale_factors; + Mat scale_window; + Size scale_model_sz; + int col_len; + Mat result; +}; + + +DSST::DSST(const Mat &image, + Rect2f bounding_box, + Size2f template_size, + int numberOfScales, + float scaleStep, + float maxModelArea, + float sigmaFactor, + float scaleLearnRate): + scales_count(numberOfScales), scale_step(scaleStep), max_model_area(maxModelArea), + sigma_factor(sigmaFactor), learn_rate(scaleLearnRate) +{ + original_targ_sz = bounding_box.size(); + Point2f object_center = Point2f(bounding_box.x + original_targ_sz.width / 2, + bounding_box.y + original_targ_sz.height / 2); + + current_scale_factor = 1.0; + if(scales_count % 2 == 0) + scales_count++; + + scale_sigma = static_cast(sqrt(scales_count) * sigma_factor); + + min_scale_factor = pow(scale_step, + cvCeil(log(max(5.0 / template_size.width, 5.0 / template_size.height)) / log(scale_step))); + max_scale_factor = powf(scale_step, + static_cast(cvFloor(log(min((float)image.rows / (float)bounding_box.width, + (float)image.cols / (float)bounding_box.height)) / log(scale_step)))); + ys = Mat(1, scales_count, CV_32FC1); + float ss, sf; + for(int i = 0; i < ys.cols; ++i) { + ss = (float)(i+1) - cvCeil((float)scales_count / 2.0f); + ys.at(0,i) = static_cast(exp(-0.5 * pow(ss,2) / pow(scale_sigma,2))); + sf = static_cast(i + 1); + scale_factors.push_back(pow(scale_step, cvCeil((float)scales_count / 2.0f) - sf)); + } + + 
scale_window = get_hann_win(Size(scales_count, 1)); + + float scale_model_factor = 1.0; + if(template_size.width * template_size.height * pow(scale_model_factor, 2) > max_model_area) + { + scale_model_factor = sqrt(max_model_area / + (template_size.width * template_size.height)); + } + scale_model_sz = Size(cvFloor(template_size.width * scale_model_factor), + cvFloor(template_size.height * scale_model_factor)); + + Mat scale_resp = get_scale_features(image, object_center, original_targ_sz, + current_scale_factor, scale_factors, scale_window, scale_model_sz); + + Mat ysf_row = Mat(ys.size(), CV_32FC2); + dft(ys, ysf_row, DFT_ROWS | DFT_COMPLEX_OUTPUT, 0); + ysf = repeat(ysf_row, scale_resp.rows, 1); + Mat Fscale_resp; + dft(scale_resp, Fscale_resp, DFT_ROWS | DFT_COMPLEX_OUTPUT); + mulSpectrums(ysf, Fscale_resp, sf_num, 0 , true); + Mat sf_den_all; + mulSpectrums(Fscale_resp, Fscale_resp, sf_den_all, 0, true); + reduce(sf_den_all, sf_den, 0, CV_REDUCE_SUM, -1); +} + +DSST::~DSST() +{ +} + +Mat DSST::get_scale_features( + Mat img, + Point2f pos, + Size2f base_target_sz, + float current_scale, + std::vector &scale_factors, + Mat scale_window, + Size scale_model_sz) +{ + Mat result; + int col_len = 0; + Size patch_sz = Size(cvFloor(current_scale * scale_factors[0] * base_target_sz.width), + cvFloor(current_scale * scale_factors[0] * base_target_sz.height)); + Mat img_patch = get_subwindow(img, pos, patch_sz.width, patch_sz.height); + img_patch.convertTo(img_patch, CV_32FC3); + resize(img_patch, img_patch, Size(scale_model_sz.width, scale_model_sz.height),0,0,INTER_LINEAR); + std::vector hog; + hog = get_features_hog(img_patch, 4); + result = Mat(Size((int)scale_factors.size(), hog[0].cols * hog[0].rows * (int)hog.size()), CV_32F); + col_len = hog[0].cols * hog[0].rows; + for (int i = 0; i < static_cast(hog.size()); ++i) { + hog[i] = hog[i].t(); + hog[i] = scale_window.at(0,0) * hog[i].reshape(0, col_len); + hog[i].copyTo(result(Rect(Point(0, i*col_len), hog[i].size()))); + } + + ParallelGetScaleFeatures parallelGetScaleFeatures(img, pos, base_target_sz, + current_scale, scale_factors, scale_window, scale_model_sz, col_len, result); + parallel_for_(Range(1, static_cast(scale_factors.size())), parallelGetScaleFeatures); + return result; +} + +void DSST::update(const Mat &image, const Point2f object_center) +{ + Mat scale_features = get_scale_features(image, object_center, original_targ_sz, + current_scale_factor, scale_factors, scale_window, scale_model_sz); + Mat Fscale_features; + dft(scale_features, Fscale_features, DFT_ROWS | DFT_COMPLEX_OUTPUT); + Mat new_sf_num; + Mat new_sf_den; + Mat new_sf_den_all; + mulSpectrums(ysf, Fscale_features, new_sf_num, DFT_ROWS, true); + Mat sf_den_all; + mulSpectrums(Fscale_features, Fscale_features, new_sf_den_all, DFT_ROWS, true); + reduce(new_sf_den_all, new_sf_den, 0, CV_REDUCE_SUM, -1); + + sf_num = (1 - learn_rate) * sf_num + learn_rate * new_sf_num; + sf_den = (1 - learn_rate) * sf_den + learn_rate * new_sf_den; +} + +float DSST::getScale(const Mat &image, const Point2f object_center) +{ + Mat scale_features = get_scale_features(image, object_center, original_targ_sz, + current_scale_factor, scale_factors, scale_window, scale_model_sz); + + Mat Fscale_features; + dft(scale_features, Fscale_features, DFT_ROWS | DFT_COMPLEX_OUTPUT); + + mulSpectrums(Fscale_features, sf_num, Fscale_features, 0, false); + Mat scale_resp; + reduce(Fscale_features, scale_resp, 0, CV_REDUCE_SUM, -1); + scale_resp = divide_complex_matrices(scale_resp, sf_den + 0.01f); + 
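// the inverse DFT below turns the filtered spectrum into a per-scale response;
// the location of its maximum selects the relative scale change, which is then
// clamped to [min_scale_factor, max_scale_factor]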
idft(scale_resp, scale_resp, DFT_REAL_OUTPUT|DFT_SCALE); + Point max_loc; + minMaxLoc(scale_resp, NULL, NULL, NULL, &max_loc); + + current_scale_factor *= scale_factors[max_loc.x]; + if(current_scale_factor < min_scale_factor) + current_scale_factor = min_scale_factor; + else if(current_scale_factor > max_scale_factor) + current_scale_factor = max_scale_factor; + + return current_scale_factor; +} +} /* namespace cv */ diff --git a/modules/tracking/src/trackerCSRTScaleEstimation.hpp b/modules/tracking/src/trackerCSRTScaleEstimation.hpp new file mode 100644 index 00000000000..e3eca3e6c3f --- /dev/null +++ b/modules/tracking/src/trackerCSRTScaleEstimation.hpp @@ -0,0 +1,45 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_TRACKER_CSRT_SCALE_ESTIMATION +#define OPENCV_TRACKER_CSRT_SCALE_ESTIMATION + +namespace cv +{ + +class DSST { +public: + DSST() {}; + DSST(const Mat &image, Rect2f bounding_box, Size2f template_size, int numberOfScales, + float scaleStep, float maxModelArea, float sigmaFactor, float scaleLearnRate); + ~DSST(); + void update(const Mat &image, const Point2f objectCenter); + float getScale(const Mat &image, const Point2f objecCenter); +private: + Mat get_scale_features(Mat img, Point2f pos, Size2f base_target_sz, float current_scale, + std::vector &scale_factors, Mat scale_window, Size scale_model_sz); + + Size scale_model_sz; + Mat ys; + Mat ysf; + Mat scale_window; + std::vector scale_factors; + Mat sf_num; + Mat sf_den; + float scale_sigma; + float min_scale_factor; + float max_scale_factor; + float current_scale_factor; + int scales_count; + float scale_step; + float max_model_area; + float sigma_factor; + float learn_rate; + + Size original_targ_sz; +}; + +} /* namespace cv */ + +#endif diff --git a/modules/tracking/src/trackerCSRTSegmentation.cpp b/modules/tracking/src/trackerCSRTSegmentation.cpp new file mode 100644 index 00000000000..b3b91aa7edf --- /dev/null +++ b/modules/tracking/src/trackerCSRTSegmentation.cpp @@ -0,0 +1,450 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "trackerCSRTSegmentation.hpp" + +#include +#include +#include +#include + +//-------------------- HISTOGRAM CLASS -------------------- +namespace cv +{ + +Histogram::Histogram(int numDimensions, int numBinsPerDimension) +{ + m_numBinsPerDim = numBinsPerDimension; + m_numDim = numDimensions; + p_size = cvFloor(std::pow(m_numBinsPerDim, m_numDim)); + p_bins.resize(p_size, 0); + p_dimIdCoef.resize(m_numDim, 1); + for (int i = 0; i < m_numDim-1; ++i) + p_dimIdCoef[i] = static_cast(std::pow(numBinsPerDimension, m_numDim - 1 - i)); + +} + +void Histogram::extractForegroundHistogram(std::vector & imgChannels, + cv::Mat weights, bool useMatWeights, int x1, int y1, int x2, int y2) +{ + //just for code clarity + cv::Mat & img = imgChannels[0]; + + if (!useMatWeights){ + //weights are epanechnikov distr. 
with peek at the center of the image; + double cx = x1 + (x2-x1)/2.; + double cy = y1 + (y2-y1)/2.; + double kernelSize_width = 1.0/(0.5*static_cast(x2-x1)*1.4142+1); //sqrt(2) + double kernelSize_height = 1.0/(0.5*static_cast(y2-y1)*1.4142+1); + + cv::Mat kernelWeight(img.rows, img.cols, CV_64FC1); + for (int y = y1; y < y2+1; ++y){ + double * weightPtr = kernelWeight.ptr(y); + double tmp_y = std::pow((cy-y)*kernelSize_height, 2); + for (int x = x1; x < x2+1; ++x){ + weightPtr[x] = kernelProfile_Epanechnikov(std::pow((cx-x)*kernelSize_width,2) + tmp_y); + } + } + weights = kernelWeight; + } + //extract pixel values and compute histogram + double rangePerBinInverse = static_cast(m_numBinsPerDim)/256.0; // 1 / (imgRange/numBinsPerDim) + double sum = 0; + for (int y = y1; y < y2+1; ++y){ + std::vector dataPtr(m_numDim); + for (int dim = 0; dim < m_numDim; ++dim) + dataPtr[dim] = imgChannels[dim].ptr(y); + const double * weightPtr = weights.ptr(y); + + for (int x = x1; x < x2+1; ++x){ + int id = 0; + for (int dim = 0; dim < m_numDim; ++dim){ + id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]); + } + p_bins[id] += weightPtr[x]; + sum += weightPtr[x]; + } + } + //normalize + sum = 1./sum; + for(int i = 0; i < p_size; ++i) + p_bins[i] *= sum; +} + +void Histogram::extractBackGroundHistogram( + std::vector & imgChannels, + int x1, int y1, int x2, int y2, + int outer_x1, int outer_y1, int outer_x2, int outer_y2) +{ + //extract pixel values and compute histogram + double rangePerBinInverse = static_cast(m_numBinsPerDim)/256.0; // 1 / (imgRange/numBinsPerDim) + double sum = 0; + for (int y = outer_y1; y < outer_y2; ++y){ + + std::vector dataPtr(m_numDim); + for (int dim = 0; dim < m_numDim; ++dim) + dataPtr[dim] = imgChannels[dim].ptr(y); + + for (int x = outer_x1; x < outer_x2; ++x){ + if (x >= x1 && x <= x2 && y >= y1 && y <= y2) + continue; + + int id = 0; + for (int dim = 0; dim < m_numDim; ++dim){ + id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]); + } + p_bins[id] += 1.0; + sum += 1.0; + } + } + //normalize + sum = 1./sum; + for(int i = 0; i < p_size; ++i) + p_bins[i] *= sum; +} + +cv::Mat Histogram::backProject(std::vector & imgChannels) +{ + //just for code clarity + cv::Mat & img = imgChannels[0]; + + cv::Mat backProject(img.rows, img.cols, CV_64FC1); + double rangePerBinInverse = static_cast(m_numBinsPerDim)/256.0; // 1 / (imgRange/numBinsPerDim) + + for (int y = 0; y < img.rows; ++y){ + double * backProjectPtr = backProject.ptr(y); + std::vector dataPtr(m_numDim); + for (int dim = 0; dim < m_numDim; ++dim) + dataPtr[dim] = imgChannels[dim].ptr(y); + + for (int x = 0; x < img.cols; ++x){ + int id = 0; + for (int dim = 0; dim < m_numDim; ++dim){ + id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]); + } + backProjectPtr[x] = p_bins[id]; + } + } + return backProject; +} + +// add new methods +std::vector Histogram::getHistogramVector() { + return p_bins; +} + +void Histogram::setHistogramVector(double *vector) { + for (size_t i=0; i Segment::computePosteriors( + std::vector &imgChannels, + int x1, int y1, int x2, int y2, + cv::Mat weights, cv::Mat fgPrior, cv::Mat bgPrior, + const Histogram &fgHistPrior, int numBinsPerChannel) +{ + //preprocess and normalize all data + CV_Assert(imgChannels.size() > 0); + + //fit target to the image + x1 = std::min(std::max(x1, 0), imgChannels[0].cols-1); + y1 = std::min(std::max(y1, 0), imgChannels[0].rows-1); + x2 = std::max(std::min(x2, imgChannels[0].cols-1), 0); + y2 = std::max(std::min(y2, 
imgChannels[0].rows-1), 0); + + //enlarge bbox by 1/3 of its size for background area + int offsetX = (x2-x1)/3; + int offsetY = (y2-y1)/3; + int outer_y1 = std::max(0, (int)(y1-offsetY)); + int outer_y2 = std::min(imgChannels[0].rows, (int)(y2+offsetY+1)); + int outer_x1 = std::max(0, (int)(x1-offsetX)); + int outer_x2 = std::min(imgChannels[0].cols, (int)(x2+offsetX+1)); + + //extract histogram from original data -> more pixels better representation of distr. by histograms + Histogram hist_target = + (fgHistPrior.m_numBinsPerDim == numBinsPerChannel && (size_t)fgHistPrior.m_numDim == imgChannels.size()) + ? fgHistPrior : Histogram(static_cast(imgChannels.size()), numBinsPerChannel); + Histogram hist_background(static_cast(imgChannels.size()), numBinsPerChannel); + if (weights.cols == 0) + hist_target.extractForegroundHistogram(imgChannels, cv::Mat(), false, x1, y1, x2, y2); + else + hist_target.extractForegroundHistogram(imgChannels, weights, true, x1, y1, x2, y2); + hist_background.extractBackGroundHistogram(imgChannels, x1, y1, x2, y2, + outer_x1, outer_y1, outer_x2, outer_y2); + + //compute resize factor so that the max area is 1000 (=avg. size ~ 32x32) + double factor = sqrt(1000.0/((x2-x1)*(y2-y1))); + if (factor > 1) + factor = 1.0; + cv::Size newSize(cvFloor((x2-x1)*factor), cvFloor((y2-y1)*factor)); + + //rescale input data + cv::Rect roiRect_inner = cv::Rect(x1, y1, x2-x1, y2-y1); + std::vector imgChannelsROI_inner(imgChannels.size()); + for (size_t i = 0; i < imgChannels.size(); ++i) + cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize); + + //initialize priors if there is no external source and rescale + cv::Mat fgPriorScaled; + if (fgPrior.cols == 0) + fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1); + else + cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize); + cv::Mat bgPriorScaled; + if (bgPrior.cols == 0) + bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1); + else + cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize); + + //backproject pixels likelihood + cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled); + cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled); + + double p_b = std::sqrt((std::pow(outer_x2-outer_x1, 2) + std::pow(outer_y2-outer_y1, 2)) / + (std::pow(x2-x1, 2) + std::pow(y2-y1, 2))) ; + double p_o = 1./(p_b + 1); + + //convert likelihoods to posterior prob. 
(Bayes rule) + cv::Mat prob_o(newSize, foregroundLikelihood.type()); + prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood); + cv::Mat prob_b = 1.0 - prob_o; + + std::pair sizedProbs = getRegularizedSegmentation(prob_o, prob_b, fgPriorScaled, bgPriorScaled); + + //resize probs to original size + std::pair probs; + cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height)); + cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height)); + + return probs; +} + +std::pair Segment::computePosteriors2( + std::vector &imgChannels, int x1, int y1, int x2, int y2, double p_b, + cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background) +{ + //preprocess and normalize all data + CV_Assert(imgChannels.size() > 0); + + //fit target to the image + x1 = std::min(std::max(x1, 0), imgChannels[0].cols-1); + y1 = std::min(std::max(y1, 0), imgChannels[0].rows-1); + x2 = std::max(std::min(x2, imgChannels[0].cols-1), 0); + y2 = std::max(std::min(y2, imgChannels[0].rows-1), 0); + + // calculate width and height of the region + int w = x2 - x1 + 1; + int h = y2 - y1 + 1; + w = std::min(std::max(w, 1), imgChannels[0].cols); + h = std::min(std::max(h, 1), imgChannels[0].rows); + + //double p_o = 1./(p_b + 1); + double p_o = 1. - p_b; + + //compute resize factor so that the max area is 1000 (=avg. size ~ 32x32) + double factor = sqrt(1000.0/(w*h)); + if (factor > 1) + factor = 1.0; + cv::Size newSize(cvFloor(w*factor), cvFloor(h*factor)); + + //rescale input data + cv::Rect roiRect_inner = cv::Rect(x1, y1, w, h); + std::vector imgChannelsROI_inner(imgChannels.size()); + for (size_t i = 0; i < imgChannels.size(); ++i) + cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize); + + //initialize priors if there is no external source and rescale + cv::Mat fgPriorScaled; + if (fgPrior.cols == 0) + fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1); + else + cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize); + cv::Mat bgPriorScaled; + if (bgPrior.cols == 0) + bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1); + else + cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize); + + //backproject pixels likelihood + cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled); + cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled); + + //convert likelihoods to posterior prob. 
(Bayes rule) + cv::Mat prob_o(newSize, foregroundLikelihood.type()); + prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood); + cv::Mat prob_b = 1.0 - prob_o; + + std::pair sizedProbs = getRegularizedSegmentation(prob_o, prob_b, + fgPriorScaled, bgPriorScaled); + //std::pair sizedProbs = std::pair(prob_o, prob_b); + + //resize probs to original size + std::pair probs; + cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height)); + cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height)); + + return probs; +} + +std::pair Segment::computePosteriors2(std::vector &imgChannels, + cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background) +{ + //preprocess and normalize all data + CV_Assert(imgChannels.size() > 0); + + //fit target to the image + int x1 = 0; + int y1 = 0; + int x2 = imgChannels[0].cols-1; + int y2 = imgChannels[0].rows-1; + + //compute resize factor so that we control the max area ~32^2 + double factor = sqrt(1000./((x2-x1)*(y2-y1))); + //double factor = 1; + if (factor > 1) + factor = 1.0; + cv::Size newSize(cvFloor((x2-x1)*factor), cvFloor((y2-y1)*factor)); + + //rescale input data + cv::Rect roiRect_inner = cv::Rect(x1, y1, x2-x1+1, y2-y1+1); + std::vector imgChannelsROI_inner(imgChannels.size()); + for (size_t i = 0; i < imgChannels.size(); ++i) + cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize); + + //initialize priors if there is no external source and rescale + cv::Mat fgPriorScaled; + if (fgPrior.cols == 0) + fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1); + else + cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize); + + cv::Mat bgPriorScaled; + if (bgPrior.cols == 0) + bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1); + else + cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize); + + //backproject pixels likelihood + cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled); + cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled); + + //prior for posterior, relative to the number of pixels in bg and fg + double p_b = 5./3.; + double p_o = 1./(p_b + 1); + + //convert likelihoods to posterior prob. (Bayes rule) + cv::Mat prob_o(newSize, foregroundLikelihood.type()); + prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood); + cv::Mat prob_b = 1.0 - prob_o; + + std::pair sizedProbs = getRegularizedSegmentation(prob_o, prob_b, fgPriorScaled, bgPriorScaled); + + //resize probs to original size + std::pair probs; + cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height)); + cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height)); + + return probs; +} + +std::pair Segment::getRegularizedSegmentation( + cv::Mat &prob_o, cv::Mat &prob_b, cv::Mat & prior_o, cv::Mat & prior_b) +{ + int hsize = cvFloor(std::max(1.0, (double)cvFloor(static_cast(prob_b.cols)*3./50. 
+ 0.5))); + int lambdaSize = hsize*2+1; + + //compute gaussian kernel + cv::Mat lambda(lambdaSize, lambdaSize, CV_64FC1); + double std2 = std::pow(hsize/3.0, 2); + double sumLambda = 0.0; + for (int y = -hsize; y < hsize + 1; ++y){ + double * lambdaPtr = lambda.ptr(y+hsize); + double tmp_y = y*y; + for (int x = -hsize; x < hsize +1; ++x){ + double tmp_gauss = gaussian(x*x, tmp_y, std2); + lambdaPtr[x+hsize] = tmp_gauss; + sumLambda += tmp_gauss; + } + } + sumLambda -= lambda.at(hsize, hsize); + //set center of kernel to 0 + lambda.at(hsize, hsize) = 0.0; + sumLambda = 1.0/sumLambda; + //normalize kernel to sum to 1 + lambda = lambda*sumLambda; + + //create lambda2 kernel + cv::Mat lambda2 = lambda.clone(); + lambda2.at(hsize, hsize) = 1.0; + + double terminateThr = 1e-1; + double logLike = std::numeric_limits::max(); + int maxIter = 50; + + //return values + cv::Mat Qsum_o(prior_o.rows, prior_o.cols, prior_o.type()); + cv::Mat Qsum_b(prior_o.rows, prior_o.cols, prior_o.type()); + + //algorithm temporal + cv::Mat Si_o(prior_o.rows, prior_o.cols, prior_o.type()); + cv::Mat Si_b(prior_o.rows, prior_o.cols, prior_o.type()); + cv::Mat Ssum_o(prior_o.rows, prior_o.cols, prior_o.type()); + cv::Mat Ssum_b(prior_o.rows, prior_o.cols, prior_o.type()); + cv::Mat Qi_o(prior_o.rows, prior_o.cols, prior_o.type()); + cv::Mat Qi_b(prior_o.rows, prior_o.cols, prior_o.type()); + cv::Mat logQo(prior_o.rows, prior_o.cols, prior_o.type()); + cv::Mat logQb(prior_o.rows, prior_o.cols, prior_o.type()); + + int i; + for (i = 0; i < maxIter; ++i){ + //follows the equations from Kristan et al. ACCV2014 paper + //"A graphical model for rapid obstacle image-map estimation from unmanned surface vehicles" + cv::Mat P_Io = prior_o.mul(prob_o) + std::numeric_limits::epsilon(); + cv::Mat P_Ib = prior_b.mul(prob_b) + std::numeric_limits::epsilon(); + + cv::filter2D(prior_o, Si_o, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT); + cv::filter2D(prior_b, Si_b, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT); + Si_o = Si_o.mul(prior_o); + Si_b = Si_b.mul(prior_b); + cv::Mat normSi = 1.0/(Si_o + Si_b); + Si_o = Si_o.mul(normSi); + Si_b = Si_b.mul(normSi); + cv::filter2D(Si_o, Ssum_o, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT); + cv::filter2D(Si_b, Ssum_b, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT); + + cv::filter2D(P_Io, Qi_o, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT); + cv::filter2D(P_Ib, Qi_b, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT); + Qi_o = Qi_o.mul(P_Io); + Qi_b = Qi_b.mul(P_Ib); + cv::Mat normQi = 1.0/(Qi_o + Qi_b); + Qi_o = Qi_o.mul(normQi); + Qi_b = Qi_b.mul(normQi); + cv::filter2D(Qi_o, Qsum_o, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT); + cv::filter2D(Qi_b, Qsum_b, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT); + + prior_o = (Qsum_o + Ssum_o)*0.25; + prior_b = (Qsum_b + Ssum_b)*0.25; + cv::Mat normPI = 1.0/(prior_o + prior_b); + prior_o = prior_o.mul(normPI); + prior_b = prior_b.mul(normPI); + + //converge ? 
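// convergence test: stop early once the average negative log-likelihood of the
// regularized posteriors changes by less than terminateThr between iterations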
+ cv::log(Qsum_o, logQo); + cv::log(Qsum_b, logQb); + cv::Scalar mean = cv::sum(logQo+logQb); + double logLikeNew = -mean.val[0]/(2*Qsum_o.rows*Qsum_o.cols); + if (std::abs(logLike - logLikeNew) < terminateThr) + break; + logLike = logLikeNew; + } + return std::pair(Qsum_o, Qsum_b); +} + +} //cv namespace +//--------------------------------------------------------------------------------------------------------------------- diff --git a/modules/tracking/src/trackerCSRTSegmentation.hpp b/modules/tracking/src/trackerCSRTSegmentation.hpp new file mode 100644 index 00000000000..ac2c3dac567 --- /dev/null +++ b/modules/tracking/src/trackerCSRTSegmentation.hpp @@ -0,0 +1,59 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_TRACKER_CSRT_SEGMENTATION +#define OPENCV_TRACKER_CSRT_SEGMENTATION + +namespace cv +{ +class Histogram +{ +public: + int m_numBinsPerDim; + int m_numDim; + + Histogram() : m_numBinsPerDim(0), m_numDim(0) {} + Histogram(int numDimensions, int numBinsPerDimension = 8); + void extractForegroundHistogram(std::vector & imgChannels, + cv::Mat weights, bool useMatWeights, int x1, int y1, int x2, int y2); + void extractBackGroundHistogram(std::vector & imgChannels, + int x1, int y1, int x2, int y2, int outer_x1, int outer_y1, + int outer_x2, int outer_y2); + cv::Mat backProject(std::vector & imgChannels); + std::vector getHistogramVector(); + void setHistogramVector(double *vector); + +private: + int p_size; + std::vector p_bins; + std::vector p_dimIdCoef; + + inline double kernelProfile_Epanechnikov(double x) + { return (x <= 1) ? (2.0/CV_PI)*(1-x) : 0; } +}; + + +class Segment +{ +public: + static std::pair computePosteriors(std::vector & imgChannels, + int x1, int y1, int x2, int y2, cv::Mat weights, cv::Mat fgPrior, + cv::Mat bgPrior, const Histogram &fgHistPrior, int numBinsPerChannel = 16); + static std::pair computePosteriors2(std::vector & imgChannels, + int x1, int y1, int x2, int y2, double p_b, cv::Mat fgPrior, + cv::Mat bgPrior, Histogram hist_target, Histogram hist_background); + static std::pair computePosteriors2(std::vector &imgChannels, + cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background); + +private: + static std::pair getRegularizedSegmentation(cv::Mat & prob_o, + cv::Mat & prob_b, cv::Mat &prior_o, cv::Mat &prior_b); + + inline static double gaussian(double x2, double y2, double std2){ + return exp(-(x2 + y2)/(2*std2))/(2*CV_PI*std2); + } +}; + +}//cv namespace +#endif diff --git a/modules/tracking/src/trackerCSRTUtils.cpp b/modules/tracking/src/trackerCSRTUtils.cpp new file mode 100644 index 00000000000..2f8aba306e3 --- /dev/null +++ b/modules/tracking/src/trackerCSRTUtils.cpp @@ -0,0 +1,563 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
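// Helper routines used by the tracker: circular shifting and Gaussian-shaped
// labels for the desired filter response, per-channel DFTs, element-wise complex
// division, padded sub-window extraction, and sub-pixel peak interpolation.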
+ +#include "precomp.hpp" + +#include "trackerCSRTUtils.hpp" + +namespace cv { + +Mat circshift(Mat matrix, int dx, int dy) +{ + Mat matrix_out = matrix.clone(); + int idx_y = 0; + int idx_x = 0; + for(int i=0; i(idx_y, idx_x) = matrix.at(i,j); + } + } + return matrix_out; +} + +Mat gaussian_shaped_labels(const float sigma, const int w, const int h) +{ + // create 2D Gaussian peak, convert to Fourier space and stores it into the yf + Mat y = Mat::zeros(h, w, CV_32F); + float w2 = static_cast(cvFloor(w / 2)); + float h2 = static_cast(cvFloor(h / 2)); + + // calculate for each pixel separatelly + for(int i=0; i(i,j) = (float)exp((-0.5 / pow(sigma, 2)) * (pow((i+1-h2), 2) + pow((j+1-w2), 2))); + } + } + // wrap-arround with the circulat shifting + y = circshift(y, -cvFloor(y.cols / 2), -cvFloor(y.rows / 2)); + Mat yf; + dft(y, yf, DFT_COMPLEX_OUTPUT); + return yf; +} + +std::vector fourier_transform_features(const std::vector &M) +{ + std::vector out(M.size()); + Mat channel; + // iterate over channels and convert them to Fourier domain + for(size_t k = 0; k < M.size(); k++) { + M[k].convertTo(channel, CV_32F); + dft(channel, channel, DFT_COMPLEX_OUTPUT); + out[k] = (channel); + } + return out; +} + +Mat divide_complex_matrices(const Mat &A, const Mat &B) +{ + std::vector va,vb; + split(A, va); + split(B, vb); + + Mat a = va.at(0); + Mat b = va.at(1); + Mat c = vb.at(0); + Mat d = vb.at(1); + + Mat div = c.mul(c) + d.mul(d); + Mat real_part = (a.mul(c) + b.mul(d)); + Mat im_part = (b.mul(c) - a.mul(d)); + divide(real_part, div, real_part); + divide(im_part, div, im_part); + + std::vector tmp(2); + tmp[0] = real_part; + tmp[1] = im_part; + Mat res; + merge(tmp, res); + return res; +} + +Mat get_subwindow( + const Mat &image, + const Point2f center, + const int w, + const int h, + Rect *valid_pixels) +{ + int startx = cvFloor(center.x) + 1 - (cvFloor(w/2)); + int starty = cvFloor(center.y) + 1 - (cvFloor(h/2)); + Rect roi(startx, starty, w, h); + int padding_left = 0, padding_right = 0, padding_top = 0, padding_bottom = 0; + if(roi.x < 0) { + padding_left = -roi.x; + roi.x = 0; + } + if(roi.y < 0) { + padding_top = -roi.y; + roi.y = 0; + } + roi.width -= padding_left; + roi.height-= padding_top; + if(roi.x + roi.width >= image.cols) { + padding_right = roi.x + roi.width - image.cols; + roi.width = image.cols - roi.x; + } + if(roi.y + roi.height >= image.rows) { + padding_bottom = roi.y + roi.height - image.rows; + roi.height = image.rows - roi.y; + } + Mat subwin = image(roi).clone(); + copyMakeBorder(subwin, subwin, padding_top, padding_bottom, padding_left, padding_right, BORDER_REPLICATE); + + if(valid_pixels != NULL) { + *valid_pixels = Rect(padding_left, padding_top, roi.width, roi.height); + } + return subwin; +} + +float subpixel_peak(const Mat &response, const std::string &s, const Point2f &p) +{ + int i_p0, i_p_l, i_p_r; // indexes in response + float p0, p_l, p_r; // values in response + + if(s.compare("vertical") == 0) { + // neighbouring rows + i_p0 = cvRound(p.y); + i_p_l = modul(cvRound(p.y) - 1, response.rows); + i_p_r = modul(cvRound(p.y) + 1, response.rows); + int px = static_cast(p.x); + p0 = response.at(i_p0, px); + p_l = response.at(i_p_l, px); + p_r = response.at(i_p_r, px); + } else if(s.compare("horizontal") == 0) { + // neighbouring cols + i_p0 = cvRound(p.x); + i_p_l = modul(cvRound(p.x) - 1, response.cols); + i_p_r = modul(cvRound(p.x) + 1, response.cols); + int py = static_cast(p.y); + p0 = response.at(py, i_p0); + p_l = response.at(py, i_p_l); + p_r = response.at(py, 
i_p_r); + } else { + std::cout << "Warning: unknown subpixel peak direction!" << std::endl; + return 0; + } + float delta = 0.5f * (p_r - p_l) / (2*p0 - p_r - p_l); + if(!std::isfinite(delta)) { + delta = 0; + } + + return delta; +} + +inline float chebpoly(const int n, const float x) +{ + float res; + if (fabs(x) <= 1) + res = cos(n*acos(x)); + else + res = cosh(n*acosh(x)); + return res; +} + +static Mat chebwin(int N, const float atten) +{ + Mat out(N , 1, CV_32FC1); + int nn, i; + float M, n, sum = 0, max=0; + float tg = static_cast(pow(10,atten/20.0f)); /* 1/r term [2], 10^gamma [2] */ + float x0 = cosh((1.0f/(N-1))*acosh(tg)); + M = (N-1)/2.0f; + if(N%2==0) + M = M + 0.5f; /* handle even length windows */ + for(nn=0; nn<(N/2+1); nn++) { + n = nn-M; + sum = 0; + for(i=1; i<=M; i++){ + sum += chebpoly(N-1,x0*static_cast(cos(CV_PI*i/N))) * + static_cast(cos(2.0f*n*CV_PI*i/N)); + } + out.at(nn,0) = tg + 2*sum; + out.at(N-nn-1,0) = out.at(nn,0) ; + if(out.at(nn,0) > max) + max = out.at(nn,0); + } + for(nn=0; nn(nn,0) /= max; /* normalize everything */ + + return out; +} + + +static double modified_bessel(int order, double x) +{ + // sum m=0:inf 1/(m! * Gamma(m + order + 1)) * (x/2)^(2m + order) + const double eps = 1e-13; + double result = 0; + double m = 0; + double gamma = 1.0; + for(int i = 2; i <= order; ++i) + gamma *= i; + double term = pow(x,order) / (pow(2,order) * gamma); + + while(term > eps * result) { + result += term; + //calculate new term in series + ++m; + term *= (x*x) / (4*m*(m+order)); + } + return result; +} + +Mat get_hann_win(Size sz) +{ + Mat hann_rows = Mat::ones(sz.height, 1, CV_32F); + Mat hann_cols = Mat::ones(1, sz.width, CV_32F); + int NN = sz.height - 1; + if(NN != 0) { + for (int i = 0; i < hann_rows.rows; ++i) { + hann_rows.at(i,0) = (float)(1.0/2.0 * (1.0 - cos(2*CV_PI*i/NN))); + } + } + NN = sz.width - 1; + if(NN != 0) { + for (int i = 0; i < hann_cols.cols; ++i) { + hann_cols.at(0,i) = (float)(1.0/2.0 * (1.0 - cos(2*CV_PI*i/NN))); + } + } + return hann_rows * hann_cols; +} + +Mat get_kaiser_win(Size sz, float alpha) +{ + Mat kaiser_rows = Mat::ones(sz.height, 1, CV_32F); + Mat kaiser_cols = Mat::ones(1, sz.width, CV_32F); + + int N = sz.height - 1; + double shape = alpha; + double den = 1.0 / modified_bessel(0, shape); + + for(int n = 0; n <= N; ++n) { + double K = (2.0 * n * 1.0/N) - 1.0; + double x = sqrt(1.0 - (K * K)); + kaiser_rows.at(n,0) = static_cast(modified_bessel(0, shape * x) * den); + } + + N = sz.width - 1; + for(int n = 0; n <= N; ++n) { + double K = (2.0 * n * 1.0/N) - 1.0; + double x = sqrt(1.0 - (K * K)); + kaiser_cols.at(0,n) = static_cast(modified_bessel(0, shape * x) * den); + } + + return kaiser_rows * kaiser_cols; +} + +Mat get_chebyshev_win(Size sz, float attenuation) +{ + Mat cheb_rows = chebwin(sz.height, attenuation); + Mat cheb_cols = chebwin(sz.width, attenuation).t(); + return cheb_rows * cheb_cols; +} + +static void computeHOG32D(const Mat &imageM, Mat &featM, const int sbin, const int pad_x, const int pad_y) +{ + const int dimHOG = 32; + CV_Assert(pad_x >= 0); + CV_Assert(pad_y >= 0); + CV_Assert(imageM.channels() == 3); + CV_Assert(imageM.depth() == CV_64F); + + // epsilon to avoid division by zero + const double eps = 0.0001; + // number of orientations + const int numOrient = 18; + // unit vectors to compute gradient orientation + const double uu[9] = {1.000, 0.9397, 0.7660, 0.5000, 0.1736, -0.1736, -0.5000, -0.7660, -0.9397}; + const double vv[9] = {0.000, 0.3420, 0.6428, 0.8660, 0.9848, 0.9848, 0.8660, 0.6428, 
0.3420}; + + // image size + const Size imageSize = imageM.size(); + // block size + // int bW = cvRound((double)imageSize.width/(double)sbin); + // int bH = cvRound((double)imageSize.height/(double)sbin); + int bW = cvFloor((double)imageSize.width/(double)sbin); + int bH = cvFloor((double)imageSize.height/(double)sbin); + const Size blockSize(bW, bH); + // size of HOG features + int oW = max(blockSize.width-2, 0) + 2*pad_x; + int oH = max(blockSize.height-2, 0) + 2*pad_y; + Size outSize = Size(oW, oH); + // size of visible + const Size visible = blockSize*sbin; + + // initialize historgram, norm, output feature matrices + Mat histM = Mat::zeros(Size(blockSize.width*numOrient, blockSize.height), CV_64F); + Mat normM = Mat::zeros(Size(blockSize.width, blockSize.height), CV_64F); + featM = Mat::zeros(Size(outSize.width*dimHOG, outSize.height), CV_64F); + + // get the stride of each matrix + const size_t imStride = imageM.step1(); + const size_t histStride = histM.step1(); + const size_t normStride = normM.step1(); + const size_t featStride = featM.step1(); + + // calculate the zero offset + const double* im = imageM.ptr(0); + double* const hist = histM.ptr(0); + double* const norm = normM.ptr(0); + double* const feat = featM.ptr(0); + + for (int y = 1; y < visible.height - 1; y++) + { + for (int x = 1; x < visible.width - 1; x++) + { + // OpenCV uses an interleaved format: BGR-BGR-BGR + const double* s = im + 3*min(x, imageM.cols-2) + min(y, imageM.rows-2)*imStride; + + // blue image channel + double dyb = *(s+imStride) - *(s-imStride); + double dxb = *(s+3) - *(s-3); + double vb = dxb*dxb + dyb*dyb; + + // green image channel + s += 1; + double dyg = *(s+imStride) - *(s-imStride); + double dxg = *(s+3) - *(s-3); + double vg = dxg*dxg + dyg*dyg; + + // red image channel + s += 1; + double dy = *(s+imStride) - *(s-imStride); + double dx = *(s+3) - *(s-3); + double v = dx*dx + dy*dy; + + // pick the channel with the strongest gradient + if (vg > v) { v = vg; dx = dxg; dy = dyg; } + if (vb > v) { v = vb; dx = dxb; dy = dyb; } + + // snap to one of the 18 orientations + double best_dot = 0; + int best_o = 0; + for (int o = 0; o < (int)numOrient/2; o++) + { + double dot = uu[o]*dx + vv[o]*dy; + if (dot > best_dot) + { + best_dot = dot; + best_o = o; + } + else if (-dot > best_dot) + { + best_dot = -dot; + best_o = o + (int)(numOrient/2); + } + } + + // add to 4 historgrams around pixel using bilinear interpolation + double yp = ((double)y+0.5)/(double)sbin - 0.5; + double xp = ((double)x+0.5)/(double)sbin - 0.5; + int iyp = (int)cvFloor(yp); + int ixp = (int)cvFloor(xp); + double vy0 = yp - iyp; + double vx0 = xp - ixp; + double vy1 = 1.0 - vy0; + double vx1 = 1.0 - vx0; + v = sqrt(v); + + // fill the value into the 4 neighborhood cells + if (iyp >= 0 && ixp >= 0) + *(hist + iyp*histStride + ixp*numOrient + best_o) += vy1*vx1*v; + + if (iyp >= 0 && ixp+1 < blockSize.width) + *(hist + iyp*histStride + (ixp+1)*numOrient + best_o) += vx0*vy1*v; + + if (iyp+1 < blockSize.height && ixp >= 0) + *(hist + (iyp+1)*histStride + ixp*numOrient + best_o) += vy0*vx1*v; + + if (iyp+1 < blockSize.height && ixp+1 < blockSize.width) + *(hist + (iyp+1)*histStride + (ixp+1)*numOrient + best_o) += vy0*vx0*v; + + } // for y + } // for x + + // compute the energy in each block by summing over orientation + for (int y = 0; y < blockSize.height; y++) + { + const double* src = hist + y*histStride; + double* dst = norm + y*normStride; + double const* const dst_end = dst + blockSize.width; + // for each cell + while (dst < 
dst_end) + { + *dst = 0; + for (int o = 0; o < (int)(numOrient/2); o++) + { + *dst += (*src + *(src + numOrient/2))* + (*src + *(src + numOrient/2)); + src++; + } + dst++; + src += numOrient/2; + } + } + + // compute the features + for (int y = pad_y; y < outSize.height - pad_y; y++) + { + for (int x = pad_x; x < outSize.width - pad_x; x++) + { + double* dst = feat + y*featStride + x*dimHOG; + double* p, n1, n2, n3, n4; + const double* src; + + p = norm + (y - pad_y + 1)*normStride + (x - pad_x + 1); + n1 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps); + p = norm + (y - pad_y)*normStride + (x - pad_x + 1); + n2 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps); + p = norm + (y- pad_y + 1)*normStride + x - pad_x; + n3 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps); + p = norm + (y - pad_y)*normStride + x - pad_x; + n4 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps); + + double t1 = 0.0, t2 = 0.0, t3 = 0.0, t4 = 0.0; + + // contrast-sesitive features + src = hist + (y - pad_y + 1)*histStride + (x - pad_x + 1)*numOrient; + for (int o = 0; o < numOrient; o++) + { + double val = *src; + double h1 = min(val*n1, 0.2); + double h2 = min(val*n2, 0.2); + double h3 = min(val*n3, 0.2); + double h4 = min(val*n4, 0.2); + *(dst++) = 0.5 * (h1 + h2 + h3 + h4); + + src++; + t1 += h1; + t2 += h2; + t3 += h3; + t4 += h4; + } + + // contrast-insensitive features + src = hist + (y - pad_y + 1)*histStride + (x - pad_x + 1)*numOrient; + for (int o = 0; o < numOrient/2; o++) + { + double sum = *src + *(src + numOrient/2); + double h1 = min(sum * n1, 0.2); + double h2 = min(sum * n2, 0.2); + double h3 = min(sum * n3, 0.2); + double h4 = min(sum * n4, 0.2); + *(dst++) = 0.5 * (h1 + h2 + h3 + h4); + src++; + } + + // texture features + *(dst++) = 0.2357 * t1; + *(dst++) = 0.2357 * t2; + *(dst++) = 0.2357 * t3; + *(dst++) = 0.2357 * t4; + // truncation feature + *dst = 0; + }// for x + }// for y + // Truncation features + for (int m = 0; m < featM.rows; m++) + { + for (int n = 0; n < featM.cols; n += dimHOG) + { + if (m > pad_y - 1 && m < featM.rows - pad_y && n > pad_x*dimHOG - 1 && n < featM.cols - pad_x*dimHOG) + continue; + + featM.at(m, n + dimHOG - 1) = 1; + } // for x + }// for y +} + +std::vector get_features_hog(const Mat &im, const int bin_size) +{ + Mat hogmatrix; + Mat im_; + im.convertTo(im_, CV_64FC3, 1.0/255.0); + computeHOG32D(im_,hogmatrix,bin_size,1,1); + hogmatrix.convertTo(hogmatrix, CV_32F); + Size hog_size = im.size(); + hog_size.width /= bin_size; + hog_size.height /= bin_size; + Mat hogc(hog_size, CV_32FC(32), hogmatrix.data); + std::vector features; + split(hogc, features); + return features; +} + +std::vector get_features_cn(const Mat &ppatch_data, const Size &output_size) { + Mat patch_data = ppatch_data.clone(); + Vec3b & pixel = patch_data.at(0,0); + unsigned index; + + Mat cnFeatures = Mat::zeros(patch_data.rows,patch_data.cols,CV_32FC(10)); + + for(int i=0;i(i,j); + index=(unsigned)(cvFloor((float)pixel[2]/8)+32*cvFloor((float)pixel[1]/8)+32*32*cvFloor((float)pixel[0]/8)); + + //copy the values + for(int k=0;k<10;k++){ + cnFeatures.at >(i,j)[k]=(float)ColorNames[index][k]; + } + } + } + std::vector result; + split(cnFeatures, result); + for (size_t i = 0; i < result.size(); i++) { + if (output_size.width > 0 && output_size.height > 0) { + resize(result.at(i), result.at(i), output_size, INTER_CUBIC); + } + } + return result; +} + +std::vector get_features_rgb(const 
Mat &patch, const Size &output_size) +{ + std::vector channels; + split(patch, channels); + for(size_t k=0; k hsv_img_channels; + split(hsv_img, hsv_img_channels); + hsv_img_channels.at(0).convertTo(hsv_img_channels.at(0), CV_8UC1, 255.0 / 180.0); + merge(hsv_img_channels, hsv_img); + return hsv_img; +} + +} //cv namespace diff --git a/modules/tracking/src/trackerCSRTUtils.hpp b/modules/tracking/src/trackerCSRTUtils.hpp new file mode 100644 index 00000000000..fbf78424ea3 --- /dev/null +++ b/modules/tracking/src/trackerCSRTUtils.hpp @@ -0,0 +1,52 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_TRACKER_CSRT_UTILS +#define OPENCV_TRACKER_CSRT_UTILS + +#include +#include +#include +#include +#include +#include + +namespace cv +{ + +inline int modul(int a, int b) +{ + // function calculates the module of two numbers and it takes into account also negative numbers + return ((a % b) + b) % b; +} + +inline double kernel_epan(double x) +{ + return (x <= 1) ? (2.0/3.14)*(1-x) : 0; +} + +Mat circshift(Mat matrix, int dx, int dy); +Mat gaussian_shaped_labels(const float sigma, const int w, const int h); +std::vector fourier_transform_features(const std::vector &M); +Mat divide_complex_matrices(const Mat &A, const Mat &B); +Mat get_subwindow(const Mat &image, const Point2f center, + const int w, const int h,Rect *valid_pixels = NULL); + +float subpixel_peak(const Mat &response, const std::string &s, const Point2f &p); +double get_max(const Mat &m); +double get_min(const Mat &m); + +Mat get_hann_win(Size sz); +Mat get_kaiser_win(Size sz, float alpha); +Mat get_chebyshev_win(Size sz, float attenuation); + +std::vector get_features_rgb(const Mat &patch, const Size &output_size); +std::vector get_features_hog(const Mat &im, const int bin_size); +std::vector get_features_cn(const Mat &im, const Size &output_size); + +Mat bgr2hsv(const Mat &img); + +} //cv namespace + +#endif diff --git a/modules/tracking/test/test_trackers.cpp b/modules/tracking/test/test_trackers.cpp index 812f68ab194..1a70f30d9cf 100644 --- a/modules/tracking/test/test_trackers.cpp +++ b/modules/tracking/test/test_trackers.cpp @@ -467,6 +467,12 @@ TEST_P(DistanceAndOverlap, MOSSE) test.run(); } +TEST_P(DistanceAndOverlap, CSRT) +{ + TrackerTest test( TrackerCSRT::create(), dataset, 22, .7f, NoTransform); + test.run(); +} + /***************************************************************************************/ //Tests with shifted initial window TEST_P(DistanceAndOverlap, Shifted_Data_MedianFlow) @@ -504,6 +510,12 @@ TEST_P(DistanceAndOverlap, Shifted_Data_MOSSE) TrackerTest test( TrackerMOSSE::create(), dataset, 13, .69f, CenterShiftLeft); test.run(); } + +TEST_P(DistanceAndOverlap, Shifted_Data_CSRT) +{ + TrackerTest test( TrackerCSRT::create(), dataset, 13, .69f, CenterShiftLeft); + test.run(); +} /***************************************************************************************/ //Tests with scaled initial window TEST_P(DistanceAndOverlap, Scaled_Data_MedianFlow) @@ -549,6 +561,12 @@ TEST_P(DistanceAndOverlap, Scaled_Data_MOSSE) test.run(); } +TEST_P(DistanceAndOverlap, Scaled_Data_CSRT) +{ + TrackerTest test( TrackerCSRT::create(), dataset, 22, 0.69f, Scale_1_1, 1); + test.run(); +} + INSTANTIATE_TEST_CASE_P( Tracking, DistanceAndOverlap, TESTSET_NAMES);
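The sub-pixel refinement performed by subpixel_peak() above fits a parabola through the correlation response at the integer peak and its two neighbours along one axis, then shifts the peak towards the parabola's vertex. Below is a minimal standalone sketch of that interpolation; the helper name and the sample values are illustrative only and are not part of the patch.

#include <cstdio>

// Fit a parabola through (-1, p_l), (0, p0), (1, p_r) and return the offset of
// its vertex from the central sample. This is the same formula subpixel_peak()
// applies along one axis of the response (the patch guards the result with
// std::isfinite instead of testing the denominator explicitly).
static float parabolic_offset(float p_l, float p0, float p_r)
{
    float denom = 2.0f * p0 - p_r - p_l;
    if (denom == 0.0f)      // flat neighbourhood: keep the integer peak
        return 0.0f;
    return 0.5f * (p_r - p_l) / denom;
}

int main()
{
    // illustrative response samples around a peak
    float p_l = 0.70f, p0 = 1.00f, p_r = 0.80f;
    std::printf("sub-pixel offset: %f\n", parabolic_offset(p_l, p0, p_r)); // ~0.1
    return 0;
}

A positive offset nudges the estimated peak towards the larger neighbour; the same one-dimensional refinement is applied once in the vertical and once in the horizontal direction.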