Implement CPU layer ops for ROIPoolingLayer and SmoothL1LossLayer #17

Open
wants to merge 1 commit into base: faster-rcnn
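Several of the CPU paths touched here were previously NOT_IMPLEMENTED (ROIPoolingLayer::Backward_cpu and both SmoothL1LossLayer passes); with this commit both layers run entirely under Caffe::set_mode(Caffe::CPU). Below is a minimal sketch of exercising the ROI pooling CPU forward path. Shapes, ROI coordinates, and parameter values are hypothetical, and it assumes the fork's ROIPoolingParameter fields pooled_h, pooled_w and spatial_scale; it is not part of this diff.

#include <vector>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/fast_rcnn_layers.hpp"

using namespace caffe;

int main() {
  Caffe::set_mode(Caffe::CPU);

  Blob<float> data(1, 3, 12, 8);   // feature map: N x C x H x W
  Blob<float> rois(2, 5, 1, 1);    // two ROIs, each [batch_index x1 y1 x2 y2]
  Blob<float> top;

  // Fill the ROI blob; coordinates are already in feature-map units here
  // because spatial_scale is set to 1.0 below.
  float roi_values[10] = {0, 0, 0, 7, 5,    // ROI 0 on batch item 0
                          0, 4, 2, 7, 7};   // ROI 1 on batch item 0
  float* roi_data = rois.mutable_cpu_data();
  for (int i = 0; i < 10; ++i) roi_data[i] = roi_values[i];

  LayerParameter param;
  param.mutable_roi_pooling_param()->set_pooled_h(6);
  param.mutable_roi_pooling_param()->set_pooled_w(6);
  param.mutable_roi_pooling_param()->set_spatial_scale(1.0f);

  std::vector<Blob<float>*> bottom_vec, top_vec;
  bottom_vec.push_back(&data);
  bottom_vec.push_back(&rois);
  top_vec.push_back(&top);

  ROIPoolingLayer<float> layer(param);
  layer.SetUp(bottom_vec, top_vec);    // LayerSetUp + Reshape (checks C*H*W == 5)
  layer.Forward(bottom_vec, top_vec);  // dispatches to Forward_cpu in CPU mode
  // top is now 2 x 3 x 6 x 6: one pooled_h x pooled_w window per ROI and channel.
  return 0;
}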
98 changes: 95 additions & 3 deletions src/caffe/layers/roi_pooling_layer.cpp
@@ -16,6 +16,12 @@ using std::ceil;

namespace caffe {

/*
* There are two bottom blobs: 0 (the conv feature map) and 1 (the ROIs).
*
* Each ROI is defined as [batch_index x1 y1 x2 y2], so the ROI blob's
* Channels x Height x Width must equal 5.
*/
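// Example (hypothetical values): a single ROI on batch item 0 spanning the
// box (x1, y1) = (8, 6) to (x2, y2) = (24, 14) in input-image coordinates is
// stored in bottom[1] as the five values {0, 8, 6, 24, 14}; spatial_scale_
// then maps these coordinates onto the conv feature map.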
template <typename Dtype>
void ROIPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
@@ -33,6 +39,9 @@ void ROIPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
template <typename Dtype>
void ROIPoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
CHECK_EQ(bottom.size(), 2) << "number of bottom blobs must be == 2";
CHECK_EQ(bottom[1]->channels() * bottom[1]->height() * bottom[1]->width(), 5)
<< "ROI blob C x H x W must be == 5";
channels_ = bottom[0]->channels();
height_ = bottom[0]->height();
width_ = bottom[0]->width();
@@ -52,8 +61,9 @@ void ROIPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
int batch_size = bottom[0]->num();
int top_count = top[0]->count();
Dtype* top_data = top[0]->mutable_cpu_data();
caffe_set(top_count, Dtype(-FLT_MAX), top_data);
int* argmax_data = max_idx_.mutable_cpu_data();

caffe_set(top_count, Dtype(-FLT_MAX), top_data);
caffe_set(top_count, -1, argmax_data);

// For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
@@ -127,9 +137,91 @@ void ROIPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
template <typename Dtype>
void ROIPoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
NOT_IMPLEMENTED;
}
if (!propagate_down[0])
return;

// Number of ROIs
const int num_rois = bottom[1]->num();
CHECK_EQ(num_rois, top[0]->num());
const int bottom_count = bottom[0]->count();

const Dtype* bottom_rois = bottom[1]->cpu_data();
const Dtype* top_diff = top[0]->cpu_diff();
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
const int* argmax_data = max_idx_.cpu_data();

caffe_set(bottom_count, Dtype(0), bottom_diff);

for (int n = 0; n < num_rois; ++n) {
int roi_batch_ind = bottom_rois[0];
int roi_start_w = round(bottom_rois[1] * spatial_scale_);
int roi_start_h = round(bottom_rois[2] * spatial_scale_);
int roi_end_w = round(bottom_rois[3] * spatial_scale_);
int roi_end_h = round(bottom_rois[4] * spatial_scale_);

// Force malformed ROIs to be 1x1
int roi_width = max(roi_end_w - roi_start_w + 1, 1);
int roi_height = max(roi_end_h - roi_start_h + 1, 1);

Dtype bin_size_h = static_cast<Dtype>(roi_height)
/ static_cast<Dtype>(pooled_height_);
Dtype bin_size_w = static_cast<Dtype>(roi_width)
/ static_cast<Dtype>(pooled_width_);

// Clamp the iteration range to the part of the ROI that lies inside the feature map
int start_h = max(0, roi_start_h);
int end_h = min(height_, roi_end_h + 1);
int start_w = max(0, roi_start_w);
int end_w = min(width_, roi_end_w + 1);

// Reverse engineer indices of elements pooled by this ROI
Dtype* offset_bottom_diff = bottom_diff + bottom[0]->offset(roi_batch_ind);

for (int c = 0; c < channels_; ++c) {
for (int h = start_h; h < end_h; ++h) {
for (int w = start_w; w < end_w; ++w) {
int index = h * width_ + w;

// Compute feasible set of pooled units that could have pooled
// this bottom unit

int phstart = floor(static_cast<Dtype>(h - roi_start_h) / bin_size_h);
int phend = ceil(static_cast<Dtype>(h - roi_start_h + 1) / bin_size_h);
int pwstart = floor(static_cast<Dtype>(w - roi_start_w) / bin_size_w);
int pwend = ceil(static_cast<Dtype>(w - roi_start_w + 1) / bin_size_w);

phstart = min(max(phstart, 0), pooled_height_);
phend = min(max(phend, 0), pooled_height_);
pwstart = min(max(pwstart, 0), pooled_width_);
pwend = min(max(pwend, 0), pooled_width_);

Dtype gradient = Dtype(0);
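// Sum the top gradient over every bin in the feasible set whose stored
// argmax points back at this bottom element; the OpenMP reduction keeps
// the per-thread partial sums race-free.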
#ifdef _OPENMP
#pragma omp parallel for collapse(2) reduction(+:gradient)
#endif
for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) {
int pindex = ph * pooled_width_ + pw;
if (argmax_data[pindex] == index) {
gradient += top_diff[pindex];
}
}
}

offset_bottom_diff[index] += gradient;
}
}

// Increment all data pointers by one channel
offset_bottom_diff += bottom[0]->offset(0, 1);
top_diff += top[0]->offset(0, 1);
argmax_data += max_idx_.offset(0, 1);
}

// Increment ROI data pointer
bottom_rois += bottom[1]->offset(1);
}
}

#ifdef CPU_ONLY
STUB_GPU(ROIPoolingLayer);
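Backward_cpu above routes gradients by inverting the forward pooling: for each bottom element inside an ROI it computes the feasible range of pooled bins that could have selected that element, then accumulates top_diff over the bins whose stored argmax actually points back at it. A standalone sketch of that bin inversion follows; the helper name bin_range and all sizes are hypothetical, and it is not code from this diff.

#include <algorithm>
#include <cmath>
#include <cstdio>

// Mirror of the phstart/phend computation in Backward_cpu: for a bottom row h
// inside an ROI of roi_height rows pooled into pooled_height bins, return the
// half-open range [phstart, phend) of bins whose span contains h.
void bin_range(int h, int roi_start_h, int roi_height, int pooled_height,
               int* phstart, int* phend) {
  float bin_size_h = static_cast<float>(roi_height) / pooled_height;
  *phstart = static_cast<int>(std::floor((h - roi_start_h) / bin_size_h));
  *phend = static_cast<int>(std::ceil((h - roi_start_h + 1) / bin_size_h));
  *phstart = std::min(std::max(*phstart, 0), pooled_height);
  *phend = std::min(std::max(*phend, 0), pooled_height);
}

int main() {
  // Hypothetical ROI: starts at row 3, is 10 rows tall, pooled into 4 bins,
  // so each bin spans 2.5 rows.
  int phstart, phend;
  bin_range(/*h=*/7, /*roi_start_h=*/3, /*roi_height=*/10, /*pooled_height=*/4,
            &phstart, &phend);
  // Row 7 is offset 4 inside the ROI, which lies only in bin 1's span
  // [2.5, 5.0), so this prints "feasible bins: [1, 2)".
  std::printf("feasible bins: [%d, %d)\n", phstart, phend);
  return 0;
}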
94 changes: 92 additions & 2 deletions src/caffe/layers/smooth_L1_loss_layer.cpp
@@ -6,6 +6,7 @@
// ------------------------------------------------------------------

#include "caffe/fast_rcnn_layers.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

@@ -51,13 +52,102 @@ void SmoothL1LossLayer<Dtype>::Reshape(
template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
NOT_IMPLEMENTED;
int count = bottom[0]->count();
caffe_sub(
count,
bottom[0]->cpu_data(),
bottom[1]->cpu_data(),
diff_.mutable_cpu_data()); // d := b0 - b1
if (has_weights_) {
// apply "inside" weights
caffe_mul(
count,
bottom[2]->cpu_data(),
diff_.cpu_data(),
diff_.mutable_cpu_data()); // d := w_in * (b0 - b1)
}

// f(x) = 0.5 * (sigma * x)^2 if |x| < 1 / sigma / sigma
// |x| - 0.5 / sigma / sigma otherwise
const Dtype* in = diff_.cpu_data();
Dtype* out = errors_.mutable_cpu_data();

#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int index = 0; index < count; ++index) {
Dtype val = in[index];
Dtype abs_val = fabs(val);
if (abs_val < 1.0 / sigma2_) {
out[index] = 0.5 * val * val * sigma2_;
} else {
out[index] = abs_val - 0.5 / sigma2_;
}
}

if (has_weights_) {
// apply "outside" weights
caffe_mul(
count,
bottom[3]->cpu_data(),
errors_.cpu_data(),
errors_.mutable_cpu_data()); // d := w_out * SmoothL1(w_in * (b0 - b1))
}

Dtype loss = caffe_cpu_dot(count, ones_.cpu_data(), errors_.cpu_data());
top[0]->mutable_cpu_data()[0] = loss / bottom[0]->num();
}

template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
NOT_IMPLEMENTED;
// after the forward pass, diff_ holds w_in * (b0 - b1)
int count = diff_.count();

// f'(x) = sigma * sigma * x if |x| < 1 / sigma / sigma
// = sign(x) otherwise
const Dtype* in = diff_.cpu_data();
Dtype* out = diff_.mutable_cpu_data();

#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int index = 0; index < count; ++index) {
Dtype val = in[index];
Dtype abs_val = fabs(val);
if (abs_val < 1.0 / sigma2_) {
out[index] = sigma2_ * val;
} else {
out[index] = (Dtype(0) < val) - (val < Dtype(0));
}
}

for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
const Dtype sign = (i == 0) ? 1 : -1;
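// alpha combines the sign of d(b0 - b1)/d(bottom[i]), the upstream gradient
// in top[0]->cpu_diff()[0] (which already carries any loss_weight), and the
// same 1 / num normalization applied to the loss in Forward_cpu.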
const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
caffe_cpu_axpby(
count, // count
alpha, // alpha
diff_.cpu_data(), // x
Dtype(0), // beta
bottom[i]->mutable_cpu_diff()); // y
if (has_weights_) {
// Scale by "inside" weight
caffe_mul(
count,
bottom[2]->cpu_data(),
bottom[i]->cpu_diff(),
bottom[i]->mutable_cpu_diff());
// Scale by "outside" weight
caffe_mul(
count,
bottom[3]->cpu_data(),
bottom[i]->cpu_diff(),
bottom[i]->mutable_cpu_diff());
}
}
}
}

#ifdef CPU_ONLY
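Forward_cpu above applies the smooth L1 (Huber-style) function elementwise and Backward_cpu applies its derivative, switching between the quadratic and linear pieces at |x| = 1 / sigma^2. A standalone sketch follows showing both pieces and checking that they meet at the threshold; the helper names and the sigma value are hypothetical, and it is not code from this diff.

#include <cassert>
#include <cmath>
#include <cstdio>

// Smooth L1 with the sigma parameterization used above (sigma2 = sigma^2).
double smooth_l1(double x, double sigma2) {
  double abs_x = std::fabs(x);
  if (abs_x < 1.0 / sigma2) return 0.5 * x * x * sigma2;
  return abs_x - 0.5 / sigma2;
}

// Its derivative, as applied in Backward_cpu.
double smooth_l1_grad(double x, double sigma2) {
  double abs_x = std::fabs(x);
  if (abs_x < 1.0 / sigma2) return sigma2 * x;
  return (x > 0) - (x < 0);  // sign(x)
}

int main() {
  const double sigma2 = 9.0;      // e.g. sigma = 3
  const double t = 1.0 / sigma2;  // threshold between the two pieces
  // At the threshold both pieces evaluate to 0.5 / sigma2, and just below it
  // the quadratic piece's slope has already reached 1.
  assert(std::fabs(smooth_l1(t, sigma2) - 0.5 / sigma2) < 1e-12);
  assert(std::fabs(smooth_l1_grad(t - 1e-9, sigma2) - 1.0) < 1e-6);
  std::printf("f(1/sigma2) = %.6f, f'(just below) = %.6f\n",
              smooth_l1(t, sigma2), smooth_l1_grad(t - 1e-9, sigma2));
  return 0;
}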
4 changes: 1 addition & 3 deletions src/caffe/test/test_roi_pooling_layer.cpp
@@ -25,8 +25,6 @@ using boost::scoped_ptr;

namespace caffe {

typedef ::testing::Types<GPUDevice<float>, GPUDevice<double> > TestDtypesGPU;

template <typename TypeParam>
class ROIPoolingLayerTest : public MultiDeviceTest<TypeParam> {
typedef typename TypeParam::Dtype Dtype;
@@ -85,7 +83,7 @@ class ROIPoolingLayerTest : public MultiDeviceTest<TypeParam> {
vector<Blob<Dtype>*> blob_top_vec_;
};

TYPED_TEST_CASE(ROIPoolingLayerTest, TestDtypesGPU);
TYPED_TEST_CASE(ROIPoolingLayerTest, TestDtypesAndDevices);

TYPED_TEST(ROIPoolingLayerTest, TestGradient) {
typedef typename TypeParam::Dtype Dtype;
14 changes: 5 additions & 9 deletions src/caffe/test/test_smooth_L1_loss_layer.cpp
@@ -8,16 +8,13 @@
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/fast_rcnn_layers.hpp"

#include "caffe/test/test_caffe_main.hpp"
#include "caffe/test/test_gradient_check_util.hpp"

namespace caffe {

typedef ::testing::Types<GPUDevice<float>, GPUDevice<double> > TestDtypesGPU;

template <typename TypeParam>
class SmoothL1LossLayerTest : public MultiDeviceTest<TypeParam> {
typedef typename TypeParam::Dtype Dtype;
@@ -30,9 +27,9 @@ class SmoothL1LossLayerTest : public MultiDeviceTest<TypeParam> {
blob_bottom_outside_weights_(new Blob<Dtype>(10, 5, 1, 1)),
blob_top_loss_(new Blob<Dtype>()) {
// fill the values
FillerParameter const_filler_param;
const_filler_param.set_value(-1.);
ConstantFiller<Dtype> const_filler(const_filler_param);
//FillerParameter const_filler_param;
//const_filler_param.set_value(-1.);
//ConstantFiller<Dtype> const_filler(const_filler_param);
FillerParameter filler_param;
GaussianFiller<Dtype> filler(filler_param);

@@ -67,7 +64,7 @@ class SmoothL1LossLayerTest : public MultiDeviceTest<TypeParam> {
vector<Blob<Dtype>*> blob_top_vec_;
};

TYPED_TEST_CASE(SmoothL1LossLayerTest, TestDtypesGPU);
TYPED_TEST_CASE(SmoothL1LossLayerTest, TestDtypesAndDevices);

TYPED_TEST(SmoothL1LossLayerTest, TestGradient) {
typedef typename TypeParam::Dtype Dtype;
@@ -79,8 +76,7 @@ TYPED_TEST(SmoothL1LossLayerTest, TestGradient) {
const Dtype kLossWeight = 3.7;
layer_param.add_loss_weight(kLossWeight);
SmoothL1LossLayer<Dtype> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
GradientChecker<Dtype> checker(1e-2, 1e-2);
checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
this->blob_top_vec_, 0);
checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,