#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/center_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Ftype, typename Btype>
void CenterLossLayer<Ftype, Btype>::LayerSetUp(const vector<Blob*>& bottom,
    const vector<Blob*>& top) {
  const int num_output = this->layer_param_.center_loss_param().num_output();
  N_ = num_output;
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.center_loss_param().axis());
  // Dimensions starting from "axis" are "flattened" into a single
  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
  // and axis == 1, each example is treated as a CHW-dimensional feature.
  K_ = bottom[0]->count(axis);
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    this->blobs_.resize(1);
    // Initialize the center matrix: one K_-dimensional center per class,
    // stored as an N_ x K_ blob.
    vector<int> center_shape(2);
    center_shape[0] = N_;
    center_shape[1] = K_;
    this->blobs_[0] = Blob::create<Ftype>(center_shape);
    // fill the centers
    shared_ptr<Filler<Ftype> > center_filler(GetFiller<Ftype>(
        this->layer_param_.center_loss_param().center_filler()));
    center_filler->Fill(this->blobs_[0].get());
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Ftype, typename Btype>
void CenterLossLayer<Ftype, Btype>::Reshape(const vector<Blob*>& bottom,
    const vector<Blob*>& top) {
  // The label input must carry a single class index per example.
  CHECK_EQ(bottom[1]->channels(), 1);
  CHECK_EQ(bottom[1]->height(), 1);
  CHECK_EQ(bottom[1]->width(), 1);
  M_ = bottom[0]->num();
  // LossLayer::Reshape sets the top blob to a scalar holding the loss.
  LossLayer<Ftype, Btype>::Reshape(bottom, top);
  distance_.ReshapeLike(*bottom[0]);
  variation_sum_.ReshapeLike(*this->blobs_[0]);
}

template <typename Ftype, typename Btype>
void CenterLossLayer<Ftype, Btype>::Forward_cpu(const vector<Blob*>& bottom,
    const vector<Blob*>& top) {
  const Ftype* bottom_data = bottom[0]->cpu_data<Ftype>();
  const Ftype* label = bottom[1]->cpu_data<Ftype>();
  const Ftype* center = this->blobs_[0]->template cpu_data<Ftype>();
  Ftype* distance_data = distance_.mutable_cpu_data();
  // Compute the i-th row of distance_data: the offset of sample i from its
  // class center.
  for (int i = 0; i < M_; i++) {
    const int label_value = static_cast<int>(label[i]);
    // D(i,:) = X(i,:) - C(y(i),:)
    caffe_sub(K_, bottom_data + i * K_, center + label_value * K_,
        distance_data + i * K_);
  }
  Ftype dot = caffe_cpu_dot(M_ * K_, distance_.cpu_data(),
      distance_.cpu_data());
  float loss = dot / M_ / 2.F;
  top[0]->mutable_cpu_data<Ftype>()[0] = loss;
}

template <typename Ftype, typename Btype>
void CenterLossLayer<Ftype, Btype>::Backward_cpu(const vector<Blob*>& top,
    const vector<bool>& propagate_down, const vector<Blob*>& bottom) {
  // Gradient with respect to centers
  if (this->param_propagate_down_[0]) {
    const Btype* label = bottom[1]->cpu_data<Btype>();
    Btype* center_diff = this->blobs_[0]->template mutable_cpu_diff<Btype>();
    Btype* variation_sum_data = variation_sum_.mutable_cpu_data();
    const Btype* distance_data = distance_.template cpu_data<Btype>();

    // Accumulate \sum_{y_i==j} (C(j,:) - X(i,:)) per class j.
    caffe_set(N_ * K_, (Btype)0., variation_sum_data);
    for (int n = 0; n < N_; n++) {
      int count = 0;
      for (int m = 0; m < M_; m++) {
        const int label_value = static_cast<int>(label[m]);
        if (label_value == n) {
          count++;
          caffe_sub(K_, variation_sum_data + n * K_, distance_data + m * K_,
              variation_sum_data + n * K_);
        }
      }
      caffe_axpy(K_, (Btype)1. / ((Btype)count + (Btype)1.),
          variation_sum_data + n * K_, center_diff + n * K_);
    }
  }
  // Gradient with respect to bottom data
  if (propagate_down[0]) {
    caffe_copy(M_ * K_, distance_.template cpu_data<Btype>(),
        bottom[0]->mutable_cpu_diff<Btype>());
    caffe_scal(M_ * K_, top[0]->cpu_diff<Btype>()[0] / M_,
        bottom[0]->mutable_cpu_diff<Btype>());
  }
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
}
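// For reference, a sketch of the quantities the code above computes (M_ is
// the batch size, c_j the j-th center, and n_j = |{i : y_i == j}| the
// per-class count in the batch):
//
//   L             = 1/(2 M_) * \sum_i || x_i - c_{y_i} ||^2
//   dL/dx_i       = (x_i - c_{y_i}) / M_   (further scaled by the incoming
//                                           top diff, top[0]->cpu_diff()[0])
//   center diff  += \sum_{y_i == j} (c_j - x_i) / (1 + n_j)   for class j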
#ifdef CPU_ONLY
STUB_GPU(CenterLossLayer);
#endif

INSTANTIATE_CLASS_FB(CenterLossLayer);
REGISTER_LAYER_CLASS(CenterLoss);

}  // namespace caffe
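// A minimal usage sketch in prototxt (illustrative only: field names follow
// the center_loss_param accessors used above; blob names and values are
// hypothetical):
//
//   layer {
//     name: "center_loss"
//     type: "CenterLoss"
//     bottom: "fc5"      # learned features, shape M_ x K_
//     bottom: "label"    # one class index per example
//     top: "center_loss"
//     center_loss_param {
//       num_output: 10   # number of classes (N_)
//       center_filler { type: "constant" }
//     }
//     loss_weight: 0.008
//   }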