| @@ -0,0 +1,119 @@ | ||
| #include <boost/thread.hpp> | ||
| #include <map> | ||
| #include <string> | ||
| #include <vector> | ||
|
|
||
| #include "caffe/common.hpp" | ||
| #include "caffe/data_reader.hpp" | ||
| #include "caffe/layers/data_layer.hpp" | ||
| #include "caffe/proto/caffe.pb.h" | ||
|
|
||
| namespace caffe { | ||
|
|
||
| using boost::weak_ptr; | ||
|
|
||
| map<const string, weak_ptr<DataReader::Body> > DataReader::bodies_; | ||
| static boost::mutex bodies_mutex_; | ||
|
|
||
// Constructs a reader for the given layer. Readers that share the same
// source key (derived from the layer parameter) share a single Body, i.e.
// a single background thread and DB cursor, so the database is read once
// no matter how many solvers consume it.
DataReader::DataReader(const LayerParameter& param)
    : queue_pair_(new QueuePair(  //
        // Queue capacity: enough free datums for `prefetch` full batches.
        param.data_param().prefetch() * param.data_param().batch_size())) {
  // Get or create a body; the registry of bodies is shared process-wide,
  // so all lookups/insertions happen under bodies_mutex_.
  boost::mutex::scoped_lock lock(bodies_mutex_);
  string key = source_key(param);
  weak_ptr<Body>& weak = bodies_[key];
  body_ = weak.lock();
  if (!body_) {
    // First reader for this source: spawn the reading thread.
    body_.reset(new Body(param));
    bodies_[key] = weak_ptr<Body>(body_);
  }
  // Register our queue pair so the body starts feeding it.
  body_->new_queue_pairs_.push(queue_pair_);
}
|
|
||
DataReader::~DataReader() {
  // Capture the registry key before releasing body_: param_ becomes
  // unreachable once our shared_ptr is dropped.
  string key = source_key(body_->param_);
  body_.reset();
  // If we held the last strong reference, the weak_ptr left in the
  // registry is now expired; erase the stale entry under the lock.
  boost::mutex::scoped_lock lock(bodies_mutex_);
  if (bodies_[key].expired()) {
    bodies_.erase(key);
  }
}
|
|
||
| // | ||
|
|
||
// Pre-populates the free queue with `size` heap-allocated datums; the
// reader thread recycles these between free_ and full_ so no allocation
// happens in the steady state.
DataReader::QueuePair::QueuePair(int size) {
  // Initialize the free queue with requested number of datums
  for (int i = 0; i < size; ++i) {
    free_.push(new Datum());
  }
}
|
|
||
| DataReader::QueuePair::~QueuePair() { | ||
| Datum* datum; | ||
| while (free_.try_pop(&datum)) { | ||
| delete datum; | ||
| } | ||
| while (full_.try_pop(&datum)) { | ||
| delete datum; | ||
| } | ||
| } | ||
|
|
||
| // | ||
|
|
||
// Copies the layer parameter and immediately launches the background
// reading thread (InternalThreadEntry).
DataReader::Body::Body(const LayerParameter& param)
    : param_(param),
      new_queue_pairs_() {
  StartInternalThread();
}
|
|
||
// Interrupts and joins the reading thread before members are destroyed.
DataReader::Body::~Body() {
  StopInternalThread();
}
|
|
||
// Background thread: opens the backing DB and forever distributes datums
// round-robin to one queue pair per solver. Exits via the
// thread_interrupted exception thrown from a blocking queue operation
// when StopInternalThread() is called.
void DataReader::Body::InternalThreadEntry() {
  shared_ptr<db::DB> db(db::GetDB(param_.data_param().backend()));
  db->Open(param_.data_param().source(), db::READ);
  shared_ptr<db::Cursor> cursor(db->NewCursor());
  vector<shared_ptr<QueuePair> > qps;
  try {
    // TEST phase uses a single consumer regardless of solver count.
    int solver_count = param_.phase() == TRAIN ? Caffe::solver_count() : 1;

    // To ensure deterministic runs, only start running once all solvers
    // are ready. But solvers need to peek on one item during initialization,
    // so read one item, then wait for the next solver.
    for (int i = 0; i < solver_count; ++i) {
      shared_ptr<QueuePair> qp(new_queue_pairs_.pop());
      read_one(cursor.get(), qp.get());
      qps.push_back(qp);
    }
    // Main loop
    while (!must_stop()) {
      for (int i = 0; i < solver_count; ++i) {
        read_one(cursor.get(), qps[i].get());
      }
      // Check no additional readers have been created. This can happen if
      // more than one net is trained at a time per process, whether single
      // or multi solver. It might also happen if two data layers have same
      // name and same source.
      CHECK_EQ(new_queue_pairs_.size(), 0);
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }
}
|
|
||
// Reads a single record from the cursor into a recycled Datum and hands
// it to the consumer. Blocks on qp->free_ until the consumer returns a
// datum, which provides back-pressure against the prefetch queue size.
void DataReader::Body::read_one(db::Cursor* cursor, QueuePair* qp) {
  Datum* datum = qp->free_.pop();
  // TODO deserialize in-place instead of copy?
  datum->ParseFromString(cursor->value());
  qp->full_.push(datum);

  // go to the next iter; wrap around at the end of the DB so reading
  // continues across epochs.
  cursor->Next();
  if (!cursor->valid()) {
    DLOG(INFO) << "Restarting data prefetching from start.";
    cursor->SeekToFirst();
  }
}
|
|
||
| } // namespace caffe |
| @@ -0,0 +1,167 @@ | ||
| #ifdef USE_OPENCV | ||
| #include <opencv2/core/core.hpp> | ||
| #endif // USE_OPENCV | ||
| #include <stdint.h> | ||
|
|
||
| #include <vector> | ||
|
|
||
| #include "caffe/data_transformer.hpp" | ||
| #include "caffe/layers/box_data_layer.hpp" | ||
| #include "caffe/util/benchmark.hpp" | ||
|
|
||
| namespace caffe { | ||
|
|
||
// Constructs the layer and its shared DataReader (one reading thread per
// distinct data source; see data_reader.cpp).
template <typename Dtype>
BoxDataLayer<Dtype>::BoxDataLayer(const LayerParameter& param)
  : BasePrefetchingDataLayer<Dtype>(param),
    reader_(param) {
}
|
|
||
// Stops the prefetch thread before the layer's members are destroyed.
template <typename Dtype>
BoxDataLayer<Dtype>::~BoxDataLayer() {
  this->StopInternalThread();
}
|
|
||
| template <typename Dtype> | ||
| void BoxDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
| const vector<Blob<Dtype>*>& top) { | ||
| this->box_label_ = true; | ||
| const DataParameter param = this->layer_param_.data_param(); | ||
| const int batch_size = param.batch_size(); | ||
| // Read a data point, and use it to initialize the top blob. | ||
| Datum& datum = *(reader_.full().peek()); | ||
|
|
||
| // Use data_transformer to infer the expected blob shape from datum. | ||
| vector<int> top_shape = this->data_transformer_->InferBlobShape(datum); | ||
| this->transformed_data_.Reshape(top_shape); | ||
| // Reshape top[0] and prefetch_data according to the batch_size. | ||
| top_shape[0] = batch_size; | ||
| top[0]->Reshape(top_shape); | ||
| for (int i = 0; i < this->prefetch_.size(); ++i) { | ||
| this->prefetch_[i]->data_.Reshape(top_shape); | ||
| } | ||
| LOG(INFO) << "output data size: " << top[0]->num() << "," | ||
| << top[0]->channels() << "," << top[0]->height() << "," | ||
| << top[0]->width(); | ||
| // label | ||
| if (this->output_labels_) { | ||
| side_ = param.side(); | ||
| vector<int> label_shape(1, batch_size); | ||
| int label_size = side_ * side_ * (1 + 1 + 1 + 4); | ||
| label_shape.push_back(label_size); | ||
| top[1]->Reshape(label_shape); | ||
| for (int j = 0; j < this->prefetch_.size(); ++j) { | ||
| this->prefetch_[j]->label_.Reshape(label_shape); | ||
| } | ||
|
|
||
| } | ||
| } | ||
|
|
||
| // This function is called on prefetch thread | ||
| template<typename Dtype> | ||
| void BoxDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) { | ||
| CPUTimer batch_timer; | ||
| batch_timer.Start(); | ||
| double read_time = 0; | ||
| double trans_time = 0; | ||
| CPUTimer timer; | ||
| CHECK(batch->data_.count()); | ||
| CHECK(this->transformed_data_.count()); | ||
|
|
||
| // Reshape according to the first datum of each batch | ||
| // on single input batches allows for inputs of varying dimension. | ||
| const int batch_size = this->layer_param_.data_param().batch_size(); | ||
| Datum& datum = *(reader_.full().peek()); | ||
| // Use data_transformer to infer the expected blob shape from datum. | ||
| vector<int> top_shape = this->data_transformer_->InferBlobShape(datum); | ||
| this->transformed_data_.Reshape(top_shape); | ||
| // Reshape batch according to the batch_size. | ||
| top_shape[0] = batch_size; | ||
| batch->data_.Reshape(top_shape); | ||
|
|
||
| Dtype* top_data = batch->data_.mutable_cpu_data(); | ||
| Dtype* top_label; | ||
|
|
||
| if (this->output_labels_) { | ||
| top_label = batch->label_.mutable_cpu_data(); | ||
|
|
||
| } | ||
| for (int item_id = 0; item_id < batch_size; ++item_id) { | ||
| timer.Start(); | ||
| // get a datum | ||
| Datum& datum = *(reader_.full().pop("Waiting for data")); | ||
| read_time += timer.MicroSeconds(); | ||
| timer.Start(); | ||
| // Apply data transformations (mirror, scale, crop...) | ||
| int offset = batch->data_.offset(item_id); | ||
| vector<BoxLabel> box_labels; | ||
| this->transformed_data_.set_cpu_data(top_data + offset); | ||
| if (this->output_labels_) { | ||
| // rand sample a patch, adjust box labels | ||
|
|
||
| this->data_transformer_->Transform(datum, &(this->transformed_data_), &box_labels); | ||
| // transform label | ||
| int label_offset = batch->label_.offset(item_id); | ||
| int count = batch->label_.count(1); | ||
| transform_label(count, top_label + label_offset, box_labels, side_); | ||
|
|
||
| } else { | ||
| this->data_transformer_->Transform(datum, &(this->transformed_data_)); | ||
| } | ||
| trans_time += timer.MicroSeconds(); | ||
|
|
||
| reader_.free().push(const_cast<Datum*>(&datum)); | ||
| } | ||
| timer.Stop(); | ||
| batch_timer.Stop(); | ||
| DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; | ||
| DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; | ||
| DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; | ||
| } | ||
|
|
||
| template<typename Dtype> | ||
| void BoxDataLayer<Dtype>::transform_label(int count, Dtype* top_label, | ||
| const vector<BoxLabel>& box_labels, int side) { | ||
| int locations = pow(side, 2); | ||
| CHECK_EQ(count, locations * 7) << | ||
| "side and count not match"; | ||
| // difficult | ||
| caffe_set(locations, Dtype(0), top_label); | ||
| // isobj | ||
| caffe_set(locations, Dtype(0), top_label + locations); | ||
| // class label | ||
| caffe_set(locations, Dtype(-1), top_label + locations * 2); | ||
| // box | ||
| caffe_set(locations*4, Dtype(0), top_label + locations * 3); | ||
| for (int i = 0; i < box_labels.size(); ++i) { | ||
| float difficult = box_labels[i].difficult_; | ||
| if (difficult != 0. && difficult != 1.) { | ||
| LOG(WARNING) << "Difficult must be 0 or 1"; | ||
| } | ||
| float class_label = box_labels[i].class_label_; | ||
| CHECK_GE(class_label, 0) << "class_label must >= 0"; | ||
| float x = box_labels[i].box_[0]; | ||
| float y = box_labels[i].box_[1]; | ||
| // LOG(INFO) << "x: " << x << " y: " << y; | ||
| int x_index = floor(x * side); | ||
| int y_index = floor(y * side); | ||
| x_index = std::min(x_index, side - 1); | ||
| y_index = std::min(y_index, side - 1); | ||
| int dif_index = side * y_index + x_index; | ||
| int obj_index = locations + dif_index; | ||
| int class_index = locations * 2 + dif_index; | ||
| int cor_index = locations * 3 + dif_index * 4; | ||
| top_label[dif_index] = difficult; | ||
| top_label[obj_index] = 1; | ||
| // LOG(INFO) << "dif_index: " << dif_index << " class_label: " << class_label; | ||
| top_label[class_index] = class_label; | ||
| for (int j = 0; j < 4; ++j) { | ||
| top_label[cor_index + j] = box_labels[i].box_[j]; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| INSTANTIATE_CLASS(BoxDataLayer); | ||
| REGISTER_LAYER_CLASS(BoxData); | ||
|
|
||
| } // namespace caffe |
| @@ -0,0 +1,228 @@ | ||
| #include <algorithm> | ||
| #include <cfloat> | ||
| #include <vector> | ||
| #include <cmath> | ||
|
|
||
| #include "caffe/layers/detection_loss_layer.hpp" | ||
| #include "caffe/util/math_functions.hpp" | ||
|
|
||
| namespace caffe { | ||
|
|
||
// 1-D overlap of two intervals given by center and width. A negative
// result means the intervals are disjoint by that amount.
template <typename Dtype>
Dtype Overlap(Dtype x1, Dtype w1, Dtype x2, Dtype w2) {
  const Dtype half1 = w1 / 2;
  const Dtype half2 = w2 / 2;
  const Dtype lo = std::max(x1 - half1, x2 - half2);
  const Dtype hi = std::min(x1 + half1, x2 + half2);
  return hi - lo;
}
|
|
||
| template <typename Dtype> | ||
| Dtype Calc_iou(const vector<Dtype>& box, const vector<Dtype>& truth) { | ||
| Dtype w = Overlap(box[0], box[2], truth[0], truth[2]); | ||
| Dtype h = Overlap(box[1], box[3], truth[1], truth[3]); | ||
| if (w < 0 || h < 0) return 0; | ||
| Dtype inter_area = w * h; | ||
| Dtype union_area = box[2] * box[3] + truth[2] * truth[3] - inter_area; | ||
| return inter_area / union_area; | ||
| } | ||
|
|
||
// Euclidean distance between two 4-d box vectors (x, y, w, h); used as a
// tie-breaker when no predictor overlaps the ground truth.
template <typename Dtype>
Dtype Calc_rmse(const std::vector<Dtype>& box, const std::vector<Dtype>& truth) {
  Dtype sum = 0;
  for (int i = 0; i < 4; ++i) {
    // d * d instead of pow(d, 2): pow is a general transcendental and is
    // much slower for integer-exponent squaring.
    const Dtype d = box[i] - truth[i];
    sum += d * d;
  }
  return std::sqrt(sum);
}
|
|
||
// Reads the detection loss hyper-parameters and validates that the
// prediction and label blobs have the sizes the loss expects.
// ("constriant" is a misspelling of "constraint" kept for proto
// field-name compatibility.)
template <typename Dtype>
void DetectionLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  DetectionLossParameter param = this->layer_param_.detection_loss_param();
  side_ = param.side();
  num_class_ = param.num_class();
  num_object_ = param.num_object();
  sqrt_ = param.sqrt();
  constriant_ = param.constriant();
  object_scale_ = param.object_scale();
  noobject_scale_ = param.noobject_scale();
  class_scale_ = param.class_scale();
  coord_scale_ = param.coord_scale();

  int input_count = bottom[0]->count(1);
  int label_count = bottom[1]->count(1);
  // outputs: classes, iou, coordinates
  int tmp_input_count = side_ * side_ * (num_class_ + (1 + 4) * num_object_);
  // label: isobj, class_label, coordinates
  int tmp_label_count = side_ * side_ * (1 + 1 + 1 + 4);
  CHECK_EQ(input_count, tmp_input_count);
  CHECK_EQ(label_count, tmp_label_count);
}
|
|
||
// Shapes the loss output (via the base class) and the gradient buffer,
// which mirrors the prediction blob.
template <typename Dtype>
void DetectionLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  diff_.ReshapeLike(*bottom[0]);
}
|
|
||
// Computes a YOLO-style detection loss.
// bottom[0]: predictions, per image laid out channel-major over the grid:
//   [class probs (num_class_ * L)] [objectness (num_object_ * L)]
//   [boxes (num_object_ * 4 * L)], where L = side_^2.
// bottom[1]: labels in the layout written by transform_label
//   (difficult, isobj, class, 4 coords per cell).
// Gradients w.r.t. predictions are accumulated into diff_ for Backward.
// NOTE(review): the per-object averages below divide by obj_count, which
// is 0 for a batch with no objects — confirm inputs always contain at
// least one object.
template <typename Dtype>
void DetectionLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* input_data = bottom[0]->cpu_data();
  const Dtype* label_data = bottom[1]->cpu_data();
  Dtype* diff = diff_.mutable_cpu_data();
  Dtype loss(0.0), class_loss(0.0), noobj_loss(0.0), obj_loss(0.0), coord_loss(0.0), area_loss(0.0);
  Dtype avg_iou(0.0), avg_obj(0.0), avg_cls(0.0), avg_pos_cls(0.0), avg_no_obj(0.0);
  Dtype obj_count(0);
  int locations = pow(side_, 2);
  caffe_set(diff_.count(), Dtype(0.), diff);
  for (int i = 0; i < bottom[0]->num(); ++i) {
    int index = i * bottom[0]->count(1);
    int true_index = i * bottom[1]->count(1);
    for (int j = 0; j < locations; ++j) {
      // First treat every predictor at this cell as a no-object predictor;
      // the responsible one is corrected after matching below.
      for (int k = 0; k < num_object_; ++k) {
        int p_index = index + num_class_ * locations + k * locations + j;
        noobj_loss += noobject_scale_ * pow(input_data[p_index] - 0, 2);
        diff[p_index] = noobject_scale_ * (input_data[p_index] - 0);
        avg_no_obj += input_data[p_index];
      }
      bool isobj = label_data[true_index + locations + j];
      if (!isobj) {
        continue;
      }
      obj_count += 1;
      int label = static_cast<int>(label_data[true_index + locations * 2 + j]);
      CHECK_GE(label, 0) << "label start at 0";
      CHECK_LT(label, num_class_) << "label must below num_class";
      // Class loss: one-vs-all squared error over all classes at this cell.
      for (int c = 0; c < num_class_; ++c) {
        int class_index = index + c * locations + j;
        Dtype target = Dtype(c == label);
        avg_cls += input_data[class_index];
        if (c == label)
          avg_pos_cls += input_data[class_index];
        class_loss += class_scale_ * pow(input_data[class_index] - target, 2);
        diff[class_index] = class_scale_ * (input_data[class_index] - target);
      }
      const Dtype* true_box_pt = label_data + true_index + locations * 3 + j * 4;
      vector<Dtype> true_box(true_box_pt, true_box_pt + 4);
      const Dtype* box_pt = input_data + index + (num_class_+num_object_)*locations + j;
      // Pick the "responsible" predictor: best IOU with the truth, falling
      // back to smallest coordinate RMSE when no predictor overlaps it.
      Dtype best_iou = 0.;
      Dtype best_rmse = 20.;
      int best_index = 0;
      for (int k = 0; k < num_object_; ++k) {
        vector<Dtype> box;
        box.push_back(*(box_pt + (k * 4 + 0) * locations));
        box.push_back(*(box_pt + (k * 4 + 1) * locations));
        box.push_back(*(box_pt + (k * 4 + 2) * locations));
        box.push_back(*(box_pt + (k * 4 + 3) * locations));
        if (constriant_) {
          // Predictions are cell-relative; convert to image coordinates.
          box[0] = (j % side_ + box[0]) / side_;
          box[1] = (j / side_ + box[1]) / side_;
        }
        if (sqrt_) {
          // Network predicts sqrt(w), sqrt(h); undo for the IOU.
          box[2] = pow(box[2], 2);
          box[3] = pow(box[3], 2);
        }
        Dtype iou = Calc_iou(box, true_box);
        Dtype rmse = Calc_rmse(box, true_box);
        if (best_iou > 0 || iou > 0) {
          if (iou > best_iou) {
            best_iou = iou;
            best_index = k;
          }
        } else {
          if (rmse < best_rmse) {
            best_rmse = rmse;
            best_index = k;
          }
        }
      }

      CHECK_GE(best_index, 0) << "best_index must >= 0";
      avg_iou += best_iou;
      int p_index = index + num_class_ * locations + best_index * locations + j;
      // The responsible predictor is not a no-object predictor after all:
      // back out its no-object contribution and add the object term.
      noobj_loss -= noobject_scale_ * pow(input_data[p_index], 2);
      obj_loss += object_scale_ * pow(input_data[p_index] - 1., 2);
      avg_no_obj -= input_data[p_index];
      avg_obj += input_data[p_index];
      // rescore: the objectness target is the achieved IOU, not 1.
      diff[p_index] = object_scale_ * (input_data[p_index] - best_iou);
      int box_index = index + (num_class_ + num_object_ + best_index * 4) * locations + j;
      vector<Dtype> best_box;
      best_box.push_back(input_data[box_index + 0 * locations]);
      best_box.push_back(input_data[box_index + 1 * locations]);
      best_box.push_back(input_data[box_index + 2 * locations]);
      best_box.push_back(input_data[box_index + 3 * locations]);

      // Move the truth into the prediction's parameterization (cell-relative
      // offsets and/or sqrt'ed dimensions) before taking the difference.
      if (constriant_) {
        true_box[0] = true_box[0] * side_ - Dtype(j % side_);
        true_box[1] = true_box[1] * side_ - Dtype(j / side_);
      }

      if (sqrt_) {
        true_box[2] = sqrt(true_box[2]);
        true_box[3] = sqrt(true_box[3]);
      }

      for (int o = 0; o < 4; ++o) {
        diff[box_index + o * locations] = coord_scale_ * (best_box[o] - true_box[o]);
      }

      coord_loss += coord_scale_ * pow(best_box[0] - true_box[0], 2);
      coord_loss += coord_scale_ * pow(best_box[1] - true_box[1], 2);
      area_loss += coord_scale_ * pow(best_box[2] - true_box[2], 2);
      area_loss += coord_scale_ * pow(best_box[3] - true_box[3], 2);
    }
  }
  // Normalize per matched object (no-object terms per no-object predictor).
  class_loss /= obj_count;
  coord_loss /= obj_count;
  area_loss /= obj_count;
  obj_loss /= obj_count;
  noobj_loss /= (locations * num_object_ * bottom[0]->num() - obj_count);

  avg_iou /= obj_count;
  avg_obj /= obj_count;
  avg_no_obj /= (locations * num_object_ * bottom[0]->num() - obj_count);
  avg_cls /= obj_count;
  avg_pos_cls /= obj_count;

  loss = class_loss + coord_loss + area_loss + obj_loss + noobj_loss;
  obj_count /= bottom[0]->num();
  top[0]->mutable_cpu_data()[0] = loss;

  // LOG(INFO) << "average objects: " << obj_count;
  LOG(INFO) << "loss: " << loss << " class_loss: " << class_loss << " obj_loss: "
    << obj_loss << " noobj_loss: " << noobj_loss << " coord_loss: " << coord_loss
    << " area_loss: " << area_loss;
  LOG(INFO) << "avg_iou: " << avg_iou << " avg_obj: " << avg_obj << " avg_no_obj: "
    << avg_no_obj << " avg_cls: " << avg_cls << " avg_pos_cls: " << avg_pos_cls;
}
|
|
||
// Scales the gradients accumulated in diff_ during Forward by the top
// gradient, normalized by the batch size, and writes them to bottom[0].
// Back-propagation to the labels (bottom[1]) is not supported.
template <typename Dtype>
void DetectionLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    const Dtype sign(1.);
    const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[0]->num();
    // bottom_diff = alpha * diff_ (beta = 0 overwrites any stale diff).
    caffe_cpu_axpby(
        bottom[0]->count(),
        alpha,
        diff_.cpu_data(),
        Dtype(0),
        bottom[0]->mutable_cpu_diff());
  }
}
|
|
||
| #ifdef CPU_ONLY | ||
| STUB_GPU(DetectionLossLayer); | ||
| #endif | ||
|
|
||
| INSTANTIATE_CLASS(DetectionLossLayer); | ||
| REGISTER_LAYER_CLASS(DetectionLoss); | ||
|
|
||
| } // namespace caffe |
| @@ -0,0 +1,272 @@ | ||
| #include <algorithm> | ||
| #include <cfloat> | ||
| #include <vector> | ||
| #include <cmath> | ||
|
|
||
| #include "caffe/layers/detection_loss_layer.hpp" | ||
| #include "caffe/layers/eval_detection_layer.hpp" | ||
| #include "caffe/util/math_functions.hpp" | ||
|
|
||
| namespace caffe { | ||
|
|
||
// A single detection box used during evaluation.
class BoxData {
 public:
  int label_;           // class index
  bool difficult_;      // VOC-style "difficult" flag (excluded from counts)
  float score_;         // confidence used for ranking
  vector<float> box_;   // (x_center, y_center, w, h)
};
|
|
||
| bool BoxSortDecendScore(const BoxData& box1, const BoxData& box2) { | ||
| return box1.score_ > box2.score_; | ||
| } | ||
|
|
||
| void ApplyNms(const vector<BoxData>& boxes, vector<int>* idxes, float threshold) { | ||
| map<int, int> idx_map; | ||
| for (int i = 0; i < boxes.size() - 1; ++i) { | ||
| if (idx_map.find(i) != idx_map.end()) { | ||
| continue; | ||
| } | ||
| vector<float> box1 = boxes[i].box_; | ||
| for (int j = i + 1; j < boxes.size(); ++j) { | ||
| if (idx_map.find(j) != idx_map.end()) { | ||
| continue; | ||
| } | ||
| vector<float> box2 = boxes[j].box_; | ||
| float iou = Calc_iou(box1, box2); | ||
| if (iou >= threshold) { | ||
| idx_map[j] = 1; | ||
| } | ||
| } | ||
| } | ||
| for (int i = 0; i < boxes.size(); ++i) { | ||
| if (idx_map.find(i) == idx_map.end()) { | ||
| idxes->push_back(i); | ||
| } | ||
| } | ||
| } | ||
|
|
||
// Decodes the ground-truth label tensor of one image (layout written by
// BoxDataLayer::transform_label) into a map from class label to its boxes.
// The cell index is stored in score_ — presumably for debugging/tracing;
// TODO(review): confirm nothing downstream ranks GT boxes by score_.
template <typename Dtype>
void GetGTBox(int side, const Dtype* label_data, map<int, vector<BoxData> >* gt_boxes) {
  int locations = pow(side, 2);
  for (int i = 0; i < locations; ++i) {
    // Skip cells with no object (isobj section is [locations, 2*locations)).
    if (!label_data[locations + i]) {
      continue;
    }
    BoxData gt_box;
    bool difficult = (label_data[i] == 1);
    int label = static_cast<int>(label_data[locations * 2 + i]);
    gt_box.difficult_ = difficult;
    gt_box.label_ = label;
    gt_box.score_ = i;
    int box_index = locations * 3 + i * 4;
    for (int j = 0; j < 4; ++j) {
      gt_box.box_.push_back(label_data[box_index + j]);
    }
    if (gt_boxes->find(label) == gt_boxes->end()) {
      (*gt_boxes)[label] = vector<BoxData>(1, gt_box);
    } else {
      (*gt_boxes)[label].push_back(gt_box);
    }
  }
}
|
|
||
// Decodes the network prediction of one image into a map from class label
// to score-sorted boxes. Each cell's argmax class is assigned to all of its
// num_object predictors; box score depends on score_type:
//   0 = objectness, 1 = class prob, 2 = objectness * class prob.
// If nms_threshold >= 0, NMS is applied globally across classes before
// grouping; otherwise boxes are grouped per class and sorted by score.
template <typename Dtype>
void GetPredBox(int side, int num_object, int num_class, const Dtype* input_data,
    map<int, vector<BoxData> >* pred_boxes, bool use_sqrt, bool constriant,
    int score_type, float nms_threshold) {
  vector<BoxData> tmp_boxes;
  int locations = pow(side, 2);
  for (int i = 0; i < locations; ++i) {
    // Argmax over the class-probability section for this cell.
    int pred_label = 0;
    float max_prob = input_data[i];
    for (int j = 1; j < num_class; ++j) {
      int class_index = j * locations + i;
      if (input_data[class_index] > max_prob) {
        pred_label = j;
        max_prob = input_data[class_index];
      }
    }
    if (nms_threshold < 0) {
      // Ensure an (possibly empty) entry exists for every predicted label.
      if (pred_boxes->find(pred_label) == pred_boxes->end()) {
        (*pred_boxes)[pred_label] = vector<BoxData>();
      }
    }
    // LOG(INFO) << "pred_label: " << pred_label << " max_prob: " << max_prob;
    int obj_index = num_class * locations + i;
    int coord_index = (num_class + num_object) * locations + i;
    for (int k = 0; k < num_object; ++k) {
      BoxData pred_box;
      float scale = input_data[obj_index + k * locations];
      pred_box.label_ = pred_label;
      if (score_type == 0) {
        pred_box.score_ = scale;
      } else if (score_type == 1) {
        pred_box.score_ = max_prob;
      } else {
        pred_box.score_ = scale * max_prob;
      }
      int box_index = coord_index + k * 4 * locations;
      if (!constriant) {
        pred_box.box_.push_back(input_data[box_index + 0 * locations]);
        pred_box.box_.push_back(input_data[box_index + 1 * locations]);
      } else {
        // Cell-relative (x, y) → image coordinates.
        pred_box.box_.push_back((i % side + input_data[box_index + 0 * locations]) / side);
        pred_box.box_.push_back((i / side + input_data[box_index + 1 * locations]) / side);
      }
      float w = input_data[box_index + 2 * locations];
      float h = input_data[box_index + 3 * locations];
      if (use_sqrt) {
        // Network predicts sqrt(w), sqrt(h); square to recover sizes.
        pred_box.box_.push_back(pow(w, 2));
        pred_box.box_.push_back(pow(h, 2));
      } else {
        pred_box.box_.push_back(w);
        pred_box.box_.push_back(h);
      }
      if (nms_threshold >= 0) {
        tmp_boxes.push_back(pred_box);
      } else {
        (*pred_boxes)[pred_label].push_back(pred_box);
      }
    }
  }
  if (nms_threshold >= 0) {
    std::sort(tmp_boxes.begin(), tmp_boxes.end(), BoxSortDecendScore);
    vector<int> idxes;
    ApplyNms(tmp_boxes, &idxes, nms_threshold);
    for (int i = 0; i < idxes.size(); ++i) {
      BoxData box_data = tmp_boxes[idxes[i]];
      if (pred_boxes->find(box_data.label_) == pred_boxes->end()) {
        (*pred_boxes)[box_data.label_] = vector<BoxData>();
      }
      (*pred_boxes)[box_data.label_].push_back(box_data);
    }
  } else {
    for (std::map<int, vector<BoxData> >::iterator it = pred_boxes->begin(); it != pred_boxes->end(); ++it) {
      std::sort(it->second.begin(), it->second.end(), BoxSortDecendScore);
    }
  }
}
|
|
||
| template <typename Dtype> | ||
| void EvalDetectionLayer<Dtype>::LayerSetUp( | ||
| const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { | ||
| EvalDetectionParameter param = this->layer_param_.eval_detection_param(); | ||
| side_ = param.side(); | ||
| num_class_ = param.num_class(); | ||
| num_object_ = param.num_object(); | ||
| threshold_ = param.threshold(); | ||
| sqrt_ = param.sqrt(); | ||
| constriant_ = param.constriant(); | ||
| nms_ = param.nms(); | ||
| switch (param.score_type()) { | ||
| case EvalDetectionParameter_ScoreType_OBJ: | ||
| score_type_ = 0; | ||
| break; | ||
| case EvalDetectionParameter_ScoreType_PROB: | ||
| score_type_ = 1; | ||
| break; | ||
| case EvalDetectionParameter_ScoreType_MULTIPLY: | ||
| score_type_ = 2; | ||
| break; | ||
| default: | ||
| LOG(FATAL) << "Unknow score type."; | ||
| } | ||
| } | ||
|
|
||
// Validates input sizes and shapes the output: per image, num_class_ ground
// truth counts followed by 4 values (label, score, tp, fp) for each of the
// side*side*num_object predicted boxes.
template <typename Dtype>
void EvalDetectionLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int input_count = bottom[0]->count(1);
  int label_count = bottom[1]->count(1);
  // outputs: classes, iou, coordinates
  int tmp_input_count = side_ * side_ * (num_class_ + (1 + 4) * num_object_);
  // label: isobj, class_label, coordinates
  int tmp_label_count = side_ * side_ * (1 + 1 + 1 + 4);
  CHECK_EQ(input_count, tmp_input_count);
  CHECK_EQ(label_count, tmp_label_count);

  vector<int> top_shape(2, 1);
  top_shape[0] = bottom[0]->num();
  top_shape[1] = num_class_ + side_ * side_ * num_object_ * 4;
  top[0]->Reshape(top_shape);
}
|
|
||
| template <typename Dtype> | ||
| void EvalDetectionLayer<Dtype>::Forward_cpu( | ||
| const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { | ||
| const Dtype* input_data = bottom[0]->cpu_data(); | ||
| const Dtype* label_data = bottom[1]->cpu_data(); | ||
| Dtype* top_data = top[0]->mutable_cpu_data(); | ||
| caffe_set(top[0]->count(), Dtype(0), top_data); | ||
| for (int i = 0; i < bottom[0]->num(); ++i) { | ||
| int input_index = i * bottom[0]->count(1); | ||
| int true_index = i * bottom[1]->count(1); | ||
| int top_index = i * top[0]->count(1); | ||
| map<int, vector<BoxData> > gt_boxes; | ||
| GetGTBox(side_, label_data + true_index, >_boxes); | ||
| for (std::map<int, vector<BoxData > >::iterator it = gt_boxes.begin(); it != gt_boxes.end(); ++it) { | ||
| int label = it->first; | ||
| vector<BoxData>& g_boxes = it->second; | ||
| for (int j = 0; j < g_boxes.size(); ++j) { | ||
| if (!g_boxes[j].difficult_) { | ||
| top_data[top_index + label] += 1; | ||
| } | ||
| } | ||
| } | ||
| map<int, vector<BoxData> > pred_boxes; | ||
| GetPredBox(side_, num_object_, num_class_, input_data + input_index, &pred_boxes, sqrt_, constriant_, score_type_, nms_); | ||
| int index = top_index + num_class_; | ||
| int pred_count(0); | ||
| for (std::map<int, vector<BoxData> >::iterator it = pred_boxes.begin(); it != pred_boxes.end(); ++it) { | ||
| int label = it->first; | ||
| vector<BoxData>& p_boxes = it->second; | ||
| if (gt_boxes.find(label) == gt_boxes.end()) { | ||
| for (int b = 0; b < p_boxes.size(); ++b) { | ||
| top_data[index + pred_count * 4 + 0] = p_boxes[b].label_; | ||
| top_data[index + pred_count * 4 + 1] = p_boxes[b].score_; | ||
| top_data[index + pred_count * 4 + 2] = 0; | ||
| top_data[index + pred_count * 4 + 3] = 1; | ||
| ++pred_count; | ||
| } | ||
| continue; | ||
| } | ||
| vector<BoxData>& g_boxes = gt_boxes[label]; | ||
| vector<bool> records(g_boxes.size(), false); | ||
| for (int k = 0; k < p_boxes.size(); ++k) { | ||
| top_data[index + pred_count * 4 + 0] = p_boxes[k].label_; | ||
| top_data[index + pred_count * 4 + 1] = p_boxes[k].score_; | ||
| float max_iou(-1); | ||
| int idx(-1); | ||
| for (int g = 0; g < g_boxes.size(); ++g) { | ||
| float iou = Calc_iou(p_boxes[k].box_, g_boxes[g].box_); | ||
| if (iou > max_iou) { | ||
| max_iou = iou; | ||
| idx = g; | ||
| } | ||
| } | ||
| if (max_iou >= threshold_) { | ||
| if (!g_boxes[idx].difficult_) { | ||
| if (!records[idx]) { | ||
| records[idx] = true; | ||
| top_data[index + pred_count * 4 + 2] = 1; | ||
| top_data[index + pred_count * 4 + 3] = 0; | ||
| } else { | ||
| top_data[index + pred_count * 4 + 2] = 0; | ||
| top_data[index + pred_count * 4 + 3] = 1; | ||
| } | ||
| } | ||
| } else { | ||
| top_data[index + pred_count * 4 + 2] = 0; | ||
| top_data[index + pred_count * 4 + 3] = 1; | ||
| } | ||
| ++pred_count; | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| INSTANTIATE_CLASS(EvalDetectionLayer); | ||
| REGISTER_LAYER_CLASS(EvalDetection); | ||
|
|
||
| } // namespace caffe |
| @@ -0,0 +1,172 @@ | ||
| // This program converts a set of images to a lmdb/leveldb by storing them | ||
| // as Datum proto buffers. | ||
| // Usage: | ||
| // convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME | ||
| // | ||
| // where ROOTFOLDER is the root folder that holds all the images, and LISTFILE | ||
| // should be a list of files as well as their labels, in the format as | ||
| // subfolder1/file1.JPEG 7 | ||
| // .... | ||
|
|
||
| #include <algorithm> | ||
| #include <fstream> // NOLINT(readability/streams) | ||
| #include <string> | ||
| #include <utility> | ||
| #include <vector> | ||
|
|
||
| #include "boost/scoped_ptr.hpp" | ||
| #include "gflags/gflags.h" | ||
| #include "glog/logging.h" | ||
|
|
||
| #include "caffe/proto/caffe.pb.h" | ||
| #include "caffe/util/db.hpp" | ||
| #include "caffe/util/format.hpp" | ||
| #include "caffe/util/io.hpp" | ||
| #include "caffe/util/rng.hpp" | ||
|
|
||
| using namespace caffe; // NOLINT(build/namespaces) | ||
| using std::pair; | ||
| using boost::scoped_ptr; | ||
|
|
||
| DEFINE_bool(gray, false, | ||
| "When this option is on, treat images as grayscale ones"); | ||
| DEFINE_bool(shuffle, false, | ||
| "Randomly shuffle the order of images and their labels"); | ||
| DEFINE_string(backend, "lmdb", | ||
| "The backend {lmdb, leveldb} for storing the result"); | ||
| DEFINE_int32(resize_width, 0, "Width images are resized to"); | ||
| DEFINE_int32(resize_height, 0, "Height images are resized to"); | ||
| DEFINE_bool(check_size, false, | ||
| "When this option is on, check that all the datum have the same size"); | ||
| DEFINE_bool(encoded, true, | ||
| "When this option is on, the encoded image will be save in datum"); | ||
| DEFINE_string(encode_type, "jpg", | ||
| "Optional: What type should we encode the image as ('png','jpg',...)."); | ||
| DEFINE_string(label_file, "", | ||
| "a map from name to label"); | ||
|
|
||
// Converts an image list plus box annotations into an lmdb/leveldb of
// Datum protos. argv: [1] root folder, [2] list file (image annotation
// pairs), [3] output DB path. Requires --label_file mapping class names
// to integer labels.
int main(int argc, char** argv) {
#ifdef USE_OPENCV
  ::google::InitGoogleLogging(argv[0]);
  // Print output to stderr (while still logging)
  FLAGS_alsologtostderr = 1;

#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n"
        "format used as input for Caffe.\n"
        "Usage:\n"
        "    convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
        "The ImageNet dataset for the training demo is at\n"
        "    http://www.image-net.org/download-images\n");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  if (argc < 4) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
    return 1;
  }

  const bool is_color = !FLAGS_gray;
  const bool check_size = FLAGS_check_size;
  const bool encoded = FLAGS_encoded;
  const string encode_type = FLAGS_encode_type;
  const std::string label_file = FLAGS_label_file;
  if (label_file == "") {
    LOG(ERROR) << "empty label file";
    return 1;
  }

  // Parse the label file: each line is "<name> <integer label>", split at
  // the last space so names may themselves contain spaces.
  std::ifstream labelfile(label_file.c_str());
  std::map<std::string, int> label_map;
  std::string tmp_line;
  while (std::getline(labelfile, tmp_line)) {
    size_t pos = tmp_line.find_last_of(' ');
    label_map[tmp_line.substr(0, pos)] = atoi(tmp_line.substr(pos+1).c_str());
  }

  // Parse the list file: "<image path> <annotation path>" per line.
  std::ifstream infile(argv[2]);
  std::vector<std::pair<std::string, std::string> > lines;
  std::string line;
  size_t pos;
  while (std::getline(infile, line)) {
    pos = line.find_last_of(' ');
    lines.push_back(std::make_pair(line.substr(0, pos), line.substr(pos+1)));
  }
  if (FLAGS_shuffle) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    shuffle(lines.begin(), lines.end());
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  if (encode_type.size() && !encoded)
    LOG(INFO) << "encode_type specified, assuming encoded=true.";

  int resize_height = std::max<int>(0, FLAGS_resize_height);
  int resize_width = std::max<int>(0, FLAGS_resize_width);

  // Create new DB
  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(argv[3], db::NEW);
  scoped_ptr<db::Transaction> txn(db->NewTransaction());

  // Storing to db
  std::string root_folder(argv[1]);
  Datum datum;
  int count = 0;
  int data_size = 0;
  bool data_size_initialized = false;

  for (int line_id = 0; line_id < lines.size(); ++line_id) {
    bool status;
    std::string enc = encode_type;
    if (encoded && !enc.size()) {
      // Guess the encoding type from the file name
      string fn = lines[line_id].first;
      size_t p = fn.rfind('.');
      if ( p == fn.npos )
        LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
      enc = fn.substr(p);
      std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
    }
    // Read the image and its box annotation into one Datum; skip on failure.
    status = ReadBoxDataToDatum(root_folder + lines[line_id].first,
        root_folder + lines[line_id].second, label_map,
        resize_height, resize_width, is_color, enc, &datum);
    if (status == false) continue;
    if (check_size) {
      if (!data_size_initialized) {
        data_size = datum.channels() * datum.height() * datum.width();
        data_size_initialized = true;
      } else {
        const std::string& data = datum.data();
        CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
            << data.size();
      }
    }
    // sequential key: zero-padded line index + file name keeps DB ordering
    // stable and keys unique.
    string key_str = caffe::format_int(line_id, 8) + "_" + lines[line_id].first;

    // Put in db
    string out;
    CHECK(datum.SerializeToString(&out));
    txn->Put(key_str, out);

    if (++count % 1000 == 0) {
      // Commit db
      txn->Commit();
      txn.reset(db->NewTransaction());
      LOG(INFO) << "Processed " << count << " files.";
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
    txn->Commit();
    LOG(INFO) << "Processed " << count << " files.";
  }
#else
  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}