@@ -0,0 +1,119 @@
#include <boost/thread.hpp>
#include <map>
#include <string>
#include <vector>

#include "caffe/common.hpp"
#include "caffe/data_reader.hpp"
#include "caffe/layers/data_layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

using boost::weak_ptr;

map<const string, weak_ptr<DataReader::Body> > DataReader::bodies_;
static boost::mutex bodies_mutex_;

// Construct a reader for the given layer. Readers with the same source key
// share a single Body (one reading thread per source); each reader gets its
// own QueuePair sized to prefetch * batch_size datums.
DataReader::DataReader(const LayerParameter& param)
: queue_pair_(new QueuePair( //
param.data_param().prefetch() * param.data_param().batch_size())) {
// Get or create a body
// bodies_ caches Bodies by source key via weak_ptr, so a Body lives only
// while at least one DataReader holds a shared_ptr to it.
boost::mutex::scoped_lock lock(bodies_mutex_);
string key = source_key(param);
weak_ptr<Body>& weak = bodies_[key];
body_ = weak.lock();
if (!body_) {
// First reader for this source: spawn the reading thread.
body_.reset(new Body(param));
bodies_[key] = weak_ptr<Body>(body_);
}
// Register our queue pair so the Body starts feeding it.
body_->new_queue_pairs_.push(queue_pair_);
}

// Release this reader's share of the Body; the last reader for a source
// removes the cache entry.
DataReader::~DataReader() {
string key = source_key(body_->param_);
// Drop our shared_ptr BEFORE checking expired(): if we were the last owner,
// the weak_ptr in bodies_ is now expired and the entry can be erased.
body_.reset();
boost::mutex::scoped_lock lock(bodies_mutex_);
if (bodies_[key].expired()) {
bodies_.erase(key);
}
}

//

// Build a queue pair whose free queue is pre-populated with `size` empty
// Datums; the reading thread fills them and moves them to the full queue.
DataReader::QueuePair::QueuePair(int size) {
  int remaining = size;
  while (remaining-- > 0) {
    free_.push(new Datum());
  }
}

// Reclaim every Datum still owned by this pair, wherever it currently sits.
DataReader::QueuePair::~QueuePair() {
  Datum* datum = NULL;
  // Short-circuit drains free_ completely, then full_ — same order as before.
  while (free_.try_pop(&datum) || full_.try_pop(&datum)) {
    delete datum;
  }
}

//

// A Body owns the background thread that reads datums from the DB and
// dispatches them to every registered QueuePair.
DataReader::Body::Body(const LayerParameter& param)
: param_(param),
new_queue_pairs_() {
// Launch InternalThreadEntry() on a background thread.
StartInternalThread();
}

// Interrupt and join the reading thread before the queues are destroyed.
DataReader::Body::~Body() {
StopInternalThread();
}

// Background loop: open the DB once, then round-robin one datum per solver
// per iteration so parallel solvers see a deterministic, disjoint stream.
void DataReader::Body::InternalThreadEntry() {
shared_ptr<db::DB> db(db::GetDB(param_.data_param().backend()));
db->Open(param_.data_param().source(), db::READ);
shared_ptr<db::Cursor> cursor(db->NewCursor());
vector<shared_ptr<QueuePair> > qps;
try {
// In TEST phase there is only ever one consumer.
int solver_count = param_.phase() == TRAIN ? Caffe::solver_count() : 1;

// To ensure deterministic runs, only start running once all solvers
// are ready. But solvers need to peek on one item during initialization,
// so read one item, then wait for the next solver.
for (int i = 0; i < solver_count; ++i) {
shared_ptr<QueuePair> qp(new_queue_pairs_.pop());
read_one(cursor.get(), qp.get());
qps.push_back(qp);
}
// Main loop
while (!must_stop()) {
for (int i = 0; i < solver_count; ++i) {
read_one(cursor.get(), qps[i].get());
}
// Check no additional readers have been created. This can happen if
// more than one net is trained at a time per process, whether single
// or multi solver. It might also happen if two data layers have same
// name and same source.
CHECK_EQ(new_queue_pairs_.size(), 0);
}
} catch (boost::thread_interrupted&) {
// Interrupted exception is expected on shutdown
}
}

// Read one datum from the cursor into a recycled Datum from qp->free_ and
// hand it to the consumer via qp->full_. Blocks if the free queue is empty,
// which throttles reading to the consumer's pace.
void DataReader::Body::read_one(db::Cursor* cursor, QueuePair* qp) {
Datum* datum = qp->free_.pop();
// TODO deserialize in-place instead of copy?
datum->ParseFromString(cursor->value());
qp->full_.push(datum);

// go to the next iter
cursor->Next();
if (!cursor->valid()) {
// Wrap around: the dataset is cycled indefinitely.
DLOG(INFO) << "Restarting data prefetching from start.";
cursor->SeekToFirst();
}
}

} // namespace caffe
@@ -1,5 +1,6 @@
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif // USE_OPENCV

#include <string>
@@ -126,6 +127,106 @@ void DataTransformer<Dtype>::Transform(const Datum& datum,
}
}

// Transform an encoded datum carrying box labels. In TEST phase the image is
// transformed as-is and labels are passed through. In TRAIN phase a random
// sub-window is cropped, only boxes whose origin falls inside the crop are
// kept, and surviving boxes are re-expressed in crop-relative coordinates.
// Labels are packed in datum.float_data as 6-tuples:
// (class, difficult, x, y, w, h), coordinates normalized to [0, 1].
template<typename Dtype>
void DataTransformer<Dtype>::Transform(const Datum& datum,
Blob<Dtype>* transformed_blob,
vector<BoxLabel>* box_labels) {
int float_size = datum.float_data_size();
CHECK_GT(float_size, 0) <<
"Every sample must have label";
CHECK_EQ(float_size % 6, 0) <<
"Every box label has 6 labels (class, difficult, box)";
// Unpack the flat float_data into BoxLabel records.
vector<BoxLabel> ori_labels;
for (int j = 0; j < float_size; j += 6) {
BoxLabel box_label;
box_label.class_label_ = datum.float_data(j);
box_label.difficult_ = datum.float_data(j + 1);
for (int k = 2; k < 6; ++k) {
box_label.box_[k-2] = datum.float_data(j+k);
}
ori_labels.push_back(box_label);
}

// If datum is encoded, decoded and transform the cv::image.
CHECK(datum.encoded()) << "For box data, datum must be encoded";
CHECK(!(param_.force_color() && param_.force_gray()))
<< "cannot set both force_color and force_gray";
cv::Mat cv_img;
if (param_.force_color() || param_.force_gray()) {
// If force_color then decode in color otherwise decode in gray.
cv_img = DecodeDatumToCVMat(datum, param_.force_color());
} else {
cv_img = DecodeDatumToCVMatNative(datum);
}

// TEST: no random cropping — use the full image and the original labels.
if (phase_ == TEST) {
*box_labels = ori_labels;
Transform(cv_img, transformed_blob);
return;
}

int img_width = cv_img.cols;
int img_height = cv_img.rows;

cv::Mat cv_rand_img;
// bool mirror = Rand(2);
// NOTE(review): mirroring is disabled (Rand(2) commented out), so the
// mirror branches below are currently dead code.
bool mirror = 0;
// Re-sample crops until at least one box survives the crop.
// NOTE(review): if no box origin can ever fall inside a sampled window this
// loop would not terminate — TODO confirm the label generator guarantees
// in-bounds boxes.
while (box_labels->size() == 0) {
// Crop scale is sampled uniformly from (0.70, 1.00].
float rand_scale = (1. - Rand(30) / 100.);
int rand_w = static_cast<int>(img_width * rand_scale) - 1;
int rand_h = static_cast<int>(img_height * rand_scale) - 1;
// LOG(INFO) << "rand_w: " << rand_w << " rand_h: " << rand_h;
// LOG(INFO) << "img_width: " << img_width << " img_height: " << img_height;
int rand_x = Rand(img_width - rand_w);
int rand_y = Rand(img_height - rand_h);
for (int i = 0; i < ori_labels.size(); ++i) {
// NOTE(review): box_[0]/box_[1] are treated as the box ORIGIN here; the
// commented-out code below treats them as the center — verify which
// convention the label generator uses.
int ori_x = static_cast<int>(ori_labels[i].box_[0] * img_width);
int ori_y = static_cast<int>(ori_labels[i].box_[1] * img_height);
int ori_w = static_cast<int>(ori_labels[i].box_[2] * img_width);
int ori_h = static_cast<int>(ori_labels[i].box_[3] * img_height);
// Keep only boxes whose origin lies inside the sampled window.
if (!(ori_x >= rand_x && ori_x < rand_x + rand_w)) {
continue;
}
if (!(ori_y >= rand_y && ori_y < rand_y + rand_h)) {
continue;
}
// Re-express the surviving box relative to the crop window.
BoxLabel box_label;
box_label.difficult_ = ori_labels[i].difficult_;
box_label.class_label_ = ori_labels[i].class_label_;
box_label.box_[0] = float(ori_x - rand_x) / float(rand_w);
box_label.box_[1] = float(ori_y - rand_y) / float(rand_h);
box_label.box_[2] = float(ori_w) / float(rand_w);
box_label.box_[3] = float(ori_h) / float(rand_h);
// int xmin = std::max(ori_x - ori_w / 2, rand_x);
// int ymin = std::max(ori_y - ori_h / 2, rand_y);
// int xmax = std::min(ori_x + ori_w / 2, rand_x + rand_w);
// int ymax = std::min(ori_y + ori_h / 2, rand_y + rand_h);
// if (xmin > xmax || ymin > ymax) {
// continue;
// }
// box_label.box_[0] = float(xmin + (xmax - xmin) / 2) / float(rand_w);
// box_label.box_[1] = float(ymin + (ymax - ymin) / 2) / float(rand_h);
// box_label.box_[2] = float(xmax - xmin) / float(rand_w);
// box_label.box_[3] = float(ymax - ymin) / float(rand_h);
if (mirror) {
// Dead code while mirror is hard-coded to 0 (see above).
box_label.box_[0] = std::max(0., 1. - box_label.box_[0]);
box_label.box_[1] = std::max(0., 1. - box_label.box_[1]);
}
box_labels->push_back(box_label);
}
if (box_labels->size() > 0) {
// At least one box survived: materialize the crop.
cv::Rect roi(rand_x, rand_y, rand_w, rand_h);
cv_rand_img = cv_img(roi);
if (mirror) {
cv::flip(cv_rand_img, cv_rand_img, 1); // horizen flip
}
}
}
// Scale the crop back to the original image size before blob conversion.
cv::resize(cv_rand_img, cv_rand_img, cv::Size(img_width, img_height));
// Transform the cv::image into blob.
Transform(cv_rand_img, transformed_blob);
return;
}

template<typename Dtype>
void DataTransformer<Dtype>::Transform(const Datum& datum,
@@ -623,8 +724,9 @@ vector<int> DataTransformer<Dtype>::InferBlobShape(

template <typename Dtype>
void DataTransformer<Dtype>::InitRand() {
const bool needs_rand = param_.mirror() ||
(phase_ == TRAIN && param_.crop_size());
const bool needs_rand = param_.mirror() || phase_ == TRAIN;
//const bool needs_rand = param_.mirror() ||
// (phase_ == TRAIN && param_.crop_size());
if (needs_rand) {
const unsigned int rng_seed = caffe_rng_rand();
rng_.reset(new Caffe::RNG(rng_seed));
@@ -24,7 +24,8 @@ void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
output_labels_ = false;
} else {
output_labels_ = true;
}
}
box_label_ = false;
data_transformer_.reset(
new DataTransformer<Dtype>(transform_param_, this->phase_));
data_transformer_->InitRand();
@@ -56,15 +57,17 @@ void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
for (int i = 0; i < prefetch_.size(); ++i) {
prefetch_[i]->data_.mutable_cpu_data();
if (this->output_labels_) {
  // Touch label memory once to force allocation on the host
  // (the second, duplicated call here was redundant).
  prefetch_[i]->label_.mutable_cpu_data();
}

}
#ifndef CPU_ONLY
if (Caffe::mode() == Caffe::GPU) {
for (int i = 0; i < prefetch_.size(); ++i) {
prefetch_[i]->data_.mutable_gpu_data();
if (this->output_labels_) {
  // Touch label memory once to force GPU allocation
  // (the second, duplicated call here was redundant).
  prefetch_[i]->label_.mutable_gpu_data();
}
}
}
@@ -120,9 +123,9 @@ void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
top[0]->ReshapeLike(prefetch_current_->data_);
top[0]->set_cpu_data(prefetch_current_->data_.mutable_cpu_data());
if (this->output_labels_) {
  // Reshape to loaded labels and expose the prefetched label buffer
  // (the duplicated Reshape/set_cpu_data pair was redundant).
  top[1]->ReshapeLike(prefetch_current_->label_);
  top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data());
}
}

@@ -15,12 +15,12 @@ void BasePrefetchingDataLayer<Dtype>::Forward_gpu(
top[0]->ReshapeLike(prefetch_current_->data_);
top[0]->set_gpu_data(prefetch_current_->data_.mutable_gpu_data());
if (this->output_labels_) {
  // Reshape to loaded labels and expose the prefetched label buffer
  // (the duplicated Reshape/set_gpu_data pair was redundant).
  top[1]->ReshapeLike(prefetch_current_->label_);
  top[1]->set_gpu_data(prefetch_current_->label_.mutable_gpu_data());
}
}

INSTANTIATE_LAYER_GPU_FORWARD(BasePrefetchingDataLayer);

} // namespace caffe
@@ -0,0 +1,167 @@
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif // USE_OPENCV
#include <stdint.h>

#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/box_data_layer.hpp"
#include "caffe/util/benchmark.hpp"

namespace caffe {

// Construct the layer; reader_ shares (or starts) the background DB-reading
// thread for this layer's source.
template <typename Dtype>
BoxDataLayer<Dtype>::BoxDataLayer(const LayerParameter& param)
: BasePrefetchingDataLayer<Dtype>(param),
reader_(param) {
}

// Stop the prefetch thread before members (reader_, queues) are destroyed.
template <typename Dtype>
BoxDataLayer<Dtype>::~BoxDataLayer() {
this->StopInternalThread();
}

// Size top[0] (data) from the first datum and top[1] (labels) from the grid
// side: per cell the label holds 1 difficult + 1 objectness + 1 class + 4
// box coordinates.
template <typename Dtype>
void BoxDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  this->box_label_ = true;
  // const reference: the original copied the DataParameter message by value.
  const DataParameter& param = this->layer_param_.data_param();
  const int batch_size = param.batch_size();
  // Read a data point, and use it to initialize the top blob.
  // peek() does not consume the datum; load_batch will pop it later.
  Datum& datum = *(reader_.full().peek());

  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape top[0] and prefetch_data according to the batch_size.
  top_shape[0] = batch_size;
  top[0]->Reshape(top_shape);
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->data_.Reshape(top_shape);
  }
  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // Label blob: batch_size x (side * side * 7).
  if (this->output_labels_) {
    side_ = param.side();
    vector<int> label_shape(1, batch_size);
    int label_size = side_ * side_ * (1 + 1 + 1 + 4);
    label_shape.push_back(label_size);
    top[1]->Reshape(label_shape);
    for (int j = 0; j < this->prefetch_.size(); ++j) {
      this->prefetch_[j]->label_.Reshape(label_shape);
    }
  }
}

// This function is called on prefetch thread
// Fills one Batch: pops batch_size datums from the reader, runs the box-aware
// transform on each, writes grid labels, and recycles the datums.
template<typename Dtype>
void BoxDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());

  // Reshape according to the first datum of each batch
  // on single input batches allows for inputs of varying dimension.
  const int batch_size = this->layer_param_.data_param().batch_size();
  Datum& datum = *(reader_.full().peek());
  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* top_data = batch->data_.mutable_cpu_data();
  // Initialized to NULL (the original left it uninitialized); only valid
  // when output_labels_ is set, which is also the only path that reads it.
  Dtype* top_label = NULL;
  if (this->output_labels_) {
    top_label = batch->label_.mutable_cpu_data();
  }
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    timer.Start();
    // get a datum (renamed from `datum` to avoid shadowing the peeked one)
    Datum& item = *(reader_.full().pop("Waiting for data"));
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply data transformations (mirror, scale, crop...)
    int offset = batch->data_.offset(item_id);
    vector<BoxLabel> box_labels;
    this->transformed_data_.set_cpu_data(top_data + offset);
    if (this->output_labels_) {
      // rand sample a patch, adjust box labels
      this->data_transformer_->Transform(item, &(this->transformed_data_),
          &box_labels);
      // transform label
      int label_offset = batch->label_.offset(item_id);
      int count = batch->label_.count(1);
      transform_label(count, top_label + label_offset, box_labels, side_);
    } else {
      this->data_transformer_->Transform(item, &(this->transformed_data_));
    }
    trans_time += timer.MicroSeconds();

    // Recycle the datum for the reading thread (no const_cast needed:
    // `item` is already a non-const reference).
    reader_.free().push(&item);
  }
  timer.Stop();
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

// Rasterize box labels onto a side x side grid. The label buffer layout is
// location-major planes: [difficult | isobj | class | 4 * box], i.e.
// count == side*side*7. Each box is assigned to the cell containing its
// (x, y); a later box overwrites an earlier one in the same cell.
template<typename Dtype>
void BoxDataLayer<Dtype>::transform_label(int count, Dtype* top_label,
    const vector<BoxLabel>& box_labels, int side) {
  // Integer square — the original went through floating-point pow().
  const int locations = side * side;
  CHECK_EQ(count, locations * 7) <<
      "side and count not match";
  // difficult
  caffe_set(locations, Dtype(0), top_label);
  // isobj
  caffe_set(locations, Dtype(0), top_label + locations);
  // class label (-1 marks "no object" cells)
  caffe_set(locations, Dtype(-1), top_label + locations * 2);
  // box
  caffe_set(locations * 4, Dtype(0), top_label + locations * 3);
  for (int i = 0; i < box_labels.size(); ++i) {
    float difficult = box_labels[i].difficult_;
    if (difficult != 0. && difficult != 1.) {
      LOG(WARNING) << "Difficult must be 0 or 1";
    }
    float class_label = box_labels[i].class_label_;
    CHECK_GE(class_label, 0) << "class_label must >= 0";
    float x = box_labels[i].box_[0];
    float y = box_labels[i].box_[1];
    // Map normalized coordinates to a grid cell, clamped to the last cell
    // for x == 1 or y == 1.
    int x_index = floor(x * side);
    int y_index = floor(y * side);
    x_index = std::min(x_index, side - 1);
    y_index = std::min(y_index, side - 1);
    int dif_index = side * y_index + x_index;
    int obj_index = locations + dif_index;
    int class_index = locations * 2 + dif_index;
    int cor_index = locations * 3 + dif_index * 4;
    top_label[dif_index] = difficult;
    top_label[obj_index] = 1;
    top_label[class_index] = class_label;
    for (int j = 0; j < 4; ++j) {
      top_label[cor_index + j] = box_labels[i].box_[j];
    }
  }
}

INSTANTIATE_CLASS(BoxDataLayer);
REGISTER_LAYER_CLASS(BoxData);

} // namespace caffe
@@ -0,0 +1,228 @@
#include <algorithm>
#include <cfloat>
#include <vector>
#include <cmath>

#include "caffe/layers/detection_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// 1-D overlap of two center/extent intervals: (x, w) spans [x - w/2, x + w/2].
// Returns the overlap length; negative when the intervals are disjoint.
template <typename Dtype>
Dtype Overlap(Dtype x1, Dtype w1, Dtype x2, Dtype w2) {
  const Dtype lo1 = x1 - w1 / 2, hi1 = x1 + w1 / 2;
  const Dtype lo2 = x2 - w2 / 2, hi2 = x2 + w2 / 2;
  const Dtype lo = lo1 > lo2 ? lo1 : lo2;   // max of the left edges
  const Dtype hi = hi1 < hi2 ? hi1 : hi2;   // min of the right edges
  return hi - lo;
}

// Intersection-over-union of two boxes given as (x, y, w, h) 4-vectors.
// Returns 0 when the boxes do not overlap on either axis.
template <typename Dtype>
Dtype Calc_iou(const vector<Dtype>& box, const vector<Dtype>& truth) {
  const Dtype overlap_w = Overlap(box[0], box[2], truth[0], truth[2]);
  const Dtype overlap_h = Overlap(box[1], box[3], truth[1], truth[3]);
  if (overlap_w < 0 || overlap_h < 0) {
    return 0;
  }
  const Dtype inter = overlap_w * overlap_h;
  const Dtype uni = box[2] * box[3] + truth[2] * truth[3] - inter;
  return inter / uni;
}

// Euclidean distance between two (x, y, w, h) 4-vectors.
template <typename Dtype>
Dtype Calc_rmse(const std::vector<Dtype>& box, const std::vector<Dtype>& truth) {
  Dtype sum_sq(0);
  for (int i = 0; i < 4; ++i) {
    const Dtype d = box[i] - truth[i];
    sum_sq += d * d;
  }
  return std::sqrt(sum_sq);
}

// Read the YOLO loss hyper-parameters and verify that the bottom blobs match
// the expected prediction/label layouts.
template <typename Dtype>
void DetectionLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  // const reference: the original copied the protobuf message by value.
  const DetectionLossParameter& param = this->layer_param_.detection_loss_param();
  side_ = param.side();                 // grid is side_ x side_ cells
  num_class_ = param.num_class();
  num_object_ = param.num_object();     // boxes predicted per cell
  sqrt_ = param.sqrt();                 // w/h are predicted as square roots
  constriant_ = param.constriant();     // x/y are cell-relative
  object_scale_ = param.object_scale();
  noobject_scale_ = param.noobject_scale();
  class_scale_ = param.class_scale();
  coord_scale_ = param.coord_scale();

  int input_count = bottom[0]->count(1);
  int label_count = bottom[1]->count(1);
  // outputs: classes, iou, coordinates
  int tmp_input_count = side_ * side_ * (num_class_ + (1 + 4) * num_object_);
  // label: difficult, isobj, class_label, coordinates
  int tmp_label_count = side_ * side_ * (1 + 1 + 1 + 4);
  CHECK_EQ(input_count, tmp_input_count);
  CHECK_EQ(label_count, tmp_label_count);
}

// Let the base class shape the scalar loss output, and size the gradient
// cache (diff_) to match the predictions.
template <typename Dtype>
void DetectionLossLayer<Dtype>::Reshape(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
LossLayer<Dtype>::Reshape(bottom, top);
// diff_ stores per-element gradients computed in Forward_cpu and consumed
// by Backward_cpu.
diff_.ReshapeLike(*bottom[0]);
}

// YOLO v1 loss. Predictions in bottom[0] are laid out location-major as
// [num_class_ class planes | num_object_ objectness planes |
//  num_object_ * 4 coordinate planes], each plane of size side_*side_.
// Labels in bottom[1] are [difficult | isobj | class | 4 * box] planes.
// The gradient for each term is written into diff_ as it is accumulated.
template <typename Dtype>
void DetectionLossLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
const Dtype* input_data = bottom[0]->cpu_data();
const Dtype* label_data = bottom[1]->cpu_data();
Dtype* diff = diff_.mutable_cpu_data();
Dtype loss(0.0), class_loss(0.0), noobj_loss(0.0), obj_loss(0.0), coord_loss(0.0), area_loss(0.0);
Dtype avg_iou(0.0), avg_obj(0.0), avg_cls(0.0), avg_pos_cls(0.0), avg_no_obj(0.0);
Dtype obj_count(0);
int locations = pow(side_, 2);
caffe_set(diff_.count(), Dtype(0.), diff);
for (int i = 0; i < bottom[0]->num(); ++i) {
int index = i * bottom[0]->count(1);
int true_index = i * bottom[1]->count(1);
for (int j = 0; j < locations; ++j) {
// First charge every predictor the no-object penalty; it is refunded
// below for the one predictor that is made responsible for a box.
for (int k = 0; k < num_object_; ++k) {
int p_index = index + num_class_ * locations + k * locations + j;
noobj_loss += noobject_scale_ * pow(input_data[p_index] - 0, 2);
diff[p_index] = noobject_scale_ * (input_data[p_index] - 0);
avg_no_obj += input_data[p_index];
}
bool isobj = label_data[true_index + locations + j];
if (!isobj) {
continue;
}
obj_count += 1;
int label = static_cast<int>(label_data[true_index + locations * 2 + j]);
CHECK_GE(label, 0) << "label start at 0";
CHECK_LT(label, num_class_) << "label must below num_class";
// One-vs-all squared-error class loss for this cell.
for (int c = 0; c < num_class_; ++c) {
int class_index = index + c * locations + j;
Dtype target = Dtype(c == label);
avg_cls += input_data[class_index];
if (c == label)
avg_pos_cls += input_data[class_index];
class_loss += class_scale_ * pow(input_data[class_index] - target, 2);
diff[class_index] = class_scale_ * (input_data[class_index] - target);
}
const Dtype* true_box_pt = label_data + true_index + locations * 3 + j * 4;
vector<Dtype> true_box(true_box_pt, true_box_pt + 4);
const Dtype* box_pt = input_data + index + (num_class_+num_object_)*locations + j;
// Pick the responsible predictor: best IOU if any predictor overlaps the
// truth, otherwise smallest coordinate RMSE.
Dtype best_iou = 0.;
Dtype best_rmse = 20.;
int best_index = 0;
for (int k = 0; k < num_object_; ++k) {
vector<Dtype> box;
box.push_back(*(box_pt + (k * 4 + 0) * locations));
box.push_back(*(box_pt + (k * 4 + 1) * locations));
box.push_back(*(box_pt + (k * 4 + 2) * locations));
box.push_back(*(box_pt + (k * 4 + 3) * locations));
if (constriant_) {
// x/y are predicted relative to the cell; map to image coordinates.
box[0] = (j % side_ + box[0]) / side_;
box[1] = (j / side_ + box[1]) / side_;
}
if (sqrt_) {
// w/h are predicted as square roots; square to compare with truth.
box[2] = pow(box[2], 2);
box[3] = pow(box[3], 2);
}
Dtype iou = Calc_iou(box, true_box);
Dtype rmse = Calc_rmse(box, true_box);
if (best_iou > 0 || iou > 0) {
if (iou > best_iou) {
best_iou = iou;
best_index = k;
}
} else {
if (rmse < best_rmse) {
best_rmse = rmse;
best_index = k;
}
}
}

CHECK_GE(best_index, 0) << "best_index must >= 0";
avg_iou += best_iou;
// Refund the no-object penalty for the responsible predictor and charge
// the object (confidence) loss instead.
int p_index = index + num_class_ * locations + best_index * locations + j;
noobj_loss -= noobject_scale_ * pow(input_data[p_index], 2);
obj_loss += object_scale_ * pow(input_data[p_index] - 1., 2);
avg_no_obj -= input_data[p_index];
avg_obj += input_data[p_index];
// rescore: the confidence target is the achieved IOU, not 1.
diff[p_index] = object_scale_ * (input_data[p_index] - best_iou);
int box_index = index + (num_class_ + num_object_ + best_index * 4) * locations + j;
vector<Dtype> best_box;
best_box.push_back(input_data[box_index + 0 * locations]);
best_box.push_back(input_data[box_index + 1 * locations]);
best_box.push_back(input_data[box_index + 2 * locations]);
best_box.push_back(input_data[box_index + 3 * locations]);


// Bring the truth into the same parameterization as the raw predictions
// before forming the coordinate gradient.
if (constriant_) {
true_box[0] = true_box[0] * side_ - Dtype(j % side_);
true_box[1] = true_box[1] * side_ - Dtype(j / side_);
}

if (sqrt_) {
true_box[2] = sqrt(true_box[2]);
true_box[3] = sqrt(true_box[3]);
}

for (int o = 0; o < 4; ++o) {
diff[box_index + o * locations] = coord_scale_ * (best_box[o] - true_box[o]);
}

coord_loss += coord_scale_ * pow(best_box[0] - true_box[0], 2);
coord_loss += coord_scale_ * pow(best_box[1] - true_box[1], 2);
area_loss += coord_scale_ * pow(best_box[2] - true_box[2], 2);
area_loss += coord_scale_ * pow(best_box[3] - true_box[3], 2);
}
}
// NOTE(review): if a batch contains no objects, obj_count is 0 and these
// divisions produce NaN/Inf loss — confirm the data pipeline guarantees at
// least one object per batch.
class_loss /= obj_count;
coord_loss /= obj_count;
area_loss /= obj_count;
obj_loss /= obj_count;
noobj_loss /= (locations * num_object_ * bottom[0]->num() - obj_count);

avg_iou /= obj_count;
avg_obj /= obj_count;
avg_no_obj /= (locations * num_object_ * bottom[0]->num() - obj_count);
avg_cls /= obj_count;
avg_pos_cls /= obj_count;

loss = class_loss + coord_loss + area_loss + obj_loss + noobj_loss;
obj_count /= bottom[0]->num();
top[0]->mutable_cpu_data()[0] = loss;

// LOG(INFO) << "average objects: " << obj_count;
LOG(INFO) << "loss: " << loss << " class_loss: " << class_loss << " obj_loss: "
<< obj_loss << " noobj_loss: " << noobj_loss << " coord_loss: " << coord_loss
<< " area_loss: " << area_loss;
LOG(INFO) << "avg_iou: " << avg_iou << " avg_obj: " << avg_obj << " avg_no_obj: "
<< avg_no_obj << " avg_cls: " << avg_cls << " avg_pos_cls: " << avg_pos_cls;
}

// Propagate the cached per-element gradient (diff_) to the predictions,
// scaled by the top gradient and averaged over the batch. Labels cannot
// receive gradients.
template <typename Dtype>
void DetectionLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (!propagate_down[0]) {
    return;
  }
  // bottom_diff = (top_diff / batch_size) * diff_
  const Dtype scale = top[0]->cpu_diff()[0] / bottom[0]->num();
  caffe_cpu_axpby(
      bottom[0]->count(),
      scale,
      diff_.cpu_data(),
      Dtype(0),
      bottom[0]->mutable_cpu_diff());
}

#ifdef CPU_ONLY
STUB_GPU(DetectionLossLayer);
#endif

INSTANTIATE_CLASS(DetectionLossLayer);
REGISTER_LAYER_CLASS(DetectionLoss);

} // namespace caffe
@@ -0,0 +1,272 @@
#include <algorithm>
#include <cfloat>
#include <vector>
#include <cmath>

#include "caffe/layers/detection_loss_layer.hpp"
#include "caffe/layers/eval_detection_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// Lightweight record for one predicted or ground-truth box used during
// evaluation.
class BoxData {
public:
int label_;           // class id
bool difficult_;      // VOC-style "difficult" flag (ignored for TP/FP)
float score_;         // confidence (for GT boxes, reused to store the cell index)
vector<float> box_;   // (x, y, w, h), normalized
};

bool BoxSortDecendScore(const BoxData& box1, const BoxData& box2) {
return box1.score_ > box2.score_;
}

void ApplyNms(const vector<BoxData>& boxes, vector<int>* idxes, float threshold) {
map<int, int> idx_map;
for (int i = 0; i < boxes.size() - 1; ++i) {
if (idx_map.find(i) != idx_map.end()) {
continue;
}
vector<float> box1 = boxes[i].box_;
for (int j = i + 1; j < boxes.size(); ++j) {
if (idx_map.find(j) != idx_map.end()) {
continue;
}
vector<float> box2 = boxes[j].box_;
float iou = Calc_iou(box1, box2);
if (iou >= threshold) {
idx_map[j] = 1;
}
}
}
for (int i = 0; i < boxes.size(); ++i) {
if (idx_map.find(i) == idx_map.end()) {
idxes->push_back(i);
}
}
}

// Collect ground-truth boxes from one image's label planes
// ([difficult | isobj | class | 4 * box], each plane side*side) and group
// them by class label.
template <typename Dtype>
void GetGTBox(int side, const Dtype* label_data, map<int, vector<BoxData> >* gt_boxes) {
int locations = pow(side, 2);
for (int i = 0; i < locations; ++i) {
// Skip cells with no object (isobj plane is zero).
if (!label_data[locations + i]) {
continue;
}
BoxData gt_box;
bool difficult = (label_data[i] == 1);
int label = static_cast<int>(label_data[locations * 2 + i]);
gt_box.difficult_ = difficult;
gt_box.label_ = label;
// score_ is repurposed to remember the grid cell index for GT boxes.
gt_box.score_ = i;
int box_index = locations * 3 + i * 4;
for (int j = 0; j < 4; ++j) {
gt_box.box_.push_back(label_data[box_index + j]);
}
if (gt_boxes->find(label) == gt_boxes->end()) {
(*gt_boxes)[label] = vector<BoxData>(1, gt_box);
} else {
(*gt_boxes)[label].push_back(gt_box);
}
}
}

// Decode predictions for one image into per-class box lists. Predictions are
// location-major planes: [num_class class | num_object objectness |
// num_object * 4 coords]. Each cell contributes num_object boxes labelled
// with the cell's argmax class. When nms_threshold >= 0, boxes are globally
// sorted and NMS-filtered; otherwise each class list is sorted by score.
template <typename Dtype>
void GetPredBox(int side, int num_object, int num_class, const Dtype* input_data,
map<int, vector<BoxData> >* pred_boxes, bool use_sqrt, bool constriant,
int score_type, float nms_threshold) {
vector<BoxData> tmp_boxes;
int locations = pow(side, 2);
for (int i = 0; i < locations; ++i) {
// Argmax over the class planes for this cell.
int pred_label = 0;
float max_prob = input_data[i];
for (int j = 1; j < num_class; ++j) {
int class_index = j * locations + i;
if (input_data[class_index] > max_prob) {
pred_label = j;
max_prob = input_data[class_index];
}
}
if (nms_threshold < 0) {
if (pred_boxes->find(pred_label) == pred_boxes->end()) {
(*pred_boxes)[pred_label] = vector<BoxData>();
}
}
int obj_index = num_class * locations + i;
int coord_index = (num_class + num_object) * locations + i;
for (int k = 0; k < num_object; ++k) {
BoxData pred_box;
float scale = input_data[obj_index + k * locations];
pred_box.label_ = pred_label;
// score_type: 0 = objectness, 1 = class prob, otherwise their product.
if (score_type == 0) {
pred_box.score_ = scale;
} else if (score_type == 1) {
pred_box.score_ = max_prob;
} else {
pred_box.score_ = scale * max_prob;
}
int box_index = coord_index + k * 4 * locations;
if (!constriant) {
pred_box.box_.push_back(input_data[box_index + 0 * locations]);
pred_box.box_.push_back(input_data[box_index + 1 * locations]);
} else {
// Cell-relative x/y: map back to image coordinates.
pred_box.box_.push_back((i % side + input_data[box_index + 0 * locations]) / side);
pred_box.box_.push_back((i / side + input_data[box_index + 1 * locations]) / side);
}
float w = input_data[box_index + 2 * locations];
float h = input_data[box_index + 3 * locations];
if (use_sqrt) {
// w/h were predicted as square roots; square to recover extents.
pred_box.box_.push_back(pow(w, 2));
pred_box.box_.push_back(pow(h, 2));
} else {
pred_box.box_.push_back(w);
pred_box.box_.push_back(h);
}
if (nms_threshold >= 0) {
tmp_boxes.push_back(pred_box);
} else {
(*pred_boxes)[pred_label].push_back(pred_box);
}
}
}
if (nms_threshold >= 0) {
// Global NMS across all classes, highest scores first.
std::sort(tmp_boxes.begin(), tmp_boxes.end(), BoxSortDecendScore);
vector<int> idxes;
ApplyNms(tmp_boxes, &idxes, nms_threshold);
for (int i = 0; i < idxes.size(); ++i) {
BoxData box_data = tmp_boxes[idxes[i]];
if (pred_boxes->find(box_data.label_) == pred_boxes->end()) {
(*pred_boxes)[box_data.label_] = vector<BoxData>();
}
(*pred_boxes)[box_data.label_].push_back(box_data);
}
} else {
// No NMS: just sort each class list by descending score.
for (std::map<int, vector<BoxData> >::iterator it = pred_boxes->begin(); it != pred_boxes->end(); ++it) {
std::sort(it->second.begin(), it->second.end(), BoxSortDecendScore);
}
}
}

// Read evaluation hyper-parameters and map the proto score-type enum to the
// integer code used by GetPredBox (0 = objectness, 1 = prob, 2 = product).
template <typename Dtype>
void EvalDetectionLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // const reference: the original copied the protobuf message by value.
  const EvalDetectionParameter& param = this->layer_param_.eval_detection_param();
  side_ = param.side();
  num_class_ = param.num_class();
  num_object_ = param.num_object();
  threshold_ = param.threshold();   // IOU threshold for a true positive
  sqrt_ = param.sqrt();
  constriant_ = param.constriant();
  nms_ = param.nms();               // < 0 disables NMS
  switch (param.score_type()) {
    case EvalDetectionParameter_ScoreType_OBJ:
      score_type_ = 0;
      break;
    case EvalDetectionParameter_ScoreType_PROB:
      score_type_ = 1;
      break;
    case EvalDetectionParameter_ScoreType_MULTIPLY:
      score_type_ = 2;
      break;
    default:
      // Fixed typo in the original message ("Unknow").
      LOG(FATAL) << "Unknown score type.";
  }
}

// Validate bottom blob layouts and shape the output: per image, num_class_
// ground-truth counters followed by 4 values per possible predicted box.
template <typename Dtype>
void EvalDetectionLayer<Dtype>::Reshape(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
int input_count = bottom[0]->count(1);
int label_count = bottom[1]->count(1);
// outputs: classes, iou, coordinates
int tmp_input_count = side_ * side_ * (num_class_ + (1 + 4) * num_object_);
// label: isobj, class_label, coordinates
int tmp_label_count = side_ * side_ * (1 + 1 + 1 + 4);
CHECK_EQ(input_count, tmp_input_count);
CHECK_EQ(label_count, tmp_label_count);

vector<int> top_shape(2, 1);
top_shape[0] = bottom[0]->num();
// Per image: num_class_ GT counts + (label, score, tp, fp) per box slot.
top_shape[1] = num_class_ + side_ * side_ * num_object_ * 4;
top[0]->Reshape(top_shape);
}

// Emit per-image evaluation records. Output layout per image: the first
// num_class_ entries count non-difficult ground-truth boxes per class; the
// rest is a sequence of (label, score, tp, fp) 4-tuples, one per emitted
// prediction, suitable for downstream AP computation.
template <typename Dtype>
void EvalDetectionLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
const Dtype* input_data = bottom[0]->cpu_data();
const Dtype* label_data = bottom[1]->cpu_data();
Dtype* top_data = top[0]->mutable_cpu_data();
caffe_set(top[0]->count(), Dtype(0), top_data);
for (int i = 0; i < bottom[0]->num(); ++i) {
int input_index = i * bottom[0]->count(1);
int true_index = i * bottom[1]->count(1);
int top_index = i * top[0]->count(1);
// Count non-difficult ground-truth boxes per class.
map<int, vector<BoxData> > gt_boxes;
GetGTBox(side_, label_data + true_index, &gt_boxes);
for (std::map<int, vector<BoxData > >::iterator it = gt_boxes.begin(); it != gt_boxes.end(); ++it) {
int label = it->first;
vector<BoxData>& g_boxes = it->second;
for (int j = 0; j < g_boxes.size(); ++j) {
if (!g_boxes[j].difficult_) {
top_data[top_index + label] += 1;
}
}
}
map<int, vector<BoxData> > pred_boxes;
GetPredBox(side_, num_object_, num_class_, input_data + input_index, &pred_boxes, sqrt_, constriant_, score_type_, nms_);
int index = top_index + num_class_;
int pred_count(0);
for (std::map<int, vector<BoxData> >::iterator it = pred_boxes.begin(); it != pred_boxes.end(); ++it) {
int label = it->first;
vector<BoxData>& p_boxes = it->second;
// No ground truth of this class: every prediction is a false positive.
if (gt_boxes.find(label) == gt_boxes.end()) {
for (int b = 0; b < p_boxes.size(); ++b) {
top_data[index + pred_count * 4 + 0] = p_boxes[b].label_;
top_data[index + pred_count * 4 + 1] = p_boxes[b].score_;
top_data[index + pred_count * 4 + 2] = 0;
top_data[index + pred_count * 4 + 3] = 1;
++pred_count;
}
continue;
}
vector<BoxData>& g_boxes = gt_boxes[label];
// records[g] marks GT box g as already matched (duplicate detections of
// the same GT count as false positives).
vector<bool> records(g_boxes.size(), false);
for (int k = 0; k < p_boxes.size(); ++k) {
top_data[index + pred_count * 4 + 0] = p_boxes[k].label_;
top_data[index + pred_count * 4 + 1] = p_boxes[k].score_;
// Match the prediction against the best-overlapping GT box.
float max_iou(-1);
int idx(-1);
for (int g = 0; g < g_boxes.size(); ++g) {
float iou = Calc_iou(p_boxes[k].box_, g_boxes[g].box_);
if (iou > max_iou) {
max_iou = iou;
idx = g;
}
}
if (max_iou >= threshold_) {
// Matches a difficult GT box: neither TP nor FP (both stay 0).
if (!g_boxes[idx].difficult_) {
if (!records[idx]) {
records[idx] = true;
top_data[index + pred_count * 4 + 2] = 1;
top_data[index + pred_count * 4 + 3] = 0;
} else {
// Duplicate match to an already-claimed GT: false positive.
top_data[index + pred_count * 4 + 2] = 0;
top_data[index + pred_count * 4 + 3] = 1;
}
}
} else {
top_data[index + pred_count * 4 + 2] = 0;
top_data[index + pred_count * 4 + 3] = 1;
}
++pred_count;
}
}
}
}

INSTANTIATE_CLASS(EvalDetectionLayer);
REGISTER_LAYER_CLASS(EvalDetection);

} // namespace caffe
@@ -180,6 +180,9 @@ message SolverParameter {
optional int32 stepsize = 13;
// the stepsize for learning rate policy "multistep"
repeated int32 stepvalue = 34;
// for rate policy "multifixed"
repeated float stagelr = 50;
repeated int32 stageiter = 51;

// Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
// whenever their actual L2 norm is larger.
@@ -409,6 +412,10 @@ message LayerParameter {
optional DenseImageDataParameter dense_image_data_param = 208;
optional UpsampleParameter upsample_param = 209;
optional InterpParameter interp_param = 148;

// Yolo detection loss layer
optional DetectionLossParameter detection_loss_param = 200;
optional EvalDetectionParameter eval_detection_param = 201;
}

// Message that stores parameters used to apply transformation
@@ -583,6 +590,23 @@ message ContrastiveLossParameter {
optional bool legacy_version = 2 [default = false];
}

message EvalDetectionParameter {
// How a detection's confidence score is computed.
enum ScoreType {
OBJ = 0;        // objectness only
PROB = 1;       // class probability only
MULTIPLY = 2;   // objectness * class probability
}
// Yolo detection evaluation layer
optional uint32 side = 1 [default = 7];          // grid is side x side cells
optional uint32 num_class = 2 [default = 20];
optional uint32 num_object = 3 [default = 2];    // boxes predicted per cell
optional float threshold = 4 [default = 0.5];    // IOU threshold for a true positive
optional bool sqrt = 5 [default = true];         // w/h predicted as square roots
// NOTE: field name is a typo for "constraint" but is kept for
// compatibility with existing prototxt files.
optional bool constriant = 6 [default = true];   // x/y predicted relative to the cell
optional ScoreType score_type = 7 [default = MULTIPLY];
optional float nms = 8 [default = -1];           // NMS IOU threshold; < 0 disables NMS
}

message ConvolutionParameter {
optional uint32 num_output = 1; // The number of outputs for the layer
optional bool bias_term = 2 [default = true]; // whether to have bias terms
@@ -686,6 +710,7 @@ message DataParameter {
// Prefetch queue (Increase if data feeding bandwidth varies, within the
// limit of device memory for GPU training)
optional uint32 prefetch = 10 [default = 4];
optional uint32 side = 11;
}

message DenseImageDataParameter {
@@ -733,6 +758,19 @@ message UpsampleParameter {
optional uint32 upsample_w = 7;
}

// Parameters for the YOLO-style detection loss layer.
message DetectionLossParameter {
  // Yolo detection loss layer
  optional uint32 side = 1 [default = 7];        // output grid is side x side
  optional uint32 num_class = 2 [default = 20];  // number of object classes
  optional uint32 num_object = 3 [default = 2];  // boxes predicted per cell
  // Relative weights of the loss terms (named after the YOLO objective):
  optional float object_scale = 4 [default = 1.0];    // cells containing an object
  optional float noobject_scale = 5 [default = 0.5];  // cells without an object
  optional float class_scale = 6 [default = 1.0];     // classification term
  optional float coord_scale = 7 [default = 5.0];     // box-coordinate term
  optional bool sqrt = 8 [default = true];  // w/h predicted as square roots
  // NOTE(review): "constriant" is a typo for "constraint"; renaming it would
  // change the generated accessor names, so it is kept as-is.
  optional bool constriant = 9 [default = false];
}

// Parameters for the Dropout layer.
message DropoutParameter {
  optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
}
@@ -29,6 +29,15 @@ Dtype SGDSolver<Dtype>::GetLearningRate() {
const string& lr_policy = this->param_.lr_policy();
if (lr_policy == "fixed") {
rate = this->param_.base_lr();
} else if (lr_policy == "multifixed") {
CHECK_EQ(this->param_.stageiter_size(), this->param_.stagelr_size());
int num_stages = this->param_.stagelr_size();
int stage = 0;
for (; stage < num_stages; ++stage) {
if (this->iter_ <= this->param_.stageiter(stage)) break;
}
stage = (stage == num_stages) ? stage - 1 : stage;
rate = this->param_.stagelr(stage);
} else if (lr_policy == "step") {
this->current_step_ = this->iter_ / this->param_.stepsize();
rate = this->param_.base_lr() *
@@ -1,6 +1,8 @@
#include <boost/thread.hpp>
#include <string>


#include "caffe/data_reader.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/parallel.hpp"
#include "caffe/util/blocking_queue.hpp"
@@ -87,5 +89,6 @@ size_t BlockingQueue<T>::size() const {

// Explicit instantiations of BlockingQueue for every element type used by
// the data-loading pipeline: prefetched batches (float/double), raw datums,
// and the DataReader's per-solver queue pairs.
template class BlockingQueue<Batch<float>*>;
template class BlockingQueue<Batch<double>*>;

template class BlockingQueue<Datum*>;
template class BlockingQueue<shared_ptr<DataReader::QueuePair> >;
} // namespace caffe
@@ -1,4 +1,9 @@
#include <fcntl.h>
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>

#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>
@@ -69,7 +74,51 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
CHECK(proto.SerializeToOstream(&output));
}

// Do the file extension and encoding match?
// Compares the lower-cased extension of `fn` against the requested encoding
// `en`; "jpg" and "jpeg" are treated as equivalent.
static bool matchExt(const std::string & fn,
                     std::string en) {
  size_t p = fn.rfind('.');
  // Skip the '.' so "photo.jpg" yields "jpg" (substr(p) would keep the dot
  // and ".jpg" could never compare equal to an encoding such as "jpg").
  // A name without any dot is compared as-is.
  std::string ext = p != fn.npos ? fn.substr(p + 1) : fn;
  std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
  std::transform(en.begin(), en.end(), en.begin(), ::tolower);
  if ( ext == en )
    return true;
  if ( en == "jpg" && ext == "jpeg" )
    return true;
  return false;
}

#ifdef USE_OPENCV
// Read an image plus its XML box annotation into a Datum.
// The pixel data is stored either re-used/re-encoded (when `encoding` is
// non-empty) or raw; the annotation boxes always go into float_data via
// ParseXmlToDatum. Returns false when the image cannot be read.
bool ReadBoxDataToDatum(const string& filename, const string& annoname,
    const map<string, int>& label_map, const int height, const int width,
    const bool is_color, const std::string & encoding, Datum* datum) {
  cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color);
  if (!cv_img.data) {
    return false;
  }
  const int ori_w = cv_img.cols;
  const int ori_h = cv_img.rows;
  if (!encoding.empty()) {
    // If the file on disk already has the requested color mode, size and
    // format, reuse its raw bytes instead of re-encoding.
    const bool color_matches = (cv_img.channels() == 3) == is_color;
    if (color_matches && !height && !width && matchExt(filename, encoding)) {
      return ReadFileToDatum(filename, annoname, label_map, ori_w, ori_h, datum);
    }
    std::vector<uchar> buf;
    cv::imencode("." + encoding, cv_img, buf);
    datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]), buf.size()));
    datum->set_encoded(true);
    // read xml anno data
    ParseXmlToDatum(annoname, label_map, ori_w, ori_h, datum);
    return true;
  }
  CVMatToDatum(cv_img, datum);
  // read xml anno data
  ParseXmlToDatum(annoname, label_map, ori_w, ori_h, datum);
  return true;
}

cv::Mat ReadImageToCVMat(const string& filename, const int height,
const int width, const int min_dim, const int max_dim,
const bool is_color) {
@@ -166,19 +215,7 @@ cv::Mat ReadImageToCVMat(const string& filename) {
return ReadImageToCVMat(filename, 0, 0, true);
}

// Do the file extension and encoding match?
// NOTE(review): this is a duplicate of the static matchExt defined earlier
// in this file -- two definitions in one translation unit will not compile,
// so one copy should be removed (this one looks like a leftover from a
// mis-applied diff).
// NOTE(review): substr(p) keeps the leading '.', so ext is e.g. ".jpg" and
// can never equal an encoding like "jpg"; substr(p + 1) is presumably
// intended -- confirm before relying on this copy.
static bool matchExt(const std::string & fn,
std::string en) {
size_t p = fn.rfind('.');
std::string ext = p != fn.npos ? fn.substr(p) : fn;
std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
std::transform(en.begin(), en.end(), en.begin(), ::tolower);
if ( ext == en )
return true;
if ( en == "jpg" && ext == "jpeg" )
return true;
return false;
}


bool ReadImageToDatum(const string& filename, const int label,
const int height, const int width, const bool is_color,
@@ -299,4 +336,97 @@ void CVMatToDatum(const cv::Mat& cv_img, Datum* datum) {
datum->set_data(buffer);
}
#endif // USE_OPENCV

// Translate an object-class name into its integer label via label_map.
// Returns -1 when the name is not present in the map.
using std::map;
using std::string;

int name_to_label(const string& name, const map<string, int>& label_map) {
  const map<string, int>::const_iterator pos = label_map.find(name);
  return pos == label_map.end() ? -1 : pos->second;
}

// Parse a PASCAL-VOC style XML annotation file and append its objects to
// datum->float_data(). For each <object> six floats are appended:
//   label, difficult, center_x, center_y, box_w, box_h
// with the box normalized by the image dimensions (ori_w, ori_h).
// Dies (LOG(FATAL)/CHECK) when an object name is missing from label_map or
// when the annotation's recorded image size disagrees with ori_w/ori_h.
void ParseXmlToDatum(const string& annoname, const map<string, int>& label_map,
    int ori_w, int ori_h, Datum* datum) {
  boost::property_tree::ptree pt;
  read_xml(annoname, pt);
  int width(0), height(0);
  try {
    height = pt.get<int>("annotation.size.height");
    width = pt.get<int>("annotation.size.width");
    // Sanity check: the annotation must describe the image we just read.
    CHECK_EQ(ori_w, width);
    CHECK_EQ(ori_h, height);
  } catch (const boost::property_tree::ptree_error &e) {
    // Fixed log typo: was "When paring".
    LOG(WARNING) << "When parsing " << annoname << ": " << e.what();
  }
  // Boxes fully replace any float data already present.
  datum->clear_float_data();
  BOOST_FOREACH(boost::property_tree::ptree::value_type &v1, pt.get_child("annotation")) {
    if (v1.first == "object") {
      boost::property_tree::ptree object = v1.second;
      int label(-1);
      vector<float> box(4, 0);
      int difficult(0);
      BOOST_FOREACH(boost::property_tree::ptree::value_type &v2, object.get_child("")) {
        boost::property_tree::ptree pt2 = v2.second;
        if (v2.first == "name") {
          string name = pt2.data();
          // map name to label
          label = name_to_label(name, label_map);
          if (label < 0) {
            LOG(FATAL) << "Anno file " << annoname << " -> unknown name: " << name;
          }
        } else if (v2.first == "bndbox") {
          // Missing corner tags silently default to 0; the warnings below
          // then flag the degenerate box.
          int xmin = pt2.get("xmin", 0);
          int ymin = pt2.get("ymin", 0);
          int xmax = pt2.get("xmax", 0);
          int ymax = pt2.get("ymax", 0);
          LOG_IF(WARNING, xmin < 0 || xmin > ori_w) << annoname <<
              " bounding box exceeds image boundary";
          LOG_IF(WARNING, xmax < 0 || xmax > ori_w) << annoname <<
              " bounding box exceeds image boundary";
          LOG_IF(WARNING, ymin < 0 || ymin > ori_h) << annoname <<
              " bounding box exceeds image boundary";
          LOG_IF(WARNING, ymax < 0 || ymax > ori_h) << annoname <<
              " bounding box exceeds image boundary";
          // Fixed log messages: an inverted box is not a boundary issue.
          LOG_IF(WARNING, xmin > xmax) << annoname <<
              " bounding box is inverted (xmin > xmax)";
          LOG_IF(WARNING, ymin > ymax) << annoname <<
              " bounding box is inverted (ymin > ymax)";
          // Convert corners to normalized center/size.
          box[0] = float(xmin + (xmax - xmin) / 2.) / ori_w;
          box[1] = float(ymin + (ymax - ymin) / 2.) / ori_h;
          box[2] = float(xmax - xmin) / ori_w;
          box[3] = float(ymax - ymin) / ori_h;
        } else if (v2.first == "difficult") {
          difficult = atoi(pt2.data().c_str());
        }
      }
      CHECK_GE(label, 0) << "label must start at 0";
      datum->add_float_data(float(label));
      datum->add_float_data(float(difficult));
      for (int i = 0; i < 4; ++i) {
        datum->add_float_data(box[i]);
      }
    }
  }
}

// Slurp a file's raw bytes into datum (marked encoded) and attach the box
// annotations parsed from annoname. ori_w/ori_h are the image dimensions
// used to normalize the boxes. Returns false when the file cannot be opened.
bool ReadFileToDatum(const string& filename, const string& annoname,
    const map<string, int>& label_map, int ori_w, int ori_h, Datum* datum) {
  fstream file(filename.c_str(), ios::in | ios::binary | ios::ate);
  if (!file.is_open()) {
    return false;
  }
  // Opened at the end (ios::ate), so tellg() is the file size.
  const std::streampos size = file.tellg();
  std::string buffer(size, ' ');
  file.seekg(0, ios::beg);
  file.read(&buffer[0], size);
  file.close();
  datum->set_data(buffer);
  datum->set_encoded(true);
  ParseXmlToDatum(annoname, label_map, ori_w, ori_h, datum);
  return true;
}

} // namespace caffe
@@ -0,0 +1,172 @@
// This program converts a set of images to a lmdb/leveldb by storing them
// as Datum proto buffers.
// Usage:
// convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
//
// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
// should be a list of files as well as their labels, in the format as
// subfolder1/file1.JPEG 7
// ....

#include <algorithm>
#include <fstream> // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "boost/scoped_ptr.hpp"
#include "gflags/gflags.h"
#include "glog/logging.h"

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"

using namespace caffe; // NOLINT(build/namespaces)
using std::pair;
using boost::scoped_ptr;

DEFINE_bool(gray, false,
"When this option is on, treat images as grayscale ones");
DEFINE_bool(shuffle, false,
"Randomly shuffle the order of images and their labels");
DEFINE_string(backend, "lmdb",
"The backend {lmdb, leveldb} for storing the result");
DEFINE_int32(resize_width, 0, "Width images are resized to");
DEFINE_int32(resize_height, 0, "Height images are resized to");
DEFINE_bool(check_size, false,
"When this option is on, check that all the datum have the same size");
DEFINE_bool(encoded, true,
"When this option is on, the encoded image will be save in datum");
DEFINE_string(encode_type, "jpg",
"Optional: What type should we encode the image as ('png','jpg',...).");
DEFINE_string(label_file, "",
"a map from name to label");

// Entry point: convert an image list plus XML box annotations into an
// lmdb/leveldb of Datum protos.
//   argv[1] = root folder, argv[2] = list file ("image anno" per line),
//   argv[3] = output db. Requires --label_file mapping names to labels.
int main(int argc, char** argv) {
#ifdef USE_OPENCV
  ::google::InitGoogleLogging(argv[0]);
  // Print output to stderr (while still logging)
  FLAGS_alsologtostderr = 1;

#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n"
        "format used as input for Caffe.\n"
        "Usage:\n"
        "    convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
        "The ImageNet dataset for the training demo is at\n"
        "    http://www.image-net.org/download-images\n");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  if (argc < 4) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
    return 1;
  }

  const bool is_color = !FLAGS_gray;
  const bool check_size = FLAGS_check_size;
  const bool encoded = FLAGS_encoded;
  const string encode_type = FLAGS_encode_type;
  const std::string label_file = FLAGS_label_file;
  if (label_file == "") {
    LOG(ERROR) << "empty label file";
    return 1;
  }

  // Build the name -> label map from lines of the form "<name> <label>".
  std::ifstream labelfile(label_file.c_str());
  std::map<std::string, int> label_map;
  std::string tmp_line;
  while (std::getline(labelfile, tmp_line)) {
    size_t pos = tmp_line.find_last_of(' ');
    label_map[tmp_line.substr(0, pos)] = atoi(tmp_line.substr(pos+1).c_str());
  }

  // Each list line is "<image path> <annotation path>".
  std::ifstream infile(argv[2]);
  std::vector<std::pair<std::string, std::string> > lines;
  std::string line;
  size_t pos;
  while (std::getline(infile, line)) {
    pos = line.find_last_of(' ');
    lines.push_back(std::make_pair(line.substr(0, pos), line.substr(pos+1)));
  }
  if (FLAGS_shuffle) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    shuffle(lines.begin(), lines.end());
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  if (encode_type.size() && !encoded)
    LOG(INFO) << "encode_type specified, assuming encoded=true.";

  int resize_height = std::max<int>(0, FLAGS_resize_height);
  int resize_width = std::max<int>(0, FLAGS_resize_width);

  // Create new DB
  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(argv[3], db::NEW);
  scoped_ptr<db::Transaction> txn(db->NewTransaction());

  // Storing to db
  std::string root_folder(argv[1]);
  Datum datum;
  int count = 0;
  int data_size = 0;
  bool data_size_initialized = false;

  for (size_t line_id = 0; line_id < lines.size(); ++line_id) {
    bool status;
    std::string enc = encode_type;
    if (encoded && !enc.size()) {
      // Guess the encoding type from the file name
      string fn = lines[line_id].first;
      size_t p = fn.rfind('.');
      if (p == fn.npos) {
        // Previously fell through to fn.substr(npos), which throws
        // std::out_of_range; now we just store the image unencoded.
        LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
      } else {
        // substr(p + 1) drops the '.', so enc is "jpg" rather than ".jpg"
        // (the latter broke matchExt and produced "..jpg" in imencode).
        enc = fn.substr(p + 1);
        std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
      }
    }
    status = ReadBoxDataToDatum(root_folder + lines[line_id].first,
        root_folder + lines[line_id].second, label_map,
        resize_height, resize_width, is_color, enc, &datum);
    if (status == false) continue;
    if (check_size) {
      if (!data_size_initialized) {
        data_size = datum.channels() * datum.height() * datum.width();
        data_size_initialized = true;
      } else {
        const std::string& data = datum.data();
        CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
            << data.size();
      }
    }
    // sequential key: zero-padded line index + image path keeps db ordering.
    string key_str = caffe::format_int(line_id, 8) + "_" + lines[line_id].first;

    // Put in db
    string out;
    CHECK(datum.SerializeToString(&out));
    txn->Put(key_str, out);

    if (++count % 1000 == 0) {
      // Commit db
      txn->Commit();
      txn.reset(db->NewTransaction());
      LOG(INFO) << "Processed " << count << " files.";
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
    txn->Commit();
    LOG(INFO) << "Processed " << count << " files.";
  }
#else
  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}