
Commit

added a section about training
liuliu committed Dec 14, 2014
1 parent 77f72f5 commit 8f4daad
Showing 2 changed files with 44 additions and 80 deletions.
81 changes: 3 additions & 78 deletions bin/aflw.c
@@ -7,44 +7,6 @@
#endif

#ifdef HAVE_GSL
/*
static ccv_dense_matrix_t* _ccv_aflw_slice_with_pose(gsl_rng* rng, ccv_dense_matrix_t* image, ccv_decimal_pose_t pose, ccv_size_t size, ccv_margin_t margin, float deform_angle, float deform_scale, float deform_shift)
{
float rotate_x = 0; // (deform_angle * 2 * gsl_rng_uniform(rng) - deform_angle) * CCV_PI / 180 + pose.pitch;
float rotate_y = 0; // (deform_angle * 2 * gsl_rng_uniform(rng) - deform_angle) * CCV_PI / 180 + pose.yaw;
float rotate_z = (deform_angle * 2 * gsl_rng_uniform(rng) - deform_angle) * CCV_PI / 180 + pose.roll;
float scale = gsl_rng_uniform(rng);
// to make the scale evenly distributed, for example, when deforming of 1/2 ~ 2, we want it to distribute around 1, rather than any average of 1/2 ~ 2
scale = (1 + deform_scale * scale) / (1 + deform_scale * (1 - scale));
float scale_ratio = sqrtf((float)(size.width * size.height) / (pose.a * pose.b * 4));
float m00 = cosf(rotate_z) * scale;
float m01 = cosf(rotate_y) * sinf(rotate_z) * scale;
float m02 = (deform_shift * 2 * gsl_rng_uniform(rng) - deform_shift) / scale_ratio + pose.x + (margin.right - margin.left) / scale_ratio - image->cols * 0.5;
float m10 = (sinf(rotate_y) * cosf(rotate_z) - cosf(rotate_x) * sinf(rotate_z)) * scale;
float m11 = (sinf(rotate_y) * sinf(rotate_z) + cosf(rotate_x) * cosf(rotate_z)) * scale;
float m12 = (deform_shift * 2 * gsl_rng_uniform(rng) - deform_shift) / scale_ratio + pose.y + (margin.bottom - margin.top) / scale_ratio - image->rows * 0.5;
float m20 = (sinf(rotate_y) * cosf(rotate_z) + sinf(rotate_x) * sinf(rotate_z)) * scale;
float m21 = (sinf(rotate_y) * sinf(rotate_z) - sinf(rotate_x) * cosf(rotate_z)) * scale;
float m22 = cosf(rotate_x) * cosf(rotate_y);
ccv_dense_matrix_t* b = 0;
ccv_perspective_transform(image, &b, 0, m00, m01, m02, m10, m11, m12, m20, m21, m22);
ccv_dense_matrix_t* resize = 0;
ccv_size_t scale_size = {
.width = (int)((size.width + margin.left + margin.right) / scale_ratio + 0.5),
.height = (int)((size.height + margin.top + margin.bottom) / scale_ratio + 0.5),
};
assert(scale_size.width > 0 && scale_size.height > 0);
ccv_slice(b, (ccv_matrix_t**)&resize, 0, (int)(b->rows * 0.5 - (size.height + margin.top + margin.bottom - 16) / scale_ratio * 0.5 + 0.5), (int)(b->cols * 0.5 - (size.width + margin.left + margin.right) / scale_ratio * 0.5 + 0.5), scale_size.height, scale_size.width);
ccv_matrix_free(b);
b = 0;
if (scale_ratio > 1)
ccv_resample(resize, &b, 0, size.height + margin.top + margin.bottom, size.width + margin.left + margin.right, CCV_INTER_CUBIC);
else
ccv_resample(resize, &b, 0, size.height + margin.top + margin.bottom, size.width + margin.left + margin.right, CCV_INTER_AREA);
ccv_matrix_free(resize);
return b;
}
*/
static ccv_dense_matrix_t* _ccv_aflw_slice_with_rect(gsl_rng* rng, ccv_dense_matrix_t* image, ccv_rect_t rect, ccv_size_t size, ccv_margin_t margin, float deform_angle, float deform_scale, float deform_shift)
{
ccv_dense_matrix_t* resize = 0;
@@ -71,6 +33,7 @@ static ccv_dense_matrix_t* _ccv_aflw_slice_with_rect(gsl_rng* rng, ccv_dense_mat
int main(int argc, char** argv)
{
#ifdef HAVE_GSL
assert(argc == 4);
gsl_rng* rng = gsl_rng_alloc(gsl_rng_default);
FILE* r = fopen(argv[1], "r");
char* base_dir = argv[2];
@@ -96,50 +59,14 @@ int main(int argc, char** argv)
ccv_read(filename, &image, CCV_IO_ANY_FILE | CCV_IO_GRAY);
char* savefile = (char*)malloc(1024);
ccv_dense_matrix_t* b = _ccv_aflw_slice_with_rect(rng, image, rect, ccv_size(48, 48), ccv_margin(0, 0, 0, 0), 10, 0.1, 0.05);
snprintf(savefile, 1024, "/home/liu/Data/facepos/aflw-%07d-bw.png", i);
snprintf(savefile, 1024, "%s/aflw-%07d-bw.png", argv[3], i);
ccv_write(b, savefile, 0, CCV_IO_PNG_FILE, 0);
ccv_matrix_free(b);
ccv_matrix_free(image);
image = 0;
ccv_read(filename, &image, CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR);
b = _ccv_aflw_slice_with_rect(rng, image, rect, ccv_size(48, 48), ccv_margin(0, 0, 0, 0), 10, 0.1, 0.05);
snprintf(savefile, 1024, "/home/liu/Data/facepos/aflw-%07d-rgb.png", i);
ccv_write(b, savefile, 0, CCV_IO_PNG_FILE, 0);
ccv_matrix_free(b);
ccv_matrix_free(image);
i++;
free(savefile);
free(filename);
}
}
/*
char* file_id = (char*)malloc(1024);
int face_id;
// roll pitch yaw
while (fscanf(r, "%s %s %d %f %f %f %f %f %f %f", file_id, file, &face_id, &pose.x, &pose.y, &pose.a, &pose.b, &pose.roll, &pose.pitch, &pose.yaw) != EOF)
{
if (pose.pitch < CCV_PI * 22.5 / 180 && pose.pitch > -CCV_PI * 22.5 / 180 &&
pose.roll < CCV_PI * 22.5 / 180 && pose.roll > -CCV_PI * 22.5 / 180 &&
pose.yaw < CCV_PI * 20 / 180 && pose.yaw > -CCV_PI * 20 / 180 &&
pose.a >= 10 && pose.b >= 10)
{
// resize to a more proper sizes
char* filename = (char*)malloc(1024);
strncpy(filename, base_dir, 1024);
filename[dirlen - 1] = '/';
strncpy(filename + dirlen, file, 1024 - dirlen);
ccv_dense_matrix_t* image = 0;
ccv_read(filename, &image, CCV_IO_ANY_FILE | CCV_IO_GRAY);
char* savefile = (char*)malloc(1024);
ccv_dense_matrix_t* b = _ccv_aflw_slice_with_pose(rng, image, pose, ccv_size(48, 48), ccv_margin(0, 0, 0, 0), 10, 0.05, 0.05);
snprintf(savefile, 1024, "/home/liu/Data/facepos/aflw-%07d-bw.png", i);
ccv_write(b, savefile, 0, CCV_IO_PNG_FILE, 0);
ccv_matrix_free(b);
ccv_matrix_free(image);
image = 0;
ccv_read(filename, &image, CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR);
b = _ccv_aflw_slice_with_pose(rng, image, pose, ccv_size(48, 48), ccv_margin(0, 0, 0, 0), 10, 0.05, 0.05);
snprintf(savefile, 1024, "/home/liu/Data/facepos/aflw-%07d-rgb.png", i);
snprintf(savefile, 1024, "%s/aflw-%07d-rgb.png", argv[3], i);
ccv_write(b, savefile, 0, CCV_IO_PNG_FILE, 0);
ccv_matrix_free(b);
ccv_matrix_free(image);
@@ -148,8 +75,6 @@ int main(int argc, char** argv)
free(filename);
}
}
free(file_id);
*/
fclose(r);
free(file);
gsl_rng_free(rng);
43 changes: 41 additions & 2 deletions doc/scd.md
@@ -27,7 +27,7 @@ After the dataset is downloaded and unzipped (http://vis-www.cs.umass.edu/fddb/i
The evaluation tools are downloaded, unzipped, and compiled (http://vis-www.cs.umass.edu/fddb/results.html).

I mainly compared this implementation with the state of the art frontal face
detector implementations and the BBF implementation as is. scdfmt.rb script
detector implementations and the BBF implementation as is. `scdfmt.rb` script
is provided to convert the rectangle output of BBF or SCD to the ellipse format,
which seems to give a better result on the FDDB dataset.

@@ -69,7 +69,7 @@ the same false positives.
What about the speed?
---------------------

One reason why BBF implementation, despite its rather unimpressive performance,
One reason why BBF implementation, despite its rather unimpressive accuracy,
is still provided in ccv is its speed. For the longest time, the BBF implementation,
or more accurately its derivative, was the only one that could run semi-real-time on
modern JavaScript engines. Although there is no plan to port the SCD implementation
@@ -96,3 +96,42 @@ false positives.

How to train my own detector?
-----------------------------

In the past few years, the vision community has generated plenty of open
datasets; it is therefore now possible to train a reasonable face detector
(as shown by [HeadHunter](https://bitbucket.org/rodrigob/doppia)) with open
data.

This face detector is trained on the AFLW dataset: http://lrs.icg.tugraz.at/research/aflw/

The `./aflw` program is provided to generate positive images from the AFLW-annotated
original photos. But first, we need to generate a list that maps each photo to its face rectangles.

The AFLW dataset provides a SQLite database, therefore:

> sqlite3 aflw.sqlite
sqlite> .output outrect.txt
sqlite> .mode tabs
sqlite> SELECT filepath, x, y, w, h, roll, pitch, yaw FROM FaceImages, Faces, FacePose, FaceRect WHERE FaceImages.file_id = Faces.file_id AND Faces.face_id = FacePose.face_id AND Faces.face_id = FaceRect.face_id;
sqlite> .exit
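
Each row of `outrect.txt` is then one face annotation: the file path followed by the rectangle (x, y, w, h) and the pose angles (roll, pitch, yaw), separated by tabs. For illustration only (the values below are made up), a row looks like:

    3/image00035.jpg	139	107	145	145	-0.04	0.12	0.31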

Now, run `./aflw` to generate the positive examples:

> ./aflw outrect.txt <directory to AFLW dataset>/data/flickr/ <output directory>

It will take a while to complete, but once it is done, you will see 16444 images in the
output directory, in both grayscale and color.
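
For reference, `bin/aflw.c` (as changed in this commit) names the slices `aflw-%07d-bw.png` for grayscale and `aflw-%07d-rgb.png` for color (e.g. `aflw-0000000-bw.png` and `aflw-0000000-rgb.png` for the first face), so a plain `*.png` glob in the next step picks up all of them.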

Use the `find` command to create both the list of positives and the list of negative
images for hard negative mining:

> find <output directory for positive images> -name '*.png' > facepos.txt
> find <negative images collected from web> -name '*.jpg' > faceneg.txt

Training with `scdcreate` is fully automatic; run:

> ./scdcreate --positive-list facepos.txt --background-list faceneg.txt --working-dir face.sqlite3 --negative-count 16444

It takes me about half a day to finish training up to the 6th classifier, and this is the depth
used in ./samples/face.sqlite3.
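
Once the training finishes, the resulting `face.sqlite3` can be read back and used for detection. Below is a minimal sketch, assuming the SCD detection API declared in `ccv.h` (`ccv_scd_classifier_cascade_read`, `ccv_scd_detect_objects`, `ccv_scd_default_params`); verify the exact signatures against your checkout before relying on it:

    #include <stdio.h>
    #include <ccv.h>

    int main(int argc, char** argv)
    {
    	/* a sketch under the assumption that the SCD API mirrors the other
    	 * ccv detectors: read cascade, detect, iterate over ccv_comp_t, free */
    	ccv_enable_default_cache();
    	ccv_dense_matrix_t* image = 0;
    	/* color read is assumed; grayscale may be enough depending on the feature setup */
    	ccv_read(argv[1], &image, CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR);
    	ccv_scd_classifier_cascade_t* cascade = ccv_scd_classifier_cascade_read("face.sqlite3");
    	ccv_array_t* faces = ccv_scd_detect_objects(image, &cascade, 1, ccv_scd_default_params);
    	int i;
    	for (i = 0; i < faces->rnum; i++)
    	{
    		ccv_comp_t* face = (ccv_comp_t*)ccv_array_get(faces, i);
    		printf("%d %d %d %d\n", face->rect.x, face->rect.y, face->rect.width, face->rect.height);
    	}
    	ccv_array_free(faces);
    	ccv_scd_classifier_cascade_free(cascade);
    	ccv_matrix_free(image);
    	ccv_disable_cache();
    	return 0;
    }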
