
Commit

added a section about training
liuliu committed Dec 14, 2014
1 parent 77f72f5 commit 8f4daad
Showing 2 changed files with 44 additions and 80 deletions.
81 changes: 3 additions & 78 deletions bin/aflw.c
@@ -7,44 +7,6 @@
#endif

#ifdef HAVE_GSL
/*
static ccv_dense_matrix_t* _ccv_aflw_slice_with_pose(gsl_rng* rng, ccv_dense_matrix_t* image, ccv_decimal_pose_t pose, ccv_size_t size, ccv_margin_t margin, float deform_angle, float deform_scale, float deform_shift)
{
float rotate_x = 0; // (deform_angle * 2 * gsl_rng_uniform(rng) - deform_angle) * CCV_PI / 180 + pose.pitch;
float rotate_y = 0; // (deform_angle * 2 * gsl_rng_uniform(rng) - deform_angle) * CCV_PI / 180 + pose.yaw;
float rotate_z = (deform_angle * 2 * gsl_rng_uniform(rng) - deform_angle) * CCV_PI / 180 + pose.roll;
float scale = gsl_rng_uniform(rng);
// to make the scale evenly distributed, for example, when deforming of 1/2 ~ 2, we want it to distribute around 1, rather than any average of 1/2 ~ 2
scale = (1 + deform_scale * scale) / (1 + deform_scale * (1 - scale));
float scale_ratio = sqrtf((float)(size.width * size.height) / (pose.a * pose.b * 4));
float m00 = cosf(rotate_z) * scale;
float m01 = cosf(rotate_y) * sinf(rotate_z) * scale;
float m02 = (deform_shift * 2 * gsl_rng_uniform(rng) - deform_shift) / scale_ratio + pose.x + (margin.right - margin.left) / scale_ratio - image->cols * 0.5;
float m10 = (sinf(rotate_y) * cosf(rotate_z) - cosf(rotate_x) * sinf(rotate_z)) * scale;
float m11 = (sinf(rotate_y) * sinf(rotate_z) + cosf(rotate_x) * cosf(rotate_z)) * scale;
float m12 = (deform_shift * 2 * gsl_rng_uniform(rng) - deform_shift) / scale_ratio + pose.y + (margin.bottom - margin.top) / scale_ratio - image->rows * 0.5;
float m20 = (sinf(rotate_y) * cosf(rotate_z) + sinf(rotate_x) * sinf(rotate_z)) * scale;
float m21 = (sinf(rotate_y) * sinf(rotate_z) - sinf(rotate_x) * cosf(rotate_z)) * scale;
float m22 = cosf(rotate_x) * cosf(rotate_y);
ccv_dense_matrix_t* b = 0;
ccv_perspective_transform(image, &b, 0, m00, m01, m02, m10, m11, m12, m20, m21, m22);
ccv_dense_matrix_t* resize = 0;
ccv_size_t scale_size = {
.width = (int)((size.width + margin.left + margin.right) / scale_ratio + 0.5),
.height = (int)((size.height + margin.top + margin.bottom) / scale_ratio + 0.5),
};
assert(scale_size.width > 0 && scale_size.height > 0);
ccv_slice(b, (ccv_matrix_t**)&resize, 0, (int)(b->rows * 0.5 - (size.height + margin.top + margin.bottom - 16) / scale_ratio * 0.5 + 0.5), (int)(b->cols * 0.5 - (size.width + margin.left + margin.right) / scale_ratio * 0.5 + 0.5), scale_size.height, scale_size.width);
ccv_matrix_free(b);
b = 0;
if (scale_ratio > 1)
ccv_resample(resize, &b, 0, size.height + margin.top + margin.bottom, size.width + margin.left + margin.right, CCV_INTER_CUBIC);
else
ccv_resample(resize, &b, 0, size.height + margin.top + margin.bottom, size.width + margin.left + margin.right, CCV_INTER_AREA);
ccv_matrix_free(resize);
return b;
}
*/
static ccv_dense_matrix_t* _ccv_aflw_slice_with_rect(gsl_rng* rng, ccv_dense_matrix_t* image, ccv_rect_t rect, ccv_size_t size, ccv_margin_t margin, float deform_angle, float deform_scale, float deform_shift)
{
ccv_dense_matrix_t* resize = 0;
@@ -71,6 +33,7 @@ static ccv_dense_matrix_t* _ccv_aflw_slice_with_rect(gsl_rng* rng, ccv_dense_mat
int main(int argc, char** argv)
{
#ifdef HAVE_GSL
assert(argc == 4);
gsl_rng* rng = gsl_rng_alloc(gsl_rng_default);
FILE* r = fopen(argv[1], "r");
char* base_dir = argv[2];
@@ -96,50 +59,14 @@ int main(int argc, char** argv)
ccv_read(filename, &image, CCV_IO_ANY_FILE | CCV_IO_GRAY);
char* savefile = (char*)malloc(1024);
ccv_dense_matrix_t* b = _ccv_aflw_slice_with_rect(rng, image, rect, ccv_size(48, 48), ccv_margin(0, 0, 0, 0), 10, 0.1, 0.05);
snprintf(savefile, 1024, "/home/liu/Data/facepos/aflw-%07d-bw.png", i);
snprintf(savefile, 1024, "%s/aflw-%07d-bw.png", argv[3], i);
ccv_write(b, savefile, 0, CCV_IO_PNG_FILE, 0);
ccv_matrix_free(b);
ccv_matrix_free(image);
image = 0;
ccv_read(filename, &image, CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR);
b = _ccv_aflw_slice_with_rect(rng, image, rect, ccv_size(48, 48), ccv_margin(0, 0, 0, 0), 10, 0.1, 0.05);
snprintf(savefile, 1024, "/home/liu/Data/facepos/aflw-%07d-rgb.png", i);
ccv_write(b, savefile, 0, CCV_IO_PNG_FILE, 0);
ccv_matrix_free(b);
ccv_matrix_free(image);
i++;
free(savefile);
free(filename);
}
}
/*
char* file_id = (char*)malloc(1024);
int face_id;
// roll pitch yaw
while (fscanf(r, "%s %s %d %f %f %f %f %f %f %f", file_id, file, &face_id, &pose.x, &pose.y, &pose.a, &pose.b, &pose.roll, &pose.pitch, &pose.yaw) != EOF)
{
if (pose.pitch < CCV_PI * 22.5 / 180 && pose.pitch > -CCV_PI * 22.5 / 180 &&
pose.roll < CCV_PI * 22.5 / 180 && pose.roll > -CCV_PI * 22.5 / 180 &&
pose.yaw < CCV_PI * 20 / 180 && pose.yaw > -CCV_PI * 20 / 180 &&
pose.a >= 10 && pose.b >= 10)
{
// resize to a more proper sizes
char* filename = (char*)malloc(1024);
strncpy(filename, base_dir, 1024);
filename[dirlen - 1] = '/';
strncpy(filename + dirlen, file, 1024 - dirlen);
ccv_dense_matrix_t* image = 0;
ccv_read(filename, &image, CCV_IO_ANY_FILE | CCV_IO_GRAY);
char* savefile = (char*)malloc(1024);
ccv_dense_matrix_t* b = _ccv_aflw_slice_with_pose(rng, image, pose, ccv_size(48, 48), ccv_margin(0, 0, 0, 0), 10, 0.05, 0.05);
snprintf(savefile, 1024, "/home/liu/Data/facepos/aflw-%07d-bw.png", i);
ccv_write(b, savefile, 0, CCV_IO_PNG_FILE, 0);
ccv_matrix_free(b);
ccv_matrix_free(image);
image = 0;
ccv_read(filename, &image, CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR);
b = _ccv_aflw_slice_with_pose(rng, image, pose, ccv_size(48, 48), ccv_margin(0, 0, 0, 0), 10, 0.05, 0.05);
snprintf(savefile, 1024, "/home/liu/Data/facepos/aflw-%07d-rgb.png", i);
snprintf(savefile, 1024, "%s/aflw-%07d-rgb.png", argv[3], i);
ccv_write(b, savefile, 0, CCV_IO_PNG_FILE, 0);
ccv_matrix_free(b);
ccv_matrix_free(image);
@@ -148,8 +75,6 @@ int main(int argc, char** argv)
free(filename);
}
}
free(file_id);
*/
fclose(r);
free(file);
gsl_rng_free(rng);
43 changes: 41 additions & 2 deletions doc/scd.md
@@ -27,7 +27,7 @@ After the dataset is downloaded and unzipped (http://vis-www.cs.umass.edu/fddb/i
The evaluation tools are downloaded, unzipped, and compiled (http://vis-www.cs.umass.edu/fddb/results.html).

I mainly compared this implementation with the state of the art frontal face
detector implementations and the BBF implementation as is. scdfmt.rb script
detector implementations and the BBF implementation as is. `scdfmt.rb` script
is provided to convert the rectangle output of BBF or SCD to the ellipse format,
which seems to give a better result on the FDDB dataset.

@@ -69,7 +69,7 @@ the same false positives.
What about the speed?
---------------------

One reason why BBF implementation, despite its rather unimpressive performance,
One reason why BBF implementation, despite its rather unimpressive accuracy,
is still provided in ccv is its speed. For the longest time, the BBF implementation,
or more accurately its derivative, was the only one that could run semi-real-time on
modern JavaScript engines. Although there is no plan to port the SCD implementation
@@ -96,3 +96,42 @@ false positives.

How to train my own detector?
-----------------------------

In the past few years, the vision community has generated plenty of open
datasets; it is therefore now possible to train a reasonable face detector
(as shown by [HeadHunter](https://bitbucket.org/rodrigob/doppia)) with open
data.

This face detector is trained on the AFLW dataset: http://lrs.icg.tugraz.at/research/aflw/

The `./aflw` program is provided to generate positive images from the AFLW-annotated
original photos. But first, we need to generate a list that maps each photo to its face rectangles.

The AFLW dataset provides a SQLite database, therefore:

> sqlite3 aflw.sqlite
sqlite> .output outrect.txt
sqlite> .mode tabs
sqlite> SELECT filepath, x, y, w, h, roll, pitch, yaw FROM FaceImages, Faces, FacePose, FaceRect WHERE FaceImages.file_id = Faces.file_id AND Faces.face_id = FacePose.face_id AND Faces.face_id = FaceRect.face_id;
sqlite> .exit
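
Each row of `outrect.txt` is then one face annotation: the file path followed by the rectangle (x, y, w, h) and the pose angles (roll, pitch, yaw), separated by tabs. For illustration only (the values below are made up), a row looks like:

    3/image00035.jpg	139	107	145	145	-0.04	0.12	0.31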

Now, run `./aflw` to generate the positive examples:

> ./aflw outrect.txt <directory to AFLW dataset>/data/flickr/ <output directory>

It will take a while to complete, but once it is done, you will see 16444 images in the
output directory, in both grayscale and color.
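
For reference, `bin/aflw.c` (as changed in this commit) names the slices `aflw-%07d-bw.png` for grayscale and `aflw-%07d-rgb.png` for color (e.g. `aflw-0000000-bw.png` and `aflw-0000000-rgb.png` for the first face), so a plain `*.png` glob in the next step picks up all of them.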

Use the `find` command to create both the list of positives and the list of negative
images for hard negative mining:

> find <output directory for positive images> -name '*.png' > facepos.txt
> find <negative images collected from web> -name '*.jpg' > faceneg.txt

Training with `scdcreate` is fully automatic; run:

> ./scdcreate --positive-list facepos.txt --background-list faceneg.txt --working-dir face.sqlite3 --negative-count 16444

It takes me about half a day to finish training up to the 6th classifier, and this is the depth
used in ./samples/face.sqlite3.
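
Once the training finishes, the resulting `face.sqlite3` can be read back and used for detection. Below is a minimal sketch, assuming the SCD detection API declared in `ccv.h` (`ccv_scd_classifier_cascade_read`, `ccv_scd_detect_objects`, `ccv_scd_default_params`); verify the exact signatures against your checkout before relying on it:

    #include <stdio.h>
    #include <ccv.h>

    int main(int argc, char** argv)
    {
    	/* a sketch under the assumption that the SCD API mirrors the other
    	 * ccv detectors: read cascade, detect, iterate over ccv_comp_t, free */
    	ccv_enable_default_cache();
    	ccv_dense_matrix_t* image = 0;
    	/* color read is assumed; grayscale may be enough depending on the feature setup */
    	ccv_read(argv[1], &image, CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR);
    	ccv_scd_classifier_cascade_t* cascade = ccv_scd_classifier_cascade_read("face.sqlite3");
    	ccv_array_t* faces = ccv_scd_detect_objects(image, &cascade, 1, ccv_scd_default_params);
    	int i;
    	for (i = 0; i < faces->rnum; i++)
    	{
    		ccv_comp_t* face = (ccv_comp_t*)ccv_array_get(faces, i);
    		printf("%d %d %d %d\n", face->rect.x, face->rect.y, face->rect.width, face->rect.height);
    	}
    	ccv_array_free(faces);
    	ccv_scd_classifier_cascade_free(cascade);
    	ccv_matrix_free(image);
    	ccv_disable_cache();
    	return 0;
    }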
