Skip to content

Commit

Permalink
I forgot to implement relu in the full connect layer
Browse files Browse the repository at this point in the history
Another 10 days delay for the 0.6 release :(
  • Loading branch information
liuliu committed Mar 17, 2014
1 parent cb10a22 commit c985ff1
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 48 deletions.
59 changes: 31 additions & 28 deletions bin/image-net.c
Expand Up @@ -144,7 +144,7 @@ int main(int argc, char** argv)
},
},
{
.type = CCV_CONVNET_MAX_POOL,
.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
.input = {
.matrix = {
.rows = 55,
Expand All @@ -154,29 +154,29 @@ int main(int argc, char** argv)
},
},
.output = {
.pool = {
.strides = 2,
.size = 3,
.border = 0,
.rnorm = {
.size = 5,
.kappa = 2,
.alpha = 1e-4,
.beta = 0.75,
},
},
},
{
.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
.type = CCV_CONVNET_MAX_POOL,
.input = {
.matrix = {
.rows = 27,
.cols = 27,
.rows = 55,
.cols = 55,
.channels = 96,
.partition = 2,
},
},
.output = {
.rnorm = {
.size = 5,
.kappa = 2,
.alpha = 1e-4,
.beta = 0.75,
.pool = {
.strides = 2,
.size = 3,
.border = 0,
},
},
},
Expand Down Expand Up @@ -206,7 +206,7 @@ int main(int argc, char** argv)
},
},
{
.type = CCV_CONVNET_MAX_POOL,
.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
.input = {
.matrix = {
.rows = 27,
Expand All @@ -216,29 +216,29 @@ int main(int argc, char** argv)
},
},
.output = {
.pool = {
.strides = 2,
.size = 3,
.border = 0,
.rnorm = {
.size = 5,
.kappa = 2,
.alpha = 1e-4,
.beta = 0.75,
},
},
},
{
.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
.type = CCV_CONVNET_MAX_POOL,
.input = {
.matrix = {
.rows = 13,
.cols = 13,
.rows = 27,
.cols = 27,
.channels = 256,
.partition = 2,
},
},
.output = {
.rnorm = {
.size = 5,
.kappa = 2,
.alpha = 1e-4,
.beta = 0.75,
.pool = {
.strides = 2,
.size = 3,
.border = 0,
},
},
},
Expand Down Expand Up @@ -353,6 +353,7 @@ int main(int argc, char** argv)
},
.output = {
.full_connect = {
.relu = 1,
.count = 4096,
},
},
Expand All @@ -375,6 +376,7 @@ int main(int argc, char** argv)
},
.output = {
.full_connect = {
.relu = 1,
.count = 4096,
},
},
Expand All @@ -397,6 +399,7 @@ int main(int argc, char** argv)
},
.output = {
.full_connect = {
.relu = 0,
.count = 1000,
},
},
Expand All @@ -409,10 +412,10 @@ int main(int argc, char** argv)
for (i = 0; i < 13; i++)
{
layer_params[i].w.decay = 0.0005;
layer_params[i].w.learn_rate = 0.0001;
layer_params[i].w.learn_rate = 0.01;
layer_params[i].w.momentum = 0.9;
layer_params[i].bias.decay = 0;
layer_params[i].bias.learn_rate = 0.0001;
layer_params[i].bias.learn_rate = 0.01;
layer_params[i].bias.momentum = 0.9;
}
layer_params[10].dor = 0.5;
Expand Down
1 change: 1 addition & 0 deletions lib/ccv.h
Expand Up @@ -1119,6 +1119,7 @@ typedef union {
float beta;
} rnorm;
struct {
int relu; // apply relu or not
int count;
} full_connect;
} ccv_convnet_type_t;
Expand Down
35 changes: 26 additions & 9 deletions lib/ccv_convnet.c
Expand Up @@ -226,6 +226,9 @@ static void _ccv_convnet_full_connect_forward_propagate(ccv_convnet_layer_t* lay
bptr[i] = layer->bias[i];
ccv_dense_matrix_t dw = ccv_dense_matrix(db->rows, a->rows, CCV_32F | CCV_C1, layer->w, 0);
ccv_gemm(&dw, a, 1, db, 1, 0, (ccv_matrix_t**)&db, 0); // supply db as matrix C is allowed
if (layer->net.full_connect.relu)
for (i = 0; i < db->rows; i++)
bptr[i] = ccv_max(0, bptr[i]); // relu
a->rows = rows, a->cols = cols, a->type = (a->type - CCV_GET_CHANNEL(a->type)) | ch;
a->step = a->cols * CCV_GET_DATA_TYPE_SIZE(a->type) * CCV_GET_CHANNEL(a->type);
}
Expand Down Expand Up @@ -421,6 +424,14 @@ static void _ccv_convnet_full_connect_forward_propagate_parallel(ccv_convnet_lay
}
ccv_dense_matrix_t dw = ccv_dense_matrix(db->cols, a->cols, CCV_32F | CCV_C1, layer->w, 0);
ccv_gemm(a, &dw, 1, db, 1, CCV_B_TRANSPOSE, (ccv_matrix_t**)&db, 0); // supply db as matrix C is allowed
bptr = db->data.f32;
if (layer->net.full_connect.relu)
for (i = 0; i < db->rows; i++)
{
for (j = 0; j < db->cols; j++)
bptr[j] = ccv_max(0, bptr[j]); // relu
bptr += db->cols;
}
}

static void _ccv_convnet_compute_softmax_parallel(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type)
Expand Down Expand Up @@ -752,17 +763,21 @@ static void _ccv_convnet_convolutional_backward_propagate(ccv_convnet_layer_t* l
a->rows = a_rows, a->cols = a_cols, a->type = (a->type - CCV_GET_CHANNEL(a->type)) | a_ch;
}

static void _ccv_convnet_full_connect_backward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t* x, ccv_dense_matrix_t** b, ccv_convnet_layer_t* update_params)
static void _ccv_convnet_full_connect_backward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t* y, ccv_dense_matrix_t* x, ccv_dense_matrix_t** b, ccv_convnet_layer_t* update_params)
{
// a is the input gradient (for back prop), d is the dropout,
// a is the input gradient (for back prop), y is the output (for forward prop)
// x is the input (for forward prop), b is the output gradient (gradient, or known as propagated error)
// note that y (the output from forward prop) is not included because the full connect net is simple enough that we don't need it
ccv_dense_matrix_t* db = 0;
if (b)
db = *b = ccv_dense_matrix_renew(*b, x->rows, x->cols, CCV_32F | CCV_GET_CHANNEL(x->type), CCV_32F | CCV_GET_CHANNEL(x->type), 0);
int x_rows = x->rows, x_cols = x->cols, x_ch = CCV_GET_CHANNEL(x->type);
x->rows = x_rows * x_cols * x_ch, x->cols = 1, x->type = (x->type - x_ch) | CCV_C1;
x->step = x->cols * CCV_GET_DATA_TYPE_SIZE(x->type);
int i;
if (layer->net.full_connect.relu)
for (i = 0; i < y->rows; i++)
if (y->data.f32[i] <= 0)
a->data.f32[i] = 0;
ccv_dense_matrix_t w = ccv_dense_matrix(a->rows, x->rows, CCV_32F | CCV_C1, update_params->w, 0);
ccv_dense_matrix_t* dw = &w;
// compute bias gradient
Expand Down Expand Up @@ -915,7 +930,7 @@ static void _ccv_convnet_propagate_loss(ccv_convnet_t* convnet, ccv_dense_matrix
int i;
ccv_convnet_layer_t* layer = convnet->layers + convnet->count - 1;
assert(layer->type == CCV_CONVNET_FULL_CONNECT); // the last layer has too be a full connect one to generate softmax result
_ccv_convnet_full_connect_backward_propagate(layer, dloss, convnet->acts[convnet->count - 2], convnet->count - 1 > 0 ? update_params->acts + convnet->count - 2 : 0, update_params->layers + convnet->count - 1);
_ccv_convnet_full_connect_backward_propagate(layer, dloss, convnet->acts[convnet->count - 1], convnet->acts[convnet->count - 2], convnet->count - 1 > 0 ? update_params->acts + convnet->count - 2 : 0, update_params->layers + convnet->count - 1);
for (i = convnet->count - 2; i >= 0; i--)
{
layer = convnet->layers + i;
Expand All @@ -925,7 +940,7 @@ static void _ccv_convnet_propagate_loss(ccv_convnet_t* convnet, ccv_dense_matrix
_ccv_convnet_convolutional_backward_propagate(layer, update_params->acts[i], convnet->acts[i], i > 0 ? convnet->acts[i - 1] : a, i > 0 ? update_params->acts + i - 1 : 0, update_params->layers + i);
break;
case CCV_CONVNET_FULL_CONNECT:
_ccv_convnet_full_connect_backward_propagate(layer, update_params->acts[i], i > 0 ? convnet->acts[i - 1] : a, i > 0 ? update_params->acts + i - 1 : 0, update_params->layers + i);
_ccv_convnet_full_connect_backward_propagate(layer, update_params->acts[i], convnet->acts[i], i > 0 ? convnet->acts[i - 1] : a, i > 0 ? update_params->acts + i - 1 : 0, update_params->layers + i);
break;
case CCV_CONVNET_LOCAL_RESPONSE_NORM:
_ccv_convnet_rnorm_backward_propagate(layer, update_params->acts[i], convnet->acts[i], i > 0 ? convnet->acts[i - 1] : a, convnet->denoms[i], i > 0 ? update_params->acts + i - 1 : 0);
Expand Down Expand Up @@ -1174,7 +1189,7 @@ void ccv_convnet_write(ccv_convnet_t* convnet, const char* filename, ccv_convnet
"(layer INTEGER PRIMARY KEY ASC, type INTEGER, "
"input_matrix_rows INTEGER, input_matrix_cols INTEGER, input_matrix_channels INTEGER, input_matrix_partition INTEGER, input_node_count INTEGER, "
"output_rows INTEGER, output_cols INTEGER, output_channels INTEGER, output_partition INTEGER, output_count INTEGER, output_strides INTEGER, output_border INTEGER, "
"output_size INTEGER, output_kappa REAL, output_alpha REAL, output_beta REAL);"
"output_size INTEGER, output_kappa REAL, output_alpha REAL, output_beta REAL, output_relu INTEGER);"
"CREATE TABLE IF NOT EXISTS convnet_params "
"(convnet INTEGER PRIMARY KEY ASC, input_height INTEGER, input_width INTEGER, mean_activity BLOB);"
"CREATE TABLE IF NOT EXISTS layer_data "
Expand All @@ -1185,11 +1200,11 @@ void ccv_convnet_write(ccv_convnet_t* convnet, const char* filename, ccv_convnet
"(layer, type, "
"input_matrix_rows, input_matrix_cols, input_matrix_channels, input_matrix_partition, input_node_count, "
"output_rows, output_cols, output_channels, output_partition, output_count, output_strides, output_border, "
"output_size, output_kappa, output_alpha, output_beta) VALUES "
"output_size, output_kappa, output_alpha, output_beta, output_relu) VALUES "
"($layer, $type, " // 1
"$input_matrix_rows, $input_matrix_cols, $input_matrix_channels, $input_matrix_partition, $input_node_count, " // 6
"$output_rows, $output_cols, $output_channels, $output_partition, $output_count, $output_strides, $output_border, " // 13
"$output_size, $output_kappa, $output_alpha, $output_beta);"; // 17
"$output_size, $output_kappa, $output_alpha, $output_beta, $output_relu);"; // 18
sqlite3_stmt* layer_params_insert_stmt = 0;
assert(SQLITE_OK == sqlite3_prepare_v2(db, layer_params_insert_qs, sizeof(layer_params_insert_qs), &layer_params_insert_stmt, 0));
const char layer_data_insert_qs[] =
Expand Down Expand Up @@ -1222,6 +1237,7 @@ void ccv_convnet_write(ccv_convnet_t* convnet, const char* filename, ccv_convnet
break;
case CCV_CONVNET_FULL_CONNECT:
sqlite3_bind_int(layer_params_insert_stmt, 12, layer->net.full_connect.count);
sqlite3_bind_int(layer_params_insert_stmt, 19, layer->net.full_connect.relu);
break;
case CCV_CONVNET_MAX_POOL:
case CCV_CONVNET_AVERAGE_POOL:
Expand Down Expand Up @@ -1297,7 +1313,7 @@ ccv_convnet_t* ccv_convnet_read(int use_cwc_accel, const char* filename)
"SELECT type, " // 1
"input_matrix_rows, input_matrix_cols, input_matrix_channels, input_matrix_partition, input_node_count, " // 6
"output_rows, output_cols, output_channels, output_partition, output_count, output_strides, output_border, " // 13
"output_size, output_kappa, output_alpha, output_beta FROM layer_params ORDER BY layer ASC;"; // 17
"output_size, output_kappa, output_alpha, output_beta, output_relu FROM layer_params ORDER BY layer ASC;"; // 18
if (SQLITE_OK == sqlite3_prepare_v2(db, layer_params_qs, sizeof(layer_params_qs), &layer_params_stmt, 0))
{
ccv_array_t* layer_params = ccv_array_new(sizeof(ccv_convnet_layer_param_t), 3, 0);
Expand All @@ -1324,6 +1340,7 @@ ccv_convnet_t* ccv_convnet_read(int use_cwc_accel, const char* filename)
break;
case CCV_CONVNET_FULL_CONNECT:
layer_param.output.full_connect.count = sqlite3_column_int(layer_params_stmt, 10);
layer_param.output.full_connect.relu = sqlite3_column_int(layer_params_stmt, 17);
break;
case CCV_CONVNET_MAX_POOL:
case CCV_CONVNET_AVERAGE_POOL:
Expand Down
33 changes: 25 additions & 8 deletions lib/cuda/cwc_convnet.cu
Expand Up @@ -705,7 +705,14 @@ static void _cwc_convnet_average_pool_forward_propagate(ccv_convnet_layer_t* lay
b, out_rows, out_cols);
}

static void _cwc_convnet_full_connect_forward_propagate(ccv_convnet_layer_t* layer, int batch, float* a, float* b, float* batch_unit /* this is just 1's in device */, const cublasHandle_t& handle)
// In-place ReLU: clamps every element of `a` to be non-negative.
// Each thread handles exactly one float, located at
// a[blockIdx.x * blockDim.x + threadIdx.x]. There is no bounds check, so the
// launch configuration must cover the buffer exactly (the launch visible in
// this change uses one block per full connect output and one thread per
// batch item).
__global__ static void _cwc_kern_relu_forward_propagate(float* a)
{
	// advance to the slice owned by this block
	a += blockIdx.x * blockDim.x;
	const int thidx = threadIdx.x;
	// use the single-precision max with a float literal; the previous
	// max(0.0, ...) promoted the comparison to double before storing a float
	a[thidx] = fmaxf(0.0f, a[thidx]);
}

static void _cwc_convnet_full_connect_forward_propagate(ccv_convnet_layer_t* layer, int batch, float* a, float* b, float* batch_unit /* this is just 1's in device */, const cudaStream_t& stream, const cublasHandle_t& handle)
{
int rows, out_rows, out_cols, out_partition;
_cwc_convnet_layer_deduce_output_format(layer, &out_rows, &out_cols, &out_partition);
Expand All @@ -718,6 +725,11 @@ static void _cwc_convnet_full_connect_forward_propagate(ccv_convnet_layer_t* lay
beta = 1;
// and then do the GEMM by adding bias
cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, batch, out_rows, rows, &alpha, a, batch, layer->w, rows, &beta, b, batch);
if (layer->net.full_connect.relu)
_cwc_kern_relu_forward_propagate
<<<layer->net.full_connect.count, batch, 0, stream>>>
(b);

}

__global__ static void _cwc_kern_mute_neuron(float* a, float* d)
Expand Down Expand Up @@ -751,7 +763,7 @@ static void _cwc_convnet_encode_impl(ccv_convnet_t* convnet, float* a, int batch
break;
case CCV_CONVNET_FULL_CONNECT:
assert(i > 0);
_cwc_convnet_full_connect_forward_propagate(layer, batch, GPU(convnet)->forwards[i - 1], GPU(convnet)->forwards[i], GPU(convnet)->unit, context->device.cublas);
_cwc_convnet_full_connect_forward_propagate(layer, batch, GPU(convnet)->forwards[i - 1], GPU(convnet)->forwards[i], GPU(convnet)->unit, context->device.stream, context->device.cublas);
if (dor && context->device.dor[i])
_cwc_kern_mute_neuron
<<<layer->net.full_connect.count, batch, 0, context->device.stream>>>
Expand All @@ -775,7 +787,7 @@ static void _cwc_convnet_encode_impl(ccv_convnet_t* convnet, float* a, int batch

#ifdef HAVE_GSL

__global__ static void _cwc_kern_convolutional_relu_backward_propagate(const int batch,
__global__ static void _cwc_kern_relu_backward_propagate(const int batch,
float* out, float* out_grad, const int out_rows, const int out_cols,
const int count)
{
Expand Down Expand Up @@ -1231,7 +1243,7 @@ static void _cwc_convnet_convolutional_backward_propagate(ccv_convnet_layer_t* l
int out_rows, out_cols, out_partition, shared_memory_size;
_cwc_convnet_layer_deduce_output_format(layer, &out_rows, &out_cols, &out_partition);
// it turns out that first apply relu would save us a lot of computation because no need to low both out and out_grad any more
_cwc_kern_convolutional_relu_backward_propagate
_cwc_kern_relu_backward_propagate
<<<dim3(out_cols, out_rows, layer->net.convolutional.count), batch, 0, stream>>>
(batch, n, a, out_rows, out_cols, layer->net.convolutional.count);
assert(cudaGetLastError() == cudaSuccess);
Expand Down Expand Up @@ -1492,12 +1504,17 @@ static void _cwc_convnet_average_pool_backward_propagate(ccv_convnet_layer_t* la
a, out_rows, out_cols);
}

static void _cwc_convnet_full_connect_backward_propagate(ccv_convnet_layer_t* layer, int batch, float* a, float* m, float* b, float* batch_unit, ccv_convnet_layer_t* configuration, const cublasHandle_t& handle)
static void _cwc_convnet_full_connect_backward_propagate(ccv_convnet_layer_t* layer, int batch, float* a, float* n, float* m, float* b, float* batch_unit, ccv_convnet_layer_t* configuration, const cudaStream_t& stream, const cublasHandle_t& handle)
{
int rows, out_rows, out_cols, out_partition;
_cwc_convnet_layer_deduce_output_format(layer, &out_rows, &out_cols, &out_partition);
out_cols = batch;
rows = layer->input.matrix.rows * layer->input.matrix.cols * layer->input.matrix.channels;
// apply relu for full connect layer
if (layer->net.full_connect.relu)
_cwc_kern_relu_backward_propagate
<<<dim3(1, out_rows, 1), batch, 0, stream>>>
(batch, n, a, out_rows, 1, 1);
float alpha = 1;
float beta = 0;
// propagate bias
Expand Down Expand Up @@ -1701,8 +1718,8 @@ static void _cwc_convnet_batch_formation(gsl_rng* rng, ccv_array_t* categorizeds
ccv_dense_matrix_t* input = 0;
if (image->cols != dim.width || image->rows != dim.height)
{
int x = (image->cols - dim.width + 1) / 2;
int y = (image->rows - dim.height + 1) / 2;
int x = rng ? gsl_rng_uniform_int(rng, image->cols - dim.width + 1) : (image->cols - dim.width + 1) / 2;
int y = rng ? gsl_rng_uniform_int(rng, image->rows - dim.height + 1) : (image->rows - dim.height + 1) / 2;
assert(x == 0 || y == 0);
ccv_slice(image, (ccv_matrix_t**)&input, CCV_32F, y, x, dim.height, dim.width);
} else
Expand Down Expand Up @@ -1964,7 +1981,7 @@ static void _cwc_convnet_backwards_propagate_error(ccv_convnet_t* convnet, float
_cwc_kern_mute_neuron
<<<layer->net.full_connect.count, batch, 0, context->device.stream>>>
(i == convnet->count - 1 ? a : GPU(convnet)->backwards[i + 1], context->device.dor[i]);
_cwc_convnet_full_connect_backward_propagate(layer, batch, i == convnet->count - 1 ? a : GPU(convnet)->backwards[i + 1], i > 0 ? GPU(convnet)->forwards[i - 1] : m, GPU(convnet)->backwards[i], GPU(convnet)->unit, configuration, context->device.cublas);
_cwc_convnet_full_connect_backward_propagate(layer, batch, i == convnet->count - 1 ? a : GPU(convnet)->backwards[i + 1], GPU(convnet)->forwards[i], i > 0 ? GPU(convnet)->forwards[i - 1] : m, GPU(convnet)->backwards[i], GPU(convnet)->unit, configuration, context->device.stream, context->device.cublas);
assert(cudaGetLastError() == cudaSuccess);
break;
case CCV_CONVNET_LOCAL_RESPONSE_NORM:
Expand Down
Binary file modified samples/image-net.sqlite3
Binary file not shown.

0 comments on commit c985ff1

Please sign in to comment.