BatchNormalization: add evaluation mode, add doc for nn.Jacobian
Kaiyu Yang authored and soumith committed Apr 16, 2016
1 parent e07d84d commit 26a5a7e
Showing 5 changed files with 146 additions and 51 deletions.
7 changes: 5 additions & 2 deletions BatchNormalization.lua
@@ -125,7 +125,6 @@ end
local function backward(self, input, gradOutput, scale, gradInput, gradWeight, gradBias)
self:checkInputDim(input)
self:checkInputDim(gradOutput)
assert(self.train == true, 'should be in training mode when self.train is true')
assert(self.save_mean and self.save_std, 'must call :updateOutput() first')

input, gradOutput = makeContiguous(self, input, gradOutput)
@@ -142,9 +141,13 @@ local function backward(self, input, gradOutput, scale, gradInput, gradWeight, gradBias)
THNN.optionalTensor(gradWeight),
THNN.optionalTensor(gradBias),
THNN.optionalTensor(self.weight),
self.running_mean:cdata(),
self.running_var:cdata(),
self.save_mean:cdata(),
self.save_std:cdata(),
scale)
self.train,
scale,
self.eps)

return self.gradInput
end
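
With the assertion above removed, `backward` can now also be called after `:evaluate()`, using the running statistics instead of the per-batch ones. A minimal usage sketch (not part of this commit; module sizes are arbitrary):

```lua
require 'nn'

local bn = nn.BatchNormalization(16)
local input = torch.randn(8, 16)

-- a few training passes to populate running_mean / running_var
bn:training()
for i = 1, 10 do
   bn:forward(torch.randn(8, 16))
end

-- evaluation mode: forward and backward now use the running statistics
bn:evaluate()
local output = bn:forward(input)
local gradInput = bn:backward(input, torch.randn(8, 16))
```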
66 changes: 65 additions & 1 deletion doc/testing.md
@@ -1,5 +1,69 @@
## Testing ##
# Testing #
For those who want to implement their own modules, we suggest using
the `nn.Jacobian` class for testing the derivatives of their module,
together with the [torch.Tester](https://github.com/torch/torch7/blob/master/doc/tester.md) class. The sources
of the `nn` package contain many examples of such tests.
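
As a rough sketch of how the two fit together (the module, sizes and precision below are arbitrary, not prescribed by `nn`):

```lua
require 'nn'

local mytester = torch.Tester()
local jac = nn.Jacobian
local precision = 1e-5

local tests = {}

function tests.Linear()
   local module = nn.Linear(10, 5)
   local input = torch.Tensor(4, 10)   -- only the shape matters; testJacobian randomizes it
   local err = jac.testJacobian(module, input)
   mytester:assertlt(err, precision, 'analytical gradient does not match finite differences')
end

mytester:add(tests)
mytester:run()
```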


## nn.Jacobian ##


<a name="nn.Jacobian.testJacobian"></a>
### testJacobian(module, input, minval, maxval, perturbation) ###

Test the jacobian of a module w.r.t. its input.

`module` takes as its input a random tensor shaped the same as `input`.
`minval` and `maxval` specify the range of the random tensor ([-2, 2] by default).
`perturbation` is the size of the finite-difference step (1e-6 by default).

Returns the L-inf distance between the jacobian computed by backpropagation and the one computed by finite differences.
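
For example, assuming `module` and `input` are already defined, the optional arguments restrict the random inputs and shrink the finite-difference step (the values below are illustrative):

```lua
-- draw inputs from [0, 1] and use a 1e-7 finite-difference step
local err = nn.Jacobian.testJacobian(module, input, 0, 1, 1e-7)
assert(err < 1e-5, 'jacobian mismatch: ' .. err)
```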


<a name="nn.Jacobian.testJacobianParameters"></a>
### testJacobianParameters(module, input, param, dparam, minval, maxval, perturbation) ###

Test the jacobian of a module w.r.t. its parameters (instead of its input).

The input and parameters of `module` are random tensors shaped the same as `input` and `param`.
`minval` and `maxval` specify the range of the random tensors ([-2, 2] by default).
`dparam` points to the gradient w.r.t. parameters.
`perturbation` is the size of the finite-difference step (1e-6 by default).

Returns the L-inf distance between the jacobian computed by backpropagation and the one computed by finite differences.
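
A sketch of checking the weight and bias gradients of a hypothetical `nn.Linear` (sizes and threshold are arbitrary):

```lua
require 'nn'

local module = nn.Linear(10, 5)
local input = torch.Tensor(4, 10)

local errW = nn.Jacobian.testJacobianParameters(module, input, module.weight, module.gradWeight)
local errB = nn.Jacobian.testJacobianParameters(module, input, module.bias, module.gradBias)
assert(errW < 1e-5 and errB < 1e-5, 'parameter gradients do not match finite differences')
```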


<a name="nn.Jacobian.testJacobianUpdateParameters"></a>
### testJacobianUpdateParameters(module, input, param, minval, maxval, perturbation) ###

Test the update a module applies to its parameters.

The input and parameters of `module` are random tensors shaped the same as `input` and `param`.
`minval` and `maxval` specify the range of the random tensors ([-2, 2] by default).
`perturbation` is the size of the finite-difference step (1e-6 by default).

Returns the L-inf distance between the update computed by backpropagation and the one computed by finite differences.
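
A sketch, again with an arbitrary module and threshold:

```lua
require 'nn'

local module = nn.Linear(10, 5)
local input = torch.Tensor(4, 10)

-- compares the parameter update applied by the module against a finite-difference estimate
local err = nn.Jacobian.testJacobianUpdateParameters(module, input, module.weight)
assert(err < 1e-5, 'direct weight update does not match finite differences')
```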


<a name="nn.Jacobian.forward"></a>
### forward(module, input, param, perturbation) ###

Compute the jacobian by finite difference.

`module` has parameters `param` and input `input`.
If `param` is provided, it is treated as the independent variables; otherwise `input` is.
`perturbation` is the size of the finite-difference step (1e-6 by default).

Returns the jacobian computed by finite difference.


<a name="nn.Jacobian.backward"></a>
### backward(module, input, param, dparam) ###

Compute the jacobian by backpropagation.

`module` has parameters `param` and input `input`.
If `param` is provided, it is treated as the independent variables; otherwise `input` is.
`dparam` is the gradient w.r.t. the parameters; it must be present whenever `param` is.

Returns the jacobian computed by backpropagation.
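
`testJacobian` is essentially a convenience wrapper around these two; the comparison can also be done by hand (module and sizes below are arbitrary):

```lua
require 'nn'

local module = nn.Linear(10, 5)
local input = torch.Tensor(4, 10):uniform(-2, 2)

local jf = nn.Jacobian.forward(module, input)    -- finite differences
local jb = nn.Jacobian.backward(module, input)   -- backpropagation
print((jf - jb):abs():max())                     -- L-inf distance, should be tiny
```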
47 changes: 33 additions & 14 deletions lib/THNN/generic/BatchNormalization.c
@@ -66,22 +66,26 @@ void THNN_(BatchNormalization_updateOutput)(
void THNN_(BatchNormalization_backward)(
THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput,
THTensor *gradWeight, THTensor *gradBias, THTensor *weight,
THTensor *save_mean, THTensor *save_std, double scale)
THTensor *running_mean, THTensor *running_var,
THTensor *save_mean, THTensor *save_std,
bool train, double scale, double eps)
{
long nInput = THTensor_(size)(input, 1);
long n = THTensor_(nElement)(input) / nInput;

// Q(X) = X - E[x] ; i.e. input centered to zero mean
// Y = Q(X) / σ ; i.e. BN output before weight and bias
// dL/dX = (Q(dL/dY) - dot(Y, dL/dY) * Y / n) / σ * w

#pragma omp parallel for
for (long f = 0; f < nInput; ++f) {
THTensor *in = THTensor_(newSelect)(input, 1, f);
THTensor *gradOut = THTensor_(newSelect)(gradOutput, 1, f);
real mean = THTensor_(get1d)(save_mean, f);
real invstd = THTensor_(get1d)(save_std, f);
real w = weight ? THTensor_(get1d)(weight, f) : 1;
real mean, invstd;
if (train) {
mean = THTensor_(get1d)(save_mean, f);
invstd = THTensor_(get1d)(save_std, f);
} else {
mean = THTensor_(get1d)(running_mean, f);
invstd = 1 / sqrt(THTensor_(get1d)(running_var, f) + eps);
}

// sum over all gradOutput in feature plane
accreal sum = 0;
@@ -95,14 +99,29 @@ void THNN_(BatchNormalization_backward)(
if (gradInput) {
THTensor *gradIn = THTensor_(newSelect)(gradInput, 1, f);

// projection of gradOutput on to output scaled by std
real k = (real) dotp * invstd * invstd / n;
TH_TENSOR_APPLY2(real, gradIn, real, in,
*gradIn_data = (*in_data - mean) * k;);
if (train) {
// when in training mode
// Q(X) = X - E[x] ; i.e. input centered to zero mean
// Y = Q(X) / σ ; i.e. BN output before weight and bias
// dL/dX = (Q(dL/dY) - dot(Y, dL/dY) * Y / n) / σ * w

// projection of gradOutput on to output scaled by std
real k = (real) dotp * invstd * invstd / n;
TH_TENSOR_APPLY2(real, gradIn, real, in,
*gradIn_data = (*in_data - mean) * k;);

accreal gradMean = sum / n;
TH_TENSOR_APPLY2(real, gradIn, real, gradOut,
*gradIn_data = (*gradOut_data - gradMean - *gradIn_data) * invstd * w;);
accreal gradMean = sum / n;
TH_TENSOR_APPLY2(real, gradIn, real, gradOut,
*gradIn_data = (*gradOut_data - gradMean - *gradIn_data) * invstd * w;);

} else {
// when in evaluation mode
// Q(X) = X - running_mean ; i.e. input centered to zero mean
// Y = Q(X) / running_std ; i.e. BN output before weight and bias
// dL/dX = dL/dY * w / running_std
TH_TENSOR_APPLY2(real, gradIn, real, gradOut,
*gradIn_data = *gradOut_data * invstd * w;);
}

THTensor_(free)(gradIn);
}
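
In evaluation mode the backward pass is just an element-wise rescaling by `w / sqrt(running_var + eps)`, so the result can be checked directly against the closed form. A sketch in Lua (not part of this commit; sizes are arbitrary):

```lua
require 'nn'

local planes = 16
local bn = nn.BatchNormalization(planes)

-- populate the running statistics with a few training passes
bn:training()
for i = 1, 10 do bn:forward(torch.randn(8, planes)) end

bn:evaluate()
local input = torch.randn(8, planes)
local gradOutput = torch.randn(8, planes)
bn:forward(input)
local gradInput = bn:backward(input, gradOutput)

-- expected: dL/dX = dL/dY * w / sqrt(running_var + eps)
local scale = bn.weight:clone():cdiv(torch.sqrt(bn.running_var + bn.eps))
local expected = gradOutput:clone():cmul(scale:view(1, planes):expandAs(gradOutput))
print((expected - gradInput):abs():max())   -- should be close to 0
```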
6 changes: 5 additions & 1 deletion lib/THNN/generic/THNN.h
@@ -535,9 +535,13 @@ TH_API void THNN_(BatchNormalization_backward)(
THTensor *gradWeight,
THTensor *gradBias,
THTensor *weight,
THTensor *running_mean,
THTensor *running_var,
THTensor *save_mean,
THTensor *save_std,
double scale);
bool train,
double scale,
double eps);

TH_API void THNN_(SpatialConvolutionMap_updateOutput)(
THNNState *state, // library state
71 changes: 38 additions & 33 deletions test.lua
@@ -5541,49 +5541,54 @@ local function testBatchNormalization(moduleName, dim, k)
table.insert(size, torch.random(1,k))
end
local input = torch.zeros(table.unpack(size)):uniform()
local module = nn[moduleName](planes)

local err = jac.testJacobian(module,input)
mytester:assertlt(err,precision, 'error on state ')
local function jacTests(module, input, affine)
local err = jac.testJacobian(module,input)
mytester:assertlt(err,precision, 'error on state ')

local err = jac.testJacobianParameters(module, input,
module.weight, module.gradWeight)
mytester:assertlt(err,precision, 'error on weight ')
if affine then
local err = jac.testJacobianParameters(module, input,
module.weight, module.gradWeight)
mytester:assertlt(err,precision, 'error on weight ')

local err = jac.testJacobianParameters(module, input,
module.bias, module.gradBias)
mytester:assertlt(err,precision, 'error on weight ')
local err = jac.testJacobianParameters(module, input,
module.bias, module.gradBias)
mytester:assertlt(err,precision, 'error on weight ')

local err = jac.testJacobianUpdateParameters(module, input, module.weight)
mytester:assertlt(err,precision, 'error on weight [direct update] ')
local err = jac.testJacobianUpdateParameters(module, input, module.weight)
mytester:assertlt(err,precision, 'error on weight [direct update] ')

local err = jac.testJacobianUpdateParameters(module, input, module.bias)
mytester:assertlt(err,precision, 'error on bias [direct update] ')
local err = jac.testJacobianUpdateParameters(module, input, module.bias)
mytester:assertlt(err,precision, 'error on bias [direct update] ')

for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
mytester:assertlt(err, precision, string.format(
'error on weight [%s]', t))
end
for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
mytester:assertlt(err, precision, string.format(
'error on weight [%s]', t))
end

for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
mytester:assertlt(err, precision, string.format('error on bias [%s]', t))
for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
mytester:assertlt(err, precision, string.format('error on bias [%s]', t))
end
end

-- IO
local ferr,berr = jac.testIO(module,input)
mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
end

-- IO
local ferr,berr = jac.testIO(module,input)
mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')


local module = nn[moduleName](planes)
module:training()
jacTests(module, input, true)
module:evaluate()
jacTests(module, input, true)

-- batch norm without affine transform
module = nn[moduleName](planes, 1e-5, 0.1, false)

local err = jac.testJacobian(module, input)
mytester:assertlt(err,precision, 'error on state ')

-- IO
local ferr,berr = jac.testIO(module,input)
mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
module:training()
jacTests(module, input, false)
module:evaluate()
jacTests(module, input, false)
end

function nntest.BatchNormalization()
