diff --git a/tensorlayer/layers/convolution.py b/tensorlayer/layers/convolution.py
index f19cd62e6..1cc92d048 100644
--- a/tensorlayer/layers/convolution.py
+++ b/tensorlayer/layers/convolution.py
@@ -123,9 +123,9 @@ class Conv2dLayer(Layer):
     padding : str
         The padding algorithm type: "SAME" or "VALID".
     W_init : initializer
-        The initializer for the the weight matrix.
+        The initializer for the weight matrix.
     b_init : initializer or None
-        The initializer for the the bias vector. If None, skip biases.
+        The initializer for the bias vector. If None, skip biases.
     W_init_args : dictionary
         The arguments for the weight matrix initializer.
     b_init_args : dictionary
@@ -358,8 +358,8 @@ class Conv3dLayer(Layer):
         The padding algorithm type: "SAME" or "VALID".
     W_init : initializer
         The initializer for the weight matrix.
-    b_init : initializer
-        The initializer for the bias vector.
+    b_init : initializer or None
+        The initializer for the bias vector. If None, skip biases.
     W_init_args : dictionary
         The arguments for the weight matrix initializer.
     b_init_args : dictionary
@@ -403,8 +403,11 @@ def __init__(
             # W = tf.Variable(W_init(shape=shape, **W_init_args), name='W_conv')
             # b = tf.Variable(b_init(shape=[shape[-1]], **b_init_args), name='b_conv')
             W = tf.get_variable(name='W_conv3d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args)
-            b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args)
-            self.outputs = act(tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b)
+            if b_init:
+                b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args)
+                self.outputs = act(tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b)
+            else:
+                self.outputs = act(tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None))
 
         # self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b )
 
@@ -412,7 +415,10 @@ def __init__(
         # self.all_params = list(layer.all_params)
         # self.all_drop = dict(layer.all_drop)
         self.all_layers.append(self.outputs)
-        self.all_params.extend([W, b])
+        if b_init:
+            self.all_params.extend([W, b])
+        else:
+            self.all_params.extend([W])
 
 
 class DeConv3dLayer(Layer):
@@ -435,8 +441,8 @@ class DeConv3dLayer(Layer):
         The padding algorithm type: "SAME" or "VALID".
     W_init : initializer
         The initializer for the weight matrix.
-    b_init : initializer
-        The initializer for the bias vector.
+    b_init : initializer or None
+        The initializer for the bias vector. If None, skip biases.
     W_init_args : dictionary
         The arguments for the weight matrix initializer.
     b_init_args : dictionary
@@ -474,15 +480,20 @@ def __init__(
 
         with tf.variable_scope(name):
             W = tf.get_variable(name='W_deconv3d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args)
-            b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args)
-
-            self.outputs = act(tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b)
+            if b_init:
+                b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args)
+                self.outputs = act(tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b)
+            else:
+                self.outputs = act(tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding))
 
         # self.all_layers = list(layer.all_layers)
         # self.all_params = list(layer.all_params)
         # self.all_drop = dict(layer.all_drop)
         self.all_layers.append(self.outputs)
-        self.all_params.extend([W, b])
+        if b_init:
+            self.all_params.extend([W, b])
+        else:
+            self.all_params.extend([W])
 
 
 class UpSampling2dLayer(Layer):
diff --git a/tensorlayer/layers/normalization.py b/tensorlayer/layers/normalization.py
index 6d5f02814..42b1dcf86 100644
--- a/tensorlayer/layers/normalization.py
+++ b/tensorlayer/layers/normalization.py
@@ -75,10 +75,13 @@ class BatchNormLayer(Layer):
         The activation function of this layer.
     is_train : boolean
         Is being used for training or inference.
-    beta_init : initializer
-        The initializer for initializing beta.
-    gamma_init : initializer
-        The initializer for initializing gamma.
+    beta_init : initializer or None
+        The initializer for initializing beta. If None, skip beta.
+        Usually you should not skip beta unless you know what you are doing.
+    gamma_init : initializer or None
+        The initializer for initializing gamma. If None, skip gamma.
+        When the batch normalization layer is used instead of 'biases', or the next layer is linear, this can be
+        disabled since the scaling can be done by the next layer. See `Inception-ResNet-v2 `__
     dtype : TensorFlow dtype
         tf.float32 (default) or tf.float16.
     name : str
@@ -112,19 +115,27 @@ def __init__(
 
         with tf.variable_scope(name):
             axis = list(range(len(x_shape) - 1))
 
-            # 1. beta, gamma
-            if tf.__version__ > '0.12.1' and beta_init == tf.zeros_initializer:
-                beta_init = beta_init()
-            beta = tf.get_variable('beta', shape=params_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype, trainable=is_train)
-
-            gamma = tf.get_variable(
-                'gamma',
-                shape=params_shape,
-                initializer=gamma_init,
-                dtype=LayersConfig.tf_dtype,
-                trainable=is_train,
-            )
+            variables = []
+            if beta_init:
+                if tf.__version__ > '0.12.1' and beta_init == tf.zeros_initializer:
+                    beta_init = beta_init()
+                beta = tf.get_variable('beta', shape=params_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype, trainable=is_train)
+                variables.append(beta)
+            else:
+                beta = None
+
+            if gamma_init:
+                gamma = tf.get_variable(
+                    'gamma',
+                    shape=params_shape,
+                    initializer=gamma_init,
+                    dtype=LayersConfig.tf_dtype,
+                    trainable=is_train,
+                )
+                variables.append(gamma)
+            else:
+                gamma = None
 
             # 2.
             if tf.__version__ > '0.12.1':
@@ -163,7 +174,7 @@ def mean_var_with_update():
             else:
                 self.outputs = act(tf.nn.batch_normalization(self.inputs, moving_mean, moving_variance, beta, gamma, epsilon))
 
-            variables = [beta, gamma, moving_mean, moving_variance]
+            variables.extend([moving_mean, moving_variance])
 
             # logging.info(len(variables))
             # for idx, v in enumerate(variables):
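
For reference, a minimal usage sketch of the new behaviour, assuming the TensorLayer 1.x graph-building API at the time of this change (placeholder input plus InputLayer); the input shape and layer names are illustrative only, not part of the patch:

import tensorflow as tf
import tensorlayer as tl

# Batch of 16-frame 32x32 RGB clips (shape is illustrative).
x = tf.placeholder(tf.float32, shape=[None, 16, 32, 32, 3], name='x')
net = tl.layers.InputLayer(x, name='input')

# With this change, b_init=None builds the 3-D convolution without a bias
# term, and only W is collected into net.all_params.
net = tl.layers.Conv3dLayer(
    net,
    act=tf.identity,
    shape=(2, 2, 2, 3, 32),   # (depth, height, width, in_channels, out_channels)
    strides=(1, 2, 2, 2, 1),
    padding='SAME',
    b_init=None,              # skip biases; the batch norm below supplies the shift
    name='conv3d_no_bias',
)

# gamma_init=None skips the scale parameter, as suggested in the new docstring
# for the case where the next layer is linear or does its own scaling.
net = tl.layers.BatchNormLayer(net, act=tf.nn.relu, is_train=True, gamma_init=None, name='bn')

print([p.name for p in net.all_params])  # no b_conv3d and no gamma variable

With b_init, beta_init, and gamma_init left at their defaults, the behaviour is unchanged: W and b (respectively beta, gamma, and the moving statistics) are created and collected exactly as before.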