diff --git a/tensorlayer/layers/convolution.py b/tensorlayer/layers/convolution.py
index f19cd62e6..1cc92d048 100644
--- a/tensorlayer/layers/convolution.py
+++ b/tensorlayer/layers/convolution.py
@@ -123,9 +123,9 @@ class Conv2dLayer(Layer):
     padding : str
         The padding algorithm type: "SAME" or "VALID".
     W_init : initializer
-        The initializer for the the weight matrix.
+        The initializer for the weight matrix.
     b_init : initializer or None
-        The initializer for the the bias vector. If None, skip biases.
+        The initializer for the bias vector. If None, skip biases.
     W_init_args : dictionary
         The arguments for the weight matrix initializer.
     b_init_args : dictionary
@@ -358,8 +358,8 @@ class Conv3dLayer(Layer):
         The padding algorithm type: "SAME" or "VALID".
     W_init : initializer
         The initializer for the weight matrix.
-    b_init : initializer
-        The initializer for the bias vector.
+    b_init : initializer or None
+        The initializer for the bias vector. If None, skip biases.
     W_init_args : dictionary
         The arguments for the weight matrix initializer.
     b_init_args : dictionary
@@ -403,8 +403,11 @@ def __init__(
             # W = tf.Variable(W_init(shape=shape, **W_init_args), name='W_conv')
             # b = tf.Variable(b_init(shape=[shape[-1]], **b_init_args), name='b_conv')
             W = tf.get_variable(name='W_conv3d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args)
-            b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args)
-            self.outputs = act(tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b)
+            if b_init:
+                b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args)
+                self.outputs = act(tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b)
+            else:
+                self.outputs = act(tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None))
 
         # self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b )
 
@@ -412,7 +415,10 @@ def __init__(
         # self.all_params = list(layer.all_params)
         # self.all_drop = dict(layer.all_drop)
         self.all_layers.append(self.outputs)
-        self.all_params.extend([W, b])
+        if b_init:
+            self.all_params.extend([W, b])
+        else:
+            self.all_params.extend([W])
 
 
 class DeConv3dLayer(Layer):
@@ -435,8 +441,8 @@ class DeConv3dLayer(Layer):
         The padding algorithm type: "SAME" or "VALID".
     W_init : initializer
         The initializer for the weight matrix.
-    b_init : initializer
-        The initializer for the bias vector.
+    b_init : initializer or None
+        The initializer for the bias vector. If None, skip biases.
     W_init_args : dictionary
         The arguments for the weight matrix initializer.
     b_init_args : dictionary
@@ -474,15 +480,20 @@ def __init__(
 
         with tf.variable_scope(name):
             W = tf.get_variable(name='W_deconv3d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args)
-            b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args)
-
-            self.outputs = act(tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b)
+            if b_init:
+                b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args)
+                self.outputs = act(tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b)
+            else:
+                self.outputs = act(tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding))
 
         # self.all_layers = list(layer.all_layers)
         # self.all_params = list(layer.all_params)
         # self.all_drop = dict(layer.all_drop)
         self.all_layers.append(self.outputs)
-        self.all_params.extend([W, b])
+        if b_init:
+            self.all_params.extend([W, b])
+        else:
+            self.all_params.extend([W])
 
 
 class UpSampling2dLayer(Layer):
diff --git a/tensorlayer/layers/normalization.py b/tensorlayer/layers/normalization.py
index 6d5f02814..42b1dcf86 100644
--- a/tensorlayer/layers/normalization.py
+++ b/tensorlayer/layers/normalization.py
@@ -75,10 +75,13 @@ class BatchNormLayer(Layer):
         The activation function of this layer.
     is_train : boolean
         Is being used for training or inference.
-    beta_init : initializer
-        The initializer for initializing beta.
-    gamma_init : initializer
-        The initializer for initializing gamma.
+    beta_init : initializer or None
+        The initializer for initializing beta. If None, skip beta.
+        Usually you should not skip beta unless you know what you are doing.
+    gamma_init : initializer or None
+        The initializer for initializing gamma. If None, skip gamma.
+        When the batch normalization layer is used instead of 'biases', or the next layer is linear, this can be
+        disabled since the scaling can be done by the next layer. See `Inception-ResNet-v2 `__
     dtype : TensorFlow dtype
         tf.float32 (default) or tf.float16.
     name : str
@@ -112,19 +115,27 @@ def __init__(
 
         with tf.variable_scope(name):
             axis = list(range(len(x_shape) - 1))
 
-            # 1. beta, gamma
-            if tf.__version__ > '0.12.1' and beta_init == tf.zeros_initializer:
-                beta_init = beta_init()
-            beta = tf.get_variable('beta', shape=params_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype, trainable=is_train)
-
-            gamma = tf.get_variable(
-                'gamma',
-                shape=params_shape,
-                initializer=gamma_init,
-                dtype=LayersConfig.tf_dtype,
-                trainable=is_train,
-            )
+            variables = []
+            if beta_init:
+                if tf.__version__ > '0.12.1' and beta_init == tf.zeros_initializer:
+                    beta_init = beta_init()
+                beta = tf.get_variable('beta', shape=params_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype, trainable=is_train)
+                variables.append(beta)
+            else:
+                beta = None
+
+            if gamma_init:
+                gamma = tf.get_variable(
+                    'gamma',
+                    shape=params_shape,
+                    initializer=gamma_init,
+                    dtype=LayersConfig.tf_dtype,
+                    trainable=is_train,
+                )
+                variables.append(gamma)
+            else:
+                gamma = None
 
             # 2.
             if tf.__version__ > '0.12.1':
@@ -163,7 +174,7 @@ def mean_var_with_update():
             else:
                 self.outputs = act(tf.nn.batch_normalization(self.inputs, moving_mean, moving_variance, beta, gamma, epsilon))
 
-            variables = [beta, gamma, moving_mean, moving_variance]
+            variables.extend([moving_mean, moving_variance])
 
             # logging.info(len(variables))
             # for idx, v in enumerate(variables):
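
For reference, a minimal usage sketch of the new behaviour, assuming the TensorLayer 1.x graph-building API at the time of this change (placeholder input plus InputLayer); the input shape and layer names are illustrative only, not part of the patch:

import tensorflow as tf
import tensorlayer as tl

# Batch of 16-frame 32x32 RGB clips (shape is illustrative).
x = tf.placeholder(tf.float32, shape=[None, 16, 32, 32, 3], name='x')
net = tl.layers.InputLayer(x, name='input')

# With this change, b_init=None builds the 3-D convolution without a bias
# term, and only W is collected into net.all_params.
net = tl.layers.Conv3dLayer(
    net,
    act=tf.identity,
    shape=(2, 2, 2, 3, 32),   # (depth, height, width, in_channels, out_channels)
    strides=(1, 2, 2, 2, 1),
    padding='SAME',
    b_init=None,              # skip biases; the batch norm below supplies the shift
    name='conv3d_no_bias',
)

# gamma_init=None skips the scale parameter, as suggested in the new docstring
# for the case where the next layer is linear or does its own scaling.
net = tl.layers.BatchNormLayer(net, act=tf.nn.relu, is_train=True, gamma_init=None, name='bn')

print([p.name for p in net.all_params])  # no b_conv3d and no gamma variable

With b_init, beta_init, and gamma_init left at their defaults, the behaviour is unchanged: W and b (respectively beta, gamma, and the moving statistics) are created and collected exactly as before.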