Add data_format support (channels_last / channels_first) in GroupNormLayer

2wins · 2wins · commit 27224ce227b6 · 2018-09-26T11:58:10.000+09:00
diff --git a/tensorlayer/layers/normalization.py b/tensorlayer/layers/normalization.py
@@ -7,6 +7,7 @@
 from tensorlayer.layers.core import Layer
 from tensorlayer.layers.core import LayersConfig
 from tensorlayer.layers.core import TF_GRAPHKEYS_VARIABLES
+from tensorlayer.layers.utils import get_collection_trainable
 
 from tensorlayer import logging
 
@@ -323,36 +324,57 @@ class GroupNormLayer(Layer):
     """
 
     @deprecated_alias(layer='prev_layer', end_support_version=1.9)  # TODO remove this line for the 1.9 release
-    def __init__(self, prev_layer, groups=32, epsilon=1e-06, act=None, name='groupnorm'):
+    def __init__(self, prev_layer, groups=32, epsilon=1e-06, act=None, data_format='channels_last', name='groupnorm'):
         super(GroupNormLayer, self).__init__(prev_layer=prev_layer, act=act, name=name)
 
         logging.info(
             "GroupNormLayer %s: act: %s" % (self.name, self.act.__name__ if self.act is not None else 'No Activation')
         )
 
-        channels = self.inputs.get_shape().as_list()[-1]
-        if groups > channels:
-            raise ValueError('Invalid groups %d for %d channels.' % (groups, channels))
-        if channels % groups != 0:
-            raise ValueError('%d channels is not commensurate with %d groups.' % (channels, groups))
+        shape = self.inputs.get_shape().as_list()
+        if len(shape) != 4:
+            raise Exception("GroupNormLayer only supports 2D images.")
 
-        with tf.variable_scope(name) as vs:
+        if data_format == 'channels_last':
+            channels = shape[-1]
             int_shape = tf.concat(
                 [tf.shape(self.inputs)[0:3],
                  tf.convert_to_tensor([groups, channels // groups])], axis=0
             )
+        elif data_format == 'channels_first':
+            channels = shape[1]
+            int_shape = tf.concat(
+                [
+                    tf.shape(self.inputs)[0:1],
+                    tf.convert_to_tensor([groups, channels // groups]),
+                    tf.shape(self.inputs)[2:4]
+                ], axis=0
+            )
+        else:
+            raise ValueError("data_format must be 'channels_last' or 'channels_first'.")
 
+        if groups > channels:
+            raise ValueError('Invalid groups %d for %d channels.' % (groups, channels))
+        if channels % groups != 0:
+            raise ValueError('%d channels is not commensurate with %d groups.' % (channels, groups))
+
+        with tf.variable_scope(name):
             x = tf.reshape(self.inputs, int_shape)
-            mean, var = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
-            x = (x - mean) / tf.sqrt(var + epsilon)
+            if data_format == 'channels_last':
+                mean, var = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
+                gamma = tf.get_variable('gamma', channels, initializer=tf.ones_initializer())
+                beta = tf.get_variable('beta', channels, initializer=tf.zeros_initializer())
+            else:
+                mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True)
+                gamma = tf.get_variable('gamma', [1, channels, 1, 1], initializer=tf.ones_initializer())
+                beta = tf.get_variable('beta', [1, channels, 1, 1], initializer=tf.zeros_initializer())
 
-            gamma = tf.get_variable('gamma', channels, initializer=tf.ones_initializer())
-            beta = tf.get_variable('beta', channels, initializer=tf.zeros_initializer())
+            x = (x - mean) / tf.sqrt(var + epsilon)
 
             self.outputs = tf.reshape(x, tf.shape(self.inputs)) * gamma + beta
             self.outputs = self._apply_activation(self.outputs)
 
-            variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
+        variables = get_collection_trainable(self.name)
 
         self._add_layers(self.outputs)
         self._add_params(variables)
diff --git a/tests/test_layers_normalization.py b/tests/test_layers_normalization.py
@@ -21,7 +21,10 @@ def model(x, is_train=True, reuse=False):
         n = tl.layers.LocalResponseNormLayer(n, name='norm_local')
         n = tl.layers.LayerNormLayer(n, reuse=reuse, name='norm_layer')
         n = tl.layers.InstanceNormLayer(n, name='norm_instance')
-        n = tl.layers.GroupNormLayer(n, groups=40, name='groupnorm')
+        # n = tl.layers.GroupNormLayer(n, groups=40, name='groupnorm')
+        n.outputs = tf.reshape(n.outputs, [-1, 80, 100, 100])
+        n = tl.layers.GroupNormLayer(n, groups=40, data_format='channels_first', name='groupnorm')
+        n.outputs = tf.reshape(n.outputs, [-1, 100, 100, 80])
         n = tl.layers.SwitchNormLayer(n, name='switchnorm')
         n = tl.layers.QuanConv2dWithBN(n, n_filter=3, is_train=is_train, name='quan_cnn_with_bn')
         n = tl.layers.FlattenLayer(n, name='flatten')