Lots of fixes

Joooohan · Joooohan · commit a83c151532bf · 2020-06-30T15:01:20.000Z
diff --git a/gated_pixelcnn/model.py b/gated_pixelcnn/model.py
@@ -202,7 +202,7 @@ def build(self, input_shape):
             stack='H',
             type='A',
             n_colors=self.n_colors,
-            kernel_size=(1, 3),
+            kernel_size=3,
             padding='SAME',
             filters=self.hidden_dim * self.n_colors
         )
@@ -212,6 +212,18 @@ def build(self, input_shape):
             for i in range(self.n_res)
         ]
 
+        self.final_conv_h = tfkl.Conv2D(
+            filters = self.n_output * self.n_colors,
+            kernel_size = 1,
+            name='final_conv_h'
+        )
+
+        self.final_conv_v = tfkl.Conv2D(
+            filters = self.n_output * self.n_colors,
+            kernel_size = 1,
+            name='final_conv_v'
+        )
+
         self.final_conv = tfkl.Conv2D(
             filters = self.n_output * self.n_colors,
             kernel_size = 1,
@@ -225,7 +237,10 @@ def call(self, x):
         for res_block in self.res_blocks:
             v_stack, h_stack = res_block(v_stack, h_stack)
 
-        h = self.final_conv(tf.nn.relu(v_stack + h_stack))
+        h = self.final_conv_h(tf.nn.relu(h_stack)) + \
+            self.final_conv_v(tf.nn.relu(v_stack))
+
+        h = self.final_conv(tf.nn.relu(h))
 
         # Format output
         h = tf.split(h, num_or_size_splits=self.n_colors, axis=-1)
@@ -254,3 +269,13 @@ def sample(self, n):
             samples = tf.tensor_scatter_nd_update(samples, indices, updates)
 
         return samples
+
+def bits_per_dim_loss(y_true, y_pred):
+    """Return the per-example negative log-likelihood in bits per dimension."""
+    _, height, width, channels = y_true.shape
+    n_dims = float(height * width * channels)
+    targets = tf.cast(y_true, tf.int32)  # indices into the last axis of y_pred
+    all_log_probs = tf.math.log_softmax(y_pred, axis=-1)
+    picked = tf.gather(all_log_probs, targets, axis=-1, batch_dims=4)
+    nats = -tf.reduce_sum(picked, axis=[1, 2, 3])  # summed over H, W and C
+    return nats / n_dims / tf.math.log(2.)
diff --git a/gated_pixelcnn/train_mnist.py b/gated_pixelcnn/train_mnist.py
@@ -3,15 +3,15 @@
 import tensorflow as tf
 import tensorflow_datasets as tfds
 
-from model import GatedPixelCNN
+from model import GatedPixelCNN, bits_per_dim_loss
 from utils import PlotSamplesCallback
 
 tfk = tf.keras
 tfkl = tf.keras.layers
 AUTOTUNE = tf.data.experimental.AUTOTUNE
 
 # Training parameters
-EPOCHS = 10
+EPOCHS = 75
 BATCH_SIZE = 64
 BUFFER_SIZE = 1024  # for shuffling
 
@@ -43,9 +43,8 @@ def duplicate(element):
 # Define model
 strategy = tf.distribute.MirroredStrategy()
 with strategy.scope():
-    model = GatedPixelCNN(hidden_dim=64, n_res=5)
-    loss = tfk.losses.SparseCategoricalCrossentropy(from_logits=True)
-    model.compile(optimizer='adam', loss=loss)
+    model = GatedPixelCNN(hidden_dim=64, n_res=6)
+    model.compile(optimizer='adam', loss=bits_per_dim_loss)
 
 # Callbacks
 time = datetime.now().strftime('%Y%m%d-%H%M%S')
diff --git a/pixelcnn/model.py b/pixelcnn/model.py
@@ -219,3 +219,13 @@ def sample(self, n):
             samples = tf.tensor_scatter_nd_update(samples, indices, updates)
 
         return samples
+
+def bits_per_dim_loss(y_true, y_pred):
+    """Return the per-example negative log-likelihood in bits per dimension."""
+    _, height, width, channels = y_true.shape
+    n_dims = float(height * width * channels)
+    targets = tf.cast(y_true, tf.int32)  # indices into the last axis of y_pred
+    all_log_probs = tf.math.log_softmax(y_pred, axis=-1)
+    picked = tf.gather(all_log_probs, targets, axis=-1, batch_dims=4)
+    nats = -tf.reduce_sum(picked, axis=[1, 2, 3])  # summed over H, W and C
+    return nats / n_dims / tf.math.log(2.)
diff --git a/pixelcnn/train_mnist.py b/pixelcnn/train_mnist.py
@@ -3,15 +3,15 @@
 import tensorflow as tf
 import tensorflow_datasets as tfds
 
-from model import PixelCNN
+from model import PixelCNN, bits_per_dim_loss
 from utils import PlotSamplesCallback
 
 tfk = tf.keras
 tfkl = tf.keras.layers
 AUTOTUNE = tf.data.experimental.AUTOTUNE
 
 # Training parameters
-EPOCHS = 10
+EPOCHS = 50
 BATCH_SIZE = 64
 BUFFER_SIZE = 1024  # for shuffling
 
@@ -43,9 +43,8 @@ def duplicate(element):
 # Define model
 strategy = tf.distribute.MirroredStrategy()
 with strategy.scope():
-    model = PixelCNN(hidden_dim=32, n_res=3)
-    loss = tfk.losses.SparseCategoricalCrossentropy(from_logits=True)
-    model.compile(optimizer='adam', loss=loss)
+    model = PixelCNN(hidden_dim=64, n_res=6)
+    model.compile(optimizer='adam', loss=bits_per_dim_loss)
 
 # Callbacks
 time = datetime.now().strftime('%Y%m%d-%H%M%S')
diff --git a/pixelcnn_plus/model.py b/pixelcnn_plus/model.py
@@ -309,6 +309,12 @@ def build(self, input_shape):
             name='final_conv_h'
         )
 
+        self.final_conv = tfkl.Conv2D(
+            filters = self.n_mix * self.n_component_per_mix,
+            kernel_size = 1,
+            name='final_conv'
+        )
+
     def call(self, x, training=False):
         # First convs
         v_stack = self.down_shift(self.first_conv_v(x))
@@ -323,8 +329,8 @@ def call(self, x, training=False):
                 residuals_h.append(h_stack)
                 residuals_v.append(v_stack)
             if ds < self.n_downsampling:
-                v_stack = self.downsampling_convs_v[ds](v_stack)
-                h_stack = self.downsampling_convs_h[ds](h_stack)
+                v_stack = self.downsampling_convs_v[ds](tf.nn.relu(v_stack))
+                h_stack = self.downsampling_convs_h[ds](tf.nn.relu(h_stack))
                 residuals_h.append(h_stack)
                 residuals_v.append(v_stack)
 
@@ -348,13 +354,15 @@ def call(self, x, training=False):
                 v_stack += residuals_v.pop()
                 h_stack += residuals_h.pop()
             if us < self.n_downsampling:
-                v_stack = self.upsampling_convs_v[us](v_stack)
-                h_stack = self.upsampling_convs_h[us](h_stack)
+                v_stack = self.upsampling_convs_v[us](tf.nn.relu(v_stack))
+                h_stack = self.upsampling_convs_h[us](tf.nn.relu(h_stack))
                 v_stack += residuals_v.pop()
                 h_stack += residuals_h.pop()
 
         # Final conv
-        outputs = self.final_conv_h(h_stack) + self.final_conv_v(v_stack)
+        outputs = self.final_conv_h(tf.nn.relu(h_stack)) + \
+                  self.final_conv_v(tf.nn.relu(v_stack))
+        outputs = self.final_conv(tf.nn.relu(outputs))
 
         return outputs
 
@@ -382,8 +390,8 @@ def sample(self, n):
                 beta = tf.math.tanh(beta)
                 gamma = tf.math.tanh(gamma)
 
-                mu_g = mu_g + alpha * mu_r
-                mu_b = mu_b + beta * mu_r + gamma * mu_g
+                # Means stay uncoupled here: alpha/beta/gamma are applied to
+                # the sampled values below, once a red/green draw exists.
                 mu = tf.stack([mu_r, mu_g, mu_b], axis=2)
                 logvar = tf.stack([logvar_r, logvar_g, logvar_b], axis=2)
 
@@ -397,32 +405,45 @@ def sample(self, n):
             # Sample colors
             u = tf.random.uniform(tf.shape(mu), minval=1e-5, maxval=1. - 1e-5)
             x = mu + tf.exp(logvar) * (tf.math.log(u) - tf.math.log(1. - u))
-            updates = tf.clip_by_value(x, -1., 1.)
+
+            # Apply the channel coupling to the sampled values (R -> G -> B)
             if channels == 3:
-                updates = updates[:, 0, :]
+                alpha = tf.gather(alpha, components, axis=1, batch_dims=1)
+                beta = tf.gather(beta, components, axis=1, batch_dims=1)
+                gamma = tf.gather(gamma, components, axis=1, batch_dims=1)
+                x_r = x[:, 0, 0]
+                x_g = x[:, 0, 1] + alpha[:, 0] * x_r
+                x_b = x[:, 0, 2] + beta[:, 0] * x_r + gamma[:, 0] * x_g
+                x = tf.stack([x_r, x_g, x_b], axis=-1)
+
+            updates = tf.clip_by_value(x, -1., 1.)
             indices = tf.constant([[i, h, w] for i in range(n)])
             samples = tf.tensor_scatter_nd_update(samples, indices, updates)
 
         return samples
 
 def discretized_logistic_mix_loss(y_true, y_pred):
-    # y_true shape (batch_size, H, W, channels)
-    n_channels = y_true.shape[-1]
+    # y_true shape (batch_size, H, W, C)
+    _, H, W, C = y_true.shape
+    num_pixels = float(H * W * C)
 
-    if n_channels == 1:
+    if C == 1:
         pi, mu, logvar = tf.split(y_pred, num_or_size_splits=3, axis=-1)
         mu = tf.expand_dims(mu, axis=3)
         logvar = tf.expand_dims(logvar, axis=3)
-    else:  # n_channels == 3
+    else:  # C == 3
         (pi, mu_r, mu_g, mu_b, logvar_r, logvar_g, logvar_b, alpha,
          beta, gamma) = tf.split(y_pred, num_or_size_splits=10, axis=-1)
 
         alpha = tf.math.tanh(alpha)
         beta = tf.math.tanh(beta)
         gamma = tf.math.tanh(gamma)
 
-        mu_g = mu_g + alpha * mu_r
-        mu_b = mu_b + beta * mu_r + gamma * mu_g
+        red = y_true[:,:,:,0:1]
+        green = y_true[:,:,:,1:2]
+
+        mu_g = mu_g + alpha * red
+        mu_b = mu_b + beta * red + gamma * green
         mu = tf.stack([mu_r, mu_g, mu_b], axis=3)
         logvar = tf.stack([logvar_r, logvar_g, logvar_b], axis=3)
 
@@ -462,11 +483,14 @@ def log_pdf(x):  # log logistic pdf
 
     # Deal with edge cases
     log_probs = tf.where(y_true > 0.999, log_one_minus_cdf_min, log_probs)
-    log_probs = tf.where(y_true < 0.999, log_cdf_plus, log_probs)
+    log_probs = tf.where(y_true < -0.999, log_cdf_plus, log_probs)
 
     log_probs = tf.reduce_sum(log_probs, axis=3)  # whole pixel prob per component
     log_probs += tf.nn.log_softmax(pi)  #  multiply by mixture components
     log_probs = tf.math.reduce_logsumexp(log_probs, axis=-1)  # add components probs
     log_probs = tf.reduce_sum(log_probs, axis=[1, 2])
 
-    return -log_probs
+    # Convert to bits per dim
+    bits_per_dim = -log_probs / num_pixels / tf.math.log(2.)
+
+    return bits_per_dim
diff --git a/pixelcnn_plus/train_mnist.py b/pixelcnn_plus/train_mnist.py
@@ -4,7 +4,7 @@
 import tensorflow_datasets as tfds
 
 from model import PixelCNNplus, discretized_logistic_mix_loss
-from utils import PlotSamplesCallback, PlotReconstructionCallback
+from utils import PlotSamplesCallback
 
 tfk = tf.keras
 tfkl = tf.keras.layers
@@ -50,8 +50,7 @@ def duplicate(element):
 time = datetime.now().strftime('%Y%m%d-%H%M%S')
 log_dir = os.path.join('.', 'logs', 'pixelcnn++', time)
 tensorboard_clbk = tfk.callbacks.TensorBoard(log_dir=log_dir)
-sample_clbk = PlotSamplesCallback(logdir=log_dir, period=5)
-reconstruction_clbk = PlotReconstructionCallback(logdir=log_dir, test_ds=test_ds)
-callbacks = [tensorboard_clbk, sample_clbk, reconstruction_clbk]
+sample_clbk = PlotSamplesCallback(logdir=log_dir, period=1)
+callbacks = [tensorboard_clbk, sample_clbk]
 
 # Fit
diff --git a/pixelcnn_plus/utils.py b/pixelcnn_plus/utils.py
@@ -23,7 +23,7 @@ def plot_to_image(figure):
 
 class PlotSamplesCallback(tfk.callbacks.Callback):
     """Plot `nex` reconstructed image to tensorboard."""
-    def __init__(self, logdir: str, nex: int=4, period: int=5):
+    def __init__(self, logdir: str, nex: int=4, period: int=1):
         super(PlotSamplesCallback, self).__init__()
         logdir = os.path.join(logdir, 'samples')
         self.file_writer = tf.summary.create_file_writer(logdir=logdir)
@@ -58,74 +58,3 @@ def on_epoch_end(self, epoch, logs=None):
                     step=epoch,
                     max_outputs=self.nex
                 )
-
-
-class PlotReconstructionCallback(tfk.callbacks.Callback):
-    """Plot `nex` reconstructed image to tensorboard."""
-    def __init__(self, logdir: str, test_ds: tf.data.Dataset, nex: int=4):
-        super(PlotReconstructionCallback, self).__init__()
-        logdir = os.path.join(logdir, 'reconstructions')
-        self.file_writer = tf.summary.create_file_writer(logdir=logdir)
-        self.nex = nex
-        self.test_ds = test_ds.map(lambda x, y: x).unbatch().batch(nex)
-        self.test_it = iter(self.test_ds)
-
-    def get_next_images(self):
-        try:
-            next_images = next(self.test_it)
-        except StopIteration:
-            self.test_it = iter(self.test_ds)
-            next_images = next(self.test_it)
-        return next_images
-
-    def plot_img_reconstruction(self, image, reconstruction):
-        fig, ax = plt.subplots(nrows=1, ncols=2)
-
-        if image.shape[-1] == 1:
-            image = tf.squeeze(image, axis=-1)
-            reconstruction = tf.squeeze(reconstruction, axis=-1)
-
-        ax[0].imshow(image, vmin=-1., vmax=1., cmap=plt.cm.Greys)
-        ax[0].set_title('Image')
-        ax[0].axis('off')
-
-        ax[1].imshow(reconstruction, vmin=-1., vmax=1., cmap=plt.cm.Greys)
-        ax[1].set_title('Reconstruction')
-        ax[1].axis('off')
-
-        return fig
-
-    def get_means(self, logits):
-        pi, mu, _ = tf.split(logits, num_or_size_splits=3, axis=-1)
-        nex, height, width, n_mix = pi.shape
-
-        pi = tf.reshape(pi, shape=(-1, n_mix))
-        # components = tf.random.categorical(logits=pi, num_samples=1)
-        components = tf.argmax(pi, axis=-1)[:, None]
-
-        mu = tf.reshape(pi, shape=(-1, n_mix))
-        mu = tf.gather(mu, components, axis=1, batch_dims=1)
-        mu = tf.reshape(mu, (nex, height, width, 1))
-        mu = tf.clip_by_value(mu, -1., 1.)
-
-        return mu
-
-
-    def on_epoch_end(self, epoch, logs=None):
-        images = self.get_next_images()
-        logits = self.model(images)
-        reconstructions = self.get_means(logits)
-
-        imgs = []
-        for i in range(self.nex):
-            fig = self.plot_img_reconstruction(images[i], reconstructions[i])
-            imgs.append(plot_to_image(fig))
-
-        imgs = tf.concat(imgs, axis=0)
-        with self.file_writer.as_default():
-            tf.summary.image(
-                name='Reconstructions',
-                data=imgs,
-                step=epoch,
-                max_outputs=self.nex
-            )