In [1]:
# https://gist.github.com/kylemcdonald/a10c826e0bec1503de6a6b6f2f042fa4

In [2]:
import time
from collections import defaultdict

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Conv2D, LeakyReLU, AvgPool2D, UpSampling2D
from tensorflow.keras.losses import MSE
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
import math

In [3]:
args = {
    'epochs': 10,
    'width': 32,
    'latent_width': 2,
    'depth': 16,
    'latent': 2,
    'colors': 1,
    'batch_size': 64
}

In [4]:
from sklearn.datasets import fetch_openml

In [5]:
def build_batches(x, n):
    m = (x.shape[0] // n) * n
    return x[:m].reshape(-1, n, *x.shape[1:])

def get_mnist32_batches(batch_size, data_format='channels_last'):
    channel_index = 1 if data_format == 'channels_first' else 3
    mnist = fetch_openml('mnist_784')
    data_x = mnist['data'].reshape(-1,28,28).astype(np.float32) / 255.
    data_x = np.pad(data_x, ((0,0), (2,2), (2,2)), mode='constant')
    data_x = np.expand_dims(data_x, channel_index)
    data_y = mnist['target']
    indices = np.arange(len(data_x))
    np.random.shuffle(indices)
    y_batches = build_batches(data_y[indices], batch_size)
    x_batches = build_batches(data_x[indices], batch_size)
    return x_batches, y_batches

x_batches, y_batches = get_mnist32_batches(args['batch_size'])

In [6]:
x_batches.shape

(1093, 64, 32, 32, 1)

In [7]:
def Encoder(input_shape, scales, depth, latent):
    activation = LeakyReLU()
    f = Sequential()
    f.add(Conv2D(depth, 1, padding='same', input_shape=input_shape))
    for scale in range(scales):
        k = depth << scale
        f.add(Conv2D(k, 3, padding='same', activation=activation))
        f.add(Conv2D(k, 3, padding='same', activation=activation))
        f.add(AvgPool2D(2))
    k = depth << scales
    f.add(Conv2D(k, 3, padding='same', activation=activation))
    f.add(Conv2D(latent, 3, padding='same'))
    return f

def Decoder(input_shape, scales, depth, colors):
    activation = LeakyReLU()
    f = Sequential()
    for scale in range(scales - 1, -1, -1):
        k = depth << scale
        f.add(Conv2D(k, 3, padding='same', activation=activation))
        f.add(Conv2D(k, 3, padding='same', activation=activation))
        f.add(UpSampling2D(2))
    f.add(Conv2D(depth, 3, padding='same', activation=activation))
    f.add(Conv2D(colors, 3, padding='same'))
    return f

scales = int(round(math.log(args['width'] // args['latent_width'], 2)))

input_shape = x_batches.shape[2:]
encoder = Encoder(input_shape, scales, args['depth'], args['latent'])

input_shape = encoder.output_shape[1:]
decoder = Decoder(input_shape, scales, args['depth'], args['colors'])

In [8]:
input_shape = x_batches.shape[2:]
inputs = Input(input_shape)
encoded = encoder(inputs)
decoded = decoder(encoded)
model = tf.keras.Model(inputs=inputs, outputs=decoded)

model.compile('adam', loss=lambda yt,yp: MSE(inputs, decoded))

losses = []

In [9]:
encoder.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 16)        32        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 16)        2320      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 16)        2320      
_________________________________________________________________
average_pooling2d (AveragePo (None, 16, 16, 16)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 16, 16, 32)        4640      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 16, 16, 32)        9248      
_________________________________________________________________
average_pooling2d_1 (Average (None, 8, 8, 32)          0

In [10]:
decoder.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_11 (Conv2D)           (None, 2, 2, 128)         2432      
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 2, 2, 128)         147584    
_________________________________________________________________
up_sampling2d (UpSampling2D) (None, 4, 4, 128)         0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 4, 4, 64)          73792     
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 4, 4, 64)          36928     
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 8, 8, 64)          0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 8, 8, 32)         

In [19]:
batch.shape

(64, 32, 32, 1)

In [17]:
for epoch in range(args['epochs']):
    for batch in x_batches:
        loss = model.train_on_batch(batch)
        losses.append(loss)

ValueError: in user code:

    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:806 train_function  *
        return step_function(self, iterator)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:789 run_step  **
        outputs = model.train_step(data)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:757 train_step
        self.trainable_variables)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:2737 _minimize
        trainable_variables))
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:562 _aggregate_gradients
        filtered_grads_and_vars = _filter_grads(grads_and_vars)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:1271 _filter_grads
        ([v.name for _, v in grads_and_vars],))

    ValueError: No gradients provided for any variable: ['conv2d/kernel:0', 'conv2d/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0', 'conv2d_2/kernel:0', 'conv2d_2/bias:0', 'conv2d_3/kernel:0', 'conv2d_3/bias:0', 'conv2d_4/kernel:0', 'conv2d_4/bias:0', 'conv2d_5/kernel:0', 'conv2d_5/bias:0', 'conv2d_6/kernel:0', 'conv2d_6/bias:0', 'conv2d_7/kernel:0', 'conv2d_7/bias:0', 'conv2d_8/kernel:0', 'conv2d_8/bias:0', 'conv2d_9/kernel:0', 'conv2d_9/bias:0', 'conv2d_10/kernel:0', 'conv2d_10/bias:0', 'sequential_1/conv2d_11/kernel:0', 'sequential_1/conv2d_11/bias:0', 'sequential_1/conv2d_12/kernel:0', 'sequential_1/conv2d_12/bias:0', 'sequential_1/conv2d_13/kernel:0', 'sequential_1/conv2d_13/bias:0', 'sequential_1/conv2d_14/kernel:0', 'sequential_1/conv2d_14/bias:0', 'sequential_1/conv2d_15/kernel:0', 'sequential_1/conv2d_15/bias:0', 'sequential_1/conv2d_16/kernel:0', 'sequential_1/conv2d_16/bias:0', 'sequential_1/conv2d_17/kernel:0', 'sequential_1/conv2d_17/bias:0', 'sequential_1/conv2d_18/kernel:0', 'sequential_1/conv2d_18/bias:0', 'sequential_1/conv2d_19/kernel:0', 'sequential_1/conv2d_19/bias:0', 'sequential_1/conv2d_20/kernel:0', 'sequential_1/conv2d_20/bias:0'].


In [16]:
it = 0
start_time = time.time()
try:
    for epoch in range(args['epochs']):
        for batch in x_batches:
            loss = model.train_on_batch(batch)
            losses.append(loss)
            
            if it % 100 == 0:
                x_hat = model.predict(x_batches[0])
                mosaic = make_mosaic(x_hat.squeeze())
                
                plt.figure(facecolor='w')
                plt.plot(losses)
                plt.yscale('log')
                
                clear_output(wait=True)
                plt.show()
                show_array(mosaic * 255)
                
                speed = args['batch_size'] * it / (time.time() - start_time)
                print(f'{epoch+1}/{args["epochs"]}; {speed:.2f} samples/sec')

            it += 1
except KeyboardInterrupt:
    pass

ValueError: in user code:

    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:806 train_function  *
        return step_function(self, iterator)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:789 run_step  **
        outputs = model.train_step(data)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:757 train_step
        self.trainable_variables)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:2737 _minimize
        trainable_variables))
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:562 _aggregate_gradients
        filtered_grads_and_vars = _filter_grads(grads_and_vars)
    /mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:1271 _filter_grads
        ([v.name for _, v in grads_and_vars],))

    ValueError: No gradients provided for any variable: ['conv2d/kernel:0', 'conv2d/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0', 'conv2d_2/kernel:0', 'conv2d_2/bias:0', 'conv2d_3/kernel:0', 'conv2d_3/bias:0', 'conv2d_4/kernel:0', 'conv2d_4/bias:0', 'conv2d_5/kernel:0', 'conv2d_5/bias:0', 'conv2d_6/kernel:0', 'conv2d_6/bias:0', 'conv2d_7/kernel:0', 'conv2d_7/bias:0', 'conv2d_8/kernel:0', 'conv2d_8/bias:0', 'conv2d_9/kernel:0', 'conv2d_9/bias:0', 'conv2d_10/kernel:0', 'conv2d_10/bias:0', 'sequential_1/conv2d_11/kernel:0', 'sequential_1/conv2d_11/bias:0', 'sequential_1/conv2d_12/kernel:0', 'sequential_1/conv2d_12/bias:0', 'sequential_1/conv2d_13/kernel:0', 'sequential_1/conv2d_13/bias:0', 'sequential_1/conv2d_14/kernel:0', 'sequential_1/conv2d_14/bias:0', 'sequential_1/conv2d_15/kernel:0', 'sequential_1/conv2d_15/bias:0', 'sequential_1/conv2d_16/kernel:0', 'sequential_1/conv2d_16/bias:0', 'sequential_1/conv2d_17/kernel:0', 'sequential_1/conv2d_17/bias:0', 'sequential_1/conv2d_18/kernel:0', 'sequential_1/conv2d_18/bias:0', 'sequential_1/conv2d_19/kernel:0', 'sequential_1/conv2d_19/bias:0', 'sequential_1/conv2d_20/kernel:0', 'sequential_1/conv2d_20/bias:0'].


In [7]:
import math

In [8]:
width = 32
latent_width = 2
depth = 16
latent = 2

In [9]:
activation = LeakyReLU()

In [10]:
scales = int(round(math.log(width // latent_width, 2)))
scales

4

In [11]:
input_shape = (28,28,1)

In [12]:
f = Sequential()
f.add(Conv2D(depth, 1, padding='same', input_shape=input_shape))
for scale in range(scales):
    k = depth << scale
    f.add(Conv2D(k, 3, padding='same', activation=activation))
    f.add(Conv2D(k, 3, padding='same', activation=activation))
    f.add(AvgPool2D(2))
k = depth << scales
f.add(Conv2D(k, 3, padding='same', activation=activation))
f.add(Conv2D(latent, 3, padding='same'))

In [29]:
f.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_49 (Conv2D)           (None, 28, 28, 16)        32        
_________________________________________________________________
conv2d_50 (Conv2D)           (None, 28, 28, 16)        2320      
_________________________________________________________________
conv2d_51 (Conv2D)           (None, 28, 28, 16)        2320      
_________________________________________________________________
average_pooling2d_10 (Averag (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_52 (Conv2D)           (None, 14, 14, 32)        4640      
_________________________________________________________________
conv2d_53 (Conv2D)           (None, 14, 14, 32)        9248      
_________________________________________________________________
average_pooling2d_11 (Averag (None, 7, 7, 32)         

In [32]:
colors = 1

In [48]:
activation = LeakyReLU()
f = Sequential()
for scale in range(scales - 1, -1, -1):
    k = depth << scale
    f.add(Conv2D(k, 3, padding='same', activation=activation))
    f.add(Conv2D(k, 3, padding='same', activation=activation))
    f.add(UpSampling2D(2))
f.add(Conv2D(depth, 3, padding='same', activation=activation))
f.add(Conv2D(colors, 3, padding='same'))

In [49]:
f.build(input_shape=(None,1,1,2))

In [50]:
f.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_89 (Conv2D)           (None, 1, 1, 128)         2432      
_________________________________________________________________
conv2d_90 (Conv2D)           (None, 1, 1, 128)         147584    
_________________________________________________________________
up_sampling2d_20 (UpSampling (None, 2, 2, 128)         0         
_________________________________________________________________
conv2d_91 (Conv2D)           (None, 2, 2, 64)          73792     
_________________________________________________________________
conv2d_92 (Conv2D)           (None, 2, 2, 64)          36928     
_________________________________________________________________
up_sampling2d_21 (UpSampling (None, 4, 4, 64)          0         
_________________________________________________________________
conv2d_93 (Conv2D)           (None, 4, 4, 32)        

In [13]:
def Encoder(input_shape, scales, depth, latent):
    activation = LeakyReLU()
    f = Sequential()
    f.add(Conv2D(depth, 1, padding='same', input_shape=input_shape))
    for scale in range(scales):
        k = depth << scale
        f.add(Conv2D(k, 3, padding='same', activation=activation))
        f.add(Conv2D(k, 3, padding='same', activation=activation))
        f.add(AvgPool2D(2))
    k = depth << scales
    f.add(Conv2D(k, 3, padding='same', activation=activation))
    f.add(Conv2D(latent, 3, padding='same'))
    return f

def Decoder(input_shape, scales, depth, colors):
    activation = LeakyReLU()
    f = Sequential()
    for scale in range(scales - 1, -1, -1):
        k = depth << scale
        f.add(Conv2D(k, 3, padding='same', activation=activation))
        f.add(Conv2D(k, 3, padding='same', activation=activation))
        f.add(UpSampling2D(2))
    f.add(Conv2D(depth, 3, padding='same', activation=activation))
    f.add(Conv2D(colors, 3, padding='same'))
    return f


input_shape = (28,28,1)
encoder = Encoder(input_shape, scales, args['depth'], args['latent'])

input_shape = encoder.output_shape[1:]
decoder = Decoder(input_shape, scales, args['depth'], args['colors'])

In [14]:
decoder

<tensorflow.python.keras.engine.sequential.Sequential at 0x7f65b42d4080>

In [16]:
decoder.build(input_shape = (1, 28,28,1))

In [17]:
decoder.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_33 (Conv2D)           multiple                  1280      
_________________________________________________________________
conv2d_34 (Conv2D)           multiple                  147584    
_________________________________________________________________
up_sampling2d_4 (UpSampling2 multiple                  0         
_________________________________________________________________
conv2d_35 (Conv2D)           multiple                  73792     
_________________________________________________________________
conv2d_36 (Conv2D)           multiple                  36928     
_________________________________________________________________
up_sampling2d_5 (UpSampling2 multiple                  0         
_________________________________________________________________
conv2d_37 (Conv2D)           multiple                 