In [1]:
from time import time
from keras.layers import Input, Dense, Lambda, Conv2D, Conv2DTranspose, Activation, Flatten, Reshape
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Model
from keras import backend as K
from keras import metrics

Using TensorFlow backend.


In [2]:
class ConvVAE():
    """Convolutional variational autoencoder assembled from Keras layers.

    The constructor builds three models that share one input tensor:

    * ``encoder``     -- input image -> sampled latent vector ``z``
    * ``decoder``     -- latent vector -> reconstructed image
    * ``autoencoder`` -- ``decoder(encoder(input))``, the model that is trained

    Throughout the class ``zSigmaLog`` is used as the logarithm of the latent
    standard deviation: ``sampling`` scales the noise by ``exp(zSigmaLog)`` and
    the KL term in ``lossVAE`` uses ``2 * zSigmaLog`` and ``exp(zSigmaLog)**2``.
    """

    # Default standard deviation of the noise drawn in `sampling`; can be
    # overridden per instance through the `stdEps` constructor argument.
    stdEps = 1.0

    def __init__(self,
                 dimInput,
                 layerConv=[8, 32], sizeKernel=3, strides=2, actConv='relu', padding='same',
                 layerDense=[64, 2], actDense='relu',
                 ratRecon=0.998,
                 stdEps=None):
        """Build the encoder, the decoder and the combined autoencoder.

        Args:
            dimInput: Shape of one sample as (width, height, channels).
            layerConv: Filter count of each Conv2D layer of the encoder.
            sizeKernel: Kernel size shared by every (transposed) convolution.
            strides: Stride shared by every (transposed) convolution.
            actConv: Activation of the convolutional layers.
            padding: Padding mode of the convolutional layers.
            layerDense: Widths of the dense layers; the last entry is the
                latent dimension.
            actDense: Activation of the hidden dense layers.
            ratRecon: Weight of the reconstruction loss; the KL term is
                weighted by ``1 - ratRecon``.
            stdEps: Optional standard deviation of the sampling noise. When
                omitted, the class-level default ``ConvVAE.stdEps`` is used.
        """
        # Initialize some settings
        self.dimInput = dimInput  # dimInput is (width, height, channels)
        self.inputs = Input(shape=tuple(dimInput))
        self.dimEncode = layerDense[-1]
        self.ratRecon = ratRecon
        # Must be set before `encoding`, which calls `sampling` while building
        # the Lambda layer.
        if stdEps is not None:
            self.stdEps = stdEps

        self.encoding(layerConv, sizeKernel, strides, actConv, padding,
                      layerDense, actDense)

        self.decoding(layerConv, sizeKernel, strides, actConv, padding,
                      layerDense, actDense)

        self.autoencoder = Model(self.inputs,
                                 self.decoder(self.encoder(self.inputs)),
                                 name='autoencoder')

    def encoding(self,
                 layerConv, sizeKernel, strides, actConv, padding,
                 layerDense, actDense,
                 ):
        """Build ``self.encoder`` and record the tensors the loss needs.

        Sets ``self.shape`` (static shape of the last convolution output, used
        by ``decoding``), ``self.zMean`` and ``self.zSigmaLog``.
        """
        dimEncode = self.dimEncode
        x = self.inputs

        # Stack of Conv2D layers
        for filters in layerConv:
            x = Conv2D(filters=filters,
                       kernel_size=sizeKernel,
                       strides=strides,
                       activation=actConv,
                       padding=padding)(x)

        # Shape info needed to build the decoder model
        self.shape = K.int_shape(x)

        # Stack of Dense layers (every entry of layerDense except the last,
        # which is the latent dimension handled by the two heads below)
        x = Flatten()(x)
        for numFilt in layerDense[:-1]:
            x = Dense(numFilt, activation=actDense)(x)
        self.zMean = Dense(dimEncode)(x)
        # Linear head; interpreted as a log so the scale stays positive
        self.zSigmaLog = Dense(dimEncode)(x)

        # Sample the latent as the output and build the encoder pipeline
        z = Lambda(self.sampling)([self.zMean, self.zSigmaLog])
        self.encoder = Model(self.inputs, z, name='encoder')

    def decoding(self,
                 layerConv, sizeKernel, strides, actConv, padding,
                 layerDense, actDense,
                 ):
        """Build ``self.decoder``, mirroring the layers created by ``encoding``.

        Must be called after ``encoding`` because it reads ``self.shape``.
        """
        shape = self.shape

        # Build the decoder model
        inputLatent = Input(shape=(self.dimEncode,), name='decoder_input')
        x = inputLatent
        # Hidden dense widths in reverse order (latent entry excluded)
        for numFilt in layerDense[-2::-1]:
            x = Dense(numFilt, activation=actDense)(x)

        # Expand back to the size of the last encoder feature map
        x = Dense(shape[1] * shape[2] * shape[3])(x)
        x = Reshape((shape[1], shape[2], shape[3]))(x)

        # Stack of transposed Conv2D layers (all but the last encoder width,
        # in reverse order)
        for numFilt in layerConv[-2::-1]:
            x = Conv2DTranspose(filters=numFilt,
                                kernel_size=sizeKernel,
                                strides=strides,
                                activation=actConv,
                                padding=padding)(x)

        # Final transposed convolution back to the pixel channels. It is kept
        # linear on purpose: putting `actConv` (e.g. ReLU) in front of the
        # sigmoid would clamp the pre-activation to >= 0 and therefore every
        # reconstructed pixel to >= 0.5.
        x = Conv2DTranspose(filters=self.dimInput[-1],
                            kernel_size=sizeKernel,
                            strides=strides,
                            activation=None,
                            padding=padding)(x)

        # Reconstruct the pixels as the output and build the decoder pipeline
        outputs = Activation('sigmoid', name='decoder_output')(x)
        self.decoder = Model(inputLatent, outputs, name='decoder')

    def sampling(self, args):
        """Reparameterisation step: ``zMean + exp(zSigmaLog) * epsilon``.

        Args:
            args: Pair ``(zMean, zSigmaLog)``.

        Returns:
            The sampled latent, with ``epsilon`` drawn from a normal
            distribution of mean 0 and standard deviation ``self.stdEps``.
        """
        zMean, zSigmaLog = args
        # Use the instance/class attribute, not a notebook-level global
        epsilon = K.random_normal(shape=(K.shape(zMean)[0], self.dimEncode),
                                  mean=0., stddev=self.stdEps)
        return zMean + K.exp(zSigmaLog) * epsilon

    def lossVAE(self, args):
        """Create the Keras loss function for the autoencoder.

        Args:
            args: Pair ``(zMean, zSigmaLog)`` of encoder tensors.

        Returns:
            A ``loss(tensorInput, tensorDecode)`` callable that combines the
            binary cross-entropy of the flattened tensors with the KL term,
            weighted by ``self.ratRecon`` and ``1 - self.ratRecon``.
        """
        zMean, zSigmaLog = args

        def loss(tensorInput, tensorDecode):
            lossRecon = metrics.binary_crossentropy(K.flatten(tensorInput),
                                                    K.flatten(tensorDecode))
            # zSigmaLog is log(sigma): log(sigma^2) = 2 * zSigmaLog and
            # sigma^2 = exp(zSigmaLog)^2
            lossKL = - 0.5 * K.sum(1 + 2 * zSigmaLog - K.square(zMean) - K.square(K.exp(zSigmaLog)), axis=-1)
            # Use the weight given to the constructor, not a global
            return self.ratRecon * lossRecon + (1 - self.ratRecon) * lossKL
        return loss

    def train(self,
              xTrain, xValid,
              numEpochs=50, sizeBatch=32, nameOptim='adam',
              tempPathBest=None, patience=3,
              layerConv=[8, 32], sizeKernel=3, strides=2, actConv='relu', padding='same',
              layerDense=[64, 2], actDense='relu',
              ):
        """Compile and fit the autoencoder on ``xTrain`` against itself.

        Args:
            xTrain: Training images (used as both input and target).
            xValid: Validation images (used as both input and target).
            numEpochs: Maximum number of epochs.
            sizeBatch: Mini-batch size.
            nameOptim: Optimizer passed to ``compile``.
            tempPathBest: Optional file path; when given, the weights with the
                best validation loss are checkpointed there.
            patience: Epochs without validation-loss improvement before
                training stops early. Pass ``None`` to disable early stopping.
            layerConv, sizeKernel, strides, actConv, padding, layerDense,
            actDense: Accepted for backward compatibility only; the network
                is fixed at construction time and these values are ignored.

        Returns:
            Tuple ``(history, timeTrain)``: the object returned by ``fit`` and
            the wall-clock duration of the ``fit`` call in seconds.
        """
        self.autoencoder.compile(optimizer=nameOptim,
                                 loss=self.lossVAE([self.zMean, self.zSigmaLog]))

        # Honour the early-stopping / checkpoint arguments
        callbacks = []
        if patience is not None:
            callbacks.append(EarlyStopping(monitor='val_loss', patience=patience))
        if tempPathBest is not None:
            # Weights only: avoids having to serialise the Lambda layer and
            # the custom loss closure.
            callbacks.append(ModelCheckpoint(tempPathBest,
                                             monitor='val_loss',
                                             save_best_only=True,
                                             save_weights_only=True))

        # Train the autoencoder
        tic = time()
        history = self.autoencoder.fit(xTrain, xTrain,
                                       epochs=numEpochs,
                                       batch_size=sizeBatch,
                                       shuffle=True,
                                       validation_data=(xValid, xValid),
                                       callbacks=callbacks or None)
        timeTrain = time() - tic

        return history, timeTrain

In [3]:
# --- Training schedule ---
numEpochs, sizeBatch, patience = 50, 128, 5
nameOptim = 'adam'

# --- Network architecture ---
sizeKernel = 3
layerConv = [8, 32]
layerDense = [128, 32, 2]

# --- Loss / sampling settings ---
stdEps, ratRecon = 1.0, 0.999999
factNoise = 0

# --- Output location ---
modelPath = '../model/temp/'


In [4]:
import numpy as np
from keras.datasets import mnist

# Load MNIST and rescale the pixel values by 1/255 as float32.
(xTrain, yTrain), (xTest, yTest) = mnist.load_data()
xTrain, xTest = (images.astype('float32') / 255. for images in (xTrain, xTest))

# Dataset bookkeeping.
numTrain, numTest = len(xTrain), len(xTest)
numClass = 10
sizeDigit = xTrain.shape[1]

# Per-sample shape with a trailing channel axis of size 1.
dimInput = [*xTrain.shape[1:], 1]

# Append the channel axis to both splits.
xTrain = xTrain.reshape((numTrain, *dimInput))
xTest = xTest.reshape((numTest, *dimInput))
print(xTrain.shape, xTest.shape, sep='\n')

(60000, 28, 28, 1)
(10000, 28, 28, 1)


In [5]:
# Display the per-sample input shape that is handed to the ConvVAE constructor.
dimInput

[28, 28, 1]

In [6]:
# Pass the configuration values explicitly so the model is built from the
# settings cell rather than from the constructor defaults.
convVAE = ConvVAE(dimInput,
                  layerConv=layerConv, sizeKernel=sizeKernel,
                  layerDense=layerDense,
                  ratRecon=ratRecon)

In [7]:
# Print the layer tables of the three models built by the constructor:
# the encoder, the decoder, and the combined autoencoder.
convVAE.encoder.summary()
convVAE.decoder.summary()
convVAE.autoencoder.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 14, 14, 8)    80          input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 7, 7, 32)     2336        conv2d_1[0][0]                   
__________________________________________________________________________________________________
flatten_1 (Flatten)             (None, 1568)         0           conv2d_2[0][0]                   
__________________________________________________________________________________________________
dense_1 (D

In [8]:
# Keep the returned training history and wall-clock time for later inspection
# instead of discarding them. The batch size is deliberately 32 here, not the
# `sizeBatch` from the settings cell.
history, timeTrain = convVAE.train(xTrain, xTest,
                                   numEpochs=numEpochs,
                                   sizeBatch=32,
                                   nameOptim=nameOptim,
                                   patience=patience)

Train on 60000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50

KeyboardInterrupt: 