<a href="https://colab.research.google.com/github/niyanchun/machine-vision-in-action/blob/master/autoencoder/autoencoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
%tensorflow_version 2.x

import tensorflow
import cv2

print(tensorflow.__version__)

# Use a non-interactive matplotlib backend so figures can be saved to disk
# without a display (the backend is selected by matplotlib.use("Agg") below).
import matplotlib

# from pyimagesearch.conv_autoencoder import ConvAutoencoder
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
import numpy as np
import argparse
import cv2

matplotlib.use("Agg")

from tensorflow.keras.layers import (BatchNormalization, Conv2D, Conv2DTranspose,
                                     LeakyReLU, Activation, Flatten,
                                     Dense, Reshape, Input)
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

class ConvAutoencoder:
    """Factory for a small convolutional autoencoder built from Keras layers."""

    @staticmethod
    def build(width, height, depth, filters=(32, 64), latentDim=16):
        """Build the encoder, the decoder and the combined autoencoder.

        Args:
            width: Input image width in pixels.
            height: Input image height in pixels.
            depth: Number of input channels (also the number of output
                channels of the decoder).
            filters: Number of convolution filters for each encoder stage;
                the decoder uses the same values in reverse order.
            latentDim: Size of the latent vector produced by the encoder.

        Returns:
            A tuple ``(encoder, decoder, autoencoder)`` of Keras ``Model``
            objects, where ``autoencoder`` is ``decoder(encoder(inputs))``.

        Note:
            Every encoder stage uses ``strides=2`` and every decoder stage
            upsamples with ``strides=2``. The reconstruction should therefore
            match the input size only when ``width`` and ``height`` survive
            the repeated halving without rounding (e.g. 28 -> 14 -> 7); this
            is not validated here.
        """
        # Channels-last layout: (height, width, depth), channel axis is -1.
        inputShape = (height, width, depth)
        chanDim = -1

        # define the input to the encoder
        inputs = Input(shape=inputShape)
        x = inputs

        # Encoder stages: strided conv => LeakyReLU => batch normalization.
        for f in filters:
            x = Conv2D(f, (3, 3), strides=2, padding="same")(x)
            x = LeakyReLU(alpha=0.2)(x)
            x = BatchNormalization(axis=chanDim)(x)

        # Remember the pre-flatten volume shape so the decoder can rebuild
        # it, then flatten the network and construct the latent vector.
        volumeSize = K.int_shape(x)
        x = Flatten()(x)
        latent = Dense(latentDim)(x)

        encoder = Model(inputs, latent, name="encoder")
        # summary() prints the table itself and returns None, so wrapping it
        # in print() would emit a stray "None" line.
        encoder.summary()

        # The decoder takes a latent vector and expands it back to the
        # volume the encoder had just before flattening.
        latentInputs = Input(shape=(latentDim,))
        # np.prod yields a NumPy scalar; pass a plain int as the unit count.
        x = Dense(int(np.prod(volumeSize[1:])))(latentInputs)
        x = Reshape((volumeSize[1], volumeSize[2], volumeSize[3]))(x)

        # Decoder stages mirror the encoder, walking the filters in reverse.
        for f in filters[::-1]:
            x = Conv2DTranspose(f, (3, 3), strides=2, padding="same")(x)
            x = LeakyReLU(alpha=0.2)(x)
            x = BatchNormalization(axis=chanDim)(x)

        # Final transposed conv restores the channel count; the sigmoid keeps
        # the outputs in [0, 1].
        x = Conv2DTranspose(depth, (3, 3), padding="same")(x)
        outputs = Activation("sigmoid")(x)

        decoder = Model(latentInputs, outputs, name="decoder")

        # The autoencoder is simply encoder followed by decoder.
        autoencoder = Model(inputs, decoder(encoder(inputs)), name="autoencoder")

        return encoder, decoder, autoencoder


# ap = argparse.ArgumentParser()
# ap.add_argument("-s", "--samples", type=int, default=8,
#                 help="# number of samples to visualize when decoding")
# ap.add_argument("-o", "--output", type=str, default="output.png",
#                 help="path to output visualization file")
# ap.add_argument("-p", "--plot", type=str, default="plot.png",
#                 help="path to output plot file")
# args = vars(ap.parse_args())

# Fixed settings standing in for command-line arguments in the notebook.
args = {}
args["samples"] = 8  # number of test samples to visualize when decoding
args["output"] = "output.png"  # path of the reconstruction montage image
args["plot"] = "plot.png"  # path of the training-loss plot

# Training hyperparameters.
Epoch = 25
BatchSize = 32

print("loading MNIST dataset...")
# Labels are discarded: the autoencoder is trained to reproduce its input.
((trainX, _), (testX, _)) = mnist.load_data()

# Add a trailing channel axis and scale pixel values to [0, 1].
trainX = np.expand_dims(trainX, axis=-1)
testX = np.expand_dims(testX, axis=-1)
trainX = trainX.astype("float32") / 255.0
testX = testX.astype("float32") / 255.0

print("building autoencoder...")
(encoder, decoder, autoencoder) = ConvAutoencoder.build(28, 28, 1)
# `lr` is a deprecated alias that newer Keras releases reject;
# `learning_rate` is the supported keyword across TF 2.x.
opt = Adam(learning_rate=1e-3)
autoencoder.compile(loss="mse", optimizer=opt)

# Inputs double as targets; the test split is used as validation data.
H = autoencoder.fit(trainX, trainX, validation_data=(testX, testX), epochs=Epoch, batch_size=BatchSize)

# One x-axis position per training epoch.
N = np.arange(0, Epoch)
plt.style.use("ggplot")
plt.figure()
# "loss" is the loss on the training set, "val_loss" the loss on the test set.
# If "loss" keeps decreasing and converges while "val_loss" rises and does not
# converge, the model is overfitting.
for history_key, curve_label in (("loss", "train_loss"), ("val_loss", "val_loss")):
    plt.plot(N, H.history[history_key], label=curve_label)
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
# The figure is written to disk rather than shown interactively.
plt.savefig(args["plot"])

print("making predictions...")
decoded = autoencoder.predict(testX)

# Build one "original | reconstruction" strip per sample. Strips are collected
# in a list and stacked once at the end instead of re-copying a growing array
# on every iteration.
rows = []
# Clamp to the number of available test images so an oversized "samples"
# setting cannot index past the end of the data.
for i in range(min(args["samples"], len(testX))):
    # Scale back from [0, 1] to 8-bit pixel values.
    original = (testX[i] * 255).astype("uint8")
    recon = (decoded[i] * 255).astype("uint8")

    output = np.hstack([original, recon])
    rows.append(output)

# With zero samples there is nothing to stack or write.
outputs = np.vstack(rows) if rows else None

if outputs is None:
    print("no samples requested, skipping " + args["output"])
elif not cv2.imwrite(args["output"], outputs):
    # cv2.imwrite reports failure through its return value, not an exception.
    print("failed to write " + args["output"])

2.1.0
loading MNIST dataset...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
building autoencoder...
Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 14, 14, 32)        320       
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 14, 14, 32)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 14, 14, 32)        128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 7, 7, 64)          18496     
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 7, 7, 64