In [1]:
%matplotlib inline

In [2]:
import matplotlib.pyplot as plt
import numpy as np

# Dataset

## Reference

[python读取MNIST image数据](http://blog.csdn.net/u010165147/article/details/50599490)

In [3]:
import gzip, struct, os

def _read(image, label):
    minist_dir = './data/mnist'
    with gzip.open(os.path.join(minist_dir, label)) as flbl:
        magic, num = struct.unpack(">II", flbl.read(8))
        label = np.fromstring(flbl.read(), dtype=np.int8)
    with gzip.open(os.path.join(minist_dir, image), 'rb') as fimg:
        magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16))
        image = np.fromstring(fimg.read(), dtype=np.uint8).reshape(len(label), rows, cols)
    return image,label

def read_data():
    """Load the MNIST training and test splits.

    Returns:
        A 4-tuple `(train_img, train_label, test_img, test_label)`, where each
        image/label pair comes from one call to `_read`.
    """
    # The training pair is read first, then the test pair.
    (train_img, train_label), (test_img, test_label) = (
        _read('train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz'),
        _read('t10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz'),
    )
    return train_img, train_label, test_img, test_label

In [4]:
def read_data_2():
    """Load all of MNIST as one shuffled set with one-hot labels.

    Returns:
        A tuple `(X, y_vec)`. `X` holds the merged train and test images as
        float64 values divided by 255. `y_vec` is a float64 array of shape
        `(len(X), 10)` with a single 1.0 per row at the label's index.

    Side effects:
        Reseeds NumPy's global random generator with 547.
    """
    trX, trY, teX, teY = read_data()

    # The generative network is trained unsupervised and needs no test set,
    # so the training and test splits are merged.
    X = np.concatenate((trX, teX), axis=0)
    y = np.concatenate((trY, teY), axis=0)

    # Shuffle images and labels in place. Reseeding with the same value before
    # each shuffle is what keeps images and labels aligned.
    seed = 547
    np.random.seed(seed)
    np.random.shuffle(X)
    np.random.seed(seed)
    np.random.shuffle(y)

    # Convert labels to one-hot rows. np.float64 is used because the np.float
    # alias no longer exists in current NumPy.
    y_vec = np.zeros((len(y), 10), dtype=np.float64)
    y_vec[np.arange(len(y)), y] = 1.0

    return X.astype(np.float64) / 255, y_vec

# Operations

In [5]:
import tensorflow as tf
from tensorflow.contrib.layers.python.layers import batch_norm as batch_norm

In [6]:
# 常数偏置
def bias(name, shape, bias_start = 0.0, trainable = True):
    """Get a float32 variable initialised to a constant value.

    Args:
        name: Variable name passed to `tf.get_variable`.
        shape: Variable shape.
        bias_start: Constant used to fill the variable on initialisation.
        trainable: Forwarded to `tf.get_variable`.

    Returns:
        The variable returned by `tf.get_variable`.
    """
    float_type = tf.float32
    constant_init = tf.constant_initializer(bias_start, dtype = float_type)
    return tf.get_variable(name, shape, float_type, trainable = trainable,
                           initializer = constant_init)

In [7]:
# 随机权重
def weight(name, shape, stddev = 0.02, trainable = True):
    """Get a float32 variable initialised from a normal distribution.

    Args:
        name: Variable name passed to `tf.get_variable`.
        shape: Variable shape.
        stddev: Standard deviation given to `tf.random_normal_initializer`.
        trainable: Forwarded to `tf.get_variable`.

    Returns:
        The variable returned by `tf.get_variable`.
    """
    float_type = tf.float32
    normal_init = tf.random_normal_initializer(stddev = stddev, dtype = float_type)
    return tf.get_variable(name, shape, float_type, trainable = trainable,
                           initializer = normal_init)

In [8]:
# 全连接层
def fully_connected(value, output_shape, name = 'fully_connected', with_w = False):
    """Apply a dense layer: `value @ weights + biases`.

    Args:
        value: 2-D input tensor; its second dimension is the input width.
        output_shape: Number of output units.
        name: Variable scope holding the 'weights' and 'biases' variables.
        with_w: When true, also return the two variables.

    Returns:
        The output tensor, or `(output, weights, biases)` if `with_w` is set.
    """
    in_dim = value.get_shape().as_list()[1]
    with tf.variable_scope(name):
        weights = weight('weights', [in_dim, output_shape], 0.02)
        biases = bias('biases', [output_shape], 0.0)
    # The matmul itself is built outside the variable scope.
    result = tf.matmul(value, weights) + biases
    if with_w:
        return result, weights, biases
    return result

In [9]:
# Leaky-ReLU层
def lrelu(x, leak = 0.2, name = 'lrelu'):
    """Leaky ReLU: element-wise maximum of `x` and `leak * x`.

    Args:
        x: Input tensor.
        leak: Slope applied to the scaled copy of `x`.
        name: Used both as the variable scope and as the op name.

    Returns:
        The activated tensor.
    """
    with tf.variable_scope(name):
        leaked = leak * x
        return tf.maximum(x, leaked, name = name)

In [10]:
# ReLU层
def relu(value, name = 'relu'):
    """Apply `tf.nn.relu` to `value` inside the variable scope `name`."""
    with tf.variable_scope(name):
        activated = tf.nn.relu(value)
        return activated

In [11]:
# 反卷积层
def deconv2d(value, output_shape, k_h = 5, k_w = 5, strides = [1, 2, 2, 1], name = 'deconv2d', with_w = False):
    """Transposed-convolution layer with a bias term.

    Args:
        value: Input tensor; its last dimension is the input channel count.
        output_shape: Full output shape; its last entry is the output
            channel count.
        k_h: Kernel height.
        k_w: Kernel width.
        strides: Strides forwarded to `tf.nn.conv2d_transpose`.
        name: Variable scope holding the 'weights' and 'biases' variables.
        with_w: When true, also return the two variables.

    Returns:
        The output tensor, or `(output, weights, biases)` if `with_w` is set.
    """
    with tf.variable_scope(name):
        out_channels = output_shape[-1]
        in_channels = value.get_shape()[-1]
        # Filter layout for conv2d_transpose: [height, width, out, in].
        weights = weight('weights', [k_h, k_w, out_channels, in_channels])
        transposed = tf.nn.conv2d_transpose(value, weights, output_shape, strides = strides)
        biases = bias('biases', [out_channels])
        with_bias = tf.nn.bias_add(transposed, biases)
        # Reshape to the static shape of the transposed-convolution output.
        deconv = tf.reshape(with_bias, transposed.get_shape())
        if not with_w:
            return deconv
        return deconv, weights, biases

In [12]:
# 卷积层
def conv2d(value, output_dim, k_h = 5, k_w = 5, strides = [1, 2, 2, 1], name = 'conv2d'):
    """Convolution layer with 'SAME' padding and a bias term.

    Args:
        value: Input tensor; its last dimension is the input channel count.
        output_dim: Number of output channels.
        k_h: Kernel height.
        k_w: Kernel width.
        strides: Strides forwarded to `tf.nn.conv2d`.
        name: Variable scope holding the 'weights' and 'biases' variables.

    Returns:
        The convolved tensor with the bias added.
    """
    with tf.variable_scope(name):
        in_channels = value.get_shape()[-1]
        weights = weight('weights', [k_h, k_w, in_channels, output_dim])
        convolved = tf.nn.conv2d(value, weights, strides = strides, padding = 'SAME')
        biases = bias('biases', [output_dim])
        with_bias = tf.nn.bias_add(convolved, biases)
        # Reshape to the static shape of the convolution output.
        return tf.reshape(with_bias, convolved.get_shape())

In [13]:
# Batch Normalization层
def batch_norm_layer(value, is_train = True, name = 'batch_norm'):
    """Batch-normalisation layer built on the contrib `batch_norm`.

    Args:
        value: Input tensor.
        is_train: Python truth value. It is passed through as `is_training`;
            when false the layer is additionally created with `reuse = True`.
        name: Variable scope for the layer.

    Returns:
        The normalised tensor.
    """
    with tf.variable_scope(name) as scope:
        options = dict(decay = 0.9, epsilon = 1e-5, scale = True,
                       is_training = is_train,
                       updates_collections = None, scope = scope)
        if not is_train:
            # Inference mode asks for the existing variables explicitly.
            options['reuse'] = True
        return batch_norm(value, **options)

# Model

In [14]:
import scipy.misc

# 保存图片
def save_images(images, size, path):
    """
    Tile a batch of images into one grid image and save it to `path`.

    Images are placed row by row: image `idx` goes to grid row
    `idx // size[1]` and grid column `idx % size[1]`. Unused cells stay zero.

    Recommended grid sizes:
        batch size 64 -> size [8, 8]
        batch size 32 -> size [6, 6]

    Args:
        images: Array-like of shape (batch, h, w, c). Each image must
            broadcast against an (h, w, 3) canvas cell.
        size: [rows, cols] of the grid.
        path: Output file path. Its directory is created if it is missing.

    Returns:
        Whatever `scipy.misc.imsave` returns.

    Raises:
        ValueError: If the batch has more images than the grid has cells.
    """
    images = np.asarray(images)
    rows, cols = size[0], size[1]
    if len(images) > rows * cols:
        raise ValueError(
            'cannot place %d images on a %d x %d grid'
            % (len(images), rows, cols))

    # Pixel values are pasted unchanged. No (x + 1) / 2 rescale is applied,
    # which matches the sigmoid generator output used in this notebook.
    h, w = images.shape[1], images.shape[2]

    # One large canvas that holds every image of the batch.
    merge_img = np.zeros((h * rows, w * cols, 3))

    # Copy each image of the batch into its cell of the canvas.
    for idx, image in enumerate(images):
        i = idx % cols
        j = idx // cols
        merge_img[j*h:j*h+h, i*w:i*w+w, :] = image

    # Create the target directory if needed so the write does not fail.
    out_dir = os.path.dirname(path)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # NOTE(review): scipy.misc.imsave is absent from newer SciPy releases;
    # confirm the pinned SciPy version still provides it.
    return scipy.misc.imsave(path, merge_img)

In [15]:
# Number of images per batch. The discriminator reshape, the real-image
# placeholder and the noise batches in this notebook all use this fixed size.
BATCH_SIZE = 64

In [16]:
# 定义生成器
def generator(z, train = True):
    """Build the generator network that maps noise to images.

    Args:
        z: 2-D noise tensor. Its first dimension must equal BATCH_SIZE when
            the graph runs, because the reshape and both output shapes
            below are fixed to BATCH_SIZE.
        train: Python bool forwarded to every batch-norm layer as `is_train`.

    Returns:
        A tensor of shape [BATCH_SIZE, 28, 28, 1] produced by a sigmoid.
    """
    # Fully connected -> batch norm -> ReLU.
    h1 = tf.nn.relu(batch_norm_layer(fully_connected(z, 1024, 'g_fully_connected1'),
                                     is_train = train, name = 'g_bn1'))

    # Second fully connected block, reshaped into a 7x7 map with 128 channels.
    h2 = tf.nn.relu(batch_norm_layer(fully_connected(h1, 128 * 49, 'g_fully_connected2'),
                                     is_train = train, name = 'g_bn2'))
    # BATCH_SIZE replaces the literal 64 so the generator stays consistent
    # with the discriminator and the placeholders.
    h2 = tf.reshape(h2, [BATCH_SIZE, 7, 7, 128], name = 'h2_reshape')

    # Transposed convolution up to 14x14, then batch norm and ReLU.
    h3 = tf.nn.relu(batch_norm_layer(deconv2d(h2, [BATCH_SIZE, 14, 14, 128],
                                              name = 'g_deconv2d3'),
                                     is_train = train, name = 'g_bn3'))

    # Transposed convolution up to 28x28x1. The sigmoid squashes the values
    # into the 0~1 range.
    h4 = tf.nn.sigmoid(deconv2d(h3, [BATCH_SIZE, 28, 28, 1],
                                name = 'g_deconv2d4'), name = 'generate_image')

    return h4

In [17]:
# 定义判别器
def discriminator(image):
    """Build the discriminator network.

    Args:
        image: Batch of images. The batch dimension must equal BATCH_SIZE
            because the flattening reshape is fixed to it.

    Returns:
        A tuple `(probability, logits)`: the sigmoid of the final dense layer
        and the raw output of that layer.
    """
    # Block 1: convolution -> leaky ReLU.
    conv1 = conv2d(image, 11, name = 'd_conv2d1')
    h1 = lrelu(conv1, name = 'd_lrelu1')

    # Block 2: convolution -> batch norm -> leaky ReLU, then flatten.
    conv2 = conv2d(h1, 74, name = 'd_conv2d2')
    norm2 = batch_norm_layer(conv2, name = 'd_bn2')
    act2 = lrelu(norm2, name = 'd_lrelu2')
    h2 = tf.reshape(act2, [BATCH_SIZE, -1], name = 'reshape_lrelu2_to_2d')

    # Block 3: dense -> batch norm -> leaky ReLU.
    dense3 = fully_connected(h2, 1024, name = 'd_fully_connected3')
    norm3 = batch_norm_layer(dense3, name = 'd_bn3')
    h3 = lrelu(norm3, name = 'd_lrelu3')

    # Final dense layer with a single output unit (the logit).
    logits = fully_connected(h3, 1, name = 'd_result_without_sigmoid')
    probability = tf.nn.sigmoid(logits, name = 'd_result_with_sigmoid')

    return probability, logits

In [18]:
# 定义训练过程中的采样函数
def sampler(z, train = True):
    """Sampling function used during training; delegates to `generator`.

    Args:
        z: Noise tensor passed to `generator`.
        train: Passed to `generator` as its `train` argument.

    Returns:
        The tensor returned by `generator`.
    """
    generated = generator(z, train)
    return generated

# Training

In [19]:
def train():
    """Build the GAN graph and train it on MNIST.

    Summaries and checkpoints are written to ./logs and sample grids to
    ./samples. The data is traversed 25 times. Every step runs one
    discriminator update followed by two generator updates on the same noise
    batch. The session and the summary writer are closed even if training
    raises.

    Returns:
        None.
    """

    # Step counter handed to both optimizers' minimize() calls.
    global_step = tf.Variable(
        0, name = 'global_step', trainable = False
    )
    # Directory for summaries and checkpoints.
    train_dir = './logs'
    # Directory for the periodically saved sample grids.
    samples_path = './samples'

    # Placeholders: `images` feeds real images to the discriminator and `z`
    # is the random noise fed to the generator.
    images = tf.placeholder(
        tf.float32, [BATCH_SIZE, 28, 28, 1],
        name = 'real_images'
    )
    z = tf.placeholder(
        tf.float32, [None, 100], name = 'z'
    )

    with tf.variable_scope(tf.get_variable_scope()) as scope:
        # Images produced by the generator.
        G = generator(z)
        # Real images go through the discriminator.
        D, D_logits = discriminator(images)
        # Everything built from here on reuses the variables created above.
        scope.reuse_variables()
        # Generated images go through the same discriminator.
        D_, D_logits_ = discriminator(G)
        # Sampler used to produce the preview grids.
        samples = sampler(z)

    # Losses.
    d_loss_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits = D_logits, labels = tf.ones_like(D)
        )
    )
    d_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits = D_logits_, labels = tf.zeros_like(D_)
        )
    )
    d_loss = d_loss_real + d_loss_fake
    g_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits = D_logits_, labels = tf.ones_like(D_)
        )
    )

    # Summary ops.
    z_sum = tf.summary.histogram('z', z)
    d_sum = tf.summary.histogram('d', D)
    d__sum = tf.summary.histogram('d_', D_)
    g_sum = tf.summary.image('G', G)

    d_loss_real_sum = tf.summary.scalar(
        'd_loss_real', d_loss_real
    )
    d_loss_fake_sum = tf.summary.scalar(
        'd_loss_fake', d_loss_fake
    )
    d_loss_sum = tf.summary.scalar('d_loss', d_loss)
    g_loss_sum = tf.summary.scalar('g_loss', g_loss)

    # Merge the summaries per network.
    G_sum = tf.summary.merge(
        [z_sum, d__sum, g_sum, d_loss_fake_sum, g_loss_sum]
    )
    D_sum = tf.summary.merge(
        [z_sum, d_sum, d_loss_real_sum, d_loss_sum]
    )

    # Variables updated by each network, selected by name fragment.
    t_vars = tf.trainable_variables()
    d_vars = [var for var in t_vars if 'd_' in var.name]
    g_vars = [var for var in t_vars if 'g_' in var.name]

    saver = tf.train.Saver()

    # Optimizers.
    # See https://github.com/tensorflow/tensorflow/issues/6220
    d_optim = tf.train.AdamOptimizer(
        0.0002, beta1 = 0.5
    ).minimize(
        d_loss,
        var_list = d_vars,
        global_step = global_step
    )
    g_optim = tf.train.AdamOptimizer(
        0.0002, beta1 = 0.5
    ).minimize(
        g_loss,
        var_list = g_vars,
        global_step = global_step
    )

    # Session. Everything after this line runs inside try/finally so that the
    # session and the summary writer are released on any error.
    sess = tf.Session()
    writer = None
    try:
        init = tf.global_variables_initializer()
        writer = tf.summary.FileWriter(train_dir, sess.graph)

        # Load the data, then draw the fixed noise used for all sample grids.
        data_x, _ = read_data_2()
        sample_z = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))
        sess.run(init)

        # Sample grids are written here; make sure the directory exists.
        if not os.path.isdir(samples_path):
            os.makedirs(samples_path)

        # Number of full batches per pass. This equals the previously
        # hard-coded 1093 when the merged set holds 70000 images.
        batch_idxs = len(data_x) // BATCH_SIZE

        # Go through the whole data set 25 times.
        index = 1
        for epoch in range(25):
            for idx in range(batch_idxs):
                start = idx * BATCH_SIZE
                batch_images = data_x[start : start + BATCH_SIZE] \
                                .reshape([BATCH_SIZE, 28, 28, 1])
                batch_z = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))

                # Update D.
                _, summary_str = sess.run(
                    [d_optim, D_sum],
                    feed_dict = {
                        images: batch_images,
                        z: batch_z
                    }
                )
                writer.add_summary(summary_str, index)

                # Update G twice with the same noise batch.
                for _ in range(2):
                    _, summary_str = sess.run(
                        [g_optim, G_sum],
                        feed_dict = {
                            z: batch_z
                        }
                    )
                    writer.add_summary(summary_str, index)

                index = index + 1

                # Evaluate the current losses for the progress printout.
                errD_fake, errD_real, errG = sess.run(
                    [d_loss_fake, d_loss_real, g_loss],
                    feed_dict = {
                        images: batch_images,
                        z: batch_z
                    }
                )

                if idx % 20 == 0:
                    print('Epoch: [%2d] [%4d/%4d] d_loss: %.8f, g_loss: %.8f' \
                          % (epoch, idx, batch_idxs, errD_fake + errD_real, errG))

                # Every 100 batches, generate a grid from the fixed noise and
                # save it under ./samples.
                if idx % 100 == 1:
                    sample = sess.run(samples, feed_dict = {z: sample_z})
                    save_images(
                        sample, [8, 8],
                        os.path.join(
                            samples_path,
                            'test_%d_epoch_%d.png' % (epoch, idx)
                        )
                    )
                    print('save down')

                # Every 500 batches, save the model.
                # NOTE(review): the step suffix restarts every epoch, so
                # checkpoint file names repeat across epochs.
                if idx % 500 == 2:
                    checkpoint_path = os.path.join(train_dir, 'DCGAN_model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = idx + 1)
    finally:
        if writer is not None:
            writer.close()
        sess.close()

In [20]:
# Run the full training loop; it prints progress and writes to ./logs and ./samples.
train()

Epoch: [ 0] [   0/1093] d_loss: 1.57801890, g_loss: 0.54761648
save down
Epoch: [ 0] [  20/1093] d_loss: 1.50487578, g_loss: 0.53531468
Epoch: [ 0] [  40/1093] d_loss: 1.48597860, g_loss: 0.54527986
Epoch: [ 0] [  60/1093] d_loss: 1.41146767, g_loss: 0.57105857
Epoch: [ 0] [  80/1093] d_loss: 1.34436572, g_loss: 0.59605885
Epoch: [ 0] [ 100/1093] d_loss: 1.26468372, g_loss: 0.62694478
save down
Epoch: [ 0] [ 120/1093] d_loss: 1.32828057, g_loss: 0.63549292
Epoch: [ 0] [ 140/1093] d_loss: 1.26377702, g_loss: 0.65069610
Epoch: [ 0] [ 160/1093] d_loss: 1.34403205, g_loss: 0.58028102
Epoch: [ 0] [ 180/1093] d_loss: 1.25369883, g_loss: 0.64316398
Epoch: [ 0] [ 200/1093] d_loss: 1.29723299, g_loss: 0.65255749
save down
Epoch: [ 0] [ 220/1093] d_loss: 1.29197383, g_loss: 0.64055061
Epoch: [ 0] [ 240/1093] d_loss: 1.26878023, g_loss: 0.65366888
Epoch: [ 0] [ 260/1093] d_loss: 1.31012058, g_loss: 0.64424610
Epoch: [ 0] [ 280/1093] d_loss: 1.31362200, g_loss: 0.59152973
Epoch: [ 0] [ 300/1093] d

# Evaluation

In [20]:
def eval():
    """Restore the latest checkpoint and save one grid of generated images.

    The generator is built in inference mode (`train=False`), fed with
    BATCH_SIZE uniform noise vectors, and the result is written to
    ./eval/test.png as an 8x8 grid.

    Returns:
        None.

    Raises:
        RuntimeError: If ./logs contains no checkpoint. The generator's
            variables are never initialised here, so without a restore the
            run could only fail later with a less helpful error.
    """

    test_dir = './eval'
    checkpoint_dir = './logs'

    z = tf.placeholder(tf.float32, [None, 100], name='z')

    G = generator(z, train=False)
    sample_z = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))

    saver = tf.train.Saver()

    with tf.Session() as sess:
        print('Reading Checkpoints...')
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        latest = tf.train.latest_checkpoint(checkpoint_dir) if ckpt else None

        if not latest:
            raise RuntimeError(
                'No checkpoint found in %s; run train() first.' % checkpoint_dir)
        saver.restore(sess, latest)

        samples = sess.run(G, feed_dict = {z: sample_z})

        # Make sure the output directory exists before writing the grid.
        if not os.path.isdir(test_dir):
            os.makedirs(test_dir)
        save_images(samples, [8, 8], os.path.join(test_dir, 'test.png'))
        print('Sample image saved.')

In [21]:
# Generate a sample grid from the latest checkpoint in ./logs and save it to ./eval/test.png.
eval()

Reading Checkpoints...
Sample image saved.
