In [1]:
import  tensorflow as tf
from    tensorflow import keras
from    keras import layers

In [9]:
class Generator(keras.Model):
    """Generator network: turns a latent vector into a 64x64 RGB image.

    Shape flow for the configuration below:
    [b, 100] -> [b, 3*3*512] -> [b, 3, 3, 512] -> [b, 9, 9, 256]
    -> [b, 21, 21, 128] -> [b, 64, 64, 3].
    The final tanh keeps every output value inside [-1, 1].
    """

    def __init__(self):
        super().__init__()

        # Dense projection of the latent vector; reshaped to a 3x3x512 map in call().
        self.fc = layers.Dense(3 * 3 * 512)

        # Conv2DTranspose is the upsampling counterpart of Conv2D: the output is
        # spatially larger than the input, controlled by stride and padding.
        # Output size: (input_size - 1) * strides - 2 * padding + kernel_size
        self.conv1 = layers.Conv2DTranspose(
            filters=256, kernel_size=3, strides=3, padding='valid')  # 3 -> 9
        self.bn1 = layers.BatchNormalization()

        self.conv2 = layers.Conv2DTranspose(
            filters=128, kernel_size=5, strides=2, padding='valid')  # 9 -> 21
        self.bn2 = layers.BatchNormalization()

        self.conv3 = layers.Conv2DTranspose(
            filters=3, kernel_size=4, strides=3, padding='valid')  # 21 -> 64

    def call(self, inputs, training=None):
        """Generate images from latent vectors.

        Args:
            inputs: latent batch; the Dense layer maps it to 3*3*512 features.
            training: forwarded to the BatchNormalization layers.

        Returns:
            Tensor of generated images after tanh.
        """
        # [b, z_dim] -> [b, 3*3*512] -> [b, 3, 3, 512]
        feature_map = tf.reshape(self.fc(inputs), [-1, 3, 3, 512])
        feature_map = tf.nn.leaky_relu(feature_map)

        # Two upsampling stages: transposed conv -> batch norm -> leaky ReLU.
        stage1 = self.bn1(self.conv1(feature_map), training=training)
        stage1 = tf.nn.leaky_relu(stage1)
        stage2 = self.bn2(self.conv2(stage1), training=training)
        stage2 = tf.nn.leaky_relu(stage2)

        # Final upsampling to 3 channels, squashed into [-1, 1].
        return tf.tanh(self.conv3(stage2))


class Discriminator(keras.Model):
    """Discriminator network: maps an image batch to one raw logit per image.

    For a [b, 64, 64, 3] input the spatial size shrinks 64 -> 20 -> 6 -> 1
    through the three strided convolutions, then the features are flattened
    and projected to [b, 1]. No sigmoid is applied here.
    """

    def __init__(self):
        super().__init__()

        # [b, 64, 64, 3] => [b, 1]
        # Output size ('valid'): floor((input_size - kernel_size) / strides + 1)
        # Output size ('same'):  ceil(input_size / strides)
        self.conv1 = layers.Conv2D(
            filters=64, kernel_size=5, strides=3, padding='valid')

        # Plain ReLU is avoided in this network because its gradient is zero
        # for x < 0; leaky ReLU is applied in call() instead.
        self.conv2 = layers.Conv2D(
            filters=128, kernel_size=5, strides=3, padding='valid')
        self.bn2 = layers.BatchNormalization()

        self.conv3 = layers.Conv2D(
            filters=256, kernel_size=5, strides=3, padding='valid')
        self.bn3 = layers.BatchNormalization()

        # Flatten [b, h, w, c] => [b, h*w*c], then a single-unit linear head.
        self.flatten = layers.Flatten()
        self.fc = layers.Dense(1)


    def call(self, inputs, training=None):
        """Score a batch of images.

        Args:
            inputs: image batch.
            training: forwarded to the BatchNormalization layers.

        Returns:
            Raw logits of shape [b, 1].
        """
        # Leaky ReLU instead of ReLU: ReLU has zero gradient for x < 0, while
        # leaky ReLU keeps a small non-zero slope for negative inputs.
        features = tf.nn.leaky_relu(self.conv1(inputs))

        features = self.bn2(self.conv2(features), training=training)
        features = tf.nn.leaky_relu(features)

        features = self.bn3(self.conv3(features), training=training)
        features = tf.nn.leaky_relu(features)

        # [b, h, w, c] => [b, -1] => [b, 1]
        return self.fc(self.flatten(features))

In [10]:
# Smoke test: instantiate both networks and push random tensors through them
# to check that the output shapes are as expected.
d = Discriminator()
g = Generator()


# Random stand-ins for a batch of two 64x64 RGB images and two latent vectors.
x = tf.random.normal([2, 64, 64, 3])
z = tf.random.normal([2, 100])

# Despite its name, `prob` holds raw logits: Discriminator.call returns the
# Dense(1) output without applying a sigmoid.
prob = d(x)
print(prob)
# The generator output shape is printed to confirm it matches the image shape.
x_hat = g(z)
print(x_hat.shape)

tf.Tensor(
[[0.15043981]
 [0.07876404]], shape=(2, 1), dtype=float32)
(2, 64, 64, 3)


In [11]:
import  os
import  numpy as np
import  tensorflow as tf
from    tensorflow import keras
from PIL import Image
import  glob
from    dataset import make_anime_dataset

In [12]:
def save_result(val_out, val_block_size, image_path, color_mode):
    """Tile a batch of images into a single grid image and save it.

    Images are laid out row by row, ``val_block_size`` per row. If the batch
    size is not a multiple of ``val_block_size`` the last row is padded with
    black tiles, so no image is dropped.

    Args:
        val_out: array-like of shape [n, h, w, c] with values nominally in
            [-1, 1]; values outside that range are clipped.
        val_block_size: number of images per grid row (>= 1).
        image_path: destination passed to ``PIL.Image.save``. When it is a
            filesystem path, missing parent directories are created.
        color_mode: accepted for backward compatibility; currently ignored.

    Raises:
        ValueError: if ``val_block_size`` < 1, or ``val_out`` is not a
            non-empty 4-D batch.
    """
    if val_block_size < 1:
        raise ValueError('val_block_size must be >= 1, got %r' % (val_block_size,))

    batch = np.asarray(val_out)
    if batch.ndim != 4 or batch.shape[0] == 0:
        raise ValueError(
            'val_out must be a non-empty [n, h, w, c] batch, got shape %r' % (batch.shape,))

    # Map [-1, 1] -> [0, 255]. Clip before the cast so that values slightly
    # outside the range saturate instead of wrapping around in uint8.
    pixels = np.clip((batch + 1.0) * 127.5, 0.0, 255.0).astype(np.uint8)

    count, height, width, channels = pixels.shape
    cols = val_block_size
    rows = -(-count // cols)  # ceil(count / cols)

    # Pad an incomplete final row with black tiles.
    missing = rows * cols - count
    if missing:
        filler = np.zeros((missing, height, width, channels), dtype=np.uint8)
        pixels = np.concatenate((pixels, filler), axis=0)

    # [rows*cols, h, w, c] -> [rows, cols, h, w, c] -> [rows, h, cols, w, c]
    # -> [rows*h, cols*w, c]; one reshape instead of repeated concatenation.
    grid = (pixels.reshape(rows, cols, height, width, channels)
                  .transpose(0, 2, 1, 3, 4)
                  .reshape(rows * height, cols * width, channels))

    # Single-channel images are saved as 2-D (grayscale) arrays.
    if channels == 1:
        grid = np.squeeze(grid, axis=2)

    # Create the output directory on demand; file-like targets are left alone.
    if isinstance(image_path, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(image_path))
        if parent:
            os.makedirs(parent, exist_ok=True)

    Image.fromarray(grid).save(image_path)

# Loss against the "real" label: every logit is compared with a target of 1.
def celoss_ones(logits):
    """Return the mean sigmoid cross-entropy of `logits` against all-ones labels."""
    # Targets have the same shape as the logits, e.g. [b, 1] filled with 1.
    targets = tf.ones_like(logits)
    # The op takes raw logits and applies the sigmoid itself.
    per_example = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets,
                                                          logits=logits)
    return tf.reduce_mean(per_example)

# Loss against the "fake" label: every logit is compared with a target of 0.
def celoss_zeros(logits):
    """Return the mean sigmoid cross-entropy of `logits` against all-zeros labels."""
    # Targets have the same shape as the logits, e.g. [b, 1] filled with 0.
    targets = tf.zeros_like(logits)
    per_example = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets,
                                                          logits=logits)
    return tf.reduce_mean(per_example)

def d_loss_fn(generator, discriminator, batch_z, batch_x, is_training):
    """Discriminator loss for one batch.

    The discriminator is rewarded for labelling real images as 1 and
    generated images as 0; the two cross-entropy terms are summed.

    Args:
        generator: callable producing images from `batch_z`.
        discriminator: callable producing logits from images.
        batch_z: latent batch fed to the generator.
        batch_x: batch of real images.
        is_training: passed as the second positional argument to both models.

    Returns:
        Scalar loss tensor.
    """
    generated = generator(batch_z, is_training)

    # Fakes are scored before reals; the call order is kept as-is because
    # each call can update batch-norm statistics when is_training is true.
    logits_fake = discriminator(generated, is_training)
    logits_real = discriminator(batch_x, is_training)

    real_term = celoss_ones(logits_real)    # real images -> label 1
    fake_term = celoss_zeros(logits_fake)   # generated images -> label 0

    return fake_term + real_term


def g_loss_fn(generator, discriminator, batch_z, is_training):
    """Generator loss for one batch.

    The generator is rewarded when the discriminator labels its samples as
    real, so the discriminator's logits on generated images are compared
    against all-ones targets.

    Args:
        generator: callable producing images from `batch_z`.
        discriminator: callable producing logits from images.
        batch_z: latent batch fed to the generator.
        is_training: passed as the second positional argument to both models.

    Returns:
        Scalar loss tensor.
    """
    generated = generator(batch_z, is_training)
    logits_on_fake = discriminator(generated, is_training)
    return celoss_ones(logits_on_fake)

In [10]:
# Seed both TensorFlow and NumPy for this run.
tf.random.set_seed(22)
np.random.seed(22)
# NOTE(review): TensorFlow is already imported when this line runs; the
# variable may need to be set before the import to take effect -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')


# hyper parameters
z_dim = 100
# Number of training steps (one batch per step), not passes over the dataset.
epochs = 3000000
batch_size = 512
# NOTE(review): the DCGAN paper suggests 2e-4 with beta_1=0.5; confirm that
# 2e-3 is intentional.
learning_rate = 0.002
is_training = True

# Collect every training image. The pattern is built with os.path.join so it
# also works on POSIX systems (a hard-coded backslash only matches on Windows).
img_path = glob.glob(os.path.join('faces', '*.jpg'))
if not img_path:
    raise FileNotFoundError("no training images found matching 'faces/*.jpg'")

dataset, img_shape, _ = make_anime_dataset(img_path, batch_size)
print(dataset, img_shape)
# Peek at one batch to check its shape and value range.
sample = next(iter(dataset))
print(sample.shape, tf.reduce_max(sample).numpy(),
        tf.reduce_min(sample).numpy())
# Repeat indefinitely so next(db_iter) never runs out during training.
dataset = dataset.repeat()
db_iter = iter(dataset)


generator = Generator()
generator.build(input_shape = (None, z_dim))
discriminator = Discriminator()
discriminator.build(input_shape=(None, 64, 64, 3))

g_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
d_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)

# Preview grids are written here; create the folder up front so the first
# save does not fail on a fresh checkout.
os.makedirs('images', exist_ok=True)

for epoch in range(epochs):

    # The same latent batch is used for the D step and the G step below.
    batch_z = tf.random.uniform([batch_size, z_dim], minval=-1., maxval=1.)
    batch_x = next(db_iter)

    # train D
    with tf.GradientTape() as tape:
        d_loss = d_loss_fn(generator, discriminator, batch_z, batch_x, is_training)
    grads = tape.gradient(d_loss, discriminator.trainable_variables)
    d_optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))

    # train G
    with tf.GradientTape() as tape:
        g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
    grads = tape.gradient(g_loss, generator.trainable_variables)
    g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

    if epoch % 100 == 0:
        print(epoch, 'd-loss:',float(d_loss), 'g-loss:', float(g_loss))

        # Sample previews from the same U[-1, 1) prior used for training;
        # tf.random.uniform defaults to [0, 1), which covers only a corner of
        # the latent space the generator was trained on.
        z = tf.random.uniform([100, z_dim], minval=-1., maxval=1.)
        fake_image = generator(z, training=False)
        # Separate name so the list of input paths in `img_path` is not overwritten.
        save_path = os.path.join('images', 'gan-%d.png'%epoch)
        save_result(fake_image.numpy(), 10, save_path, color_mode='P')

<PrefetchDataset element_spec=TensorSpec(shape=(512, 64, 64, 3), dtype=tf.float32, name=None)> (64, 64, 3)
(512, 64, 64, 3) 1.0 -1.0
0 d-loss: 1.5381639003753662 g-loss: 1.76137113571167
100 d-loss: 1.371740698814392 g-loss: 0.8220462799072266
200 d-loss: 1.3590083122253418 g-loss: 0.7357417345046997
300 d-loss: 1.3474998474121094 g-loss: 0.8031489253044128
400 d-loss: 1.3607661724090576 g-loss: 0.7511063814163208
500 d-loss: 1.3735337257385254 g-loss: 0.829103946685791
600 d-loss: 1.335923194885254 g-loss: 0.8518822193145752
700 d-loss: 1.329111099243164 g-loss: 0.7549296617507935
800 d-loss: 1.328869342803955 g-loss: 0.8306502103805542
900 d-loss: 1.32321298122406 g-loss: 0.8544598817825317
1000 d-loss: 1.3676694631576538 g-loss: 0.8505797982215881
1100 d-loss: 1.3882262706756592 g-loss: 0.9367552995681763
1200 d-loss: 1.3174521923065186 g-loss: 0.925320029258728


KeyboardInterrupt: 