In [1]:
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
import tensorflow_probability as tfp

from tensorflow.keras.constraints import max_norm
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras import activations
import numpy as np
import matplotlib.pyplot as plt

In [2]:
(ds_train, ds_test) = tfds.load('mnist', split=['train', 'test'], shuffle_files=True,as_supervised=False)
X_data = tf.convert_to_tensor([i['image'] for i in tfds.as_numpy(ds_train)], dtype=tf.float32) / 255
X_data_test = tf.convert_to_tensor([i['image'] for i in tfds.as_numpy(ds_test)], dtype=tf.float32) / 255


2022-08-01 13:19:15.241907: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-01 13:19:16.964737: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 46720 MB memory:  -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:04:00.0, compute capability: 8.6
2022-08-01 13:19:16.969533: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 46720 MB memory:  -> device: 1, name: NVIDIA RTX A6000, pci bus id: 0000:0d:00.0, compute capability: 8.6


In [3]:
def get_true_sample(size):
    idxs = np.random.choice(X_data.shape[0], size)
    return tf.gather(params=X_data, indices=idxs)

In [4]:
def cov2d_block(x, filters=128, act=activations.relu, kern=(5, 5), strd=(1, 1), pad='same', BN=False, DO=0.):
    initializer = keras.initializers.RandomNormal(stddev=0.01)
    x = keras.layers.Conv2D(filters, kern, strd, pad, activation=act, use_bias=True, kernel_initializer=initializer)(x)
    if BN:
        x = keras.layers.BatchNormalization()(x)
    x = act(x)
    if DO > 0.:
        x = keras.layers.Dropout(D)(x)
    return x

def mp_conv2d_block(x, filters=128, act=activations.relu, kern=(5, 5), strd=(1, 1), pad='same', DO=1.):
    
    initializer = keras.initializers.RandomNormal(stddev=0.01)
    x = keras.layers.Conv2D(filters, kern, strd, pad, activation=act, use_bias=True, kernel_initializer=initializer)(x)
    x = act(x)
    x = keras.layers.Dropout(DO)(x)
    x = keras.layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding="same")(x)
    
    return x

def deconv2d_block(x, filters=64, act=activations.relu, kern=(5, 5), strd=(1, 1), pad='same', BN=False, DO=0.):
    initializer = RandomNormal(stddev=0.01)
    
    x = keras.layers.Conv2DTranspose(filters, kern, strd, pad, activation=act, use_bias=True, 
                                         kernel_initializer=initializer)(x)
    if BN:
        x = keras.layers.BatchNormalization()(x)
    x = act(x)
    if DO > 0.:
        x = keras.layers.Dropout(DO)(x)
    return x

def dense_block(x, input_dim=256, return_shape=256):
    initializer = RandomNormal(stddev=0.01)
    
    x = keras.layers.Dense(return_shape, activation='relu',kernel_initializer=initializer, use_bias=True)(x)
    return x


In [5]:
class Discriminator:
    def __init__(self, image_shape=(28, 28, 1)):
        self.image_shape = image_shape
        self.model_layer = []
        self.model = None
        
    def initialize_model(self):
        input_dims = keras.layers.Input(shape=self.image_shape)
        xk = mp_conv2d_block(input_dims, filters=64, act=activations.relu, kern=(5, 5), strd=(2, 2), pad='same', DO=1.)
        xk = mp_conv2d_block(xk, filters=128, act=activations.relu, kern=(5, 5), strd=(2, 2), pad='same', DO=1.)
        xk = mp_conv2d_block(xk, filters=128, act=activations.relu, kern=(5, 5), strd=(1, 1), pad='same', DO=1.)
        xk = keras.layers.Flatten()(xk)
        xk = keras.layers.Dense(1)(xk)
        
        self.model = keras.Model(input_dims, xk)
        
    def forward_model(self, input_tensor):
        return self.model(input_tensor)
    
    def extent_model(self):
        raise NotImplementedError
    
class Generator:
    def __init__(self, latent_z=128, out_latent=128, image_shape=(28, 28, 1)):
        self.out_latent = out_latent
        self.latent_z = latent_z
        self.image_shape = image_shape
        self.model_layer = []
        self.model = None 

    def initialize_model(self):
        input_dims = keras.layers.Input(shape=self.latent_z)
        xk = dense_block(input_dims, input_dim=self.latent_z, return_shape=self.out_latent)
        xk = keras.layers.Reshape((1, 1, self.out_latent))(xk)
        xk = deconv2d_block(xk, filters=64, act=activations.relu, kern=(4, 4), strd=(1, 1), 
                                pad='same', BN=False, DO=0.)
        xk = deconv2d_block(xk, filters=128, act=activations.relu, kern=(4, 4), strd=(1, 1), 
                                pad='same', BN=False, DO=0.)
        xk = deconv2d_block(xk, filters=128, act=activations.relu, kern=(4, 4), strd=(1, 1), 
                                pad='same', BN=False, DO=0.)
        xk = keras.layers.Flatten()(xk)
        xk = keras.layers.Dense(tf.reduce_prod(self.image_shape))(xk)
        xk = keras.layers.Reshape(self.image_shape)(xk)
        self.model = keras.Model(input_dims, xk)
        
    def forward_model(self, input_tensor):
        return self.model(input_tensor)
    
    def extent_model(self):
        raise NotImplementedError 

In [6]:
def grad_loss_0(D, alpha, p_r, p_g):
    interpolate = p_r + alpha * (p_g - p_r)
    
    with tf.name_scope("Gradient_Penalty"):
        with tf.GradientTape() as g_tape:
            g_tape.watch(interpolate)
            inter_pred_disc = D.forward_model(interpolate)
        g_grad = g_tape.gradient(inter_pred_disc, [interpolate])
        norm_g = tf.sqrt(tf.reduce_sum(tf.square(g_grad)))
        g_grad = tf.reduce_mean(tf.math.squared_difference(norm_g, 1))
    
    return g_grad
        
def generator_loss_0(z, G, D, train_sets):
    fake_img = G.forward_model(z)
    real_score = D.forward_model(fake_img)
    fake_score = D.forward_model(train_sets)
    gen_loss = - tf.nn.softplus(real_score) 
    
    return gen_loss

def discrim_loss_0(z, G, D, train_sets, g_penalty=0.):
    fake_img = G.forward_model(z)
    real_score = D.forward_model(fake_img)
    fake_score = D.forward_model(train_sets)
    disc_loss = tf.nn.softplus(fake_score) + tf.nn.softplus(- real_score) 
    
    if g_penalty > 0.:
        grad_loss_dc = grad_loss_0(D, alpha=0.5, p_r=train_sets, p_g=fake_img)
    disc_loss += grad_loss_dc
    return disc_loss, grad_loss_dc

In [7]:
batch_size = 1000
latent_dim = 128
pm_prior0 = tf.zeros((batch_size, latent_dim))
pm_prior1 = tf.ones((batch_size, latent_dim)) * 0.01
grad_penalty = 1

G_opt = tf.keras.optimizers.RMSprop(0.0001)
D_opt = tf.keras.optimizers.RMSprop(0.0001)

In [8]:
@tf.function
def loop_train(D, G, epoch=50):

    for e in range(epoch):
        prior = tfp.distributions.Normal(pm_prior0, pm_prior1)
        z_k = prior.sample()
        true_img_k = get_true_sample(size=batch_size)
        
        with tf.GradientTape() as D_tape:
            disc_L, disc_g_loss = discrim_loss_0(z=z_k, G=G, D=D, train_sets=true_img_k, g_penalty=grad_penalty)
        disc_G = D_tape.gradient(disc_L, D.model.trainable_variables)
        D_opt.apply_gradients(zip(disc_G, D.model.trainable_variables))
        
        with tf.GradientTape() as G_tape:
            gen_L = generator_loss_0(z=z_k, G=G, D=D, train_sets=true_img_k)
        gen_G = G_tape.gradient(gen_L, G.model.trainable_variables)
        G_opt.apply_gradients(zip(gen_G, G.model.trainable_variables))
        
        tf.summary.scalar('generator_loss', tf.reduce_mean(gen_L), e)
        tf.summary.scalar('discriminator_loss', tf.reduce_mean(disc_L), e)
        tf.summary.image('generated_image', G.forward_model(z_k[:3]), e)
        tf.print(e, 'generator_loss : ', tf.reduce_mean(gen_L), 'discriminator_loss : ', tf.reduce_mean(disc_L))

In [9]:
tf.debugging.set_log_device_placement(True)

gpus = tf.config.list_physical_devices('GPU')
cpus = tf.config.list_physical_devices('CPU')
print("gpu :" , gpus, "cpu :", cpus)


gpu : [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')] cpu : [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [10]:
with tf.device('GPU:0'):
    T_disc = Discriminator()
    T_disc.initialize_model()

    T_gen = Generator()
    T_gen.initialize_model()

In [16]:
import time

start_time = time.perf_counter()
summary = tf.summary.create_file_writer(logdir='test_mnist_gans')
with summary.as_default():
    with tf.device('GPU:1'):
        results = loop_train(D=T_disc, G=T_gen, epoch=100)
print('time_execution', time.perf_counter() - start_time, "seconds")

0 generator_loss :  -0.691327572 discriminator_loss :  1.38715494
1 generator_loss :  -0.691343307 discriminator_loss :  1.38655794
2 generator_loss :  -0.691363335 discriminator_loss :  1.38693333
3 generator_loss :  -0.691264927 discriminator_loss :  1.38684154
4 generator_loss :  -0.691398323 discriminator_loss :  1.38587344
5 generator_loss :  -0.691491187 discriminator_loss :  1.38444948
6 generator_loss :  -0.691111505 discriminator_loss :  1.38320243
7 generator_loss :  -0.691399574 discriminator_loss :  1.38226211
8 generator_loss :  -0.691490471 discriminator_loss :  1.38006473
9 generator_loss :  -0.69155556 discriminator_loss :  1.3785249
10 generator_loss :  -0.691622257 discriminator_loss :  1.37707567
11 generator_loss :  -0.691723526 discriminator_loss :  1.37541771
12 generator_loss :  -0.69176209 discriminator_loss :  1.37329447
13 generator_loss :  -0.691911697 discriminator_loss :  1.3710947
14 generator_loss :  -0.691735744 discriminator_loss :  1.3693018
15 generat