# GAN for Molecular Generation Task

## GAN Model

In [1]:
import numpy as np
import tensorflow as tf
import warnings

In [2]:
LENGTH = 128
DFF = 32

# shape: (batch_size, length, dff)
cb2_embeddings = np.load("./data/processed_cb2.npy")
cb2_embeddings = tf.expand_dims(cb2_embeddings, axis=-1)
cb2_embeddings = tf.cast(cb2_embeddings, dtype=tf.float32)

cb2_embeddings.shape, tf.math.reduce_min(cb2_embeddings), tf.math.reduce_max(cb2_embeddings), tf.math.reduce_mean(cb2_embeddings)

(TensorShape([2723, 128, 32, 1]),
 <tf.Tensor: shape=(), dtype=float32, numpy=-3.5364208>,
 <tf.Tensor: shape=(), dtype=float32, numpy=3.512889>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0026531518>)

##### Optional normalization

In [3]:
OLD_RANGE = tf.math.reduce_max(cb2_embeddings) - tf.math.reduce_min(cb2_embeddings)
OLD_MIN = tf.math.reduce_min(cb2_embeddings)
NEW_RANGE = 1.0 - 0.0

# cb2_embeddings = tf.divide(
#    tf.subtract(
#       cb2_embeddings, 
#       tf.reduce_min(cb2_embeddings)
#    ), 
#    tf.subtract(
#       tf.reduce_max(cb2_embeddings), 
#       tf.reduce_min(cb2_embeddings)
#    )
# )

cb2_embeddings.shape, tf.math.reduce_min(cb2_embeddings), tf.math.reduce_max(cb2_embeddings), tf.math.reduce_mean(cb2_embeddings)

(TensorShape([2723, 128, 32, 1]),
 <tf.Tensor: shape=(), dtype=float32, numpy=-3.5364208>,
 <tf.Tensor: shape=(), dtype=float32, numpy=3.512889>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0026531518>)

In [4]:
cb2_embeddings = tf.divide(
    tf.multiply( tf.subtract(cb2_embeddings, OLD_MIN), NEW_RANGE ), OLD_RANGE
)

cb2_embeddings.shape, tf.math.reduce_min(cb2_embeddings), tf.math.reduce_max(cb2_embeddings), tf.math.reduce_mean(cb2_embeddings)

(TensorShape([2723, 128, 32, 1]),
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.50204545>)

In [5]:
# cb2_embeddings = tf.add(
    
#     tf.divide(
#         tf.multiply( tf.subtract(cb2_embeddings, 0), OLD_RANGE ), NEW_RANGE
#     ),

#     OLD_MIN
    
# )

# cb2_embeddings.shape, tf.math.reduce_min(cb2_embeddings), tf.math.reduce_max(cb2_embeddings), tf.math.reduce_mean(cb2_embeddings)

In [6]:
# cb2_embeddings = tf.multiply(cb2_embeddings, 3.55868)

# cb2_embeddings.shape, tf.math.reduce_min(cb2_embeddings), tf.math.reduce_max(cb2_embeddings), tf.math.reduce_mean(cb2_embeddings)

##### Model

In [7]:
from tensorflow.keras import layers

def make_generator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(8 * 8 * 256, use_bias=False, input_shape=(100,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Reshape((8, 8, 256)))
    assert model.output_shape == (None, 8, 8, 256)  # Note: None is the batch size

    model.add(layers.Conv2DTranspose(128, (5, 5), strides=(2, 1), padding='same', use_bias=False))
    assert model.output_shape == (None, 16, 8, 128)

    model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False))
    assert model.output_shape == (None, 32, 16, 64)

    model.add(layers.Conv2DTranspose(1, (5, 5), strides=(4, 2), padding='same', use_bias=False, activation='tanh'))
    assert model.output_shape == (None, 128, 32, 1)

    return model

generator = make_generator_model()
generator.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 16384)             1638400   
                                                                 
 batch_normalization (Batch  (None, 16384)             65536     
 Normalization)                                                  
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 16384)             0         
                                                                 
 reshape (Reshape)           (None, 8, 8, 256)         0         
                                                                 
 conv2d_transpose (Conv2DTr  (None, 16, 8, 128)        819200    
 anspose)                                                        
                                                                 
 conv2d_transpose_1 (Conv2D  (None, 32, 16, 64)        2

In [8]:
def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                                     input_shape=[128, 32, 1]))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Conv2D(256, (5, 5), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Flatten())
    model.add(layers.Dense(1, activation='sigmoid'))

    return model

discriminator = make_discriminator_model()
discriminator.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 64, 16, 64)        1664      
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 64, 16, 64)        0         
                                                                 
 dropout (Dropout)           (None, 64, 16, 64)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 8, 128)        204928    
                                                                 
 leaky_re_lu_2 (LeakyReLU)   (None, 32, 8, 128)        0         
                                                                 
 dropout_1 (Dropout)         (None, 32, 8, 128)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 4, 256)       

In [9]:
cross_entropy = tf.keras.losses.BinaryCrossentropy()
def discriminator_loss(real_output, fake_output):
    # fake_loss = -tf.reduce_mean(fake_output)
    # real_loss = -tf.reduce_mean(fake_output)
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

def generator_loss(fake_output):
    # return -tf.reduce_mean(fake_output)
    return cross_entropy(tf.ones_like(fake_output), fake_output)

generator_optimizer = tf.keras.optimizers.legacy.Adam(.025)
discriminator_optimizer = tf.keras.optimizers.legacy.Adam(0.0000025)

In [10]:
EPOCHS = 150
noise_dim = 100
num_examples_to_generate = 16

seed = tf.random.normal([num_examples_to_generate, noise_dim])

In [11]:
def reward_valid_smiles(smiles):
    return tf.numpy_function(is_valid_smiles, [smiles], tf.int64)

def penalize_invalid_smiles(smiles):
    return tf.numpy_function(is_not_valid_smiles, [smiles], tf.int64)

In [12]:
@tf.function
def train_step(images):
    BATCH_SIZE = 8
    noise = tf.random.normal([BATCH_SIZE, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)

        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        # Wasserstein Loss for Generator
        gen_loss = -tf.reduce_mean(fake_output)

        # Wasserstein Loss for Discriminator
        disc_loss = tf.reduce_mean(fake_output) - tf.reduce_mean(real_output)

        # Gradient Penalty (Optional but can improve WGAN training)
        epsilon = tf.random.uniform(shape=[BATCH_SIZE, 1, 1, 1], minval=0.0, maxval=1.0)
        epsilon = tf.broadcast_to(epsilon, images.shape)  # Match the shape of epsilon to images
        interpolated_images = epsilon * images + (1 - epsilon) * generated_images
        with tf.GradientTape() as gp_tape:
            gp_tape.watch(interpolated_images)
            pred_interpolated = discriminator(interpolated_images, training=True)
        gradients = gp_tape.gradient(pred_interpolated, interpolated_images)
        gradients_l2 = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
        gradient_penalty = tf.reduce_mean(tf.square(gradients_l2 - 1.0))
        disc_loss += 50 * gradient_penalty  # lambda_gp is the gradient penalty weight

        # Filter and Reward Valid SMILES
        valid_indices = tf.map_fn(reward_valid_smiles, generated_images, dtype=tf.int64)
        invalid_indices = tf.map_fn(penalize_invalid_smiles, generated_images, dtype=tf.int64)

        # Calculate the average reward for valid SMILES
        average_reward = tf.reduce_mean(valid_indices)
        average_penalty = tf.reduce_mean(invalid_indices)

        # Add the reward to the generator loss
        gen_loss += tf.cast(average_reward, dtype=tf.float32) + tf.cast(average_penalty, dtype=tf.float32)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))


In [13]:
import os

checkpoint_dir = './gan_fresh_4_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generator,
                                 discriminator=discriminator)

In [14]:
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x2a54f2450>

In [15]:
cb2_embeddings.shape, len(cb2_embeddings)

(TensorShape([2723, 128, 32, 1]), 2723)

## Transformer

In [16]:
def get_angles(pos, i, d_model):
  angle_rates = 1 / np.power(10000, (2 * (i//2)) / np.float32(d_model))
  return pos * angle_rates

def positional_encoding(position, d_model):
  angle_rads = get_angles(np.arange(position)[:, np.newaxis],
                          np.arange(d_model)[np.newaxis, :],
                          d_model)
  
  # apply sin to even indices in the array; 2i
  angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
  
  # apply cos to odd indices in the array; 2i+1
  angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
    
  pos_encoding = angle_rads[np.newaxis, ...]
    
  return tf.cast(pos_encoding, dtype=tf.float32)

def create_padding_mask(seq):
  seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
  
  # add extra dimensions to add the padding
  # to the attention logits.
  return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)

def create_look_ahead_mask(size):
  mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
  return mask  # (seq_len, seq_len)

def create_masks(inp, tar):
  # Encoder padding mask
  enc_padding_mask = create_padding_mask(inp)
  
  # Used in the 2nd attention block in the decoder.
  # This padding mask is used to mask the encoder outputs.
  dec_padding_mask = create_padding_mask(inp)
  
  # Used in the 1st attention block in the decoder.
  # It is used to pad and mask future tokens in the input received by 
  # the decoder.
  look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
  dec_target_padding_mask = create_padding_mask(tar)
  combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
  
  return enc_padding_mask, combined_mask, dec_padding_mask

def scaled_dot_product_attention(q, k, v, mask):
  """Calculate the attention weights.
  q, k, v must have matching leading dimensions.
  k, v must have matching penultimate dimension, i.e.: seq_len_k = seq_len_v.
  The mask has different shapes depending on its type(padding or look ahead) 
  but it must be broadcastable for addition.
  
  Args:
    q: query shape == (..., seq_len_q, depth)
    k: key shape == (..., seq_len_k, depth)
    v: value shape == (..., seq_len_v, depth_v)
    mask: Float tensor with shape broadcastable 
          to (..., seq_len_q, seq_len_k). Defaults to None.
    
  Returns:
    output, attention_weights
  """

  matmul_qk = tf.matmul(q, k, transpose_b=True)  # (..., seq_len_q, seq_len_k)
  
  # scale matmul_qk
  dk = tf.cast(tf.shape(k)[-1], tf.float32)
  scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

  # add the mask to the scaled tensor.
  if mask is not None:
    scaled_attention_logits += (mask * -1e9)  

  # softmax is normalized on the last axis (seq_len_k) so that the scores
  # add up to 1.
  attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)  # (..., seq_len_q, seq_len_k)

  output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)

  return output, attention_weights

class MultiHeadAttention(tf.keras.layers.Layer):
  def __init__(self, d_model, num_heads):
    super(MultiHeadAttention, self).__init__()
    self.num_heads = num_heads
    self.d_model = d_model
    
    assert d_model % self.num_heads == 0
    
    self.depth = d_model // self.num_heads
    
    self.wq = tf.keras.layers.Dense(d_model)
    self.wk = tf.keras.layers.Dense(d_model)
    self.wv = tf.keras.layers.Dense(d_model)
    
    self.dense = tf.keras.layers.Dense(d_model)
        
  def split_heads(self, x, batch_size):
    """Split the last dimension into (num_heads, depth).
    Transpose the result such that the shape is (batch_size, num_heads, seq_len, depth)
    """
    x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
    return tf.transpose(x, perm=[0, 2, 1, 3])
    
  def call(self, v, k, q, mask):
    batch_size = tf.shape(q)[0]
    
    q = self.wq(q)  # (batch_size, seq_len, d_model)
    k = self.wk(k)  # (batch_size, seq_len, d_model)
    v = self.wv(v)  # (batch_size, seq_len, d_model)
    
    q = self.split_heads(q, batch_size)  # (batch_size, num_heads, seq_len_q, depth)
    k = self.split_heads(k, batch_size)  # (batch_size, num_heads, seq_len_k, depth)
    v = self.split_heads(v, batch_size)  # (batch_size, num_heads, seq_len_v, depth)
    
    # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth)
    # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k)
    scaled_attention, attention_weights = scaled_dot_product_attention(
        q, k, v, mask)
    
    scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])  # (batch_size, seq_len_q, num_heads, depth)

    concat_attention = tf.reshape(scaled_attention, 
                                  (batch_size, -1, self.d_model))  # (batch_size, seq_len_q, d_model)

    output = self.dense(concat_attention)  # (batch_size, seq_len_q, d_model)
        
    return output, attention_weights

def point_wise_feed_forward_network(d_model, dff):
  return tf.keras.Sequential([
      tf.keras.layers.Dense(dff, activation='relu'),  # (batch_size, seq_len, dff)
      tf.keras.layers.Dense(d_model)  # (batch_size, seq_len, d_model)
  ])

In [17]:
class EncoderLayer(tf.keras.layers.Layer):
  def __init__(self, d_model, num_heads, dff, rate=0.1):
    super(EncoderLayer, self).__init__()

    self.mha = MultiHeadAttention(d_model, num_heads)
    self.ffn = point_wise_feed_forward_network(d_model, dff)

    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    
    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)
    
  def call(self, x, training, mask):

    attn_output, _ = self.mha(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
    attn_output = self.dropout1(attn_output, training=training)
    out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)
    
    ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
    ffn_output = self.dropout2(ffn_output, training=training)
    out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)
    
    return out2

class DecoderLayer(tf.keras.layers.Layer):
  def __init__(self, d_model, num_heads, dff, rate=0.1):
    super(DecoderLayer, self).__init__()

    self.mha1 = MultiHeadAttention(d_model, num_heads)
    self.mha2 = MultiHeadAttention(d_model, num_heads)

    self.ffn = point_wise_feed_forward_network(d_model, dff)
 
    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    
    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)
    self.dropout3 = tf.keras.layers.Dropout(rate)
    
    
  def call(self, x, enc_output, training, 
           look_ahead_mask, padding_mask):
    # enc_output.shape == (batch_size, input_seq_len, d_model)

    attn1, attn_weights_block1 = self.mha1(x, x, x, look_ahead_mask)  # (batch_size, target_seq_len, d_model)
    attn1 = self.dropout1(attn1, training=training)
    out1 = self.layernorm1(attn1 + x)
    
    attn2, attn_weights_block2 = self.mha2(
        enc_output, enc_output, out1, padding_mask)  # (batch_size, target_seq_len, d_model)
    attn2 = self.dropout2(attn2, training=training)
    out2 = self.layernorm2(attn2 + out1)  # (batch_size, target_seq_len, d_model)
    
    ffn_output = self.ffn(out2)  # (batch_size, target_seq_len, d_model)
    ffn_output = self.dropout3(ffn_output, training=training)
    out3 = self.layernorm3(ffn_output + out2)  # (batch_size, target_seq_len, d_model)
    
    return out3, attn_weights_block1, attn_weights_block2

class Encoder(tf.keras.layers.Layer):
  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
               maximum_position_encoding, rate=0.1):
    super(Encoder, self).__init__()

    self.d_model = d_model
    self.num_layers = num_layers
    
    self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
    self.pos_encoding = positional_encoding(maximum_position_encoding, 
                                            self.d_model)
    
    
    self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate) 
                       for _ in range(num_layers)]
  
    self.dropout = tf.keras.layers.Dropout(rate)
        
  def call(self, x, training, mask):

    seq_len = tf.shape(x)[1]
    
    # adding embedding and position encoding.
    x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    x += self.pos_encoding[:, :seq_len, :]

    x = self.dropout(x, training=training)
    
    for i in range(self.num_layers):
      x = self.enc_layers[i](x, training, mask)
    
    return x  # (batch_size, input_seq_len, d_model)

class Decoder(tf.keras.layers.Layer):
  def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size,
               maximum_position_encoding, rate=0.1):
    super(Decoder, self).__init__()

    self.d_model = d_model
    self.num_layers = num_layers
    
    self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
    self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)
    
    self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate) 
                       for _ in range(num_layers)]
    self.dropout = tf.keras.layers.Dropout(rate)
    
  def call(self, x, enc_output, training, 
           look_ahead_mask, padding_mask):

    seq_len = tf.shape(x)[1]
    attention_weights = {}
    
    x = self.embedding(x)  # (batch_size, target_seq_len, d_model)
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    x += self.pos_encoding[:, :seq_len, :]
    
    x = self.dropout(x, training=training)

    for i in range(self.num_layers):
      x, block1, block2 = self.dec_layers[i](x, enc_output, training,
                                             look_ahead_mask, padding_mask)
      
      attention_weights['decoder_layer{}_block1'.format(i+1)] = block1
      attention_weights['decoder_layer{}_block2'.format(i+1)] = block2
    
    # x.shape == (batch_size, target_seq_len, d_model)
    return x, attention_weights

In [18]:
class Transformer(tf.keras.Model):
  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, 
               target_vocab_size, pe_input, pe_target, rate=0.1):
    super(Transformer, self).__init__()

    self.encoder = Encoder(num_layers, d_model, num_heads, dff, 
                           input_vocab_size, pe_input, rate)

    self.decoder = Decoder(num_layers, d_model, num_heads, dff, 
                           target_vocab_size, pe_target, rate)

    self.final_layer = tf.keras.layers.Dense(target_vocab_size)
    
  def call(self, inp, tar, training, enc_padding_mask, 
           look_ahead_mask, dec_padding_mask):

    enc_output = self.encoder(inp, training, enc_padding_mask)  # (batch_size, inp_seq_len, d_model)
    
    # dec_output.shape == (batch_size, tar_seq_len, d_model)
    dec_output, attention_weights = self.decoder(
        tar, enc_output, training, look_ahead_mask, dec_padding_mask)
    
    final_output = self.final_layer(dec_output)  # (batch_size, tar_seq_len, target_vocab_size)
    
    return final_output, attention_weights

In [19]:
num_layers = 4
d_model = 32
dff = 512
num_heads = 8

input_vocab_size = 510
target_vocab_size = 510
dropout_rate = 0.2

class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  def __init__(self, d_model, warmup_steps=4000):
    super(CustomSchedule, self).__init__()
    
    self.d_model = d_model
    self.d_model = tf.cast(self.d_model, tf.float32)

    self.warmup_steps = warmup_steps
    
  def __call__(self, step):
    arg1 = tf.math.rsqrt(step)
    arg2 = step * (self.warmup_steps ** -1.5)
    
    return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

learning_rate = CustomSchedule(d_model)

optimizer = tf.keras.optimizers.legacy.Adam(learning_rate, beta_1=0.9, beta_2=0.98, 
                                     epsilon=1e-9)

In [20]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask
  
  return tf.reduce_sum(loss_)/tf.reduce_sum(mask)

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

transformer = Transformer(num_layers, d_model, num_heads, dff,
                          input_vocab_size, target_vocab_size, 
                          pe_input=input_vocab_size, 
                          pe_target=target_vocab_size,
                          rate=dropout_rate)

In [21]:
checkpoint_path = "./checkpoints/train"

ckpt = tf.train.Checkpoint(transformer=transformer,
                           optimizer=optimizer)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

# if a checkpoint exists, restore the latest checkpoint.
if ckpt_manager.latest_checkpoint:
  ckpt.restore(ckpt_manager.latest_checkpoint)
  print ('Latest checkpoint restored!!')

Latest checkpoint restored!!


## SMILES Generation + Validity

In [22]:
from tokenizer import SmilesTokenizer
tokenizer = SmilesTokenizer("./data/vocab.txt")

def gen_smiles(generator, transformer):
    input_sequence = np.random.randint(0, 60, size=(1, 128))
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(input_sequence, input_sequence)
    enc_output = generator(tf.random.normal([1, 100]), training=False)
    enc_output = tf.squeeze(enc_output, axis=-1)
    decoder_output, _ = transformer.decoder(input_sequence, enc_output, False, combined_mask, dec_padding_mask)
    final_output = transformer.final_layer(decoder_output)

    smiles = []
    for row in final_output[0]:
        smiles.append(np.argmax(row))

    smiles = tokenizer.decode(smiles).replace(' ', '')
    return smiles[:smiles.find('[SEP]')]

In [23]:
from rdkit import Chem
from tokenizer import SmilesTokenizer

tokenizer = SmilesTokenizer("./data/vocab.txt")
EPOCH = 1
norm_states = dict()
def norm(smiles):
    norm_states[EPOCH] = [[smiles.shape, np.min(smiles), np.max(smiles), np.mean(smiles)]]
    
    OLD_MIN_2 = np.min(smiles)
    NEW_RANGE = np.max(smiles) - np.min(smiles)
    smiles = ( ( (smiles - OLD_MIN_2) * OLD_RANGE) / NEW_RANGE) + OLD_MIN
    
    norm_states[EPOCH].append( [smiles.shape, np.min(smiles), np.max(smiles), np.mean(smiles)] )
    
    return smiles

def is_valid_smiles(smiles):
    smiles = norm(smiles)
    
    input_sequence = np.random.randint(0, 60, size=(1, 128))
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(input_sequence, input_sequence)
    enc_output = tf.squeeze(smiles, axis=-1)
    decoder_output, _ = transformer.decoder(input_sequence, enc_output, False, combined_mask, dec_padding_mask)
    final_output = transformer.final_layer(decoder_output)

    smiles = []
    for row in final_output[0]:
        smiles.append(np.argmax(row))

    smiles = tokenizer.decode(smiles).replace(' ', '')
    smiles = smiles[:smiles.find('[SEP]')]

    x = Chem.MolFromSmiles(smiles)
    if x is None or smiles == '' or 128 > len(smiles) > 10:
        return 0
    print("valid SMILES generated", smiles)
    return 1

def is_not_valid_smiles(smiles):
    smiles = norm(smiles)
    
    input_sequence = np.random.randint(0, 60, size=(1, 128))
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(input_sequence, input_sequence)
    enc_output = tf.squeeze(smiles, axis=-1)
    decoder_output, _ = transformer.decoder(input_sequence, enc_output, False, combined_mask, dec_padding_mask)
    final_output = transformer.final_layer(decoder_output)

    smiles = []
    for row in final_output[0]:
        smiles.append(np.argmax(row))

    smiles = tokenizer.decode(smiles).replace(' ', '')
    smiles = smiles[:smiles.find('[SEP]')]

    x = Chem.MolFromSmiles(smiles)
    if x is None or smiles == '' or 128 > len(smiles) > 10:
        return -1
    return 0

## Training

In [24]:
import time
batch_size = 8

def train(dataset, epochs):
  for EPOCH in range(epochs):
    start = time.time()

    for i in range(batch_size, len(dataset), batch_size):
      input = dataset[i-batch_size : i]
      train_step(input)

    if (EPOCH + 1) % 5 == 0:
      checkpoint.save(file_prefix = checkpoint_prefix)
      print('saved checkpoint')

    print ('Time for epoch {} is {} sec'.format(EPOCH + 1, time.time()-start))
    
    noise = tf.random.normal([1, 100])
    generated_image = generator(noise, training=False)
    decision = discriminator(generated_image)
    print(decision.numpy()[0][0], gen_smiles(generator, transformer), '\n' )
    print(norm_states)

In [25]:
import contextlib
from rdkit import RDLogger

@contextlib.contextmanager
def suppress_rdkit_warnings():
    RDLogger.DisableLog('rdApp.error')

    try:
        yield
    finally:
        RDLogger.EnableLog('rdApp.error')

In [26]:
with suppress_rdkit_warnings():
    train(cb2_embeddings, 1)

Instructions for updating:
Use fn_output_signature instead
valid SMILES generated C
valid SMILES generated OCO
valid SMILES generated OOO
Time for epoch 1 is 203.0675241947174 sec
0.5069204 )C)n))))nn))n)))))nnnn)nNnnnn)nnccnc)cc)=ccc(Occccc(ccc(=c(( 

{1: [[(128, 32, 1), -0.99999976, 0.99999976, 0.015136715], [TensorShape([128, 32, 1]), -3.5364208, 3.512889, 0.04158578]]}


In [27]:
checkpoint.save(file_prefix = checkpoint_prefix)

'./gan_fresh_4_checkpoints/ckpt-7'

## Transformer Model

#### Model Building

In [None]:
def get_angles(pos, i, d_model):
  angle_rates = 1 / np.power(10000, (2 * (i//2)) / np.float32(d_model))
  return pos * angle_rates

def positional_encoding(position, d_model):
  angle_rads = get_angles(np.arange(position)[:, np.newaxis],
                          np.arange(d_model)[np.newaxis, :],
                          d_model)
  
  # apply sin to even indices in the array; 2i
  angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
  
  # apply cos to odd indices in the array; 2i+1
  angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
    
  pos_encoding = angle_rads[np.newaxis, ...]
    
  return tf.cast(pos_encoding, dtype=tf.float32)

In [None]:
def create_padding_mask(seq):
  seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
  
  # add extra dimensions to add the padding
  # to the attention logits.
  return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)

def create_look_ahead_mask(size):
  mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
  return mask  # (seq_len, seq_len)

def create_masks(inp, tar):
  # Encoder padding mask
  enc_padding_mask = create_padding_mask(inp)
  
  # Used in the 2nd attention block in the decoder.
  # This padding mask is used to mask the encoder outputs.
  dec_padding_mask = create_padding_mask(inp)
  
  # Used in the 1st attention block in the decoder.
  # It is used to pad and mask future tokens in the input received by 
  # the decoder.
  look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
  dec_target_padding_mask = create_padding_mask(tar)
  combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
  
  return enc_padding_mask, combined_mask, dec_padding_mask

In [None]:
def scaled_dot_product_attention(q, k, v, mask):
  """Calculate the attention weights.
  q, k, v must have matching leading dimensions.
  k, v must have matching penultimate dimension, i.e.: seq_len_k = seq_len_v.
  The mask has different shapes depending on its type(padding or look ahead) 
  but it must be broadcastable for addition.
  
  Args:
    q: query shape == (..., seq_len_q, depth)
    k: key shape == (..., seq_len_k, depth)
    v: value shape == (..., seq_len_v, depth_v)
    mask: Float tensor with shape broadcastable 
          to (..., seq_len_q, seq_len_k). Defaults to None.
    
  Returns:
    output, attention_weights
  """

  matmul_qk = tf.matmul(q, k, transpose_b=True)  # (..., seq_len_q, seq_len_k)
  
  # scale matmul_qk
  dk = tf.cast(tf.shape(k)[-1], tf.float32)
  scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

  # add the mask to the scaled tensor.
  if mask is not None:
    scaled_attention_logits += (mask * -1e9)  

  # softmax is normalized on the last axis (seq_len_k) so that the scores
  # add up to 1.
  attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)  # (..., seq_len_q, seq_len_k)

  output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)

  return output, attention_weights

class MultiHeadAttention(tf.keras.layers.Layer):
  def __init__(self, d_model, num_heads):
    super(MultiHeadAttention, self).__init__()
    self.num_heads = num_heads
    self.d_model = d_model
    
    assert d_model % self.num_heads == 0
    
    self.depth = d_model // self.num_heads
    
    self.wq = tf.keras.layers.Dense(d_model)
    self.wk = tf.keras.layers.Dense(d_model)
    self.wv = tf.keras.layers.Dense(d_model)
    
    self.dense = tf.keras.layers.Dense(d_model)
        
  def split_heads(self, x, batch_size):
    """Split the last dimension into (num_heads, depth).
    Transpose the result such that the shape is (batch_size, num_heads, seq_len, depth)
    """
    x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
    return tf.transpose(x, perm=[0, 2, 1, 3])
    
  def call(self, v, k, q, mask):
    batch_size = tf.shape(q)[0]
    
    q = self.wq(q)  # (batch_size, seq_len, d_model)
    k = self.wk(k)  # (batch_size, seq_len, d_model)
    v = self.wv(v)  # (batch_size, seq_len, d_model)
    
    q = self.split_heads(q, batch_size)  # (batch_size, num_heads, seq_len_q, depth)
    k = self.split_heads(k, batch_size)  # (batch_size, num_heads, seq_len_k, depth)
    v = self.split_heads(v, batch_size)  # (batch_size, num_heads, seq_len_v, depth)
    
    # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth)
    # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k)
    scaled_attention, attention_weights = scaled_dot_product_attention(
        q, k, v, mask)
    
    scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])  # (batch_size, seq_len_q, num_heads, depth)

    concat_attention = tf.reshape(scaled_attention, 
                                  (batch_size, -1, self.d_model))  # (batch_size, seq_len_q, d_model)

    output = self.dense(concat_attention)  # (batch_size, seq_len_q, d_model)
        
    return output, attention_weights

In [None]:
def point_wise_feed_forward_network(d_model, dff):
  return tf.keras.Sequential([
      tf.keras.layers.Dense(dff, activation='relu'),  # (batch_size, seq_len, dff)
      tf.keras.layers.Dense(d_model)  # (batch_size, seq_len, d_model)
  ])

In [None]:
class EncoderLayer(tf.keras.layers.Layer):
  def __init__(self, d_model, num_heads, dff, rate=0.1):
    super(EncoderLayer, self).__init__()

    self.mha = MultiHeadAttention(d_model, num_heads)
    self.ffn = point_wise_feed_forward_network(d_model, dff)

    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    
    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)
    
  def call(self, x, training, mask):

    attn_output, _ = self.mha(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
    attn_output = self.dropout1(attn_output, training=training)
    out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)
    
    ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
    ffn_output = self.dropout2(ffn_output, training=training)
    out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)
    
    return out2

In [None]:
class DecoderLayer(tf.keras.layers.Layer):
  def __init__(self, d_model, num_heads, dff, rate=0.1):
    super(DecoderLayer, self).__init__()

    self.mha1 = MultiHeadAttention(d_model, num_heads)
    self.mha2 = MultiHeadAttention(d_model, num_heads)

    self.ffn = point_wise_feed_forward_network(d_model, dff)
 
    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    
    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)
    self.dropout3 = tf.keras.layers.Dropout(rate)
    
    
  def call(self, x, enc_output, training, 
           look_ahead_mask, padding_mask):
    # enc_output.shape == (batch_size, input_seq_len, d_model)

    attn1, attn_weights_block1 = self.mha1(x, x, x, look_ahead_mask)  # (batch_size, target_seq_len, d_model)
    attn1 = self.dropout1(attn1, training=training)
    out1 = self.layernorm1(attn1 + x)
    
    attn2, attn_weights_block2 = self.mha2(
        enc_output, enc_output, out1, padding_mask)  # (batch_size, target_seq_len, d_model)
    attn2 = self.dropout2(attn2, training=training)
    out2 = self.layernorm2(attn2 + out1)  # (batch_size, target_seq_len, d_model)
    
    ffn_output = self.ffn(out2)  # (batch_size, target_seq_len, d_model)
    ffn_output = self.dropout3(ffn_output, training=training)
    out3 = self.layernorm3(ffn_output + out2)  # (batch_size, target_seq_len, d_model)
    
    return out3, attn_weights_block1, attn_weights_block2

In [None]:
class Encoder(tf.keras.layers.Layer):
  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
               maximum_position_encoding, rate=0.1):
    super(Encoder, self).__init__()

    self.d_model = d_model
    self.num_layers = num_layers
    
    self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
    self.pos_encoding = positional_encoding(maximum_position_encoding, 
                                            self.d_model)
    
    
    self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate) 
                       for _ in range(num_layers)]
  
    self.dropout = tf.keras.layers.Dropout(rate)
        
  def call(self, x, training, mask):

    seq_len = tf.shape(x)[1]
    
    # adding embedding and position encoding.
    x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    x += self.pos_encoding[:, :seq_len, :]

    x = self.dropout(x, training=training)
    
    for i in range(self.num_layers):
      x = self.enc_layers[i](x, training, mask)
    
    return x  # (batch_size, input_seq_len, d_model)

In [None]:
class Decoder(tf.keras.layers.Layer):
  def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size,
               maximum_position_encoding, rate=0.1):
    super(Decoder, self).__init__()

    self.d_model = d_model
    self.num_layers = num_layers
    
    self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
    self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)
    
    self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate) 
                       for _ in range(num_layers)]
    self.dropout = tf.keras.layers.Dropout(rate)
    
  def call(self, x, enc_output, training, 
           look_ahead_mask, padding_mask):

    seq_len = tf.shape(x)[1]
    attention_weights = {}
    
    x = self.embedding(x)  # (batch_size, target_seq_len, d_model)
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    x += self.pos_encoding[:, :seq_len, :]
    
    x = self.dropout(x, training=training)

    for i in range(self.num_layers):
      x, block1, block2 = self.dec_layers[i](x, enc_output, training,
                                             look_ahead_mask, padding_mask)
      
      attention_weights['decoder_layer{}_block1'.format(i+1)] = block1
      attention_weights['decoder_layer{}_block2'.format(i+1)] = block2
    
    # x.shape == (batch_size, target_seq_len, d_model)
    return x, attention_weights

In [None]:
class Transformer(tf.keras.Model):
  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, 
               target_vocab_size, pe_input, pe_target, rate=0.1):
    super(Transformer, self).__init__()

    self.encoder = Encoder(num_layers, d_model, num_heads, dff, 
                           input_vocab_size, pe_input, rate)

    self.decoder = Decoder(num_layers, d_model, num_heads, dff, 
                           target_vocab_size, pe_target, rate)

    self.final_layer = tf.keras.layers.Dense(target_vocab_size)
    
  def call(self, inp, tar, training, enc_padding_mask, 
           look_ahead_mask, dec_padding_mask):

    enc_output = self.encoder(inp, training, enc_padding_mask)  # (batch_size, inp_seq_len, d_model)
    
    # dec_output.shape == (batch_size, tar_seq_len, d_model)
    dec_output, attention_weights = self.decoder(
        tar, enc_output, training, look_ahead_mask, dec_padding_mask)
    
    final_output = self.final_layer(dec_output)  # (batch_size, tar_seq_len, target_vocab_size)
    
    return final_output, attention_weights

In [None]:
num_layers = 4
d_model = 32
dff = 512
num_heads = 8

input_vocab_size = 510
target_vocab_size = 510
dropout_rate = 0.2

In [None]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  def __init__(self, d_model, warmup_steps=4000):
    super(CustomSchedule, self).__init__()
    
    self.d_model = d_model
    self.d_model = tf.cast(self.d_model, tf.float32)

    self.warmup_steps = warmup_steps
    
  def __call__(self, step):
    arg1 = tf.math.rsqrt(step)
    arg2 = step * (self.warmup_steps ** -1.5)
    
    return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

In [None]:
learning_rate = CustomSchedule(d_model)

optimizer = tf.keras.optimizers.legacy.Adam(learning_rate, beta_1=0.9, beta_2=0.98, 
                                     epsilon=1e-9)

In [None]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask
  
  return tf.reduce_sum(loss_)/tf.reduce_sum(mask)

In [None]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

In [None]:
transformer = Transformer(num_layers, d_model, num_heads, dff,
                          input_vocab_size, target_vocab_size, 
                          pe_input=input_vocab_size, 
                          pe_target=target_vocab_size,
                          rate=dropout_rate)

In [None]:
checkpoint_path = "./checkpoints/train"

ckpt = tf.train.Checkpoint(transformer=transformer,
                           optimizer=optimizer)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

# if a checkpoint exists, restore the latest checkpoint.
if ckpt_manager.latest_checkpoint:
  ckpt.restore(ckpt_manager.latest_checkpoint)
  print ('Latest checkpoint restored!!')

#### Generate SMILES

In [None]:
from tokenizer import SmilesTokenizer
tokenizer = SmilesTokenizer("./data/vocab.txt")

def gen_smiles(generator, transformer):
    input_sequence = np.random.randint(0, 60, size=(1, 128))
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(input_sequence, input_sequence)
    enc_output = generator(tf.random.normal([1, 100]), training=False)
    enc_output = tf.squeeze(enc_output, axis=-1)
    decoder_output, _ = transformer.decoder(input_sequence, enc_output, False, combined_mask, dec_padding_mask)
    final_output = transformer.final_layer(decoder_output)

    smiles = []
    for row in final_output[0]:
        smiles.append(np.argmax(row))

    smiles = tokenizer.decode(smiles).replace(' ', '')
    return smiles[:smiles.find('[SEP]')]

In [None]:
smiles = []
for _ in range(1000):
    smiles.append( gen_smiles(generator, transformer) )
print("done")

In [None]:
from rdkit import Chem

def is_valid_smiles(smiles):
    smiles_str = smiles.decode('utf-8')  # Convert from byte string to Python string
    x = Chem.MolFromSmiles(smiles_str)
    if x is None:
        return 0
    return 1

# valids = 0
# for smile in smiles:
#     if is_valid_smiles(smile):
#         valids += 1

In [None]:
valids

## Generation

In [27]:
from rdkit import Chem
from tokenizer import SmilesTokenizer

tokenizer = SmilesTokenizer("./data/vocab.txt")

def gen_smiles(generator, transformer):
    input_sequence = np.random.randint(0, 60, size=(1, 128))
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(input_sequence, input_sequence)
    enc_output = generator(tf.random.normal([1, 100]), training=False)
    
    enc_output = norm(enc_output)
    
    enc_output = tf.squeeze(enc_output, axis=-1)
    decoder_output, _ = transformer.decoder(input_sequence, enc_output, False, combined_mask, dec_padding_mask)
    final_output = transformer.final_layer(decoder_output)

    smiles = []
    for row in final_output[0]:
        smiles.append(np.argmax(row))

    smiles = tokenizer.decode(smiles).replace(' ', '')

    x = Chem.MolFromSmiles(smiles, sanitize=False)
    if x is None or smiles == '':
        return (smiles[:smiles.find('[SEP]')], 0)
        
    x = Chem.MolFromSmiles(smiles)
    if x is None:
        return (smiles[:smiles.find('[SEP]')], 2)

    return (smiles[:smiles.find('[SEP]')], 1)

In [28]:
total_gen = 50000
valid_gen = 0
valid_smi = []
grammar_gen = 0
grammar_smi = []

with suppress_rdkit_warnings():
    for i in range(total_gen):
        smi, v = gen_smiles(generator, transformer)
        
        if v == 1:
            print(smi, 1)
            valid_gen += 1
            valid_smi.append(smi)
        elif v == 2:
            print(smi, 2)
            grammar_gen += 1
            grammar_smi.append(smi)

        if i % 100 == 0:
            print(f"{i} smiles generated")

0 smiles generated
100 smiles generated
200 smiles generated
300 smiles generated
400 smiles generated
500 smiles generated
600 smiles generated
700 smiles generated
800 smiles generated
900 smiles generated
1000 smiles generated
1100 smiles generated
1200 smiles generated
1300 smiles generated
1400 smiles generated
1500 smiles generated
1600 smiles generated
1700 smiles generated
1800 smiles generated
1900 smiles generated
2000 smiles generated
2100 smiles generated
2200 smiles generated
2300 smiles generated
2400 smiles generated
2500 smiles generated
2600 smiles generated
2700 smiles generated
2800 smiles generated
2900 smiles generated
3000 smiles generated
3100 smiles generated
3200 smiles generated
3300 smiles generated
3400 smiles generated
3500 smiles generated
3600 smiles generated
3700 smiles generated
3800 smiles generated
3900 smiles generated
4000 smiles generated
4100 smiles generated
4200 smiles generated
4300 smiles generated
4400 smiles generated
4500 smiles generated


In [29]:
total_gen, valid_gen, grammar_gen

(50000, 0, 2)

In [None]:
noise = tf.random.normal([1, 100])
generated_image = generator(noise, training=False)

generated_image

In [None]:
decision = discriminator(generated_image)

decision.numpy()[0][0]