In [0]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [0]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import re
import os

In [0]:
def set_seed(seed):
  """Seed the Python, NumPy and TensorFlow random number generators.

  Args:
    seed: Integer seed passed to `random.seed`, `np.random.seed` and
      `tf.random.set_seed`.
  """
  # Local import: the notebook's import cell does not bring in `random`.
  import random

  random.seed(seed)
  np.random.seed(seed)
  tf.random.set_seed(seed)

In [0]:
# Fix the NumPy and TensorFlow seeds (see `set_seed`) before any data or layers are created.
set_seed(1228)

In [0]:
# Load the Keras IMDB dataset as (train, test) pairs of (sequences, labels).
# Per the Keras docs, `num_words=10000` keeps only the 10,000 most frequent words.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=10000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [0]:
# Bring every sequence to length 1000; padding values are appended at the end ('post').
pad_kwargs = dict(maxlen=1000, padding='post')
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, **pad_kwargs)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, **pad_kwargs)

# Convert the label vectors to categorical (one-hot) matrices.
# NOTE(review): re-running this cell would encode the already-encoded labels again.
y_train, y_test = (tf.keras.utils.to_categorical(labels) for labels in (y_train, y_test))

# This notebook compares two ways of building a GLU (gated linear unit): using two separate conv layers
# for the linear and gate paths, OR using one single conv layer whose output is split into the two halves (key, value) to get the gated value.

In [0]:
class GLU_Seperated(tf.keras.layers.Layer):
  """Gated linear unit computed with two separate Conv1D layers.

  One convolution (`W_a`) produces the linear path and a second convolution
  (`W_b`) produces the gate; the output is `W_a(x) * sigmoid(W_b(x))`.

  Both convolutions use a constant kernel initializer of 1.0 and no bias, so
  the layer's output is deterministic for a given input.

  Args:
    units: Number of filters of each of the two convolutions.
    kernel_size: Forwarded to both `Conv1D` layers.
    strides: Forwarded to both `Conv1D` layers.
    padding: Forwarded to both `Conv1D` layers.
    dilation_rate: Forwarded to both `Conv1D` layers.
    **kwargs: Standard Keras layer keyword arguments (e.g. `name`),
      forwarded to the base `Layer` constructor.
  """

  def __init__(self, units, kernel_size, strides, padding, dilation_rate, **kwargs):
    super(GLU_Seperated, self).__init__(**kwargs)
    self.units = units
    self.kernel_size = kernel_size
    self.strides = strides
    self.padding = padding
    self.dilation_rate = dilation_rate

    # Linear path.
    self.W_a = tf.keras.layers.Conv1D(filters=units,
                                      kernel_size=kernel_size,
                                      strides=strides,
                                      padding=padding,
                                      dilation_rate=dilation_rate,
                                      kernel_initializer=tf.keras.initializers.Constant(1.),
                                      use_bias=False)

    # Gate path (passed through a sigmoid in `call`).
    self.W_b = tf.keras.layers.Conv1D(filters=units,
                                      kernel_size=kernel_size,
                                      strides=strides,
                                      padding=padding,
                                      dilation_rate=dilation_rate,
                                      kernel_initializer=tf.keras.initializers.Constant(1.),
                                      use_bias=False)

    # Sub-layers are created once here instead of on every `call`, so repeated
    # invocations do not keep instantiating new Keras layers.
    self.sigmoid = tf.keras.layers.Activation('sigmoid')
    self.multiply = tf.keras.layers.Multiply()

  def call(self, inputs):
    """Return `W_a(inputs) * sigmoid(W_b(inputs))`.

    Each intermediate tensor (linear path, gate, product) and its shape is
    printed before the product is returned.
    """
    linear = self.W_a(inputs)
    print(linear)
    print(linear.shape)
    gated = self.W_b(inputs)
    gated = self.sigmoid(gated)
    print(gated)
    print(gated.shape)

    result = self.multiply([linear, gated])
    print(result)
    print(result.shape)
    return result

  def get_config(self):
    """Return the constructor arguments so the layer can be serialized."""
    config = super(GLU_Seperated, self).get_config()
    config.update({
        'units': self.units,
        'kernel_size': self.kernel_size,
        'strides': self.strides,
        'padding': self.padding,
        'dilation_rate': self.dilation_rate,
    })
    return config

In [0]:
# Embedding shared by both test models: 10000 input ids -> 128-dimensional vectors.
test_emb = tf.keras.layers.Embedding(input_dim=10000, output_dim=128)

In [121]:
# Functional-API graph: length-1000 input -> shared embedding -> separated-conv GLU.
test_input = tf.keras.layers.Input(shape=(1000,))
test_emb_result = test_emb(test_input)
separated_glu_layer = GLU_Seperated(units=128,
                                    kernel_size=3,
                                    strides=1,
                                    padding='same',
                                    dilation_rate=1)
test_glu = separated_glu_layer(test_emb_result)

test_linear_glu = tf.keras.models.Model(inputs=test_input, outputs=test_glu)

Tensor("glu__seperated_13/conv1d_37/Identity:0", shape=(None, 1000, 128), dtype=float32)
(None, 1000, 128)
Tensor("glu__seperated_13/activation_22/Identity:0", shape=(None, 1000, 128), dtype=float32)
(None, 1000, 128)
Tensor("glu__seperated_13/multiply_22/Identity:0", shape=(None, 1000, 128), dtype=float32)
(None, 1000, 128)


In [122]:
# Print the layer / parameter-count table for the model currently bound to `test_linear_glu`.
test_linear_glu.summary()

Model: "model_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_26 (InputLayer)        [(None, 1000)]            0         
_________________________________________________________________
embedding_15 (Embedding)     (None, 1000, 128)         1280000   
_________________________________________________________________
glu__seperated_13 (GLU_Seper (None, 1000, 128)         98304     
Total params: 1,378,304
Trainable params: 1,378,304
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Rebinds `test_input` (previously the Keras Input placeholder) to a concrete
# all-ones NumPy vector of length 1000 used as the model's test sequence.
test_input = np.ones(shape=(1000,))

In [108]:
# Add a leading batch axis to the test vector, then run it through the model.
batched_input = tf.expand_dims(tf.convert_to_tensor(test_input), axis=0)
glu_output = test_linear_glu(batched_input)
glu_output.shape

tf.Tensor(
[[[0.35872915 0.35872915 0.35872915 ... 0.35872915 0.35872915 0.35872915]
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  ...
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  [0.35872915 0.35872915 0.35872915 ... 0.35872915 0.35872915 0.35872915]]], shape=(1, 1000, 128), dtype=float32)
(1, 1000, 128)
tf.Tensor(
[[[0.5887328  0.5887328  0.5887328  ... 0.5887328  0.5887328  0.5887328 ]
  [0.63136894 0.63136894 0.63136894 ... 0.63136894 0.63136894 0.63136894]
  [0.63136894 0.63136894 0.63136894 ... 0.63136894 0.63136894 0.63136894]
  ...
  [0.63136894 0.63136894 0.63136894 ... 0.63136894 0.63136894 0.63136894]
  [0.63136894 0.63136894 0.63136894 ... 0.63136894 0.63136894 0.63136894]
  [0.5887328  0.5887328  0.5887328  ... 0.5887328  0.5887328  0.5887328 ]]], shape=(1, 1000, 128)

TensorShape([1, 1000, 128])

In [0]:
class GLU_Split(tf.keras.layers.Layer):
  """Gated linear unit computed with one Conv1D layer whose output is split.

  A single convolution (`W_a`) with `units * 2` filters is applied; its output
  is split in two along the last axis. The first half is the linear path and
  the second half is the gate; the output is `linear * sigmoid(gate)`.

  The convolution uses a constant kernel initializer of 1.0 and no bias, so
  the layer's output is deterministic for a given input.

  Args:
    units: Number of output channels of the GLU (the convolution itself uses
      `units * 2` filters).
    kernel_size: Forwarded to the `Conv1D` layer.
    strides: Forwarded to the `Conv1D` layer.
    padding: Forwarded to the `Conv1D` layer.
    dilation_rate: Forwarded to the `Conv1D` layer.
    **kwargs: Standard Keras layer keyword arguments (e.g. `name`),
      forwarded to the base `Layer` constructor.
  """

  def __init__(self, units, kernel_size, strides, padding, dilation_rate, **kwargs):
    super(GLU_Split, self).__init__(**kwargs)
    # NOTE: `self.units` stores the doubled filter count, not the `units` argument.
    self.units = units * 2
    self.kernel_size = kernel_size
    self.strides = strides
    self.padding = padding
    self.dilation_rate = dilation_rate

    # One convolution producing both the linear half and the gate half.
    self.W_a = tf.keras.layers.Conv1D(filters=units * 2,
                                      kernel_size=kernel_size,
                                      strides=strides,
                                      padding=padding,
                                      dilation_rate=dilation_rate,
                                      kernel_initializer=tf.keras.initializers.Constant(1.),
                                      use_bias=False)

    # Sub-layers are created once here instead of on every `call`, so repeated
    # invocations do not keep instantiating new Keras layers.
    self.sigmoid = tf.keras.layers.Activation('sigmoid')
    self.multiply = tf.keras.layers.Multiply()

  def call(self, inputs):
    """Return `linear * sigmoid(gate)` from the split convolution output.

    The full convolution output, the linear half, the gate and the product are
    each printed together with their shapes before the product is returned.
    """
    x = self.W_a(inputs)
    print(x)
    print(x.shape)
    linear, gated = tf.split(x, 2, axis=-1)

    print(linear)
    print(linear.shape)
    gated = self.sigmoid(gated)

    print(gated)
    print(gated.shape)
    output = self.multiply([linear, gated])

    print(output)
    print(output.shape)

    return output

  def get_config(self):
    """Return the constructor arguments so the layer can be serialized."""
    config = super(GLU_Split, self).get_config()
    config.update({
        # `self.units` holds twice the constructor's `units`; undo the doubling.
        'units': self.units // 2,
        'kernel_size': self.kernel_size,
        'strides': self.strides,
        'padding': self.padding,
        'dilation_rate': self.dilation_rate,
    })
    return config

In [119]:
# Functional-API graph: length-1000 input -> shared embedding -> split-conv GLU.
test_input = tf.keras.layers.Input(shape=(1000,))
test_emb_result = test_emb(test_input)
split_glu_layer = GLU_Split(units=128,
                            kernel_size=3,
                            strides=1,
                            padding='same',
                            dilation_rate=1)
test_glu = split_glu_layer(test_emb_result)

test_linear_glu = tf.keras.models.Model(inputs=test_input, outputs=test_glu)

Tensor("glu__split_10/conv1d_36/Identity:0", shape=(None, 1000, 256), dtype=float32)
(None, 1000, 256)
Tensor("glu__split_10/split:0", shape=(None, 1000, 128), dtype=float32)
(None, 1000, 128)
Tensor("glu__split_10/activation_21/Identity:0", shape=(None, 1000, 128), dtype=float32)
(None, 1000, 128)
Tensor("glu__split_10/multiply_21/Identity:0", shape=(None, 1000, 128), dtype=float32)
(None, 1000, 128)


In [120]:
# Print the layer / parameter-count table for the model currently bound to `test_linear_glu`.
test_linear_glu.summary()

Model: "model_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_25 (InputLayer)        [(None, 1000)]            0         
_________________________________________________________________
embedding_15 (Embedding)     (None, 1000, 128)         1280000   
_________________________________________________________________
glu__split_10 (GLU_Split)    (None, 1000, 128)         98304     
Total params: 1,378,304
Trainable params: 1,378,304
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Rebinds `test_input` (previously the Keras Input placeholder) to a concrete
# all-ones NumPy vector of length 1000 used as the model's test sequence.
test_input = np.ones(shape=(1000,))

In [112]:
# Add a leading batch axis to the test vector, then run it through the model.
batched_input = tf.expand_dims(tf.convert_to_tensor(test_input), axis=0)
glu_output_split = test_linear_glu(batched_input)
glu_output_split.shape

tf.Tensor(
[[[0.35872915 0.35872915 0.35872915 ... 0.35872915 0.35872915 0.35872915]
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  ...
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  [0.35872915 0.35872915 0.35872915 ... 0.35872915 0.35872915 0.35872915]]], shape=(1, 1000, 256), dtype=float32)
(1, 1000, 256)
tf.Tensor(
[[[0.35872915 0.35872915 0.35872915 ... 0.35872915 0.35872915 0.35872915]
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  ...
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  [0.538094   0.538094   0.538094   ... 0.538094   0.538094   0.538094  ]
  [0.35872915 0.35872915 0.35872915 ... 0.35872915 0.35872915 0.35872915]]], shape=(1, 1000, 128)

TensorShape([1, 1000, 128])

In [113]:
# Convert the tensor stored in `glu_output` to a NumPy array and display it.
non_split = glu_output.numpy()
non_split

array([[[0.21119562, 0.21119562, 0.21119562, ..., 0.21119562,
         0.21119562, 0.21119562],
        [0.33973584, 0.33973584, 0.33973584, ..., 0.33973584,
         0.33973584, 0.33973584],
        [0.33973584, 0.33973584, 0.33973584, ..., 0.33973584,
         0.33973584, 0.33973584],
        ...,
        [0.33973584, 0.33973584, 0.33973584, ..., 0.33973584,
         0.33973584, 0.33973584],
        [0.33973584, 0.33973584, 0.33973584, ..., 0.33973584,
         0.33973584, 0.33973584],
        [0.21119562, 0.21119562, 0.21119562, ..., 0.21119562,
         0.21119562, 0.21119562]]], dtype=float32)

In [114]:
# Convert the tensor stored in `glu_output_split` to a NumPy array and display it.
split = glu_output_split.numpy()
split

array([[[0.21119562, 0.21119562, 0.21119562, ..., 0.21119562,
         0.21119562, 0.21119562],
        [0.33973584, 0.33973584, 0.33973584, ..., 0.33973584,
         0.33973584, 0.33973584],
        [0.33973584, 0.33973584, 0.33973584, ..., 0.33973584,
         0.33973584, 0.33973584],
        ...,
        [0.33973584, 0.33973584, 0.33973584, ..., 0.33973584,
         0.33973584, 0.33973584],
        [0.33973584, 0.33973584, 0.33973584, ..., 0.33973584,
         0.33973584, 0.33973584],
        [0.21119562, 0.21119562, 0.21119562, ..., 0.21119562,
         0.21119562, 0.21119562]]], dtype=float32)

In [115]:
# Exact (not tolerance-based) comparison: True only if both arrays have the same shape and elements.
np.array_equal(non_split, split)

True