In [0]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [0]:
import time
import os
import unicodedata
import re
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
import sys

In [0]:
# Set random seed 
np.random.seed(500)
tf.random.set_seed(500)

In [0]:
class hyperparam:
  embedding_dim = 128
  batch_size=100
  num_epochs = 30

hp = hyperparam()

In [0]:
file_path = tf.keras.utils.get_file('spa-eng.zip', origin='http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip', extract=True)

Downloading data from http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip


In [0]:
file_path

'/root/.keras/datasets/spa-eng.zip'

In [0]:
os.listdir('/root/.keras/datasets/spa-eng')

['_about.txt', 'spa.txt']

In [0]:
text_file = '/root/.keras/datasets/spa-eng/spa.txt'

In [0]:
df = pd.read_csv(text_file, sep='\t', header=None)
df.columns = ['eng', 'spa']
df.head()

Unnamed: 0,eng,spa
0,Go.,Ve.
1,Go.,Vete.
2,Go.,Vaya.
3,Go.,Váyase.
4,Hi.,Hola.


In [0]:
def unicode_to_ascii(string):
  '''
  NFD = Normal Form Decomposed
  Mn = Nonspacing_Mark
  '''
  return ''.join(char for char in unicodedata.normalize('NFD', string) if unicodedata.category(char) != 'Mn')

In [0]:
def preprocess_sentence(sentence):
  sentence = unicode_to_ascii(sentence.lower().strip())
  sentence = re.sub(r'([?.!,¿])', r' \1 ', sentence)
  sentence = re.sub(r'[" "]+', ' ', sentence)
  sentence = re.sub(r'[^a-zA-Z?.!,¿]+', ' ' , sentence)
  sentence = sentence.rstrip().strip()
  sentence = '<start>' + sentence + '<end>'
  return sentence

In [0]:
def create_dataset(path, num_examples):
  lines = open(path, encoding='utf8').read().strip().split('\n')
  word_pair = [[preprocess_sentence(sentence) for sentence in line.split('\t')] for line in lines[:num_examples]]
  return zip(*word_pair)

In [0]:
english_sentence, spanish_sentence = create_dataset(path=text_file, num_examples=None)

In [0]:
type(english_sentence)

tuple

In [0]:
english_sentence[:3]

('<start>go .<end>', '<start>go .<end>', '<start>go .<end>')

In [0]:
def max_length(tensor):
  return max(len(tensor_unit) for tensor_unit in tensor)

In [0]:
def tokenize(language):
  language_token = tf.keras.preprocessing.text.Tokenizer(filters='')
  language_token.fit_on_texts(language)
  tensor = language_token.texts_to_sequences(language)
  tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, maxlen = max_length(tensor), padding='post')
  return tensor, language_token

In [0]:
def load_dataset(path, num_examples=None):
  target_language, input_language = create_dataset(path, num_examples)

  input_tensor, input_language_token = tokenize(input_language)
  target_tensor, target_language_token = tokenize(target_language)

  return input_tensor, input_language_token, target_tensor, target_language_token

In [0]:
input_tensor, input_language_token, target_tensor, target_language_token = load_dataset(text_file)

In [0]:
print(input_tensor.shape)
input_tensor

(118964, 51)


array([[1411,    1,    0, ...,    0,    0,    0],
       [1770,    1,    0, ...,    0,    0,    0],
       [3712,    1,    0, ...,    0,    0,    0],
       ...,
       [ 410, 6271,    3, ...,    0,    0,    0],
       [ 856, 1481,  151, ...,    0,    0,    0],
       [ 119,  129, 1572, ...,   56, 2248,    1]], dtype=int32)

In [0]:
print(target_tensor.shape)
target_tensor

(118964, 49)


array([[  637,     1,     0, ...,     0,     0,     0],
       [  637,     1,     0, ...,     0,     0,     0],
       [  637,     1,     0, ...,     0,     0,     0],
       ...,
       [  180,  4635,  9532, ...,     0,     0,     0],
       [ 1275,    86,    41, ...,     1,     0,     0],
       [  170,     5,    36, ..., 14545, 14546,     1]], dtype=int32)

In [0]:
input_vocabulary_size = len(input_language_token.index_word) + 1
target_vocabulary_size = len(target_language_token.index_word) + 1

In [0]:
X_train, X_test, y_train, y_test = train_test_split(input_tensor, target_tensor, test_size=0.2, random_state=500)

In [0]:
print(X_train.shape) # encoder input 
print(y_train.shape) # decoder output 
print('---')
print(X_test.shape) # encoder input
print(y_test.shape) # decoder output

(95171, 51)
(95171, 49)
---
(23793, 51)
(23793, 49)


In [0]:
# create decoder input
print(y_train.shape)
y_train

(95171, 49)


array([[  20,  409, 1266, ...,    0,    0,    0],
       [  20,  502,   10, ...,    0,    0,    0],
       [  39,   54, 5714, ...,    0,    0,    0],
       ...,
       [   2,  133,    3, ...,    0,    0,    0],
       [  74, 1307,  246, ...,    0,    0,    0],
       [  15,  294,   96, ...,    0,    0,    0]], dtype=int32)

In [0]:
y_train_input = np.zeros(shape=y_train.shape, dtype=np.int)
y_train_input[:, :-1] = y_train[:, 1:]

In [0]:
y_train_input.shape

(95171, 49)

In [0]:
y_train_input

array([[ 409, 1266,    4, ...,    0,    0,    0],
       [ 502,   10,  198, ...,    0,    0,    0],
       [  54, 5714,    1, ...,    0,    0,    0],
       ...,
       [ 133,    3,    4, ...,    0,    0,    0],
       [1307,  246, 1039, ...,    0,    0,    0],
       [ 294,   96,   44, ...,    0,    0,    0]])

In [0]:
print(y_test.shape)
y_test

(23793, 49)


array([[  20, 5738, 2416, ...,    0,    0,    0],
       [   2,  172, 1097, ...,    0,    0,    0],
       [  15,   10,   58, ...,    0,    0,    0],
       ...,
       [   8,  447,    6, ...,    0,    0,    0],
       [  15, 1449,    3, ...,    0,    0,    0],
       [2261,   38,  304, ...,    0,    0,    0]], dtype=int32)

In [0]:
y_test_input = np.zeros(shape=y_test.shape, dtype=np.int)
y_test_input[:, :-1] = y_test[:, 1:]

In [0]:
y_test_input.shape

(23793, 49)

In [0]:
y_test_input

array([[5738, 2416, 1638, ...,    0,    0,    0],
       [ 172, 1097,    1, ...,    0,    0,    0],
       [  10,   58,  617, ...,    0,    0,    0],
       ...,
       [ 447,    6,  603, ...,    0,    0,    0],
       [1449,    3,  513, ...,    0,    0,    0],
       [  38,  304, 1030, ...,    0,    0,    0]])

In [0]:
# x_train, x_test
# y_train, y_test
# y_train_input, y_test_input

train_enc_input, test_enc_input = X_train, X_test
train_dec_output, test_dec_output = y_train, y_test
train_dec_input, test_dec_input = y_train_input, y_test_input

input_vocabulary_size = len(input_language_token.index_word) + 1
target_vocabulary_size = len(target_language_token.index_word) + 1

In [0]:
class Attention(tf.keras.layers.Layer):
  def __init__(self):
    super(Attention, self).__init__()
  
  def build(self, input_shape):
    # the attention layer gets [encoder_sequence, decoder_hidden_state, target_sequence_length]
    # the hidden state shape = (B, units)
    self.input_sequence_length = input_shape[0][1]
    self.hidden_dim = input_shape[0][2]
    self.target_sequence_length = input_shape[1][1]

    # score function is concat
    self.W_a = tf.keras.layers.Dense(units=self.hidden_dim, use_bias=False)
    self.W_a.build(input_shape=(None, None, self.hidden_dim))
    self._trainable_weights += self.W_a._trainable_weights

    self.U_a = tf.keras.layers.Dense(units=self.hidden_dim, use_bias=False)
    self.U_a.build(input_shape = (None, None, self.hidden_dim))
    self._trainable_weights += self.U_a._trainable_weights
    
    self.V_a = tf.keras.layers.Dense(1, use_bias=False)
    self.V_a.build(input_shape = (None, None, self.hidden_dim))
    self._trainable_weights += self.V_a._trainable_weights
    super(Attention, self).build(input_shape)
  
  def call(self, inputs):
    source_hidden_states = inputs[0]
    target_hidden_state = inputs[1]
    current_timestep = inputs[2]

    target_hidden_state = tf.expand_dims(target_hidden_state, 1)

    weighted_hidden_state = self.W_a(source_hidden_states)
    weighted_target_state = self.U_a(target_hidden_state)

    weighted_sum = weighted_hidden_state + weighted_target_state
    weighted_sum = tf.keras.layers.Activation('tanh')(weighted_sum)

    attention_score = self.V_a(weighted_sum)
    attention_weight = tf.keras.layers.Activation('softmax')(attention_score)
    
    context_vector = source_hidden_states * attention_weight
    return context_vector, attention_weight

  

In [0]:
input_sequence_length, target_sequence_length = train_enc_input.shape[-1], train_dec_input.shape[-1]

In [0]:
input_sequence_length

51

In [0]:
# Encoder 
X_input = tf.keras.layers.Input(shape = (input_sequence_length,), name='input_sequences')
emb_input = tf.keras.layers.Embedding(input_vocabulary_size, hp.embedding_dim)(X_input)
encoder_output = tf.compat.v1.keras.layers.CuDNNLSTM(128, return_sequences=True)(emb_input)
encoder_output

<tf.Tensor 'cu_dnnlstm_10/Identity:0' shape=(None, 51, 128) dtype=float32>

In [0]:
# Decoder
Y_input = tf.keras.layers.Input(shape = (target_sequence_length,), name='target_sequences')
emb_target = tf.keras.layers.Embedding(target_vocabulary_size, hp.embedding_dim)(Y_input)

# To get initial states
initial_hidden_state = tf.keras.layers.Input(shape=(128,), name='hidden_state')
initial_cell_state = tf.keras.layers.Input(shape=(128,), name='cell_state')

hidden_state = initial_hidden_state
cell_state = initial_cell_state

decoder_recurrent_layer = tf.compat.v1.keras.layers.CuDNNLSTM(128, return_state=True)


# Attention Apply
attention_layer = Attention()

decoder_output_dense = tf.keras.layers.Dense(target_vocabulary_size, activation='softmax')

outputs = []
for timestep in range(target_sequence_length):
  current_word = tf.keras.layers.Lambda(lambda x: x[:, timestep:timestep+1, :])(emb_target) 
  context_vector, attention_weight = attention_layer([encoder_output, hidden_state, timestep])

  decoder_input = tf.keras.layers.Concatenate(axis=1)([context_vector, current_word])
  output, hidden_state, cell_state = decoder_recurrent_layer(decoder_input, initial_state=[hidden_state, cell_state])
  decoder_outputs = decoder_output_dense(output)
  outputs.append(decoder_outputs)

print('Attention added output', outputs)

outputs = tf.keras.layers.Lambda(lambda x : tf.keras.backend.permute_dimensions(tf.stack(x), pattern=(1,0,2)))(outputs)
outputs

Attention added output [<tf.Tensor 'dense_38/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_1/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_2/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_3/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_4/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_5/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_6/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_7/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_8/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_9/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_10/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_11/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_12/Identity:0' shape=(None, 14547) dtype=float32>, <tf.Tensor 'dense_38_13/Identity:0' shape=(None, 145

<tf.Tensor 'lambda_302/Identity:0' shape=(None, 49, 14547) dtype=float32>

In [0]:
model = tf.keras.models.Model(inputs=[X_input, Y_input, initial_hidden_state, initial_cell_state],
                              outputs=outputs)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# print(model.summary())
# NOTE: Summary is omitted due to length, deriving from number of simulated recurrent connections


In [0]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
hidden_state (InputLayer)       [(None, 128)]        0                                            
__________________________________________________________________________________________________
tf_op_layer_ExpandDims_249 (Ten [(None, 1, 128)]     0           hidden_state[0][0]               
__________________________________________________________________________________________________
tf_op_layer_dense_40/Tensordot/ [(3,)]               0           tf_op_layer_ExpandDims_249[0][0] 
__________________________________________________________________________________________________
tf_op_layer_dense_40/Tensordot/ [(2,)]               0           tf_op_layer_dense_40/Tensordot/Sh
______________________________________________________________________________________________

In [0]:
tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True, dpi=200)

Output hidden; open in https://colab.research.google.com to view.

In [0]:
# Create placeholder variables of 0s
placeholder = tf.zeros(shape=(len(train_enc_input), 128))

# Train multi-step, multi-class classification model
model.fit(x={'input_sequences': train_enc_input, 'target_sequences': train_dec_input,
             'hidden_state': placeholder, 'cell_state': placeholder},
          y= train_dec_output,
          epochs=hp.num_epochs, batch_size=hp.batch_size)

Train on 95171 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30

KeyboardInterrupt: ignored

In [0]:
def evaluate(sentence):
  attention_plot = np.zeros((target_sequence_length, input_sequence_length))

  sentence = preprocess_sentence(sentence)

  inputs = [input_language_token[i] for i in sentence.split(' ')]
  inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs], maxlen=input_sequence_length, padding='post')
  inputs = tf.convert_to_tensor(inputs)

  result = ''

  hidden = [tf.zeros((1, 128))]
  enc_out, = encoder_output(inputs)

  for t in range(target_sequence_length):
    prediction, dec_hidden, attention_weight = 

In [0]:
test_dec_output.shape

(23793, 49)

In [0]:
# Test env

input_ =tf.keras.layers.Input(shape=(52,))
emb = tf.keras.layers.Embedding(100, 128)(input_)
gru = tf.keras.layers.GRU(100, return_sequences=True, return_state=True)(emb)

In [0]:
gru

[<tf.Tensor 'gru/Identity:0' shape=(None, 52, 100) dtype=float32>,
 <tf.Tensor 'gru/Identity_1:0' shape=(None, 100) dtype=float32>]

In [0]:
tf.rank(emb).numpy()
tf.rank(gru[0])

3