In [1]:
import os
import sklearn

import numpy as np 
import pandas as pd
import tensorflow as tf 
import tensorflow_datasets as tfds

from sklearn.model_selection import train_test_split
tf.__version__

  from .autonotebook import tqdm as notebook_tqdm


'2.11.0'

In [2]:
df = pd.read_csv("https://go.aws/38ECHUB", delimiter="\t", header=None, nrows=5000)
df.rename(columns={0:'en', 1:'fr'}, inplace=True)

In [3]:
df["en"] = df["en"].apply(lambda x : f"<start> {x}")

In [4]:
tokenizer_en = tf.keras.preprocessing.text.Tokenizer(filters='<>!"#$%&()*+,-./:;=?@[\\]^_`{|}~\t\n')
tokenizer_en.fit_on_texts(df["en"])
df["en_indices"] = tokenizer_en.texts_to_sequences(df["en"])

In [5]:
tokenizer_fr = tf.keras.preprocessing.text.Tokenizer(filters='<>!"#$%&()*+,-./:;=?@[\\]^_`{|}~\t\n')
tokenizer_fr.fit_on_texts(df["fr"])
df["fr_indices"] = tokenizer_fr.texts_to_sequences(df["fr"])

In [6]:
padding_en = tf.keras.preprocessing.sequence.pad_sequences( df["en_indices"], padding="post")
padding_fr = tf.keras.preprocessing.sequence.pad_sequences( df["fr_indices"], padding="post")

In [7]:
padding_fr.shape, padding_en.shape

((5000, 10), (5000, 5))

In [8]:
x_train, x_val, y_train, y_val =  train_test_split( padding_en,
                                                    padding_fr,
                                                    test_size=0.2)

In [9]:
BATCH_SIZE = 128

In [10]:
train = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(len(x_train)).batch(BATCH_SIZE)

In [11]:
n_embed = 16
n_gru = 32
vocab_inp_size = len(tokenizer_en.word_index)
vocab_tar_size = len(tokenizer_fr.word_index)

In [12]:
class encoder_maker(tf.keras.Model):
  def __init__(self, in_vocab_size, embed_dim, n_units):
    super().__init__()
   
    self.n_units = n_units
    self.embed = tf.keras.layers.Embedding(input_dim=in_vocab_size,
                                      output_dim=embed_dim)
  
    self.gru = tf.keras.layers.GRU(units=n_units,
                              return_sequences=True,
                              return_state=True)
  def __call__(self, input_batch):
   
    self.embed_out = self.embed(input_batch)
    self.gru_out, self.gru_state = self.gru(self.embed_out)

    return self.gru_out, self.gru_state

In [13]:
encoder = encoder_maker(vocab_inp_size+1, n_embed, n_gru)

In [14]:
class Bahdanau_attention_maker(tf.keras.layers.Layer):
  def __init__(self, attention_units):
    super().__init__()

    
    self.W1 = tf.keras.layers.Dense(units=attention_units)
    self.W2 = tf.keras.layers.Dense(units=attention_units)
    self.V = tf.keras.layers.Dense(units=1)

  def __call__(self, enc_out, state):
  
    self.W1_out = self.W1(enc_out)


    self.state = tf.expand_dims(state, axis = 1)
    self.W2_out = self.W2(self.state)

    self.sum = self.W1_out + self.W2_out  
    self.sum_scale = tf.nn.tanh(self.sum) 

    self.score = self.V(self.sum_scale) 

    self.attention_weights = tf.nn.softmax(self.score, axis=1)

    self.weighted_enc_out = enc_out * self.attention_weights

    self.context_vector = tf.reduce_sum(self.weighted_enc_out, axis=1)

    return self.context_vector, self.attention_weights

In [15]:
attention_layer = Bahdanau_attention_maker(8)

In [16]:
class decoder_maker(tf.keras.Model):
  def __init__(self, tar_vocab_size, embed_dim, n_units):
    super().__init__()
   
    self.embed = tf.keras.layers.Embedding(input_dim=tar_vocab_size, 
                                    output_dim=embed_dim)
    self.gru = tf.keras.layers.GRU(units=n_units, return_sequences=True,
                                   return_state=True)
    self.pred = tf.keras.layers.Dense(units=tar_vocab_size,activation="softmax")
    self.attention = Bahdanau_attention_maker(attention_units=n_units)

  def __call__(self, dec_in, enc_out, state):
  
    self.context_vector, self.attention_weights = self.attention(enc_out,state)

    self.embed_out = self.embed(dec_in)

    self.context_vector_expanded = tf.expand_dims(self.context_vector, axis=1)
    
    self.concat = tf.keras.layers.concatenate([self.embed_out,
                                               self.context_vector_expanded])

    self.gru_out, self.gru_state = self.gru(self.concat) 

    self.gru_out_reshape = tf.reshape(self.gru_out, shape=(-1, self.gru_out.shape[2]))

    self.pred_out = self.pred(self.gru_out_reshape)

    return self.pred_out, self.gru_state, self.attention_weights

In [17]:
decoder = decoder_maker(tar_vocab_size=vocab_tar_size+1, embed_dim=n_embed, n_units=n_gru)

In [18]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(reduction='none')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)

In [19]:
checkpoint_dir = './'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

In [20]:
def train_step(inp, targ):
  loss = 0

  with tf.GradientTape() as tape: 

    enc_output, enc_state = encoder(inp)

    dec_state = enc_state 

    dec_input = tf.expand_dims(targ[:,0], axis=1)

    for t in range(1, targ.shape[1]):
        pred, dec_state, _ = decoder(dec_input, enc_output, dec_state)

        loss += loss_function(targ[:, t], pred) 

        dec_input = tf.expand_dims(targ[:, t], 1)

  batch_loss = (loss / int(targ.shape[1]))

  variables = encoder.trainable_variables + decoder.trainable_variables 

  gradients = tape.gradient(loss, variables) 

  optimizer.apply_gradients(zip(gradients, variables))

  return batch_loss

In [21]:
import time
EPOCHS = 100

for epoch in range(EPOCHS):
  start = time.time()

  total_loss = 0

  for (batch, (inp, targ)) in enumerate(train):
    batch_loss = train_step(inp, targ)
    total_loss += batch_loss

    if batch % 10 == 0:
      print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                   batch,
                                                   batch_loss.numpy()))
  
  checkpoint.save(file_prefix = checkpoint_prefix)

  print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                      total_loss))
  print('Time taken for 1 epoch {} sec'.format(time.time() - start))

  enc_input = x_val

  dec_input = tf.zeros(shape=(len(x_val),1))

  enc_out, enc_state = encoder(enc_input)

  dec_state = enc_state

  pred = []  

  for i in range(y_val.shape[1]-1):
    dec_out, dec_state, attention_w = decoder(dec_input, enc_out, dec_state)

    decoded_out = tf.expand_dims(tf.argmax(dec_out, axis=-1), axis=1)
                
    pred.append(tf.expand_dims(dec_out,axis=1))
    dec_input = decoded_out

  pred = tf.concat(pred, axis=1).numpy()
  print("\n val loss :", loss_function(y_val[:,1:],pred),"\n")
  

Epoch 1 Batch 0 Loss 1.6202
Epoch 1 Batch 10 Loss 1.7353
Epoch 1 Batch 20 Loss 1.6820
Epoch 1 Batch 30 Loss 1.7331
Epoch 1 Loss 54.0695
Time taken for 1 epoch 7.483062028884888 sec

 val loss : tf.Tensor(1.8214467, shape=(), dtype=float32) 

Epoch 2 Batch 0 Loss 1.6909
Epoch 2 Batch 10 Loss 1.6707
Epoch 2 Batch 20 Loss 1.6278
Epoch 2 Batch 30 Loss 1.4641
Epoch 2 Loss 50.3373
Time taken for 1 epoch 7.340072870254517 sec

 val loss : tf.Tensor(1.5682112, shape=(), dtype=float32) 

Epoch 3 Batch 0 Loss 1.4121
Epoch 3 Batch 10 Loss 1.3880
Epoch 3 Batch 20 Loss 1.4174
Epoch 3 Batch 30 Loss 1.4044
Epoch 3 Loss 43.6051
Time taken for 1 epoch 7.567406177520752 sec

 val loss : tf.Tensor(1.5449655, shape=(), dtype=float32) 

Epoch 4 Batch 0 Loss 1.2708
Epoch 4 Batch 10 Loss 1.3971
Epoch 4 Batch 20 Loss 1.3518
Epoch 4 Batch 30 Loss 1.3473
Epoch 4 Loss 43.2569
Time taken for 1 epoch 7.4945549964904785 sec

 val loss : tf.Tensor(1.5559798, shape=(), dtype=float32) 

Epoch 5 Batch 0 Loss 1.3343
Epo

In [22]:
enc_input = x_val

dec_input = tf.zeros(shape=(len(x_val),1))

enc_out, enc_state = encoder(enc_input)

dec_state = enc_state

pred = []  

for i in range(y_val.shape[1]-1):
  dec_out, dec_state, attention_w = decoder(dec_input, enc_out, dec_state)

  decoded_out = tf.expand_dims(tf.argmax(dec_out, axis=-1), axis=1)

  pred.append(decoded_out)
  dec_input = decoded_out

pred = tf.concat(pred, axis=-1).numpy()
for i in range(10):
  print("pred:", pred[i,:].tolist())
  print("true:", y_val[i,:].tolist()[1:])
  print("\n")

pred: [18, 95, 267, 177, 18, 95, 76, 11, 18]
true: [28, 1782, 0, 0, 0, 0, 0, 0, 0]


pred: [18, 95, 267, 177, 18, 351, 18, 95, 267]
true: [61, 216, 40, 0, 0, 0, 0, 0, 0]


pred: [18, 260, 6, 3, 50, 204, 16, 26, 2]
true: [1, 34, 40, 0, 0, 0, 0, 0, 0]


pred: [9, 6, 299, 22, 429, 9, 6, 299, 22]
true: [123, 19, 122, 0, 0, 0, 0, 0, 0]


pred: [18, 95, 73, 5, 17, 5, 17, 5, 17]
true: [99, 4, 0, 0, 0, 0, 0, 0, 0]


pred: [9, 6, 221, 22, 429, 9, 24, 52, 496]
true: [2, 1535, 0, 0, 0, 0, 0, 0, 0]


pred: [177, 132, 11, 333, 177, 132, 11, 333, 177]
true: [438, 0, 0, 0, 0, 0, 0, 0, 0]


pred: [9, 6, 221, 22, 429, 9, 24, 52, 338]
true: [5, 176, 0, 0, 0, 0, 0, 0, 0]


pred: [9, 24, 52, 338, 22, 429, 9, 24, 52]
true: [6, 798, 0, 0, 0, 0, 0, 0, 0]


pred: [177, 562, 177, 132, 11, 333, 177, 132, 11]
true: [8, 1131, 0, 0, 0, 0, 0, 0, 0]


