In [1]:
import numpy as np

import typing
from typing import Any, Tuple

import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing

import tensorflow_text as tf_text

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker


In [2]:
# Location of the tab-separated sentence-pair corpus that is passed to `load_data`.
path="data/ita.txt"

# Load the dataset and separate input from target

In [3]:
def load_data(path):
    """
    Load a tab-separated parallel corpus and return the aligned sentence lists.

    Every useful line is expected to hold at least two TAB-separated columns:
    the target sentence first, the input sentence second.  Any further
    columns (for example an attribution field) are ignored.  Blank lines and
    lines with fewer than two columns are skipped instead of aborting the
    whole load.

    Args:
        path: Path of the UTF-8 encoded text file to read.

    Returns:
        A ``(targ, inp)`` tuple of two equally long lists of strings, where
        ``targ[i]`` is the first column and ``inp[i]`` the second column of
        the same line.
    """
    with open(path, "r", encoding="utf-8") as f:
        lines = f.read().splitlines()

    targ, inp = [], []
    # Single pass over the file so both lists stay aligned by construction.
    for line in lines:
        fields = line.split('\t')
        if len(fields) < 2:
            # Blank or malformed line: there is no sentence pair to keep.
            continue
        targ.append(fields[0])
        inp.append(fields[1])

    return targ, inp

In [4]:
# `load_data` returns the first column of every line as `targ` and the second as `inp`.
targ, inp = load_data(path)

In [5]:
# Shuffle over the whole corpus (buffer as large as the data) and batch it.
BUFFER_SIZE = len(inp)
BATCH_SIZE = 64

# Each element is an (input sentence, target sentence) pair of strings.
dataset = (
    tf.data.Dataset.from_tensor_slices((inp, targ))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)

# Include all preprocessing inside the model

Keeping the text preprocessing inside the model makes it possible to export everything as a single TensorFlow SavedModel.

In [6]:
def tf_lower_and_split_punct(text):
    """Standardize raw sentences before they are tokenized.

    The text is Unicode-normalized (NFKD), lower-cased, stripped of every
    character other than space, a-z and the punctuation marks . ? ! , and the
    kept punctuation marks are surrounded by spaces.  The result is trimmed
    and wrapped in the '[START]' and '[END]' markers.

    Args:
        text: String tensor holding one or more sentences.

    Returns:
        String tensor with the standardized sentences.
    """
    # NFKD splits accented characters into base letter + combining mark; the
    # marks are then removed by the character filter below.
    normalized = tf.strings.lower(tf_text.normalize_utf8(text, 'NFKD'))
    # Keep only space, a to z, and the selected punctuation.
    filtered = tf.strings.regex_replace(normalized, '[^ a-z.?!,]', '')
    # Put a space on both sides of each punctuation mark.
    spaced = tf.strings.regex_replace(filtered, '[.?!,]', r' \0 ')
    trimmed = tf.strings.strip(spaced)

    return tf.strings.join(['[START]', trimmed, '[END]'], separator=' ')

In [7]:
# Small sample sentence used to try out the standardization function.
example_text = tf.constant('Ciao, tutto bene?')


In [8]:
# Show the raw sentence followed by its standardized form.
print(example_text.numpy().decode())
print(tf_lower_and_split_punct(example_text).numpy().decode())

Ciao, tutto bene?
[START] ciao ,  tutto bene ? [END]


## Text Vectorization
Build one `TextVectorization` layer for the input sentences and one for the target sentences.

In [9]:
# Upper bound on the vocabulary size given to the TextVectorization layers.
max_vocab_size = 5000

# Vectorizer for the input-side sentences; standardization is delegated to
# `tf_lower_and_split_punct`.
input_text_processor = preprocessing.TextVectorization(
    standardize=tf_lower_and_split_punct,
    max_tokens=max_vocab_size)

In [10]:
# Build the input vocabulary from the input-side sentences.
input_text_processor.adapt(inp)

# Here are the first 10 words from the vocabulary:
input_text_processor.get_vocabulary()[:10]


['', '[UNK]', '[START]', '[END]', '.', 'tom', '?', 'non', 'e', 'di']

In [11]:
# Vectorizer for the target-side sentences, configured like the input one.
output_text_processor = preprocessing.TextVectorization(
    standardize=tf_lower_and_split_punct,
    max_tokens=max_vocab_size)

# Build the target vocabulary and show its first 10 entries.
output_text_processor.adapt(targ)
output_text_processor.get_vocabulary()[:10]

['', '[UNK]', '[START]', '[END]', '.', 'you', 'tom', 'i', 'to', '?']

In [12]:
# Peek at one batch. `x` and `y` stay bound after the loop and are reused by
# later cells as the example input / target batch.
for x,y in dataset.take(1):
    print(x)
    print(y)

tf.Tensor(
[b'Quanto costa quello?' b'Tom era preoccupato per i tuoi figli.'
 b'Ha preso in affitto un appartamento.' b'Io feci un altro test.'
 b'Tom mi ha detto che non lo ha mai detto.' b'Ha detto che era bello.'
 b'La prossima volta sar\xc3\xa0 pi\xc3\xb9 fortunato.'
 b"C'\xc3\xa8 molta roba pericolosa in questo magazzino."
 b'A me non piacque il suggerimento di Tom.' b'Tom ha due impieghi.'
 b'Non lo sapevo che il costo della vita fosse cos\xc3\xac alto in Australia.'
 b'Ha ammesso la sua colpa.'
 b'Mi \xc3\xa8 passato velocemente il raffreddore.'
 b'Vorrei avere la tua fortuna.' b"Il secchio \xc3\xa8 pieno d'acqua."
 b'Inizialmente non le piaceva il cavallo.'
 b'Il dipinto si sta deteriorando.'
 b'Il nostro scaldabagno \xc3\xa8 rotto.'
 b'A me piacciono molti tipi di musica.' b'Tom aveva nostalgia di casa.'
 b'Potete aiutarmi, per piacere?'
 b"L'ufficio sta tenendo una festa di addio."
 b'Non andr\xc3\xb2 mai a Boston con te.'
 b'Io ho messo la mia valigia nel bagagliaio.'
 b'\xc

In [13]:
# Turn the example input batch into token IDs.
example_tokens = input_text_processor(x)


In [14]:
# Compare the first raw sentence with its token-ID representation.
print(x[0])
print(example_tokens[0])

tf.Tensor(b'Quanto costa quello?', shape=(), dtype=string)
tf.Tensor(
[   2  138 1230   75    6    3    0    0    0    0    0    0    0    0
    0    0], shape=(16,), dtype=int64)


In [16]:
# Size of the token embedding vectors (passed to both Encoder and Decoder).
embedding_dim = 256
# Number of GRU units (passed to both Encoder and Decoder).
units = 1024


## Encoder

The encoder receives the token IDs, learns an embedding for each token, and passes the embedded sequence through a GRU RNN. It returns the processed sequence and the final state.

In [17]:
class Encoder(tf.keras.layers.Layer):
    """Embeds input token IDs and runs them through a GRU.

    Args:
        input_vocab_size: Number of rows of the embedding table.
        embedding_dim: Size of each embedding vector.
        enc_units: Number of GRU units.
    """

    def __init__(self, input_vocab_size, embedding_dim, enc_units):
        super().__init__()
        self.enc_units = enc_units
        self.input_vocab_size = input_vocab_size

        # Token ID -> dense vector lookup.
        self.embedding = tf.keras.layers.Embedding(
            self.input_vocab_size, embedding_dim)

        # Recurrent layer; it is configured to hand back both the per-step
        # outputs and the final state.
        self.gru = tf.keras.layers.GRU(
            self.enc_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform')

    def call(self, tokens, state=None):
        """Encode a batch of token IDs.

        Args:
            tokens: Token IDs, shape (batch, s).
            state: Optional initial GRU state.

        Returns:
            ``(output, state)`` where ``output`` has shape
            (batch, s, enc_units) and ``state`` has shape (batch, enc_units).
        """
        embedded = self.embedding(tokens)
        sequence_output, final_state = self.gru(embedded, initial_state=state)
        return sequence_output, final_state

In [18]:
# Convert the input text to tokens.
# Convert the input text to tokens (same call as in the earlier tokenization cell).
example_tokens = input_text_processor(x)

# Encode the input sequence.
encoder = Encoder(input_text_processor.vocabulary_size(),
                  embedding_dim, units)
example_enc_output, example_enc_state = encoder(example_tokens)

# Report the shape at every stage of the encoder pipeline.
print(f'Input batch, shape (batch): {x.shape}')
print(f'Input batch tokens, shape (batch, s): {example_tokens.shape}')
print(f'Encoder output, shape (batch, s, units): {example_enc_output.shape}')
print(f'Encoder state, shape (batch, units): {example_enc_state.shape}')

Input batch, shape (batch): (64,)
Input batch tokens, shape (batch, s): (64, 16)
Encoder output, shape (batch, s, units): (64, 16, 1024)
Encoder state, shape (batch, units): (64, 1024)


## Attention

In [19]:
class BahdanauAttention(tf.keras.layers.Layer):
    """Additive attention with learned projections of the query and the keys.

    Args:
        units: Output size of the two bias-free projection layers.
    """

    def __init__(self, units):
        super().__init__()
        # Bias-free projections applied to the query (W1) and to the value
        # sequence to obtain the keys (W2).
        self.W1 = tf.keras.layers.Dense(units, use_bias=False)
        self.W2 = tf.keras.layers.Dense(units, use_bias=False)

        self.attention = tf.keras.layers.AdditiveAttention()

    def call(self, query, value, mask):
        """Attend over `value` with `query`.

        Args:
            query: Query tensor; its last axis is the feature axis.
            value: Sequence to attend over.
            mask: Boolean mask marking the valid positions of `value`.

        Returns:
            ``(context_vector, attention_weights)`` as produced by the
            wrapped `AdditiveAttention` layer.
        """
        projected_query = self.W1(query)
        projected_key = self.W2(value)

        # Every query position is treated as valid; only `value` is masked.
        all_queries_valid = tf.ones(tf.shape(query)[:-1], dtype=bool)

        context_vector, attention_weights = self.attention(
            inputs=[projected_query, value, projected_key],
            mask=[all_queries_valid, mask],
            return_attention_scores=True,
        )
        return context_vector, attention_weights

## Decoder

In [20]:
class DecoderInput(typing.NamedTuple):
    """Bundle of everything `Decoder.call` reads from its `inputs` argument."""
    # Token IDs that are looked up in the decoder's embedding layer.
    new_tokens: Any
    # Encoder output sequence, used as the attention `value`.
    enc_output: Any
    # Mask forwarded to the attention layer together with `enc_output`.
    mask: Any

class DecoderOutput(typing.NamedTuple):
    """First element of the pair returned by `Decoder.call`."""
    # Output of the decoder's final Dense layer (one score per vocabulary entry).
    logits: Any
    # Attention weights returned by the decoder's attention layer.
    attention_weights: Any


class Decoder(tf.keras.layers.Layer):
    """GRU decoder with attention over the encoder output.

    Args:
        output_vocab_size: Number of target tokens; size of the embedding
            table and of the logits' last axis.
        embedding_dim: Size of each target-token embedding vector.
        dec_units: Number of GRU units, also used for the attention
            projections and the `Wc` layer.
    """

    def __init__(self, output_vocab_size, embedding_dim, dec_units):
        super().__init__()
        self.dec_units = dec_units
        self.output_vocab_size = output_vocab_size
        self.embedding_dim = embedding_dim

        # Step 1: token IDs -> vectors.
        self.embedding = tf.keras.layers.Embedding(
            self.output_vocab_size, embedding_dim)

        # Step 2: recurrent layer that tracks what has been generated so far.
        self.gru = tf.keras.layers.GRU(
            self.dec_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform')

        # Step 3: the GRU output is used as the attention query.
        self.attention = BahdanauAttention(self.dec_units)

        # Step 4: bias-free tanh layer applied to [context; rnn_output].
        self.Wc = tf.keras.layers.Dense(
            dec_units, activation=tf.math.tanh, use_bias=False)

        # Step 5: projection to one logit per output token.
        self.fc = tf.keras.layers.Dense(self.output_vocab_size)

    def call(self,inputs: DecoderInput,state=None) -> Tuple[DecoderOutput, tf.Tensor]:
        """Run the decoder on `inputs.new_tokens`.

        Args:
            inputs: A `DecoderInput` with the new tokens, the encoder output
                and the mask for the encoder output.
            state: Optional initial GRU state.

        Returns:
            ``(DecoderOutput(logits, attention_weights), state)`` where
            ``state`` is the GRU state after processing the new tokens.
        """
        embedded = self.embedding(inputs.new_tokens)
        rnn_output, new_state = self.gru(embedded, initial_state=state)

        # Attend over the encoder output, using the GRU output as the query.
        context_vector, attention_weights = self.attention(
            query=rnn_output, value=inputs.enc_output, mask=inputs.mask)

        # Concatenate context and GRU output along the feature axis, then
        # squash through the tanh layer.
        attention_vector = self.Wc(
            tf.concat([context_vector, rnn_output], axis=-1))

        logits = self.fc(attention_vector)
        return DecoderOutput(logits, attention_weights), new_state

        
        


In [21]:
# Stand-alone decoder instance used for the shape check below.
decoder = Decoder(output_text_processor.vocabulary_size(),
                  embedding_dim, units)

In [22]:
# Convert the target sequence, and collect the "[START]" tokens
example_output_tokens = output_text_processor(y)

start_index = output_text_processor.get_vocabulary().index('[START]')
first_token = tf.constant([[start_index]] * example_output_tokens.shape[0])

In [23]:
# Run the decoder once on the "[START]" tokens, starting from the encoder's
# final state and masking the zero (padding) positions of the input tokens.
dec_result, dec_state = decoder(
    inputs = DecoderInput(new_tokens=first_token,
                          enc_output=example_enc_output,
                          mask=(example_tokens != 0)),
    state = example_enc_state
)

print(f'logits shape: (batch_size, t, output_vocab_size) {dec_result.logits.shape}')
print(f'state shape: (batch_size, dec_units) {dec_state.shape}')

logits shape: (batch_size, t, output_vocab_size) (64, 1, 5000)
state shape: (batch_size, dec_units) (64, 1024)


## Training the model

In [24]:
class MaskedLoss(tf.keras.losses.Loss):
    """Sparse categorical cross-entropy that ignores positions whose label is 0.

    `__call__` is overridden directly, so the object returns the plain sum of
    the unmasked per-element losses.
    """

    def __init__(self):
        self.name = 'masked_loss'
        # Unreduced cross-entropy on logits; the reduction is done in `__call__`.
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction='none')

    def __call__(self, y_true, y_pred):
        """Return the summed loss over all positions where `y_true` is not 0."""
        per_element = self.loss(y_true, y_pred)

        # 1.0 where the label is a real token, 0.0 where it is padding (ID 0).
        keep = tf.cast(y_true != 0, tf.float32)
        per_element *= keep

        return tf.reduce_sum(per_element)

In [25]:
class TrainTranslator(tf.keras.Model):
    """Keras model that owns an Encoder/Decoder pair and a custom train step.

    The helpers `_preprocess`, `_train_step` and `_loop_step` are attached to
    this class after its definition.

    Args:
        embedding_dim: Embedding size passed to the encoder and the decoder.
        units: Number of GRU units passed to the encoder and the decoder.
        input_text_processor: Vectorization layer for the input text; its
            `vocabulary_size()` sizes the encoder.
        output_text_processor: Vectorization layer for the target text; its
            `vocabulary_size()` sizes the decoder.
        use_tf_function: When true, `train_step` goes through the
            `tf.function`-wrapped `_tf_train_step`; otherwise it calls
            `_train_step` directly.
    """

    def __init__(self, embedding_dim, units,
               input_text_processor,
               output_text_processor, 
               use_tf_function=True):
        super().__init__()
        # Build the encoder and decoder
        encoder = Encoder(input_text_processor.vocabulary_size(),
                          embedding_dim, units)
        decoder = Decoder(output_text_processor.vocabulary_size(),
                          embedding_dim, units)

        self.encoder = encoder
        self.decoder = decoder
        self.input_text_processor = input_text_processor
        self.output_text_processor = output_text_processor
        self.use_tf_function = use_tf_function

    def train_step(self, inputs):
        """Dispatch one training step according to `use_tf_function`."""
        if self.use_tf_function:
            return self._tf_train_step(inputs)
        else:
            return self._train_step(inputs)

    @tf.function
    def _tf_train_step(self, inputs):
        """Graph-compiled entry point used when `use_tf_function` is true.

        Without this method the default `use_tf_function=True` configuration
        fails with AttributeError as soon as `train_step` is called.
        """
        return self._train_step(inputs)

In [42]:
def _preprocess(self, input_text, target_text):
    

    # Convert the text to token IDs
    input_tokens = self.input_text_processor(input_text)
    target_tokens = self.output_text_processor(target_text)


    # Convert IDs to masks.
    input_mask = input_tokens != 0


    target_mask = target_tokens != 0

    return input_tokens, input_mask, target_tokens, target_mask

In [43]:
# Attach the module-level function to the class as a method.
TrainTranslator._preprocess = _preprocess


In [48]:
@tf.function
def _train_step(self, inputs):
    """Run one optimization step on a batch of (input_text, target_text).

    The input is encoded once; the decoder is then run one target position
    at a time, each step receiving the current target token and being scored
    against the next one.  The summed step losses are divided by the number
    of non-zero target tokens and the gradient of that average is applied to
    all trainable variables.

    Args:
        inputs: Pair ``(input_text, target_text)``.

    Returns:
        Dict ``{'batch_loss': average_loss}``.
    """
    input_text, target_text = inputs  

    (input_tokens, input_mask,
    target_tokens, target_mask) = self._preprocess(input_text, target_text)

    # Number of target positions in this batch (second axis of the tokens).
    max_target_length = tf.shape(target_tokens)[1]

    with tf.GradientTape() as tape:
        # Encode the input
        enc_output, enc_state = self.encoder(input_tokens)


        # Initialize the decoder's state to the encoder's final state.
        # This only works if the encoder and decoder have the same number of
        # units.
        dec_state = enc_state
        loss = tf.constant(0.0)

        for t in tf.range(max_target_length-1):
            # Pass in two tokens from the target sequence:
            # 1. The current input to the decoder.
            # 2. The target for the decoder's next prediction.
            new_tokens = target_tokens[:, t:t+2]
            step_loss, dec_state = self._loop_step(new_tokens, input_mask,
                                             enc_output, dec_state)
            loss = loss + step_loss

        # Average the loss over all non padding tokens.
        # NOTE(review): `target_mask` also counts position 0 of every target
        # row, which is never used as a prediction target by the loop above
        # — confirm this normalisation is intended.
        average_loss = loss / tf.reduce_sum(tf.cast(target_mask, tf.float32))

    # Apply an optimization step
    variables = self.trainable_variables 
    gradients = tape.gradient(average_loss, variables)
    self.optimizer.apply_gradients(zip(gradients, variables))

    # Return a dict mapping metric names to current value
    return {'batch_loss': average_loss}

In [49]:
# Attach the module-level function to the class as a method.
TrainTranslator._train_step = _train_step


In [50]:
def _loop_step(self, new_tokens, input_mask, enc_output, dec_state):
    """Run the decoder for one target position and score its prediction.

    Args:
        new_tokens: Two adjacent columns of target token IDs; column 0 is fed
            to the decoder and column 1 is the label for its prediction.
        input_mask: Mask for `enc_output`, forwarded to the decoder.
        enc_output: Encoder output sequence.
        dec_state: Decoder state coming from the previous step.

    Returns:
        ``(step_loss, dec_state)``: the value of `self.loss` for this step
        and the decoder state after it.
    """
    current_token = new_tokens[:, 0:1]
    next_token = new_tokens[:, 1:2]

    # One decoder step on the current token.
    dec_result, next_state = self.decoder(
        DecoderInput(new_tokens=current_token,
                     enc_output=enc_output,
                     mask=input_mask),
        state=dec_state)

    # Compare the logits with the token that should come next.
    step_loss = self.loss(next_token, dec_result.logits)
    return step_loss, next_state

In [54]:
# Attach the module-level function to the class as a method.
TrainTranslator._loop_step = _loop_step


In [55]:
# Build the trainable model. With use_tf_function=False, `train_step` calls
# `_train_step` directly.
translator = TrainTranslator(
    embedding_dim, units,
    input_text_processor=input_text_processor,
    output_text_processor=output_text_processor,
    use_tf_function=False)

# Configure the loss and optimizer
translator.compile(
    optimizer=tf.optimizers.Adam(),
    loss=MaskedLoss(),
)

In [52]:
class BatchLogs(tf.keras.callbacks.Callback):
    """Keras callback that records one logged value after every training batch.

    Args:
        key: Name of the entry to pick from the per-batch `logs` dict.

    Attributes are plain Python: `key` is the entry name and `logs` is the
    list of collected values, in batch order.
    """

    def __init__(self, key):
        # Let the base class initialise its own attributes before adding ours.
        super().__init__()
        self.key = key
        self.logs = []

    def on_train_batch_end(self, n, logs=None):
        """Append `logs[self.key]` to `self.logs`; do nothing if `logs` is None."""
        if logs is None:
            return
        self.logs.append(logs[self.key])

# Collect the 'batch_loss' entry, which is the key `_train_step` returns.
batch_loss = BatchLogs('batch_loss')

In [56]:
# Train for 3 epochs, recording the per-batch loss through `batch_loss`.
translator.fit(dataset, epochs=3,
                     callbacks=[batch_loss])

Epoch 1/3

ResourceExhaustedError: 2 root error(s) found.
  (0) RESOURCE_EXHAUSTED:  OOM when allocating tensor with shape[64,1,256] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node while/decoder_5/embedding_11/embedding_lookup
 (defined at c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\layers\embeddings.py:191)
]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

	 [[Func/StatefulPartitionedCall/while/body/_59/input_control_node/_1259/_161]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

  (1) RESOURCE_EXHAUSTED:  OOM when allocating tensor with shape[64,1,256] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node while/decoder_5/embedding_11/embedding_lookup
 (defined at c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\layers\embeddings.py:191)
]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_55323]

Errors may have originated from an input operation.
Input Source operations connected to node while/decoder_5/embedding_11/embedding_lookup:
In[0] while/decoder_5/embedding_11/embedding_lookup/51680:	
In[1] while/strided_slice_1 (defined at C:\Users\matte\AppData\Local\Temp/ipykernel_20720/4086220853.py:2)

Operation defined at: (most recent call last)
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\runpy.py", line 194, in _run_module_as_main
>>>     return _run_code(code, main_globals, None,
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\runpy.py", line 87, in _run_code
>>>     exec(code, run_globals)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
>>>     app.launch_new_instance()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
>>>     app.start()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
>>>     self.io_loop.start()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
>>>     self.asyncio_loop.run_forever()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\asyncio\base_events.py", line 570, in run_forever
>>>     self._run_once()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\asyncio\base_events.py", line 1859, in _run_once
>>>     handle._run()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\asyncio\events.py", line 81, in _run
>>>     self._context.run(self._callback, *self._args)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
>>>     await self.process_one()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
>>>     await dispatch(*args)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
>>>     await result
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
>>>     reply_content = await reply_content
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
>>>     res = shell.run_cell(code, store_history=store_history, silent=silent)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
>>>     return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\interactiveshell.py", line 2901, in run_cell
>>>     result = self._run_cell(
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\interactiveshell.py", line 2947, in _run_cell
>>>     return runner(coro)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
>>>     coro.send(None)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\interactiveshell.py", line 3172, in run_cell_async
>>>     has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\interactiveshell.py", line 3364, in run_ast_nodes
>>>     if (await self.run_code(code, result,  async_=asy)):
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
>>>     exec(code_obj, self.user_global_ns, self.user_ns)
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/2481970495.py", line 1, in <module>
>>>     translator.fit(dataset, epochs=3,
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\training.py", line 1216, in fit
>>>     tmp_logs = self.train_function(iterator)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\training.py", line 878, in train_function
>>>     return step_function(self, iterator)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\training.py", line 867, in step_function
>>>     outputs = model.distribute_strategy.run(run_step, args=(data,))
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\training.py", line 860, in run_step
>>>     outputs = model.train_step(data)
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/2301851554.py", line 23, in train_step
>>>     return self._train_step(inputs)
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/3028482943.py", line 21, in _train_step
>>>     for t in tf.range(max_target_length-1):
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/3028482943.py", line 26, in _train_step
>>>     step_loss, dec_state = self._loop_step(new_tokens, input_mask,
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/4086220853.py", line 9, in _loop_step
>>>     dec_result, dec_state = self.decoder(decoder_input, state=dec_state)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\base_layer.py", line 1083, in __call__
>>>     outputs = call_fn(inputs, *args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/2678724283.py", line 43, in call
>>>     vectors = self.embedding(inputs.new_tokens)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\base_layer.py", line 1083, in __call__
>>>     outputs = call_fn(inputs, *args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\layers\embeddings.py", line 191, in call
>>>     out = tf.nn.embedding_lookup(self.embeddings, inputs)
>>> 

Input Source operations connected to node while/decoder_5/embedding_11/embedding_lookup:
In[0] while/decoder_5/embedding_11/embedding_lookup/51680:	
In[1] while/strided_slice_1 (defined at C:\Users\matte\AppData\Local\Temp/ipykernel_20720/4086220853.py:2)

Operation defined at: (most recent call last)
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\runpy.py", line 194, in _run_module_as_main
>>>     return _run_code(code, main_globals, None,
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\runpy.py", line 87, in _run_code
>>>     exec(code, run_globals)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
>>>     app.launch_new_instance()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
>>>     app.start()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
>>>     self.io_loop.start()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
>>>     self.asyncio_loop.run_forever()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\asyncio\base_events.py", line 570, in run_forever
>>>     self._run_once()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\asyncio\base_events.py", line 1859, in _run_once
>>>     handle._run()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\asyncio\events.py", line 81, in _run
>>>     self._context.run(self._callback, *self._args)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
>>>     await self.process_one()
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
>>>     await dispatch(*args)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
>>>     await result
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
>>>     reply_content = await reply_content
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
>>>     res = shell.run_cell(code, store_history=store_history, silent=silent)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
>>>     return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\interactiveshell.py", line 2901, in run_cell
>>>     result = self._run_cell(
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\interactiveshell.py", line 2947, in _run_cell
>>>     return runner(coro)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
>>>     coro.send(None)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\interactiveshell.py", line 3172, in run_cell_async
>>>     has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\interactiveshell.py", line 3364, in run_ast_nodes
>>>     if (await self.run_code(code, result,  async_=asy)):
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
>>>     exec(code_obj, self.user_global_ns, self.user_ns)
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/2481970495.py", line 1, in <module>
>>>     translator.fit(dataset, epochs=3,
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\training.py", line 1216, in fit
>>>     tmp_logs = self.train_function(iterator)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\training.py", line 878, in train_function
>>>     return step_function(self, iterator)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\training.py", line 867, in step_function
>>>     outputs = model.distribute_strategy.run(run_step, args=(data,))
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\training.py", line 860, in run_step
>>>     outputs = model.train_step(data)
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/2301851554.py", line 23, in train_step
>>>     return self._train_step(inputs)
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/3028482943.py", line 21, in _train_step
>>>     for t in tf.range(max_target_length-1):
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/3028482943.py", line 26, in _train_step
>>>     step_loss, dec_state = self._loop_step(new_tokens, input_mask,
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/4086220853.py", line 9, in _loop_step
>>>     dec_result, dec_state = self.decoder(decoder_input, state=dec_state)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\base_layer.py", line 1083, in __call__
>>>     outputs = call_fn(inputs, *args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "C:\Users\matte\AppData\Local\Temp/ipykernel_20720/2678724283.py", line 43, in call
>>>     vectors = self.embedding(inputs.new_tokens)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\engine\base_layer.py", line 1083, in __call__
>>>     outputs = call_fn(inputs, *args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "c:\users\matte\anaconda3\envs\ai_env\lib\site-packages\keras\layers\embeddings.py", line 191, in call
>>>     out = tf.nn.embedding_lookup(self.embeddings, inputs)
>>> 

Function call stack:
train_function -> while_body_51644_rewritten -> train_function -> while_body_51644_rewritten


In [57]:
class Translator(tf.Module):
    """Inference wrapper around an encoder/decoder pair.

    Args:
        encoder: Encoder layer used to encode the input tokens.
        decoder: Decoder layer used to generate the output tokens.
        input_text_processor: Vectorization layer for the input text.
        output_text_processor: Vectorization layer for the output text; its
            vocabulary drives the ID <-> string lookups built here.
    """

    def __init__(self, encoder, decoder, input_text_processor,
               output_text_processor):
        self.encoder = encoder
        self.decoder = decoder
        self.input_text_processor = input_text_processor
        self.output_text_processor = output_text_processor

        # Fetch the vocabulary once; both lookup layers are built from it.
        vocabulary = output_text_processor.get_vocabulary()

        # Token ID -> token string.
        self.output_token_string_from_index = (
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=vocabulary,
                mask_token='',
                invert=True))

        # The output should never generate padding, unknown, or start.
        index_from_string = tf.keras.layers.experimental.preprocessing.StringLookup(
            vocabulary=vocabulary, mask_token='')
        token_mask_ids = index_from_string(['', '[UNK]', '[START]']).numpy()

        # Boolean vector over the vocabulary, True for the forbidden tokens.
        # The builtin `bool` is used because the `np.bool` alias was
        # deprecated and later removed from NumPy.
        token_mask = np.zeros([index_from_string.vocabulary_size()], dtype=bool)
        token_mask[np.array(token_mask_ids)] = True
        self.token_mask = token_mask

        self.start_token = index_from_string(tf.constant('[START]'))
        self.end_token = index_from_string(tf.constant('[END]'))

In [59]:
# NOTE(review): this rebinds `translator` from the TrainTranslator instance to
# a Translator that reuses its encoder and decoder.
translator = Translator(
    encoder=translator.encoder,
    decoder=translator.decoder,
    input_text_processor=input_text_processor,
    output_text_processor=output_text_processor,
)

In [63]:
def tokens_to_text(self, result_tokens):
    """Turn rows of token IDs into space-joined, stripped strings.

    Args:
        result_tokens: Token IDs with the tokens of one sentence along axis 1.

    Returns:
        String tensor with one joined sentence per row.
    """
    # ID -> token string, then glue each row together with single spaces.
    token_strings = self.output_token_string_from_index(result_tokens)
    joined = tf.strings.reduce_join(token_strings, axis=1, separator=' ')

    # Remove leading/trailing whitespace from each joined sentence.
    return tf.strings.strip(joined)

In [64]:
# Attach the module-level function to the class as a method.
Translator.tokens_to_text = tokens_to_text


In [65]:
# Try `tokens_to_text` on a 5x2 batch of random token IDs drawn from the
# output vocabulary range.
example_output_tokens = tf.random.uniform(
    shape=[5, 2], minval=0, dtype=tf.int64,
    maxval=output_text_processor.vocabulary_size())
translator.tokens_to_text(example_output_tokens).numpy()

array([b'insisted name', b'where wounded', b'pompous unacceptable',
       b'audience beautiful', b'happily survivors'], dtype=object)

In [66]:
def sample(self, logits, temperature):
    """Pick the next token for every sequence from the decoder logits.

    Tokens flagged in `self.token_mask` get a logit of -inf so they are never
    chosen.

    Args:
        logits: Decoder logits; axis 1 is squeezed away in the sampling
            branch, so it is expected to have length 1 (assumes shape
            (batch, 1, vocab) — confirm against the caller).
        temperature: 0.0 selects the arg-max token; any other value divides
            the logits before drawing one sample per sequence.

    Returns:
        Tensor with the chosen token ID for each sequence.
    """
    # Add two leading axes so the vocabulary-sized mask lines up explicitly
    # with the last axis of `logits`.
    token_mask = self.token_mask[tf.newaxis, tf.newaxis, :]

    # Set the logits for all masked tokens to -inf, so they are never chosen.
    logits = tf.where(token_mask, -np.inf, logits)

    if temperature == 0.0:
        new_tokens = tf.argmax(logits, axis=-1)
    else:
        logits = tf.squeeze(logits, axis=1)
        new_tokens = tf.random.categorical(logits/temperature,
                                            num_samples=1)

    return new_tokens

In [67]:
# Attach the module-level function to the class as a method.
Translator.sample = sample

In [68]:
def translate_unrolled(self,
                       input_text, *,
                       max_length=50,
                       return_attention=True,
                       temperature=1.0):
    """Translate a batch of sentences with a Python-unrolled decoding loop.

    Args:
        input_text: Batch of input sentences.
        max_length: Maximum number of decoding steps.
        return_attention: Whether to include the attention weights in the
            result.
        temperature: Sampling temperature forwarded to `self.sample`.

    Returns:
        Dict with key 'text' (the generated sentences) and, when
        `return_attention` is true, key 'attention' (the per-step attention
        weights concatenated along axis 1).
    """
    batch_size = tf.shape(input_text)[0]
    input_tokens = self.input_text_processor(input_text)
    enc_output, enc_state = self.encoder(input_tokens)

    # The mask over the input depends only on the input tokens, so it is
    # computed once instead of on every decoding step.
    input_mask = (input_tokens != 0)

    dec_state = enc_state
    new_tokens = tf.fill([batch_size, 1], self.start_token)

    result_tokens = []
    attention = []
    done = tf.zeros([batch_size, 1], dtype=tf.bool)

    for _ in range(max_length):
        dec_input = DecoderInput(new_tokens=new_tokens,
                                 enc_output=enc_output,
                                 mask=input_mask)

        dec_result, dec_state = self.decoder(dec_input, state=dec_state)

        attention.append(dec_result.attention_weights)

        new_tokens = self.sample(dec_result.logits, temperature)

        # If a sequence produces an `end_token`, set it `done`
        done = done | (new_tokens == self.end_token)
        # Once a sequence is done it only produces 0-padding.
        new_tokens = tf.where(done, tf.constant(0, dtype=tf.int64), new_tokens)

        # Collect the generated tokens
        result_tokens.append(new_tokens)

        # Stop early only in eager mode, where `done` can be inspected.
        if tf.executing_eagerly() and tf.reduce_all(done):
            break

    # Convert the list of generated token ids to a tensor of strings.
    result_tokens = tf.concat(result_tokens, axis=-1)
    result_text = self.tokens_to_text(result_tokens)

    if return_attention:
        attention_stack = tf.concat(attention, axis=1)
        return {'text': result_text, 'attention': attention_stack}
    else:
        return {'text': result_text}

# Expose the unrolled implementation as the `translate` method of the class.
Translator.translate = translate_unrolled

In [71]:
# Two Italian sentences to translate, with their English meaning.
input_text = tf.constant([
    'Fa molto freddo qua.', # "It's really cold here."
    'Buongiorno a tutti.', # "Good morning, everyone."
])

result = translator.translate(
    input_text = input_text)

# Print the generated text for each of the two sentences.
print(result['text'][0].numpy().decode())
print(result['text'][1].numpy().decode())
print()

it took a very cold here .
they figured all of everyone .

