# Import Statements

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.metrics import confusion_matrix, roc_curve
import seaborn as sns
import datetime
import pathlib
import io
import os
import re
import string
import time
from numpy import random
import tensorflow_datasets as tfds
import tensorflow_probability as tfp
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer
from tensorflow.keras.layers import (Dense,Flatten,SimpleRNN,InputLayer,Conv1D,Bidirectional,GRU,LSTM,BatchNormalization,Dropout,Input, Embedding,TextVectorization)
from tensorflow.keras.losses import BinaryCrossentropy,CategoricalCrossentropy, SparseCategoricalCrossentropy
from tensorflow.keras.metrics import Accuracy,TopKCategoricalAccuracy, CategoricalAccuracy, SparseCategoricalAccuracy
from tensorflow.keras.optimizers import Adam
from google.colab import drive
from google.colab import files
from tensorboard.plugins import projector

# Data Preparation

## Download the data

In [2]:
!wget https://www.manythings.org/anki/fra-eng.zip

--2024-04-24 12:40:51--  https://www.manythings.org/anki/fra-eng.zip
Resolving www.manythings.org (www.manythings.org)... 173.254.30.110
Connecting to www.manythings.org (www.manythings.org)|173.254.30.110|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7943074 (7.6M) [application/zip]
Saving to: ‘fra-eng.zip’


2024-04-24 12:40:52 (14.8 MB/s) - ‘fra-eng.zip’ saved [7943074/7943074]



In [3]:
!unzip "/content/fra-eng.zip" -d "/content/dataset/"

Archive:  /content/fra-eng.zip
  inflating: /content/dataset/_about.txt  
  inflating: /content/dataset/fra.txt  


## Dataset From Kaggle

In [4]:
!pip install -q kaggle
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json
!kaggle datasets download -d dhruvildave/en-fr-translation-dataset

Downloading en-fr-translation-dataset.zip to /content
100% 2.54G/2.54G [00:29<00:00, 129MB/s]
100% 2.54G/2.54G [00:29<00:00, 91.2MB/s]


In [5]:
!unzip "/content/en-fr-translation-dataset.zip" -d "/content/dataset/"

Archive:  /content/en-fr-translation-dataset.zip
  inflating: /content/dataset/en-fr.csv  


In [6]:
# Two string columns per CSV row.
# NOTE(review): the name `dataset` is rebound further down to the vectorized
# fra.txt pipeline, and no visible cell consumes this CSV dataset in between.
dataset = tf.data.experimental.CsvDataset(
  filenames="/content/dataset/en-fr.csv",
  record_defaults=[tf.string, tf.string],
)

The code snippet demonstrates the use of `tf.data.experimental.CsvDataset` in TensorFlow to create a dataset from a CSV file. Let's break down what this code does:

```python
dataset = tf.data.experimental.CsvDataset(
  "/content/dataset/en-fr.csv",
  [
    tf.string,
    tf.string
  ],
)
```

Explanation:
- **`tf.data.experimental.CsvDataset`**: This is a TensorFlow function used to create a dataset from CSV (Comma-Separated Values) files. It reads and parses the contents of the CSV file.
  
- **`"/content/dataset/en-fr.csv"`**: This is the path to the CSV file from which the dataset will be created.

- **`[tf.string, tf.string]`**: This specifies the structure of each row in the CSV file. In this case, each row is expected to contain two columns, both of type `tf.string`. This means that each row will be parsed as a tuple of two string tensors.

When you use `tf.data.experimental.CsvDataset`, TensorFlow will read the specified CSV file and parse each row according to the provided structure (`[tf.string, tf.string]` in this case). This creates a dataset where each element corresponds to a row in the CSV file, and each element is a tuple containing the parsed values from the respective columns of that row.


## Data processing

In [7]:
# One dataset element per line of fra.txt.
text_dataset=tf.data.TextLineDataset(filenames="/content/dataset/fra.txt")

The code snippet uses `tf.data.TextLineDataset` in TensorFlow to create a dataset from a text file where each line of the file becomes an element of the dataset. Let's break down this code:

```python
text_dataset = tf.data.TextLineDataset("/content/dataset/fra.txt")
```

Explanation:
- **`tf.data.TextLineDataset`**: This is a TensorFlow function used to create a dataset where each element corresponds to a line of text from a text file.

- **`"/content/dataset/fra.txt"`**: This is the path to the text file from which the dataset will be created. Each line of this text file will be treated as a separate element in the dataset.

When you use `tf.data.TextLineDataset`, TensorFlow reads the specified text file (`fra.txt` in this case) and creates a dataset where each element corresponds to a line of text from the file. Each line is treated as a string tensor.

In [8]:
# Hyper-parameters shared by the vectorization layers, the model and the input pipeline.
VOCAB_SIZE = 20_000            # max_tokens for both TextVectorization layers / embedding input size
ENGLISH_SEQUENCE_LENGTH = 64   # length of the encoder input sequences
FRENCH_SEQUENCE_LENGTH = 64    # length of the decoder input / target sequences
EMBEDDING_DIM = 300            # embedding vector width
BATCH_SIZE = 64                # examples per training batch

In [9]:
# Text -> integer ids for the English (encoder) side.
# The output length must follow ENGLISH_SEQUENCE_LENGTH, which is what the
# encoder Input layer is declared with; the original used the French constant,
# which only worked because both constants currently hold the same value.
english_vectorize_layer = TextVectorization(
    standardize='lower_and_strip_punctuation',
    max_tokens=VOCAB_SIZE,
    output_mode='int',
    output_sequence_length=ENGLISH_SEQUENCE_LENGTH
)

This code snippet sets up a `TextVectorization` layer in TensorFlow for English text.

Explanation:
- **`TextVectorization`**: This is a TensorFlow layer used for vectorizing text data. It tokenizes strings (e.g., sentences or documents) into integer sequences (sequences of token indices).

- **`standardize='lower_and_strip_punctuation'`**: This parameter specifies the standardization method for text preprocessing. Here, it converts all characters to lowercase and removes punctuation from the input text.

- **`max_tokens=VOCAB_SIZE`**: This sets the maximum size of the vocabulary (number of unique tokens) that the vectorization layer will generate. `VOCAB_SIZE` is a predefined constant that determines the vocabulary size.

- **`output_mode='int'`**: This parameter specifies the output mode of the vectorization layer. `'int'` indicates that the layer should output sequences of integers (token indices).

- **`output_sequence_length=FRENCH_SEQUENCE_LENGTH`**: This parameter sets the length of the output sequences generated by the vectorization layer. `FRENCH_SEQUENCE_LENGTH` determines the fixed length of output sequences. If a sequence is shorter than this length, it will be padded; if longer, it will be truncated.

In summary, `english_vectorize_layer` is configured to preprocess English text by converting it to lowercase, removing punctuation, tokenizing it into integer sequences based on a specified vocabulary size (`VOCAB_SIZE`), and ensuring that all output sequences have a fixed length (`FRENCH_SEQUENCE_LENGTH`).

In [10]:
# Text -> integer ids for the French (decoder) side; same settings as the
# English layer, with sequences fixed to FRENCH_SEQUENCE_LENGTH.
french_vectorize_layer = TextVectorization(
    max_tokens=VOCAB_SIZE,
    standardize='lower_and_strip_punctuation',
    output_sequence_length=FRENCH_SEQUENCE_LENGTH,
    output_mode='int',
)

In [11]:
def selector(input_text):
  """Turn one tab-separated line into ({'input_1', 'input_2'}, target).

  'input_1' is the first field, 'input_2' is the second field prefixed with
  'starttoken ', and the target is the second field suffixed with ' endtoken'.
  Slices (not scalar indexing) are used, so every piece keeps a leading axis.
  """
  fields = tf.strings.split(input_text, '\t')
  source_text = fields[0:1]
  translated_text = fields[1:2]
  model_inputs = {
      'input_1': source_text,
      'input_2': 'starttoken ' + translated_text,
  }
  return model_inputs, translated_text + ' endtoken'

Explanation:
- **`tf.strings.split(input_text, '\t')`**: This line uses TensorFlow's `tf.strings.split` function to split the input text (`input_text`) into segments based on the tab (`'\t'`) delimiter. This assumes that the input text is formatted with tab-separated values.

- **`split_text[0:1]`**: This extracts the first segment of the split text, corresponding to the first part before the tab delimiter.

- **`split_text[1:2]`**: This extracts the second segment of the split text, corresponding to the part after the tab delimiter.

- **`{'input_1': split_text[0:1], 'input_2': 'starttoken ' + split_text[1:2]}`**: This constructs a dictionary as the first part of the return value. It assigns the first segment (`split_text[0:1]`) to the key `'input_1'` and concatenates the second segment (`split_text[1:2]`) with the string `'starttoken '` to form the value associated with the key `'input_2'`.

- **`split_text[1:2] + ' endtoken'`**: This concatenates the second segment (`split_text[1:2]`) with the string `' endtoken'` as the second part of the return value.

Overall, the `selector` function processes input text by splitting it into two segments based on the tab delimiter (`'\t'`). It constructs a dictionary containing these segments (`input_1` and `input_2`) and returns a tuple with this dictionary as the first element and the processed text as the second element. This function is designed for data preprocessing or text manipulation tasks within a TensorFlow environment.

In [12]:
# Apply `selector` to every line: ({'input_1', 'input_2'}, target) string triples.
split_dataset = text_dataset.map(map_func=selector)

In [13]:
def separator(input_text):
  """Split one tab-separated line into (first field, wrapped second field).

  The second field is wrapped as 'starttoken <text> endtoken'. Slices are used
  instead of scalar indexing, so both results keep a leading axis.
  """
  fields = tf.strings.split(input_text, '\t')
  source_text = fields[0:1]
  wrapped_translation = 'starttoken ' + fields[1:2] + ' endtoken'
  return source_text, wrapped_translation

The `separator` function defined takes an input text and splits it using the tab (`'\t'`) delimiter. It then constructs and returns a tuple containing two elements:

1. **`split_text[0:1]`**: This extracts the first segment of the split text, which corresponds to the part before the tab delimiter.

2. **`'starttoken ' + split_text[1:2] + ' endtoken'`**: Here, `split_text[1:2]` extracts the second segment of the split text (the part after the tab delimiter). This segment is then concatenated with `'starttoken '` at the beginning and `' endtoken'` at the end to form a single string.

The function `separator` effectively preprocesses input text, splitting it into segments based on tab delimiters and formatting the output as described. The result is a tuple containing the processed segments suitable for further processing or analysis.

In [14]:
# (english, 'starttoken <french> endtoken') pairs, used only to adapt the vectorizers.
init_dataset = text_dataset.map(map_func=separator)

In [15]:
for i in split_dataset.take(3):
  print(i)

({'input_1': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Go.'], dtype=object)>, 'input_2': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'starttoken Va !'], dtype=object)>}, <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Va ! endtoken'], dtype=object)>)
({'input_1': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Go.'], dtype=object)>, 'input_2': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'starttoken Marche.'], dtype=object)>}, <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Marche. endtoken'], dtype=object)>)
({'input_1': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Go.'], dtype=object)>, 'input_2': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'starttoken En route !'], dtype=object)>}, <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'En route ! endtoken'], dtype=object)>)


In [16]:
# Keep only the first (English) component of each pair and build the English vocabulary from it.
english_training_data = init_dataset.map(lambda english, french: english)
english_vectorize_layer.adapt(english_training_data)

In [17]:
# Vocabulary sizes, English first and French second. The French layer is only
# adapted in a later cell, so at this point it reports just its initial
# entries (presumably the reserved padding/OOV tokens).
print(
    len(english_vectorize_layer.get_vocabulary()),
    len(french_vectorize_layer.get_vocabulary()),
    sep='\n',
)

16952
2


In [18]:
# Take x,y as input and output y, where x=english text, y= french translation
# Each element of init_dataset is (english text, wrapped french translation);
# keep only the French component.
french_training_data = init_dataset.map(lambda english, french: french)
# Build the French vocabulary from the extracted French text.
french_vectorize_layer.adapt(french_training_data)

In [19]:
def vectorizer(inputs,output):
  """Convert the string features and target produced by `selector` into token ids.

  `inputs['input_1']` goes through the English vectorizer; `inputs['input_2']`
  and `output` both go through the French vectorizer. The returned structure
  mirrors the input: ({'input_1', 'input_2'}, target).
  """
  encoder_tokens = english_vectorize_layer(inputs['input_1'])
  decoder_tokens = french_vectorize_layer(inputs['input_2'])
  target_tokens = french_vectorize_layer(output)
  return {'input_1': encoder_tokens, 'input_2': decoder_tokens}, target_tokens

The `vectorizer` function defined is a preprocessing function designed to vectorize input and output data for a machine learning model. Here's a breakdown of how it works:

- **Inputs**:
  - `inputs`: This parameter is expected to be a dictionary containing two keys:
    - `'input_1'`: This key corresponds to English text inputs.
    - `'input_2'`: This key corresponds to French text inputs.

- **Output**:
  - `output`: This parameter represents the target French text outputs.

- **Processing**:
  - `english_vectorize_layer(inputs['input_1'])`: This line uses the `english_vectorize_layer` (presumably a `TextVectorization` layer) to vectorize the English input text provided under the `'input_1'` key of the `inputs` dictionary.
  
  - `french_vectorize_layer(inputs['input_2'])`: Similarly, this line vectorizes the French input text provided under the `'input_2'` key using the `french_vectorize_layer`.
  
  - `french_vectorize_layer(output)`: This line vectorizes the target French text `output` using the `french_vectorize_layer`.

- **Return Value**:
  - The function returns a tuple:
    - The first element is a dictionary with two entries:
      - `'input_1'`: Vectorized representation of the English input text.
      - `'input_2'`: Vectorized representation of the French input text.
    - The second element is the vectorized representation of the target French output text.

Overall, the `vectorizer` function prepares input and output data for training a machine learning model. It uses specific vectorization layers (`english_vectorize_layer` and `french_vectorize_layer`) to convert raw text data into numerical formats suitable for model training. The output is structured to match the input requirements of a neural network model.

In [20]:
split_dataset

<_MapDataset element_spec=({'input_1': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'input_2': TensorSpec(shape=(None,), dtype=tf.string, name=None)}, TensorSpec(shape=(None,), dtype=tf.string, name=None))>

In [21]:
# Tokenize every (inputs, target) element; this rebinds `dataset`.
dataset = split_dataset.map(map_func=vectorizer)

In [22]:
for i in split_dataset.take(3):
  print(i)

({'input_1': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Go.'], dtype=object)>, 'input_2': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'starttoken Va !'], dtype=object)>}, <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Va ! endtoken'], dtype=object)>)
({'input_1': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Go.'], dtype=object)>, 'input_2': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'starttoken Marche.'], dtype=object)>}, <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Marche. endtoken'], dtype=object)>)
({'input_1': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Go.'], dtype=object)>, 'input_2': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'starttoken En route !'], dtype=object)>}, <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'En route ! endtoken'], dtype=object)>)


In [23]:
for i in dataset.take(1):
  print(i)

({'input_1': <tf.Tensor: shape=(1, 64), dtype=int64, numpy=
array([[45,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]])>, 'input_2': <tf.Tensor: shape=(1, 64), dtype=int64, numpy=
array([[  2, 104,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0]])>}, <tf.Tensor: shape=(1, 64), dtype=int64, numpy=
array([[104,   3,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,

In [24]:
dataset

<_MapDataset element_spec=({'input_1': TensorSpec(shape=(None, 64), dtype=tf.int64, name=None), 'input_2': TensorSpec(shape=(None, 64), dtype=tf.int64, name=None)}, TensorSpec(shape=(None, 64), dtype=tf.int64, name=None))>

In [25]:
# Shuffle, drop the leading size-1 axis left by the slicing in `selector`
# (unbatch), then regroup into BATCH_SIZE batches and prefetch.
#
# reshuffle_each_iteration=False keeps the shuffled order identical on every
# pass. The train/validation split below is done with take()/skip() on this
# dataset; with the default (reshuffle on every iteration) the boundary would
# move each epoch and examples would leak between the two subsets.
dataset=(
    dataset
    .shuffle(2048,reshuffle_each_iteration=False)
    .unbatch()
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

In [26]:
dataset

<_PrefetchDataset element_spec=({'input_1': TensorSpec(shape=(None, 64), dtype=tf.int64, name=None), 'input_2': TensorSpec(shape=(None, 64), dtype=tf.int64, name=None)}, TensorSpec(shape=(None, 64), dtype=tf.int64, name=None))>

In [27]:
# Number of full batches in 200000 examples (floor division).
# NOTE(review): 200000 is a hard-coded example count — confirm it matches the size of fra.txt.
NUM_BATCHES = 200000 // BATCH_SIZE

In [28]:
# First 90% of NUM_BATCHES batches go to training; everything after that is validation.
train_dataset = dataset.take(count=int(NUM_BATCHES * 0.9))
val_dataset = dataset.skip(count=int(NUM_BATCHES * 0.9))

In [29]:
train_dataset

<_TakeDataset element_spec=({'input_1': TensorSpec(shape=(None, 64), dtype=tf.int64, name=None), 'input_2': TensorSpec(shape=(None, 64), dtype=tf.int64, name=None)}, TensorSpec(shape=(None, 64), dtype=tf.int64, name=None))>

In [30]:
# NOTE(review): the recorded output of this cell is
# "TypeError: The dataset length is unknown." — len() is not usable on this
# pipeline, so a Restart & Run All stops here. Consider removing the cell or
# inspecting dataset.cardinality() instead.
len(dataset)

TypeError: The dataset length is unknown.

# Modeling

In [31]:
# GRU width per encoder direction; the decoder GRU uses twice this value.
NUM_UNITS = 256

In [32]:
# Define the encoder
input = Input(shape=(ENGLISH_SEQUENCE_LENGTH,), dtype='int64', name='input_1')
x = Embedding(VOCAB_SIZE, EMBEDDING_DIM, )(input)
encoded_input = Bidirectional(GRU(NUM_UNITS), )(x)

### DECODER
shifted_target=Input(shape=(FRENCH_SEQUENCE_LENGTH,), dtype="int64", name="input_2")
x=Embedding(VOCAB_SIZE,EMBEDDING_DIM,)(shifted_target)
x = GRU(NUM_UNITS*2, return_sequences=True)(x, initial_state=encoded_input)

### OUTPUT
x = Dropout(0.5)(x)
target=Dense(VOCAB_SIZE,activation="softmax")(x)
seq2seq_gru=Model([input,shifted_target],target)
seq2seq_gru.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 64)]                 0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 64)]                 0         []                            
                                                                                                  
 embedding (Embedding)       (None, 64, 300)              6000000   ['input_1[0][0]']             
                                                                                                  
 embedding_1 (Embedding)     (None, 64, 300)              6000000   ['input_2[0][0]']             
                                                                                              

This code snippet defines a sequence-to-sequence (seq2seq) model using a GRU-based encoder-decoder architecture.
### Encoder
```python
input = Input(shape=(ENGLISH_SEQUENCE_LENGTH,), dtype='int64', name='input_1')
x = Embedding(VOCAB_SIZE, EMBEDDING_DIM)(input)
encoded_input = Bidirectional(GRU(NUM_UNITS))(x)
```
- **Input Layer (`input`):** Defines the input layer for the encoder, specifying the shape of the input sequences (`ENGLISH_SEQUENCE_LENGTH`) and data type (`int64`).
  
- **Embedding Layer (`Embedding`):** Maps input integer sequences to dense vectors (`EMBEDDING_DIM`) using an embedding matrix of size `VOCAB_SIZE`.

- **Bidirectional GRU (`Bidirectional(GRU)`):** Applies a bidirectional GRU (Gated Recurrent Unit) layer to the embedded input sequences. This layer processes the input sequences in both forward and backward directions, capturing dependencies in both contexts.

### Decoder
```python
shifted_target = Input(shape=(FRENCH_SEQUENCE_LENGTH,), dtype="int64", name="input_2")
x = Embedding(VOCAB_SIZE, EMBEDDING_DIM)(shifted_target)
x = GRU(NUM_UNITS*2, return_sequences=True)(x, initial_state=encoded_input)
```
- **Input Layer (`shifted_target`):** Defines the input layer for the decoder, specifying the shape of the target sequences (`FRENCH_SEQUENCE_LENGTH`) and data type (`int64`).

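- **Embedding Layer (`Embedding`):** Similar to the encoder, this layer maps target integer sequences to dense vectors. It is a separate embedding layer with its own weights (`embedding_1` in the model summary), created with the same dimensions (`VOCAB_SIZE`, `EMBEDDING_DIM`) as the encoder's.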

- **GRU Decoder (`GRU`):** Applies a GRU layer to the embedded target sequences. The `initial_state` parameter is set to the `encoded_input` from the encoder, initializing the decoder's hidden state with the final state of the encoder.

### Output
```python
x = Dropout(0.5)(x)
target = Dense(VOCAB_SIZE, activation="softmax")(x)
```
- **Dropout Layer (`Dropout`):** Applies dropout regularization to prevent overfitting by randomly setting a fraction of input units to zero during training (`0.5` in this case).

- **Dense Layer (`Dense`):** Computes the output probabilities over the vocabulary (`VOCAB_SIZE`) using a softmax activation function, predicting the next token in the target sequence.

### Model Definition
```python
seq2seq_gru = Model([input, shifted_target], target)
seq2seq_gru.summary()
```
- **Model Definition (`Model`):** Combines the encoder and decoder layers into a functional Keras model, taking both the input and target sequences as inputs and outputting the predicted target sequences (`target`).

- **Model Summary (`summary()`):** Displays the summary of the entire model architecture, showing the layer types, output shapes, and trainable parameters.

This architecture represents a basic seq2seq model using GRU cells for sequence encoding and decoding. It's commonly used for tasks like machine translation, where the model learns to generate a target sequence (e.g., translated sentences) from a given input sequence (e.g., source sentences).

In [33]:
class BLEU(tf.keras.metrics.Metric):
    """Simplified, unigram-precision style score reported as a Keras metric.

    For every (prediction, reference) pair in a batch, each predicted token
    (up to the first 0) is matched against at most one not-yet-used reference
    token; the pair's score is matches / number of non-zero predicted tokens.
    The score is recomputed from scratch on every `update_state` call, so
    `result` reflects the most recent batch only.

    NOTE(review): the Python-level iteration over tensors presumably requires
    eager execution (e.g. `run_eagerly=True` in `compile`) — confirm.
    """

    def __init__(self,name='bleu_score',**kwargs):
        # Forward `name` (previously ignored) so the metric is reported under it.
        super(BLEU,self).__init__(name=name,**kwargs)
        self.bleu_score=0

    def update_state(self,y_true,y_pred,sample_weight=None):
      """Score one batch; `y_pred` holds per-token class probabilities, `sample_weight` is unused."""
      y_pred=tf.argmax(y_pred,-1)
      self.bleu_score=0
      for i,j in zip(y_pred,y_true):
        tf.autograph.experimental.set_loop_options()

        total_words=tf.math.count_nonzero(i)
        total_matches=0
        for word in i:
          if word==0:
            break
          for q in range(len(j)):
            if j[q]==0:
              break
            if word==j[q]:
              total_matches+=1
              # Drop the matched reference token so it cannot be matched twice.
              j=tf.boolean_mask(j,[False if y==q else True for y in range(len(j))])
              break

        # A prediction made only of padding has no words; skipping it avoids
        # a 0/0 division that would turn the whole batch score into NaN.
        if total_words>0:
          self.bleu_score+=total_matches/total_words

    def result(self):
        """Return the accumulated score divided by the global BATCH_SIZE."""
        # NOTE(review): a final, smaller batch is still divided by BATCH_SIZE.
        return self.bleu_score/BATCH_SIZE

    def reset_state(self):
        """Clear the Python-side accumulator, which the base class does not track."""
        self.bleu_score=0

This is a custom BLEU (Bilingual Evaluation Understudy) metric in TensorFlow/Keras. The BLEU score is a common metric used for evaluating the quality of machine-translated text against one or more reference translations. Let's explain the key components of this custom metric implementation:

- **Class Definition (`BLEU`):** Defines a custom metric class named `BLEU` that inherits from `tf.keras.metrics.Metric`.

- **Initialization (`__init__`):** Initializes the BLEU metric by calling the superclass constructor (`super(BLEU, self).__init__()`). Here, the default name for the metric is set to `'bleu_score'`.

- **`update_state` Method:** This method is called to update the metric state based on the ground truth (`y_true`) and predicted (`y_pred`) values for each batch of data.

  - **Input Arguments:**
    - `y_true`: Ground truth values (expected output).
    - `y_pred`: Predicted values (model output).
    - `sample_weight`: Optional sample weights.

  - **Implementation Details:**
    1. **Argmax Prediction:** Converts the predicted values (`y_pred`) into class labels by taking the argmax along the last axis (`-1`). This is typical for sequence prediction tasks.
    
    2. **BLEU Calculation:** Iterates over each pair of predicted (`y_pred`) and true (`y_true`) sequences in the batch. For each sequence pair:
       - Calculates the total number of non-zero words (`total_words`) in the predicted sequence (`i`).
       - Counts the number of matching words (`total_matches`) between the predicted sequence and the true sequence (`j`). The matching process involves:
         - Iterating through each word in the predicted sequence.
         - Checking if the word exists in the true sequence (`j`).
         - Removing the matched word from the true sequence to prevent double counting.
       - Computes the BLEU score contribution for the sequence pair based on the ratio of `total_matches` to `total_words`.
       - Accumulates the BLEU score (`self.bleu_score`) by adding the contribution of each sequence pair in the batch.

- **`result` Method:** Computes the final BLEU score by dividing the accumulated BLEU score (`self.bleu_score`) by the batch size (`BATCH_SIZE`) and returns the result.

This custom BLEU metric implementation provides a way to compute BLEU scores directly within the TensorFlow/Keras framework for evaluating sequence prediction models. Note that the BLEU computation here is simplified and may not fully capture all aspects of the standard BLEU metric used in machine translation evaluation, such as handling n-grams and brevity penalty.

In [34]:
# Targets are integer token ids, hence the sparse cross-entropy loss.
# 'accuracy' is tracked because the accuracy plot further down reads
# history.history['accuracy'] / ['val_accuracy']; without a compiled metric
# those keys do not exist and that cell raises KeyError.
# The custom BLEU metric stays disabled, as in the original cell, where it
# was commented out together with run_eagerly=True.
seq2seq_gru.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(5e-4),
    metrics=['accuracy'],
)

In [35]:
# The checkpoint lives on Google Drive, so Drive has to be mounted first;
# `drive` is imported at the top of the notebook but was never mounted.
drive.mount('/content/drive')

# NOTE(review): the path says "Sentiments_analysis/lstm.h5" although this
# notebook trains a GRU translation model — confirm the intended location.
checkpoint_filepath = '/content/drive/MyDrive/NLP Repository/Projects/Sentiments_analysis/lstm.h5'
# Make sure the target folder exists before the first save at the end of an epoch.
os.makedirs(os.path.dirname(checkpoint_filepath), exist_ok=True)

# Keep only the weights with the lowest validation loss seen so far.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,)

In [None]:
# Train for 20 epochs, validating after each one and checkpointing through the callback.
history = seq2seq_gru.fit(
    x=train_dataset,
    epochs=20,
    validation_data=val_dataset,
    callbacks=[model_checkpoint_callback],
)

Epoch 1/20
    478/Unknown - 5397s 11s/step - loss: 0.6783

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='val')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('model_loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='val')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('model_accuracy')
plt.legend(loc='upper left')
plt.show()

# Evaluation

In [None]:
seq2seq_gru.evaluate(val_dataset)

# Testing

In [None]:
# Token id -> French vocabulary entry, in vocabulary order.
index_to_word = dict(enumerate(french_vectorize_layer.get_vocabulary()))

In [None]:

def translator(english_sentence):
  """Greedily translate one English sentence with `seq2seq_gru`.

  The decoder input starts as 'starttoken'. At step i the model's most likely
  token at position i is appended, until 'endtoken' (or padding) is predicted
  or FRENCH_SEQUENCE_LENGTH tokens have been produced.

  Args:
    english_sentence: a single English sentence as a Python string.

  Returns:
    The generated French words joined by spaces, without the start marker
    ('' when nothing was generated).
  """
  start_marker='starttoken'
  tokenized_english_sentence=english_vectorize_layer([english_sentence])
  shifted_target=start_marker

  for i in range(FRENCH_SEQUENCE_LENGTH):
    tokenized_shifted_target=french_vectorize_layer([shifted_target])
    # A direct call avoids predict()'s per-call overhead and progress output,
    # which adds up when invoked once per generated token.
    output=seq2seq_gru([tokenized_english_sentence,tokenized_shifted_target],training=False)
    french_word_index=tf.argmax(output,axis=-1)[0][i].numpy()
    current_word=index_to_word[french_word_index]
    # Stop on the end marker. Also stop on the empty padding entry: appending
    # it would not lengthen the decoder input, so position i+1 would be read
    # from a padded slot on the next step.
    if current_word in ('endtoken',''):
      break
    shifted_target+=' '+current_word
  # Drop the start marker and the space that follows it.
  return shifted_target[len(start_marker)+1:]

In [None]:
translator('What makes you think that is not true?')

In [None]:
# French vocabulary entry -> token id (inverse of index_to_word).
word_to_index = {
    word: index
    for index, word in enumerate(french_vectorize_layer.get_vocabulary())
}