In [9]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import time

# Check for GPU availability
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Load and preprocess the dataset
path_to_file = r"D:\Artificial\DL\RNN\eng-french\sec\eng_-french.csv"
df = pd.read_csv(path_to_file)
eng_sentences = df['English words/sentences'].values
# Wrap every target sentence in explicit start/end markers so the decoder
# has a fixed first input token and a token that signals termination.
fr_sentences = ['<start> ' + sent + ' <end>' for sent in df['French words/sentences'].values]

# Tokenize the sentences
# filters='' disables the tokenizer's punctuation stripping, which keeps the
# angle brackets of '<start>' / '<end>' intact.
eng_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
eng_tokenizer.fit_on_texts(eng_sentences)
eng_sequences = eng_tokenizer.texts_to_sequences(eng_sentences)

fr_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
fr_tokenizer.fit_on_texts(fr_sentences)
fr_sequences = fr_tokenizer.texts_to_sequences(fr_sentences)

# Pad sequences
max_eng_len = 20
max_fr_len = 20
# truncating='post' is required here: the default ('pre') cuts tokens from the
# FRONT of over-long sequences, which would drop the leading '<start>' token
# and the beginning of the sentence. Cutting from the end keeps source and
# target aligned on the start of each sentence.
eng_padded = tf.keras.preprocessing.sequence.pad_sequences(
    eng_sequences, maxlen=max_eng_len, padding='post', truncating='post')
fr_padded = tf.keras.preprocessing.sequence.pad_sequences(
    fr_sequences, maxlen=max_fr_len, padding='post', truncating='post')

# Split into training and validation sets
eng_train, eng_val, fr_train, fr_val = train_test_split(eng_padded, fr_padded, test_size=0.2)

# Create tf.data.Dataset
BATCH_SIZE = 64
# drop_remainder=True keeps every batch at exactly BATCH_SIZE rows; the
# encoder's initial state and the decoder's first input are built with that
# fixed size.
train_dataset = tf.data.Dataset.from_tensor_slices((eng_train, fr_train)).shuffle(len(eng_train)).batch(BATCH_SIZE, drop_remainder=True)
val_dataset = tf.data.Dataset.from_tensor_slices((eng_val, fr_val)).batch(BATCH_SIZE, drop_remainder=True)

# Define Bahdanau Attention
class BahdanauAttention(tf.keras.layers.Layer):
    """Additive attention: score = V(tanh(W1(values) + W2(query))).

    `call` returns a context vector (the attention-weighted sum of `values`
    over axis 1) together with the attention weights themselves.
    """

    def __init__(self, units):
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # assumes query is (batch, hidden) and values is (batch, time, hidden)
        # -- TODO confirm against callers. The inserted axis lets the
        # projected query broadcast across every position of `values`.
        expanded_query = tf.expand_dims(query, 1)

        projected_values = self.W1(values)
        projected_query = self.W2(expanded_query)
        score = self.V(tf.nn.tanh(projected_values + projected_query))

        # Normalise the scores along axis 1 so they sum to one there.
        attention_weights = tf.nn.softmax(score, axis=1)

        # Weighted sum of `values` along the same axis.
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        return context_vector, attention_weights

# Define Encoder
class Encoder(tf.keras.Model):
    """Embedding followed by a single LSTM that returns all outputs and its final state."""

    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        super().__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.lstm = tf.keras.layers.LSTM(
            self.enc_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )

    def call(self, x, hidden):
        """Embed `x`, run the LSTM from `hidden`, return (outputs, [state_h, state_c])."""
        embedded = self.embedding(x)
        sequence_out, final_h, final_c = self.lstm(embedded, initial_state=hidden)
        return sequence_out, [final_h, final_c]

    def initialize_hidden_state(self):
        """Return zero-filled [h, c] states of shape (batch_sz, enc_units)."""
        state_shape = (self.batch_sz, self.enc_units)
        return [tf.zeros(state_shape), tf.zeros(state_shape)]

# Define Decoder
class Decoder(tf.keras.Model):
    """Attention-augmented LSTM decoder that produces vocabulary logits."""

    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        super().__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.lstm = tf.keras.layers.LSTM(
            self.dec_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )
        self.fc = tf.keras.layers.Dense(vocab_size)
        self.attention = BahdanauAttention(self.dec_units)

    def call(self, x, hidden, enc_output):
        """Run one decoding pass.

        Args:
            x: token ids fed to the embedding layer.
            hidden: [state_h, state_c] used as the LSTM's initial state; the
                h-state (index 0) is also the attention query.
            enc_output: encoder outputs that attention is computed over.

        Returns:
            (logits, [state_h, state_c], attention_weights), where logits are
            the LSTM outputs flattened to 2-D and passed through `fc`.
        """
        # Attend over the encoder outputs using the previous h-state.
        context_vector, attention_weights = self.attention(hidden[0], enc_output)

        embedded = self.embedding(x)
        # Prepend the context vector to the embedding along the feature axis.
        context_step = tf.expand_dims(context_vector, 1)
        lstm_input = tf.concat([context_step, embedded], axis=-1)

        lstm_out, state_h, state_c = self.lstm(lstm_input, initial_state=hidden)

        # Collapse the leading axes so the dense layer sees one row per step.
        flat_out = tf.reshape(lstm_out, (-1, lstm_out.shape[2]))
        logits = self.fc(flat_out)
        return logits, [state_h, state_c], attention_weights

# Model parameters
embedding_dim = 256
units = 1024
# +1 because the tokenizers' word indices start at 1; index 0 is the value
# pad_sequences fills with, so the embedding tables need a row for it too.
eng_vocab_size = len(eng_tokenizer.word_index) + 1
fr_vocab_size = len(fr_tokenizer.word_index) + 1
encoder = Encoder(eng_vocab_size, embedding_dim, units, BATCH_SIZE)
decoder = Decoder(fr_vocab_size, embedding_dim, units, BATCH_SIZE)

# Optimizer and loss function
optimizer = tf.keras.optimizers.Adam()
# reduction='none' makes the loss object return one value per example instead
# of a single batch-averaged scalar. loss_function multiplies the result by a
# padding mask, which only removes padded positions when the loss is still
# per-example; with the default reduction the mask merely rescales a scalar
# that already includes the padding tokens.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    """Return the mean of `loss_object(real, pred)` weighted by a padding mask.

    Positions where `real` equals 0 get weight 0, all others weight 1.

    NOTE(review): the mask can only zero out individual padded positions if
    `loss_object` yields one loss per example (reduction='none'). If it
    reduces to a scalar, the product below just scales that scalar -- confirm
    how `loss_object` is configured.
    """
    raw_loss = loss_object(real, pred)
    is_token = tf.math.not_equal(real, 0)
    weights = tf.cast(is_token, dtype=raw_loss.dtype)
    return tf.reduce_mean(raw_loss * weights)

# Training step
@tf.function
def train_step(inp, targ, enc_hidden):
    """Run one optimisation step on a batch and return its per-step loss.

    The decoder is driven with teacher forcing: its first input is the
    '<start>' token id and every later input is the ground-truth token of the
    previous position. Gradients of the summed loss are applied to the
    encoder's and decoder's trainable variables.

    Returns:
        The summed loss divided by the target sequence length.
    """
    num_steps = targ.shape[1]
    start_id = fr_tokenizer.word_index['<start>']
    summed_loss = 0

    with tf.GradientTape() as tape:
        enc_output, enc_state = encoder(inp, enc_hidden)
        # The decoder starts from the encoder's final state.
        dec_state = enc_state
        dec_input = tf.expand_dims([start_id] * BATCH_SIZE, 1)

        # Position 0 is skipped as a target; it is the token fed in first.
        for t in range(1, num_steps):
            target_t = targ[:, t]
            logits, dec_state, _ = decoder(dec_input, dec_state, enc_output)
            summed_loss += loss_function(target_t, logits)
            # Teacher forcing: the true token becomes the next input.
            dec_input = tf.expand_dims(target_t, 1)

    trainable = encoder.trainable_variables + decoder.trainable_variables
    grads = tape.gradient(summed_loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))

    return summed_loss / int(num_steps)

# Validation step
def validate_step(inp, targ):
    """Evaluate one batch with teacher forcing, without updating weights.

    Args:
        inp: batch of source token ids.
        targ: batch of target token ids; position 0 is only used implicitly
            (the decoder is started from the '<start>' id) and 0 marks padding.

    Returns:
        (loss, correct, tokens):
            loss    -- cross-entropy summed over all non-padding target tokens,
            correct -- number of non-padding tokens whose argmax prediction
                       matches the target,
            tokens  -- number of non-padding target tokens.
        Dividing `loss` or `correct` by `tokens` gives per-token averages.
    """
    enc_hidden = encoder.initialize_hidden_state()
    enc_output, enc_hidden = encoder(inp, enc_hidden)
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([fr_tokenizer.word_index['<start>']] * BATCH_SIZE, 1)
    loss = 0
    correct = 0
    tokens = 0
    for t in range(1, targ.shape[1]):
        target_t = targ[:, t]
        predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
        mask = tf.not_equal(target_t, 0)

        # Per-token cross-entropy computed directly, so the sum is a true
        # token-level total no matter how the training loss is reduced.
        token_loss = tf.keras.losses.sparse_categorical_crossentropy(
            target_t, predictions, from_logits=True)
        loss += tf.reduce_sum(token_loss * tf.cast(mask, token_loss.dtype))

        # tf.argmax returns int64 by default; cast to the targets' dtype so
        # the element-wise comparison does not fail on a dtype mismatch.
        predicted_ids = tf.cast(tf.argmax(predictions, axis=1), target_t.dtype)
        hits = tf.logical_and(tf.equal(predicted_ids, target_t), mask)
        correct += tf.reduce_sum(tf.cast(hits, tf.int32))
        tokens += tf.reduce_sum(tf.cast(mask, tf.int32))

        # Teacher forcing: feed the ground-truth token as the next input.
        dec_input = tf.expand_dims(target_t, 1)
    return loss, correct, tokens

# Validation function
def validate():
    """Aggregate `validate_step` over `val_dataset`.

    Returns:
        (avg_loss, avg_accuracy) as Python floats: the accumulated loss and
        the accumulated correct-token count, each divided by the accumulated
        token count. Both are 0.0 when no tokens were seen.
    """
    total_loss = 0.0
    total_correct = 0
    total_tokens = 0
    for inp, targ in val_dataset:
        loss, correct, tokens = validate_step(inp, targ)
        # Convert to Python numbers before accumulating: dividing a float32
        # tensor by an int32 tensor raises in TensorFlow, plain numbers do not.
        total_loss += float(loss)
        total_correct += int(correct)
        total_tokens += int(tokens)

    if total_tokens == 0:
        return 0.0, 0.0
    return total_loss / total_tokens, total_correct / total_tokens

# Training loop with early stopping
EPOCHS = 20
patience = 3                    # epochs without improvement tolerated before stopping
best_val_loss = float('inf')
wait = 0                        # consecutive epochs without a better validation loss

for epoch in range(EPOCHS):
    start = time.time()
    enc_hidden = encoder.initialize_hidden_state()
    total_loss = 0

    for batch, (inp, targ) in enumerate(train_dataset):
        batch_loss = train_step(inp, targ, enc_hidden)
        total_loss += batch_loss
        # Progress line every 100 batches.
        if not batch % 100:
            print(f'Epoch {epoch+1} Batch {batch} Loss {batch_loss.numpy():.4f}')

    val_loss, val_accuracy = validate()
    print(f'Epoch {epoch+1} Loss {(total_loss / len(train_dataset)):.4f} Val Loss {val_loss:.4f} Val Accuracy {val_accuracy:.4f}')

    if not (val_loss < best_val_loss):
        # No improvement: count it and stop once patience is exhausted.
        wait += 1
        if wait >= patience:
            print("Early stopping triggered!")
            break
    else:
        # New best validation loss: reset the counter and checkpoint both models.
        best_val_loss = val_loss
        wait = 0
        encoder.save_weights('encoder_weights.h5')
        decoder.save_weights('decoder_weights.h5')

    print(f'Time taken for epoch {time.time() - start:.2f} sec\n')

# Load best model weights
# Restores the checkpoint written at the epoch with the lowest validation loss.
encoder.load_weights('encoder_weights.h5')
decoder.load_weights('decoder_weights.h5')
print("Training completed with best model weights loaded.")

Num GPUs Available:  0
Epoch 1 Batch 0 Loss 4.1723


KeyboardInterrupt: 