In [None]:
import tensorflow as tf
import keras.backend as K
import numpy as np
from keras.layers import Dense, Bidirectional, LSTM, Input
from keras.layers import Activation, Embedding, Concatenate
from keras.models import Model
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import unicodedata
import re

import os
import io
import time

In [None]:
# Locations of the parallel corpus files, relative to the notebook's working directory.
# Despite the `_dir` suffix these are file paths; each file is read line by line in make_data.
en_dir = "../input/english-to-french/small_vocab_en.csv"
fr_dir = "../input/english-to-french/small_vocab_fr.csv"

def _read_tagged_lines(path):
    """Read `path` as UTF-8 and wrap every right-stripped line in <SOS> ... <EOS> markers."""
    with open(path, 'r', encoding='utf-8') as f:
        return ["<SOS> " + line.rstrip() + " <EOS>" for line in f]


def make_data(Xpath, Ypath, num_examples=None):
    """Load two line-aligned text files as lists of marker-wrapped sentences.

    Args:
        Xpath: Path of the source-language file (one sentence per line).
        Ypath: Path of the target-language file (one sentence per line).
        num_examples: Keep only the first `num_examples` sentences of each
            file; ``None`` keeps everything.

    Returns:
        A ``(X, Y)`` tuple of lists of strings, each sentence formatted as
        ``"<SOS> sentence <EOS>"``.
    """
    # Read from the paths that were passed in; the previous version ignored its
    # arguments and always opened the notebook-level en_dir / fr_dir.
    X = _read_tagged_lines(Xpath)
    Y = _read_tagged_lines(Ypath)

    return X[:num_examples], Y[:num_examples]

In [None]:
def tokenize(text):
    """Fit a Keras Tokenizer on `text` and encode it as a post-padded id matrix.

    Args:
        text: Iterable of sentences (strings) to fit on and encode.

    Returns:
        ``(padded_ids, tokenizer)`` where `padded_ids` is the output of
        ``pad_sequences(..., padding='post')`` and `tokenizer` is the fitted
        Tokenizer.
    """
    # filters="" turns off the Tokenizer's character filtering.
    fitted = Tokenizer(filters="")
    fitted.fit_on_texts(text)
    id_sequences = fitted.texts_to_sequences(text)

    return pad_sequences(id_sequences, padding='post'), fitted

In [None]:
def load_data(path, num_examples=None):
    """Read both corpus files and tokenize each side independently.

    Args:
        path: Sequence whose first two items are the source and target file paths.
        num_examples: Optional cap on the number of sentence pairs, forwarded
            to make_data.

    Returns:
        ``(x_tensor, y_tensor, x_token, y_token)``: the padded id matrices for
        both sides followed by their fitted tokenizers.
    """
    source_path = path[0]
    target_path = path[1]
    source_sentences, target_sentences = make_data(source_path, target_path, num_examples)

    # Each language gets its own tokenizer and therefore its own vocabulary.
    x_tensor, x_token = tokenize(source_sentences)
    y_tensor, y_token = tokenize(target_sentences)

    return x_tensor, y_tensor, x_token, y_token


In [None]:
# Cap on the number of sentence pairs kept from each file (make_data slices to this length).
num_examples = 50000
# X / Y are the padded id matrices; X_token / Y_token are the fitted tokenizers.
X, Y, X_token, Y_token = load_data([en_dir, fr_dir], num_examples)
# Axis 1 of each padded matrix is the padded sentence length for that language.
maxX, maxY = X.shape[1], Y.shape[1]
print(f"Maximum length of english sentence {maxX}")
print(f"Maximum length of french sentence {maxY}")
# Spot-check one encoded row and the row count on each side.
print(X[1])
print(len(X))
print(Y[1])
print(len(Y))

In [None]:
# Hold out 30% of the sentence pairs for validation. The fixed random_state keeps
# the split identical across kernel restarts so later cells see the same rows.
xtrain, xvalid, ytrain, yvalid = train_test_split(X, Y, test_size=0.3, random_state=42)

In [None]:
# Shuffle buffer covers the whole training set.
BUFFER_SIZE= len(xtrain)
# NOTE(review): with num_examples = 50000 and test_size = 0.3 this presumably equals
# len(xtrain), i.e. one batch per epoch; later cells also hard-code 35000 — confirm intent.
BATCH_SIZE = 35000
steps_per_epoch = len(xtrain)//BATCH_SIZE
EMBEDDING_DIMS = 128
# NOTE(review): `units` is not referenced anywhere else in the visible notebook.
units = 1024

# +1 on top of the number of word_index entries.
vocab_en = len(X_token.word_index) + 1
vocab_fr = len(Y_token.word_index) + 1

# Shuffled (source, target) pairs in fixed-size batches; a final short batch is dropped.
dataset = tf.data.Dataset.from_tensor_slices((xtrain, ytrain)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

In [None]:
# Display the shape of the padded training inputs.
xtrain.shape

In [None]:
class Encoder(Model):
    """Embeds source token ids and runs them through a bidirectional LSTM.

    Args:
        vocab: Size of the embedding table (number of distinct token ids).
        n_a: Number of units in the wrapped LSTM.
        embedding_dims: Width of each embedding vector.
    """

    def __init__(self, vocab, n_a, embedding_dims):
        super().__init__()
        self.n_a, self.vocab, self.dims = n_a, vocab, embedding_dims
        self.embedding = Embedding(self.vocab, self.dims)

        # return_sequences=True keeps the LSTM output for every timestep.
        self.lstm = Bidirectional(LSTM(self.n_a, return_sequences=True))
        self.concat = Concatenate()

    def call(self, X):
        """Return the bidirectional LSTM output sequence for the token ids `X`."""
        embedded = self.embedding(X)
        return self.lstm(embedded)

In [None]:
# n_a: units of the LSTM wrapped by the encoder's Bidirectional layer.
n_a = 19
# n_s: decoder LSTM state size; also passed to Attention as its Dense width.
n_s = 23

# Pull one batch from the shuffled dataset as a sample.
x_example, y_example = next(iter(dataset))
encoder = Encoder(vocab_en, n_a, EMBEDDING_DIMS)

# NOTE(review): the encoder is run on all of xtrain here rather than on x_example —
# confirm that is intended; later cells reuse this `a` as the encoder output.
a = encoder(xtrain)


In [None]:
class Attention(Model):
    """Scores every encoder timestep against a decoder state and returns the weighted sum.

    Args:
        units: Width of the tanh Dense layers.
    """

    def __init__(self, units):
        super().__init__()
        self.Dense1 = Dense(units, activation='tanh')
        self.Dense2 = Dense(units, activation='tanh')
        self.Dense3 = Dense(1)
        self.dot = tf.keras.layers.Dot(axes=1)
        self.Concat = Concatenate(axis=-1)

    def call(self, s, a):
        """Compute a context vector from decoder state `s` and encoder outputs `a`.

        `s` is repeated along axis 1 of `a`, concatenated with `a` on the last
        axis, and scored by Dense1 -> Dense3. The scores are softmax-normalised
        over axis 1 and used to weight `a`.
        """
        timesteps = a.shape[1]
        tiled_state = tf.keras.layers.RepeatVector(timesteps)(s)

        hidden = self.Dense1(self.Concat([a, tiled_state]))
        scores = self.Dense3(hidden)

        # Normalise across timesteps so the weights for each example sum to 1.
        weights = tf.nn.softmax(scores, axis=1)

        weighted = self.dot([weights, a])

        return tf.reduce_sum(weighted, axis=1)

In [None]:
# Smoke-test the attention block on the encoder output with an all-zero decoder state.
att = Attention(n_s)
# Take the row count from `a` so the state always matches the encoder output's
# batch dimension instead of relying on a hard-coded 35000.
s = tf.zeros((a.shape[0], n_s))
att_context = att(s, a)

In [None]:
class Decoder(Model):
    """Single-step decoder: attend over the encoder output, advance an LSTM by
    one step and emit a softmax distribution over the target vocabulary.

    Args:
        vocab: Target vocabulary size; sizes the embedding table and the
            output projection.
        n_s: Size of the LSTM hidden/cell state, also used as the width of the
            attention block's Dense layers.
    """

    def __init__(self, vocab, n_s):
        super(Decoder, self).__init__()

        # Size the layers from the constructor argument; the previous version
        # ignored `vocab` and read the notebook-level vocab_fr instead.
        # NOTE(review): the embedding is created but not used by call().
        self.embedding = Embedding(vocab, 128)
        self.lstm = LSTM(n_s, return_state=True)
        # Project to the vocabulary size so the softmax below ranges over target
        # tokens (this was hard-coded to 23 units).
        self.output_layer = Dense(vocab)
        self.attention = Attention(n_s)

    def call(self, s, c, a):
        """Run one decoding step.

        Args:
            s: Previous LSTM hidden state.
            c: Previous LSTM cell state.
            a: Encoder output sequence to attend over.

        Returns:
            ``(out, s, c)``: the softmax output for this step and the updated
            hidden and cell states.
        """
        context = self.attention(s, a)
        # The LSTM expects a time axis; feed the context as a length-1 sequence.
        context = tf.expand_dims(context, 1)
        # With return_state=True the LSTM returns (output, hidden, cell); the
        # middle item is discarded and the output is used as the new `s`.
        s, _, c = self.lstm(inputs=context, initial_state=[s, c])
        out = self.output_layer(inputs=s)
        out = tf.nn.softmax(out, axis=1)
        return out, s, c

In [None]:
# Display the French vocabulary size (word_index entries + 1).
vocab_fr

In [None]:
# Build the decoder once so that every decoding step shares the same weights.
# Previously dd() constructed a fresh, randomly initialised Decoder on each call.
_dd_decoder = Decoder(vocab_fr, n_s)


def dd(s, c):
    """Run one decoder step against the notebook-level encoder output `a`.

    Args:
        s: Previous decoder hidden state.
        c: Previous decoder cell state.

    Returns:
        ``(out, s, c)``: the step's softmax output and the updated states.
    """
    out, s, c = _dd_decoder(s, c, a)
    return out, s, c

# Zero initial hidden and cell states, one row per encoded sentence in `a`
# (taken from a.shape[0] instead of a hard-coded 35000).
s0 = tf.zeros((a.shape[0], n_s))
c0 = tf.zeros((a.shape[0], n_s))

s = s0
c = c0

# NOTE(review): `outputs` is left unfilled, exactly as before. A later cell uses its
# items as keys into Y_token.index_word, which would fail on raw output tensors;
# collecting results here needs a matching change there (e.g. argmax to token ids).
outputs = []
for i in range(maxY):
    # Feed the updated states back in at every step.
    out, s, c = dd(s, c)
    print(out.shape)

In [None]:
# Display how many items were collected in `outputs`.
len(outputs)

In [None]:
# Display the shape of the sample target batch pulled from the dataset.
y_example.shape

In [None]:
# Look up each item of `outputs` in the French tokenizer's id -> word table.
# NOTE(review): this assumes the items are integer token ids — confirm. The decoding
# loop above never appends to `outputs`, so in a top-to-bottom run nothing is printed.
for i in outputs:
    print(Y_token.index_word[i])

In [None]:
# Decoder instance shared by every timestep of the model built below.
decoder = Decoder(vocab_fr, n_s)


def model(tx, ty, m, n_a, n_s, vocab_en, vocab_fr, EMBEDDING_DIMS):
    """Build the unrolled encoder/attention/decoder Keras model.

    Uses the notebook-level `encoder` and `decoder` instances, so the arguments
    other than `tx`, `ty` and `n_s` are accepted only for signature
    compatibility and are not read here.

    Args:
        tx: Padded source sequence length (input shape of the token ids).
        ty: Number of decoding steps, i.e. number of model outputs.
        m: Unused.
        n_a: Unused (the encoder is already constructed).
        n_s: Decoder state size; shape of the two initial-state inputs.
        vocab_en: Unused.
        vocab_fr: Unused.
        EMBEDDING_DIMS: Unused.

    Returns:
        A Keras ``Model`` with inputs ``[X, s0, c0]`` and a list of `ty`
        per-step decoder outputs.
    """
    X = Input(shape=(tx,))
    s0 = Input(shape=(n_s,))
    c0 = Input(shape=(n_s,))

    s, c = s0, c0
    outputs = []

    a = encoder(X)

    for _ in range(ty):
        # Feed the state produced by the previous step. The earlier version passed
        # the initial s0/c0 at every step, so all timesteps computed the same
        # output and the recurrence was never used.
        output, s, c = decoder(s, c, a)
        outputs.append(output)

    return Model(inputs=[X, s0, c0], outputs=outputs)
    

In [None]:
# Build the Keras model for the padded source/target lengths.
# NOTE(review): this rebinds the name `model` from the builder function to the built
# Model, so re-running this cell without re-running the defining cell calls the
# Model object instead of the builder.
model = model(maxX, maxY, BATCH_SIZE, n_a, n_s, vocab_en, vocab_fr, EMBEDDING_DIMS)

In [None]:
# Adam optimizer with categorical cross-entropy, tracking accuracy.
# NOTE(review): categorical_crossentropy presumably expects one-hot targets per
# output — verify this matches the targets passed to fit().
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

In [None]:
# Zero initial decoder states, one row per training example passed to fit().
s0 = tf.zeros((BATCH_SIZE, n_s))
c0 = tf.zeros((BATCH_SIZE, n_s))
# Train on the first BATCH_SIZE rows for 5 epochs in mini-batches of 32.
# NOTE(review): the model is built with one output per target timestep, while the
# target passed here is a single padded id matrix — confirm the target format.
model.fit([xtrain[:BATCH_SIZE], s0, c0], ytrain[:BATCH_SIZE],
         epochs=5, batch_size=32)

In [None]:
# Disabled draft of a custom GradientTape training step, kept as an inert string.
# The original opened and closed the literal with four quotes; the stray trailing
# quote started an unterminated string and made this cell a SyntaxError.
# Known gaps before this draft can be re-enabled:
#   - it calls encoder(X, enc_hidden) and unpacks two results, but Encoder.call
#     takes only X and returns a single tensor;
#   - it calls decoder(dec_input, dec_hidden, enc_output), which does not match
#     Decoder.call(s, c, a);
#   - `loss_function` and `optimizer` are not defined in the visible notebook;
#   - tf.GradientTape exposes `gradient`, not `gradients`.
'''@tf.function
def train_step(X, Y, enc_hidden):
    loss = 0
    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(X, enc_hidden)
        
        dec_hidden = enc_hidden
        dec_input = tf.expand_dims([Y_token.word_index['<sos>']] * BATCH_SIZE, 1)
        
        for t in range(1, Y.shape[1]):
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
            loss += loss_function(Y[:, t], predictions)
            
            dec_input = tf.expand_dims(Y[:, t], 1)
            
        batch_loss = (loss/int(Y.shape[1]))
        variables = encoder.trainable_variables + decoder.trainable_variables
        
        gradients = tape.gradients(loss, variables)
        
        optimizer.apply_gradients(zip(gradients, variables))
        
        return batch_loss'''

In [None]:
# Disabled draft of the epoch loop for the custom training step, kept as an inert
# string literal (nothing in it executes).
# NOTE(review): it relies on encoder.initialize_hidden(), which the Encoder class in
# this notebook does not define, and on train_step, which exists only inside the
# disabled string in the previous cell.
'''EPOCHS = 10

for epoch in range(EPOCHS):
    start = time.time()
    enc_hidden = encoder.initialize_hidden()
    total_loss = 0
    
    for (batch, (X, Y)) in enumerate(dataset.take(steps_per_epoch)):
        batch_loss = train_step(X, Y, enc_hidden)
        total_loss += batch_loss
        
        if batch%100 == 0:
            print(f"Epoch {epoch+1} Batch {batch} Loss {batch_loss.numpy():.4f}")
            
        
        print(f"Epoch {epoch+1} Loss{total_loss/steps_per_epoch:.4f}")
        print(f"Time per Epoch {time.time() - start}")'''