In [1]:
!pip install wandb

!wget https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar

!tar -xf dakshina_dataset_v1.0.tar

Collecting wandb
  Downloading wandb-0.12.16-py2.py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 5.0 MB/s 
[?25hCollecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.5.12-py2.py3-none-any.whl (145 kB)
[K     |████████████████████████████████| 145 kB 44.4 MB/s 
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 61.4 MB/s 
[?25hCollecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.9-py3-none-any.whl (9.4 kB)
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle
  Downloading setproctitle-1.2.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29 kB)
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 2.4 MB/s 
[?25hCo

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import cv2
import pathlib

DATAPATH = "./dakshina_dataset_v1.0"

trainpath = os.path.join(DATAPATH, "hi", "lexicons", "hi"+".translit.sampled.train.tsv")
valpath = os.path.join(DATAPATH, "hi", "lexicons", "hi"+".translit.sampled.dev.tsv")
testpath = os.path.join(DATAPATH, "hi", "lexicons", "hi"+".translit.sampled.test.tsv")
train = pd.read_csv(
    trainpath,
    sep="\t",
    names=["tgt", "src", "count"],
)
val = pd.read_csv(
    valpath,
    sep="\t",
    names=["tgt", "src", "count"],
)
test = pd.read_csv(
    testpath,
    sep="\t",
    names=["tgt", "src", "count"],
)



# create train data
#train_data = preprocess(list(train["src"]), list(train["tgt"]))

In [3]:
source=list(train["src"])
target=list(train["tgt"])

source_chars = set()
target_chars = set()

#removing non str nan types
source = [str(x) for x in source]
target = [str(x) for x in target]

source_words = []
target_words = []

for src, tgt in zip(source, target):
    tgt = "\t" + tgt + "\n"
    source_words.append(src)
    target_words.append(tgt)
    for char in src:
        if char not in source_chars:
            source_chars.add(char)
    for char in tgt:
        if char not in target_chars:
            target_chars.add(char)

source_chars = sorted(list(source_chars))
target_chars = sorted(list(target_chars))

#The space needs to be appended so that the encode function doesn't throw errors
source_chars.append(" ")
target_chars.append(" ")

num_encoder_tokens = len(source_chars)
num_decoder_tokens = len(target_chars)
max_source_length = max([len(txt) for txt in source_words])
max_target_length = max([len(txt) for txt in target_words])

print("Number of samples:", len(source))
print("Source Vocab length:", num_encoder_tokens)
print("Target Vocab length:", num_decoder_tokens)
print("Max sequence length for inputs:", max_source_length)
print("Max sequence length for outputs:", max_target_length)


Number of samples: 44204
Source Vocab length: 27
Target Vocab length: 66
Max sequence length for inputs: 20
Max sequence length for outputs: 21


In [4]:
def dictionary_lookup( vocab):
  char2int = dict([(char, i) for i, char in enumerate(vocab)])
  int2char = dict((i, char) for char, i in char2int.items())
  return char2int, int2char


In [5]:
def encode(source, target, source_chars, target_chars, source_char2int=None, target_char2int=None):
        num_encoder_tokens = len(source_chars)
        num_decoder_tokens = len(target_chars)
        max_source_length = max([len(txt) for txt in source])
        max_target_length = max([len(txt) for txt in target])

        source_vocab, target_vocab = None, None
        if source_char2int == None and target_char2int == None:
            print("Generating the dictionary lookups for character to integer mapping and back")
            source_char2int, source_int2char = dictionary_lookup(source_chars)
            target_char2int, target_int2char = dictionary_lookup(target_chars)

            source_vocab = (source_char2int, source_int2char)
            target_vocab = (target_char2int, target_int2char)

        encoder_input_data = np.zeros(
            (len(source), max_source_length, num_encoder_tokens), dtype="float32"
        )
        decoder_input_data = np.zeros(
            (len(source), max_target_length, num_decoder_tokens), dtype="float32"
        )
        decoder_target_data = np.zeros(
            (len(source), max_target_length, num_decoder_tokens), dtype="float32"
        )

        for i, (input_text, target_text) in enumerate(zip(source, target)):
            for t, char in enumerate(input_text):
                encoder_input_data[i, t, source_char2int[char]] = 1.0
            encoder_input_data[i, t + 1 :, source_char2int[" "]] = 1.0
            for t, char in enumerate(target_text):
                # decoder_target_data is ahead of decoder_input_data by one timestep
                decoder_input_data[i, t, target_char2int[char]] = 1.0
                if t > 0:
                    # decoder_target_data will be ahead by one timestep
                    # and will not include the start character.
                    decoder_target_data[i, t - 1, target_char2int[char]] = 1.0
            decoder_input_data[i, t + 1 :, target_char2int[" "]] = 1.0
            decoder_target_data[i, t:, target_char2int[" "]] = 1.0
        if source_vocab != None and target_vocab != None:
            return (
                encoder_input_data,
                decoder_input_data,
                decoder_target_data,
                source_vocab,
                target_vocab,
            )
        else:
            return encoder_input_data, decoder_input_data, decoder_target_data

In [6]:
train_data=encode(source_words, target_words, source_chars, target_chars)

Generating the dictionary lookups for character to integer mapping and back


In [7]:
 # create train data
(train_encoder_input,
    train_decoder_input,
    train_decoder_target,
    source_vocab,
    target_vocab,
) = train_data
source_char2int, source_int2char = source_vocab
target_char2int, target_int2char = target_vocab


In [8]:
 # create val data (only encode function suffices as the dictionary lookup should be kep the same.
val_data = encode(
    val["src"].to_list(),
    val["tgt"].to_list(),
    list(source_char2int.keys()),
    list(target_char2int.keys()),
    source_char2int=source_char2int,
    target_char2int=target_char2int,
)
val_encoder_input, val_decoder_input, val_decoder_target = val_data
source_char2int, source_int2char = source_vocab
target_char2int, target_int2char = target_vocab

# create test data
test_data = encode(
    test["src"].to_list(),
    test["tgt"].to_list(),
    list(source_char2int.keys()),
    list(target_char2int.keys()),
    source_char2int=source_char2int,
    target_char2int=target_char2int,
)
test_encoder_input, test_decoder_input, test_decoder_target = test_data
source_char2int, source_int2char = source_vocab
target_char2int, target_int2char = target_vocab


In [9]:
#call attention using:
from tensorflow.keras.layers import AdditiveAttention

In [10]:
import os

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import layers
 

#from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, Input, InputLayer, Flatten, Activation, LSTM, SimpleRNN, GRU, TimeDistributed,Concatenate
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model, Sequential,  Model
from tensorflow.keras.callbacks import EarlyStopping


import wandb


class Sq2Sq_attention():

    def __init__(self, numEncoders,cell_type,latentDim,dropout,numDecoders,hidden,srcChar2Int, tgtChar2Int):
        
        self.numEncoders = numEncoders
        self.cell_type = cell_type
        self.latentDim = latentDim
        self.dropout =dropout
        self.numDecoders = numDecoders
        self.hidden = hidden
        self.tgtChar2Int = tgtChar2Int
        self.srcChar2Int = srcChar2Int
    
    def build_configurable_model(self):       
        if self.cell_type == "RNN":
            # encoder
            encoder_inputs = Input(shape=(None, len(self.srcChar2Int)))
            encoder_outputs = encoder_inputs
            for i in range(1, self.numEncoders + 1):
                encoder = SimpleRNN(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_outputs, state = encoder(encoder_inputs)
            encoder_states = [state]

            # decoder
            decoder_inputs = Input(shape=(None, len(self.tgtChar2Int)))
            decoder_outputs = decoder_inputs
            for i in range(1, self.numDecoders + 1):
                decoder = SimpleRNN(
                    self.latentDim,
                    return_sequences=True,
                    return_state=True,
                    dropout=self.dropout,
                )
                decoder_outputs, _ = decoder(decoder_inputs, initial_state=encoder_states)

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_outputs = hidden(decoder_outputs)
            decoder_dense = Dense(len(self.tgtChar2Int), activation="softmax")
            decoder_outputs = decoder_dense(hidden_outputs)
            model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
            
            return model
        
        elif self.cell_type == "LSTM":
            # encoder
            encoder_inputs = Input(shape=(None, len(self.srcChar2Int)))
            encoder_outputs = encoder_inputs
            for i in range(1, self.numEncoders + 1):
                encoder = LSTM(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_outputs, state_h, state_c = encoder(encoder_outputs)
            encoder_states = [state_h, state_c]

            # decoder
            decoder_inputs = Input(shape=(None, len(self.tgtChar2Int)))
            decoder_outputs = decoder_inputs
            for i in range(1, self.numDecoders + 1):
                decoder = LSTM(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                decoder_outputs, _, _ = decoder(
                    decoder_outputs, initial_state=encoder_states
                )

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_outputs = hidden(decoder_outputs)
            decoder_dense = Dense(len(self.tgtChar2Int), activation="softmax")
            decoder_outputs = decoder_dense(hidden_outputs)
            model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
            
            return model
        
        elif self.cell_type == "GRU":
            # encoder
            encoder_inputs = Input(shape=(None, len(self.srcChar2Int)))
            encoder_outputs = encoder_inputs
            for i in range(1, self.numEncoders + 1):
                encoder = GRU(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_outputs, state = encoder(encoder_inputs)
            encoder_states = [state]

            # decoder
            decoder_inputs = Input(shape=(None, len(self.tgtChar2Int)))
            decoder_outputs = decoder_inputs
            for i in range(1, self.numDecoders + 1):
                decoder = GRU(
                    self.latentDim,
                    return_sequences=True,
                    return_state=True,
                    dropout=self.dropout,
                )
                decoder_outputs, _ = decoder(decoder_inputs, initial_state=encoder_states)

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_outputs = hidden(decoder_outputs)
            decoder_dense = Dense(len(self.tgtChar2Int), activation="softmax")
            decoder_outputs = decoder_dense(hidden_outputs)
            model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
            
            return model
    
    def build_attention_model(self):       
        
        if self.cell_type == "RNN":
            # encoder
            encoder_inputs = Input(shape=(None, len(self.srcChar2Int)))
            encoder_outputs = encoder_inputs
            for i in range(1, self.numEncoders + 1):
                encoder = SimpleRNN(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_outputs, state = encoder(encoder_inputs) 
                
                if i == 1:
                    encoder_first_outputs= encoder_outputs                  
            encoder_states = [state]
            

            # decoder
            decoder_inputs = Input(shape=(None, len(self.tgtChar2Int)))
            decoder_outputs = decoder_inputs
            for i in range(1, self.numDecoders + 1):
                decoder = SimpleRNN(
                    self.latentDim,
                    return_sequences=True,
                    return_state=True,
                    dropout=self.dropout,
                )
                decoder_outputs, _ = decoder(decoder_inputs, initial_state=encoder_states)
                
                if i == self.numDecoders:
                    decoder_first_outputs = decoder_outputs

            #attention_layer = AttentionLayer(name='attention_layer')
            #attention_out, attention_states = attention_layer([encoder_first_outputs, decoder_first_outputs])
            attention_out = AdditiveAttention(use_scale=True)([decoder_first_outputs, encoder_first_outputs])


            decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out])

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_time = TimeDistributed(hidden, name='time_distributed_layer')
            hidden_outputs = hidden(decoder_concat_input)
            decoder_dense = Dense(len(self.tgtChar2Int), activation="softmax")
            decoder_outputs = decoder_dense(hidden_outputs)
            model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
            
            return model
        
        elif self.cell_type == "LSTM":
            # encoder
            encoder_inputs = Input(shape=(None, len(self.srcChar2Int)))
            encoder_outputs = encoder_inputs
            for i in range(1, self.numEncoders + 1):
                encoder = LSTM(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_outputs, state_h, state_c = encoder(encoder_outputs)
                if i == 1:
                    encoder_first_outputs= encoder_outputs                  
         
            encoder_states = [state_h, state_c]

            # decoder
            decoder_inputs = Input(shape=(None, len(self.tgtChar2Int)))
            decoder_outputs = decoder_inputs
            for i in range(1, self.numDecoders + 1):
                decoder = LSTM(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                decoder_outputs, _, _ = decoder(
                    decoder_outputs, initial_state=encoder_states
                )
                if i == self.numDecoders:
                    decoder_first_outputs = decoder_outputs

#            attention_layer = AttentionLayer(name='attention_layer')
#            attention_out, attention_states = attention_layer([encoder_first_outputs, decoder_first_outputs])

            attention_out = AdditiveAttention(use_scale=True)([decoder_first_outputs, encoder_first_outputs])

            decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out])

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_time = TimeDistributed(hidden, name='time_distributed_layer')
            hidden_outputs = hidden(decoder_concat_input)
            decoder_dense = Dense(len(self.tgtChar2Int), activation="softmax")
            decoder_outputs = decoder_dense(hidden_outputs)
            model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
            
            return model
        
        elif self.cell_type == "GRU":
            # encoder
            encoder_inputs = Input(shape=(None, len(self.srcChar2Int)))
            encoder_outputs = encoder_inputs
            for i in range(1, self.numEncoders + 1):
                encoder = GRU(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_outputs, state = encoder(encoder_inputs)

                if i == 1:
                    encoder_first_outputs= encoder_outputs                  
         
            encoder_states = [state]

            # decoder
            decoder_inputs = Input(shape=(None, len(self.tgtChar2Int)))
            decoder_outputs = decoder_inputs
            for i in range(1, self.numDecoders + 1):
                decoder = GRU(
                    self.latentDim,
                    return_sequences=True,
                    return_state=True,
                    dropout=self.dropout,
                )
                decoder_outputs, _ = decoder(decoder_inputs, initial_state=encoder_states)
                if i == self.numDecoders:
                    decoder_first_outputs = decoder_outputs



            #attention_layer = AttentionLayer(name='attention_layer')
            #attention_out, attention_states = attention_layer([encoder_first_outputs, decoder_first_outputs])
            attention_out = AdditiveAttention(use_scale=True)([decoder_first_outputs, encoder_first_outputs])

            decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out])

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_time = TimeDistributed(hidden, name='time_distributed_layer')
            hidden_outputs = hidden(decoder_concat_input)
            decoder_dense = Dense(len(self.tgtChar2Int), activation="softmax")
            decoder_outputs = decoder_dense(hidden_outputs)
            model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
            
            return model

In [None]:
!cp -rf ./TrainedModels /content/gdrive/MyDrive/CS6910/Assignment3/

In [11]:
from wandb.keras import WandbCallback
wandb.init()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
def train():

  config= {
      "cell_type": "LSTM",
      "latentDim": 256,
      "hidden": 128,
      "optimiser": "adam",
      "numEncoders": 1,
      "numDecoders": 1,
      "dropout": 0.2,
      "epochs": 20,
      "batch_size": 32,
  }


  #wandb.init(config=config_defaults,  project="CS6910-Assignment-3_att")
  #config = wandb.config
  '''wandb.run.name = (
      str(config.cell_type)
      + "eng"
      + str(config.numEncoders)
      + "_"
      + "hi"
      + "_"
      + str(config.numDecoders)
      + "_"
      + config.optimiser
      + "_"
      + str(config.epochs)
      + "_"
      + str(config.dropout) 
      + "_"
      + str(config.batch_size)
      + "_"
      + str(config.latentDim)
  )
  wandb.run.save()
'''
  modelInit = Sq2Sq_attention(
    config["numEncoders"], 
    config["cell_type"], 
    config["latentDim"], 
    config["dropout"], 
    config["numDecoders"], 
    config["hidden"] ,
    srcChar2Int=source_char2int, 
    tgtChar2Int=target_char2int
    )

  model = modelInit.build_configurable_model()

  model.summary()

  model.compile(
      optimizer=config["optimiser"],
      loss="categorical_crossentropy",
      metrics=["accuracy"],
  )

  earlystopping = EarlyStopping(
      monitor="val_accuracy", min_delta=0.01, patience=5, verbose=2, mode="auto"
  )

  model.fit(
      [train_encoder_input, train_decoder_input],
      train_decoder_target,
      batch_size=config["batch_size"],
      epochs=config["epochs"],
      validation_data=([val_encoder_input, val_decoder_input], val_decoder_target),
      callbacks=[earlystopping, WandbCallback()],
  )

  model.save(os.path.join("./TrainedModels", "best_model_wo_attn"))    

In [None]:
train()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_9 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_10 (InputLayer)          [(None, None, 66)]   0           []                               
                                                                                                  
 lstm_8 (LSTM)                  [(None, None, 256),  290816      ['input_9[0][0]']                
                                 (None, 256),                                                     
                                 (None, 256)]                                                     
                                                                                            



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 14: early stopping




INFO:tensorflow:Assets written to: ./TrainedModels/best_model_wo_attn/assets


INFO:tensorflow:Assets written to: ./TrainedModels/best_model_wo_attn/assets


In [None]:
import tensorflow
model = tensorflow.keras.models.load_model('/content/TrainedModels/best_model_wo_attn')

In [None]:
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_9 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_10 (InputLayer)          [(None, None, 66)]   0           []                               
                                                                                                  
 lstm_8 (LSTM)                  [(None, None, 256),  290816      ['input_9[0][0]']                
                                 (None, 256),                                                     
                                 (None, 256)]                                                     
                                                                                            

In [12]:
def train_attn():

  config= {
      "cell_type": "RNN",
      "latentDim": 256,
      "hidden": 128,
      "optimiser": "adam",
      "numEncoders": 1,
      "numDecoders": 3,
      "dropout": 0.3,
      "epochs": 20,
      "batch_size": 64,
  }



  modelInit = Sq2Sq_attention(
    config["numEncoders"], 
    config["cell_type"], 
    config["latentDim"], 
    config["dropout"], 
    config["numDecoders"], 
    config["hidden"] ,
    srcChar2Int=source_char2int, 
    tgtChar2Int=target_char2int
    )

  model = modelInit.build_attention_model()

  model.summary()

  model.compile(
      optimizer=config["optimiser"],
      loss="categorical_crossentropy",
      metrics=["accuracy"],
  )

  earlystopping = EarlyStopping(
      monitor="val_accuracy", min_delta=0.01, patience=5, verbose=2, mode="auto"
  )

  model.fit(
      [train_encoder_input, train_decoder_input],
      train_decoder_target,
      batch_size=config["batch_size"],
      epochs=config["epochs"],
      validation_data=([val_encoder_input, val_decoder_input], val_decoder_target),
      callbacks=[earlystopping, WandbCallback()],
  )

  model.save(os.path.join("./TrainedModels", "best_model_attn"))    

In [13]:
train_attn()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None, 66)]   0           []                               
                                                                                                  
 simple_rnn (SimpleRNN)         [(None, None, 256),  72704       ['input_1[0][0]']                
                                 (None, 256)]                                                     
                                                                                                  
 simple_rnn_3 (SimpleRNN)       [(None, None, 256),  82688       ['input_2[0][0]',            



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 13: early stopping
INFO:tensorflow:Assets written to: ./TrainedModels/best_model_attn/assets


In [14]:
import tensorflow
model_attn = tensorflow.keras.models.load_model('/content/TrainedModels/best_model_attn')

In [None]:
 def decode_sequence(input_seq,config,encoder_model,decoder_model,target_char2int=target_char2int,target_int2char=target_int2char):
            # Encode the input as state vectors.
            states_value = encoder_model.predict(input_seq)

            # Generate empty target sequence of length 1.
            target_seq = np.zeros((1, 1, len(target_char2int)))
            # Populate the first character of target sequence with the start character.
            target_seq[0, 0, target_char2int["\n"]] = 1.0

            # Sampling loop for a batch of sequences
            # (to simplify, here we assume a batch of size 1).
            stop_condition = False
            decoded_sentence = ""
            while not stop_condition:
                if config["cell_type"] == "LSTM":
                    output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
                elif config["cell_type"] == "RNN" or config["cell_type"] == "GRU":
                    states_value = states_value[0].reshape((1, 256))
                    output_tokens, h = decoder_model.predict([target_seq] + [states_value])

                # Sample a token
                sampled_token_index = np.argmax(output_tokens[0, -1, :])
                sampled_char = target_int2char[sampled_token_index]
                decoded_sentence += sampled_char

                # Exit condition: either hit max length
                # or find stop character.
                if sampled_char == "\n" or len(decoded_sentence) > 25:
                    stop_condition = True

                # Update the target sequence (of length 1).
                target_seq = np.zeros((1, 1, len(target_char2int)))
                target_seq[0, 0, sampled_token_index] = 1.0

                # Update states
                if config["cell_type"] == "LSTM":
                    states_value = [h, c]
                elif config["cell_type"] == "RNN" or config["cell_type"] == "GRU":
                    states_value = [h]
            return decoded_sentence

In [None]:
def test_model(model,target_char2int=target_char2int,target_int2char=target_int2char,attention = False):
  if attention == False:
    
    config = {
        "cell_type": "LSTM",
        "latentDim": 256,
        "hidden": 128,
        "optimiser": "adam",
        "numEncoders": 1,
        "numDecoders": 1,
        "dropout": 0.2,
        "epochs": 20,
        "batch_size": 32,
    }
    

    encoder_inputs = model.input[0]
    
    if config["numEncoders"] == 1:
        encoder_outputs, state_h_enc, state_c_enc = model.get_layer(name = "lstm_8").output 
    else:           
        encoder_outputs, state_h_enc, state_c_enc = model.get_layer(name = "lstm_"+ str(config["numEncoders"]-1)).output

    encoder_states = [state_h_enc, state_c_enc]
    encoder_model = Model(encoder_inputs, encoder_states)

    decoder_inputs = model.input[1]
    decoder_state_input_h = Input(shape=(config["latentDim"],))
    decoder_state_input_c = Input(shape=(config["latentDim"],))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_lstm = model.layers[3]
    decoder_outputs, state_h_dec, state_c_dec = decoder_lstm( decoder_inputs, initial_state=decoder_states_inputs )
    decoder_states = [state_h_dec, state_c_dec]
    decoder_dense = model.layers[-2]
    decoder_outputs = decoder_dense(decoder_outputs)
    
    decoder_dense = model.layers[-1]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model(
        [decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states
    )

    acc = 0
    sourcelang = []
    predictions = []
    original = []
    for i, row in test.iterrows():
        input_seq = test_encoder_input[i : i + 1]
        decoded_sentence = decode_sequence(input_seq,config,encoder_model,decoder_model)
        
        og_tokens = [target_char2int[x] for x in row["tgt"]]
        predicted_tokens = [target_char2int[x] for x in decoded_sentence.rstrip("\n")]
        # if decoded_sentence == row['tgt']:
        #   acc += 1
        sourcelang.append(row['src'])
        original.append(row['tgt'])
        predictions.append(decoded_sentence)

        if og_tokens == predicted_tokens:
            acc += 1

        if i % 100 == 0:
            print(f"Finished {i} examples")
            print(f"Source: {row['src']}")
            print(f"Original: {row['tgt']}")
            print(f"Predicted: {decoded_sentence}")
            print(f"Accuracy: {acc / (i+1)}")
            print(og_tokens)
            print(predicted_tokens)
            

    print(f'Test Accuracy: {acc}')
    #wandb.log({'test_accuracy': acc / len(test)})
    #wandb.finish()
    return acc / len(test), sourcelang, original, predictions

In [None]:
acc,lang,org,pred=test_model(model)

Finished 0 examples
Source: ank
Original: अंक
Predicted: अंक

Accuracy: 1.0
[5, 3, 17]
[5, 3, 17]
Finished 100 examples
Source: anukulata
Original: अनुकूलता
Predicted: अनुकुलता

Accuracy: 0.2079207920792079
[5, 36, 54, 17, 55, 44, 32, 51]
[5, 36, 54, 17, 54, 44, 32, 51]
Finished 200 examples
Source: avaru
Original: अवरु
Predicted: अवरू

Accuracy: 0.263681592039801
[5, 45, 43, 54]
[5, 45, 43, 55]
Finished 300 examples
Source: aabru
Original: आबरू
Predicted: आबरू

Accuracy: 0.30564784053156147
[6, 39, 43, 55]
[6, 39, 43, 55]
Finished 400 examples
Source: inhaletion
Original: इनहेलेशन
Predicted: इंहलेशन

Accuracy: 0.2793017456359102
[7, 36, 49, 58, 44, 58, 46, 36]
[7, 3, 49, 44, 58, 46, 36]
Finished 500 examples
Source: umesh
Original: उमेश
Predicted: उमेश

Accuracy: 0.26746506986027946
[9, 41, 58, 46]
[9, 41, 58, 46]
Finished 600 examples
Source: asphalt
Original: एस्फाल्ट
Predicted: असफलत

Accuracy: 0.2545757071547421
[12, 48, 63, 38, 51, 44, 63, 27]
[5, 48, 38, 44, 32]
Finished 700 exa

In [1]:
#accuracy of model without attention
accuracy

0.3054119269217511

In [None]:
dict2 = [{"input":lang[i], "true": org[i], "predicted": pred[i]} for i in range(len(lang))] 
test_predictions = pd.DataFrame(dict2)
test_predictions.to_csv('predictions_without_attn.csv', index=False, sep='\t')

In [None]:
test_predictions 

Unnamed: 0,input,true,predicted
0,ank,अंक,अंक\n
1,anka,अंक,अंका\n
2,ankit,अंकित,अंकित\n
3,anakon,अंकों,अनकों\n
4,ankhon,अंकों,अंखों\n
...,...,...,...
4497,holding,होल्डिंग,हॉलिंग\n
4498,hoshangabaad,होशंगाबाद,होशनाबाद\n
4499,hoshangabad,होशंगाबाद,होशनाबाद\n
4500,hostes,होस्टेस,हॉस्टर्स\n


In [None]:
model_attn.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_3 (InputLayer)           [(None, None, 66)]   0           []                               
                                                                                                  
 simple_rnn_1 (SimpleRNN)       [(None, None, 256),  72704       ['input_2[0][0]']                
                                 (None, 256)]                                                     
                                                                                                  
 simple_rnn_4 (SimpleRNN)       [(None, None, 256),  82688       ['input_3[0][0]',            

In [29]:
model_attn.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None, 66)]   0           []                               
                                                                                                  
 simple_rnn (SimpleRNN)         [(None, None, 256),  72704       ['input_1[0][0]']                
                                 (None, 256)]                                                     
                                                                                                  
 simple_rnn_3 (SimpleRNN)       [(None, None, 256),  82688       ['input_2[0][0]',            

In [32]:
config = {
      "cell_type": "RNN",
      "latentDim": 256,
      "hidden": 128,
      "optimiser": "adam",
      "numEncoders": 1,
      "numDecoders": 3,
      "dropout": 0.3,
      "epochs": 20,
      "batch_size": 64,
  }

In [35]:
model=model_attn
encoder_inputs = model.input[0]
if config['numEncoders'] == 1:
    encoder_outputs, state = model.get_layer(name = "simple_rnn").output
else:
    encoder_outputs, state = model.get_layer(name = "simple_rnn_"+ str(config['numEncoders']-1)).output
encoder_first_outputs, _ = model.get_layer(name = "simple_rnn").output
encoder_states = [state]

encoder_model = Model(encoder_inputs, outputs = [encoder_first_outputs, encoder_outputs] + encoder_states)

decoder_inputs = model.input[1]

decoder_state = Input(shape=(config['latentDim'],), name="input_3")
decoder_hidden_state = Input(shape=(None,config["latentDim"]), name = "input_4")
decoder_states_inputs = [decoder_state]

decoder_gru = model.get_layer(name = "simple_rnn_"+ str(config['numEncoders'] + config['numDecoders'] -1))#model.layers[-3]
(decoder_outputs, state) = decoder_gru(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state]

            
attention_layer = model.get_layer(name='additive_attention')
    #decoder_outputs_att = decoder_ouputs
attention_out = attention_layer([decoder_outputs,decoder_hidden_state])

decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out])

decoder_dense = model.layers[-2]
decoder_time = TimeDistributed(decoder_dense)
hidden_outputs = decoder_time(decoder_concat_input)
decoder_dense = model.layers[-1]
decoder_outputs = decoder_dense(hidden_outputs)

decoder_model = Model(inputs = [decoder_inputs] + [decoder_hidden_state , decoder_states_inputs], outputs = [decoder_outputs] + decoder_states)
    



In [42]:
def decode_sequence_attn(input_seq,target_char2int=target_char2int,target_int2char=target_int2char):
    # Encode the input as state vectors.
    encoder_first_outputs, _, states_value = encoder_model.predict(input_seq)

    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, len(target_char2int)))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0, target_char2int["\n"]] = 1.0

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ""
    attention_weights = []
    while not stop_condition:
        if config['cell_type'] == "LSTM":
            output_tokens, h, c = decoder_model.predict([target_seq, encoder_first_outputs] + states_value)
        elif config['cell_type'] == "RNN" or config['cell_type'] == "GRU":
            states_value = states_value[0].reshape((1, config['latentDim']))
            output_tokens, h = decoder_model.predict([target_seq] + [encoder_first_outputs] + [states_value])
        #dec_ind = np.argmax(output_tokens, axis=-1)[0, 0]
        #attention_weights.append((dec_ind, attn_states))
        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = target_int2char[sampled_token_index]
        decoded_sentence += sampled_char

        # Exit condition: either hit max length
        # or find stop character.
        if sampled_char == "\n" or len(decoded_sentence) > 25:
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, len(target_char2int)))
        target_seq[0, 0, sampled_token_index] = 1.0

        # Update states
        if config['cell_type'] == "LSTM":
            states_value = [h, c]
        elif config['cell_type'] == "RNN" or config['cell_type'] == "GRU":
            states_value = [h]
    return decoded_sentence #, attention_weights



In [43]:
acc = 0
sourcelang = []
predictions = []
original = []
#attention_weights_test = []
for i, row in test.iterrows():
    input_seq = test_encoder_input[i : i + 1]
    decoded_sentence = decode_sequence_attn(input_seq,target_char2int=target_char2int,target_int2char=target_int2char)
    og_tokens = [target_char2int[x] for x in row["tgt"]]
    predicted_tokens = [target_char2int[x] for x in decoded_sentence.rstrip("\n")]
    # if decoded_sentence == row['tgt']:
    #   acc += 1
    sourcelang.append(row['src'])
    original.append(row['tgt'])
    predictions.append(decoded_sentence)
    #attention_weights_test.append(attention_weights)
    if og_tokens == predicted_tokens:
        acc += 1

    if i % 100 == 0:
        print(f"Finished {i} examples")
        print(f"Source: {row['src']}")
        print(f"Original: {row['tgt']}")
        print(f"Predicted: {decoded_sentence}")
        print(f"Accuracy: {acc / (i+1)}")
        print(og_tokens)
        print(predicted_tokens)
        

print(f'Test Accuracy: {acc}')

accuracy= acc / len(test)  
lang=sourcelang
org=original
pred=predictions #,


Finished 0 examples
Source: ank
Original: अंक
Predicted: आंक

Accuracy: 0.0
[5, 3, 17]
[6, 3, 17]
Finished 100 examples
Source: anukulata
Original: अनुकूलता
Predicted: आनुकुलता

Accuracy: 0.0
[5, 36, 54, 17, 55, 44, 32, 51]
[6, 36, 54, 17, 54, 44, 32, 51]
Finished 200 examples
Source: avaru
Original: अवरु
Predicted: आवरू

Accuracy: 0.014925373134328358
[5, 45, 43, 54]
[6, 45, 43, 55]
Finished 300 examples
Source: aabru
Original: आबरू
Predicted: आभ्रु

Accuracy: 0.06312292358803986
[6, 39, 43, 55]
[6, 40, 63, 43, 54]
Finished 400 examples
Source: inhaletion
Original: इनहेलेशन
Predicted: इनहालेतियों

Accuracy: 0.11471321695760599
[7, 36, 49, 58, 44, 58, 46, 36]
[7, 36, 49, 51, 44, 58, 32, 52, 42, 61, 3]
Finished 500 examples
Source: umesh
Original: उमेश
Predicted: उमेश

Accuracy: 0.1437125748502994
[9, 41, 58, 46]
[9, 41, 58, 46]
Finished 600 examples
Source: asphalt
Original: एस्फाल्ट
Predicted: आस्पल्त

Accuracy: 0.1314475873544093
[12, 48, 63, 38, 51, 44, 63, 27]
[6, 48, 63, 37, 44, 6

In [45]:
#accuracy of attention model
accuracy

0.20457574411372723

In [44]:
dict2 = [{"input":lang[i], "true": org[i], "predicted": pred[i]} for i in range(len(lang))] 
test_predictions = pd.DataFrame(dict2)
test_predictions.to_csv('predictions_attention.csv', index=False, sep='\t')