# Transliteration Model - Usage

### Defining our custom accuracy function `custom_sparse_categorical_accuracy`

In [1]:
from keras import backend as K
def custom_sparse_categorical_accuracy(y_true, y_pred):
    """
    Per-position accuracy indicator for sparse (integer) labels.

    The label is taken as the maximum of y_true along its last axis and the
    prediction as the argmax of y_pred along its last axis; the two are
    compared for equality.

    :param y_true: Tensor of true labels (presumably integer class ids with a
        trailing axis of size 1 -- TODO confirm against the training code)
    :param y_pred: Tensor of per-class scores, classes on the last axis
    :return: Tensor of backend float type, 1.0 where label and prediction match, else 0.0
    """
    float_dtype = K.floatx()

    true_ids = K.max(y_true, axis=-1)
    # argmax yields integers; cast so both sides of the comparison share a dtype.
    predicted_ids = K.cast(K.argmax(y_pred, axis=-1), float_dtype)

    matches = K.equal(true_ids, predicted_ids)
    return K.cast(matches, float_dtype)

Using TensorFlow backend.


### Loading the model

In [2]:
from keras.models import load_model
# The custom metric is handed over through `custom_objects` so that
# load_model can resolve it by name.
model = load_model(
    'transliteration_model.h5',
    custom_objects={
        'custom_sparse_categorical_accuracy': custom_sparse_categorical_accuracy,
    },
)

### Input Pre-Processing Functions 

In [3]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

def tokenize(x):
    """
    Fit a fresh Tokenizer on x and convert x to sequences of token ids
    :param x: List of sentences/strings to be tokenized
    :return: Tuple of (tokenized x data, tokenizer used to tokenize x)
    """
    fitted_tokenizer = Tokenizer()
    fitted_tokenizer.fit_on_texts(x)

    sequences = fitted_tokenizer.texts_to_sequences(x)
    return sequences, fitted_tokenizer

def pad(x, length=None):
    """
    Pad every sequence in x at its end ('post' padding)
    :param x: List of sequences.
    :param length: Length to pad the sequence to.  If None, use length of longest sequence in x.
    :return: Padded numpy array of sequences
    """
    padded_sequences = pad_sequences(x, maxlen=length, padding='post')
    return padded_sequences

In [4]:
import collections
import helper
import numpy as np
import random


# Read both word lists through the project's helper module
# (presumably the two files are line-aligned word pairs -- TODO confirm).
english_words, bengali_words = (
    helper.load_data(data_path)
    for data_path in ('data/neural_english.txt', 'data/neural_bengali.txt')
)

In [5]:
def preprocess(x, y):
    """
    Tokenize x and y, and pad the tokenized x

    Only x is returned in preprocessed form; y is tokenized solely to obtain
    a tokenizer fitted on the labels.

    :param x: Feature List of sentences
    :param y: Label List of sentences
    :return: Tuple of (Preprocessed x, x tokenizer, y tokenizer)
    """
    tokenized_x, x_tk = tokenize(x)
    # The tokenized labels themselves are not used, only the fitted tokenizer.
    _, y_tk = tokenize(y)

    preprocess_x = pad(tokenized_x)

    return preprocess_x, x_tk, y_tk

# Bengali words are the features (x), English words the labels (y).
(preproc_bengali_words,
 bengali_tokenizer,
 english_tokenizer) = preprocess(bengali_words, english_words)

# Second dimension of the padded Bengali array.
max_bengali_sequence_length = preproc_bengali_words.shape[1]

In [6]:
def input_preprocess(word, tokenizer, sequence_length):
    """
    Convert a single input word into a fixed-length row of token ids

    `word` is passed straight to ``tokenizer.texts_to_sequences``; the first
    id of every non-empty resulting sequence is kept, in order. Elements the
    tokenizer maps to an empty sequence (e.g. separators or unknown symbols)
    are skipped, ids beyond `sequence_length` are dropped, and the remainder
    is filled with 0.

    :param word: Input to tokenize (the caller passes a string of
        space-separated characters)
    :param tokenizer: Object exposing ``texts_to_sequences``
    :param sequence_length: Number of ids in the returned row
    :return: int32 numpy array of shape (1, sequence_length)
    """
    sequences = tokenizer.texts_to_sequences(word)

    # A negative length behaves like 0, matching ``range(sequence_length)``.
    row_length = max(sequence_length, 0)

    # Explicit filtering/truncation instead of a bare ``except`` so that
    # genuine errors (e.g. a broken tokenizer) are no longer hidden.
    token_ids = [sequence[0] for sequence in sequences if sequence][:row_length]

    result = token_ids + [0] * (row_length - len(token_ids))

    return np.array([result]).astype('int32')

In [7]:
def logits_to_text(logits, tokenizer):
    """
    Decode network output into text by taking the argmax along axis 1
    :param logits: Logits from a neural network
    :param tokenizer: Keras Tokenizer fit on the labels
    :return: Space-joined string of the tokens chosen for each row of logits
    """
    id_to_token = dict(
        (token_id, token) for token, token_id in tokenizer.word_index.items()
    )
    # Id 0 has no entry in word_index; render it as a blank.
    id_to_token[0] = ' '

    predicted_ids = np.argmax(logits, axis=1)
    return ' '.join(id_to_token[token_id] for token_id in predicted_ids)

In [8]:
def pre_processed_bengali_word(bengali_word):
    """
    Put a single space after every character of the word
    :param bengali_word: Word to split into characters
    :return: String with each character followed by one space (trailing space included)
    """
    return "".join(character + " " for character in bengali_word)

### Get Prediction

In [9]:
def get_prediction(bengali_word, bengali_tokenizer, max_seq_length, english_tokenizer):
    """
    Run the module-level `model` on one Bengali word and decode its output
    :param bengali_word: Word to feed to the model
    :param bengali_tokenizer: Tokenizer used to encode the input characters
    :param max_seq_length: Length the encoded input is padded/truncated to
    :param english_tokenizer: Tokenizer used to decode the model output
    :return: Decoded output in upper case with all spaces removed
    """
    spaced_word = pre_processed_bengali_word(bengali_word)
    model_input = input_preprocess(spaced_word, bengali_tokenizer, max_seq_length)

    # The batch holds a single word, so only the first prediction is decoded.
    first_output = model.predict(model_input)[0]
    decoded = logits_to_text(first_output, english_tokenizer)

    return "".join(
        character.upper() for character in decoded.strip() if character != ' '
    )

### Saving necessary files for easy reproducibility

In [10]:
import pickle

# Persist both fitted tokenizers with the highest available pickle protocol.
for pickle_path, fitted_tokenizer in (("ben_token.pickle", bengali_tokenizer),
                                      ("eng_token.pickle", english_tokenizer)):
    with open(pickle_path, "wb") as handle:
        pickle.dump(fitted_tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Store the padded input length as plain text next to the tokenizers.
with open("max_seq_length.txt", "w") as fp:
    fp.write(str(max_bengali_sequence_length))