### English to Hindi Transliteration Tutorial

#### GPU

In [1]:
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


#### Download Data

In [2]:
!mkdir data
!wget -N "https://raw.githubusercontent.com/bsantraigi/tensorflow-seq2seq-hindi/master/data/Hindi%20-%20Word%20Transliteration%20Pairs%201.txt" -P data/

--2021-04-12 22:17:35--  https://raw.githubusercontent.com/bsantraigi/tensorflow-seq2seq-hindi/master/data/Hindi%20-%20Word%20Transliteration%20Pairs%201.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 773211 (755K) [text/plain]
Saving to: ‘data/Hindi - Word Transliteration Pairs 1.txt’


Last-modified header missing -- time-stamps turned off.
2021-04-12 22:17:35 (43.9 MB/s) - ‘data/Hindi - Word Transliteration Pairs 1.txt’ saved [773211/773211]



In [3]:
!pip install tensorflow==1.13.2

Collecting tensorflow==1.13.2
[?25l  Downloading https://files.pythonhosted.org/packages/bc/70/45d3b9fab768215a2055c7819d39547a4b0b7401b4583094068741aff99b/tensorflow-1.13.2-cp37-cp37m-manylinux1_x86_64.whl (92.7MB)
[K     |████████████████████████████████| 92.7MB 43kB/s 
Collecting tensorboard<1.14.0,>=1.13.0
[?25l  Downloading https://files.pythonhosted.org/packages/0f/39/bdd75b08a6fba41f098b6cb091b9e8c7a80e1b4d679a581a0ccd17b10373/tensorboard-1.13.1-py3-none-any.whl (3.2MB)
[K     |████████████████████████████████| 3.2MB 33.0MB/s 
Collecting keras-applications>=1.0.6
[?25l  Downloading https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a882ce099b03f/Keras_Applications-1.0.8-py3-none-any.whl (50kB)
[K     |████████████████████████████████| 51kB 5.6MB/s 
Collecting tensorflow-estimator<1.14.0rc0,>=1.13.0
[?25l  Downloading https://files.pythonhosted.org/packages/bb/48/13f49fc3fa0fdf916aa1419013bb8f2ad09674c275b4046d5ee669a46873/tensorfl

In [4]:
!pip install indic-transliteration


Collecting indic-transliteration
[?25l  Downloading https://files.pythonhosted.org/packages/ad/b2/cbb12eda3aaf61ac6265bca156c1039d0a089c73b6b772e52ed12aa9a9f6/indic_transliteration-2.1.0-py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 6.7MB/s 
[?25hCollecting selenium==3.141.0
[?25l  Downloading https://files.pythonhosted.org/packages/80/d6/4294f0b4bce4de0abf13e17190289f9d0613b0a44e5dd6a7f5ca98459853/selenium-3.141.0-py2.py3-none-any.whl (904kB)
[K     |████████████████████████████████| 911kB 33.9MB/s 
[?25hCollecting regex==2020.9.27
[?25l  Downloading https://files.pythonhosted.org/packages/99/bf/c4f2ade22b5ef3c7e6a3c52bc188bdfe3abb4b0d935db3a1b26945747906/regex-2020.9.27-cp37-cp37m-manylinux2010_x86_64.whl (662kB)
[K     |████████████████████████████████| 665kB 47.9MB/s 
[?25hCollecting backports.functools-lru-cache==1.6.1
  Downloading https://files.pythonhosted.org/packages/da/d1/080d2bb13773803648281a49e3918f65b31b7beebf009887a529357fd44a/backport

In [5]:
import nltk
from collections import Counter
from tqdm import tqdm_notebook
import numpy as np
import tensorflow as tf
from tensorflow.contrib import seq2seq
from tensorflow.contrib.rnn import DropoutWrapper
import random

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [6]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

#### Global Parameters

In [7]:
MAX_SEQ_LEN = 20
BATCH_SIZE = 64

### Language Vocabulary 
* (Vocab of characters, i.e. an Alphabet)

In [8]:
class Lang:
    def __init__(self, counter, vocab_size):
        self.word2id = {}
        self.id2word = {}
        self.pad = "<PAD>"
        self.sos = "<SOS>"
        self.eos = "<EOS>"
        self.unk = "<UNK>"
        
        self.ipad = 0
        self.isos = 1
        self.ieos = 2
        self.iunk = 3
        
        self.word2id[self.pad] = 0
        self.word2id[self.sos] = 1
        self.word2id[self.eos] = 2
        self.word2id[self.unk] = 3
        
        self.id2word[0] = self.pad
        self.id2word[1] = self.sos
        self.id2word[2] = self.eos
        self.id2word[3] = self.unk
        
        curr_id = 4
        for w, c in counter.most_common(vocab_size):
            self.word2id[w] = curr_id
            self.id2word[curr_id] = w
            curr_id += 1
            
    def encodeSentence(self, s, max_len=-1):
        wseq = s.lower().strip()
        if max_len == -1:
            return [self.word2id[w] if w in self.word2id else self.iunk for w in wseq]
        else:
            return ([self.word2id[w] if w in self.word2id else self.iunk for w in wseq] + [self.ieos] + [self.ipad]*max_len)[:max_len]
        
    def encodeSentence2(self, s, max_len=-1):
        wseq = wseq = s.lower().strip()
        return min(max_len, len(wseq)+1), \
            ([self.word2id[w] if w in self.word2id else self.iunk for w in wseq] + \
                [self.ieos] + [self.ipad]*max_len)[:max_len]
    
    def decodeSentence(self, id_seq):
        id_seq = np.array(id_seq + [self.ieos])
        j = np.argmax(id_seq==self.ieos)
        s = ''.join([self.id2word[x] for x in id_seq[:j]])
        s = s.replace(self.unk, "UNK")
        return s

In [9]:
# Total number of samples to read
N = 30823

### Reading the data files
- Each line contains a hindi word in both English and Devnagari script

In [10]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [11]:
hi_counter = Counter()
hi_sentences=[]
en_counter = Counter()
en_sentences=[]
#with open("Hindi - Word Transliteration Pairs 1.txt") as f:

with open("/content/drive/MyDrive/Hinglish/Hindi - Word Transliteration Pairs 1.txt") as f:
    for line in tqdm_notebook(f, total=N, desc="Reading file:"):
        en, hi = line.strip().split("\t")
        hi_sentences.append(hi)
        en_sentences.append(en)
    for line in tqdm_notebook(hi_sentences, desc="Processing inputs:"):
        for w in line.strip():
            hi_counter[w] += 1
    for line in tqdm_notebook(en_sentences, desc="Processing inputs:"):
        for w in line.strip():
            en_counter[w] += 1

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, description='Reading file:', max=30823.0, style=ProgressStyle(descript…




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  if sys.path[0] == '':


HBox(children=(FloatProgress(value=0.0, description='Processing inputs:', max=30823.0, style=ProgressStyle(des…




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  from ipykernel import kernelapp as app


HBox(children=(FloatProgress(value=0.0, description='Processing inputs:', max=30823.0, style=ProgressStyle(des…




In [12]:
# A few sample hindi characters
print("Most common hi characters in dataset:\n", hi_counter.most_common(5))

print("\nTotal (hi)characters gathered from dataset:",len(hi_counter))

# A few sample english characters
print("\nMost common en characters in dataset:\n", en_counter.most_common(5))

print("\nTotal (en)characters gathered from dataset:", len(en_counter))

Most common hi characters in dataset:
 [('ा', 21123), ('र', 9205), ('े', 8100), ('न', 7225), ('ी', 6546)]

Total (hi)characters gathered from dataset: 66

Most common en characters in dataset:
 [('a', 57220), ('n', 15015), ('i', 14015), ('h', 13805), ('e', 12264)]

Total (en)characters gathered from dataset: 27


In [13]:
en_lang = Lang(en_counter, len(en_counter))
hi_lang = Lang(hi_counter, len(hi_counter))

In [14]:
print(en_lang)
print(hi_lang)

<__main__.Lang object at 0x7fdb7a4dfd90>
<__main__.Lang object at 0x7fdb7a4dfb90>


In [15]:
print("Test en encoding:", en_lang.encodeSentence("Shukriya"))

print("Test en decoding:", en_lang.decodeSentence(en_lang.encodeSentence("Shukriya", 10)))

print("Test hindi encoding:", hi_lang.encodeSentence("शुक्रिया", 10))

print("Test hindi decoding:", hi_lang.decodeSentence((hi_lang.encodeSentence("शुक्रिया", 10))))

Test en encoding: [15, 7, 10, 13, 9, 6, 20, 4]
Test en decoding: shukriya
Test hindi encoding: [35, 19, 15, 22, 5, 12, 21, 4, 2, 0]
Test hindi decoding: शुक्रिया


In [16]:
VE = len(en_lang.word2id)
VH = len(hi_lang.word2id)

# The Seq2Seq architecture
Seq2Seq is a method of encoder-decoder based machine translation and language processing that maps an input of sequence to an output of sequence with a tag and attention value. The idea is to use 2 RNNs that will work together with a special token and try to predict the next state sequence from the previous sequence.

A typical sequence to sequence model has two parts – an encoder and a decoder. Both the parts are practically two different neural network models combined into one giant network. 
Broadly, the task of an encoder network is to understand the input sequence, and create a smaller dimensional representation of it. This representation is then forwarded to a decoder network which generates a sequence of its own that represents the output. Let’s take an example of a conversational agent to understand the concept.

- We will implement a seq2seq architecture for transliteration in Tensorflow r1.13.1 / r1.14
- Debugging Tip: Always keep track of tensor dimensions!
- **Tensorflow Computation Graph** - We will build a tf computation graph first. This is the representation used by tf for any neural network architecture. Once the computation graph is built, you can feed data to it for training or inference

#### Character Embedding Matrix

In [17]:
en_word_emb_matrix = tf.get_variable("en_word_emb_matrix", (VE, 300), dtype=tf.float32)
hi_word_emb_matrix = tf.get_variable("hi_word_emb_matrix", (VH, 300), dtype=tf.float32)

Instructions for updating:
Colocations handled automatically by placer.


#### Placeholders
- Input to a tensorflow graph is 

In [18]:
keep_prob = tf.placeholder(tf.float32)

input_ids = tf.placeholder(tf.int32, (None, MAX_SEQ_LEN))
input_lens = tf.placeholder(tf.int32, (None, ))

ph_target_ids = tf.placeholder(tf.int32, (None, MAX_SEQ_LEN))
target_lens = tf.placeholder(tf.int32, (None, ))

In [19]:
# Add SOS or GO symbol
target_ids = tf.concat([tf.fill([BATCH_SIZE,1], hi_lang.isos), ph_target_ids], -1)

#### Building the computation graph

In [20]:
input_emb = tf.nn.embedding_lookup(en_word_emb_matrix, input_ids)
target_emb = tf.nn.embedding_lookup(hi_word_emb_matrix, target_ids[:, :-1])

In [21]:
input_emb.shape

TensorShape([Dimension(None), Dimension(20), Dimension(300)])

#### Encoder - RNN based sequence encoder

In [22]:
encoder_cell = tf.nn.rnn_cell.GRUCell(128) # 128 is the dimension of hidden state
encoder_cell = DropoutWrapper(encoder_cell, output_keep_prob=keep_prob) # Adding Dropout for regularization

Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.


In [23]:
enc_outputs, enc_state = tf.nn.dynamic_rnn(
    encoder_cell, # The encoder GRU cell
    input_emb, # Embedded input sequence
    sequence_length=input_lens, # Sequence lengths of individual inputs in a batch
    initial_state=encoder_cell.zero_state(BATCH_SIZE, dtype=tf.float32)
)

Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [24]:
# Confirm the shape of the final hidden state
enc_state.shape

TensorShape([Dimension(64), Dimension(128)])

#### Decoder

In [25]:
decoder_cell = tf.nn.rnn_cell.GRUCell(128)
decoder_cell = DropoutWrapper(decoder_cell, output_keep_prob=keep_prob)

#### Decoder to Output Vocab Projection Layer

In [26]:
output_projection = tf.layers.Dense(len(hi_lang.word2id))

#### Decoder Training Helper

In [27]:
helper = seq2seq.TrainingHelper(target_emb, target_lens)
decoder = seq2seq.BasicDecoder(decoder_cell, helper, enc_state, output_projection)
outputs, _, outputs_lens = seq2seq.dynamic_decode(decoder, maximum_iterations=MAX_SEQ_LEN, 
                                                  impute_finished=False, swap_memory=True)
output_max_len = tf.reduce_max(outputs_lens)

#### And Decoder Inference Helper

In [28]:
# Using the decoder_cell without dropout here.
infer_helper = seq2seq.GreedyEmbeddingHelper(hi_word_emb_matrix, tf.fill([BATCH_SIZE, ], hi_lang.isos), hi_lang.ieos)
infer_decoder = seq2seq.BasicDecoder(decoder_cell, infer_helper, enc_state, output_projection)
infer_output = seq2seq.dynamic_decode(infer_decoder, maximum_iterations=MAX_SEQ_LEN, swap_memory=True)

#### Loss and Optimizers

In [29]:
# Sequence mask:
# To make sure we don't back-propagate error from output of length positions
masks = tf.sequence_mask(target_lens, output_max_len, dtype=tf.float32, name='masks')

# Loss function - weighted softmax cross entropy
cost = seq2seq.sequence_loss(
    outputs[0],
    target_ids[:, 1:(output_max_len + 1)],
    masks)

# Optimizer
optimizer = tf.train.AdamOptimizer(0.0001)

In [30]:
train_op = optimizer.minimize(cost)

In [31]:
init = tf.global_variables_initializer()

#### Tensorflow Sessions

In [32]:
sess_config = tf.ConfigProto()
sess_config.gpu_options.allow_growth = True

In [33]:
sess = tf.InteractiveSession(config=sess_config)
sess.run(init)

#### Minibatch Training + Validation
- Performance Evaluation using BLEU scores

In [34]:
random.seed(41)

In [35]:
parallel = list(zip(en_sentences, hi_sentences))

In [36]:
random.shuffle(parallel)

In [37]:
parallel[123]

('kijiyega', 'कीजियेगा')

In [38]:
train_n = int(0.95*N)
valid_n = N - train_n

In [39]:
train_pairs = parallel[:train_n].copy()
valid_pairs = parallel[train_n:]

In [40]:
def small_test():
    all_bleu = []
    smoothing = nltk.translate.bleu_score.SmoothingFunction().method7
    for m in range(0, valid_n, BATCH_SIZE):
        # print(f"Status: {m}/{N}", end='\r')
        n = m + BATCH_SIZE
        if n > valid_n:
            # print("Epoch Complete...")
            break

        input_batch = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)
        input_lens_batch = np.zeros((BATCH_SIZE,), dtype=np.int32)
        for i in range(m, n):
            b,a = en_lang.encodeSentence2(valid_pairs[i][0], MAX_SEQ_LEN)
            input_batch[i-m,:] = a
            input_lens_batch[i-m] = b

    #     target_batch = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)
    #     target_lens_batch = np.zeros((BATCH_SIZE,), dtype=np.int32)
    #     for i in range(m, n):
    #         b,a = hi_lang.encodeSentence2(valid_pairs[i][1], MAX_SEQ_LEN)
    #         target_batch[i-m,:] = a
    #         target_lens_batch[i-m] = b

        feed_dict={
            input_ids: input_batch,
            input_lens: input_lens_batch,
            #target_ids: target_batch,
            #target_lens: target_lens_batch,
            keep_prob: 1.0
        }
        pred_batch = sess.run(infer_output[0].sample_id, feed_dict=feed_dict)
        for k, pred_ in enumerate(pred_batch):
            pred_s = hi_lang.decodeSentence(list(pred_))
            ref = valid_pairs[m+k][1]
            try:
                _bx = nltk.translate.bleu_score.sentence_bleu(
                    [ref],
                    pred_s,
                    weights=[1/4]*4,
                    smoothing_function=smoothing)
            except ZeroDivisionError:
                _bx = 0
            all_bleu.append(_bx)

    print(f"BLEU Score: {np.mean(all_bleu)}")

In [41]:
for _e in range(10):
    # Mix things up a bit.
    random.shuffle(train_pairs)
    pbar = tqdm_notebook(range(0, train_n, BATCH_SIZE))
    batch_loss = 0
    bxi = 0
    for m in pbar:
        n = m + BATCH_SIZE
        if n <= train_n:
            # print("Epoch Complete... \n")

            input_batch = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)
            input_lens_batch = np.zeros((BATCH_SIZE,), dtype=np.int32)
            for i in range(m, n):
                b,a = en_lang.encodeSentence2(train_pairs[i][0], MAX_SEQ_LEN)
                input_batch[i-m,:] = a
                input_lens_batch[i-m] = b

            target_batch = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)
            target_lens_batch = np.zeros((BATCH_SIZE,), dtype=np.int32)
            for i in range(m, n):
                b,a = hi_lang.encodeSentence2(train_pairs[i][1], MAX_SEQ_LEN)
                target_batch[i-m,:] = a
                target_lens_batch[i-m] = b

            feed_dict={
                input_ids: input_batch,
                input_lens: input_lens_batch,
                ph_target_ids: target_batch,
                target_lens: target_lens_batch,
                keep_prob: 0.8 
            }
            sess.run(train_op, feed_dict=feed_dict)
            batch_loss += sess.run(cost, feed_dict=feed_dict)
            pbar.set_description(f"Epoch: {_e} >> Loss: {batch_loss/(bxi+1):2.2F}:")
            bxi += 1
            if (1 + n//BATCH_SIZE) % 100 == 0:
                small_test()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=458.0), HTML(value='')))

BLEU Score: 0.0
BLEU Score: 0.06838558288235048
BLEU Score: 0.10599391144344654
BLEU Score: 0.134205212864294



HBox(children=(FloatProgress(value=0.0, max=458.0), HTML(value='')))

BLEU Score: 0.18399427636824583
BLEU Score: 0.21232150718085682
BLEU Score: 0.22985928735986186
BLEU Score: 0.24334757012334626



HBox(children=(FloatProgress(value=0.0, max=458.0), HTML(value='')))

BLEU Score: 0.26880910760111143
BLEU Score: 0.2731656796942811
BLEU Score: 0.2847091788019427
BLEU Score: 0.29270622406059055



HBox(children=(FloatProgress(value=0.0, max=458.0), HTML(value='')))

BLEU Score: 0.30116652369085845
BLEU Score: 0.3097188066009022
BLEU Score: 0.3132556994869862
BLEU Score: 0.31475088109802446



HBox(children=(FloatProgress(value=0.0, max=458.0), HTML(value='')))

BLEU Score: 0.3253125794327763
BLEU Score: 0.32830916376859837
BLEU Score: 0.33483066844364745
BLEU Score: 0.33961109367871717



HBox(children=(FloatProgress(value=0.0, max=458.0), HTML(value='')))

BLEU Score: 0.34218557429139446
BLEU Score: 0.35365306744227637
BLEU Score: 0.35973911168531264
BLEU Score: 0.3622143874721217



HBox(children=(FloatProgress(value=0.0, max=458.0), HTML(value='')))

BLEU Score: 0.3644332815251898
BLEU Score: 0.37224316388202555
BLEU Score: 0.3774720615204538
BLEU Score: 0.3871267228911461



HBox(children=(FloatProgress(value=0.0, max=458.0), HTML(value='')))

BLEU Score: 0.39626761545062933
BLEU Score: 0.3976882221683209
BLEU Score: 0.4034818756427811
BLEU Score: 0.4083851704005243



HBox(children=(FloatProgress(value=0.0, max=458.0), HTML(value='')))

BLEU Score: 0.41448196777220253
BLEU Score: 0.42418853958577135
BLEU Score: 0.4352989669542479
BLEU Score: 0.4342613488648752



HBox(children=(FloatProgress(value=0.0, max=458.0), HTML(value='')))

BLEU Score: 0.4488719948349764
BLEU Score: 0.44732904479517693
BLEU Score: 0.45101506244910533
BLEU Score: 0.46417481166995495



In [42]:
#saver = tf.train.Saver()


In [43]:
'''sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver.save(sess, 'my_test_model')'''

"sess = tf.Session()\nsess.run(tf.global_variables_initializer())\nsaver.save(sess, 'my_test_model')"

In [44]:
'''sess=tf.Session()    
#First let's load meta graph and restore weights
saver = tf.train.import_meta_graph('my_test_model.meta')
saver.restore(sess,tf.train.latest_checkpoint('./'))
print("Model restored.")    

graph = tf.get_default_graph()


input_batch = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)
input_lens_batch = np.zeros((BATCH_SIZE,), dtype=np.int32)
b,a = en_lang.encodeSentence2('raama', MAX_SEQ_LEN)
input_batch[0, :] = a
input_lens_batch[0] = b

feed_dict={
    input_ids: input_batch,
    input_lens: input_lens_batch,
    #target_ids: target_batch,
    #target_lens: target_lens_batch,
    keep_prob: 1.0
}


tf.local_variables_initializer().run()

pred_batch = sess.run(infer_output[0].sample_id, feed_dict=feed_dict)
pred_ = pred_batch[0]
pred_s = hi_lang.decodeSentence(list(pred_))
# ref = valid_pairs[m+k][1]
print(pred_s)'''

'sess=tf.Session()    \n#First let\'s load meta graph and restore weights\nsaver = tf.train.import_meta_graph(\'my_test_model.meta\')\nsaver.restore(sess,tf.train.latest_checkpoint(\'./\'))\nprint("Model restored.")    \n\ngraph = tf.get_default_graph()\n\n\ninput_batch = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)\ninput_lens_batch = np.zeros((BATCH_SIZE,), dtype=np.int32)\nb,a = en_lang.encodeSentence2(\'raama\', MAX_SEQ_LEN)\ninput_batch[0, :] = a\ninput_lens_batch[0] = b\n\nfeed_dict={\n    input_ids: input_batch,\n    input_lens: input_lens_batch,\n    #target_ids: target_batch,\n    #target_lens: target_lens_batch,\n    keep_prob: 1.0\n}\n\n\ntf.local_variables_initializer().run()\n\npred_batch = sess.run(infer_output[0].sample_id, feed_dict=feed_dict)\npred_ = pred_batch[0]\npred_s = hi_lang.decodeSentence(list(pred_))\n# ref = valid_pairs[m+k][1]\nprint(pred_s)'

In [45]:
input_batch = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)
input_lens_batch = np.zeros((BATCH_SIZE,), dtype=np.int32)
b,a = en_lang.encodeSentence2('raama', MAX_SEQ_LEN)
input_batch[0, :] = a
input_lens_batch[0] = b

feed_dict={
    input_ids: input_batch,
    input_lens: input_lens_batch,
    #target_ids: target_batch,
    #target_lens: target_lens_batch,
    keep_prob: 1.0
}
pred_batch = sess.run(infer_output[0].sample_id, feed_dict=feed_dict)
pred_ = pred_batch[0]
pred_s = hi_lang.decodeSentence(list(pred_))
# ref = valid_pairs[m+k][1]
print(pred_s)

रामा


### Let's see some real translation examples now!

In [46]:
def transliterate(s):
  
      input_batch = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)
      input_lens_batch = np.zeros((BATCH_SIZE,), dtype=np.int32)
      b,a = en_lang.encodeSentence2(s, MAX_SEQ_LEN)
      input_batch[0, :] = a
      input_lens_batch[0] = b
      
      feed_dict={
          input_ids: input_batch,
          input_lens: input_lens_batch,
          #target_ids: target_batch,
          #target_lens: target_lens_batch,
          keep_prob: 1.0
      }
      pred_batch = sess.run(infer_output[0].sample_id, feed_dict=feed_dict)
      pred_ = pred_batch[0]
      pred_s = hi_lang.decodeSentence(list(pred_))
      # ref = valid_pairs[m+k][1]
      return pred_s