In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np

import unicodedata
import re
import os
import io

np.random.seed(42)
tf.random.set_seed(42)

path1 = '/content/drive/MyDrive/sourcelang.txt'
path2='/content/drive/MyDrive/targetlang.txt'

In [None]:
def unicode_to_ascii(s):
    """Remove combining accent marks from ``s``.

    The string is decomposed with Unicode NFD normalization and every
    character whose Unicode category is ``'Mn'`` is dropped; all other
    characters are kept in their original order. Characters that have no
    decomposition (for example ``¿``) pass through unchanged.
    """
    kept = []
    for ch in unicodedata.normalize('NFD', s):
        if unicodedata.category(ch) == 'Mn':
            continue
        kept.append(ch)
    return ''.join(kept)

def preprocess(s):
    """Normalize one raw sentence and wrap it in ``<sos>`` / ``<eos>`` markers.

    Steps, in order: lowercase and strip the input, remove accent marks via
    ``unicode_to_ascii``, surround each of ``? . ! , ¿`` with spaces, collapse
    runs of spaces/double quotes into one space, replace every run of
    characters other than ASCII letters and ``? . ! , ¿`` with one space, and
    strip the result.

    Returns:
        The cleaned sentence as ``'<sos> ' + text + ' <eos>'``.
    """
    text = s.lower().strip()
    text = unicode_to_ascii(text)

    # Applied sequentially; the order matters because later patterns see the
    # output of earlier ones.
    substitutions = (
        (r"([?.!,¿])", r" \1 "),
        (r'[" "]+', " "),
        (r"[^a-zA-Z?.!,¿]+", " "),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    # Add the start-of-sequence (sos) and end-of-sequence (eos) tokens.
    return '<sos> ' + text.strip() + ' <eos>'

In [None]:
def tokenize(language):
    """Fit a Keras ``Tokenizer`` on ``language`` and encode it as padded ids.

    Args:
        language: the texts passed to ``fit_on_texts`` and
            ``texts_to_sequences`` (a list of preprocessed sentences in this
            notebook).

    Returns:
        ``(tensor, tokenizer)`` where ``tensor`` is the output of
        ``pad_sequences(..., padding='post')`` and ``tokenizer`` is the fitted
        ``Tokenizer``.
    """
    # Unknown words are mapped to the <unk> token; the filter string is empty.
    vocab = Tokenizer(filters='', oov_token='<unk>')
    vocab.fit_on_texts(language)

    padded = pad_sequences(vocab.texts_to_sequences(language), padding='post')
    return padded, vocab

def load_dataset(path1, path2, num_examples=None, prints=False):
    """Read two line-aligned text files, preprocess them and tokenize them.

    Args:
        path1: path of the UTF-8 file holding the input-language sentences,
            one per line.
        path2: path of the UTF-8 file holding the target-language sentences,
            one per line.
        num_examples: if given, only the first ``num_examples`` lines of each
            file are used; ``None`` keeps every line.
        prints: if true, print the last preprocessed target sentence and the
            last preprocessed input sentence, then return ``None`` without
            tokenizing.

    Returns:
        ``(input_tensor, targ_tensor, input_tokenizer, targ_tokenizer)``, or
        ``None`` when ``prints`` is true.
    """
    # Context managers close both files even if reading raises.
    with open(path1, encoding='UTF-8') as source_file:
        source_lines = source_file.read().strip().split('\n')
    with open(path2, encoding='UTF-8') as target_file:
        target_lines = target_file.read().strip().split('\n')

    # One preprocessed sentence per line; the two lists are index-aligned.
    input_lang = [preprocess(line) for line in source_lines[:num_examples]]
    targ_lang = [preprocess(line) for line in target_lines[:num_examples]]

    if prints:
        print(targ_lang[-1])
        print(input_lang[-1])
        return None

    input_tensor, input_tokenizer = tokenize(input_lang)
    targ_tensor, targ_tokenizer = tokenize(targ_lang)

    return input_tensor, targ_tensor, input_tokenizer, targ_tokenizer

load_dataset(path1,path2,num_examples=60000, prints=True)

<sos> i think was azam understand not . <eos>
<sos> i think azam did not understand . <eos>


In [None]:
# Load and tokenize the whole corpus; num_examples is left at its default (None),
# so load_dataset does not truncate the line lists.
inp_tensor, targ_tensor, inp_lang, targ_lang = load_dataset(path1,path2)
# Hold out 20% of the sentence pairs for validation (no random_state is passed here).
inp_tensor_train, inp_tensor_val, \
targ_tensor_train, targ_tensor_val = \
train_test_split(inp_tensor, targ_tensor, test_size=0.2)
# Report the shape of each split.
print("Input tensors: ", inp_tensor_train.shape, inp_tensor_val.shape)
print("Target tensors: ", targ_tensor_train.shape, targ_tensor_val.shape)

Input tensors:  (68176, 15) (17044, 15)
Target tensors:  (68176, 17) (17044, 17)


In [None]:
# Hyperparameters and sizes derived from the training split.
buffer_size = len(inp_tensor_train)
batch_size = 16
steps_per_epoch = len(inp_tensor_train) // batch_size
embedding_dim = 300
units = 1024
# +1 leaves room for id 0, which loss_function treats as padding.
vocab_inp_size = len(inp_lang.index_word) + 1
vocab_targ_size = len(targ_lang.index_word) + 1

def create_dataset(shuffle=True, buffer_size=buffer_size, batch_size=batch_size,
                   inputs=None, targets=None):
    """Build a batched ``tf.data.Dataset`` of (input, target) pairs.

    Args:
        shuffle: shuffle the examples with ``buffer_size`` before batching.
        buffer_size: shuffle buffer size.
        batch_size: batch size; incomplete final batches are dropped.
        inputs: input tensor to slice. Defaults to ``inp_tensor_train``.
        targets: target tensor to slice. Defaults to ``targ_tensor_train``.

    Returns:
        The dataset, batched and prefetched by one batch.

    Raises:
        ValueError: if only one of ``inputs`` / ``targets`` is given.
    """
    if (inputs is None) != (targets is None):
        raise ValueError("inputs and targets must be provided together")
    if inputs is None:
        inputs, targets = inp_tensor_train, targ_tensor_train

    ds = tf.data.Dataset.from_tensor_slices((inputs, targets))
    if shuffle:
        ds = ds.shuffle(buffer_size)
    ds = ds.batch(batch_size, drop_remainder=True)
    return ds.prefetch(1)

train_dataset = create_dataset()
# The validation dataset must be built from the held-out tensors, not from
# the training tensors.
valid_dataset = create_dataset(shuffle=False,
                               inputs=inp_tensor_val, targets=targ_tensor_val)
inp_batch, targ_batch = next(iter(train_dataset))
inp_batch.shape, targ_batch.shape

(TensorShape([16, 15]), TensorShape([16, 17]))

In [None]:
from tensorflow.keras.layers import Embedding, LSTM, dot, Dense,GRU

class Encoder(tf.keras.Model):
    """Sequence encoder: an embedding layer followed by a single GRU.

    The GRU is configured to return both the per-step outputs and its final
    state.
    """

    def __init__(self, vocab_size, embedding_dim, encoder_units, batch_size):
        super().__init__()
        self.batch_size = batch_size
        self.encoder_units = encoder_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        gru_options = dict(
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )
        self.gru = tf.keras.layers.GRU(encoder_units, **gru_options)

    def call(self, x, hidden):
        """Embed the token ids ``x`` and run the GRU starting from ``hidden``.

        Returns:
            ``(output, state)``: the GRU's per-step outputs and final state.
        """
        embedded = self.embedding(x)
        output, state = self.gru(embedded, initial_state=hidden)
        return output, state

    def initialize_hidden_state(self):
        """Return an all-zero state of shape ``(batch_size, encoder_units)``."""
        state_shape = (self.batch_size, self.encoder_units)
        return tf.zeros(state_shape)


# Build the encoder and run it once on the sample batch to check output shapes.
encoder = Encoder(vocab_inp_size,embedding_dim,units,batch_size)
# Start from the all-zero state produced by initialize_hidden_state().
sample_hidden = encoder.initialize_hidden_state()
# sample_hidden is rebound to the state returned by the encoder.
sample_output, sample_hidden= encoder(inp_batch,sample_hidden)
print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))

Encoder output shape: (batch size, sequence length, units) (16, 15, 1024)
Encoder Hidden state shape: (batch size, units) (16, 1024)


In [None]:
def print_shapes(enc_output, dec_state, score, attention_weights, context_vector):
    """Print the notebook-level sizes and the shapes of the attention tensors.

    The first three lines come from the module-level ``batch_size``,
    ``inp_tensor_train`` and ``units``; the remaining lines print the
    ``.shape`` of each argument.
    """
    print(f"batch_size: {batch_size}")
    print(f"seq_length: {inp_tensor_train.shape[1]}")
    print(f"enc_units: {units}")
    print()
    print(f"enc_output:        {enc_output.shape}")
    print(f"dec_state:         {dec_state.shape}")
    print(f"score:             {score.shape}")
    print(f"attention_weights: {attention_weights.shape}")
    print(f"context_vector:    {context_vector.shape}")

In [None]:
def LuongAttention(query, values):
  """Dot-product attention of one query vector over a sequence of values.

  Args:
    query: tensor used as ``(batch, units)`` in this notebook (the current
      decoder / final encoder state).
    values: tensor used as ``(batch, seq_len, units)`` in this notebook (the
      encoder outputs).

  Returns:
    ``(context_vector, attention_weights)``: the weighted sum of ``values``
    over the sequence axis, shape ``(batch, units)``, and the weights, shape
    ``(batch, seq_len, 1)``.
  """
  # (batch, units) -> (batch, 1, units) so it can be multiplied with values.
  query_with_time_axis = tf.expand_dims(query, 1)

  # Dot product of the query with every encoder step: (batch, 1, seq_len).
  score = tf.matmul(query_with_time_axis, values, transpose_b=True)

  # Normalize over the encoder time steps (the last axis). Axis 1 has length
  # 1 here, so a softmax over it would set every weight to 1.0.
  attention_weights = tf.nn.softmax(score, axis=-1)

  # (batch, 1, seq_len) -> (batch, seq_len, 1) to broadcast against values.
  attention_weights = tf.reshape(attention_weights, shape=(-1, attention_weights.shape[2], 1))

  context_vector = attention_weights * values
  context_vector = tf.reduce_sum(context_vector, axis=1)

  return context_vector, attention_weights

In [None]:

# Run the attention function once on the encoder's sample state and outputs
# and print the resulting shapes.
attention_result, attention_weights = LuongAttention(sample_hidden, sample_output)

print("context vector shape: (batch size, units) {}".format(attention_result.shape))
print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(attention_weights.shape))

context vector shape: (batch size, units) (16, 1024)
Attention weights shape: (batch_size, sequence_length, 1) (16, 15, 1)


In [None]:
class Decoder(tf.keras.Model):
  """Single-step decoder: attention context + embedding -> GRU -> vocabulary logits."""

  def __init__(self, vocab_size, embedding_dim, dec_units, batch_size):
    super(Decoder, self).__init__()
    self.batch_size = batch_size
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    # Projects each GRU output to one logit per vocabulary entry.
    self.fc = tf.keras.layers.Dense(vocab_size)

  def call(self, x, hidden, enc_output):
    """Decode one step.

    Args:
      x: token ids for the current step, used as ``(batch, 1)`` in this notebook.
      hidden: previous decoder state (the encoder's final state on the first
        step); used both as the attention query and as the GRU initial state.
      enc_output: encoder outputs that attention is computed over.

    Returns:
      ``(logits, state, attention_weights)``.
    """
    context_vector, attention_weights = LuongAttention(hidden, enc_output)

    x = self.embedding(x)

    # Prepend the context vector to the embedded token along the feature axis.
    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # Start the GRU from the previous state. Without initial_state it would
    # restart from its default state on every decoding step.
    output, state = self.gru(x, initial_state=hidden)

    # (batch, 1, dec_units) -> (batch, dec_units)
    output = tf.reshape(output, (-1, output.shape[2]))

    x = self.fc(output)

    return x, state, attention_weights

In [None]:
# Build the decoder and run one step to check the output shape.
decoder = Decoder(vocab_targ_size, embedding_dim, units, batch_size)

# NOTE(review): a random (batch_size, 1) tensor stands in for real token ids here;
# this call appears to be a shape check only.
sample_decoder_output, _, _ = decoder(tf.random.uniform((batch_size, 1)),
                                      sample_hidden, sample_output)

print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))

Decoder output shape: (batch_size, vocab size) (16, 8095)


In [None]:
def print_status_bar(iteration, total, loss):
    """Print a one-line progress indicator that overwrites itself.

    The line starts with a carriage return so successive calls redraw the same
    console line; a newline is emitted only once ``iteration`` is no longer
    below ``total``.
    """
    progress = "\r{}/{} - ".format(iteration, total)
    metrics = "loss: {:.4f}".format(loss)
    if iteration < total:
        terminator = ""
    else:
        terminator = "\n"
    print(progress + metrics, end=terminator)

In [None]:
# Adam with default settings; the loss object returns one unreduced value per
# example (reduction='none') so padding can be masked out below.
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,reduction='none')

def loss_function(real, pred):
  """Masked sparse categorical cross-entropy for one decoding step.

  Positions where ``real`` equals 0 contribute zero loss. The result is the
  mean over all positions, masked ones included.
  """
  per_example = loss_object(real, pred)

  is_real_token = tf.math.logical_not(tf.math.equal(real, 0))
  weights = tf.cast(is_real_token, dtype=per_example.dtype)

  return tf.reduce_mean(per_example * weights)

@tf.function
def train_step(inp_batch, targ_batch, enc_state_h):
    """Run one optimization step on a batch and return its averaged loss.

    The encoder is run on ``inp_batch`` starting from ``enc_state_h``; the
    decoder is then run one target position at a time with teacher forcing.
    Gradients of the summed loss are applied to the encoder and decoder
    variables through the module-level ``optimizer``.

    Returns:
        The summed per-step loss divided by ``targ_batch.shape[1]``.
    """
    loss = 0

    with tf.GradientTape() as tape:
        enc_output, enc_state_h= encoder(inp_batch, enc_state_h)
        # at the beginning we set the decoder state to the encoder state
        dec_state_h = enc_state_h

        # At the beginning we feed the <sos> token as input for the decoder;
        # afterwards we feed the ground-truth target token (teacher forcing).
        # The <sos> column is built with the module-level batch_size.
        dec_input = tf.expand_dims([targ_lang.word_index['<sos>']] * batch_size, 1)
        for t in range(1, targ_batch.shape[1]): # targ_batch.shape[1] == seq length
            predictions, dec_state_h, _ = decoder(dec_input, dec_state_h, enc_output)
            loss += loss_function(targ_batch[:, t], predictions)
            dec_input = tf.expand_dims(targ_batch[:, t], 1)

    # NOTE: the loop above runs shape[1] - 1 steps, but the reported loss is
    # divided by shape[1].
    batch_loss = loss / int(targ_batch.shape[1])
    variables = encoder.trainable_variables + decoder.trainable_variables
    # Gradients are taken of the summed loss, not of batch_loss.
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss

In [None]:
# Number of passes over the training dataset.
epochs = 3

for epoch in range(epochs):
    print("Epoch {}/{}".format(epoch + 1, epochs))
    # A zero state is created once per epoch and passed unchanged to every
    # train_step call (the state computed inside train_step is not fed back).
    enc_state_h= encoder.initialize_hidden_state()
    total_loss = 0

    for (batch, (inp, targ)) in enumerate(train_dataset.take(steps_per_epoch)):
        batch_loss = train_step(inp, targ, enc_state_h)
        total_loss += batch_loss

        # Redraw the in-place progress line with the current batch loss.
        print_status_bar(batch, steps_per_epoch, batch_loss.numpy())
    # Final line of the epoch: mean batch loss, terminated with a newline.
    print_status_bar(steps_per_epoch, steps_per_epoch, total_loss / steps_per_epoch)

In [None]:
def evaluate(sentence, targ_tensor, inp_tensor, return_attention=False):
    """Greedily decode a translation for ``sentence``.

    Args:
        sentence: raw input sentence; it is passed through ``preprocess``.
        targ_tensor: tensor whose ``shape[1]`` gives the maximum number of
            decoding steps (the padded target length).
        inp_tensor: tensor whose ``shape[1]`` gives the length the encoded
            input is padded or truncated to.
        return_attention: if true, also return the attention matrix collected
            during decoding.

    Returns:
        ``(result, sentence)``, where ``result`` is the space-separated
        predicted tokens (each followed by a space, including ``<eos>`` when
        it is produced) and ``sentence`` is the preprocessed input. With
        ``return_attention=True`` the tuple is
        ``(result, sentence, attention_plot)``, where ``attention_plot`` has
        shape ``(targ_tensor.shape[1], inp_tensor.shape[1])`` and row ``t``
        holds the attention weights of decoding step ``t`` (rows for steps
        that were never reached stay zero).
    """
    max_targ_len = targ_tensor.shape[1]
    max_inp_len = inp_tensor.shape[1]
    attention_plot = np.zeros((max_targ_len, max_inp_len))

    sentence = preprocess(sentence)

    inputs = inp_lang.texts_to_sequences([sentence])
    inputs = pad_sequences(inputs, maxlen=max_inp_len, padding='post')
    inputs = tf.convert_to_tensor(inputs)
    result = ''

    # Batch of one sentence, so the initial encoder state is (1, units).
    enc_state_h = tf.zeros((1, units))
    enc_output, enc_state_h = encoder(inputs, enc_state_h)
    dec_state_h = enc_state_h
    dec_input = tf.expand_dims([targ_lang.word_index['<sos>']], 0)

    for t in range(max_targ_len):
        predictions, dec_state_h, attention_weights = decoder(dec_input, dec_state_h, enc_output)

        # Only collect the weights when the caller asked for them, so the
        # default code path is unchanged.
        if return_attention:
            attention_plot[t] = tf.reshape(attention_weights, (-1, )).numpy()

        predicted_id = tf.argmax(predictions[0]).numpy()
        # Id 0 (padding) has no index_word entry; fall back to a placeholder
        # instead of raising KeyError.
        predicted_word = targ_lang.index_word.get(predicted_id, '<pad>')
        result += predicted_word + ' '

        # Stop once the end-of-sequence token has been produced.
        if predicted_word == '<eos>':
            break

        # Feed the prediction back in as the next decoder input.
        dec_input = tf.expand_dims([predicted_id], 0)

    if return_attention:
        return result, sentence, attention_plot
    return result, sentence


In [None]:
# function for plotting the attention weights to visualize how the model works internally
def plot_attention(attention, sentence, predicted_sentence):
  """Show an attention matrix as a heat map with token labels.

  Args:
    attention: 2-D array; rows are labelled with ``predicted_sentence`` and
      columns with ``sentence``.
    sentence: sequence of input tokens (x-axis labels).
    predicted_sentence: sequence of predicted tokens (y-axis labels).
  """
  fig = plt.figure(figsize=(10,10))
  ax = fig.add_subplot(1, 1, 1)
  ax.matshow(attention, cmap='viridis')

  # Pin one tick per token before assigning labels. Setting labels on the
  # automatically chosen tick positions can attach tokens to the wrong cells.
  ax.set_xticks(list(range(len(sentence))))
  ax.set_xticklabels(list(sentence), rotation=90)
  ax.set_yticks(list(range(len(predicted_sentence))))
  ax.set_yticklabels(list(predicted_sentence))

  plt.show()


def translate(sentence, ground_truth=None, plot_weights=False):
    """Translate ``sentence`` and print the input, prediction and ground truth.

    Args:
        sentence: raw input sentence, passed to ``evaluate``.
        ground_truth: optional reference translation; printed when truthy.
        plot_weights: request an attention plot. This function only receives
            ``(result, sentence)`` from its ``evaluate`` call, so it has no
            attention matrix to draw; the request is reported with a warning
            instead of failing on an undefined local variable.
    """
    result, sentence = evaluate(sentence, targ_tensor, inp_tensor)

    print(f'{"Input:":15s} {sentence}')
    print(f'{"Prediction:":15s} {result}')
    if ground_truth:
        print(f'{"Ground truth:":15s} {ground_truth}')

    if plot_weights:
        import warnings
        warnings.warn(
            "translate(): plot_weights=True was ignored because no attention "
            "matrix is available from the evaluate() call used here.",
            stacklevel=2,
        )

In [None]:
def preprocess_sequence(seq, language):
    """Turn one id sequence back into text without the special tokens.

    Args:
        seq: object exposing ``.numpy()`` that yields the token ids.
        language: tokenizer-like object exposing ``sequences_to_texts``.

    Returns:
        The decoded sentence with every ``<sos>``, ``<eos>`` and ``<unk>``
        token removed, joined by single spaces.
    """
    special_tokens = ('<sos>', '<eos>', '<unk>')
    decoded = language.sequences_to_texts([seq.numpy()])[0]

    words = []
    for token in decoded.split(' '):
        if token in special_tokens:
            continue
        words.append(token)
    return ' '.join(words)

In [None]:
# Translate one example from each of 100 batches. These batches come from
# train_dataset, so the sentences were seen during training.
for inp_batch, targ_batch in train_dataset.take(100):
    for inp, targ in zip(inp_batch, targ_batch):
        # Decode the id sequences back to plain text (special tokens removed).
        sentence = preprocess_sequence(inp, inp_lang)
        ground_truth = preprocess_sequence(targ, targ_lang)
        translate(sentence, ground_truth)
        print()
        # Only the first pair of each batch is used.
        break

Input:          <sos> i write poems in my free time . <eos>
Prediction:     i my free write . <eos> 
Ground truth:   i my free time poems write .

Input:          <sos> khurum was not wearing cowboy boots . <eos>
Prediction:     was khurum boots wear now not . <eos> 
Ground truth:   was khurum cowboy boots wear now not .

Input:          <sos> the crow is eating grass . <eos>
Prediction:     crow grass eat now . <eos> 
Ground truth:   crow grass eat now .

Input:          <sos> i think we will be fine . <eos>
Prediction:     i think we fine be after . <eos> 
Ground truth:   i think we fine be after .

Input:          <sos> he is chubby . <eos>
Prediction:     he chubby . <eos> 
Ground truth:   he chubby .

Input:          <sos> he was dressed in blue . <eos>
Prediction:     was he blue dress . <eos> 
Ground truth:   was he blue dress .

Input:          <sos> fasten your seat belt please . <eos>
Prediction:     fasten . <eos> 
Ground truth:   fasten your seat belt please .

Input:      

In [None]:
def gettranslation(sentence, ground_truth=None, plot_weights=True):
    """Translate ``sentence`` silently and return the pieces needed for scoring.

    Args:
        sentence: raw input sentence, passed to ``evaluate``.
        ground_truth: optional reference translation; returned unchanged.
        plot_weights: request an attention plot. This function only receives
            ``(result, sentence)`` from its ``evaluate`` call, so it has no
            attention matrix to draw; the request is reported with a warning
            instead of failing on an undefined local variable.

    Returns:
        ``(sentence, result, ground_truth)`` where ``sentence`` is the
        preprocessed input and ``result`` is the predicted text.
    """
    result, sentence = evaluate(sentence, targ_tensor, inp_tensor)

    if plot_weights:
        import warnings
        warnings.warn(
            "gettranslation(): plot_weights=True was ignored because no "
            "attention matrix is available from the evaluate() call used here.",
            stacklevel=2,
        )

    return sentence,result,ground_truth

In [None]:
from nltk.translate.bleu_score import sentence_bleu

In [None]:
# Reference sentences and model predictions, index-aligned.
groundt=list()
predict=list()

# Number of batches to draw; one sentence pair is taken from each.
numsamples=100

# NOTE: the samples come from train_dataset, i.e. from the training data.
for inp_batch, targ_batch in train_dataset.take(numsamples):
    for inp, targ in zip(inp_batch, targ_batch):
        sentence = preprocess_sequence(inp, inp_lang)
        ground_truth = preprocess_sequence(targ, targ_lang)
        # s (preprocessed input) and g (ground truth) are not used below.
        s,r,g=gettranslation(sentence, ground_truth, plot_weights=False)
        groundt.append(ground_truth)
        predict.append(r)
        # Only the first pair of each batch is used.
        break

In [None]:
j=0
for i in groundt:
  j=j+1

In [None]:
j

100

In [None]:
groundt

['she his room go yes no ?',
 'was mahi dua close friends .',
 'was he many dangers expose .',
 'azan uninsure .',
 'the dog milkman bark at now after not .',
 'was my grandfather house build .',
 'i i rahib know believe not .',
 'was her mother him advise not .',
 'she loses the soil .',
 'was said me tell that was he happy .',
 'was she beautiful hat show .',
 'i zabhi rid get now .',
 'was khayam them closely look .',
 'i come now .',
 'was i think was you quit go .',
 'was i help here come .',
 'i several moheem times meet full .',
 'i her sister very much like .',
 'was i it notice .',
 'i you advance know let after .',
 'was i lot learn hope now .',
 'was nazeer gazala do see .',
 'i think fahad cooperative be not .',
 'was i pakistan work use .',
 'was i play want .',
 'was izatullah later short time leave .',
 'i now know .',
 'i them see .',
 'i my room clean full now not yes no ?',
 'arif room clean yes no ?',
 'you able come be after yes no ?',
 'was pigeons grass sit on ful

In [None]:
len(predict)

100

In [None]:
file1 = open("/content/predict.txt","w")

In [None]:
# Write one prediction per line to file1, then close it; z ends up holding
# the number of lines written.
z = 0
for z, i in enumerate(predict, start=1):
  file1.write(i + '\n')
file1.close()

In [None]:
z

100

In [None]:
file1 = open("/content/predict.txt","r+")

In [None]:
file2 = open("/content/ground.txt","w")

In [None]:
# Write one reference sentence per line to file2, then close it; z ends up
# holding the number of lines written.
z = 0
for z, i in enumerate(groundt, start=1):
  file2.write(i + '\n')
file2.close()

In [None]:
file1.read()

'she his room go yes no ? <eos> \nwas mahi dua close friends . <eos> \nwas he many dangers absorb . <eos> \nazan uninsure . <eos> \nthe dog milkman bark at now after not . <eos> \nwas my grandfather house house house house house house house house house house house house house house \ni know not i know not i know not i know not i know not i know \nwas her mother him advise not . <eos> \nthey robbers abdul qadir jilani the soil the soil the soil the soil the soil the soil \nwas he happy . <eos> \nwas she beautiful hat show . <eos> \ni zabhi rid get now . <eos> \nwas khayam them closely look . <eos> \ni come now . <eos> \nwas you go . <eos> \nwas i help . <eos> \ni several moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem \ni her sister a much like . <eos> \nwas i it notice . <eos> \ni you know after . <eos> \nwas i lot learn hope now . <eos> \nwas nazeer gazala do see . <eos> \ni think fahad cooperative be not . <eos> \nwas i pakista

In [None]:
predict

['she his room go yes no ? <eos> ',
 'was mahi dua close friends . <eos> ',
 'was he many dangers absorb . <eos> ',
 'azan uninsure . <eos> ',
 'the dog milkman bark at now after not . <eos> ',
 'was my grandfather house house house house house house house house house house house house house house ',
 'i know not i know not i know not i know not i know not i know ',
 'was her mother him advise not . <eos> ',
 'they robbers abdul qadir jilani the soil the soil the soil the soil the soil the soil ',
 'was he happy . <eos> ',
 'was she beautiful hat show . <eos> ',
 'i zabhi rid get now . <eos> ',
 'was khayam them closely look . <eos> ',
 'i come now . <eos> ',
 'was you go . <eos> ',
 'was i help . <eos> ',
 'i several moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem ',
 'i her sister a much like . <eos> ',
 'was i it notice . <eos> ',
 'i you know after . <eos> ',
 'was i lot learn hope now . <eos> ',
 'was nazeer gazala do see . 

In [None]:
from nltk.translate.bleu_score import corpus_bleu,SmoothingFunction
# Containers for per-sentence 1- to 4-gram scores.
# NOTE(review): nothing in this cell appends to g1..g4 — confirm where they are filled.
g1=list()
g2=list()
g3=list()
g4=list()

# Resetting these discards the references/predictions collected by earlier cells.
groundt=list()
predict=list()
# NOTE(review): cc is created here but not used in this cell.
cc = SmoothingFunction()
def bleu_score(numsamples):
  """Collect one (reference, prediction) pair from each of ``numsamples`` batches.

  Appends to the module-level ``groundt`` and ``predict`` lists and returns
  ``None``. Despite its name, this function does not compute a BLEU score.
  """
  for inp_batch, targ_batch in train_dataset.take(numsamples):
    for inp, targ in zip(inp_batch, targ_batch):
        sentence = preprocess_sequence(inp, inp_lang)
        ground_truth = preprocess_sequence(targ, targ_lang)
        s,r,g=gettranslation(sentence, ground_truth, plot_weights=False)
        groundt.append(ground_truth)
        predict.append(r)
        # Only the first pair of each batch is used.
        break

bleu_score(100)

In [None]:
predict[:3]

['i yet pay full not . <eos> ',
 'it tasty yes no ? <eos> ',
 'you us hear yes no ? <eos> ']

In [None]:
# NOTE(review): result is created empty here and is not filled in this cell.
result=list()
l=0
j=list()
# for i in predict:
# Remove the '<eos>' marker from every prediction (surrounding spaces remain).
j=[n.replace('<eos>','') for n in predict]
print(j[:10])
# Drop the trailing whitespace left behind by the removal; x holds the cleaned
# predictions.
x=[z.rstrip() for z in j]
print(x[:10])
print(groundt[:10])
# # predict[1].split()[:-1]
i=0

# Earlier in-place variant of the clean-up above, kept commented out:
# while i<len(predict):
#   predict[i]=predict[i].replace('<eos>', "")
#   i+=1

['i yet pay full not .  ', 'it tasty yes no ?  ', 'you us hear yes no ?  ', 'let me think .  ', 'was waleed dark walk down .  ', 'i new shoes need .  ', 'was i particularly happy feel not .  ', 'was i look down .  ', 'was i floor sweep .  ', 'i my behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior ']
['i yet pay full not .', 'it tasty yes no ?', 'you us hear yes no ?', 'let me think .', 'was waleed dark walk down .', 'i new shoes need .', 'was i particularly happy feel not .', 'was i look down .', 'was i floor sweep .', 'i my behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior behavior']
['i yet pay full not .', 'it tasty yes no ?', 'you us hear yes no ?', 'let me think .', 'was waleed dark hallway walk down .', 'i new shoes need .', 'was i particularly happy feel not .', 'was i down look .', 'was i floor sweep .', 'i my recent be

In [None]:
len(predict)

100

In [None]:
groundt

['was i bus get off .',
 'was azan us not here swim tell not .',
 'i yet breakfast eat full not .',
 'was i iqbal me alone leave ask .',
 'i way help out i can .',
 'was her mother him advise full now not yes no ?',
 'i know why was ismail karachi come .',
 'was subhan time track lose .',
 'i hope lose start now .',
 'was she plane fly yes no ?',
 'he very handsome .',
 'was madni curious but was dua not .',
 'was i chess shoaib beat .',
 'it tense situation .',
 'blanks fill full after now not yes no ?',
 'was amanullah me the job get .',
 'arif room clean full .',
 'was my feet hurt .',
 'i hope i anyone offend full not .',
 'was we intrude mean not .',
 'was we wrong turn take .',
 'ismail try .',
 'i i you know think not .',
 'bride bright red dress wear .',
 'was he wink sleep not .',
 'i know that i be after .',
 'was laal home his umbrella leave .',
 'i know it my fault .',
 'he often his parents write .',
 'was i see him road cross .',
 'doctor my teeth check now after .',
 'te

In [None]:
x 

['was i bus get off .',
 'was azan us not .',
 'i yet breakfast eat full not .',
 'was i iqbal me alone leave ask .',
 'i can .',
 'was her mother him advise full now not yes no ?',
 'i know why was ismail karachi come .',
 'was subhan time track lose .',
 'i hope lose start start start start start start start start start start start start start start',
 'was she plane fly yes no ?',
 'he very handsome .',
 'was madni curious but was madni curious but was madni curious but was madni curious but was',
 'was i chess shoaib beat .',
 'it tense situation .',
 'blanks fill full after now not yes no ?',
 'was amanullah me the job get .',
 'arif room clean full .',
 'was my feet hurt .',
 'i hope i hope i hope i hope i hope i hope i hope i hope i',
 'was we intrude mean not .',
 'was we wrong turn take .',
 'ismail try .',
 'i i i i i i i i i i i i i i i i i',
 'bride bright red dress wear .',
 'was he wink sleep not .',
 'i know that i know that i know that i know that i know that i know',
 

In [None]:
groundt=groundt[:93]

In [None]:
len(groundt)

93

In [None]:
x[1].split()

['was', 'we', 'our', 'neighbors', 'see', '.']

In [None]:
g1

[0.9285714285714286, 0.9285714285714286]

In [None]:
g2

[0.09636241116594318, 0.9636241116594315]

In [None]:
g3

[0.07192820457722461, 0.9780129266060039]

In [None]:
g4

[0.04007543120290852, 0.981643576691373]

In [None]:
# Average the first `score_window` sentence-level scores of each n-gram order.
# The divisor is the number of scores actually present in the window, so a list
# shorter than the window is no longer under-reported; an empty list gives 0.0.
score_window = 65
gram1, gram2, gram3, gram4 = [
    np.sum(scores[:score_window]) / max(len(scores[:score_window]), 1)
    for scores in (g1, g2, g3, g4)
]

print(f'1-gram: {gram1} 2-gram: {gram2} 3-gram: {gram3} 4-gram: {gram4}')

In [None]:
plt.bar(x =['1-gram','2-gram','3-gram','4-gram'], height =[gram1,gram2,gram3,gram4] )
plt.title("Average BLEU Score")
plt.ylim((0,1))
plt.show()

In [None]:
!pip install jiwer

Collecting jiwer
  Downloading jiwer-2.2.0-py3-none-any.whl (13 kB)
Collecting python-Levenshtein
  Downloading python-Levenshtein-0.12.2.tar.gz (50 kB)
[?25l[K     |██████▌                         | 10 kB 20.6 MB/s eta 0:00:01[K     |█████████████                   | 20 kB 15.2 MB/s eta 0:00:01[K     |███████████████████▌            | 30 kB 10.2 MB/s eta 0:00:01[K     |██████████████████████████      | 40 kB 8.8 MB/s eta 0:00:01[K     |████████████████████████████████| 50 kB 3.1 MB/s 
Building wheels for collected packages: python-Levenshtein
  Building wheel for python-Levenshtein (setup.py) ... [?25l[?25hdone
  Created wheel for python-Levenshtein: filename=python_Levenshtein-0.12.2-cp37-cp37m-linux_x86_64.whl size=149863 sha256=19fdb8a72a87b83d62af7b688d77c60243ff56b706dcad9049e73263cc2f9139
  Stored in directory: /root/.cache/pip/wheels/05/5f/ca/7c4367734892581bb5ff896f15027a932c551080b2abd3e00d
Successfully built python-Levenshtein
Installing collected packages: pyth

In [None]:
len(groundt)

100

In [None]:
groundt

In [None]:
from jiwer import wer
# e accumulates the per-sentence word error rate of each prediction in x
# against its reference in groundt.
e = 0
for i in range(len(groundt)):
  e += wer(groundt[i], x[i])

# Average over the number of sentences actually scored rather than a fixed 100;
# max(..., 1) avoids dividing by zero when groundt is empty.
print(f'WER: {e / max(len(groundt), 1)}')

WER: 0.2212896825396825


In [None]:
e/93

0.9518731865506055

In [None]:
!pip install pyter3
import pyter



In [None]:
# TER of a single pair: reference 92 against result[92] with its last token dropped.
# NOTE(review): the only visible assignment to `result` is an empty list, so on a
# fresh run result[92] would raise IndexError — confirm which list was intended.
print(pyter.ter(groundt[92].split(),result[92].split()[:-1]))

1.75


In [None]:
# Sum the per-sentence TER of each cleaned prediction in x against its reference.
tererror=0
# NOTE(review): l is assigned here but not used in this cell.
l=0
for i in range(len(groundt)):
  # tererror=pyter.ter(groundt[i].split(),result[i].split())
  tererror+=pyter.ter(groundt[i].split(),x[i].split())



In [None]:
len(tererror)

93

In [None]:
tererror/100

0.1496442577030812

In [None]:
f= open('/content/predict.txt','r+')

In [None]:
file3 = open("/content/predict2.txt","w")

In [None]:
# Copy every line of f to file3 with the trailing '<eos> ' marker removed, and
# keep the cleaned lines in prprocess.
# The marker is removed only when it is actually present: a fixed-length slice
# would cut real words off predictions that hit the length limit without
# producing <eos>.
eos_suffix = '<eos> '
prprocess = list()
try:
  for i in f:
    line = i.rstrip('\n')
    if line.endswith(eos_suffix):
      line = line[:-len(eos_suffix)]
    prprocess.append(line)
    print(line)
    file3.write(line + '\n')
finally:
  # Close the output file even if reading or writing fails part-way.
  file3.close()

she his room go yes no ? 
was mahi dua close friends . 
was he many dangers absorb . 
azan uninsure . 
the dog milkman bark at now after not . 
was my grandfather house house house house house house house house house house house house house 
i know not i know not i know not i know not i know not i
was her mother him advise not . 
they robbers abdul qadir jilani the soil the soil the soil the soil the soil the
was he happy . 
was she beautiful hat show . 
i zabhi rid get now . 
was khayam them closely look . 
i come now . 
was you go . 
was i help . 
i several moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem m
i her sister a much like . 
was i it notice . 
i you know after . 
was i lot learn hope now . 
was nazeer gazala do see . 
i think fahad cooperative be not . 
was i pakistan work use . 
was i play want . 
was izatullah later short time leave . 
i now know . 
i them see . 
i my room clean full now not yes no ? 
arif room clean yes no

In [None]:
prprocess

['she his room go yes no ? ',
 'was mahi dua close friends . ',
 'was he many dangers absorb . ',
 'azan uninsure . ',
 'the dog milkman bark at now after not . ',
 'was my grandfather house house house house house house house house house house house house house ',
 'i know not i know not i know not i know not i know not i',
 'was her mother him advise not . ',
 'they robbers abdul qadir jilani the soil the soil the soil the soil the soil the',
 'was he happy . ',
 'was she beautiful hat show . ',
 'i zabhi rid get now . ',
 'was khayam them closely look . ',
 'i come now . ',
 'was you go . ',
 'was i help . ',
 'i several moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem moheem m',
 'i her sister a much like . ',
 'was i it notice . ',
 'i you know after . ',
 'was i lot learn hope now . ',
 'was nazeer gazala do see . ',
 'i think fahad cooperative be not . ',
 'was i pakistan work use . ',
 'was i play want . ',
 'was izatullah later short t