In [1]:
import os
import string
import re
import numpy as np

In [2]:
def load_doc(filename):
  # open the file as read only
  file = open(filename, mode= "r" , encoding="utf-8-sig" )
  # read all text
  text = file.read()
  # close the file
  file.close()
  return text

In [3]:
def isEnglishSentence(sentence):
    
    if len(sentence) == 0:
        return False
    
    numberOfEnglishChars = 0

    for c in sentence:
        isEnglishLetter = re.match(r'[a-zA-Z0-9]', c)
        if (isEnglishLetter is None) == False:
            numberOfEnglishChars += 1
    
    if numberOfEnglishChars == 0:
        return False
    
    #print((numberOfEnglishChars/len(sentence)*100))
    if ((numberOfEnglishChars/len(sentence)*100) > 50):
        return True
    
    return False

In [4]:
#I don't know how to detect this.
def isBanglaSentence(sentence):
    
    
    if isEnglishSentence(sentence) == False:
        numberOfChar = 0
        
        for c in sentence:
            if c.isalpha():
                numberOfChar += 1
        
        if numberOfChar == 0:
            return False
        
        if (len(sentence)/numberOfChar)*100 > 10:
            return True
    else:
        return False

In [5]:
def getEnglishAndBanglaSentences(splittedLines):
    #prepare english and bangla sentences
    english_sentence_list = []
    bangla_sentence_list = []
    
    eng_sentence = ""
    bng_sentence = ""
    
    for line in splittedLines:
        if isEnglishSentence(line):
            eng_sentence = line
        elif isBanglaSentence(line):
            bng_sentence = line
            
        if len(eng_sentence) > 0 and len(bng_sentence) > 0:
            english_sentence_list.append(eng_sentence)
            bangla_sentence_list.append(bng_sentence)
            eng_sentence = ""
            bng_sentence = ""
    
    return english_sentence_list, bangla_sentence_list

In [6]:
# Get english and bangla sentences first
text = load_doc("./Dataset/chat-1.txt")
splittedLines = text.splitlines()

eng_s, ban_s = getEnglishAndBanglaSentences(splittedLines=splittedLines)

In [7]:
#Get English and Bangla word list from a docname
def getEnglishAndBanglaWordList(docname):
    text = load_doc(docname)
    
    splittedLines = text.splitlines()
    lengthOfSplittedLines = len(splittedLines)
    eng_sentence, bng_sentence = getEnglishAndBanglaSentences(splittedLines=splittedLines)
    
#    print(eng_sentence, bng_sentence)
    if (len(eng_sentence) == len(bng_sentence)) == False:
        print("This is a case that should not occur")
    
    engWords = []
    bngWords = []
    
#     print(eng_sentence)
    
    for i in range(0, len(eng_sentence)):
#         print(i, eng_sentence[i], bng_sentence[i])
        ew = eng_sentence[i].split(";")
        bw = bng_sentence[i].split(";")
        
        if (len(ew) != len(bw)):
            print("English - %s, bangla - %s, Count %d %d" % (ew, bw, len(ew), len(bw)))
        engWords.extend(ew)
        bngWords.extend(bw)
        
    return engWords, bngWords

In [8]:
ewlist, bwlist = getEnglishAndBanglaWordList("./Dataset/" + "chat-15.txt")

In [9]:
dataset_names = os.listdir("./Dataset")

english_word_list = []
bangla_word_list = []

for name in dataset_names:
    
    if (name.find(".txt") != -1):
        ewlist, bwlist = getEnglishAndBanglaWordList("./Dataset/" + name)
        english_word_list.extend(ewlist)
        bangla_word_list.extend(bwlist)
        
print(len(english_word_list))
print(len(bangla_word_list))

22081
22081


In [10]:
#Verify if the indexing is alright
print(english_word_list[40], bangla_word_list[40])
print(english_word_list[505], bangla_word_list[505])
print(english_word_list[5225], bangla_word_list[5225])
print(english_word_list[15000], bangla_word_list[15000])
print(english_word_list[22077], bangla_word_list[22077])

bidyalaya বিদ্যালয়
oi ওই
jekono যে
osudi ওষুধই
pode পদে


In [11]:
#CLEAN THE DATA
def cleanWord(word, isBangla):
    cleaned = ""
    for c in word:
        if c == '\ufeff' or c == '\u200c':
            continue
        if isBangla and not(re.match(r'[a-zA-Z৷।]', c) == None):
            continue
        if(re.match(r'[’‘“\'\":\-!@#$%^&?*\_/+,."("")"\\–” ]', c) == None):
            cleaned += c
            
    return cleaned
    
def shouldIncludeWord(word):
    if len(word) > 0:
        return True
    return False

In [12]:
def clean_word_list(current_ew_word_list, current_bw_word_list):
    cleaned_english_word_list = []
    cleaned_bangla_word_list = []
    encountered_words = {}

    for ew, bw in zip(current_ew_word_list, current_bw_word_list):
        if encountered_words.get(ew) == None:
            encountered_words[ew] = True
        else:
            continue

        if shouldIncludeWord(ew):
            cleaned_ew = cleanWord(ew, False).lower()
            cleaned_bw = cleanWord(bw, True)

            cleaned_english_word_list.append(cleaned_ew)
            cleaned_bangla_word_list.append(cleaned_bw)
    
    return cleaned_english_word_list, cleaned_bangla_word_list

In [13]:
cleaned_english_words, cleaned_bangla_words = clean_word_list(english_word_list, bangla_word_list)

In [14]:
all_characters_set = set([])

for ew, bw in zip(cleaned_english_words, cleaned_bangla_words):
    
    ec_array = list(ew)
    _ = [all_characters_set.add(c) for c in ec_array]
    
    bc_array = list(bw)
    _ = [all_characters_set.add(c) for c in bc_array]
    
all_characters_set.add('')

In [15]:
#Character encoding

chars_i_to_c = dict((i, c) for i, c in enumerate(all_characters_set))
chars_c_to_i = dict((c, i) for i, c in enumerate(all_characters_set))

In [16]:
def getMaxWordLength(wordArray):
    maxLen = 0
    for w in wordArray:
        if len(w) > maxLen:
            maxLen = len(w)

    return maxLen

In [17]:
maxBanglaWordLength = getMaxWordLength(cleaned_bangla_words)
maxEnglishWordLength = getMaxWordLength(cleaned_english_words)

In [18]:
def getVectorizedWord(word, encoder, maxLength):
    vectorized_word = []
    
    for c in word:
        cv = encoder[c]
        vectorized_word.append(cv)
        
    #Add Padding
    vectorized_word.extend([encoder['']] * (maxLength - len(word)))
    
    return vectorized_word

In [19]:
def reverseVectorization(vectorized_word, decoder):
    mainWord = ""
    
    for i in vectorized_word:
        mainWord += decoder[i]
        
    return mainWord

In [20]:
from keras.utils.np_utils import to_categorical

def convert_vector_to_one_hot_encoded_array(vector, total_classes):
    one_hot_encoded_vector = to_categorical(vector, num_classes=total_classes)
    return one_hot_encoded_vector
    
def convert_one_hot_encoded_to_vector(one_hot_encoded):
    vectorized = []
    
    for one_hot_array in one_hot_encoded:
        vectorized.append(np.argmax(one_hot_array))
    
    return vectorized

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [21]:
def wordToOneHotEncode(word, encoder, vector_max_length, total_one_hot_classes):
    vectorRepresentation = getVectorizedWord(word, encoder, vector_max_length)
    one_hot_encoded = convert_vector_to_one_hot_encoded_array(vectorRepresentation, total_one_hot_classes)
    return one_hot_encoded

def oneHotEncodeToWord(one_hot, decoder):
    vectorRepresentation = convert_one_hot_encoded_to_vector(one_hot)
    word = reverseVectorization(vectorRepresentation, decoder)
    return word

In [22]:
banglish_word_train_set = []
bangla_word_result_set = []
banglish_word_max_length = 22
bangla_word_max_length = 21
total_one_hot_classes = 98

for ew in cleaned_english_words:
    banglish_word_train_set.append(wordToOneHotEncode(ew, chars_c_to_i, banglish_word_max_length, total_one_hot_classes))

for bw in cleaned_bangla_words:
    bangla_word_result_set.append(wordToOneHotEncode(bw, chars_c_to_i, bangla_word_max_length, total_one_hot_classes))
    

In [23]:
banglish_word_train_set = np.asarray(banglish_word_train_set)
bangla_word_result_set = np.asarray(bangla_word_result_set)

In [24]:
#TRAIN TEST SPLIT
from sklearn.model_selection import train_test_split

train_x, test_x, train_y, test_y = train_test_split(banglish_word_train_set, bangla_word_result_set, test_size=0.20, random_state=42)

In [25]:
print(oneHotEncodeToWord(train_x[0], chars_i_to_c), oneHotEncodeToWord(train_y[0], chars_i_to_c))
print(oneHotEncodeToWord(train_x[95], chars_i_to_c), oneHotEncodeToWord(train_y[95], chars_i_to_c))
print(oneHotEncodeToWord(train_x[1500], chars_i_to_c), oneHotEncodeToWord(train_y[1500], chars_i_to_c))
print(oneHotEncodeToWord(train_x[3871], chars_i_to_c), oneHotEncodeToWord(train_y[3871], chars_i_to_c))

eta এটা
bhot ভোটে
sankha সংখ্যা
sob সব


In [26]:
def getWordForVector(one_hot):
    for r in one_hot:
        vs = np.argmax(r, axis=1)
        rv = reverseVectorization(vs, chars_i_to_c)
    return rv

In [27]:
def getResultForOneWord(model, one_hot):
    results = model.predict(one_hot, batch_size=32)
    for r in results:
        vs = np.argmax(r, axis=1)
        rv = reverseVectorization(vs, chars_i_to_c)
    return rv

In [28]:
import statistics as s
import tensorflow as tf

class LossHistory(tf.keras.callbacks.Callback):
    
    def __init__(self, train_x, train_y, test_x, test_y):
        self.epoch_count = 0
        self.train_acc = []
        self.train_loss = []
        self.validation_acc = []
        self.validation_loss = []
        #self.filename = filename
        self.train_x = train_x
        self.train_y = train_y
        self.test_x = test_x
        self.test_y = test_y
        #self.f = open(filename,"w+")
    
    def computeResultForTwoWords(self, wTrue, wPred):
        totalMatch = 0
        for i in range(0,len(wTrue)):
            if i < len(wPred):
                if wTrue[i] == wPred[i]:
                    totalMatch += 1
        
        if len(wTrue) == 0:
            return 0
        
        return totalMatch/len(wTrue)
    
    def summaryOfValidationData(self):
        x_val, y_val = self.validation_data[0], self.validation_data[1]
        self.evaluateModel(self.model, x_val, y_val, shouldPrint=True)
        
    def evaluateModel(self, model, x_val, y_val, shouldPrint=False):
        allResults = []
        for i in range(0, x_val.shape[0]):
            wPred = getResultForOneWord(model, x_val[i:i+1])
            wTrue = getWordForVector(y_val[i:i+1])
            
            if shouldPrint:
                print(getWordForVector(x_val[i:i+1]), wPred, wTrue)
            
            result = self.computeResultForTwoWords(wTrue, wPred)
            
            allResults.append(result)
            
        return s.mean(allResults)

    def on_epoch_end(self, batch, logs={}):
        
        train_result = self.evaluateModel(self.model, self.train_x, self.train_y)
        test_result = self.evaluateModel(self.model, self.test_x, self.test_y)
        
        #train_acc = logs.get('acc')
        train_loss = logs.get('loss')
        #val_acc = logs.get('val_acc')
        val_loss = logs.get('val_loss')
        
        #self.train_acc.append(train_acc)
        self.train_loss.append(train_loss)
        #self.validation_acc.append(val_acc)
        self.validation_loss.append(val_loss)
        
        self.epoch_count += 1
        #self.f.write("%.2f %.2f %.2f %.2f\n " % (train_acc, train_loss, val_acc, val_loss))
        print("Epoch %d [Train Result] - Acc %.3f, Loss %.3f, [Validation Result] - Acc %.3f, Loss %.3f" % (self.epoch_count, train_result, train_loss, test_result, val_loss))


In [29]:
#Bidirectional LSTM(CPU)
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras import regularizers


def createTestBidirectionalModelCPU(summary):
    model = Sequential()
    model.add(Bidirectional(LSTM(256), input_shape=(banglish_word_max_length, total_one_hot_classes)))
    model.add(RepeatVector(bangla_word_max_length))
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(256, return_sequences=True)))
    model.add(Dropout(0.5))
#     model.add(TimeDistributed(Reshape((-1, 256))))
#     model.add(TimeDistributed(Reshape((-1, 512))))
    model.add(TimeDistributed(Dense((512))))
    model.add(TimeDistributed(Dense((256))))
    model.add(TimeDistributed(Dense((128))))
    model.add(TimeDistributed(Dense(total_one_hot_classes, activation= 'softmax')))
    model.compile(loss= 'kullback_leibler_divergence' , optimizer= 'adam')

    if(summary):
        print(model.summary())
    
    return model

In [30]:
cpuModel = createTestBidirectionalModelCPU(summary = True)
        
history = LossHistory(train_x, train_y, test_x, test_y)

#Should get about 74% validation accuracy. Can take about 4-5 hours depending on CPU
cpuModel.fit(train_x, train_y, validation_data=(test_x, test_y), epochs=100, batch_size=64, callbacks=[history], verbose = 0)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 512)               727040    
_________________________________________________________________
repeat_vector (RepeatVector) (None, 21, 512)           0         
_________________________________________________________________
dropout (Dropout)            (None, 21, 512)           0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 21, 512)           1574912   
_________________________________________________________________
dropout_1 (Dropout)          (None, 21, 512)           0         
_________________________________________________________________
time_distributed (TimeDistri (None, 21, 512)           262656    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 21, 256)           131328    
__________

<tensorflow.python.keras.callbacks.History at 0x1a379bd2b0>

In [61]:
#from tensorflow.keras.models import load_model

In [62]:
# import tensorflow as tf

# keras_file = "bdmodel1.h5"

# tf.keras.models.save_model(cpuModel, keras_file)

In [63]:
#IGNORE THIS PART. Mobile conversion does not work.
# converter = tf.contrib.lite.TFLiteConverter.from_keras_model_file(keras_file)
# tflite_model = converter.convert()
# open("bdmodel.tflite", "wb").write(tflite_model)

INFO:tensorflow:Froze 8 variables.
INFO:tensorflow:Converted 8 variables to const ops.


RuntimeError: TOCO failed see console for info.
b'2019-01-09 16:00:45.487000: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayV3\n2019-01-09 16:00:45.487416: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1127] Op node missing output type attribute: gru/TensorArray\n2019-01-09 16:00:45.487439: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayV3\n2019-01-09 16:00:45.487458: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1127] Op node missing output type attribute: gru/TensorArray_1\n2019-01-09 16:00:45.487528: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayScatterV3\n2019-01-09 16:00:45.487545: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1127] Op node missing output type attribute: gru/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3\n2019-01-09 16:00:45.487567: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.487591: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.487611: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.487630: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.487909: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.487943: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.487969: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: LoopCond\n2019-01-09 16:00:45.487979: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1127] Op node missing output type attribute: gru/while/LoopCond\n2019-01-09 16:00:45.488028: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayReadV3\n2019-01-09 16:00:45.488045: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.488059: I tensorflow/contrib/lite/toco/import_tensorflow.cc:189] Unsupported data type in placeholder op: 20\n2019-01-09 16:00:45.488076: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.488092: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.488106: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.488118: I tensorflow/contrib/lite/toco/import_tensorflow.cc:189] Unsupported data type in placeholder op: 20\n2019-01-09 16:00:45.488165: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.488213: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.488256: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.488270: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.488282: I tensorflow/contrib/lite/toco/import_tensorflow.cc:189] Unsupported data type in placeholder op: 20\n2019-01-09 16:00:45.488325: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.488364: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.488407: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.488422: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.488434: I tensorflow/contrib/lite/toco/import_tensorflow.cc:189] Unsupported data type in placeholder op: 20\n2019-01-09 16:00:45.488472: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.488575: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.488640: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayWriteV3\n2019-01-09 16:00:45.488653: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1127] Op node missing output type attribute: gru/while/TensorArrayWrite/TensorArrayWriteV3\n2019-01-09 16:00:45.488667: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.488679: I tensorflow/contrib/lite/toco/import_tensorflow.cc:189] Unsupported data type in placeholder op: 20\n2019-01-09 16:00:45.488709: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Exit\n2019-01-09 16:00:45.488722: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Exit\n2019-01-09 16:00:45.488740: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayReadV3\n2019-01-09 16:00:45.490391: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayV3\n2019-01-09 16:00:45.490413: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1127] Op node missing output type attribute: gru_1/TensorArray\n2019-01-09 16:00:45.490426: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayV3\n2019-01-09 16:00:45.490436: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1127] Op node missing output type attribute: gru_1/TensorArray_1\n2019-01-09 16:00:45.490482: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayScatterV3\n2019-01-09 16:00:45.490493: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1127] Op node missing output type attribute: gru_1/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3\n2019-01-09 16:00:45.490513: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.490525: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.490538: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.490550: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.490575: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.490589: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.490602: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: LoopCond\n2019-01-09 16:00:45.490609: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1127] Op node missing output type attribute: gru_1/while/LoopCond\n2019-01-09 16:00:45.490648: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayReadV3\n2019-01-09 16:00:45.490661: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.490669: I tensorflow/contrib/lite/toco/import_tensorflow.cc:189] Unsupported data type in placeholder op: 20\n2019-01-09 16:00:45.490678: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.490690: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.490702: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.490710: I tensorflow/contrib/lite/toco/import_tensorflow.cc:189] Unsupported data type in placeholder op: 20\n2019-01-09 16:00:45.490746: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.490784: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.490822: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.490832: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.490840: I tensorflow/contrib/lite/toco/import_tensorflow.cc:189] Unsupported data type in placeholder op: 20\n2019-01-09 16:00:45.490875: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.490914: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.490949: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.490960: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.490968: I tensorflow/contrib/lite/toco/import_tensorflow.cc:189] Unsupported data type in placeholder op: 20\n2019-01-09 16:00:45.491002: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.491088: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: ReadVariableOp\n2019-01-09 16:00:45.491143: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayWriteV3\n2019-01-09 16:00:45.491153: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1127] Op node missing output type attribute: gru_1/while/TensorArrayWrite/TensorArrayWriteV3\n2019-01-09 16:00:45.491163: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Enter\n2019-01-09 16:00:45.491172: I tensorflow/contrib/lite/toco/import_tensorflow.cc:189] Unsupported data type in placeholder op: 20\n2019-01-09 16:00:45.491196: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: Exit\n2019-01-09 16:00:45.491207: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArraySizeV3\n2019-01-09 16:00:45.491216: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1127] Op node missing output type attribute: gru_1/TensorArrayStack/TensorArraySizeV3\n2019-01-09 16:00:45.491240: I tensorflow/contrib/lite/toco/import_tensorflow.cc:1080] Converting unsupported operation: TensorArrayGatherV3\n2019-01-09 16:00:45.496660: I tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc:39] Before Removing unused ops: 196 operators, 343 arrays (0 quantized)\n2019-01-09 16:00:45.500695: I tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc:39] Before general graph transformations: 196 operators, 343 arrays (0 quantized)\n2019-01-09 16:00:45.504493: I tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc:39] After general graph transformations pass 1: 189 operators, 338 arrays (0 quantized)\n2019-01-09 16:00:45.507552: I tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc:39] After general graph transformations pass 2: 188 operators, 337 arrays (0 quantized)\n2019-01-09 16:00:45.510149: I tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc:39] After general graph transformations pass 3: 187 operators, 335 arrays (0 quantized)\n2019-01-09 16:00:45.512738: I tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc:39] Before dequantization graph transformations: 187 operators, 335 arrays (0 quantized)\n2019-01-09 16:00:45.514622: F tensorflow/contrib/lite/toco/tooling_util.cc:618] Check failed: dim >= 1 (0 vs. 1)\n'
None


In [31]:
getWordForVector(train_y[9:10])

'আচ্ছা'

In [51]:
getResultForOneWord(cpuModel, train_x[9:10])

'াাাাা'

In [63]:
# Note that chars_c_to_i, banglish_word_max_length, total_one_hot_classes are defined above. 
# so, no need to anything for that
# But make sure that previous statements were executed
def getPrediction(model, word):
    oneHotEncodedWord = wordToOneHotEncode(word, chars_c_to_i, banglish_word_max_length, total_one_hot_classes)
    oneHotEncodedWord = oneHotEncodedWord.reshape((1,oneHotEncodedWord.shape[0],oneHotEncodedWord.shape[1]))
    return getResultForOneWord(model, oneHotEncodedWord)

In [64]:
#Usage
getPrediction(cpuModel, "aacchaa")

'াাাাা'