In [1]:
import wget
import os
import tarfile

In [2]:
import numpy as np
import pandas as pd

In [3]:
import tensorflow as tf
from tensorflow import keras

In [4]:
from keras.layers import SimpleRNN,GRU,LSTM,Embedding,Input,Dense

In [5]:
filename = 'dakshina_dataset_v1.0'
url = 'https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar'
if not os.path.exists(filename+'.tar') and not os.path.exists(filename):
    filename_tar = wget.download(url)
    file = tarfile.open(filename_tar)
    print('\nExtracting files ....')
    file.extractall()
    file.close()
    print('Done')
    os.remove(filename_tar)
elif not os.path.exists(filename):
    filename_tar = filename + '.tar'
    file = tarfile.open(filename_tar)
    print('\nExtracting files ....')
    file.extractall()
    file.close()
    print('Done')
    os.remove(filename_tar)

In [6]:
lang = 'bn'
train_path =  filename+f"/{lang}/lexicons/{lang}.translit.sampled.train.tsv"
val_path = filename+f"/{lang}/lexicons/{lang}.translit.sampled.dev.tsv"
test_path = filename+f"/{lang}/lexicons/{lang}.translit.sampled.test.tsv"

In [7]:
def read_data(path):
    df = pd.read_csv(path,header=None,sep='\t')
    df.dropna(inplace=True)
    input_texts,target_texts = df[1].to_list(),df[0].to_list()
    return input_texts,target_texts

In [8]:
def parse_text(texts):
    characters = set()
    for text in texts:
        for c in text:
            if c not in characters:
                characters.add(c)
    characters.add(' ')
    return sorted(list(characters))

In [9]:
def start_end_pad(texts):
    for i in range(len(texts)):
        texts[i] = "\t" + texts[i] + "\n"
    return texts

In [10]:
train_input_texts,train_target_texts = read_data(train_path)
val_input_texts,val_target_texts = read_data(val_path)
test_input_texts,test_target_texts = read_data(test_path)

In [11]:
train_target_texts = start_end_pad(train_target_texts)
val_target_texts = start_end_pad(val_target_texts)
test_target_texts = start_end_pad(test_target_texts)

In [12]:
latent_dim = 256
batch_size = 64
epochs = 10
input_embedding_size = 16

In [13]:
def enc_dec_tokens(train_input_texts,train_target_texts,val_input_texts,val_target_texts):
    
    input_characters = parse_text(train_input_texts + val_input_texts)
    target_characters = parse_text(train_target_texts + val_target_texts)
    num_encoder_tokens = len(input_characters)
    num_decoder_tokens = len(target_characters)
    max_encoder_seq_length = max([len(txt) for txt in train_input_texts + val_input_texts])
    max_decoder_seq_length = max([len(txt) for txt in train_target_texts + val_target_texts])

    print("Number of training samples:", len(train_input_texts))
    print("Number of validation samples:", len(val_input_texts))
    print("Number of unique input tokens:", num_encoder_tokens)
    print("Number of unique output tokens:", num_decoder_tokens)
    print("Max sequence length for inputs:", max_encoder_seq_length)
    print("Max sequence length for outputs:", max_decoder_seq_length)
    
    input_token_index = dict([(char, i) for i, char in enumerate(input_characters)])
    target_token_index = dict([(char, i) for i, char in enumerate(target_characters)])
    
    return input_token_index,target_token_index,max_encoder_seq_length,max_decoder_seq_length,num_encoder_tokens,num_decoder_tokens

In [14]:
def data_processing(input_texts,enc_length,input_token_index,num_encoder_tokens, target_texts,dec_length,target_token_index,num_decoder_tokens):
    encoder_input_data = np.zeros(
        (len(input_texts), enc_length), dtype="float32"
    )
    decoder_input_data = np.zeros(
            (len(input_texts), dec_length), dtype="float32"
        )
    decoder_target_data = np.zeros(
            (len(input_texts), dec_length, num_decoder_tokens), dtype="float32"
        )

    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
        
        for t, char in enumerate(input_text):
            encoder_input_data[i, t] = input_token_index[char]
        encoder_input_data[i, t + 1 :] = input_token_index[' ']
        
        for t, char in enumerate(target_text):
                # decoder_target_data is ahead of decoder_input_data by one timestep
            decoder_input_data[i, t] = target_token_index[char]
            if t > 0:
                    # decoder_target_data will be ahead by one timestep
                    # and will not include the start character.
                decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
        decoder_input_data[i, t + 1 :] = target_token_index[' ']
        decoder_target_data[i, t:, target_token_index[' ']] = 1.0
    return encoder_input_data,decoder_input_data,decoder_target_data

In [15]:
input_token_index,target_token_index,max_encoder_seq_length,max_decoder_seq_length,num_encoder_tokens,num_decoder_tokens = enc_dec_tokens(train_input_texts,train_target_texts,val_input_texts,val_target_texts)

Number of training samples: 94543
Number of validation samples: 9279
Number of unique input tokens: 27
Number of unique output tokens: 63
Max sequence length for inputs: 22
Max sequence length for outputs: 24


In [16]:
train_encoder_input,train_decoder_input,train_decoder_target = data_processing(train_input_texts,max_encoder_seq_length,input_token_index,num_encoder_tokens, train_target_texts,max_decoder_seq_length,target_token_index,num_decoder_tokens)
val_encoder_input,val_decoder_input,val_decoder_target = data_processing(val_input_texts,max_encoder_seq_length,input_token_index,num_encoder_tokens, val_target_texts,max_decoder_seq_length,target_token_index,num_decoder_tokens)
test_encoder_input,test_decoder_input,test_decoder_target = data_processing(test_input_texts,max_encoder_seq_length,input_token_index,num_encoder_tokens, test_target_texts,max_decoder_seq_length,target_token_index,num_decoder_tokens)

In [17]:
def make_model(num_encoder_tokens,num_decoder_tokens,input_embedding_size=16,num_enc_layers=1,num_dec_layers=1,hidden_layer_size=64,cell_type='LSTM',dropout=0,r_dropout=0,cell_activation='tanh'):
    cell = {
        'RNN':SimpleRNN,
        'LSTM':LSTM,
        'GRU':GRU
    }
    encoder_input = Input(shape=(None,),name='input_1')
    encoder_input_embedding = Embedding(num_encoder_tokens,input_embedding_size,name='embedding_1')(encoder_input)
    
    encoder_sequences, *encoder_state = cell[cell_type](hidden_layer_size,activation=cell_activation,return_sequences=True,return_state=True,dropout=dropout,recurrent_dropout=r_dropout,name="encoder_1")(encoder_input_embedding)
    
    for i in range(1,num_enc_layers):
        encoder_sequences, *encoder_state = cell[cell_type](hidden_layer_size,activation=cell_activation,return_sequences=True,return_state=True,dropout=dropout,recurrent_dropout=r_dropout,name=f"encoder_{i+1}")(encoder_sequences)
        
    decoder_input = Input(shape=(None,),name='input_2')
    decoder_input_embedding = Embedding(num_decoder_tokens,input_embedding_size,name='embedding_2')(decoder_input)
    
    decoder_sequences, *decoder_state = cell[cell_type](hidden_layer_size,activation=cell_activation,return_sequences=True,return_state=True,dropout=dropout,recurrent_dropout=r_dropout,name="decoder_1")(decoder_input_embedding ,initial_state=encoder_state)
    
    for i in range(1,num_dec_layers):
        decoder_sequences, *decoder_state = cell[cell_type](hidden_layer_size,activation=cell_activation,return_sequences=True,return_state=True,dropout=dropout,recurrent_dropout=r_dropout,name=f"decoder_{i+1}")(decoder_sequences ,initial_state=encoder_state)
    
    decoder_dense = Dense(num_decoder_tokens,activation="softmax",name="dense_1")(decoder_sequences)
    
    model = keras.Model([encoder_input,decoder_input],decoder_dense)
    model.summary()
    return model

In [None]:
model = make_model(num_encoder_tokens,num_decoder_tokens,input_embedding_size=32,hidden_layer_size=128,num_enc_layers=2,num_dec_layers=2,cell_type='LSTM')
model.compile(
    optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
)
model.fit(
    [train_encoder_input, train_decoder_input],
    train_decoder_target,
    batch_size=batch_size,
    validation_data=([val_encoder_input, val_decoder_input],val_decoder_target),
    epochs=epochs,
)

In [115]:
# model.save("s2s")



INFO:tensorflow:Assets written to: s2s/assets


INFO:tensorflow:Assets written to: s2s/assets


In [48]:
def create_inference_model(model):
    # Calculating number of layers in encoder and decoder
    num_enc_layers, num_dec_layers = 0, 0
    for layer in model.layers:
        num_enc_layers += layer.name.startswith('encoder')
        num_dec_layers += layer.name.startswith('decoder')

    # Encoder input
    encoder_input = model.input[0]      # Input_1
    # Encoder cell final layer
    encoder_cell = model.get_layer("encoder_"+str(num_enc_layers))
    encoder_type = encoder_cell.__class__.__name__
    encoder_seq, *encoder_state = encoder_cell.output
    # Encoder model
    encoder_model = keras.Model(encoder_input, encoder_state)

    # Decoder input
    decoder_input = model.input[1]      # Input_2
    decoder_input_embedding = model.get_layer("embedding_2")(decoder_input)
    decoder_sequences = decoder_input_embedding
    # Inputs to decoder layers' initial states
    decoder_states, decoder_state_inputs = [], []
    for i in range(1, num_dec_layers+1):
        if encoder_type == 'LSTM':
            decoder_state_input = [Input(shape=(encoder_state[0].shape[1],), name="input_"+str(2*i+1)), 
                                   Input(shape=(encoder_state[1].shape[1],), name="input_"+str(2*i+2))]
        else:
            decoder_state_input = [Input(shape=(encoder_state[0].shape[1],), name="input_"+str(i+2))]

        decoder_cell = model.get_layer("decoder_"+str(i))
        decoder_sequences, *decoder_state = decoder_cell(decoder_sequences, initial_state=decoder_state_input)
        decoder_states += decoder_state
        decoder_state_inputs += decoder_state_input

    # Softmax FC layer
    decoder_dense = model.get_layer("dense_1")
    decoder_dense_output = decoder_dense(decoder_sequences)

    # Decoder model
    decoder_model = keras.Model(
        [decoder_input] + decoder_state_inputs, [decoder_dense_output] + decoder_states
    )

    return encoder_model, decoder_model, num_enc_layers, num_dec_layers


def convert_to_word(predictions, target_token_index, reverse_target_char_index = None):
    # Function to return the predictions after cutting the END_CHAR and BLANK_CHAR s at the end.
    # If char_dec == None, the predictions are in the form of decoded string, otherwise as list of integers
    no_samples = len(predictions) if type(predictions) is list else predictions.shape[0]
    pred_words = ['' for _ in range(no_samples)]
    for i, pred_list in enumerate(predictions):
        for l in pred_list:
            # Stop word : END_CHAR
            if l == target_token_index['\n']:
                break
            pred_words[i] += reverse_target_char_index[l] if reverse_target_char_index is not None else l
    
    return pred_words

In [52]:
def beam_decoder_infer(model,input_seqs,max_decoder_seq_length,K=1,target_seqs=None,starting_char=0,batch_size=64):
    
    reverse_input_char_index = dict((i, char) for char, i in input_token_index.items())
    reverse_target_char_index = dict((i, char) for char, i in target_token_index.items())
    encoder_model,decoder_model,num_enc_layers,num_dec_layers=create_inference_model(model)
    encoder_output = encoder_model.predict(input_seqs,batch_size=batch_size)
    encoder_output = encoder_output if type(encoder_output) is list else [encoder_output]
    
    num_samples = input_seqs.shape[0]
    
    final_outputs = np.zeros((num_samples,K,max_decoder_seq_length),dtype=np.int32)
    
    final_errors = np.zeros((num_samples,K))
    
    decoder_k_inputs = np.zeros((num_samples,1,1))
    decoder_k_inputs[:, :, 0] = starting_char
    
    decoder_k_out = [[(0, [])] for _ in range(num_samples)]
    
    errors = [[0] for _ in range(num_samples)]
    
    states = [encoder_output*num_dec_layers]
    
    for idx in range(max_decoder_seq_length):
        all_k_beams = [[] for _ in range(num_samples)]
        all_decoder_states = [[] for _ in range(num_samples)]
        all_errors = [[] for _ in range(num_samples)]
        
        for k in range(len(decoder_k_out[0])):
            decoder_output, *decoder_states = decoder_model.predict([decoder_k_inputs[:,k]] + states[k],batch_size=batch_size)
            top_k = np.argsort(decoder_output[:,-1,:],axis=-1)[:,-K:]
            for n in range(num_samples):
                all_k_beams[n]+= [(decoder_k_out[n][k][0] + np.log(decoder_output[n, -1, top_k[n][i]]),decoder_k_out[n][k][1] + [top_k[n][i]]) for i in range(K)]
                if target_seqs is not None:
                    all_errors[n] += [errors[n][k] - np.log(decoder_output[n,-1,target_seqs[n,idx]])]*K
                all_decoder_states[n] += [[decoder_state[n:n+1] for decoder_state in decoder_states]] * K
        sorted_index = list(range(len(all_k_beams[0])))
        sorted_index = [sorted(sorted_index,key = lambda ix: all_k_beams[n][ix][0])[-K:][::-1] for n in range(num_samples)]
        
        decoder_k_out = [[all_k_beams[n][index] for index in sorted_index[n]] for n in range(num_samples)]
        
        decoder_k_inputs = np.array([[all_k_beams[n][index][-1] for index in sorted_index[n]] for n in range(num_samples)])
        
        states = [all_decoder_states[0][index] for index in sorted_index[0]]
        
        for n in range(1,num_samples):
            states = [[np.concatenate((states[i][j],all_decoder_states[n][index][j])) for j in range(len(all_decoder_states[n][index]))] for i,index in  enumerate(sorted_index[n])]
        if target_seqs is not None:
            errors = [[all_errors[n][index] for index in sorted_index[n]] for n in range(num_samples)]
    final_outputs = np.array([[decoder_k_out[n][i][1] for i in range(K)] for n in range(num_samples)])
    if target_seqs is not None:
        final_errors = np.array(errors)/max_decoder_seq_length
    return final_outputs,final_errors,np.array(states)
def calc_metrics(k_outputs, target_seqs,target_token_index,reverse_target_char_index,k_errors=None,exact_word=True,display=False):
    matches = np.mean(k_outputs == np.repeat(target_seqs.reshape((target_seqs.shape[0],1,target_seqs.shape[1])),k_outputs.shape[1],axis=1),axis=-1)
    best_k = np.argmax(matches,axis=-1)
    best_index = (tuple(range(best_k.shape[0])),tuple(best_k))
    accuracy = np.mean(matches[best_index])
    k_predictions = list()
    loss = None
    if k_errors is not None:
        loss = np.mean(k_errors[best_index])
    if exact_word:
        equal = [0] * k_outputs.shape[0]
        true_out = convert_to_word(target_seqs,target_token_index,reverse_target_char_index)
        for k in range(k_outputs.shape[1]):
            pred_out = convert_to_word(k_outputs[:,k], target_token_index,reverse_target_char_index)
            equal = [equal[i] or (pred_out[i] == true_out[i]) for i in range(k_outputs.shape[0])]
            if display==True:
                k_predictions.append(pred_out)
        exact_accuracy = np.mean(equal)
        if display==True:
            return accuracy,exact_accuracy,loss,true_out,k_predictions
        return accuracy,exact_accuracy,loss
    return accuracy,loss
def beam_decoder(model,input_seqs,target_seqs_onehot,max_decoder_seq_length,target_token_index,reverse_target_char_index,K=1,model_batch_size=64,infer_batch_size=512,exact_word=True,return_outputs=False,return_states=False,display=False):
    target_seqs = np.argmax(target_seqs_onehot,axis=-1)
    k_outputs,k_errors,k_states=None,None,None
    for i in range(0,input_seqs.shape[0],infer_batch_size):
        tmp_k_outputs,tmp_k_errors,tmp_k_states = beam_decoder_infer(model,input_seqs[i:i+infer_batch_size],max_decoder_seq_length,K,target_seqs[i:i+infer_batch_size],target_token_index['\t'],model_batch_size)
        
        if k_errors is None:
            k_outputs,k_errors,k_states = tmp_k_outputs,tmp_k_errors,tmp_k_states
        else:
            k_outputs = np.concatenate((k_outputs,tmp_k_outputs))
            k_errors = np.concatenate((k_errors,tmp_k_errors))
            k_states = np.concatenate((k_states,tmp_k_states),axis=2)
    return_elements = []
    if return_outputs:
        return_elements += [k_outputs]
    if return_states:
        return_elements += [k_states]
    if len(return_elements) > 0:
        return calc_metrics(k_outputs,target_seqs,target_token_index,reverse_target_char_index,k_errors,exact_word,display) + tuple(return_elements)
    return calc_metrics(k_outputs,target_seqs,target_token_index,reverse_target_char_index,k_errors,exact_word,display)

In [53]:
outs = beam_decoder(model,val_encoder_input,val_decoder_target,max_decoder_seq_length,target_token_index,reverse_target_char_index,display=True)


2022-04-20 19:16:53.429678: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:16:53.505983: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:16:53.633195: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:16:53.964944: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:16:54.038858: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:16:54.115272: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:16:54.430384: I tensorflow/core/grappler/optimizers/cust

2022-04-20 19:17:17.794285: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:17.866802: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:17.938055: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:18.257737: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:18.333000: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:18.413640: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:23.034565: I tensorflow/core/grappler/optimizers/cust

2022-04-20 19:17:47.219105: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:47.285667: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:47.378376: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:47.708588: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:47.780283: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:47.881233: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:17:48.605590: I tensorflow/core/grappler/optimizers/cust

2022-04-20 19:18:13.524676: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:13.603106: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:13.706804: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:14.080944: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:14.153015: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:14.229129: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:19.291358: I tensorflow/core/grappler/optimizers/cust

2022-04-20 19:18:41.754443: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:46.507695: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:46.585186: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:46.716410: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:46.991868: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:47.070016: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-20 19:18:47.172385: I tensorflow/core/grappler/optimizers/cust

In [62]:
np.array(outs[-2])[np.array(outs[-1][0]) == np.array(outs[-2])]

array(['আপ', 'আপ', 'আমর', 'আমর', 'আমর', 'আমর', 'আমর', 'আরকে', 'আলসার',
       'আলসার', 'আলসার', 'আলসার', 'আসর', 'আসে', 'আসে', 'উঠে', 'উঠে',
       'উদর', 'উদর', 'উল', 'উল', 'উল', 'একার', 'ঘুর', 'ঘুর', 'জেস', 'জেস',
       'দোর', 'নির', 'নির', 'প্রো', 'ফল', 'বন', 'বনে', 'বাগে', 'ভর', 'ভর',
       'সুর', 'হইল', 'হইল', 'হইল', 'হইল', 'হইল', 'হুক'], dtype='<U17')

In [66]:
np.array(val_input_texts)[np.array(outs[-1][0]) == np.array(outs[-2])]

array(['aap', 'ap', 'aamor', 'amar', 'ammor', 'amoor', 'amor', 'aarke',
       'alsaar', 'alsaara', 'alsar', 'alsara', 'asor', 'ase', 'ashe',
       'uthe', 'uthhe', 'udar', 'udor', 'ul', 'ula', 'uul', 'ekar',
       'ghoor', 'ghur', 'jess', 'zess', 'dor', 'nirau', 'niro', 'pra',
       'fall', 'bon', 'boney', 'baugey', 'bhor', 'vor', 'sur', 'hailo',
       'hoeel', 'hoil', 'hoila', 'hoilo', 'huka'], dtype='<U20')

In [34]:
# Define sampling models
# Restore the model and construct the encoder and decoder.
model = keras.models.load_model("s2s")

# encoder_inputs = model.input[0]  # input_1
# encoder_outputs, state_h_enc, state_c_enc = model.layers[2].output  # lstm_1
# encoder_states = [state_h_enc, state_c_enc]
# encoder_model = keras.Model(encoder_inputs, encoder_states)

# decoder_inputs = model.input[1]  # input_2
# decoder_state_input_h = keras.Input(shape=(latent_dim,))
# decoder_state_input_c = keras.Input(shape=(latent_dim,))
# decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
# decoder_lstm = model.layers[3]
# decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
#     decoder_inputs, initial_state=decoder_states_inputs
# )
# decoder_states = [state_h_dec, state_c_dec]
# decoder_dense = model.layers[4]
# decoder_outputs = decoder_dense(decoder_outputs)
# decoder_model = keras.Model(
#     [decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states
# )

# Reverse-lookup token index to decode sequences back to
# something readable.
reverse_input_char_index = dict((i, char) for char, i in input_token_index.items())
reverse_target_char_index = dict((i, char) for char, i in target_token_index.items())
# encoder_model,decoder_model,num_enc_layers,num_dec_layers=create_inference_model(model)



2022-04-20 18:59:32.399478: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-04-20 18:59:32.399731: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1


In [24]:
reverse_target_char_index

{0: '\t',
 1: '\n',
 2: ' ',
 3: 'ঁ',
 4: 'ং',
 5: 'ঃ',
 6: 'অ',
 7: 'আ',
 8: 'ই',
 9: 'ঈ',
 10: 'উ',
 11: 'ঊ',
 12: 'ঋ',
 13: 'এ',
 14: 'ঐ',
 15: 'ও',
 16: 'ঔ',
 17: 'ক',
 18: 'খ',
 19: 'গ',
 20: 'ঘ',
 21: 'ঙ',
 22: 'চ',
 23: 'ছ',
 24: 'জ',
 25: 'ঝ',
 26: 'ঞ',
 27: 'ট',
 28: 'ঠ',
 29: 'ড',
 30: 'ঢ',
 31: 'ণ',
 32: 'ত',
 33: 'থ',
 34: 'দ',
 35: 'ধ',
 36: 'ন',
 37: 'প',
 38: 'ফ',
 39: 'ব',
 40: 'ভ',
 41: 'ম',
 42: 'য',
 43: 'র',
 44: 'ল',
 45: 'শ',
 46: 'ষ',
 47: 'স',
 48: 'হ',
 49: '়',
 50: 'া',
 51: 'ি',
 52: 'ী',
 53: 'ু',
 54: 'ূ',
 55: 'ৃ',
 56: 'ে',
 57: 'ৈ',
 58: 'ো',
 59: 'ৌ',
 60: '্',
 61: 'ৎ',
 62: '২'}

In [28]:
[[4]]*4

[[4], [4], [4], [4]]

In [68]:
def decode_sequence(input_seq):
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)
#     print(states_value)
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, 1))
    # Populate the first character of target sequence with the start character.
    target_seq[0,0,0] = target_token_index['\t']

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = True
    decoded_sentence = ""
    states = [states_value]*num_dec_layers
        # Sample a token
#     sampled_token_indices = np.argmax(output_tokens[0,:,:],axis=1)
    while stop_condition:
        output_tokens, *decoder_states = decoder_model.predict([target_seq] + states)
        print(output_tokens.shape)
        idx = np.argmax(output_tokens[0,-1,:])
        c = reverse_target_char_index[idx]
        decoded_sentence += c
        if(c=='\n' or len(decoded_sentence)>max_decoder_seq_length):
            stop_condition = False
        target_seq = np.zeros((1, 1,num_decoder_tokens))
        target_seq[0,0,0] = target_token_index[c]
        states = decoder_states
#         beam_samples = beam_search_decoder(output_tokens[0],10)
#         results = list()
#         for sampled_token_indices,score in beam_samples:
#             for i in sampled_token_indices:
#                 c = reverse_target_char_index[i]
#                 if(c=='\n' or len(decoded_sentence) > max_decoder_seq_length):
#                     stop_condition = True
#                 decoded_sentence += c

#                 # Exit condition: either hit max length
#                 # or find stop character.
#             results.append(decoded_sentence)
#             decoded_sentence = ""
    return decoded_sentence

In [23]:
train_encoder_input[0,1]

14.0

In [40]:
decoder_model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding_2 (Embedding)        (None, None, 32)     2016        ['input_2[0][0]']                
                                                                                                  
 input_3 (InputLayer)           [(None, 128)]        0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 128)]        0           []                               
                                                                                            

In [28]:
import random

In [69]:
rand_indices = random.sample(range(train_encoder_input.shape[0]),20)
for seq_index in rand_indices:
    # Take one sequence (part of the training set)
    # for trying out decoding.
    input_seq = train_encoder_input[seq_index,:].reshape((1,-1))
    decoded_sentences = decode_sequence(input_seq)
    print("-")
    print("Input sentence:", train_input_texts[seq_index])
#     print("Target sentence :",train_target_texts[seq_index])
    print("Decoded sentence:", decoded_sentences)



ValueError: in user code:

    File "/opt/homebrew/Caskroom/miniforge/base/envs/mak/lib/python3.8/site-packages/keras/engine/training.py", line 1621, in predict_function  *
        return step_function(self, iterator)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/mak/lib/python3.8/site-packages/keras/engine/training.py", line 1611, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/homebrew/Caskroom/miniforge/base/envs/mak/lib/python3.8/site-packages/keras/engine/training.py", line 1604, in run_step  **
        outputs = model.predict_step(data)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/mak/lib/python3.8/site-packages/keras/engine/training.py", line 1572, in predict_step
        return self(x, training=False)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/mak/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/opt/homebrew/Caskroom/miniforge/base/envs/mak/lib/python3.8/site-packages/keras/engine/input_spec.py", line 213, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" '

    ValueError: Exception encountered when calling layer "model_1" (type Functional).
    
    Input 0 of layer "decoder_1" is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: (None, 1, None, 32)
    
    Call arguments received:
      • inputs=('tf.Tensor(shape=(None, 1, None), dtype=float32)', ('tf.Tensor(shape=(None, 128), dtype=float32)', 'tf.Tensor(shape=(None, 128), dtype=float32)'), ('tf.Tensor(shape=(None, 128), dtype=float32)', 'tf.Tensor(shape=(None, 128), dtype=float32)'))
      • training=False
      • mask=None


In [83]:
decoder_k_out = [[(0, [1,2])] for _ in range(10)]

In [84]:
len(decoder_k_out[0])

1

In [25]:
from math import log
from numpy import array
from numpy import argmax

# beam search
def beam_search_decoder(data, k):
    sequences = [[list(), 0.0]]
    # walk over each step in sequence
    for row in data:
        all_candidates = list()
        # expand each current candidate
        print(len(sequences))
        for i in range(len(sequences)):
            seq, score = sequences[i]
            for j in range(len(row)):
                candidate = [seq + [j], score - log(row[j])]
#                 print(candidate)
                all_candidates.append(candidate)
        # order all candidates by score
        ordered = sorted(all_candidates, key=lambda tup:tup[1])
        # select k best
        sequences = ordered[:k]
    return sequences

# define a sequence of 10 words over a vocab of 5 words
data = [[0.1, 0.2, 0.3, 0.4, 0.5],
		[0.5, 0.4, 0.3, 0.2, 0.1],
		[0.1, 0.2, 0.3, 0.4, 0.5],
		[0.5, 0.4, 0.3, 0.2, 0.1],
		[0.1, 0.2, 0.3, 0.4, 0.5],
		[0.5, 0.4, 0.3, 0.2, 0.1],
		[0.1, 0.2, 0.3, 0.4, 0.5],
		[0.5, 0.4, 0.3, 0.2, 0.1],
		[0.1, 0.2, 0.3, 0.4, 0.5],
		[0.5, 0.4, 0.3, 0.2, 0.1]]
data = array(data)
# decode sequence
result = beam_search_decoder(data, 3)
# print result
for seq in result:
	print(seq)

1
3
3
3
3
3
3
3
3
3
[[4, 0, 4, 0, 4, 0, 4, 0, 4, 0], 6.931471805599453]
[[4, 0, 4, 0, 4, 0, 4, 0, 4, 1], 7.154615356913663]
[[4, 0, 4, 0, 4, 0, 4, 0, 3, 0], 7.154615356913663]
