In [None]:
# Imports

import tensorflow as tf
from tensorflow.keras import datasets, models, layers
from sklearn.model_selection import train_test_split
import wandb
from wandb.keras import WandbCallback
from matplotlib import pyplot
import os
import numpy as np

In [2]:
# Setting up input flows

dataset_path = "../../dakshina_dataset_v1.0/ta/lexicons"
train_path = dataset_path + "/ta.translit.sampled.train.tsv"
test_path = dataset_path + "/ta.translit.sampled.test.tsv"


def _read_lexicon_tokens(path):
    """Return every whitespace-separated token of the UTF-8 file at `path`.

    The file is opened inside a `with` block so the handle is closed as soon
    as the content has been read (the previous inline `open(...)` never
    closed it).
    """
    with open(path, 'rb') as handle:
        return handle.read().decode('utf-8').split()


# Tokens arrive in groups of three per record: the first goes to y_* (decoder
# side), the second to X_* (encoder side) and the third is parsed as an
# integer into z_*.
# NOTE(review): presumably native-script word, romanisation and an attestation
# count -- confirm against the dataset README. Splitting on any whitespace
# also assumes that no field contains a blank.
train_data = _read_lexicon_tokens(train_path)
y_train, X_train, z_train = train_data[::3], train_data[1::3], [int(count) for count in train_data[2::3]]

test_data = _read_lexicon_tokens(test_path)
y_test, X_test, z_test = test_data[::3], test_data[1::3], [int(count) for count in test_data[2::3]]

In [3]:
# Setting up the vocabulary

# Character inventories are gathered over the train and the test words alike.
input_vocab = {ch for words in (X_train, X_test) for word in words for ch in word}
output_vocab = {ch for words in (y_train, y_test) for word in words for ch in word}

# Longest word seen on each side (0 when there are no words at all).
max_input_len = max((len(word) for words in (X_train, X_test) for word in words), default=0)
max_output_len = max((len(word) for words in (y_train, y_test) for word in words), default=0)

# The blank is registered in both vocabularies; onehot() uses it as filler.
input_vocab |= {" "}
output_vocab |= {" "}

# Freeze each vocabulary into a sorted list so that indices are stable.
input_vocab = sorted(input_vocab)
output_vocab = sorted(output_vocab)
input_v_len, output_v_len = len(input_vocab), len(output_vocab)

# Character -> index lookups.
input_inv = {char: i for i, char in enumerate(input_vocab)}
output_inv = {char: i for i, char in enumerate(output_vocab)}

In [4]:
def onehot(X, y):
    """One-hot encode paired source/target words for teacher-forced training.

    Parameters
    ----------
    X : sequence of str
        Source words; every character must be a key of `input_inv`.
    y : sequence of str
        Target words, paired positionally with `X`; every character must be a
        key of `output_inv`.

    Returns
    -------
    tuple of three float32 arrays
        encoder_input_data  -- shape (len(X), max_input_len, input_v_len)
        decoder_input_data  -- shape (len(X), max_output_len, output_v_len)
        decoder_target_data -- shape (len(X), max_output_len, output_v_len);
        position t holds the character at position t + 1 of the target word.
        Every position past the end of a word is filled with the blank " ".

    Raises
    ------
    KeyError
        If a word contains a character missing from the matching vocabulary.
    IndexError
        If a word is longer than `max_input_len` / `max_output_len`.

    NOTE(review): the first character of each target word never appears in
    `decoder_target_data` (targets are the decoder input shifted left by one),
    while decode_sequence() seeds decoding with " ". Prepending an explicit
    start marker to the target words would align training with inference, but
    it changes the array shapes, so it is left for a deliberate follow-up.
    """
    # Each side pads with the blank from its OWN vocabulary. The encoder used
    # to borrow the decoder's index, which is only right while " " happens to
    # sit at the same position in both sorted vocabularies.
    encoder_pad = input_inv[" "]
    decoder_pad = output_inv[" "]

    encoder_input_data = np.zeros((len(X), max_input_len, input_v_len), dtype="float32")
    decoder_input_data = np.zeros((len(X), max_output_len, output_v_len), dtype="float32")
    decoder_target_data = np.zeros((len(X), max_output_len, output_v_len), dtype="float32")

    for i, (a, b) in enumerate(zip(X, y)):
        for t, char in enumerate(a):
            encoder_input_data[i, t, input_inv[char]] = 1.0
        # Offsets come from len(), not from the leaked loop index, so an empty
        # word is padded from position 0 instead of reusing a stale `t`.
        encoder_input_data[i, len(a):, encoder_pad] = 1.0

        for t, char in enumerate(b):
            decoder_input_data[i, t, output_inv[char]] = 1.0
            if t > 0:
                # The target is the decoder input shifted one step to the left.
                decoder_target_data[i, t - 1, output_inv[char]] = 1.0
        decoder_input_data[i, len(b):, decoder_pad] = 1.0
        # The shifted target is one step shorter than the word itself; the
        # clamp keeps an empty word from producing the slice [-1:].
        decoder_target_data[i, max(len(b) - 1, 0):, decoder_pad] = 1.0

    return encoder_input_data, decoder_input_data, decoder_target_data

In [5]:
def myRNN(latent_dim, embed_dim=None, dropout=0.0, cell_type='SimpleRNN'):
    """Build the encoder-decoder training model over one-hot character input.

    Parameters
    ----------
    latent_dim : int
        Number of units in both the encoder and the decoder recurrent layer.
    embed_dim : int or None, default None
        Width of the learned character projection placed in front of each
        recurrent layer. `None` feeds the one-hot vectors straight into the
        recurrent layers.
    dropout : float, default 0.0
        Dropout rate applied to the decoder sequence output ahead of the
        softmax layer. A falsy rate adds no Dropout layer at all.
    cell_type : str, default 'SimpleRNN'
        'LSTM' or 'SimpleRNN'; any other value selects a GRU.

    Returns
    -------
    tf.keras.Model
        Inputs `[encoder_inputs, decoder_inputs]` with shapes
        (batch, time, input_v_len) and (batch, time, output_v_len); output is
        a softmax over `output_v_len` characters for every decoder step.

    Notes
    -----
    * The inputs are the one-hot tensors produced by onehot(). An `Embedding`
      layer expects integer indices, so the projection is a bias-free `Dense`
      layer instead; on one-hot vectors that is the same as an embedding
      lookup.
    * The defaults keep the existing call `myRNN(128)` working. They produce
      the layout input, input, RNN, RNN, Dense, which is what decompose()
      relies on when it addresses layers by position. A model built with
      `embed_dim` or a non-zero `dropout` has extra layers and does not match
      that positional lookup.
    """
    # Anything other than 'LSTM' / 'SimpleRNN' falls back to GRU.
    rnn_class = {'LSTM': layers.LSTM, 'SimpleRNN': layers.SimpleRNN}.get(cell_type, layers.GRU)

    def project(one_hot_inputs):
        """Apply the optional learned character projection."""
        if embed_dim is None:
            return one_hot_inputs
        return layers.Dense(embed_dim, use_bias=False)(one_hot_inputs)

    # Encoder: only the final state(s) are kept; the output is discarded.
    encoder_inputs = tf.keras.Input(shape=(None, input_v_len))
    encoder = rnn_class(latent_dim, return_state=True)
    # One state for SimpleRNN/GRU, two (h and c) for LSTM.
    _, *encoder_states = encoder(project(encoder_inputs))

    # Decoder: starts from the encoder state(s) and emits one vector per step.
    decoder_inputs = tf.keras.Input(shape=(None, output_v_len))
    decoder = rnn_class(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, *_ = decoder(project(decoder_inputs), initial_state=encoder_states)

    if dropout:
        decoder_outputs = layers.Dropout(dropout)(decoder_outputs)
    decoder_dense = layers.Dense(output_v_len, activation="softmax")
    decoder_outputs = decoder_dense(decoder_outputs)

    model = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)

    return model

In [6]:
# Build the encoder-decoder model; 128 is passed as `latent_dim`.
model = myRNN(128)

In [7]:
# Print the layer-by-layer summary of the model built above.
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, 27)]   0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, None, 47)]   0                                            
__________________________________________________________________________________________________
simple_rnn (SimpleRNN)          [(None, 128), (None, 19968       input_1[0][0]                    
__________________________________________________________________________________________________
simple_rnn_1 (SimpleRNN)        [(None, None, 128),  22528       input_2[0][0]                    
                                                                 simple_rnn[0][1]      

In [8]:
def train(model, batch_size=64, epochs=1):
    """Compile `model` and fit it on the one-hot encoded training pairs.

    The model is compiled with the Nadam optimizer, categorical cross-entropy
    loss and an accuracy metric, then fitted on `onehot(X_train, y_train)`
    with 10% of the samples held out for validation.

    Parameters
    ----------
    model : Keras model taking `[encoder_input, decoder_input]`.
    batch_size : int, forwarded to `model.fit`.
    epochs : int, forwarded to `model.fit`.

    Returns nothing; the model is trained in place.
    """
    model.compile(
        optimizer="nadam",
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    encoder_in, decoder_in, decoder_target = onehot(X_train, y_train)
    model.fit(
        x=[encoder_in, decoder_in],
        y=decoder_target,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.1,
    )

In [9]:
# Train with the defaults of train(): batch_size=64, epochs=1.
train(model)



In [10]:
def decompose(model):
    """Split a trained training model into inference encoder and decoder.

    Layers are addressed by position: `model.layers[2]` must be the encoder
    recurrent layer, `model.layers[3]` the decoder recurrent layer and
    `model.layers[4]` the softmax `Dense` layer. Models with a different
    layout are not supported.

    Any number of recurrent states is handled: one for SimpleRNN/GRU, two for
    LSTM. Previously exactly one state was assumed, so LSTM models failed.

    Parameters
    ----------
    model : Keras model with inputs `[encoder_input, decoder_input]`.

    Returns
    -------
    (encoder_model, decoder_model)
        encoder_model maps the encoder input to the final encoder state(s).
        decoder_model maps `[decoder_input, *states]` to
        `[char_probabilities, *new_states]`.
    """
    encoder_rnn = model.layers[2]
    decoder_RNN = model.layers[3]
    decoder_dense = model.layers[4]
    latent_dim = decoder_RNN.units

    # Encoder: drop the layer's output and keep every state tensor after it.
    encoder_inputs = model.input[0]
    _, *encoder_states = encoder_rnn.output
    encoder_model = tf.keras.Model(encoder_inputs, encoder_states)

    # Decoder: one fresh state placeholder per encoder state. The first keeps
    # the historical name "input_3"; further ones continue the numbering.
    decoder_inputs = model.input[1]
    decoder_states_inputs = [
        tf.keras.Input(shape=(latent_dim,), name="input_{}".format(3 + k))
        for k in range(len(encoder_states))
    ]
    decoder_outputs, *decoder_states = decoder_RNN(decoder_inputs, initial_state=decoder_states_inputs)
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = tf.keras.Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

    return encoder_model, decoder_model

In [11]:
# Split the trained model: `ok` is the inference encoder, `okok` the inference
# decoder (the order in which decompose() returns them).
ok, okok = decompose(model)
ok.summary()
okok.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None, 27)]        0         
_________________________________________________________________
simple_rnn (SimpleRNN)       [(None, 128), (None, 128) 19968     
Total params: 19,968
Trainable params: 19,968
Non-trainable params: 0
_________________________________________________________________
Model: "functional_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, None, 47)]   0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 128)]        0                                        

In [12]:
# Sanity check of the tensor shapes going into and out of both inference models.
print(ok.input.shape, ok.output.shape)
print(okok.input[0].shape, okok.input[1].shape, okok.output[0].shape, okok.output[1].shape)

(None, None, 27) (None, 128)
(None, None, 47) (None, 128) (None, None, 47) (None, 128)


In [13]:
# Decode the sequence

# Index -> character lookups: input_inv / output_inv turned inside out.
reverse_inp = {index: symbol for symbol, index in input_inv.items()}
reverse_out = {index: symbol for symbol, index in output_inv.items()}

def decode_sequence(model, input_seq, verbose=False):
    """Greedily decode one encoded source word into a target string.

    Parameters
    ----------
    model : trained model accepted by decompose().
    input_seq : one-hot encoder input; only the first sample of the decoder
        output is read, so it is expected to hold a single word.
    verbose : bool, default False
        When true, print the length of the partial answer before every
        decoding step. This trace used to be printed unconditionally.

    Returns
    -------
    str
        The decoded characters. Decoding stops once a blank " " is produced
        or the answer grows longer than `max_output_len`; the character that
        triggered the stop is part of the returned string.
    """
    encoder_model, decoder_model = decompose(model)

    # Keep the recurrent state as a flat list throughout, whether the encoder
    # yields one array (SimpleRNN/GRU) or several (LSTM).
    states = encoder_model.predict(input_seq)
    if not isinstance(states, (list, tuple)):
        states = [states]
    states = list(states)

    # Decoding is seeded with the blank character.
    target_seq = np.zeros((1, 1, output_v_len))
    target_seq[0, 0, output_inv[" "]] = 1.0

    final_ans = ""
    while True:
        if verbose:
            print(len(final_ans))
        output_chars, *states = decoder_model.predict([target_seq] + states)

        # Greedy choice: the most probable character of the newest step.
        sampled_char_index = np.argmax(output_chars[0, -1, :])
        sampled_char = reverse_out[sampled_char_index]
        final_ans += sampled_char

        if sampled_char == " " or len(final_ans) > max_output_len:
            break

        # Feed the chosen character back in as the next decoder input.
        target_seq = np.zeros((1, 1, output_v_len))
        target_seq[0, 0, sampled_char_index] = 1.0

    return final_ans

In [26]:
# Index of the training pair to inspect.
n = 100
# Re-encode the whole training set (encoder input, decoder input, decoder target).
aa, bb, cc = onehot(X_train, y_train)
# Show the one-hot decoder input of sample n ...
print(bb[n:n+1])
# ... and decode its encoder input; the result is the cell's displayed value.
decode_sequence(model, aa[n:n+1])

[[[0. 0. 1. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]]]
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17


'யைப்பட்டிர்டுட்டு '

In [None]:
# Scratch check: list() over a 2-D array gives a list of its rows.
list(np.array([[1, 2], [1, 2]]))

In [None]:
# Hyper-parameter sweep definition: a grid search that maximizes the metric
# named 'accuracy' (presumably meant for wandb.sweep -- the call is not shown).
# NOTE(review): the 'model' values name image-classification architectures and
# 'retrain' has no counterpart among myRNN's parameters (latent_dim,
# embed_dim, dropout, cell_type). This looks carried over from another
# project -- confirm before launching a sweep with it.
sweep_config = {
    'method': 'grid',
    'metric': {
        'name': 'accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'model': {
            'values': [
                'InceptionV3', 'InceptionResNetV2', 'ResNet50', 'Xception',
                'NASNetLarge'
            ]
        },
        'retrain': {
            'values': [0.1, 0.15, 0.2]
        }
    }
}