In [1]:
# Imports

import tensorflow as tf
from tensorflow.keras import datasets, models, layers
from sklearn.model_selection import train_test_split
import wandb
from wandb.keras import WandbCallback
from matplotlib import pyplot
import os
import numpy as np

In [2]:
# Setting up input flows

dataset_path = "../../dakshina_dataset_v1.0/ta/lexicons"
train_path = dataset_path + "/ta.translit.sampled.train.tsv"
test_path = dataset_path + "/ta.translit.sampled.test.tsv"

train_data = ''.join([each.decode('utf-8') for each in open(train_path, 'rb')]).split()
y_train, X_train, z_train = train_data[::3], train_data[1::3], [int(each) for each in train_data[2::3]]

test_data = ''.join([each.decode('utf-8') for each in open(test_path, 'rb')]).split()
y_test, X_test, z_test = test_data[::3], test_data[1::3], [int(each) for each in test_data[2::3]]

In [3]:
# Setting up the vocabulary

input_vocab, output_vocab = set(), set()

max_input_len, max_output_len = 0, 0

for each in X_train:
    for every in each:
        input_vocab.add(every)
    max_input_len = max(max_input_len, len(each))
for each in X_test:
    for every in each:
        input_vocab.add(every)
    max_input_len = max(max_input_len, len(each))

for each in y_train:
    for every in each:
        output_vocab.add(every)
    max_output_len = max(max_output_len, len(each))
for each in y_test:
    for every in each:
        output_vocab.add(every)
    max_output_len = max(max_output_len, len(each))
        
input_vocab.add(" ")
output_vocab.add(" ")
output_vocab.add("\t")
output_vocab.add("\n")

input_vocab = sorted(list(input_vocab))
output_vocab = sorted(list(output_vocab))
input_v_len = len(input_vocab)
output_v_len = len(output_vocab)
max_output_len += 2

input_inv = dict([(char, i) for i, char in enumerate(input_vocab)])
output_inv = dict([(char, i) for i, char in enumerate(output_vocab)])

In [4]:
def onehot(X, y):
    
    encoder_input_data = np.zeros((len(X), max_input_len, input_v_len), dtype="float32")
    decoder_input_data = np.zeros((len(X), max_output_len, output_v_len), dtype="float32")
    decoder_target_data = np.zeros((len(X), max_output_len, output_v_len), dtype="float32")

    for i, (a, b) in enumerate(zip(X, y)):
        for t, char in enumerate(a):
            encoder_input_data[i, t, input_inv[char]] = 1.0
        encoder_input_data[i, t + 1 :, input_inv[" "]] = 1.0
        for t, char in enumerate("\t" + b + "\n"):
            decoder_input_data[i, t, output_inv[char]] = 1.0
            if t > 0:
                decoder_target_data[i, t - 1, output_inv[char]] = 1.0
        decoder_input_data[i, t + 1 :, output_inv[" "]] = 1.0
        decoder_target_data[i, t:, output_inv[" "]] = 1.0
    
    return encoder_input_data, decoder_input_data, decoder_target_data

In [5]:
def myRNN(latent_dim, num_encoders = 1, num_decoders = 1, embed_dim = 1000, dropout = 0.0, cell_type = 'GRU'):
    encoder_inputs = tf.keras.Input(shape=(None, input_v_len))
    #encoder_inputs = layers.Embedding(input_dim=input_v_len, output_dim=embed_dim)(encoder_inputs)
    
    encoder_inp = encoder_inputs
    
    for ii in range(num_encoders):
        if cell_type == 'LSTM':
            encoder = layers.LSTM(latent_dim, return_state=True, return_sequences=True, dropout = dropout)
            encoder_inp, state_h, state_c = encoder(encoder_inp)
            encoder_states = [state_h, state_c]
        else:
            if cell_type == 'RNN':
                encoder = layers.SimpleRNN(latent_dim, return_state=True, return_sequences=True, dropout = dropout)
            else:
                encoder = layers.GRU(latent_dim, return_state=True, return_sequences=True, dropout = dropout)
            encoder_inp, state_h = encoder(encoder_inp)
            encoder_states = [state_h]
    
    decoder_inputs = tf.keras.Input(shape=(None, output_v_len))
    #decoder_inputs = layers.Embedding(input_dim=output_v_len, output_dim=embed_dim)(decoder_inputs)
    
    decoder_out = decoder_inputs
    
    for ii in range(num_decoders):
        if cell_type == 'LSTM':
            decoder = layers.LSTM(latent_dim, return_sequences=True, return_state=True, dropout = dropout)
            decoder_out, _, _ = decoder(decoder_out, initial_state=encoder_states)
        else:
            if cell_type == 'RNN':
                decoder = layers.SimpleRNN(latent_dim, return_sequences=True, return_state=True, dropout = dropout)
            else:
                decoder = layers.GRU(latent_dim, return_sequences=True, return_state=True, dropout = dropout)
            decoder_out, _ = decoder(decoder_out, initial_state=encoder_states)
    
    decoder_outputs = decoder_out
    decoder_dense = layers.Dense(output_v_len, activation="softmax")
    decoder_outputs = decoder_dense(decoder_outputs)
    
    model = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
    
    return model

In [None]:
def train(model, batch_size=64, epochs=25):
    model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])
    
    aa, bb, cc = onehot(X_train, y_train)
    model.fit([aa, bb], cc, batch_size=batch_size, epochs=epochs, validation_split=0.2,callbacks=[WandbCallback()])

In [None]:
sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'hidden_layer_size': {
            'values': [
                16, 64, 128, 256
            ],
        },
        'num_encoders': {
            'values': [1, 2, 3]
        },
        'num_decoders': {
            'values': [1, 2, 3]
        },
        'dropout': {
            'values': [0.0, 0.2, 0.3]
        },
        'cell_type': {
            'values': ['RNN', 'LSTM', 'GRU']
        },
        'num_epochs': {
            'values': [30, 45, 60]
        }
    }
}

In [None]:
sweep_id = wandb.sweep(sweep_config, entity = '0x2e4', project = 'cs6910-a3')

In [None]:

def run():
    default_config = {'hidden_layer_size': 16, 'num_encoders': 1, 'num_decoders': 1, 'dropout': 0.0, 'cell_type': 'RNN', 'num_epochs': 30}

    run = wandb.init(project='cs6910-a3', config=default_config)
    config = wandb.config

    model = myRNN(latent_dim=config.hidden_layer_size, num_encoders = config.num_encoders, num_decoders = config.num_decoders, dropout = config.dropout, cell_type = config.cell_type)

    optimizer = tf.keras.optimizers.Nadam()
    loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

    train(model, epochs=config.num_epochs)

In [None]:
wandb.agent(sweep_id, run)

In [8]:
best_model = myRNN(cell_type = 'GRU', latent_dim = 128, dropout = 0.3, num_encoders = 3, num_decoders = 3)

best_model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])
aa, bb, cc = onehot(X_train, y_train)
best_model.fit([aa, bb], cc, batch_size=64, epochs=5, validation_split=0.2)

best_model.summary()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "functional_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            [(None, None, 27)]   0                                            
__________________________________________________________________________________________________
gru_8 (GRU)                     [(None, None, 128),  60288       input_5[0][0]                    
__________________________________________________________________________________________________
gru_9 (GRU)                     [(None, None, 128),  99072       gru_8[0][0]                      
__________________________________________________________________________________________________
input_6 (InputLayer)            [(None, None, 49)]   0                                            
_____________________________________

In [9]:
cell_type = 'GRU'
latent_dim = 128
num_encoders = 3
num_decoders = 3

encoder_inputs = best_model.input[0]
_, *encoder_states = best_model.layers[num_encoders + 1].output
encoder_model = tf.keras.Model(encoder_inputs, encoder_states)

decoder_inputs = best_model.input[1]  # input_2
decoder_outputs = decoder_inputs

decoder_state_inputs = []
decoder_state_outputs = []

for ii in range(num_decoders):
    if cell_type == 'LSTM':
        temp_inputs = [tf.keras.Input(shape=(latent_dim,), name = 'decoder_0_' + str(ii)), tf.keras.Input(shape=(latent_dim,), name = 'decoder_1_' + str(ii))]
    else:
        temp_inputs = [tf.keras.Input(shape=(latent_dim,), name = 'decoder_' + str(ii))]
    decoder_state_inputs += temp_inputs

    decoder = best_model.layers[num_encoders + 2 + ii]
    decoder_outputs, *temp_states = decoder(decoder_outputs, initial_state=temp_inputs)
    decoder_state_outputs += temp_states
    
decoder_dense = best_model.layers[num_encoders + num_decoders + 2]
decoder_outputs = decoder_dense(decoder_outputs)

decoder_model = tf.keras.Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_state_outputs)

In [None]:
best_model.save('../saved_models/my_model')

In [None]:
best_model = models.load_model('../saved_models/my_model')
best_model.summary()

In [10]:
encoder_model.summary()
decoder_model.summary()

Model: "functional_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, None, 27)]        0         
_________________________________________________________________
gru_8 (GRU)                  [(None, None, 128), (None 60288     
_________________________________________________________________
gru_9 (GRU)                  [(None, None, 128), (None 99072     
_________________________________________________________________
gru_10 (GRU)                 [(None, None, 128), (None 99072     
Total params: 258,432
Trainable params: 258,432
Non-trainable params: 0
_________________________________________________________________
Model: "functional_9"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)      

In [11]:
# Decode the sequence

reverse_inp = dict((i, char) for char, i in input_inv.items())
reverse_out = dict((i, char) for char, i in output_inv.items())

def decode_sequence(input_seq):
    
    enc_states = [encoder_model.predict(input_seq)] * num_decoders

    target_seq = np.zeros((1, 1, output_v_len))
    target_seq[0, 0, output_inv["\t"]] = 1.0

    stop_condition = False
    final_ans = ""
    while not stop_condition:
        output_chars, *h = decoder_model.predict([target_seq] + enc_states)
        enc_states = h
        
        sampled_char_index = np.argmax(output_chars[0, -1, :])
        sampled_char = reverse_out[sampled_char_index]
        final_ans += sampled_char

        if sampled_char == "\n" or len(final_ans) > max_output_len:
            stop_condition = True

        target_seq = np.zeros((1, 1, output_v_len))
        target_seq[0, 0, sampled_char_index] = 1.0
    return final_ans[:-1]

In [13]:
cnt = 0

for ii in range(test_in.shape[0]):
    res = decode_sequence(test_in[ii: ii + 1])
    #print(X_test[ii], " - ", res, " - ", y_test[ii])
    if res == y_test[ii]:
        cnt += 1
    if ii > 0 and ii % 10 == 0:
        print(cnt / ii, end = ' ')

0.0 0.1 0.06666666666666667 0.05 0.06 0.08333333333333333 0.07142857142857142 0.0625 0.13333333333333333 0.19 0.18181818181818182 

KeyboardInterrupt: 

In [12]:
test_in, test_out, _ = onehot(X_test, y_test)

In [None]:
pyplot.figure(figsize=[50, 100])
all_classes = list(test_generator.class_indices.keys())

ax = pyplot.subplot(11, 3, 1)
ax1 = pyplot.subplot(11, 3, 2)
ax2 = pyplot.subplot(11, 3, 3)
ax.axis('off')
ax.text(0.3, 0.5, "Sample Image", fontsize=70)
ax1.axis('off')
ax1.text(0.3, 0.5, "Prediction", fontsize=70)
ax2.axis('off')
ax2.text(0.3, 0.5, "True Class", fontsize=70)

for some in os.listdir(test_path):
    idx = test_generator.class_indices[some]
    new_path = test_path + "/" + some
    img_path = new_path + "/" + os.listdir(new_path)[0]
    img = tf.keras.preprocessing.image.load_img(img_path,
                                                target_size=(max_shape[0],
                                                             max_shape[1]))
    img_np = np.asarray(img)
    ax = pyplot.subplot(11, 3, 3 * idx + 4)
    ax1 = pyplot.subplot(11, 3, 3 * idx + 5)
    ax2 = pyplot.subplot(11, 3, 3 * idx + 6)
    ax.imshow(img_np)
    ax1.axis('off')
    ax1.text(0.3, 0.5, all_classes[y_pred[200 * idx]], fontsize=70)
    ax2.axis('off')
    ax2.text(0.3, 0.5, some, fontsize=70)

wandb.init(project='cs6910-a2')
wandb.log({'Sample Predictions': pyplot})

In [None]:
[] + [1, 2]