In [None]:
# Imports

import tensorflow as tf
from tensorflow.keras import datasets, models, layers
from sklearn.model_selection import train_test_split
import wandb
from wandb.keras import WandbCallback
from matplotlib import pyplot
import os
import numpy as np

In [None]:
# Setting up input flows

dataset_path = "../../dakshina_dataset_v1.0/ta/lexicons"
train_path = dataset_path + "/ta.translit.sampled.train.tsv"
test_path = dataset_path + "/ta.translit.sampled.test.tsv"


def _read_tokens(path):
    """Return every whitespace-separated token of the UTF-8 file at `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    the content has been read.

    Raises:
        ValueError: if the number of tokens is not a multiple of three, since
            the stride-3 slicing below would then silently misalign columns.
    """
    with open(path, encoding='utf-8') as lexicon_file:
        tokens = lexicon_file.read().split()
    if len(tokens) % 3:
        raise ValueError(
            "expected 3 whitespace-separated fields per record in {!r}, got {} tokens".format(path, len(tokens))
        )
    return tokens


# Each record contributes three consecutive tokens: field 0 becomes the target
# word (y), field 1 the input word (X) and field 2 an integer (z).
train_data = _read_tokens(train_path)
y_train, X_train, z_train = train_data[::3], train_data[1::3], [int(each) for each in train_data[2::3]]

test_data = _read_tokens(test_path)
y_test, X_test, z_test = test_data[::3], test_data[1::3], [int(each) for each in test_data[2::3]]

In [None]:
# Setting up the vocabulary

# Pool train and test words so vocabulary and maximum lengths cover both splits.
input_words = [*X_train, *X_test]
output_words = [*y_train, *y_test]

# Longest word on each side (0 when there are no words at all).
max_input_len = max((len(word) for word in input_words), default=0)
max_output_len = max((len(word) for word in output_words), default=0)

# Sorted character inventories; " " is always present because it is used as
# the padding / boundary symbol by the encoding and decoding code.
input_vocab = sorted({symbol for word in input_words for symbol in word} | {" "})
output_vocab = sorted({symbol for word in output_words for symbol in word} | {" "})
input_v_len = len(input_vocab)
output_v_len = len(output_vocab)

# Character -> index lookup tables.
input_inv = {char: i for i, char in enumerate(input_vocab)}
output_inv = {char: i for i, char in enumerate(output_vocab)}

In [None]:
def onehot(X, y):
    """One-hot encode paired input/target words for teacher-forced training.

    Args:
        X: sequence of input words (characters must be keys of `input_inv`).
        y: sequence of target words (characters must be keys of `output_inv`).

    Returns:
        Tuple `(encoder_input_data, decoder_input_data, decoder_target_data)`
        of float32 arrays shaped `(len(X), max_input_len, input_v_len)`,
        `(len(X), max_output_len, output_v_len)` and
        `(len(X), max_output_len, output_v_len)`. Positions past the end of a
        word are filled with the one-hot code of " ". The target array is the
        decoder input shifted one step to the left.

    NOTE(review): the decoder input is the target word itself (no start symbol
    is prepended), so the first target character is never used as a training
    label, while decode_sequence() seeds decoding with " ". Confirm whether a
    start symbol should be prepended here.
    """
    encoder_input_data = np.zeros((len(X), max_input_len, input_v_len), dtype="float32")
    decoder_input_data = np.zeros((len(X), max_output_len, output_v_len), dtype="float32")
    decoder_target_data = np.zeros((len(X), max_output_len, output_v_len), dtype="float32")

    # Padding indices are looked up per vocabulary: the encoder side must use
    # the *input* table, not the output one.
    input_pad = input_inv[" "]
    output_pad = output_inv[" "]

    for i, (a, b) in enumerate(zip(X, y)):
        for t, char in enumerate(a):
            encoder_input_data[i, t, input_inv[char]] = 1.0
        # Use len(a) rather than the loop variable so empty words are handled.
        encoder_input_data[i, len(a):, input_pad] = 1.0

        for t, char in enumerate(b):
            decoder_input_data[i, t, output_inv[char]] = 1.0
            if t > 0:
                # Target at step t-1 is the decoder input at step t.
                decoder_target_data[i, t - 1, output_inv[char]] = 1.0
        decoder_input_data[i, len(b):, output_pad] = 1.0
        # The target is one step shorter than the input, so padding starts one
        # position earlier (clamped at 0 for empty words).
        decoder_target_data[i, max(len(b) - 1, 0):, output_pad] = 1.0

    return encoder_input_data, decoder_input_data, decoder_target_data

In [None]:
def myRNN(latent_dim, num_encoders = 1, num_decoders = 1, embed_dim = 1000, dropout = 0.0, cell_type = 'GRU'):
    """Build an encoder-decoder Keras model over one-hot character sequences.

    Args:
        latent_dim: number of units in every recurrent layer.
        num_encoders: number of stacked encoder layers (must be >= 1).
        num_decoders: number of stacked decoder layers.
        embed_dim: currently unused; kept so existing callers keep working.
        dropout: rate of the Dropout layer applied before the output Dense layer.
        cell_type: 'LSTM' or 'RNN' (SimpleRNN); any other value selects GRU.

    Returns:
        A `tf.keras.Model` taking `[encoder_inputs, decoder_inputs]` and
        producing a softmax over `output_v_len` symbols at each decoder step.

    Raises:
        ValueError: if `num_encoders` is smaller than 1 (there would be no
            encoder state to initialise the decoder with).
    """
    if num_encoders < 1:
        raise ValueError("num_encoders must be at least 1, got {}".format(num_encoders))

    if cell_type == 'LSTM':
        recurrent_layer = layers.LSTM
    elif cell_type == 'RNN':
        recurrent_layer = layers.SimpleRNN
    else:
        recurrent_layer = layers.GRU

    encoder_inputs = tf.keras.Input(shape=(None, input_v_len))
    encoder_inp = encoder_inputs

    for ii in range(num_encoders):
        encoder = recurrent_layer(latent_dim, return_state=True, return_sequences=True)
        # With return_state=True the layer yields the sequence output followed
        # by its state tensor(s): one for SimpleRNN/GRU, two (h, c) for LSTM.
        # Only the states of the last encoder layer survive the loop.
        encoder_inp, *encoder_states = encoder(encoder_inp)

    decoder_inputs = tf.keras.Input(shape=(None, output_v_len))

    decoder_out = decoder_inputs
    temp_states = encoder_states

    for ii in range(num_decoders):
        decoder = recurrent_layer(latent_dim, return_sequences=True, return_state=True)
        # NOTE(review): every decoder layer after the first is initialised with
        # the final state of the decoder layer below it (not with an encoder
        # state). Kept as in the original design -- confirm this is intended.
        decoder_out, *temp_states = decoder(decoder_out, initial_state=temp_states)

    decoder_outputs = decoder_out
    decoder_dense = layers.Dense(output_v_len, activation="softmax")
    decoder_dense_drop = layers.Dropout(dropout)(decoder_outputs)
    decoder_outputs = decoder_dense(decoder_dense_drop)

    model = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)

    return model

In [None]:
# Three stacked encoder layers and three stacked decoder layers of 128 units each.
# NOTE(review): decompose()/decode_sequence() further down appear to assume a
# single encoder and a single decoder layer -- confirm this configuration is
# meant to be used with them.
model = myRNN(latent_dim=128, num_encoders=3, num_decoders=3)

In [None]:
# Print the layer-by-layer architecture of the model built above.
model.summary()

In [None]:
def train(model, batch_size=64, epochs=25):
    """Compile `model` and fit it on the one-hot encoded training words.

    Args:
        model: Keras model taking `[encoder_input, decoder_input]`.
        batch_size: mini-batch size passed to `model.fit`.
        epochs: number of epochs passed to `model.fit`.

    The last 10% of the training arrays is held out by Keras as validation
    data (`validation_split=0.1`).
    """
    model.compile(optimizer="nadam", loss="categorical_crossentropy", metrics=["accuracy"])

    encoder_in, decoder_in, decoder_target = onehot(X_train, y_train)

    # WandbCallback requires an active run; only attach it when wandb.init()
    # has been called, so plain (non-sweep) training works as well.
    callbacks = [WandbCallback()] if wandb.run is not None else []

    model.fit(
        [encoder_in, decoder_in],
        decoder_target,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.1,
        callbacks=callbacks,
    )

In [None]:
# Train the model with train()'s default batch size and epoch count.
train(model)

In [None]:
def decompose(model):
    """Split a trained seq2seq model into separate inference encoder/decoder models.

    Only models with exactly one encoder and one decoder recurrent layer are
    supported (the layout produced by `myRNN(..., num_encoders=1,
    num_decoders=1)`).

    Args:
        model: trained model whose inputs are `[encoder_inputs, decoder_inputs]`
            and whose last two layers are the Dropout and the output Dense layer.

    Returns:
        `(encoder_model, decoder_model)` where `encoder_model` maps an encoder
        input to the encoder state(s) and `decoder_model` maps
        `[decoder_input, *states]` to `[token_probabilities, *new_states]`.
        There is one state for SimpleRNN/GRU and two (h, c) for LSTM.

    Raises:
        ValueError: if the model does not contain exactly two recurrent layers.
    """
    recurrent_types = (layers.SimpleRNN, layers.GRU, layers.LSTM)
    recurrent_layers = [layer for layer in model.layers if isinstance(layer, recurrent_types)]
    if len(recurrent_layers) != 2:
        raise ValueError(
            "decompose() supports exactly one encoder and one decoder recurrent "
            "layer, but the model has {} recurrent layers".format(len(recurrent_layers))
        )
    # Relies on model.layers listing the encoder layer before the decoder layer.
    encoder_RNN, decoder_RNN = recurrent_layers
    latent_dim = decoder_RNN.units

    encoder_inputs = model.input[0]
    # The layer output is [sequence, state, ...]; everything after the sequence
    # is the final state that seeds the decoder.
    encoder_states = list(encoder_RNN.output[1:])
    encoder_model = tf.keras.Model(encoder_inputs, encoder_states)

    decoder_inputs = model.input[1]
    # One placeholder per state tensor. No explicit name is given so repeated
    # calls cannot clash with an existing layer name.
    decoder_states_inputs = [tf.keras.Input(shape=(latent_dim,)) for _ in encoder_states]
    decoder_outputs, *decoder_states = decoder_RNN(decoder_inputs, initial_state=decoder_states_inputs)
    decoder_dropout = model.layers[-2]
    decoder_outputs = decoder_dropout(decoder_outputs)
    decoder_dense = model.layers[-1]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = tf.keras.Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

    return encoder_model, decoder_model

In [None]:
# Build the inference encoder (ok) and decoder (okok) and print both architectures.
ok, okok = decompose(model)
for inference_model in (ok, okok):
    inference_model.summary()

In [None]:
# Show the input/output tensor shapes of the encoder, then of the decoder.
encoder_shapes = (ok.input.shape, ok.output.shape)
print(*encoder_shapes)
decoder_tensors = (okok.input[0], okok.input[1], okok.output[0], okok.output[1])
print(*(tensor.shape for tensor in decoder_tensors))

In [None]:
# Decode the sequence

# Index -> character lookup tables (inverse of input_inv / output_inv).
reverse_inp = {index: symbol for symbol, index in input_inv.items()}
reverse_out = {index: symbol for symbol, index in output_inv.items()}

def decode_sequence(model, input_seq):
    """Greedily decode one encoded input word into an output string.

    Args:
        model: trained seq2seq model accepted by `decompose`.
        input_seq: one-hot encoder input for a single word, with a leading
            batch axis of size 1.

    Returns:
        The decoded string. Decoding starts from the " " symbol and stops once
        " " is produced or more than `max_output_len` characters have been
        emitted; the terminating character is included in the result.
    """
    encoder_model, decoder_model = decompose(model)

    # predict() yields a bare array for a single-output model and a sequence
    # for several outputs; normalise to a list of state arrays.
    states = encoder_model.predict(input_seq)
    states = list(states) if isinstance(states, (list, tuple)) else [states]

    target_seq = np.zeros((1, 1, output_v_len))
    target_seq[0, 0, output_inv[" "]] = 1.0

    final_ans = ""
    while True:
        # Feed the states as a flat list next to the current symbol; the
        # decoder returns the probabilities followed by the updated states.
        output_chars, *states = decoder_model.predict([target_seq] + states)

        sampled_char_index = int(np.argmax(output_chars[0, -1, :]))
        sampled_char = reverse_out[sampled_char_index]
        final_ans += sampled_char

        if sampled_char == " " or len(final_ans) > max_output_len:
            break

        # The symbol just produced becomes the next decoder input.
        target_seq = np.zeros((1, 1, output_v_len))
        target_seq[0, 0, sampled_char_index] = 1.0

    return final_ans

In [None]:
# Decode a single training example (row n of the encoded inputs) as a smoke test.
n = 50
aa, bb, cc = onehot(X=X_train, y=y_train)
decode_sequence(model=model, input_seq=aa[n : n + 1])

In [None]:
# Scratch check: list() over a 2-D array yields one 1-D array per row.
# NOTE(review): not referenced by any other cell -- looks like leftover experimentation.
list(np.array([[1, 2], [1, 2]]))

In [None]:
# Candidate values for every hyper-parameter explored by the sweep.
_sweep_choices = {
    'hidden_layer_size': [16, 64, 128, 256],
    'num_encoders': [1, 2, 3],
    'num_decoders': [1, 2, 3],
    'dropout': [0.0, 0.2, 0.3],
    'cell_type': ['RNN', 'LSTM', 'GRU'],
    'num_epochs': [30, 45, 60],
}

# Random search that maximises the metric logged under the name 'accuracy'.
# NOTE(review): 'accuracy' is the training metric; confirm whether the
# validation metric was intended as the sweep objective.
sweep_config = {
    'method': 'random',
    'metric': {'name': 'accuracy', 'goal': 'maximize'},
    'parameters': {
        parameter: {'values': choices} for parameter, choices in _sweep_choices.items()
    },
}

In [None]:

# Register the sweep defined by sweep_config under the given entity/project
# and keep the returned id for the agent.
sweep_id = wandb.sweep(sweep_config, entity = '0x2e4', project = 'cs6910-a3')

In [None]:

def run():
    """Train one model using the hyper-parameters of the current wandb run.

    `default_config` supplies the configuration passed to `wandb.init`; the
    values actually used are read back from `wandb.config`.
    """
    default_config = {'hidden_layer_size': 16, 'num_encoders': 1, 'num_decoders': 1, 'dropout': 0.0, 'cell_type': 'RNN', 'num_epochs': 30}

    wandb.init(project='cs6910-a3', config=default_config)
    config = wandb.config

    model = myRNN(
        latent_dim=config.hidden_layer_size,
        num_encoders=config.num_encoders,
        num_decoders=config.num_decoders,
        dropout=config.dropout,
        cell_type=config.cell_type,
    )

    # Pass the epoch count by keyword: train()'s second positional parameter
    # is batch_size, not epochs. Optimizer and loss are configured in train().
    train(model, epochs=config.num_epochs)

In [None]:

# Start a sweep agent that calls run() for the sweep registered above.
wandb.agent(sweep_id, run)