**INSTALL**

In [None]:
!pip install tqdm
from tqdm.notebook import tqdm
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import Input
from keras.callbacks import EarlyStopping
from keras.layers import Embedding, SimpleRNN, LSTM, GRU, Dense
from keras.models import Model



In [None]:
# --- Training constants -------------------------------------------------
# Mini-batch size used when fitting the model.
batch_size = 64
# Number of passes over the training data.
epochs = 20
# Width of each recurrent layer (size of the encoder/decoder state).
latent_dim = 128
# Dimensionality of the character embeddings.
embedding_size = 128
# Upper bound on the number of lexicon lines to load.
num_samples = 44205
# The data file path (`data_path`) is assigned in a later cell.
# Absolute-path alternative, currently disabled:
#data_path = "/content/drive/MyDrive/Deep Learning/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv"

In [None]:
# Mount Google Drive at /content/drive so the dataset stored there is reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Work from the Hindi lexicon folder so the data files below can be given as bare file names.
cd /content/drive/MyDrive/dakshina_dataset_v1.0/hi/lexicons

/content/drive/MyDrive/dakshina_dataset_v1.0/hi/lexicons


In [None]:
# Lexicon splits, given relative to the current working directory.
data_path = "hi.translit.sampled.train.tsv"  # training split
val_path = "hi.translit.sampled.dev.tsv"     # validation (dev) split
test_path = "hi.translit.sampled.test.tsv"   # test split

**PREPARING DATA**

In [None]:
def prepare_data(data_path, batch_size, num_samples):
    """Load a transliteration lexicon and encode it for a character-level seq2seq model.

    Every non-blank line of the file must contain exactly three tab-separated
    fields. The first field is used as the target word, the second as the
    input word, and the third is ignored. Each target is wrapped in a leading
    tab (start-of-sequence) and a trailing newline (end-of-sequence). A single
    space is the padding character in both vocabularies.

    Args:
        data_path: Path of the UTF-8 encoded TSV file.
        batch_size: Unused; kept so existing callers keep working.
        num_samples: Maximum number of non-blank lines to load
            (``None`` loads all of them).

    Returns:
        An 11-tuple, in this order:
        ``input_texts`` (list of str), ``target_texts`` (list of str, with the
        start/end markers), ``encoder_input_data`` (samples x max input
        length, token ids), ``decoder_input_data`` (samples x max target
        length, token ids), ``decoder_target_data`` (samples x max target
        length x target vocabulary size, one-hot, shifted one step ahead of
        ``decoder_input_data``), ``num_encoder_tokens``,
        ``num_decoder_tokens``, ``max_encoder_seq_length``,
        ``max_decoder_seq_length``, ``input_token_index`` and
        ``target_token_index`` (char -> id dicts).

    Raises:
        ValueError: If a line does not have exactly three fields, or if the
            file yields no samples.
    """
    # Everything below is plain Python/NumPy, so no TensorFlow device scope is used.
    pad_char = " "
    input_texts = []
    target_texts = []
    # Seeding the sets with the padding character guarantees it appears in
    # each vocabulary exactly once, even when the data itself contains spaces.
    input_characters = {pad_char}
    target_characters = {pad_char}

    with open(data_path, "r", encoding="utf-8") as f:
        # Dropping blank lines handles files with or without a trailing newline.
        lines = [line for line in f.read().split("\n") if line.strip()]

    for line in lines[:num_samples]:
        target_text, input_text, _ = line.split("\t")
        input_text = input_text.strip("\t\n ")
        # "\t" marks the start of a target sequence and "\n" marks its end.
        target_text = "\t" + target_text.strip("\t\n ") + "\n"
        input_texts.append(input_text)
        target_texts.append(target_text)
        input_characters.update(input_text)
        target_characters.update(target_text)

    if not input_texts:
        raise ValueError("no samples could be read from %r" % (data_path,))

    input_characters = sorted(input_characters)
    target_characters = sorted(target_characters)
    num_encoder_tokens = len(input_characters)
    num_decoder_tokens = len(target_characters)
    max_encoder_seq_length = max(len(txt) for txt in input_texts)
    max_decoder_seq_length = max(len(txt) for txt in target_texts)

    input_token_index = {char: i for i, char in enumerate(input_characters)}
    target_token_index = {char: i for i, char in enumerate(target_characters)}
    input_pad = input_token_index[pad_char]
    target_pad = target_token_index[pad_char]

    num_texts = len(input_texts)
    # Id arrays start out filled with the padding id of their OWN vocabulary,
    # so only the real characters need to be written afterwards.
    encoder_input_data = np.full((num_texts, max_encoder_seq_length), input_pad, dtype="float32")
    decoder_input_data = np.full((num_texts, max_decoder_seq_length), target_pad, dtype="float32")
    decoder_target_data = np.zeros((num_texts, max_decoder_seq_length, num_decoder_tokens), dtype="float32")

    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            encoder_input_data[i, t] = input_token_index[char]

        for t, char in enumerate(target_text):
            decoder_input_data[i, t] = target_token_index[char]
            if t > 0:
                # The target is the decoder input shifted one step ahead,
                # so it never contains the start marker.
                decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
        # Every step after the end marker is labelled as padding.
        decoder_target_data[i, len(target_text) - 1:, target_pad] = 1.0

    # The second element is the full list of targets, not just the last one.
    return input_texts, target_texts, encoder_input_data, decoder_input_data, decoder_target_data, num_encoder_tokens, num_decoder_tokens,\
        max_encoder_seq_length, max_decoder_seq_length, input_token_index, target_token_index
              

In [None]:
# Load and encode the training lexicon; the vocabularies and maximum lengths
# returned here size the model built further down.
(
    input_texts,
    target_text,
    encoder_input_data,
    decoder_input_data,
    decoder_target_data,
    num_encoder_tokens,
    num_decoder_tokens,
    max_encoder_seq_length,
    max_decoder_seq_length,
    input_token_index,
    target_token_index,
) = prepare_data(data_path, batch_size, num_samples)

In [None]:
# Architecture / regularisation settings passed to build_model.
hidden_layers = 1   # number of stacked recurrent layers in encoder and decoder
dropout = 0.2       # `dropout` argument of every recurrent layer
rec_dropout = 0.2   # `recurrent_dropout` argument of every recurrent layer

**BUILDING MODEL**

In [None]:
def build_model(encoder_input_size, decoder_input_size, embedding_size, latent_dim, hidden_layers, input_vocab, target_vocab, dropout, rec_dropout, cell_type='LSTM'):
  """Build a stacked encoder-decoder model and its two inference companions.

  The encoder and the decoder each consist of an Embedding layer followed by
  `hidden_layers` recurrent layers of width `latent_dim`. Decoder layer i is
  initialised with the final state of encoder layer i. A softmax Dense layer
  over `target_vocab` classes produces the per-step output.

  Args:
    encoder_input_size: Length of the (padded) encoder input sequences.
    decoder_input_size: Length of the (padded) decoder input sequences.
    embedding_size: Output dimension of both Embedding layers.
    latent_dim: Number of units in every recurrent layer.
    hidden_layers: Number of stacked recurrent layers (at least 1).
    input_vocab: Size of the input vocabulary.
    target_vocab: Size of the target vocabulary.
    dropout: `dropout` argument of every recurrent layer.
    rec_dropout: `recurrent_dropout` argument of every recurrent layer.
    cell_type: 'RNN' (SimpleRNN) or 'GRU'; any other value selects LSTM.

  Returns:
    (model, encoder_model, decoder_model):
      model: maps [encoder ids, decoder ids] to the softmax output sequence.
      encoder_model: maps encoder ids to the list of final encoder states,
        ordered layer by layer (h, c per layer for LSTM, h only otherwise).
      decoder_model: maps [decoder ids] + state inputs to
        [softmax output] + updated states. It reuses the layers of `model`,
        so it shares their weights.

  Raises:
    ValueError: If `hidden_layers` is smaller than 1.
  """
  if hidden_layers < 1:
    raise ValueError("hidden_layers must be at least 1")

  # Layer class plus the number of state tensors each layer returns:
  # SimpleRNN and GRU carry one state (h), LSTM carries two (h and c).
  if cell_type == 'RNN':
    cell_class, states_per_layer = SimpleRNN, 1
  elif cell_type == 'GRU':
    cell_class, states_per_layer = GRU, 1
  else:
    cell_class, states_per_layer = LSTM, 2

  def new_recurrent_layer():
    # One recurrent layer that returns its full output sequence and final state(s).
    return cell_class(latent_dim, dropout=dropout, recurrent_dropout=rec_dropout, return_state=True, return_sequences=True)

  def run_stack(layers, sequence, initial_states=None):
    # Feed `sequence` through `layers` in order; return the last output and all final states.
    collected_states = []
    for index, layer in enumerate(layers):
      if initial_states is None:
        output, *layer_states = layer(sequence)
      else:
        first = states_per_layer * index
        output, *layer_states = layer(sequence, initial_state=initial_states[first:first + states_per_layer])
      sequence = output
      collected_states.extend(layer_states)
    return sequence, collected_states

  with tf.device('/device:GPU:0'):
    # Shapes are written as 1-tuples; a bare parenthesised int is not a tuple.
    encoder_inputs = Input(shape=(encoder_input_size,))
    encoder_embeddings = Embedding(input_vocab, embedding_size)(encoder_inputs)
    decoder_inputs = Input(shape=(decoder_input_size,))
    decoder_embeddings = Embedding(target_vocab, embedding_size)(decoder_inputs)

    encoder_layers = [new_recurrent_layer() for _ in range(hidden_layers)]
    decoder_layers = [new_recurrent_layer() for _ in range(hidden_layers)]
    decoder_dense = Dense(target_vocab, activation="softmax")

    # Training model: the decoder stack starts from the encoder's final states.
    _, encoder_states = run_stack(encoder_layers, encoder_embeddings)
    decoder_output, _ = run_stack(decoder_layers, decoder_embeddings, encoder_states)
    model = Model([encoder_inputs, decoder_inputs], decoder_dense(decoder_output))

    # Encoder (inference): exposes the final states computed above.
    encoder_model = Model(encoder_inputs, encoder_states)

    # Decoder (inference): same layers, but the initial states are model inputs
    # and the updated states are returned next to the prediction.
    decoder_states_inputs = [Input(shape=(latent_dim,)) for _ in range(states_per_layer * hidden_layers)]
    decoder_output_i, decoder_states_i = run_stack(decoder_layers, decoder_embeddings, decoder_states_inputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_dense(decoder_output_i)] + decoder_states_i)

  return model, encoder_model, decoder_model

In [None]:
# Build the training model together with its encoder/decoder inference models,
# sized from the prepared data and the hyperparameter cells above.
model, encoder_model, decoder_model = build_model(
    encoder_input_size=max_encoder_seq_length,
    decoder_input_size=max_decoder_seq_length,
    embedding_size=embedding_size,
    latent_dim=latent_dim,
    hidden_layers=hidden_layers,
    input_vocab=num_encoder_tokens,
    target_vocab=num_decoder_tokens,
    dropout=dropout,
    rec_dropout=rec_dropout,
    cell_type='LSTM',
)
model.summary()

In [None]:
# The one-hot targets call for categorical cross-entropy.
model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])

# The decoder is fed the target sequence and trained to predict it one step ahead;
# 20% of the samples are held out for validation.
model.fit(
    x=[encoder_input_data, decoder_input_data],
    y=decoder_target_data,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
)

**WANDB**

In [None]:
!pip install --upgrade wandb

In [None]:
# Init wandb
import wandb
from wandb.keras import WandbCallback

# Start a run in the "assignment-3" project under the "ravi-kumar" entity.
# train() further down calls wandb.init again with its own config.
wandb.init(project="assignment-3", entity="ravi-kumar")

In [None]:
# Configure the sweep: the parameters to search over, the search strategy and
# the metric to optimise.
sweep_config = {
    'method': 'random', #grid, random
    # Optimise accuracy on the held-out validation split rather than on the
    # training data, so the sweep does not favour configurations that overfit.
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'cell_type' : {
            'values': ['RNN', 'GRU', 'LSTM']
        },
        'embedding_size':{
            'values': [256, 512]
        },
        'hidden_layers' :{
            'values': [2, 3]
        },
        'hidden_layer_size' :{
            'values': [64, 128]
        },
        'dropout' :{
            'values': [0.2, 0.3]
        },
        # Recurrent dropout is currently excluded from the search.
        # 'rec_dropout' :{
        #     'values': [0.2, 0.3]
        # },
        'batch_size' : {
            'values': [32, 64]
        },
        'epochs':{
            'values': [30]
        }
    }
}

In [None]:
def train():
  """Train one model with the hyperparameters of the current W&B run.

  Starts a W&B run (missing hyperparameters fall back to `config_defaults`),
  loads the training lexicon from the notebook-level `data_path`, builds the
  model for the configured cell type and fits it with a 20% validation split,
  logging through `WandbCallback`.

  Relies on the notebook-level names `data_path`, `prepare_data`,
  `build_model`, `wandb` and `WandbCallback`.
  """
  with tf.device('/device:GPU:0'):
    # Wandb Configuration
    config_defaults = {
        'cell_type' : 'RNN',
        'embedding_size': 128,
        'hidden_layers' : 1,
        'hidden_layer_size' : 128,
        'dropout' : 0.3,
        # Default 0 keeps recurrent dropout off unless a sweep sets it, and
        # makes the logged config match what the model actually uses.
        'rec_dropout' : 0,
        'batch_size' : 64,
        'epochs': 10
    }
    wandb.init(config=config_defaults)
    config = wandb.config

    # Parameters
    batch_size = config.batch_size
    epochs = config.epochs
    latent_dim = config.hidden_layer_size
    embedding_size = config.embedding_size
    hidden_layers = config.hidden_layers
    cell_type = config.cell_type
    dropout = config.dropout
    rec_dropout = config.rec_dropout
    num_samples = 44205

    # Preparing/Loading Data
    # `data_path` is the notebook-level training file, so every trial trains
    # on the same file as the rest of the notebook.
    input_texts, target_text, encoder_input_data, decoder_input_data, decoder_target_data, num_encoder_tokens, num_decoder_tokens,\
    max_encoder_seq_length, max_decoder_seq_length, input_token_index, target_token_index = prepare_data(data_path, batch_size, num_samples)

    # Building Model
    model, encoder_model, decoder_model = build_model(max_encoder_seq_length, max_decoder_seq_length, embedding_size,
                                                 latent_dim, hidden_layers, num_encoder_tokens, num_decoder_tokens, dropout, rec_dropout, cell_type=cell_type)

    # Training Model
    model.compile(
        optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"]
    )
    model.fit(
        [encoder_input_data, decoder_input_data],
        decoder_target_data,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.2,
        callbacks=[WandbCallback()]
    )

In [None]:
# Register the sweep described by sweep_config; the returned id is passed to wandb.agent.
sweep_id = wandb.sweep(sweep_config, entity="ravi-kumar", project="assignment-3")

In [None]:
# Launch an agent for the sweep, with train as the function it runs.
# NOTE(review): no `count` is passed; with the 'random' method the agent
# presumably keeps starting trials until it is interrupted — confirm and
# consider setting `count`.
wandb.agent(sweep_id, train)