
Installing Wandb

In [7]:
# Install the Weights & Biases client used below for experiment tracking and sweeps.
!pip install wandb




**Importing Necessary Packages**

In [8]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.font_manager import FontProperties
%matplotlib inline
import wandb
from wandb.keras import WandbCallback
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K


Downloading the Dataset

In [None]:
%%capture

# Fetch the Dakshina dataset archive and unpack it into the current working directory.
!curl https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar --output daksh.tar
!tar -xvf  'daksh.tar'

**Getting Lexicons from the Tamil Dataset**

In [6]:
def _read_lexicon(path):
    """Read one tab-separated lexicon file.

    Arguments:
        path: location of the ``.tsv`` file.

    Returns:
        (lines, pairs, mapping) where ``lines`` are the raw lines of the file,
        ``pairs`` are those lines split on tabs, and ``mapping`` maps the second
        column of every row to its first column. When the second column repeats,
        the last row seen wins.
    """
    # The files contain Tamil script, so decode explicitly as UTF-8 instead of
    # relying on the platform's default encoding.
    with open(path, encoding="utf-8") as f:
        lines = f.readlines()
    pairs = [line.strip("\n").split("\t") for line in lines]
    # presumably column 1 is the romanized word and column 0 the native-script
    # word (the keys are used as model inputs later) -- verify against the dataset README
    mapping = {example[1]: example[0] for example in pairs}
    return lines, pairs, mapping


LEXICON_DIR = "/content/dakshina_dataset_v1.0/ta/lexicons"

# Load the training lexicons
train_lines, train_pairs, train_map = _read_lexicon(LEXICON_DIR + "/ta.translit.sampled.train.tsv")

# Load the validation lexicons
validation_lines, validation_pairs, validation_map = _read_lexicon(LEXICON_DIR + "/ta.translit.sampled.dev.tsv")

# Load the test lexicons
test_lines, test_pairs, test_map = _read_lexicon(LEXICON_DIR + "/ta.translit.sampled.test.tsv")

# Number of training examples
M_train = len(train_map)

# Number of validation examples
M_val = len(validation_map)

# Number of test examples
M_test = len(test_map)


Preprocessing the Dataset for building the model

Below Cell Contains Two functions

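**Function Name**: data_preprocess

**Description** : This function scans the data dictionary and collects the character vocabularies, the token counts, the maximum sequence lengths, and the character-to-index maps.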

**Arguments** : data_dict

**Returns** : input_characters, target_characters, num_input_tokens, num_target_tokens, max_encoder_seq_length, max_decoder_seq_length, input_char_map, target_char_map





**Function Name**: one_hot_coding.


**Description** : This is also a preprocessing step; it converts the words of a data dictionary into one-hot encoded arrays.

**Arguments** : data_dict, max_encoder_seq_length, max_decoder_seq_length, num_input_tokens, num_target_tokens

**Returns**: input_words, target_words, encoder_input_data, decoder_input_data, decoder_output_data.

In [10]:
def data_preprocess(data_dict):
    """
    Collect vocabulary and length statistics from a source -> target word mapping.

    Arguments:
        data_dict: dictionary whose keys are source-language words and whose
            values are the corresponding target-language words.

    Returns:
        input_characters: sorted list of the distinct characters in the keys
        target_characters: sorted list of the distinct characters in the values,
            plus the "\t" (start) and "\n" (end) markers
        num_input_tokens: len(input_characters)
        num_target_tokens: len(target_characters)
        max_encoder_seq_length: length of the longest source word
        max_decoder_seq_length: length of the longest target word, markers included
        input_char_map: source character -> index, starting at 1
        target_char_map: target character -> index, starting at 1
    """
    input_words = list(data_dict)
    # Every target word is wrapped in a leading "\t" and a trailing "\n"
    target_words = ["\t" + data_dict[word] + "\n" for word in input_words]

    # Accumulate the two alphabets; the markers always belong to the target side
    source_alphabet = set()
    target_alphabet = {"\t", "\n"}
    for word in input_words:
        source_alphabet.update(word)
        target_alphabet.update(data_dict[word])

    input_characters = sorted(source_alphabet)
    target_characters = sorted(target_alphabet)

    num_input_tokens = len(input_characters)
    num_target_tokens = len(target_characters)

    max_encoder_seq_length = max(map(len, input_words))
    max_decoder_seq_length = max(map(len, target_words))

    # Indices start at 1 so that no character is ever mapped to 0
    input_char_map = {ch: index for index, ch in enumerate(input_characters, start=1)}
    target_char_map = {ch: index for index, ch in enumerate(target_characters, start=1)}

    return (
        input_characters,
        target_characters,
        num_input_tokens,
        num_target_tokens,
        max_encoder_seq_length,
        max_decoder_seq_length,
        input_char_map,
        target_char_map,
    )

def one_hot_coding(data_dict, max_encoder_seq_length, max_decoder_seq_length, num_input_tokens, num_target_tokens):
    """
    One-hot encode a source -> target word dictionary (training, validation or test).

    Characters are looked up in the module-level ``input_char_map`` and
    ``target_char_map``. The last axis of every array has one extra slot
    (index 0) that no character is mapped to.

    Returns:
        input_words: the dictionary keys, in iteration order
        target_words: the dictionary values wrapped in "\t" ... "\n"
        encoder_input_data: (M, max_encoder_seq_length, num_input_tokens + 1)
        decoder_input_data: (M, max_decoder_seq_length, num_target_tokens + 1)
        decoder_output_data: same shape as decoder_input_data, shifted one step
            to the left (it starts at the character after "\t")
    """
    input_words = list(data_dict)
    target_words = ["\t" + data_dict[word] + "\n" for word in input_words]

    M = len(input_words)
    encoder_input_data = np.zeros((M, max_encoder_seq_length, num_input_tokens + 1), dtype="float")
    decoder_shape = (M, max_decoder_seq_length, num_target_tokens + 1)
    decoder_input_data = np.zeros(decoder_shape, dtype="float")
    decoder_output_data = np.zeros(decoder_shape, dtype="float")

    for row, (source_word, target_word) in enumerate(zip(input_words, target_words)):
        # Source side: one active slot per character position
        for step, ch in enumerate(source_word):
            encoder_input_data[row, step, input_char_map[ch]] = 1.0

        # Target side: the expected output at step t is the decoder input at step t + 1
        for step, ch in enumerate(target_word):
            token = target_char_map[ch]
            decoder_input_data[row, step, token] = 1.0
            if step > 0:
                decoder_output_data[row, step - 1, token] = 1.0

    return input_words, target_words, encoder_input_data, decoder_input_data, decoder_output_data


In [11]:
# Vocabulary, token counts, maximum lengths and character maps come from the training split only.
input_characters, target_characters, num_input_tokens, num_target_tokens, max_encoder_seq_length, max_decoder_seq_length, input_char_map, target_char_map = data_preprocess(train_map)
# Every split is encoded with those training-derived lengths and vocabulary sizes.
input_words, target_words, encoder_input_data, decoder_input_data, decoder_output_data = one_hot_coding(train_map, max_encoder_seq_length, max_decoder_seq_length, num_input_tokens, num_target_tokens)
val_input_words, val_target_words, val_encoder_input_data, val_decoder_input_data, val_decoder_output_data = one_hot_coding(validation_map, max_encoder_seq_length, max_decoder_seq_length, num_input_tokens, num_target_tokens)
test_input_words, test_target_words, test_encoder_input_data, test_decoder_input_data, test_decoder_output_data = one_hot_coding(test_map, max_encoder_seq_length, max_decoder_seq_length, num_input_tokens, num_target_tokens)

In [12]:
# Using label encoding for the encoder inputs (and then find an embedding using the Embedding layer)
encoder_input_data = np.argmax(encoder_input_data, axis=2)
val_encoder_input_data = np.argmax(val_encoder_input_data, axis=2)
test_encoder_input_data = np.argmax(test_encoder_input_data, axis=2)

decoder_input_data = np.argmax(decoder_input_data, axis=2)
val_decoder_input_data = np.argmax(val_decoder_input_data, axis=2)
test_decoder_input_data = np.argmax(test_decoder_input_data, axis=2)

# Dictionaries mapping from indices to characters
reverse_input_char_map = dict((i, char) for char, i in input_char_map.items())
reverse_target_char_map = dict((i, char) for char, i in target_char_map.items())
reverse_target_char_map[0] = "\n"

**Defining Attention Layer**

In [13]:
class AttentionLayer(Layer):
    """
    This class implements Bahdanau attention (https://arxiv.org/pdf/1409.0473.pdf).

    Three trainable weights are created in build(): W_a (applied to the encoder
    output sequence), U_a (applied to the decoder output at each step) and V_a
    (reduces the combined activation to one score per encoder position).

    The layer is called on a list [encoder_output_sequence, decoder_output_sequence]
    and returns (context_vectors, attention_weights).
    """

    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """
        Create the attention weights.

        input_shape: list [encoder_output_shape, decoder_output_shape]; index 2
        of each shape is used as the feature size.
        """
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.

        # W_a: (encoder features, encoder features)
        self.W_a = self.add_weight(name='W_a',
                                   shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        # U_a: (decoder features, encoder features)
        self.U_a = self.add_weight(name='U_a',
                                   shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        # V_a: (encoder features, 1)
        self.V_a = self.add_weight(name='V_a',
                                   shape=tf.TensorShape((input_shape[0][2], 1)),
                                   initializer='uniform',
                                   trainable=True)

        super(AttentionLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs, verbose=False):
        """
        Compute attention weights and context vectors for every decoder step.

        inputs: [encoder_output_sequence, decoder_output_sequence]
        verbose: when True, print the shapes of the intermediate tensors

        Returns (c_outputs, e_outputs): the context vectors and the attention
        weights, both produced step by step with K.rnn over the decoder sequence.
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function computing the attention weights for one decoder step.
            inputs: the slice of decoder_out_seq that K.rnn passes for this step
                    (presumably (batch_size, de_hidden) -- TODO confirm)
            states: the K.rnn state list; it is only type-checked, never read
            Returns the softmax-normalised scores over the encoder positions, as
            both the step output and the new state.
            """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            """ Some parameters required for shaping tensors"""
            # NOTE(review): these three locals are not used anywhere below.
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)

            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            """ tanh(S.Wa + hj.Ua) """
            # The size-1 axis added above broadcasts over the encoder positions.
            # <= batch_size, en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            if verbose:
                print('Ws+Uh>', Ws_plus_Uh.shape)

            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function computing the context vector ci from the attention weights ei.
            inputs: the attention weights for one decoder step
            states: the K.rnn state list; it is only type-checked, never read
            """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            # Weighted sum of the encoder outputs over the encoder positions.
            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        # Placeholder initial states for K.rnn. Only their shapes matter: they are
        # reduced the same way as the step outputs (c_i and e_i respectively).
        fake_state_c = K.sum(encoder_out_seq, axis=1)  # <= (batch_size, latent_dim)
        fake_state_e = K.sum(encoder_out_seq, axis=2)  # <= (batch_size, enc_seq_len)

        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )

        """ Computing context vectors """
        # The attention weights of each decoder step are the inputs of the second pass.
        last_out, c_outputs, _ = K.rnn(
            context_step, e_outputs, [fake_state_c],
        )

        return c_outputs, e_outputs

    def compute_output_shape(self, input_shape):
        """ Shapes of the two outputs produced by the layer.

        The first entry repeats the decoder output shape; the second is
        (batch, decoder steps, encoder steps).
        NOTE(review): the context vectors are sums of encoder outputs, so the
        first entry presumably assumes equal encoder and decoder feature sizes --
        TODO confirm.
        """
        return [
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[1][2])),
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
        ]


**Building Rnn Model with attention**

Below Cell Contains a function of Build_model

**Function Name**: build_model

**Description** : This function is responsible for building the RNN network with attention.

**Arguments** : latent_dim,rnn_type,embedding_dim,dropout

**Returns** : Model





In [14]:
def build_model(latent_dim, rnn_type, embedding_dim, dropout):
    """
    Build the training-time encoder-decoder network with attention.

    Arguments:
        latent_dim: number of units in the encoder and in the decoder recurrent layer
        rnn_type: choice of cell type, one of "LSTM", "GRU" or "RNN" (SimpleRNN)
        embedding_dim: size of the encoder character embedding
        dropout: fraction used for both dropout and recurrent_dropout of the
            recurrent layers

    Returns:
        A keras.Model taking [encoder token indices, decoder token indices] and
        producing, for every decoder step, a softmax over num_target_tokens + 1
        classes.

    Raises:
        ValueError: if rnn_type is not one of the supported cell types.

    The vocabulary sizes are read from the module-level num_input_tokens and
    num_target_tokens.
    """
    rnn_classes = {
        "LSTM": tf.keras.layers.LSTM,
        "GRU": tf.keras.layers.GRU,
        "RNN": tf.keras.layers.SimpleRNN,
    }
    if rnn_type not in rnn_classes:
        # Without this check an unknown type only surfaced later as a NameError.
        raise ValueError(
            "Unsupported rnn_type {!r}; expected one of {}".format(rnn_type, sorted(rnn_classes))
        )
    rnn_class = rnn_classes[rnn_type]

    ## ENCODER
    encoder_input = keras.Input(shape=(None, ), name="EncoderInput")
    encoder_embedding = keras.layers.Embedding(num_input_tokens + 1, embedding_dim, name="EncoderInputEmbedding", mask_zero=True)(encoder_input)

    encoder_layer = rnn_class(latent_dim, return_sequences=True, return_state=True, name="EncoderLayer", dropout=dropout, recurrent_dropout=dropout)
    # LSTM returns two state tensors (hidden, cell); GRU and SimpleRNN return one.
    encoder_output, *encoder_states = encoder_layer(encoder_embedding)

    ## DECODER
    decoder_input = keras.Input(shape=(None, ), name="DecoderInput")
    # NOTE(review): the decoder embedding size is fixed at 64 and does not follow
    # embedding_dim -- confirm whether this is intentional.
    dec_emb = keras.layers.Embedding(num_target_tokens + 1, 64, name="DecoderInputEmbedding", mask_zero=True)(decoder_input)

    decoder_layer = rnn_class(latent_dim, return_sequences=True, return_state=True, name="DecoderLayer", dropout=dropout, recurrent_dropout=dropout)
    # The decoder starts from the final encoder state(s); its own final states are
    # not needed during training.
    decoder_output, *_ = decoder_layer(dec_emb, initial_state=encoder_states)

    # Attention layer
    attn_out, attn_states = AttentionLayer(name='attention_layer')([encoder_output, decoder_output])

    # Concat attention output and decoder recurrent output
    decoder_concat_input = tf.keras.layers.Concatenate(axis=-1, name='concat_layer')([decoder_output, attn_out])

    # Dense softmax layer applied at every decoder step picks one character
    decoder_dense = tf.keras.layers.TimeDistributed(keras.layers.Dense(num_target_tokens + 1, activation='softmax'))
    decoder_outputs = decoder_dense(decoder_concat_input)

    # Define the model
    model = keras.Model([encoder_input, decoder_input], decoder_outputs)

    return model


**Building Inference Model with attention**

Below Cell Contains a function of Build_Inference_model

**Function Name**:build_inference_model

**Description** : This function is responsible for building the inference model.

**Arguments** : model,rnn_type, latent_dim

**Returns** : encoder_model,decoder_model

In [15]:
def build_inference_model(model,rnn_type, latent_dim):
    """
    Split a trained model into separate encoder and decoder models for
    step-by-step decoding.

    Arguments:
        model: a trained model as returned by build_model; its layers are looked
            up by the names assigned there
        rnn_type: "LSTM" for a two-state recurrent layer; any other value is
            treated as a single-state layer (GRU / SimpleRNN)
        latent_dim: number of units of the recurrent layers

    Returns:
        encoder_model: maps the encoder input to [encoder outputs, state(s)]
        decoder_model: maps [decoder input, encoder outputs, state(s)] to
            [softmax outputs, new state(s), attention weights]

    Raises:
        ValueError: if the number of states of the encoder layer does not match
            rnn_type.
    """
    # LSTM carries a hidden and a cell state; the other cell types carry one state.
    num_states = 2 if rnn_type == "LSTM" else 1

    ## ENCODER
    # Input to the encoder, sequence of characters (word) in the source language
    encoder_inputs = model.input[0]
    encoder_outputs, *encoder_final_states = model.get_layer("EncoderLayer").output
    if len(encoder_final_states) != num_states:
        raise ValueError(
            "rnn_type {!r} expects {} state tensor(s) but the encoder layer returns {}".format(
                rnn_type, num_states, len(encoder_final_states)
            )
        )
    encoder_model = keras.Model(encoder_inputs, [encoder_outputs] + encoder_final_states)

    ## DECODER
    # Placeholders for the recurrent state(s) carried between decoding steps and
    # for the full encoder output sequence that the attention layer looks at.
    decoder_state_inputs = [keras.Input(shape=(latent_dim,)) for _ in range(num_states)]
    decoder_hidden_state_inputs = keras.Input(shape=(None, latent_dim))

    decoder_inputs = model.input[1]
    decoder_embedding = model.get_layer("DecoderInputEmbedding")(decoder_inputs)

    decoder_rnn = model.get_layer("DecoderLayer")
    decoder_outputs, *decoder_states = decoder_rnn(decoder_embedding, initial_state=decoder_state_inputs)

    # Attention over the encoder outputs
    attn_layer = model.get_layer("attention_layer")
    attn_out_inf, attn_states_inf = attn_layer([decoder_hidden_state_inputs, decoder_outputs])

    # Concatenate the decoder output with the context vectors
    decoder_inf_concat = model.get_layer("concat_layer")([decoder_outputs, attn_out_inf])

    # Softmax layer: build_model gives it no explicit name, but it is the last
    # layer of the training model.
    decoder_dense = model.layers[-1]
    decoder_probabilities = decoder_dense(decoder_inf_concat)

    # Create the decoder model
    decoder_model = keras.Model(
        [decoder_inputs, decoder_hidden_state_inputs] + decoder_state_inputs,
        [decoder_probabilities] + decoder_states + [attn_states_inf],
    )

    return encoder_model, decoder_model


**Decoding the Sequence**

Below Cell Contains a function of decode_sequence

**Function Name**:decode_sequence

**Description** : This function is responsible for decoding the words.

**Arguments** : input_words,rnn_type ,encoder_model, decoder_model

**Returns** : decoded_words


In [16]:


def decode_sequence(input_words,rnn_type ,encoder_model, decoder_model):

    """
    Greedily decode a batch of input sequences, one character at a time.

    Arguments:
        input_words: array of encoder token indices, one row per word
        rnn_type: kept for backward compatibility; the number of recurrent
            states is taken from what encoder_model returns
        encoder_model: model returning [encoder outputs, state(s)]
        decoder_model: model taking [decoder input, encoder outputs, state(s)]
            and returning [softmax outputs, new state(s), attention weights]

    Returns:
        A list with one decoded string per input row, cut at the first "\n".

    The start marker, the step limit and the index -> character map are read
    from the module-level target_char_map, max_decoder_seq_length and
    reverse_target_char_map.
    """
    batch_size = input_words.shape[0]

    # Encode the input; everything after the first output is recurrent state.
    encoder_outputs, *decoder_states = encoder_model.predict(input_words, verbose=0)

    # Every sequence starts with the "tab" start-of-sequence character.
    target_sequence = np.full((batch_size, 1), target_char_map["\t"], dtype=np.intp)

    decoded_words = [""] * batch_size
    for _ in range(max_decoder_seq_length):
        output_tokens, *decoder_states, _attention = decoder_model.predict(
            [target_sequence, encoder_outputs] + decoder_states, verbose=0
        )

        # Sample the most probable character using softmax outputs
        sampled_char_indices = np.argmax(output_tokens[:, -1, :], axis=1)

        for j, ch_index in enumerate(sampled_char_indices):
            decoded_words[j] += reverse_target_char_map[ch_index]

        # The sampled characters are the decoder input of the next step.
        target_sequence = sampled_char_indices.reshape(batch_size, 1)

    # Keep only the text before the first "\n". A word in which "\n" never
    # appears is kept whole (slicing with str.find would drop its last character).
    return [word.split("\n", 1)[0] for word in decoded_words]



The cell below contains the function `set_run_name`.

**Function Name**: `set_run_name`

**Description** : This function is responsible for creating the run name based on the sweep config.

**Arguments** : rnn_type,latent_dim,embedding_dim,batch_size,epochs

**Returns** : run name.


In [18]:
def set_run_name(rnn_type,latent_dim,embedding_dim,batch_size,epochs,dropout=0.3):
    """
    Build a descriptive run name from the hyper-parameters of one run.

    Arguments:
        rnn_type: cell type, e.g. "LSTM"
        latent_dim: number of units in the recurrent layers
        embedding_dim: size of the character embedding
        batch_size: training batch size
        epochs: number of training epochs
        dropout: dropout fraction to record in the name. It used to be read from
            an undefined module-level name; the default matches the value that
            train() hard-codes.

    Returns:
        The values joined with "_", e.g.
        "cell_LSTM_ncell_256_emb_16_dp_0.3_bs_64_epoc_10".
    """
    name_parts = [
        "cell", rnn_type,
        "ncell", str(latent_dim),
        "emb", str(embedding_dim),
        "dp", str(dropout),
        "bs", str(batch_size),
        "epoc", str(epochs),
    ]
    return "_".join(name_parts)

Below cell contains the train function

**Function name** : train

**Description** : This function contains the config defaults and the code for building and training the RNN model. We call this function from wandb.agent().

**Arguments** : None

**Return** : None

In [22]:
def train():
    """
    Train and evaluate one model configuration inside a W&B run.

    The hyper-parameters come from wandb.config (sweep values override the
    defaults below). The function trains the attention model, decodes the
    validation set with the inference models, logs the whole-word validation
    accuracy, plots the training curves and finally names and finishes the run.

    It relies on the module-level training / validation arrays and on
    val_target_words. It takes no arguments and returns nothing, as required
    for use with wandb.agent().
    """
    # Default values for hyper-parameters
    config_defaults = {
        "latent_dim": 256,
        "rnn_type": "LSTM",
        "embedding_dim": 16,
        "batch_size": 64,
        "epochs": 10
    }

    # Initialize a new wandb run
    wandb.init(config=config_defaults)

    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config

    # Local variables, values obtained from wandb config
    rnn_type = config.rnn_type
    latent_dim = config.latent_dim
    embedding_dim = config.embedding_dim
    dropout = 0.3
    batch_size = config.batch_size
    epochs = config.epochs

    # Define the model (build_model takes exactly these four arguments)
    model = build_model(latent_dim, rnn_type, embedding_dim, dropout)

    # Compile the model
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

    # Train the model
    history = model.fit(
            [encoder_input_data, decoder_input_data],
            decoder_output_data,
            batch_size = batch_size,
            epochs = epochs,
            verbose = 2,
            validation_data = ([val_encoder_input_data, val_decoder_input_data], val_decoder_output_data),
            callbacks=[WandbCallback()]
            )

    # Get the encoder and decoder model
    encoder_model, decoder_model = build_inference_model(model, rnn_type, latent_dim)

    # Decode the validation set in chunks. The chunk size is independent of the
    # training batch size, so it gets its own name.
    outputs = []
    n = val_encoder_input_data.shape[0]
    inference_batch_size = 1000
    for start in range(0, n, inference_batch_size):
        # Inputs
        query = val_encoder_input_data[start:start + inference_batch_size]
        # Results
        outputs.extend(decode_sequence(query, rnn_type, encoder_model, decoder_model))

    # To remove the tab and newline characters from the ground truth
    actual_words = [word[1:-1] for word in val_target_words]
    # Calculate validation accuracy
    validation_inference_accuracy = np.mean(np.array(outputs) == np.array(actual_words))
    print("Validation accuracy based on whole string matching = {} %".format(validation_inference_accuracy*100.0))

    wandb.log({"inference_val_accuracy": validation_inference_accuracy})

    # Plots of accuracy and loss
    plt.plot(history.history["val_accuracy"], label="Validation accuracy")
    plt.plot(history.history["accuracy"], label = "Training accuracy")
    plt.title("Accuracy vs epoch", size=14)
    plt.legend()
    plt.show()

    plt.plot(history.history["val_loss"], label="Validation loss")
    plt.plot(history.history["loss"], label = "Training loss")
    plt.title("Loss vs epoch", size=14)
    plt.legend()
    plt.show()

    # Meaningful name for the run
    wandb.run.name = set_run_name(rnn_type, latent_dim, embedding_dim, batch_size, epochs)
    wandb.run.save()
    wandb.run.finish()


Below cell contains the wandb sweep config.


In [24]:
# Sweep configuration
# Bayesian search over the cell type, layer size, embedding size and batch size,
# maximizing the metric logged as "val_accuracy".
sweep_config = {
  "name": "Assignment 3",
  "metric": {
      "name":"val_accuracy",
      "goal": "maximize"
  },
  "method": "bayes",
  "parameters": {
        "rnn_type": {
            "values": ["LSTM", "RNN", "GRU"]
        },
        "latent_dim": {
            "values": [128, 256, 512]
        },
        # NOTE(review): 126 and 258 look like typos for 128 and 256 -- confirm.
        "embedding_dim": {
            "values": [126, 258, 64]
        },
        "batch_size": {
            "values": [16, 64, 128]
        },
        # A single value keeps the number of epochs fixed across the sweep.
        "epochs": {
            "values": [10]
        }
    }
}


In [None]:
# Attach an agent to an existing sweep and run train() for up to 20 configurations.
# NOTE(review): the sweep id is hard-coded and sweep_config is never registered
# in the visible cells (no wandb.sweep call) -- confirm that the sweep with this
# id was created from the configuration defined in this notebook.
sweep_id = "ou020ze8"
wandb.agent(sweep_id, train,entity="swe-rana", project="CS6910_Assignment3", count=20)