## 1 Downloading the Dakshina dataset

In [1]:
!pip install wandb==0.12.2
!wget https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
!tar -xf dakshina_dataset_v1.0.tar



'wget' is not recognized as an internal or external command,
operable program or batch file.
tar: Error opening archive: Failed to open 'dakshina_dataset_v1.0.tar'


## 2 Processing the dataset
### 2.1 Data Processing:

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import os
import cv2


class DataProcessing():

    def __init__(self, path, s_lang = 'en', t_lang = "te"):
    
        self.s_lang = s_lang
        self.t_lang = t_lang
    
        self.trainpath = os.path.join(path, t_lang, "lexicons", t_lang+".translit.sampled.train.tsv")
        self.valpath = os.path.join(path, t_lang, "lexicons", t_lang+".translit.sampled.dev.tsv")
        self.testpath = os.path.join(path, t_lang, "lexicons", t_lang+".translit.sampled.test.tsv")

        self.train = pd.read_csv(
            self.trainpath,
            sep="\t",
            names=["tgt", "src", "count"],
        )
        self.test = pd.read_csv(
            self.testpath,
            sep="\t",
            names=["tgt", "src", "count"],
        )
        self.val = pd.read_csv(
            self.valpath,
            sep="\t",
            names=["tgt", "src", "count"],
        )

        #Train Data
        self.train_data = self.preprocess(self.train["src"].to_list(), self.train["tgt"].to_list())
        (
            self.train_encoder_input,
            self.train_decoder_input,
            self.train_decoder_target,
            self.source_voccab,
            self.target_voccab,
        ) = self.train_data

        #character to integer and integer to character
        self.src_charTOint, self.src_intTOchar = self.source_voccab
        self.tar_charTOint, self.tar_intTOchar = self.target_voccab

        #Validation Data
        self.val_data = self.encode(
            self.val["src"].to_list(),
            self.val["tgt"].to_list(),
            list(self.src_charTOint.keys()),
            list(self.tar_charTOint.keys()),
            src_charTOint=self.src_charTOint,
            tar_charTOint=self.tar_charTOint,
        )
        self.val_encoder_input, self.val_decoder_input, self.val_decoder_target = self.val_data
        self.src_charTOint, self.src_intTOchar = self.source_voccab
        self.tar_charTOint, self.tar_intTOchar = self.target_voccab

        #Test Data
        self.test_data = self.encode(
            self.test["src"].to_list(),
            self.test["tgt"].to_list(),
            list(self.src_charTOint.keys()),
            list(self.tar_charTOint.keys()),
            src_charTOint=self.src_charTOint,
            tar_charTOint=self.tar_charTOint,
        )
        self.test_encoder_input, self.test_decoder_input, self.test_decoder_target = self.test_data
        self.src_charTOint, self.src_intTOchar = self.source_voccab
        self.tar_charTOint, self.tar_intTOchar = self.target_voccab


    def dictionary_lookup(self, voccab):
        charTOint = dict([(char, i) for i, char in enumerate(voccab)])
        intTOchar = dict((i, char) for char, i in charTOint.items())
        return charTOint, intTOchar

    def preprocess(self, source , target):
        source_chars = set()
        target_chars = set()

        source = [str(x) for x in source]
        target = [str(x) for x in target]

        source_words,target_words = [],[]
        for src, tgt in zip(source, target):
            tgt = "\t" + tgt + "\n"
            
            source_words.append(src)
            target_words.append(tgt)

            for char in src:
                if char not in source_chars:
                    source_chars.add(char)

            for char in tgt:
                if char not in target_chars:
                    target_chars.add(char)

        source_chars = sorted(list(source_chars))
        target_chars = sorted(list(target_chars))

        #Add space
        source_chars.append(" ")
        target_chars.append(" ")

        num_encoder_tokens = len(source_chars)
        num_decoder_tokens = len(target_chars)
        max_source_length = max([len(txt) for txt in source_words])
        max_target_length = max([len(txt) for txt in target_words])

        print("No. of samples:", len(source))
        print("Src voccab length:", num_encoder_tokens)
        print("Tar voccab length:", num_decoder_tokens)
        print("Max iput sequence length:", max_source_length)
        print("Max output sequence length:", max_target_length)

        return self.encode(source_words, target_words, source_chars, target_chars)
    
    def encode(self, source, target, source_chars, target_chars, src_charTOint=None, tar_charTOint=None):
        num_decoder_tokens = len(target_chars)
        num_encoder_tokens = len(source_chars)
        max_source_length = max([len(txt) for txt in source])
        max_target_length = max([len(txt) for txt in target])

        source_voccab, target_voccab = None, None
        if src_charTOint == None and tar_charTOint == None:

            print("Dictionary lookups for char to int mapping and vice versa")
            src_charTOint, src_intTOchar = self.dictionary_lookup(source_chars)
            tar_charTOint, tar_intTOchar = self.dictionary_lookup(target_chars)

            source_voccab = (src_charTOint, src_intTOchar)
            target_voccab = (tar_charTOint, tar_intTOchar)

        encoder_input_data = np.zeros(
            (len(source), max_source_length, num_encoder_tokens), dtype="float32"
        )
        decoder_input_data = np.zeros(
            (len(source), max_target_length, num_decoder_tokens), dtype="float32"
        )
        decoder_target_data = np.zeros(
            (len(source), max_target_length, num_decoder_tokens), dtype="float32"
        )

        for i, (input_text, target_text) in enumerate(zip(source, target)):
            for t, char in enumerate(input_text):
                encoder_input_data[i, t, src_charTOint[char]] = 1.0
            encoder_input_data[i, t + 1 :, src_charTOint[" "]] = 1.0
            for t, char in enumerate(target_text):
                
                decoder_input_data[i, t, tar_charTOint[char]] = 1.0
                if t > 0:
                    decoder_target_data[i, t - 1, tar_charTOint[char]] = 1.0

            decoder_input_data[i, t + 1 :, tar_charTOint[" "]] = 1.0
            decoder_target_data[i, t:, tar_charTOint[" "]] = 1.0

        if source_voccab != None and target_voccab != None:
            return (
                encoder_input_data,
                decoder_input_data,
                decoder_target_data,
                source_voccab,
                target_voccab,
            )
        else:

            return encoder_input_data, decoder_input_data, decoder_target_data


### 2.2 Processing the database

(input language = English and output language = Telugu)

In [6]:
import numpy as np
import pandas as pd
import os

DATAPATH = r"dakshina_dataset_v1.0"

dataBase = DataProcessing(DATAPATH) 



No. of samples: 58550
Src voccab length: 27
Tar voccab length: 66
Max iput sequence length: 25
Max output sequence length: 22
Dictionary lookups for char to int mapping and vice versa


## 3 RNNs model for sequence to sequence machine translation 
### 3.1 Seq2Seq *Attention* Layer

In [4]:
!pip install tensorflow==2.8



In [8]:
import os
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer



class AttentionLayer(Layer):
    
    #Bahdanau attention
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)

        self.W_a = self.add_weight(name='W_a',shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),initializer='uniform',trainable=True)

        self.U_a = self.add_weight(name='U_a',shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),initializer='uniform',trainable=True)
        
        self.V_a = self.add_weight(name='V_a',shape=tf.TensorShape((input_shape[0][2], 1)),initializer='uniform',trainable=True)

        super(AttentionLayer, self).build(input_shape)  

    def call(self, inp, verbose=False):
        # [encoder_output_sequence, decoder_output_sequence]
        
        assert type(inp) == list
        enc_out_seq, dec_out_seq = inp
        
        if verbose:
            print('encoder_out-', enc_out_seq.shape)
            print('decoder_out-', dec_out_seq.shape)

        def energy_step(inp, states):
            #step fn
            assert_msg = "The tates must be an iterable but got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

           #shaping tensor
            en_seq_len, en_hidden = enc_out_seq.shape[1], enc_out_seq.shape[2]
            de_hidden = inp.shape[-1]

            W_a_dot_s = K.dot(enc_out_seq, self.W_a)

            U_a_dot_h = K.expand_dims(K.dot(inp, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            if verbose:
                print('Ws+Uh>', Ws_plus_Uh.shape)

            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inp, states):
            #step function

            assert_msg = "The states have to be an iterable but got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            c_i = K.sum(enc_out_seq * K.expand_dims(inp, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        fake_state_c = K.sum(enc_out_seq, axis=1)
        fake_state_e = K.sum(enc_out_seq, axis=2)  

        last_out, e_out, _ = K.rnn(
            energy_step, dec_out_seq, [fake_state_e],
        )

        #Context vec
        last_out, c_out, _ = K.rnn(
            context_step, e_out, [fake_state_c],
        )

        return c_out, e_out

    def compute_output_shape(self, input_shape):
        return [
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[1][2])),
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
        ]

### 3.1 Seq2Seq translation Model class

In [9]:
import os
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers
#from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, Input, InputLayer, Flatten, Activation, LSTM, SimpleRNN, GRU, TimeDistributed, Concatenate
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model, Sequential,  Model
from tensorflow.keras.callbacks import EarlyStopping

import wandb
wandb.login()

class S2STranslation():

    def __init__(self, ModConfigDict, srcCharTOInt, tgtCharTOInt, using_pretrained_model = False):
        self.cell_type = ModConfigDict["cell_type"]
        self.numEncoders = ModConfigDict["numEncoders"]
        self.latentDim = ModConfigDict["latentDim"]
        self.numDecoders = ModConfigDict["numDecoders"]
        self.hidden = ModConfigDict["hidden"]
        self.dropout = ModConfigDict["dropout"]
       
        self.tgtCharTOInt = tgtCharTOInt
        self.srcCharTOInt = srcCharTOInt

    def build_configurable_model(self):  

        #RNN     
        if self.cell_type == "RNN":

            # encoder
            encoder_inp = Input(shape=(None, len(self.srcCharTOInt)))
            encoder_out = encoder_inp
            for i in range(1, self.numEncoders + 1):
                encoder = SimpleRNN(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_out, state = encoder(encoder_inp)
            encoder_state = [state]

            # decoder
            decoder_inp = Input(shape=(None, len(self.tgtCharTOInt)))
            decoder_out = decoder_inp
            for i in range(1, self.numDecoders + 1):
                decoder = SimpleRNN(
                    self.latentDim,
                    return_sequences=True,
                    return_state=True,
                    dropout=self.dropout,
                )
                decoder_out, _ = decoder(decoder_inp, initial_state=encoder_state)

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_out = hidden(decoder_out)
            dec_dense = Dense(len(self.tgtCharTOInt), activation="softmax")
            decoder_out = dec_dense(hidden_out)
            model = Model([encoder_inp, decoder_inp], decoder_out)
            
            return model
        
        #LSTM
        elif self.cell_type == "LSTM":

            # encoder
            encoder_inp = Input(shape=(None, len(self.srcCharTOInt)))
            encoder_out = encoder_inp
            for i in range(1, self.numEncoders + 1):
                encoder = LSTM(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_out, state_h, state_c = encoder(encoder_out)
            encoder_state = [state_h, state_c]

            # decoder
            decoder_inp = Input(shape=(None, len(self.tgtCharTOInt)))
            decoder_out = decoder_inp

            for i in range(1, self.numDecoders + 1):
                decoder = LSTM(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                decoder_out, _, _ = decoder(
                    decoder_out, initial_state=encoder_state
                )

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_out = hidden(decoder_out)
            dec_dense = Dense(len(self.tgtCharTOInt), activation="softmax")
            decoder_out = dec_dense(hidden_out)
            model = Model([encoder_inp, decoder_inp], decoder_out)
            
            return model

        #GRU
        elif self.cell_type == "GRU":

            # encoder
            encoder_inp = Input(shape=(None, len(self.srcCharTOInt)))
            encoder_out = encoder_inp

            for i in range(1, self.numEncoders + 1):
                encoder = GRU(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_out, state = encoder(encoder_inp)
            encoder_state = [state]

            # decoder
            decoder_inp = Input(shape=(None, len(self.tgtCharTOInt)))
            decoder_out = decoder_inp
            for i in range(1, self.numDecoders + 1):
                decoder = GRU(
                    self.latentDim,
                    return_sequences=True,
                    return_state=True,
                    dropout=self.dropout,
                )
                decoder_out, _ = decoder(decoder_inp, initial_state=encoder_state)

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_out = hidden(decoder_out)
            dec_dense = Dense(len(self.tgtCharTOInt), activation="softmax")
            decoder_out = dec_dense(hidden_out)
            model = Model([encoder_inp, decoder_inp], decoder_out)
            
            return model


            
    def build_attention_model(self):

        #RNN       
        if self.cell_type == "RNN":
            # encoder
            encoder_inp = Input(shape=(None, len(self.srcCharTOInt)))
            encoder_out = encoder_inp
            for i in range(1, self.numEncoders + 1):
                encoder = SimpleRNN(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_out, state = encoder(encoder_inp) 
                
                if i == 1:
                    encoder_first_outputs= encoder_out                  
            encoder_state = [state]
            

            # decoder
            decoder_inp = Input(shape=(None, len(self.tgtCharTOInt)))
            decoder_out = decoder_inp
            for i in range(1, self.numDecoders + 1):
                decoder = SimpleRNN(
                    self.latentDim,
                    return_sequences=True,
                    return_state=True,
                    dropout=self.dropout,
                )

                decoder_out, _ = decoder(decoder_inp, initial_state=encoder_state)
                
                if i == self.numDecoders:
                    decoder_first_outputs = decoder_out

            attention_layer = AttentionLayer(name='attention_layer')
            attention_out, attention_st = attention_layer([encoder_first_outputs, decoder_first_outputs])


            decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attention_out])

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_time = TimeDistributed(hidden, name='time_distributed_layer')
            hidden_out = hidden(decoder_concat_input)
            dec_dense = Dense(len(self.tgtCharTOInt), activation="softmax")
            decoder_out = dec_dense(hidden_out)
            model = Model([encoder_inp, decoder_inp], decoder_out)
            
            return model
        
        #LSTM
        elif self.cell_type == "LSTM":
            
            # encoder
            encoder_inp = Input(shape=(None, len(self.srcCharTOInt)))
            encoder_out = encoder_inp
            for i in range(1, self.numEncoders + 1):
                encoder = LSTM(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_out, state_h, state_c = encoder(encoder_out)
                if i == 1:
                    encoder_first_outputs= encoder_out                  
         
            encoder_state = [state_h, state_c]

            # decoder
            decoder_inp = Input(shape=(None, len(self.tgtCharTOInt)))
            decoder_out = decoder_inp
            for i in range(1, self.numDecoders + 1):
                decoder = LSTM(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                decoder_out, _, _ = decoder(
                    decoder_out, initial_state=encoder_state
                )
                if i == self.numDecoders:
                    decoder_first_outputs = decoder_out

            attention_layer = AttentionLayer(name='attention_layer')
            attention_out, attention_st = attention_layer([encoder_first_outputs, decoder_first_outputs])

            decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attention_out])

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_time = TimeDistributed(hidden, name='time_distributed_layer')
            hidden_out = hidden(decoder_concat_input)
            dec_dense = Dense(len(self.tgtCharTOInt), activation="softmax")
            decoder_out = dec_dense(hidden_out)
            model = Model([encoder_inp, decoder_inp], decoder_out)
            
            return model
        
        #GRU
        elif self.cell_type == "GRU":

            # encoder
            encoder_inp = Input(shape=(None, len(self.srcCharTOInt)))
            encoder_out = encoder_inp

            for i in range(1, self.numEncoders + 1):
                encoder = GRU(
                    self.latentDim,
                    return_state=True,
                    return_sequences=True,
                    dropout=self.dropout,
                )
                encoder_out, state = encoder(encoder_inp)

                if i == 1:
                    encoder_first_outputs= encoder_out                  
         
            encoder_state = [state]

            # decoder
            decoder_inp = Input(shape=(None, len(self.tgtCharTOInt)))
            decoder_out = decoder_inp

            for i in range(1, self.numDecoders + 1):
                decoder = GRU(
                    self.latentDim,
                    return_sequences=True,
                    return_state=True,
                    dropout=self.dropout,
                )
                decoder_out, _ = decoder(decoder_inp, initial_state=encoder_state)
                if i == self.numDecoders:
                    decoder_first_outputs = decoder_out

            attention_layer = AttentionLayer(name='attention_layer')
            attention_out, attention_st = attention_layer([encoder_first_outputs, decoder_first_outputs])

            decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attention_out])

            # dense
            hidden = Dense(self.hidden, activation="relu")
            hidden_time = TimeDistributed(hidden, name='time_distributed_layer')
            hidden_out = hidden(decoder_concat_input)
            dec_dense = Dense(len(self.tgtCharTOInt), activation="softmax")
            decoder_out = dec_dense(hidden_out)
            model = Model([encoder_inp, decoder_inp], decoder_out)
            
            return model

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkaranwxlia[0m (use `wandb login --relogin` to force relogin)


### 3.2 Training the Model


In [7]:
import numpy as np
import pandas as pd
import os

import tensorflow as tf
from tensorflow.keras.layers import RNN, LSTM, GRU, Dense
from tensorflow.keras import Input, Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model
from wandb.keras import WandbCallback

#using a gpu
physical_devices = tf.config.list_physical_devices('GPU')
try:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
except:
    pass


def train():
    default_config = {
        "cell_type": "RNN",
        "latentDim": 256,
        "hidden": 128,
        "optimiser": "rmsprop",
        "numEncoders": 1,
        "numDecoders": 1,
        "dropout": 0.2,
        "epochs": 1,
        "batch_size": 64,
    }

    wandb.init(config=default_config)
    #wandb.init(config=default_config,  project="Assignment-3_WithAttention", entity="karanwxlia")
    config = wandb.config
    
    wandb.run.name = (
        str(config.cell_type)
        + dataBase.source_lang
        + str(config.numEncoders)
        + "_"
        + dataBase.target_lang
        + "_"
        + str(config.numDecoders)
        + "_"
        + config.optimiser
        + "_"
        + str(config.epochs)
        + "_"
        + str(config.dropout) 
        + "_"
        + str(config.batch_size)
        + "_"
        + str(config.latentDim)
    )
    wandb.run.save()

    modelInit = S2STranslation(config,srcChar2Int=dataBase.src_charTOint, tgtChar2Int=dataBase.tar_charTOint)
    
    model = modelInit.build_attention_model()
    model.summary()
    model.compile(
        optimizer=config.optimiser,
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    earlystopping = EarlyStopping(
        monitor="val_accuracy", min_delta=0.01, patience=5, verbose=2, mode="auto"
    )
    model.fit(
        [dataBase.train_encoder_input, dataBase.train_decoder_input],
        dataBase.train_decoder_target,
        batch_size=config.batch_size,
        epochs=config.epochs,
        validation_data=([dataBase.val_encoder_input, dataBase.val_decoder_input], dataBase.val_decoder_target),
        callbacks=[earlystopping, WandbCallback()],
    )

    model.save(os.path.join("./TrainedAttentionModels", wandb.run.name))    
    wandb.finish()
    
    #return model






Running the wandb sweep: 

In [9]:
  
sweep_config = {
    "name": "Bayesian Sweep without attention - 2",
    "method": "bayes",
    "metric": {"name": "val_accuracy", "goal": "maximize"},
    "parameters": {
        
        "cell_type": {"values": ["RNN", "GRU", "LSTM"]},
        
        "latentDim": {"values": [256, 128, 64, 32]},
        
        "hidden": {"values": [128, 64, 32, 16]},
        
        "optimiser": {"values": ["rmsprop", "adam"]},
        
        "numEncoders": {"values": [1, 2, 3]},
        
        "numDecoders": {"values": [1, 2, 3]},
        
        "dropout": {"values": [0.1, 0.2, 0.3]},
        
        "epochs": {"values": [5,10,15, 20]},
        
        "batch_size": {"values": [32, 64]},
    },
}

sweep_id = wandb.sweep(sweep_config, project="Assignment-3_WithAttention", entity="karanwxlia")

wandb.agent(sweep_id, train, count = 100)


#train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: q6wtpkdx
Sweep URL: https://wandb.ai/karanwxlia/Assignment-3_WithAttention/sweeps/q6wtpkdx


[34m[1mwandb[0m: Agent Starting Run: 49rva5iy with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden: 32
[34m[1mwandb[0m: 	latentDim: 64
[34m[1mwandb[0m: 	numDecoders: 3
[34m[1mwandb[0m: 	numEncoders: 3
[34m[1mwandb[0m: 	optimiser: rmsprop
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkaranwxlia[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.13.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None, 66)]   0           []                               
                                                                                                  
 simple_rnn_2 (SimpleRNN)       [(None, None, 64),   5888        ['input_1[0][0]']                
                                 (None, 64)]                                                      
                                                                                                  
 simple_rnn_5 (SimpleRNN)       [(None, None, 64),   8384        ['input_2[0][0]',            

0,1
accuracy,0.90426
best_epoch,10.0
best_val_loss,1.0864
epoch,11.0
loss,0.32383
val_accuracy,0.82967
val_loss,1.13446


0,1
accuracy,▁▅▆▇▇███████
epoch,▁▂▂▃▄▄▅▅▆▇▇█
loss,█▄▃▂▂▁▁▁▁▁▁▁
val_accuracy,▁▄▄▆▄▃▇▅████
val_loss,▅▅▆▂▆█▁▅▂▂▁▂


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: i94dt723 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden: 16
[34m[1mwandb[0m: 	latentDim: 64
[34m[1mwandb[0m: 	numDecoders: 2
[34m[1mwandb[0m: 	numEncoders: 2
[34m[1mwandb[0m: 	optimiser: rmsprop
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.13.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None, 66)]   0           []                               
                                                                                                  
 gru_1 (GRU)                    [(None, None, 64),   17856       ['input_1[0][0]']                
                                 (None, 64)]                                                      
                                                                                                  
 gru_3 (GRU)                    [(None, None, 64),   25344       ['input_2[0][0]',            



INFO:tensorflow:Assets written to: ./TrainedAttentionModels\GRUen2_te_2_rmsprop_10_0.1_64_64\assets


INFO:tensorflow:Assets written to: ./TrainedAttentionModels\GRUen2_te_2_rmsprop_10_0.1_64_64\assets


0,1
accuracy,0.92457
best_epoch,0.0
best_val_loss,1.3399
epoch,9.0
loss,0.25492
val_accuracy,0.80444
val_loss,1.61788


0,1
accuracy,▁▃▃▅▆▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▆▅▄▃▂▁▁▁▁
val_accuracy,▁▂▃▄▄▂▇█▇█
val_loss,▁▂▂▂▅█▃▃▅▄


[34m[1mwandb[0m: Agent Starting Run: vkl73je3 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden: 16
[34m[1mwandb[0m: 	latentDim: 256
[34m[1mwandb[0m: 	numDecoders: 1
[34m[1mwandb[0m: 	numEncoders: 1
[34m[1mwandb[0m: 	optimiser: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.13.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None, 66)]   0           []                               
                                                                                                  
 lstm (LSTM)                    [(None, None, 256),  290816      ['input_1[0][0]']                
                                 (None, 256),                                                     
                                 (None, 256)]                                                     
                                                                                              



INFO:tensorflow:Assets written to: ./TrainedAttentionModels\LSTMen1_te_1_adam_10_0.1_32_256\assets


INFO:tensorflow:Assets written to: ./TrainedAttentionModels\LSTMen1_te_1_adam_10_0.1_32_256\assets


0,1
accuracy,0.94619
best_epoch,0.0
best_val_loss,1.32723
epoch,6.0
loss,0.18166
val_accuracy,0.80607
val_loss,1.72839


0,1
accuracy,▁▆▇████
epoch,▁▂▃▅▆▇█
loss,█▂▂▁▁▁▁
val_accuracy,▁▇▆▃█▇█
val_loss,▁▃▅█▇█▇


[34m[1mwandb[0m: Agent Starting Run: 9wdy2txc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden: 32
[34m[1mwandb[0m: 	latentDim: 32
[34m[1mwandb[0m: 	numDecoders: 2
[34m[1mwandb[0m: 	numEncoders: 3
[34m[1mwandb[0m: 	optimiser: rmsprop
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.13.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 lstm (LSTM)                    [(None, None, 32),   7680        ['input_1[0][0]']                
                                 (None, 32),                                                      
                                 (None, 32)]                                                      
                                                                                                  
 lstm_1 (LSTM)                  [(None, None, 32),   8320        ['lstm[0][0]']                   
                                 (None, 32),                                                  



INFO:tensorflow:Assets written to: ./TrainedAttentionModels\LSTMen3_te_2_rmsprop_20_0.3_64_32\assets


INFO:tensorflow:Assets written to: ./TrainedAttentionModels\LSTMen3_te_2_rmsprop_20_0.3_64_32\assets


0,1
accuracy,0.79005
best_epoch,2.0
best_val_loss,1.51263
epoch,9.0
loss,0.72949
val_accuracy,0.72187
val_loss,1.71786


0,1
accuracy,▁▄▅▆▆▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▅▇▇█▇▇███
val_loss,▃▁▁▃▂▆▇▆▅█


[34m[1mwandb[0m: Agent Starting Run: tksszgd4 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden: 128
[34m[1mwandb[0m: 	latentDim: 128
[34m[1mwandb[0m: 	numDecoders: 1
[34m[1mwandb[0m: 	numEncoders: 3
[34m[1mwandb[0m: 	optimiser: rmsprop
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.13.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None, 66)]   0           []                               
                                                                                                  
 gru_2 (GRU)                    [(None, None, 128),  60288       ['input_1[0][0]']                
                                 (None, 128)]                                                     
                                                                                                  
 gru_3 (GRU)                    [(None, None, 128),  75264       ['input_2[0][0]',            



INFO:tensorflow:Assets written to: ./TrainedAttentionModels\GRUen3_te_1_rmsprop_20_0.3_32_128\assets


INFO:tensorflow:Assets written to: ./TrainedAttentionModels\GRUen3_te_1_rmsprop_20_0.3_32_128\assets


0,1
accuracy,0.90994
best_epoch,0.0
best_val_loss,1.44864
epoch,8.0
loss,0.29632
val_accuracy,0.78459
val_loss,1.74924


0,1
accuracy,▁▅▆▇▇████
epoch,▁▂▃▄▅▅▆▇█
loss,█▄▃▂▂▁▁▁▁
val_accuracy,▃▄▁█▇▇▇▆▆
val_loss,▁▂▆▁▄▅▆▆█


[34m[1mwandb[0m: Agent Starting Run: a13yjr8g with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden: 16
[34m[1mwandb[0m: 	latentDim: 32
[34m[1mwandb[0m: 	numDecoders: 3
[34m[1mwandb[0m: 	numEncoders: 3
[34m[1mwandb[0m: 	optimiser: rmsprop
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.13.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None, 66)]   0           []                               
                                                                                                  
 simple_rnn_2 (SimpleRNN)       [(None, None, 32),   1920        ['input_1[0][0]']                
                                 (None, 32)]                                                      
                                                                                                  
 simple_rnn_5 (SimpleRNN)       [(None, None, 32),   3168        ['input_2[0][0]',            

INFO:tensorflow:Assets written to: ./TrainedAttentionModels\RNNen3_te_3_rmsprop_20_0.3_32_32\assets


0,1
accuracy,0.84679
best_epoch,6.0
best_val_loss,1.22984
epoch,15.0
loss,0.53004
val_accuracy,0.79864
val_loss,1.36174


0,1
accuracy,▁▄▆▇▇▇▇█████████
epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
loss,█▅▃▂▂▂▂▁▁▁▁▁▁▁▁▁
val_accuracy,▁▃▃▃▄▇▇▅▇▆█▇▆▆▆█
val_loss,█▆▆█▇▂▁▇▃▆▂▄▇▇▆▄


[34m[1mwandb[0m: Agent Starting Run: 5i1knhps with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden: 64
[34m[1mwandb[0m: 	latentDim: 64
[34m[1mwandb[0m: 	numDecoders: 3
[34m[1mwandb[0m: 	numEncoders: 3
[34m[1mwandb[0m: 	optimiser: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.13.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None, 66)]   0           []                               
                                                                                                  
 gru_2 (GRU)                    [(None, None, 64),   17856       ['input_1[0][0]']                
                                 (None, 64)]                                                      
                                                                                                  
 gru_5 (GRU)                    [(None, None, 64),   25344       ['input_2[0][0]',            



INFO:tensorflow:Assets written to: ./TrainedAttentionModels\GRUen3_te_3_adam_20_0.2_64_64\assets


INFO:tensorflow:Assets written to: ./TrainedAttentionModels\GRUen3_te_3_adam_20_0.2_64_64\assets


0,1
accuracy,0.91218
best_epoch,1.0
best_val_loss,1.48842
epoch,9.0
loss,0.28957
val_accuracy,0.7712
val_loss,1.89804


0,1
accuracy,▁▄▆▇▇█████
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▂▂▂▁▁▁▁▁
val_accuracy,▁▆▄▆█▇▆▆█▇
val_loss,▂▁▅▅▄▅▇█▇█


[34m[1mwandb[0m: Agent Starting Run: bmq9s1q0 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden: 16
[34m[1mwandb[0m: 	latentDim: 32
[34m[1mwandb[0m: 	numDecoders: 3
[34m[1mwandb[0m: 	numEncoders: 3
[34m[1mwandb[0m: 	optimiser: rmsprop
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.13.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 27)]   0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None, 66)]   0           []                               
                                                                                                  
 gru_2 (GRU)                    [(None, None, 32),   5856        ['input_1[0][0]']                
                                 (None, 32)]                                                      
                                                                                                  
 gru_5 (GRU)                    [(None, None, 32),   9600        ['input_2[0][0]',            



INFO:tensorflow:Assets written to: ./TrainedAttentionModels\GRUen3_te_3_rmsprop_15_0.2_64_32\assets


INFO:tensorflow:Assets written to: ./TrainedAttentionModels\GRUen3_te_3_rmsprop_15_0.2_64_32\assets


Move the trained models to Google drive