<a href="https://colab.research.google.com/github/swetha-rana/Assignment_3/blob/main/Best_model_without_attention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The cell below installs the necessary modules.

In [None]:
%%capture
# Install the wandb client (used by the last cell to log the test accuracy).
# The %%capture magic above suppresses pip's output.
!pip install wandb

The cell below imports the necessary modules.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import wandb
import copy

The cell below downloads the dataset archive and extracts it.

In [None]:
# Download the Dakshina v1.0 archive and extract it into the current working directory.
!wget https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
!tar -xvf dakshina_dataset_v1.0.tar

--2022-05-08 13:38:02--  https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.202.128, 173.194.194.128, 64.233.191.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.202.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2008340480 (1.9G) [application/x-tar]
Saving to: ‘dakshina_dataset_v1.0.tar’


2022-05-08 13:38:15 (153 MB/s) - ‘dakshina_dataset_v1.0.tar’ saved [2008340480/2008340480]

dakshina_dataset_v1.0/bn/
dakshina_dataset_v1.0/bn/lexicons/
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.test.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.train.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.dev.tsv
dakshina_dataset_v1.0/bn/native_script_wikipedia/
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.valid.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.info.sorted.tsv.

The cell below contains the data-processing function, which is responsible for preprocessing the data.

In [None]:
def data_preprocessing(path,ip_token=None,ip_len=None,output_token=None,output_len=None):
  """Read a tab-separated lexicon file and encode it as padded character-id matrices.

  The file is read as three unnamed columns. The second column is used as the
  input word and the first column as the target word. Every target word is
  wrapped as "\\t" + word + "\\n" so that tab marks the start and newline the
  end of the sequence. Rows where either word equals '</s>' are skipped.

  Args:
    path: Path to the TSV file.
    ip_token: Fitted character-level Tokenizer for the input words. When None,
      the rows are shuffled and a new tokenizer is fitted on this file.
    ip_len: Width of the returned input matrix. None pads to the longest input
      word in this file.
    output_token: Fitted character-level Tokenizer for the target words. When
      None, a new tokenizer is fitted on this file.
    output_len: Width of the returned target matrix. None pads to the longest
      target word in this file.

  Returns:
    Tuple (ip_transcription, input_text, ip_token, output_transcription,
    output_text, output_token): the raw input strings, their zero-padded id
    matrix, the input tokenizer, the wrapped target strings, their zero-padded
    id matrix and the target tokenizer. Both matrices are integer NumPy arrays
    padded at the end ('post').
  """
  pad_sequences = tf.keras.preprocessing.sequence.pad_sequences

  # dtype=str with keep_default_na=False stops pandas from parsing words such as
  # "null" or "NA" as missing values, which astype(str) would then turn into "nan".
  df = pd.read_csv(
      path, names=["1", "2", "3"], sep="\t", dtype=str, keep_default_na=False
  ).astype(str)

  # Only the training pass (no tokenizer supplied) shuffles the rows.
  if ip_token is None:
    df = df.sample(frac=1)

  ip_transcription = []
  output_transcription = []
  for op_text, ip_text in zip(df["1"], df["2"]):
    if ip_text == '</s>' or op_text == '</s>':
      continue
    ip_transcription.append(ip_text)
    output_transcription.append("\t" + op_text + "\n")

  if ip_token is None:
    ip_token = tf.keras.preprocessing.text.Tokenizer(filters='', char_level=True)
    ip_token.fit_on_texts(ip_transcription)
  if output_token is None:
    output_token = tf.keras.preprocessing.text.Tokenizer(filters='', char_level=True)
    output_token.fit_on_texts(output_transcription)

  # maxlen pads (or, for over-long words, truncates at the end) to the requested
  # width in one step. The earlier approach concatenated a block of float zeros,
  # which failed with a negative dimension whenever a word in this file was
  # longer than the requested width.
  input_text = pad_sequences(
      ip_token.texts_to_sequences(ip_transcription),
      maxlen=ip_len, padding='post', truncating='post')
  output_text = pad_sequences(
      output_token.texts_to_sequences(output_transcription),
      maxlen=output_len, padding='post', truncating='post')

  return ip_transcription,input_text,ip_token,output_transcription,output_text,output_token

In [None]:
# Training split: fits both tokenizers and fixes the padded sequence lengths.
(train_ip_transcription,
 train_input_text,
 train_ip_token,
 train_output_transcription,
 train_output_text,
 train_output_token) = data_preprocessing(
    "/content/dakshina_dataset_v1.0/ta/lexicons/ta.translit.sampled.train.tsv")

# Padded widths of the training matrices; the other splits are padded to match.
encoder_seq_length = train_input_text.shape[1]
decoder_seq_length = train_output_text.shape[1]

# Test split, encoded with the training tokenizers.
(test_ip_transcription,
 test_input_text,
 test_ip_token,
 test_output_transcription,
 test_output_text,
 test_output_token) = data_preprocessing(
    "/content/dakshina_dataset_v1.0/ta/lexicons/ta.translit.sampled.test.tsv",
    train_ip_token,
    encoder_seq_length,
    train_output_token,
    decoder_seq_length)

# Validation (dev) split, encoded with the training tokenizers.
(val_ip_transcription,
 val_input_text,
 val_ip_token,
 val_output_transcription,
 val_output_text,
 val_output_token) = data_preprocessing(
    "/content/dakshina_dataset_v1.0/ta/lexicons/ta.translit.sampled.dev.tsv",
    train_ip_token,
    encoder_seq_length,
    train_output_token,
    decoder_seq_length)

# Vocabulary sizes; +1 accounts for id 0, which is used for padding.
encoder_tokens = len(train_ip_token.word_index) + 1
decoder_tokens = len(train_output_token.word_index) + 1

# Reverse lookups: character id -> character.
index_to_char_input = {index: char for char, index in train_ip_token.word_index.items()}
index_to_char_target = {index: char for char, index in train_output_token.word_index.items()}

The cell below contains the sequence-to-sequence model builder. The `rnn_type` argument selects the recurrent cell: `LSTM`, `GRU`, or `RNN`.

In [None]:
def seq_seq_model(rnn_type,embed_dim,encoder_layers,decoder_layers,dropout):
  """Build the encoder-decoder training model (no attention).

  The encoder embeds the input ids and passes them through `encoder_layers`
  stacked recurrent layers; only the last one returns its final state. The
  decoder embeds the target ids and passes them through `decoder_layers`
  stacked recurrent layers, each initialised with the final encoder state,
  followed by a softmax Dense layer named 'final'.

  Reads the module-level globals `latent_dim` (units per recurrent layer),
  `encoder_seq_length`, `decoder_seq_length`, `encoder_tokens` and
  `decoder_tokens`; they must be defined before this function is called.

  Args:
    rnn_type: 'RNN' (SimpleRNN), 'GRU' or 'LSTM'.
    embed_dim: Output size of both embedding layers.
    encoder_layers: Number of stacked encoder recurrent layers (>= 1).
    decoder_layers: Number of stacked decoder recurrent layers (>= 1).
    dropout: Input dropout rate passed to every recurrent layer.

  Returns:
    A keras.Model mapping [encoder_inputs, decoder_inputs] to per-timestep
    softmax outputs over the decoder vocabulary.

  Raises:
    ValueError: If `rnn_type` is not supported or a layer count is below 1.
  """
  # Validate first so bad arguments fail with a clear message instead of an
  # undefined-variable error further down.
  layer_names = {'RNN': 'SimpleRNN', 'GRU': 'GRU', 'LSTM': 'LSTM'}
  if rnn_type not in layer_names:
    raise ValueError(
        "rnn_type must be one of 'RNN', 'GRU' or 'LSTM', got %r" % (rnn_type,))
  if encoder_layers < 1 or decoder_layers < 1:
    raise ValueError("encoder_layers and decoder_layers must both be at least 1")
  rnn_layer = getattr(keras.layers, layer_names[rnn_type])

  # NOTE: inference_ looks layers up by position in model.layers, so the graph
  # structure and the order in which layers are created here must not change.

  # Encoder: embedding -> (encoder_layers - 1) sequence-returning layers -> state-returning layer.
  encoder_inputs = keras.Input(shape=(encoder_seq_length,))
  encoder_seq = keras.layers.Embedding(encoder_tokens, embed_dim)(encoder_inputs)
  for _ in range(encoder_layers - 1):
    encoder_seq = rnn_layer(latent_dim, return_sequences=True, dropout=dropout)(encoder_seq)
  encoder_result = rnn_layer(latent_dim, return_state=True, dropout=dropout)(encoder_seq)
  # Element 0 is the layer output; the rest is the final state
  # (one tensor for SimpleRNN/GRU, [h, c] for LSTM).
  encoder_states = list(encoder_result[1:])

  # Decoder: embedding -> stacked layers, every one seeded with the encoder state.
  decoder_inputs = keras.Input(shape=(decoder_seq_length,))
  decoder_seq = keras.layers.Embedding(decoder_tokens, embed_dim)(decoder_inputs)
  for _ in range(decoder_layers):
    decoder_result = rnn_layer(
        latent_dim, return_sequences=True, return_state=True, dropout=dropout
    )(decoder_seq, initial_state=encoder_states)
    decoder_seq = decoder_result[0]

  decoder_dense = keras.layers.Dense(decoder_tokens, activation="softmax",name='final')
  decoder_outputs = decoder_dense(decoder_seq)

  model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
  return model


The cell below contains the **inference_** function, which is responsible for building the inference models (encoder and decoder).

In [None]:
def inference_(model,encoder_layers,decoder_layers):
    """Split a trained seq2seq model into separate encoder and decoder inference models.

    Layers are located by position in `model.layers`: index
    `encoder_layers + 2` is applied to the decoder token input (the decoder
    embedding), index `encoder_layers + 3` is the last encoder recurrent layer,
    and indices `encoder_layers + 4 ...` are the decoder recurrent layers. The
    output layer is looked up by its name, 'final'.

    Args:
      model: Trained model whose layer order matches the layout above.
      encoder_layers: Number of encoder recurrent layers in `model`.
      decoder_layers: Number of decoder recurrent layers in `model` (>= 1).

    Returns:
      (encoder_model, decoder_model). `encoder_model` maps the encoder input to
      the final encoder state(s). `decoder_model` takes one token id plus the
      state(s) of every decoder layer (h and c per layer for LSTM, in layer
      order) and returns [probabilities] followed by the updated states in the
      same order.

    Raises:
      ValueError: If `decoder_layers` is below 1 or the layer at the expected
        encoder position is not a recurrent layer.
    """
    if decoder_layers < 1:
      raise ValueError("decoder_layers must be at least 1")

    encoder_inputs = model.input[0]
    last_encoder = model.layers[encoder_layers+3]
    if not isinstance(last_encoder, keras.layers.RNN):
      raise ValueError(
          "expected a recurrent layer at model.layers[%d], found %s"
          % (encoder_layers + 3, type(last_encoder).__name__))
    # LSTM and GRU both subclass keras.layers.RNN, so the specific LSTM check is
    # what decides between two states (h, c) and a single state per layer.
    is_lstm = isinstance(last_encoder, keras.layers.LSTM)

    # The layer's output is [sequence_output, state...]; keep only the state(s).
    encoder_states = list(last_encoder.output[1:])
    encoder_model = keras.Model(encoder_inputs, encoder_states)

    # The decoder is run one token at a time.
    decoder_inputs = keras.Input(shape=(1,))
    decoder_states_inputs = []
    decoder_states = []
    last = model.layers[encoder_layers+2](decoder_inputs)
    for i in range(decoder_layers):
      decoder_rnn = model.layers[i+encoder_layers+4]
      # Take the state width from the layer itself rather than from a global.
      units = decoder_rnn.units
      state_inputs = [keras.Input(shape=(units,),name='inp3_'+str(i))]
      if is_lstm:
        state_inputs.append(keras.Input(shape=(units,),name='inp4_'+str(i)))
      decoder_result = decoder_rnn(last, initial_state=state_inputs)
      last = decoder_result[0]
      decoder_states_inputs.extend(state_inputs)
      decoder_states.extend(decoder_result[1:])

    decoder_dense = model.get_layer('final')
    decoder_outputs = decoder_dense(last)
    decoder_model = keras.Model( [decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states )
    return encoder_model,decoder_model


The cell below contains the **decode_and_eval** function, which is responsible for decoding (predicting) the words and for calculating the accuracy.  
For validation-set accuracy, pass **test_f** as False.  
For test-set accuracy, pass **test_f** as True.  

In [None]:
def decode_and_eval(rnn_type,input_seq,encoder_model,decoder_model,batch_size,encoder_layers,decoder_layers,test_f):
    """Greedily decode every input word and return the exact-match word accuracy.

    Reads the module-level globals `train_output_token`, `index_to_char_target`,
    `decoder_seq_length`, `val_output_transcription`,
    `test_output_transcription` and `test_ip_transcription`.

    Args:
      rnn_type: Cell type name. Kept for interface compatibility; the number of
        encoder states is taken from what `encoder_model.predict` returns.
      input_seq: Encoded input words, one row per word.
      encoder_model: Model whose `predict` returns the final encoder state(s).
      decoder_model: Model whose `predict` takes (token ids, state...) and
        returns [probabilities, state...].
      batch_size: Number of words to decode and score.
      encoder_layers: Unused; kept for interface compatibility.
      decoder_layers: Number of decoder layers; the encoder state is repeated
        once per layer.
      test_f: False scores against the validation targets. True scores against
        the test targets and also appends each prediction to
        "correct_preds.txt" or "wrong_preds.txt".

    Returns:
      Fraction of words predicted exactly right (0.0 when batch_size is 0).
    """
    if batch_size <= 0:
      return 0.0

    # A model with one output returns a bare array, one with several returns a
    # list; normalise to a list so LSTM (h, c) and GRU/RNN are handled alike.
    encoder_states = encoder_model.predict(input_seq)
    if isinstance(encoder_states, (list, tuple)):
      encoder_states = list(encoder_states)
    else:
      encoder_states = [encoder_states]
    # Every decoder layer starts from the same encoder state(s).
    states_value = encoder_states * max(decoder_layers, 1)

    # Seed every sequence with the start-of-word marker.
    prev_char_index = np.zeros((batch_size, 1))
    prev_char_index[:, 0] = train_output_token.word_index['\t']

    predicted_words = [""] * batch_size
    done = [False] * batch_size
    for _ in range(decoder_seq_length):
        out = decoder_model.predict(tuple([prev_char_index] + states_value))
        output_probability = out[0]
        states_value = list(out[1:])
        best_tokens = np.argmax(output_probability[:, -1, :], axis=-1)
        for j in range(batch_size):
          if done[j]:
            continue
          sampled_token_index = int(best_tokens[j])
          # Id 0 is padding and is treated like the end-of-word marker.
          if sampled_token_index == 0:
            sampled_char = '\n'
          else:
            sampled_char = index_to_char_target[sampled_token_index]
          if sampled_char == '\n':
            done[j] = True
            continue
          predicted_words[j] += sampled_char
          prev_char_index[j,0] = train_output_token.word_index[sampled_char]
        # Nothing left to decode once every word has emitted its end marker.
        if all(done):
          break

    targets = test_output_transcription if test_f else val_output_transcription
    correct_predictions = 0
    correct_lines = []
    wrong_lines = []
    for t_index in range(batch_size):
        predicted_word = predicted_words[t_index]
        target_word = targets[t_index][1:-1]   # strip the '\t' / '\n' markers
        is_correct = predicted_word == target_word
        if is_correct:
          correct_predictions += 1
        # Predictions are only logged for the test set, because the logged
        # input word comes from test_ip_transcription.
        if test_f:
          line = test_ip_transcription[t_index]+' '+target_word+' '+predicted_word+'\n'
          if is_correct:
            correct_lines.append(line)
          else:
            wrong_lines.append(line)

    # Append, so repeated runs accumulate in the same analysis files.
    for file_name, lines in (("correct_preds.txt", correct_lines), ("wrong_preds.txt", wrong_lines)):
        if lines:
          with open(file_name, "a", encoding="utf-8") as text_file:
            text_file.writelines(lines)

    accuracy_ = float(correct_predictions)/float(batch_size)
    return accuracy_
    

The cell below contains the **train** function. It compiles and trains the model, then prints the validation accuracy and the test accuracy.

In [None]:
def train():
  """Train the GRU seq2seq model, then report validation and test word accuracy.

  Sets the module-level globals `latent_dim` (256) and `epochs` (10), builds
  and fits the model on the training arrays, saves it to "s2s.keras", reloads
  it, builds the inference models and evaluates them on the validation and
  test splits.

  Returns:
    Word-level accuracy on the test split.
  """
  global latent_dim
  latent_dim = 256
  global epochs
  epochs = 10

  # Single source of truth for values that several calls below must agree on.
  rnn_type = "GRU"
  encoder_layers = 3
  decoder_layers = 3
  model_path = "s2s.keras"

  model=seq_seq_model(rnn_type=rnn_type,embed_dim=64,encoder_layers=encoder_layers,decoder_layers=decoder_layers,dropout=0.3)

  model.compile(
      optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(
                                                              reduction='none'), metrics=["accuracy"]
  )
  # Teacher forcing: the target at step t is the decoder input at step t+1, so
  # shift the decoder inputs left by one and pad the final position with zeros.
  decoder_targets = tf.concat([train_output_text[:,1:],tf.zeros((train_output_text[:,:].shape[0],1))], axis=1)
  model.fit(
        [train_input_text, train_output_text],
        decoder_targets,
        batch_size=100,
        epochs=epochs,shuffle=True
  )
  # Save the model, then restore it from the same path to build the
  # inference encoder and decoder.
  model.save(model_path)
  inf = keras.models.load_model(model_path)
  encoder_model,decoder_model=inference_(inf,encoder_layers=encoder_layers,decoder_layers=decoder_layers)

  # Last argument False -> validation accuracy.
  val_acc=decode_and_eval(rnn_type, val_input_text,encoder_model,decoder_model,val_input_text.shape[0],encoder_layers,decoder_layers,False)
  print("accuracy of val data: ",val_acc)
  # Last argument True -> test accuracy.
  Test_accuracy=decode_and_eval(rnn_type, test_input_text,encoder_model,decoder_model,test_input_text.shape[0],encoder_layers,decoder_layers,True)
  print("accuracy of Test data: ",Test_accuracy)

  return Test_accuracy


In [None]:
# Run training and evaluation; keep the returned test accuracy for the cells below.
Test_accuracy = train()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
accuracy of val data:  0.4958253991504321
accuracy of Test data:  0.46736596736596736


In [None]:
# Display the test accuracy returned by train().
Test_accuracy

0.46736596736596736

In [None]:
# Start a wandb run and log the test accuracy returned by train().
wandb.init(project="CS6910_Assignment3", entity="swe-rana")  # use your credentials
wandb.log({"Test accuracy": Test_accuracy})

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Test accuracy,▁

0,1
Test accuracy,0.46737
