In [1]:
!pip install wandb
import numpy as np
import tensorflow as tf
from tensorflow import keras
import wandb
from wandb.keras import WandbCallback

Collecting wandb
[?25l  Downloading https://files.pythonhosted.org/packages/98/5f/45439b4767334b868e1c8c35b1b0ba3747d8c21be77b79f09eed7aa3c72b/wandb-0.10.30-py2.py3-none-any.whl (1.8MB)
[K     |▏                               | 10kB 13.1MB/s eta 0:00:01[K     |▍                               | 20kB 18.1MB/s eta 0:00:01[K     |▌                               | 30kB 14.8MB/s eta 0:00:01[K     |▊                               | 40kB 14.0MB/s eta 0:00:01[K     |█                               | 51kB 8.5MB/s eta 0:00:01[K     |█                               | 61kB 9.6MB/s eta 0:00:01[K     |█▎                              | 71kB 8.3MB/s eta 0:00:01[K     |█▌                              | 81kB 9.2MB/s eta 0:00:01[K     |█▋                              | 92kB 9.8MB/s eta 0:00:01[K     |█▉                              | 102kB 7.9MB/s eta 0:00:01[K     |██                              | 112kB 7.9MB/s eta 0:00:01[K     |██▏                             | 122kB 7.9MB/s e

In [2]:
# --- Google Drive ---------------------------------------------------------
# The lexicon files are read from Drive, so mount it before anything else.
from google.colab import drive
drive.mount('/content/drive')

# Training split of the lexicon (tab-separated text file on Drive).
data_path = "/content/drive/My Drive/lexicons/hi.translit.sampled.train.tsv"

# --- Training settings ----------------------------------------------------
# Mini-batch size passed to model.fit.
batch_size = 64
# Default epoch count; mytrain() reads its epoch count from the sweep config.
epochs = 100
# Default size of the recurrent state; mytrain() reads it from the sweep config.
latent_dim = 256
# Upper bound on the number of lines taken from a data file.
num_samples = 100000

Mounted at /content/drive


In [3]:
# ---------------------------------------------------------------------------
# Load the training pairs and one-hot encode them.
#
# Every record is "<input>\t<target>\t<third column>"; the third column is
# ignored.  Each text is wrapped as "\t" + text + " " + "\n": tab is the
# start-of-sequence marker, newline the end-of-sequence marker, and the space
# doubles as the padding symbol used to fill the arrays below.
# ---------------------------------------------------------------------------
input_texts, target_texts = [], []
input_characters, target_characters = set(), set()

with open(data_path, "r", encoding="utf-8") as f:
    lines = f.read().split("\n")

# The last element produced by the split is never read, and at most
# num_samples records are used.
for line in lines[: min(num_samples, len(lines) - 1)]:
    raw_input, raw_target, _ = line.split("\t")
    input_text = f"\t{raw_input} \n"
    target_text = f"\t{raw_target} \n"
    input_texts.append(input_text)
    target_texts.append(target_text)
    # Sets ignore duplicates, so the vocabularies can be fed whole strings.
    input_characters.update(input_text)
    target_characters.update(target_text)

# Freeze the vocabularies in sorted order so that indices are stable.
input_characters = sorted(input_characters)
target_characters = sorted(target_characters)
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max(len(txt) for txt in input_texts)
max_decoder_seq_length = max(len(txt) for txt in target_texts)

print("Number of samples:", len(input_texts))
print("Number of unique input tokens:", num_encoder_tokens)
print("Number of unique output tokens:", num_decoder_tokens)
print("Max sequence length for inputs:", max_encoder_seq_length)
print("Max sequence length for outputs:", max_decoder_seq_length)

# character -> column index in the one-hot encoding
input_token_index = {char: i for i, char in enumerate(input_characters)}
target_token_index = {char: i for i, char in enumerate(target_characters)}

# One-hot tensors, indexed as [sample, timestep, token].
num_pairs = len(input_texts)
encoder_input_data = np.zeros(
    (num_pairs, max_encoder_seq_length, num_encoder_tokens), dtype="float32"
)
decoder_input_data = np.zeros(
    (num_pairs, max_decoder_seq_length, num_decoder_tokens), dtype="float32"
)
decoder_target_data = np.zeros(
    (num_pairs, max_decoder_seq_length, num_decoder_tokens), dtype="float32"
)

input_pad = input_token_index[" "]
target_pad = target_token_index[" "]

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    # Encoder input: the characters, then padding up to the maximum length.
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.0
    encoder_input_data[i, len(input_text):, input_pad] = 1.0

    # Decoder input: the full target text including the start marker.
    for t, char in enumerate(target_text):
        decoder_input_data[i, t, target_token_index[char]] = 1.0
    decoder_input_data[i, len(target_text):, target_pad] = 1.0

    # Decoder target: the same text shifted one step ahead, i.e. without the
    # start marker, so position t holds the character the decoder must emit
    # after having seen decoder input position t.
    for t, char in enumerate(target_text[1:]):
        decoder_target_data[i, t, target_token_index[char]] = 1.0
    decoder_target_data[i, len(target_text) - 1:, target_pad] = 1.0


Number of samples: 44204
Number of unique input tokens: 66
Number of unique output tokens: 29
Max sequence length for inputs: 22
Max sequence length for outputs: 23


In [4]:
# Sweep definition handed to wandb.sweep().
sweep_config = {
    'name': "Surya_Pratik",
    # Grid search: every combination of the values listed under 'parameters'.
    'method': 'grid',
    'metric': {
        # Must be a key that the training run actually logs.  The model is
        # compiled with metrics=["accuracy"], so the validation metric that
        # reaches W&B is 'val_accuracy'; 'val_acc' is never logged.
        'name': 'val_accuracy',
        'goal': 'maximize'
    },

    'parameters': {
        # Size of the hidden state of every recurrent layer.
        'latent_dim': {
            'values': [128, 256, 512]
        },
        # Number of training epochs per run.
        'epochs': {
            'values': [10, 15]
        },
        # Number of stacked recurrent layers in the encoder.
        'num_encoder': {
            'values': [1, 2, 3]
        },
        # Number of stacked recurrent layers in the decoder.
        'num_decoder': {
            'values': [1, 2, 3]
        },
        # Recurrent cell type used for all encoder and decoder layers.
        'n_type': {
            'values': ['lstm', 'rnn', 'gru']
        },
        # String-valued switch: '0.2' turns the decoder dropout layer on,
        # 'null' leaves it off.
        'dropout': {
            'values': ['0.2', 'null']
        }
    }
}



In [5]:
# Register the sweep with W&B; the returned id is later handed to wandb.agent.
sweep_id = wandb.sweep(sweep=sweep_config, project="dl_assignment3-surya-pratik")
def _dropout_rate(value):
    """Translate the sweep's ``dropout`` setting into a float rate.

    The sweep supplies the setting as a string ('0.2' or 'null'); plain
    numbers and ``None`` are accepted too.  Returns 0.0 when dropout is
    disabled.
    """
    if value is None:
        return 0.0
    if isinstance(value, str):
        text = value.strip().lower()
        if text in ("", "null", "none"):
            return 0.0
        return float(text)
    return float(value)


def mytrain():
    """Build, train and save one seq2seq model for the current sweep run.

    Hyperparameters are read from ``wandb.config``:

    * ``n_type``      -- 'lstm', 'gru' or 'rnn'; cell type of every layer.
    * ``latent_dim``  -- hidden-state size of every recurrent layer.
    * ``num_encoder`` -- number of stacked encoder layers (>= 1).
    * ``num_decoder`` -- number of stacked decoder layers (>= 1).
    * ``dropout``     -- dropout rate applied to the decoder output before
      the softmax layer ('null' disables it).
    * ``epochs``      -- number of training epochs.

    The function reads the module-level ``num_encoder_tokens``,
    ``num_decoder_tokens``, ``encoder_input_data``, ``decoder_input_data``,
    ``decoder_target_data`` and ``batch_size``.  The trained model is written
    to the directory ``"s2s"``.

    Raises:
        ValueError: if ``n_type`` is not a known cell type or a layer count
            is smaller than 1.
    """
    # Inside a sweep agent, wandb.init() fills the run config with the
    # hyperparameters chosen for this run.  The sweep *definition* must not
    # be passed as ``config``: it would only add its own 'method', 'metric'
    # and 'parameters' entries to every run's config.
    wandb.init()
    config = wandb.config

    rnn_layers = {
        'lstm': keras.layers.LSTM,
        'gru': keras.layers.GRU,
        'rnn': keras.layers.SimpleRNN,
    }
    if config.n_type not in rnn_layers:
        raise ValueError(
            "n_type must be one of %s, got %r" % (sorted(rnn_layers), config.n_type)
        )
    if config.num_encoder < 1 or config.num_decoder < 1:
        raise ValueError("num_encoder and num_decoder must both be at least 1")
    rnn_cls = rnn_layers[config.n_type]

    # ----- Encoder ---------------------------------------------------------
    encoder_inputs = keras.Input(shape=(None, num_encoder_tokens))
    encoded = encoder_inputs
    # Every layer but the last passes its full output sequence upwards.
    for _ in range(config.num_encoder - 1):
        encoded = rnn_cls(config.latent_dim, return_sequences=True)(encoded)
    # The last layer only contributes its final state(s): [h, c] for an LSTM,
    # a single state tensor for GRU / SimpleRNN.
    encoder_result = rnn_cls(config.latent_dim, return_state=True)(encoded)
    encoder_states = list(encoder_result[1:])

    # ----- Decoder ---------------------------------------------------------
    decoder_inputs = keras.Input(shape=(None, num_decoder_tokens))
    decoder_outputs = decoder_inputs
    for depth in range(config.num_decoder):
        decoder_layer = rnn_cls(
            config.latent_dim, return_sequences=True, return_state=True
        )
        if depth == 0:
            # Only the first decoder layer is seeded with the encoder state.
            layer_result = decoder_layer(decoder_outputs, initial_state=encoder_states)
        else:
            layer_result = decoder_layer(decoder_outputs)
        # Keep the output sequence; the returned states are not needed here.
        decoder_outputs = layer_result[0]

    # Apply the rate that the sweep actually asked for (the value recorded in
    # the run config), instead of a hard-coded one.
    rate = _dropout_rate(config.dropout)
    if rate > 0:
        decoder_outputs = keras.layers.Dropout(rate)(decoder_outputs)
    decoder_outputs = keras.layers.Dense(num_decoder_tokens, activation="softmax")(decoder_outputs)

    model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.summary()
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    model.fit(
        [encoder_input_data, decoder_input_data],
        decoder_target_data,
        batch_size=batch_size,
        epochs=config.epochs,
        validation_split=0.2,
        callbacks=[WandbCallback()],
    )
    # Every run writes to the same directory, so only the most recent run's
    # model is kept on disk.
    model.save("s2s")
# Launch an agent for the sweep created above, with mytrain as its run function.
wandb.agent(sweep_id, function=mytrain)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: dcd3i3o7
Sweep URL: https://wandb.ai/ee20m018/dl_assignment3-surya-pratik/sweeps/dcd3i3o7


[34m[1mwandb[0m: Agent Starting Run: d52y76nl with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	latent_dim: 256
[34m[1mwandb[0m: 	n_type: lstm
[34m[1mwandb[0m: 	num_decoder: 2
[34m[1mwandb[0m: 	num_encoder: 2
[34m[1mwandb[0m: Currently logged in as: [33mee20m018[0m (use `wandb login --relogin` to force relogin)


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, 66)]   0                                            
__________________________________________________________________________________________________
lstm (LSTM)                     (None, None, 256)    330752      input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, None, 29)]   0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   [(None, 256), (None, 525312      lstm[0][0]                       
______________________________________________________________________________________________



INFO:tensorflow:Assets written to: s2s/assets


INFO:tensorflow:Assets written to: s2s/assets


VBox(children=(Label(value=' 19.31MB of 19.31MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
epoch,14.0
loss,0.09609
accuracy,0.96547
val_loss,0.2296
val_accuracy,0.9291
_runtime,186.0
_timestamp,1621863613.0
_step,14.0
best_val_loss,0.22424
best_epoch,11.0


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▅▄▃▂▂▂▁▁▁▁▁▁▁▁
accuracy,▁▃▅▆▇▇▇████████
val_loss,█▆▄▃▂▂▁▁▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▇▇████████
_runtime,▁▁▂▂▃▃▄▄▅▆▆▆▇██
_timestamp,▁▁▂▂▃▃▄▄▅▆▆▆▇██
_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


In [6]:
##################################
# Preprocessing test data
#
# NOTE: this cell rebinds input_texts, target_texts, encoder_input_data,
# decoder_input_data and decoder_target_data, which were first built from the
# training file.  After it has run, those names refer to the test split.
# The vocabularies (input_token_index / target_token_index) and the maximum
# sequence lengths computed from the training data are reused unchanged.

input_texts, target_texts = [], []
test_data_path = "/content/drive/My Drive/lexicons/hi.translit.sampled.test.tsv"

with open(test_data_path, "r", encoding="utf-8") as f:
    lines = f.read().split("\n")

# Same record layout and start/end markers as for the training file.
for line in lines[: min(num_samples, len(lines) - 1)]:
    raw_input, raw_target, _ = line.split("\t")
    input_text = f"\t{raw_input} \n"
    target_text = f"\t{raw_target} \n"
    input_texts.append(input_text)
    target_texts.append(target_text)

# Only the sample count is specific to the test file; the token counts and
# maximum lengths printed here are the values derived from the training data.
print("Number of samples:", len(input_texts))
print("Number of unique input tokens:", num_encoder_tokens)
print("Number of unique output tokens:", num_decoder_tokens)
print("Max sequence length for inputs:", max_encoder_seq_length)
print("Max sequence length for outputs:", max_decoder_seq_length)

# One-hot tensors, indexed as [sample, timestep, token].
num_pairs = len(input_texts)
encoder_input_data = np.zeros(
    (num_pairs, max_encoder_seq_length, num_encoder_tokens), dtype="float32"
)
decoder_input_data = np.zeros(
    (num_pairs, max_decoder_seq_length, num_decoder_tokens), dtype="float32"
)
decoder_target_data = np.zeros(
    (num_pairs, max_decoder_seq_length, num_decoder_tokens), dtype="float32"
)

input_pad = input_token_index[" "]
target_pad = target_token_index[" "]

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    # Encoder input: the characters, then padding up to the maximum length.
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.0
    encoder_input_data[i, len(input_text):, input_pad] = 1.0

    # Decoder input: the full target text including the start marker.
    for t, char in enumerate(target_text):
        decoder_input_data[i, t, target_token_index[char]] = 1.0
    decoder_input_data[i, len(target_text):, target_pad] = 1.0

    # Decoder target: the same text shifted one step ahead (no start marker).
    for t, char in enumerate(target_text[1:]):
        decoder_target_data[i, t, target_token_index[char]] = 1.0
    decoder_target_data[i, len(target_text) - 1:, target_pad] = 1.0


Number of samples: 4502
Number of unique input tokens: 66
Number of unique output tokens: 29
Max sequence length for inputs: 22
Max sequence length for outputs: 23


In [15]:
# Evaluate the saved model on the arrays prepared by the test-preprocessing cell.
model = keras.models.load_model("/content/drive/My Drive/lexicons/a.h2s")

# Encoder and decoder inputs, in the order the model expects them.
test_inputs = [encoder_input_data, decoder_input_data]
test_pred = model.predict(test_inputs)

# Prints the evaluate() result: loss followed by the compiled metric values.
print(model.evaluate(test_inputs, decoder_target_data))

[0.1436898559331894, 0.9545226097106934]


In [10]:
# Persist the in-memory model to Drive.
# NOTE(review): this cell was executed as In [10], i.e. before the evaluation
# cell (In [15]) that loads the same path, and no earlier visible cell binds
# a top-level `model` -- confirm where `model` comes from on a fresh kernel.
saved_model_dir = "/content/drive/My Drive/lexicons/a.h2s"
model.save(saved_model_dir)



INFO:tensorflow:Assets written to: /content/drive/My Drive/lexicons/a.h2s/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/lexicons/a.h2s/assets
