In [1]:
import wandb

wandb.login(key="77d2ef98bdddf2be0b779711890e81fada9114f8")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mda24m023[0m ([33mda24m023-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, LSTM, GRU, TimeDistributed
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import Callback
import wandb
import os
import pkg_resources

# Enable eager execution and tf.data debug mode
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

# Install dependencies in Kaggle
!pip install wandb --upgrade tensorflow==2.16.1

# Print wandb version for debugging
print(f"wandb version: {pkg_resources.get_distribution('wandb').version}")

# Wandb login (use Kaggle Secrets or input API key)
wandb.login(key="77d2ef98bdddf2be0b779711890e81fada9114f8")  # Input your wandb API key or set via Kaggle Secrets

# Data loading and preprocessing
def load_dakshina_data(data_dir='/kaggle/input/dakshinadataset/dakshina_dataset_v1.0/hi/lexicons'):
    train_file = os.path.join(data_dir, 'hi.translit.sampled.train.tsv')
    val_file = os.path.join(data_dir, 'hi.translit.sampled.dev.tsv')
    test_file = os.path.join(data_dir, 'hi.translit.sampled.test.tsv')
    
    train_df = pd.read_csv(train_file, sep='\t', header=None, names=['devanagari', 'latin', 'count'])
    val_df = pd.read_csv(val_file, sep='\t', header=None, names=['devanagari', 'latin', 'count'])
    test_df = pd.read_csv(test_file, sep='\t', header=None, names=['devanagari', 'latin', 'count'])
    
    return train_df, val_df, test_df

def preprocess_data(train_df, val_df, test_df, max_len=20):
    input_tokenizer = Tokenizer(char_level=True, filters='')
    output_tokenizer = Tokenizer(char_level=True, filters='')
    
    input_tokenizer.fit_on_texts(train_df['latin'].astype(str))
    output_tokenizer.fit_on_texts(['\t' + x + '\n' for x in train_df['devanagari'].astype(str)])
    
    X_train = input_tokenizer.texts_to_sequences(train_df['latin'].astype(str))
    y_train = output_tokenizer.texts_to_sequences(['\t' + x + '\n' for x in train_df['devanagari'].astype(str)])
    X_val = input_tokenizer.texts_to_sequences(val_df['latin'].astype(str))
    y_val = output_tokenizer.texts_to_sequences(['\t' + x + '\n' for x in val_df['devanagari'].astype(str)])
    X_test = input_tokenizer.texts_to_sequences(test_df['latin'].astype(str))
    y_test = output_tokenizer.texts_to_sequences(['\t' + x + '\n' for x in test_df['devanagari'].astype(str)])
    
    X_train = pad_sequences(X_train, maxlen=max_len, padding='post')
    y_train = pad_sequences(y_train, maxlen=max_len, padding='post')
    X_val = pad_sequences(X_val, maxlen=max_len, padding='post')
    y_val = pad_sequences(y_val, maxlen=max_len, padding='post')
    X_test = pad_sequences(X_test, maxlen=max_len, padding='post')
    y_test = pad_sequences(y_test, maxlen=max_len, padding='post')
    
    return (X_train, y_train, X_val, y_val, X_test, y_test, 
            input_tokenizer, output_tokenizer)

# Beam Search Decoder
def beam_search_decoder(encoder_model, decoder_model, input_seq, output_tokenizer, beam_size, max_len=20):
    states = encoder_model.predict(input_seq, verbose=0)
    
    if isinstance(states, list) and len(states) > 1:
        states = states[:2]  # LSTM: [h, c]
    else:
        states = [states]  # GRU: [h]
    
    start_token = output_tokenizer.word_index['\t']
    end_token = output_tokenizer.word_index['\n']
    sequences = [[[], 0.0, states]]
    
    for _ in range(max_len):
        all_candidates = []
        for seq, score, states in sequences:
            if seq and seq[-1] == end_token:
                all_candidates.append([seq, score, states])
                continue
            
            decoder_input = np.array([[seq[-1] if seq else start_token]])
            output_probs, *new_states = decoder_model.predict([decoder_input] + states, verbose=0)
            probs = output_probs[0, 0]
            
            top_indices = np.argsort(probs)[-beam_size:]
            top_probs = probs[top_indices]
            
            for idx, prob in zip(top_indices, top_probs):
                new_seq = seq + [idx]
                new_score = score + np.log(prob + 1e-10)
                all_candidates.append([new_seq, new_score, new_states])
        
        sequences = sorted(all_candidates, key=lambda x: x[1], reverse=True)[:beam_size]
        
        if all(seq[-1] == end_token for seq, _, _ in sequences):
            break
    
    return sequences[0][0]

# Seq2Seq Model
def build_seq2seq_model(input_vocab_size, output_vocab_size, config):
    cell_type = {'LSTM': LSTM, 'GRU': GRU}[config['cell_type']]
    #I added this
    config['cell_type'] = 'LSTM'
    state_count = 2 if config['cell_type'] == 'LSTM' else 1

    
    # Encoder
    encoder_inputs = Input(shape=(None,), name='encoder_inputs')
    encoder_embedding = Embedding(input_vocab_size, config['embed_size'], name='encoder_embedding')(encoder_inputs)
    print(f"Encoder embedding shape: {encoder_embedding.shape}")
    encoder = cell_type(config['hidden_size'], return_sequences=False, return_state=True, name='encoder')
    _, *encoder_states = encoder(encoder_embedding)
    
    # Decoder
    decoder_inputs = Input(shape=(None,), name='decoder_inputs')
    decoder_embedding = Embedding(output_vocab_size, config['embed_size'], name='decoder_embedding')(decoder_inputs)
    print(f"Decoder embedding shape: {decoder_embedding.shape}")
    decoder = cell_type(config['hidden_size'], return_sequences=True, return_state=True, name='decoder')
    decoder_outputs, *_ = decoder(decoder_embedding, initial_state=encoder_states[:state_count])
    
    decoder_dense = TimeDistributed(Dense(output_vocab_size, activation='softmax'), name='decoder_dense')
    decoder_outputs = decoder_dense(decoder_outputs)
    
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs, name='seq2seq')
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    # Inference models
    encoder_model = Model(encoder_inputs, encoder_states[:state_count], name='encoder_model')
    
    decoder_states_inputs = [Input(shape=(config['hidden_size'],), name=f'decoder_state_{i}') for i in range(state_count)]
    decoder_inputs_inf = Input(shape=(1,), name='decoder_input_inf')
    decoder_embedding_inf = Embedding(output_vocab_size, config['embed_size'], name='decoder_embedding_inf')(decoder_inputs_inf)
    decoder_inf = cell_type(config['hidden_size'], return_sequences=True, return_state=True, name='decoder_inf')
    decoder_outputs_inf, *decoder_states_inf = decoder_inf(decoder_embedding_inf, initial_state=decoder_states_inputs)
    decoder_outputs_inf = TimeDistributed(Dense(output_vocab_size, activation='softmax'), name='decoder_dense_inf')(decoder_outputs_inf)
    
    decoder_model = Model([decoder_inputs_inf] + decoder_states_inputs, 
                         [decoder_outputs_inf] + decoder_states_inf, name='decoder_model')
    
    return model, encoder_model, decoder_model

# Custom Wandb Callback (to replace WandbCallback if needed)
class CustomWandbCallback(Callback):
    def __init__(self, X_train, y_train, X_val, y_val, X_test, y_test, 
                 encoder_model, decoder_model, output_tokenizer, config):
        super().__init__()
        self.X_train = X_train
        self.y_train = y_train
        self.X_val = X_val
        self.y_val = y_val
        self.X_test = X_test[:13]  # Subsample for speed
        self.y_test = y_test[:13]
        self.encoder_model = encoder_model
        self.decoder_model = decoder_model
        self.output_tokenizer = output_tokenizer
        self.config = config
    
    def on_epoch_end(self, epoch, logs=None):
        train_loss = logs.get('loss')
        train_acc = logs.get('accuracy')
        val_loss = logs.get('val_loss')
        val_acc = logs.get('val_accuracy')
        
        test_correct = 0
        for i in range(len(self.X_test)):
            input_seq = self.X_test[i:i+1]
            pred_seq = beam_search_decoder(
                self.encoder_model, self.decoder_model, 
                input_seq, self.output_tokenizer, self.config['beam_size']
            )
            true_seq = self.y_test[i].tolist()
            true_seq = [x for x in true_seq if x != 0]
            test_correct += (pred_seq == true_seq)
        
        test_acc = test_correct / len(self.X_test)
        
        decoder_input_data = self.y_test[:, :-1]
        decoder_target_data = self.y_test[:, 1:]
        test_loss = self.model.evaluate(
            [self.X_test, decoder_input_data], decoder_target_data, verbose=0
        )[0]
        
        wandb.log({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_accuracy': train_acc,
            'val_loss': val_loss,
            'val_accuracy': val_acc,
            'test_loss': test_loss,
            'test_accuracy': test_acc
        })

# Training function
def train_model(model, encoder_model, decoder_model, X_train, y_train, X_val, y_val, X_test, y_test, 
                input_tokenizer, output_tokenizer, config):
    wandb.init(project="DL_A3", config=config)
    
    decoder_input_data = y_train[:, :-1]
    decoder_target_data = y_train[:, 1:]
    
    val_decoder_input_data = y_val[:, :-1]
    val_decoder_target_data = y_val[:, 1:]
    
    print(f"X_train shape: {X_train.shape}, decoder_input_data shape: {decoder_input_data.shape}, "
          f"decoder_target_data shape: {decoder_target_data.shape}")
    
    model.summary()  # Debug model architecture
    
    model.fit(
        [X_train, decoder_input_data], decoder_target_data,
        validation_data=([X_val, val_decoder_input_data], val_decoder_target_data),
        epochs=config['epochs'],
        batch_size=config['batch_size'],
        callbacks=[
            CustomWandbCallback(X_train, y_train, X_val, y_val, X_test, y_test, 
                               encoder_model, decoder_model, output_tokenizer, config)
        ],
        verbose=1
    )
    wandb.finish()

# Wandb sweep configuration
sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        'embed_size': {'values': [16, 32]},
        'hidden_size': {'values': [16, 32]},
        'cell_type': {'values': ['LSTM']},  # Focus on LSTM
        'dropout': {'values': [0.0, 0.2]},
        'beam_size': {'values': [1, 3]},
        'batch_size': {'values': [32, 64]},
        'epochs': {'value': 5}
    }
}

# Sweep function
def sweep():
    config_defaults = {
        'embed_size': 32,
        'hidden_size': 32,
        'cell_type': 'LSTM',
        'dropout': 0.0,
        'beam_size': 3,
        'batch_size': 64,
        'epochs': 5
    }
    
    wandb.init(project="DL_A3", config=config_defaults)
    config = wandb.config
    
    train_df, val_df, test_df = load_dakshina_data()
    (X_train, y_train, X_val, y_val, X_test, y_test, 
     input_tokenizer, output_tokenizer) = preprocess_data(train_df, val_df, test_df)
    
    model, encoder_model, decoder_model = build_seq2seq_model(
        input_vocab_size=len(input_tokenizer.word_index) + 1,
        output_vocab_size=len(output_tokenizer.word_index) + 1,
        config=config
    )
    
    train_model(model, encoder_model, decoder_model, 
                X_train, y_train, X_val, y_val, X_test, y_test, 
                input_tokenizer, output_tokenizer, config)

# Main execution
if __name__ == "__main__":
    # Debug GRU in a single run
    debug_gru = False  # Set to True to test GRU
    if debug_gru:
        config = {
            'embed_size': 32,
            'hidden_size': 32,
            'cell_type': 'GRU',
            'dropout': 0.0,
            'beam_size': 3,
            'batch_size': 64,
            'epochs': 1  # Single epoch for debugging
        }
        train_df, val_df, test_df = load_dakshina_data()
        (X_train, y_train, X_val, y_val, X_test, y_test, 
         input_tokenizer, output_tokenizer) = preprocess_data(train_df, val_df, test_df)
        model, encoder_model, decoder_model = build_seq2seq_model(
            input_vocab_size=len(input_tokenizer.word_index) + 1,
            output_vocab_size=len(output_tokenizer.word_index) + 1,
            config=config
        )
        train_model(model, encoder_model, decoder_model, 
                    X_train, y_train, X_val, y_val, X_test, y_test, 
                    input_tokenizer, output_tokenizer, config)
    else:
        sweep_id = wandb.sweep(sweep_config, project="DL_A3")
        wandb.agent(sweep_id, function=sweep, count=100)

# Only train and val

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, LSTM, GRU, TimeDistributed
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import Callback
import wandb
import os
import pkg_resources

# Enable eager execution and tf.data debug mode
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

# Install dependencies in Kaggle
!pip install wandb --upgrade tensorflow==2.16.1

# Print wandb version for debugging
print(f"wandb version: {pkg_resources.get_distribution('wandb').version}")

# Wandb login (use Kaggle Secrets or input API key)
wandb.login()  # Input your wandb API key or set via Kaggle Secrets

# Data loading and preprocessing
def load_dakshina_data(data_dir='/kaggle/input/dakshinadataset/dakshina_dataset_v1.0/hi/lexicons'):
    train_file = os.path.join(data_dir, 'hi.translit.sampled.train.tsv')
    val_file = os.path.join(data_dir, 'hi.translit.sampled.dev.tsv')
    test_file = os.path.join(data_dir, 'hi.translit.sampled.test.tsv')
    
    train_df = pd.read_csv(train_file, sep='\t', header=None, names=['devanagari', 'latin', 'count'])
    val_df = pd.read_csv(val_file, sep='\t', header=None, names=['devanagari', 'latin', 'count'])
    test_df = pd.read_csv(test_file, sep='\t', header=None, names=['devanagari', 'latin', 'count'])
    
    return train_df, val_df, test_df

def preprocess_data(train_df, val_df, test_df, max_len=20):
    input_tokenizer = Tokenizer(char_level=True, filters='')
    output_tokenizer = Tokenizer(char_level=True, filters='')
    
    input_tokenizer.fit_on_texts(train_df['latin'].astype(str))
    output_tokenizer.fit_on_texts(['\t' + x + '\n' for x in train_df['devanagari'].astype(str)])
    
    X_train = input_tokenizer.texts_to_sequences(train_df['latin'].astype(str))
    y_train = output_tokenizer.texts_to_sequences(['\t' + x + '\n' for x in train_df['devanagari'].astype(str)])
    X_val = input_tokenizer.texts_to_sequences(val_df['latin'].astype(str))
    y_val = output_tokenizer.texts_to_sequences(['\t' + x + '\n' for x in val_df['devanagari'].astype(str)])
    X_test = input_tokenizer.texts_to_sequences(test_df['latin'].astype(str))
    y_test = output_tokenizer.texts_to_sequences(['\t' + x + '\n' for x in test_df['devanagari'].astype(str)])
    
    X_train = pad_sequences(X_train, maxlen=max_len, padding='post')
    y_train = pad_sequences(y_train, maxlen=max_len, padding='post')
    X_val = pad_sequences(X_val, maxlen=max_len, padding='post')
    y_val = pad_sequences(y_val, maxlen=max_len, padding='post')
    X_test = pad_sequences(X_test, maxlen=max_len, padding='post')
    y_test = pad_sequences(y_test, maxlen=max_len, padding='post')
    
    return (X_train, y_train, X_val, y_val, X_test, y_test, 
            input_tokenizer, output_tokenizer)

# Beam Search Decoder (kept for potential future use)
def beam_search_decoder(encoder_model, decoder_model, input_seq, output_tokenizer, beam_size, max_len=20):
    states = encoder_model.predict(input_seq, verbose=0)
    
    if isinstance(states, list) and len(states) > 1:
        states = states[:2]  # LSTM: [h, c]
    else:
        states = [states]  # GRU: [h]
    
    start_token = output_tokenizer.word_index['\t']
    end_token = output_tokenizer.word_index['\n']
    sequences = [[[], 0.0, states]]
    
    for _ in range(max_len):
        all_candidates = []
        for seq, score, states in sequences:
            if seq and seq[-1] == end_token:
                all_candidates.append([seq, score, states])
                continue
            
            decoder_input = np.array([[seq[-1] if seq else start_token]])
            output_probs, *new_states = decoder_model.predict([decoder_input] + states, verbose=0)
            probs = output_probs[0, 0]
            
            top_indices = np.argsort(probs)[-beam_size:]
            top_probs = probs[top_indices]
            
            for idx, prob in zip(top_indices, top_probs):
                new_seq = seq + [idx]
                new_score = score + np.log(prob + 1e-10)
                all_candidates.append([new_seq, new_score, new_states])
        
        sequences = sorted(all_candidates, key=lambda x: x[1], reverse=True)[:beam_size]
        
        if all(seq[-1] == end_token for seq, _, _ in sequences):
            break
    
    return sequences[0][0]

# Seq2Seq Model
def build_seq2seq_model(input_vocab_size, output_vocab_size, config):
    cell_type = {'LSTM': LSTM, 'GRU': GRU}[config['cell_type']]
    state_count = 2 if config['cell_type'] == 'LSTM' else 1
    
    # Encoder
    encoder_inputs = Input(shape=(None,), name='encoder_inputs')
    encoder_embedding = Embedding(input_vocab_size, config['embed_size'], name='encoder_embedding')(encoder_inputs)
    print(f"Encoder embedding shape: {encoder_embedding.shape}")
    encoder = cell_type(config['hidden_size'], return_sequences=False, return_state=True, name='encoder')
    _, *encoder_states = encoder(encoder_embedding)
    
    # Decoder
    decoder_inputs = Input(shape=(None,), name='decoder_inputs')
    decoder_embedding = Embedding(output_vocab_size, config['embed_size'], name='decoder_embedding')(decoder_inputs)
    print(f"Decoder embedding shape: {decoder_embedding.shape}")
    decoder = cell_type(config['hidden_size'], return_sequences=True, return_state=True, name='decoder')
    decoder_outputs, *_ = decoder(decoder_embedding, initial_state=encoder_states[:state_count])
    
    decoder_dense = TimeDistributed(Dense(output_vocab_size, activation='softmax'), name='decoder_dense')
    decoder_outputs = decoder_dense(decoder_outputs)
    
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs, name='seq2seq')
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    # Inference models (kept for potential future use)
    encoder_model = Model(encoder_inputs, encoder_states[:state_count], name='encoder_model')
    
    decoder_states_inputs = [Input(shape=(config['hidden_size'],), name=f'decoder_state_{i}') for i in range(state_count)]
    decoder_inputs_inf = Input(shape=(1,), name='decoder_input_inf')
    decoder_embedding_inf = Embedding(output_vocab_size, config['embed_size'], name='decoder_embedding_inf')(decoder_inputs_inf)
    decoder_inf = cell_type(config['hidden_size'], return_sequences=True, return_state=True, name='decoder_inf')
    decoder_outputs_inf, *decoder_states_inf = decoder_inf(decoder_embedding_inf, initial_state=decoder_states_inputs)
    decoder_outputs_inf = TimeDistributed(Dense(output_vocab_size, activation='softmax'), name='decoder_dense_inf')(decoder_outputs_inf)
    
    decoder_model = Model([decoder_inputs_inf] + decoder_states_inputs, 
                         [decoder_outputs_inf] + decoder_states_inf, name='decoder_model')
    
    return model, encoder_model, decoder_model

# Custom Wandb Callback (logs only train and validation metrics)
class CustomWandbCallback(Callback):
    def __init__(self, X_train, y_train, X_val, y_val):
        super().__init__()
        self.X_train = X_train
        self.y_train = y_train
        self.X_val = X_val
        self.y_val = y_val
    
    def on_epoch_end(self, epoch, logs=None):
        train_loss = logs.get('loss')
        train_acc = logs.get('accuracy')
        val_loss = logs.get('val_loss')
        val_acc = logs.get('val_accuracy')
        
        wandb.log({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_accuracy': train_acc,
            'val_loss': val_loss,
            'val_accuracy': val_acc
        })

# Training function
def train_model(model, encoder_model, decoder_model, X_train, y_train, X_val, y_val, X_test, y_test, 
                input_tokenizer, output_tokenizer, config):
    wandb.init(project="DL_A3", config=config)

    # Set run name based on hyperparameters
    wandb.run.name = f"e{config['embed_size']}_h{config['hidden_size']}_c{config['cell_type']}_dr{config['dropout']}_b{config['batch_size']}"
    
    decoder_input_data = y_train[:, :-1]
    decoder_target_data = y_train[:, 1:]
    
    val_decoder_input_data = y_val[:, :-1]
    val_decoder_target_data = y_val[:, 1:]
    
    print(f"X_train shape: {X_train.shape}, decoder_input_data shape: {decoder_input_data.shape}, "
          f"decoder_target_data shape: {decoder_target_data.shape}")
    
    model.summary()  # Debug model architecture
    
    model.fit(
        [X_train, decoder_input_data], decoder_target_data,
        validation_data=([X_val, val_decoder_input_data], val_decoder_target_data),
        epochs=config['epochs'],
        batch_size=config['batch_size'],
        callbacks=[
            CustomWandbCallback(X_train, y_train, X_val, y_val)
        ],
        verbose=1
    )
    wandb.finish()

# Wandb sweep configuration
sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        'embed_size': {'values': [16, 32, 64]},
        'hidden_size': {'values': [16, 32, 64]},
        'cell_type': {'values': ['LSTM']},#['LSTM','GRU','RNN']  # Focus on LSTM
        'dropout': {'values': [0.0, 0.2]},
        'beam_size': {'values': [1, 3]},
        'batch_size': {'values': [32, 64]},
        'epochs': {'value': 10}
    }
}

# Sweep function
def sweep():
    config_defaults = {
        'embed_size': 32,
        'hidden_size': 32,
        'cell_type': 'LSTM',
        'dropout': 0.0,
        'beam_size': 3,
        'batch_size': 64,
        'epochs': 5
    }
    
    wandb.init(project="DL_A3", config=config_defaults)
    config = wandb.config
    
    train_df, val_df, test_df = load_dakshina_data()
    (X_train, y_train, X_val, y_val, X_test, y_test, 
     input_tokenizer, output_tokenizer) = preprocess_data(train_df, val_df, test_df)
    
    model, encoder_model, decoder_model = build_seq2seq_model(
        input_vocab_size=len(input_tokenizer.word_index) + 1,
        output_vocab_size=len(output_tokenizer.word_index) + 1,
        config=config
    )
    
    train_model(model, encoder_model, decoder_model, 
                X_train, y_train, X_val, y_val, X_test, y_test, 
                input_tokenizer, output_tokenizer, config)

# Main execution
if __name__ == "__main__":
    # Debug GRU in a single run
    debug_gru = False  # Set to True to test GRU
    if debug_gru:
        config = {
            'embed_size': 32,
            'hidden_size': 32,
            'cell_type': 'GRU',
            'dropout': 0.0,
            'beam_size': 3,
            'batch_size': 64,
            'epochs': 1  # Single epoch for debugging
        }
        train_df, val_df, test_df = load_dakshina_data()
        (X_train, y_train, X_val, y_val, X_test, y_test, 
         input_tokenizer, output_tokenizer) = preprocess_data(train_df, val_df, test_df)
        model, encoder_model, decoder_model = build_seq2seq_model(
            input_vocab_size=len(input_tokenizer.word_index) + 1,
            output_vocab_size=len(output_tokenizer.word_index) + 1,
            config=config
        )
        train_model(model, encoder_model, decoder_model, 
                    X_train, y_train, X_val, y_val, X_test, y_test, 
                    input_tokenizer, output_tokenizer, config)
    else:
        sweep_id = wandb.sweep(sweep_config, project="DL_A3")
        wandb.agent(sweep_id, function=sweep, count=100)

2025-05-17 20:36:11.298085: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747514171.490728      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747514171.548804      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Collecting wandb
  Downloading wandb-0.19.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting tensorflow==2.16.1
  Downloading tensorflow-2.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting ml-dtypes~=0.3.1 (from tensorflow==2.16.1)
  Downloading ml_dtypes-0.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting tensorboard<2.17,>=2.16 (from tensorflow==2.16.1)
  Downloading tensorboard-2.16.2-py3-none-any.whl.metadata (1.6 kB)
Downloading tensorflow-2.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m589.8/589.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[?25hDownloading wandb-0.19.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.4/21.4 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0

[34m[1mwandb[0m: Agent Starting Run: 9w6s8b3q with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 16


I0000 00:00:1747514273.561209     109 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Encoder embedding shape: (None, None, 32)
Decoder embedding shape: (None, None, 32)


X_train shape: (44204, 20), decoder_input_data shape: (44204, 19), decoder_target_data shape: (44204, 19)


Epoch 1/10


I0000 00:00:1747514283.797561     141 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 138ms/step - accuracy: 0.6430 - loss: 1.9907 - val_accuracy: 0.7205 - val_loss: 1.0896
Epoch 2/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 138ms/step - accuracy: 0.7145 - loss: 1.0936 - val_accuracy: 0.7384 - val_loss: 0.9821
Epoch 3/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 140ms/step - accuracy: 0.7333 - loss: 0.9993 - val_accuracy: 0.7488 - val_loss: 0.9150
Epoch 4/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 140ms/step - accuracy: 0.7414 - loss: 0.9363 - val_accuracy: 0.7612 - val_loss: 0.8673
Epoch 5/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 140ms/step - accuracy: 0.7533 - loss: 0.8890 - val_accuracy: 0.7680 - val_loss: 0.8285
Epoch 6/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 140ms/step - accuracy: 0.7598 - loss: 0.8549 - val_accuracy: 0.7736 - val_loss: 0.8018
Epoch 7/10
[1m691/69

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▅▆▇▇▇██
train_loss,█▄▃▃▂▂▂▁▁▁
val_accuracy,▁▃▄▅▆▆▇▇██
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
train_accuracy,0.78244
train_loss,0.75683
val_accuracy,0.79133
val_loss,0.71528


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nslk9osh with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32


Encoder embedding shape: (None, None, 16)
Decoder embedding shape: (None, None, 16)


X_train shape: (44204, 20), decoder_input_data shape: (44204, 19), decoder_target_data shape: (44204, 19)


Epoch 1/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 139ms/step - accuracy: 0.6485 - loss: 1.8041 - val_accuracy: 0.7226 - val_loss: 1.0489
Epoch 2/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 139ms/step - accuracy: 0.7165 - loss: 1.0593 - val_accuracy: 0.7384 - val_loss: 0.9710
Epoch 3/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 139ms/step - accuracy: 0.7323 - loss: 0.9875 - val_accuracy: 0.7529 - val_loss: 0.9018
Epoch 4/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 139ms/step - accuracy: 0.7489 - loss: 0.9179 - val_accuracy: 0.7678 - val_loss: 0.8321
Epoch 5/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 139ms/step - accuracy: 0.7629 - loss: 0.8499 - val_accuracy: 0.7763 - val_loss: 0.7892
Epoch 6/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 139ms/step - accuracy: 0.7726 - loss: 0.8015 - val_accuracy: 0.7874 - val_loss: 0.7371
Epoch 7/10

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▅▆▆▇██
train_loss,█▅▄▄▃▃▂▂▁▁
val_accuracy,▁▂▃▄▅▅▆▇▇█
val_loss,█▇▆▅▄▃▃▂▁▁

0,1
epoch,10.0
train_accuracy,0.81788
train_loss,0.61454
val_accuracy,0.82955
val_loss,0.5782


[34m[1mwandb[0m: Agent Starting Run: dhbf2wxx with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 16


Encoder embedding shape: (None, None, 32)
Decoder embedding shape: (None, None, 32)


X_train shape: (44204, 20), decoder_input_data shape: (44204, 19), decoder_target_data shape: (44204, 19)


Epoch 1/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 138ms/step - accuracy: 0.6636 - loss: 1.7306 - val_accuracy: 0.7297 - val_loss: 1.0219
Epoch 2/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 138ms/step - accuracy: 0.7261 - loss: 1.0331 - val_accuracy: 0.7406 - val_loss: 0.9660
Epoch 3/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 138ms/step - accuracy: 0.7326 - loss: 0.9912 - val_accuracy: 0.7441 - val_loss: 0.9369
Epoch 4/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 138ms/step - accuracy: 0.7373 - loss: 0.9661 - val_accuracy: 0.7530 - val_loss: 0.8995
Epoch 5/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 138ms/step - accuracy: 0.7493 - loss: 0.9111 - val_accuracy: 0.7677 - val_loss: 0.8365
Epoch 6/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 139ms/step - accuracy: 0.7627 - loss: 0.8520 - val_accuracy: 0.7796 - val_loss:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▃▄▅▅▆▇▇█
train_loss,█▅▄▄▄▃▂▂▁▁
val_accuracy,▁▂▂▃▄▅▆▇██
val_loss,█▇▆▆▅▄▃▂▁▁

0,1
epoch,10.0
train_accuracy,0.80639
train_loss,0.6679
val_accuracy,0.8162
val_loss,0.62715


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: duzdsman with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64


Encoder embedding shape: (None, None, 64)
Decoder embedding shape: (None, None, 64)


X_train shape: (44204, 20), decoder_input_data shape: (44204, 19), decoder_target_data shape: (44204, 19)


Epoch 1/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 139ms/step - accuracy: 0.6872 - loss: 1.3442 - val_accuracy: 0.7576 - val_loss: 0.8864
Epoch 2/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 137ms/step - accuracy: 0.7580 - loss: 0.8730 - val_accuracy: 0.7889 - val_loss: 0.7217
Epoch 3/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 139ms/step - accuracy: 0.7991 - loss: 0.6928 - val_accuracy: 0.8386 - val_loss: 0.5505
Epoch 4/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 137ms/step - accuracy: 0.8430 - loss: 0.5301 - val_accuracy: 0.8690 - val_loss: 0.4415
Epoch 5/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 138ms/step - accuracy: 0.8714 - loss: 0.4284 - val_accuracy: 0.8884 - val_loss: 0.3692
Epoch 6/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 137ms/step - accuracy: 0.8897 - loss: 0.3634 - val_accuracy: 0.8990 - val_loss:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▆▆▇▇███
train_loss,█▆▄▃▂▂▁▁▁▁
val_accuracy,▁▂▅▆▇▇▇███
val_loss,█▆▄▃▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.92084
train_loss,0.25576
val_accuracy,0.91826
val_loss,0.26251


[34m[1mwandb[0m: Agent Starting Run: baip6hjm with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64


Encoder embedding shape: (None, None, 64)
Decoder embedding shape: (None, None, 64)


X_train shape: (44204, 20), decoder_input_data shape: (44204, 19), decoder_target_data shape: (44204, 19)


Epoch 1/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 138ms/step - accuracy: 0.6724 - loss: 1.4949 - val_accuracy: 0.7382 - val_loss: 0.9548
Epoch 2/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 137ms/step - accuracy: 0.7437 - loss: 0.9327 - val_accuracy: 0.7839 - val_loss: 0.7585
Epoch 3/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 137ms/step - accuracy: 0.7919 - loss: 0.7360 - val_accuracy: 0.8237 - val_loss: 0.6173
Epoch 4/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 138ms/step - accuracy: 0.8215 - loss: 0.6172 - val_accuracy: 0.8437 - val_loss: 0.5375
Epoch 5/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 138ms/step - accuracy: 0.8432 - loss: 0.5356 - val_accuracy: 0.8646 - val_loss: 0.4610
Epoch 6/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 137ms/step - accuracy: 0.8623 - loss: 0.4624 - val_accuracy: 0.8765 - val_loss: 0.4095
Epoch 7/10

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▇▇▇██
train_loss,█▆▄▃▃▂▂▁▁▁
val_accuracy,▁▃▅▅▆▇▇███
val_loss,█▆▄▄▃▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.90342
train_loss,0.31455
val_accuracy,0.9075
val_loss,0.30327


[34m[1mwandb[0m: Agent Starting Run: 8mt4jn5u with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64


Encoder embedding shape: (None, None, 64)
Decoder embedding shape: (None, None, 64)


X_train shape: (44204, 20), decoder_input_data shape: (44204, 19), decoder_target_data shape: (44204, 19)


Epoch 1/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 139ms/step - accuracy: 0.6860 - loss: 1.3614 - val_accuracy: 0.7521 - val_loss: 0.9168
Epoch 2/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 140ms/step - accuracy: 0.7598 - loss: 0.8758 - val_accuracy: 0.8129 - val_loss: 0.6569
Epoch 3/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 141ms/step - accuracy: 0.8194 - loss: 0.6263 - val_accuracy: 0.8548 - val_loss: 0.4889
Epoch 4/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 142ms/step - accuracy: 0.8579 - loss: 0.4751 - val_accuracy: 0.8784 - val_loss: 0.3993
Epoch 5/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 142ms/step - accuracy: 0.8807 - loss: 0.3903 - val_accuracy: 0.8936 - val_loss: 0.3484
Epoch 6/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 143ms/step - accuracy: 0.8956 - loss: 0.3397 - val_accuracy: 0.9010 - val_loss:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▇▇▇███
train_loss,█▆▄▃▂▂▁▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.9229
train_loss,0.24798
val_accuracy,0.92076
val_loss,0.25269


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a1782t4c with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64


Encoder embedding shape: (None, None, 64)
Decoder embedding shape: (None, None, 64)


X_train shape: (44204, 20), decoder_input_data shape: (44204, 19), decoder_target_data shape: (44204, 19)


Epoch 1/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 143ms/step - accuracy: 0.6674 - loss: 1.5030 - val_accuracy: 0.7379 - val_loss: 0.9703
Epoch 2/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 142ms/step - accuracy: 0.7364 - loss: 0.9778 - val_accuracy: 0.7710 - val_loss: 0.8360
Epoch 3/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 142ms/step - accuracy: 0.7742 - loss: 0.8156 - val_accuracy: 0.8111 - val_loss: 0.6586
Epoch 4/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 142ms/step - accuracy: 0.8164 - loss: 0.6335 - val_accuracy: 0.8486 - val_loss: 0.5160
Epoch 5/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 142ms/step - accuracy: 0.8495 - loss: 0.5089 - val_accuracy: 0.8716 - val_loss: 0.4328
Epoch 6/10
[1m691/691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 143ms/step - accuracy: 0.8715 - loss: 0.4313 - val_accuracy: 0.8832 - val_loss: 0.3845
Epoch 7/10

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▄▅▆▇▇███
train_loss,█▆▅▃▃▂▂▁▁▁
val_accuracy,▁▂▄▅▆▇▇███
val_loss,█▇▅▃▃▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.91085
train_loss,0.29162
val_accuracy,0.91411
val_loss,0.28449


[34m[1mwandb[0m: Agent Starting Run: u8ndnahu with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64


Encoder embedding shape: (None, None, 64)
Decoder embedding shape: (None, None, 64)


X_train shape: (44204, 20), decoder_input_data shape: (44204, 19), decoder_target_data shape: (44204, 19)


Epoch 1/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 142ms/step - accuracy: 0.6898 - loss: 1.3351 - val_accuracy: 0.7510 - val_loss: 0.9154
Epoch 2/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 141ms/step - accuracy: 0.7503 - loss: 0.8982 - val_accuracy: 0.7882 - val_loss: 0.7359
Epoch 3/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 142ms/step - accuracy: 0.7955 - loss: 0.7134 - val_accuracy: 0.8378 - val_loss: 0.5560
Epoch 4/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 141ms/step - accuracy: 0.8398 - loss: 0.5428 - val_accuracy: 0.8683 - val_loss: 0.4441
Epoch 5/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 142ms/step - accuracy: 0.8684 - loss: 0.4410 - val_accuracy: 0.8879 - val_loss: 0.3769
Epoch 6/10
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 142ms/step - accuracy: 0.8879 - loss: 0.3697 - val_accuracy: 0.8989 - val_loss: