# Setup

In [None]:
!pip install neptune -q
!pip install bpemb -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.9/63.9 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m502.6/502.6 kB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.2/13.2 MB[0m [31m112.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.2/83.2 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.9/137.9 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m6.9 MB/s[0m eta [36m

In [None]:
# Standard Libraries
import os
import re
import json
import csv
import pickle
import uuid
from pathlib import Path
from typing import Dict, List, Set, Optional, Union
from datetime import datetime
from collections import defaultdict
import unicodedata
import dataclasses
import time


# Data Manipulation
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Text and Tokenization
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from string import punctuation, digits

# Machine Learning Utilities
from sklearn.utils import shuffle
from sklearn.model_selection import KFold, train_test_split

# TensorFlow and Keras
import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Embedding, LSTM, Dense, Bidirectional, Concatenate,
    LayerNormalization, ActivityRegularization
)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.utils import plot_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.regularizers import l1_l2, l2
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, EarlyStopping


# Neptune.ai for Experiment Tracking
import neptune.new as neptune




In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset and model
# paths used later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Configuration

In [None]:
@dataclasses.dataclass
class ModelConfig:
    """
    Hyper-parameters, training settings and experiment-tracking metadata
    for one training run.

    ``__post_init__`` normalises the instance after construction:

    * an integer ``selected_folds`` is wrapped into a one-element list;
    * a rate/strength whose ``use_*`` toggle is off is reset to ``None``;
    * missing Neptune settings, ``version`` and ``experiment_id`` receive
      defaults.

    The Neptune API token is never hard-coded: when it is not passed
    explicitly it is read from the ``NEPTUNE_API_TOKEN`` environment
    variable, and it is redacted from files written by ``save_config``.
    """

    # Model Architecture Parameters
    hidden_units: int = 32
    use_bidirectional: bool = True
    use_attention: bool = True

    # Regularization Parameters
    dropout_rate: Optional[float] = 0.3
    recurrent_dropout_rate: Optional[float] = 0.1
    l1_reg: Optional[float] = 1e-3
    l2_reg: Optional[float] = 1e-3

    # Training Parameters
    batch_size: int = 64
    epochs: int = 50
    learning_rate: float = 1e-3

    # K-Fold Cross Validation Parameters
    n_splits: int = 5
    selected_folds: Optional[Union[List[int], int]] = None

    # Dropout and Regularization Toggles
    use_dropout: bool = True
    use_recurrent_dropout: bool = True
    use_l1_regularization: bool = False
    use_l2_regularization: bool = False

    # Neptune Tracking Parameters
    neptune_project: Optional[str] = None
    neptune_api_token: Optional[str] = None
    neptune_run_name: Optional[str] = None
    neptune_tags: Optional[List[str]] = None

    # Versioning and Tracking
    version: Optional[str] = None
    experiment_id: Optional[str] = None
    config_filepath: Optional[str] = None

    def __post_init__(self):
        # Accept a single fold index as shorthand for a one-element list
        if isinstance(self.selected_folds, int):
            self.selected_folds = [self.selected_folds]

        # A disabled toggle wins over whatever rate/strength was passed in
        if not self.use_dropout:
            self.dropout_rate = None
        if not self.use_recurrent_dropout:
            self.recurrent_dropout_rate = None
        if not self.use_l1_regularization:
            self.l1_reg = None
        if not self.use_l2_regularization:
            self.l2_reg = None

        # Set default Neptune parameters if not provided
        if not self.neptune_project:
            self.neptune_project = "ihsani.yulfa/Translation-Project"
        if not self.neptune_api_token:
            # Credentials must not live in source control: take the token
            # from the environment (stays None when the variable is unset).
            self.neptune_api_token = os.environ.get("NEPTUNE_API_TOKEN")
        if not self.neptune_run_name:
            self.neptune_run_name = f"Run-{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        if not self.neptune_tags:
            self.neptune_tags = ["default"]

        # Set version and experiment ID if not provided
        if not self.version:
            self.version = f"v{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        if not self.experiment_id:
            self.experiment_id = str(uuid.uuid4())

    def save_config(self, output_dir='experiments'):
        """
        Save configuration details to a JSON file.

        The file is named ``config_<version>_<first 8 chars of
        experiment_id>.json``. A ``timestamp`` entry is added and the
        Neptune API token is written as ``null`` so the secret never
        reaches disk.

        Args:
            output_dir (str): Directory to save configuration files
                (created if missing).

        Returns:
            str: Path to the saved configuration file
        """
        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)

        # Create a dictionary of configuration details
        config_dict = dataclasses.asdict(self)

        # Redact the credential before it is persisted
        config_dict['neptune_api_token'] = None

        # Add additional metadata
        config_dict['timestamp'] = datetime.now().isoformat()

        # Determine filename based on version and experiment ID
        filename = f"config_{self.version}_{self.experiment_id[:8]}.json"
        filepath = os.path.join(output_dir, filename)

        # Save configuration to JSON
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(config_dict, f, indent=4)

        print(f"Configuration saved to {filepath}")
        return filepath

    @classmethod
    def load_config(cls, filepath, keep_identity=False):
        """
        Load configuration from a JSON file written by ``save_config``.

        Args:
            filepath (str): Path to the configuration JSON file
            keep_identity (bool): When False (default, the historical
                behaviour) the stored ``version`` and ``experiment_id`` are
                discarded so the loaded instance gets fresh ones. When True
                they are restored from the file.

        Returns:
            ModelConfig: Loaded configuration instance
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            config_dict = json.load(f)

        # 'timestamp' is metadata added by save_config, not a dataclass field
        config_dict.pop('timestamp', None)

        if not keep_identity:
            # Dropped so __post_init__ regenerates them for the new instance
            config_dict.pop('version', None)
            config_dict.pop('experiment_id', None)

        return cls(**config_dict)

# Encoder Decoder

## Attention

In [None]:
# Attention
# https://colab.research.google.com/drive/1XrjPL3O_szhahYZW0z9yhCl9qvIcJJYW

import tensorflow as tf
from tensorflow.keras.layers import Concatenate,Layer
from tensorflow.keras import backend as K


class AttentionLayer(Layer):
    """
    This class implements Bahdanau attention (https://arxiv.org/pdf/1409.0473.pdf).

    The layer is called on ``[encoder_output_sequence, decoder_output_sequence]``
    and returns ``(context_vectors, attention_weights)``.

    Three sets of weights are introduced (shapes taken from ``build``):
      * W_a: (encoder_hidden, encoder_hidden)
      * U_a: (decoder_hidden, encoder_hidden)
      * V_a: (encoder_hidden, 1)
    """

    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create W_a, U_a and V_a from the [encoder, decoder] input shapes."""
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.

        self.W_a = self.add_weight(name='W_a',
                                   shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.U_a = self.add_weight(name='U_a',
                                   shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.V_a = self.add_weight(name='V_a',
                                   shape=tf.TensorShape((input_shape[0][2], 1)),
                                   initializer='uniform',
                                   trainable=True)

        super(AttentionLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs, verbose=False):
        """
        Compute attention weights and context vectors for every decoder step.

        inputs: [encoder_output_sequence, decoder_output_sequence]
        verbose: when True, print the shapes of the intermediate tensors.

        Returns ``(c_outputs, e_outputs)``: the context vectors and the
        softmax-normalised attention weights produced by the two ``K.rnn``
        passes below.
        """
        assert isinstance(inputs, (list, tuple))
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state """

            assert_msg = "States must be a list. However states {} is of type {}".format(states, type(states))
            assert isinstance(states, (list, tuple)), assert_msg

            # Some parameters required for shaping tensors
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]

            # Computing S.Wa where S=[s0, s1, ..., si]
            # <= batch_size*en_seq_len, latent_dim
            reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
            # <= batch_size, en_seq_len, latent_dim
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a), (-1, en_seq_len, en_hidden))
            if verbose:
                print('wa.s>', W_a_dot_s.shape)

            # Computing hj.Ua
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            # tanh(S.Wa + hj.Ua)
            # <= batch_size*en_seq_len, latent_dim
            reshaped_Ws_plus_Uh = K.tanh(K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            if verbose:
                print('Ws+Uh>', reshaped_Ws_plus_Uh.shape)

            # softmax(va.tanh(S.Wa + hj.Ua))
            # <= batch_size, en_seq_len
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a), (-1, en_seq_len))
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """
            # Weighted sum of encoder outputs: <= batch_size, encoder hidden size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        def create_initial_state(inputs, hidden_size):
            # We are not using initial states, but need to pass something to the K.rnn function
            fake_state = K.zeros_like(inputs)  # <= (batch_size, enc_seq_len, latent_dim)
            fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
            fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
            fake_state = K.tile(fake_state, [1, hidden_size])  # <= (batch_size, hidden_size)
            return fake_state

        # <= (batch_size, latent_dim)
        fake_state_c = create_initial_state(encoder_out_seq, encoder_out_seq.shape[-1])
        # <= (batch_size, enc_seq_len)
        fake_state_e = create_initial_state(encoder_out_seq, encoder_out_seq.shape[1])

        # Computing energy outputs
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        _, e_outputs, _ = K.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )

        # Computing context vectors
        _, c_outputs, _ = K.rnn(
            context_step, e_outputs, [fake_state_c],
        )

        return c_outputs, e_outputs

    def compute_output_shape(self, input_shape):
        """ Outputs produced by the layer: [context vectors, attention weights] """
        return [
            # The context is a weighted sum of *encoder* outputs, so its last
            # dimension is the encoder hidden size, not the decoder's.
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][2])),
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
        ]

## Model


In [None]:
def create_model(num_encoder_tokens, num_decoder_tokens, max_length,
                 hidden_units, use_bidirectional, use_attention,
                 dropout_rate, recurrent_dropout_rate,
                 l1_reg, l2_reg):
    """
    Build the LSTM encoder-decoder translation model.

    Args:
        num_encoder_tokens: Vocabulary size of the encoder embedding.
        num_decoder_tokens: Vocabulary size of the decoder embedding and of
            the softmax output layer.
        max_length: Fixed length of the encoder input sequences.
        hidden_units: Embedding size and LSTM width. The decoder LSTM uses
            ``2 * hidden_units`` when the encoder is bidirectional so that
            it can be initialised from the concatenated encoder states.
        use_bidirectional: Wrap the encoder LSTM in ``Bidirectional``.
        use_attention: Concatenate ``AttentionLayer`` context vectors to the
            decoder outputs before the output layer.
        dropout_rate: Input dropout of every LSTM; ``None``/0 disables it.
        recurrent_dropout_rate: Recurrent dropout of every LSTM; ``None``/0
            disables it.
        l1_reg, l2_reg: L1/L2 strengths applied to the kernel, recurrent and
            bias weights of the LSTMs and to the kernel and bias of the
            output layer. No regulariser is attached when both are
            ``None``/0.

    Returns:
        An uncompiled ``Model`` taking ``[encoder_inputs, decoder_inputs]``
        and producing per-step softmax distributions over decoder tokens.
    """

    def make_regularizer():
        # A fresh regulariser per weight; None keeps the Keras default.
        if not (l1_reg or l2_reg):
            return None
        return l1_l2(l1=l1_reg or 0.0, l2=l2_reg or 0.0)

    def make_lstm(units):
        # All three LSTMs share the same dropout / regularisation settings.
        return LSTM(
            units,
            return_sequences=True,
            return_state=True,
            dropout=dropout_rate or 0.0,
            recurrent_dropout=recurrent_dropout_rate or 0.0,
            kernel_regularizer=make_regularizer(),
            recurrent_regularizer=make_regularizer(),
            bias_regularizer=make_regularizer(),
        )

    # Encoder
    encoder_inputs = Input(shape=(max_length,))
    enc_emb = Embedding(
        input_dim=num_encoder_tokens,
        output_dim=hidden_units
    )(encoder_inputs)

    if use_bidirectional:
        encoder = Bidirectional(make_lstm(hidden_units))
        encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder(enc_emb)
        # Merge both directions so the states match the wider decoder LSTM
        state_h = Concatenate()([forward_h, backward_h])
        state_c = Concatenate()([forward_c, backward_c])
    else:
        encoder = make_lstm(hidden_units)
        encoder_outputs, state_h, state_c = encoder(enc_emb)
    encoder_states = [state_h, state_c]

    # Decoder
    decoder_inputs = Input(shape=(None,))
    dec_emb = Embedding(
        input_dim=num_decoder_tokens,
        output_dim=hidden_units
    )(decoder_inputs)

    decoder_lstm = make_lstm(hidden_units * 2 if use_bidirectional else hidden_units)
    decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=encoder_states)

    if use_attention:
        # Attention mechanism: append the context vector to each decoder step
        attention_layer = AttentionLayer()
        attention_result, _ = attention_layer([encoder_outputs, decoder_outputs])
        decoder_features = Concatenate(axis=-1)([decoder_outputs, attention_result])
    else:
        decoder_features = decoder_outputs

    decoder_outputs = Dense(
        num_decoder_tokens,
        activation='softmax',
        kernel_regularizer=make_regularizer(),
        bias_regularizer=make_regularizer(),
    )(decoder_features)

    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    return model


# Train model

## Helper Functions


In [None]:
class NeptuneCallback(Callback):
    """
    Keras callback that forwards every metric reported at the end of an
    epoch to a Neptune run under ``train/<metric_name>``.

    Args:
        run: Neptune run object the metrics are logged to.
        fold: Index of the fold being trained (stored on the instance; not
            used in the logged field names).
    """

    def __init__(self, run, fold):
        super().__init__()
        self.run = run
        self.fold = fold

    def on_epoch_end(self, epoch, logs=None):
        # `logs` defaults to None, so guard before iterating.
        for metric_name, metric_value in (logs or {}).items():
            self.run[f"train/{metric_name}"].log(metric_value, step=epoch)

def initialize_neptune_run(config: ModelConfig, fold: int):
    """
    Initialize a Neptune run for tracking a specific fold.

    Args:
        config: Run configuration; supplies the Neptune project, API token,
            version and experiment id.
        fold: Index of the fold this run tracks.

    Returns:
        The started Neptune run. The caller is responsible for stopping it.
    """
    run = neptune.init_run(
        project=config.neptune_project,  # Pass project name from config
        api_token=config.neptune_api_token,  # Pass API token from config
        name=f"{config.version}-Bi-LSTM-{config.use_attention}-Fold-{fold}",
        tags=[
            f"Version-{config.version}",
            f"Fold-{fold}",
            f"Experiment-{config.experiment_id[:8]}"
        ]
    )

    # Log configuration to Neptune, without the credential: the API token
    # must not be stored as run metadata visible to project members.
    parameters = dataclasses.asdict(config)
    parameters.pop("neptune_api_token", None)
    run["parameters"] = parameters
    run["parameters/fold"] = fold
    run["config/version"] = config.version
    run["config/experiment_id"] = config.experiment_id

    return run

def prepare_decoder_data(data):
    """
    Split target sequences into teacher-forcing decoder inputs and targets.

    The input drops the last position of every row, the target drops the
    first one, and both receive a trailing axis of size 1.

    Args:
        data (np.ndarray): Array indexed as (samples, sequence positions).

    Returns:
        Tuple ``(decoder_input, decoder_target)``, each of shape
        ``(samples, positions - 1, 1)``.
    """
    n_samples, n_steps = data.shape[0], data.shape[1]
    out_shape = (n_samples, n_steps - 1, 1)

    without_last = data[:, :-1]
    without_first = data[:, 1:]

    return without_last.reshape(out_shape), without_first.reshape(out_shape)

def log_model_architecture(model, run, config):
    """
    Upload a diagram of the model and its textual summary to Neptune.

    The diagram is written to a PNG in the current working directory
    (named after the attention flag and hidden size) and uploaded to
    ``model/plot``; the summary text is stored under ``model/summary``.
    """
    # Diagram: render to disk first, then hand the file to Neptune
    diagram_path = f"Bi-LSTM-{config.use_attention}-{config.hidden_units}.png"
    plot_model(model, to_file=diagram_path, show_shapes=True)
    run["model/plot"].upload(diagram_path)

    # Summary: capture what model.summary() would print
    captured_lines = []
    model.summary(print_fn=captured_lines.append)
    summary_text = "\n".join(captured_lines)
    run["model/summary"] = summary_text

def prepare_callbacks(run, fold: int, early_stopping_patience: Optional[int] = None):
    """
    Prepare training callbacks: LR scheduling, Neptune logging and,
    optionally, early stopping.

    Args:
        run: Neptune run that receives the per-epoch metrics.
        fold: Index of the fold being trained.
        early_stopping_patience: When None (default) no early stopping is
            used, matching the list this function has always returned. When
            set, an ``EarlyStopping`` callback on ``val_loss`` with that
            patience and ``restore_best_weights=True`` is appended. Choose a
            value larger than the scheduler's patience of 5 if the reduced
            learning rate should get a chance to help before stopping.

    Returns:
        List of Keras callbacks.
    """
    callbacks = [
        # Halve the learning rate after 5 epochs without val_loss improvement
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-5
        ),
        NeptuneCallback(run, fold),
    ]

    if early_stopping_patience is not None:
        callbacks.append(
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=early_stopping_patience,
                restore_best_weights=True,
                verbose=1
            )
        )

    return callbacks

def log_final_metrics(run, history):
    """
    Log final training and validation metrics to Neptune.

    For each of ``loss``, ``accuracy``, ``val_loss`` and ``val_accuracy``
    the value of the last epoch is stored under ``metrics/final_*``.
    Metrics that are absent from the history (for example when training
    ran without validation data) or have no recorded epochs are skipped
    instead of raising. ``metrics/epochs_trained`` is the number of
    recorded ``loss`` values.

    Args:
        run: Neptune run (any object supporting ``run[key] = value``).
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists, as returned by ``model.fit``.
    """
    recorded = history.history

    # History key -> Neptune field
    final_fields = {
        'loss': "metrics/final_train_loss",
        'accuracy': "metrics/final_train_accuracy",
        'val_loss': "metrics/final_val_loss",
        'val_accuracy': "metrics/final_val_accuracy",
    }
    for metric_key, field_name in final_fields.items():
        values = recorded.get(metric_key)
        if values:
            run[field_name] = values[-1]

    run["metrics/epochs_trained"] = len(recorded.get('loss', []))

def plot_learning_curves(history, run):
    """
    Draw loss and accuracy curves side by side and upload the figure.

    The left panel shows training vs. validation loss, the right panel
    training vs. validation accuracy. The figure is uploaded to
    ``visualizations/learning_curves`` on the Neptune run and then closed.
    """
    figure, axes = plt.subplots(1, 2, figsize=(15, 5))

    # (axis, history key, human-readable label) for each panel
    panels = (
        (axes[0], 'loss', 'Loss'),
        (axes[1], 'accuracy', 'Accuracy'),
    )

    for axis, metric_key, label in panels:
        axis.plot(history.history[metric_key], label=f'Train {label}')
        axis.plot(history.history[f'val_{metric_key}'], label=f'Val {label}')
        axis.set_title(f'Model {label}')
        axis.set_xlabel('Epoch')
        axis.set_ylabel(label)
        axis.legend()
        axis.grid(True)

    plt.tight_layout()
    image = neptune.types.File.as_image(figure)
    run["visualizations/learning_curves"].upload(image)
    plt.close(figure)



def summarize_training(neptune_runs, fold_histories, fold_scores,
                       config: Optional["ModelConfig"] = None):
    """
    Summarize and log results from all folds.

    Prints the mean and standard deviation of the last-epoch validation
    accuracy over all folds and records both in a separate Neptune run
    named "Summary-Run".

    Args:
        neptune_runs: Per-fold Neptune runs. Kept for interface
            compatibility; the project and token are no longer read from
            these runs, because the token is not stored as a run field.
        fold_histories: One ``history.history`` dict per fold; each must
            contain a ``val_accuracy`` list.
        fold_scores: Evaluation scores per fold (only counted).
        config: Optional configuration supplying ``neptune_project`` and
            ``neptune_api_token``. When omitted, ``None`` is passed for both
            so Neptune resolves them from the ``NEPTUNE_PROJECT`` and
            ``NEPTUNE_API_TOKEN`` environment variables.
    """
    print("\nTraining completed! Summary of results:")
    print(f"Number of folds completed: {len(fold_scores)}")

    final_val_accuracies = [history['val_accuracy'][-1] for history in fold_histories]
    if not final_val_accuracies:
        # Nothing to average; avoid NaN statistics and an empty summary run
        print("No fold histories available; nothing to summarize.")
        return

    avg_val_acc = np.mean(final_val_accuracies)
    std_val_acc = np.std(final_val_accuracies)

    print(f"Average validation accuracy: {avg_val_acc:.4f}")
    print(f"Standard deviation: {std_val_acc:.4f}")

    # Log summary to Neptune
    summary_run = neptune.init_run(
        project=config.neptune_project if config is not None else None,
        api_token=config.neptune_api_token if config is not None else None,
        name="Summary-Run"
    )
    try:
        summary_run["metrics/mean_val_accuracy"] = avg_val_acc
        summary_run["metrics/std_val_accuracy"] = std_val_acc
    finally:
        # Always close the run, even if logging fails
        summary_run.stop()



## Train with K-Fold Cross-Validation

In [None]:
def create_and_train_model_with_kfold(config: ModelConfig,
                                      fold_splits_path: str,
                                      tokenizer_info_path: str,
                                      model_dir: str = "/content/drive/MyDrive/Dataset/MT-JavaIndo/model"):
    """
    Train machine translation model with configurable k-fold cross-validation

    For every selected fold a Neptune run is started, a fresh model is
    built, trained, saved, evaluated on the fold's validation split, and
    the run is stopped again (also when training fails).

    Args:
        config (ModelConfig): Configuration for model and training
        fold_splits_path (str): Path to pickled fold splits. Each fold entry
            must provide 'X_train', 'X_val', 'y_train' and 'y_val'.
        tokenizer_info_path (str): Path to pickled tokenizer information
            with 'num_encoder_tokens', 'num_decoder_tokens' and 'max_length'.
        model_dir (str): Directory the trained models are written to
            (created if missing). Defaults to the previously hard-coded
            Drive location.

    Returns:
        Tuple containing training histories, evaluation scores, and Neptune runs
        (the runs are already stopped when returned).

    Raises:
        ValueError: If ``config.selected_folds`` contains an index outside
            the available folds.
    """

    # Save configuration before training
    config.save_config()

    # Load fold splits and tokenizer info
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # files that come from a trusted source.
    with open(fold_splits_path, 'rb') as file:
        fold_splits = pickle.load(file)

    with open(tokenizer_info_path, 'rb') as file:
        tokenizer_info = pickle.load(file)

    # Extract tokenizer information
    num_encoder_tokens = tokenizer_info['num_encoder_tokens']
    num_decoder_tokens = tokenizer_info['num_decoder_tokens']
    max_length = tokenizer_info['max_length']

    # Determine which folds to train (all folds when none are selected)
    train_folds = config.selected_folds or list(range(len(fold_splits)))
    if not all(0 <= fold < len(fold_splits) for fold in train_folds):
        raise ValueError("Invalid fold indices in selected_folds.")

    os.makedirs(model_dir, exist_ok=True)

    fold_histories, fold_scores, neptune_runs = [], [], []

    for fold in train_folds:
        # Unpack training and validation data for the current fold
        fold_data = fold_splits[fold]  # each fold is a mapping of named arrays
        X_train, X_val = fold_data['X_train'], fold_data['X_val']
        y_train, y_val = fold_data['y_train'], fold_data['y_val']

        # Debugging: Print fold information
        print(f"Training on fold {fold}:")
        print(f"  X_train shape: {X_train.shape}")
        print(f"  X_val shape: {X_val.shape}")
        print(f"  y_train shape: {y_train.shape}")
        print(f"  y_val shape: {y_val.shape}")

        # Initialize Neptune run
        run = initialize_neptune_run(config, fold)
        neptune_runs.append(run)

        try:
            # Create model
            model = create_model(
                num_encoder_tokens=num_encoder_tokens,
                num_decoder_tokens=num_decoder_tokens,
                max_length=max_length,
                hidden_units=config.hidden_units,
                use_bidirectional=config.use_bidirectional,
                use_attention=config.use_attention,
                dropout_rate=config.dropout_rate,
                recurrent_dropout_rate=config.recurrent_dropout_rate,
                l1_reg=config.l1_reg,
                l2_reg=config.l2_reg,
            )
            model.compile(
                optimizer=tf.keras.optimizers.Adam(config.learning_rate),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy']
            )

            model.summary()

            # Log model architecture
            log_model_architecture(model, run, config)

            # Prepare callbacks
            callbacks = prepare_callbacks(run, fold)

            # Prepare the data (decoder input / target shifted by one step)
            decoder_input, decoder_target = prepare_decoder_data(y_train)
            decoder_input_val, decoder_target_val = prepare_decoder_data(y_val)

            # Train model
            start_time = time.time()
            history = model.fit(
                [X_train, decoder_input], decoder_target,
                batch_size=config.batch_size,
                epochs=config.epochs,
                validation_data=([X_val, decoder_input_val], decoder_target_val),
                callbacks=callbacks,
            )
            training_time = time.time() - start_time
            run["metrics/training_time_seconds"] = training_time

            # Save model. Single-fold runs keep the historical file name;
            # with several folds the fold index is appended so later folds
            # do not overwrite earlier ones.
            model_name = (
                f"{config.hidden_units}"
                f"-{'Bahdanau' if config.use_attention else 'no-attention'}"
                f"-{'Dropout' if config.use_dropout else 'no-dropout'}"
            )
            if len(train_folds) > 1:
                model_name += f"-fold-{fold}"
            model.save(os.path.join(model_dir, f"{model_name}.keras"))

            # Evaluate model
            scores = model.evaluate([X_val, decoder_input_val], decoder_target_val, verbose=1)
            fold_scores.append(scores)
            fold_histories.append(history.history)

            # Log final metrics
            log_final_metrics(run, history)

            # Create and log learning curves
            plot_learning_curves(history, run)
        finally:
            # Close the Neptune run even if this fold fails
            run.stop()

    # Summarize and return results
    return fold_histories, fold_scores, neptune_runs


# Main

## Declare Path

In [None]:
# Pickled inputs on the mounted Drive: the k-fold splits and the tokenizer
# metadata, both passed to create_and_train_model_with_kfold below.
PATH_folds = "/content/drive/MyDrive/Dataset/MT-JavaIndo/nusa/dataset_kfolds (3).pkl"
PATH_tokenizer_info = "/content/drive/MyDrive/Dataset/MT-JavaIndo/nusa/tokenizer_info (3).pkl"


## Declare Config and Train

In [None]:
# Define possible configurations (every combination below is trained)
hidden_units_list = [64]
use_attention_list = [True]
dropout_rate_list = [0.2]

# Results of every trained configuration, keyed by config.version, so that
# earlier configurations are not lost when the loop moves on.
all_results = {}

# Loop to create and train models for all configurations
for hidden_units in hidden_units_list:
    for use_attention in use_attention_list:
        for dropout_rate in dropout_rate_list:
            # Define version name
            version_name = f"{hidden_units}-{'Bahdanau' if use_attention else 'no-attention'}"
            if dropout_rate is not None:
                version_name += f"-dropout-{dropout_rate}"

            # Create config
            config = ModelConfig(
                hidden_units=hidden_units,
                use_bidirectional=True,
                use_attention=use_attention,
                dropout_rate=dropout_rate if dropout_rate is not None else 0,
                recurrent_dropout_rate=0.15,
                l1_reg=1e-4,
                l2_reg=1e-3,
                use_dropout=dropout_rate is not None,
                use_recurrent_dropout=False,
                use_l1_regularization=False,
                use_l2_regularization=False,
                batch_size=64,
                epochs=30,
                learning_rate=1e-2,
                selected_folds=4,
                version=f"MT-JavaIndo-v1.1-{version_name}",
                experiment_id=f"{version_name}-experiment",
                config_filepath="/content/experiments",

                # NEPTUNE
                neptune_project="ihsani.yulfa/Translation-Project",
                # Never commit the token: read it from the environment
                # (e.g. set NEPTUNE_API_TOKEN in a Colab secret / env var).
                neptune_api_token=os.environ.get("NEPTUNE_API_TOKEN"),
            )

            # Train the model with the current configuration
            print(f"Training model with configuration: {config.version}")
            histories, scores, runs = create_and_train_model_with_kfold(
                config,
                PATH_folds,
                PATH_tokenizer_info
            )
            all_results[config.version] = (histories, scores, runs)

import shutil
from google.colab import files

# Zip the directory the trained models are saved to (the Drive model
# folder used by the training function) and download the archive.
# make_archive returns the full path of the file it created, so the
# download no longer depends on the current working directory.
MODEL_DIR = "/content/drive/MyDrive/Dataset/MT-JavaIndo/model"
archive_path = shutil.make_archive('/content/model', 'zip', MODEL_DIR)
files.download(archive_path)



Training model with configuration: MT-JavaIndo-v1.1-64-Bahdanau-dropout-0.2
Configuration saved to experiments/config_MT-JavaIndo-v1.1-64-Bahdanau-dropout-0.2_64-Bahda.json
downloading https://nlp.h-its.org/bpemb/jv/jv.wiki.bpe.vs200000.model


100%|██████████| 3734000/3734000 [00:01<00:00, 3299766.75B/s]


downloading https://nlp.h-its.org/bpemb/id/id.wiki.bpe.vs200000.model


100%|██████████| 3739973/3739973 [00:01<00:00, 3335184.57B/s]


Training on fold 4:
  X_train shape: (9576, 35)
  X_val shape: (2393, 35)
  y_train shape: (9576, 35)
  y_val shape: (2393, 35)
[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ihsani.yulfa/Translation-Project/e/TRAN-304


Epoch 1/30
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 88ms/step - accuracy: 0.5567 - loss: 3.7251 - val_accuracy: 0.6421 - val_loss: 2.5832 - learning_rate: 0.0100
Epoch 2/30
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 84ms/step - accuracy: 0.6543 - loss: 2.3891 - val_accuracy: 0.6807 - val_loss: 2.2698 - learning_rate: 0.0100
Epoch 3/30
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 83ms/step - accuracy: 0.7042 - loss: 1.9107 - val_accuracy: 0.7261 - val_loss: 1.9357 - learning_rate: 0.0100
Epoch 4/30
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 83ms/step - accuracy: 0.7607 - loss: 1.4019 - val_accuracy: 0.7580 - val_loss: 1.7138 - learning_rate: 0.0100
Epoch 5/30
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 83ms/step - accuracy: 0.8109 - loss: 1.0005 - val_accuracy: 0.7707 - val_loss: 1.6392 - learning_rate: 0.0100
Epoch 6/30
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>