In [None]:
import torch
import torch.nn as nn

class MyEncoder(nn.Module):
    """Sequence encoder: embedding -> dropout -> recurrent stack.

    The recurrent stack is one of ``MyRNNLayer``, ``MyGRULayer`` or
    ``MyLSTMLayer``, chosen by ``cell_type``.

    Args:
        input_size: Number of rows in the embedding table.
        embedding_size: Width of each embedding vector.
        hidden_size: Hidden width of the recurrent layers.
        cell_type: One of ``'rnn'``, ``'gru'`` or ``'lstm'``.
        num_layers: Number of stacked recurrent layers.
        bidirectional: Whether the recurrent stack runs in both directions.
        dropout_prob: Probability used by the embedding dropout and passed on
            to the recurrent stack.
        device: Device on which ``init_hidden_state`` allocates its tensor.
            The constructor does not move the module's parameters there.

    Raises:
        ValueError: If ``cell_type`` is not a supported cell name.
    """

    def __init__(self, input_size=72, embedding_size=64, hidden_size=256, cell_type='gru', num_layers=2, bidirectional=True, dropout_prob=0,
                 device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
        super(MyEncoder, self).__init__()

        # Fail fast: without this check an unknown cell type would leave
        # `rnn_layer` undefined and only surface as an AttributeError in forward().
        if cell_type not in ('rnn', 'gru', 'lstm'):
            raise ValueError(
                "cell_type must be one of 'rnn', 'gru' or 'lstm', got {!r}".format(cell_type)
            )

        # Configuration parameters for the encoder (returned by get_config)
        self.config = {
            'input_size': input_size,
            'embedding_size': embedding_size,
            'hidden_size': hidden_size,
            'cell_type': cell_type,
            'num_layers': num_layers,
            'bidirectional': bidirectional,
            'dropout_prob': dropout_prob,
            'device': device.type
        }

        # Assigning parameters to instance variables
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.cell_type = cell_type
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.dropout_prob = dropout_prob
        self.device = device

        # Embedding layer
        self.embedding_layer = nn.Embedding(input_size, embedding_size)
        # Dropout layer applied to the embedded tokens
        self.dropout_layer = nn.Dropout(dropout_prob)

        # Number of directions; used to size the initial hidden state
        self.directions = 2 if bidirectional else 1

        # Instantiating the appropriate RNN layer based on cell type
        if cell_type == 'rnn':
            self.rnn_layer = MyRNNLayer(embedding_size, hidden_size, num_layers, bidirectional, dropout_prob)
        elif cell_type == 'gru':
            self.rnn_layer = MyGRULayer(embedding_size, hidden_size, num_layers, bidirectional, dropout_prob)
        else:  # 'lstm' (validated above)
            self.rnn_layer = MyLSTMLayer(embedding_size, hidden_size, num_layers, bidirectional, dropout_prob)

    def forward(self, input_seq, hidden_state, cell_state=None):
        """Encode a batch of token indices.

        Args:
            input_seq: Integer tensor of token indices fed to the embedding
                (presumably ``(seq_len, batch)``; confirm against the caller).
            hidden_state: Initial hidden state, e.g. from ``init_hidden_state``.
            cell_state: Initial LSTM cell state.  Only used when
                ``cell_type == 'lstm'``; defaults to zeros shaped like
                ``hidden_state`` when omitted.

        Returns:
            ``(output_seq, hidden_state, cell_state)``.  ``cell_state`` is
            ``None`` unless ``cell_type == 'lstm'``.
        """
        # Embedding the input sequence, then applying dropout
        embedded_seq = self.dropout_layer(self.embedding_layer(input_seq))

        if self.cell_type == 'lstm':
            if cell_state is None:
                # nn.LSTM cannot take (h, None); start from a zero cell state.
                cell_state = torch.zeros_like(hidden_state)
            output_seq, (hidden_state, cell_state) = self.rnn_layer(embedded_seq, (hidden_state, cell_state))
        else:
            output_seq, hidden_state = self.rnn_layer(embedded_seq, hidden_state)
            cell_state = None  # RNN/GRU carry no cell state

        return output_seq, hidden_state, cell_state

    def get_config(self):
        """Return the dict of constructor arguments recorded at build time."""
        return self.config

    def init_hidden_state(self, batch_size):
        """Return a zero hidden state of shape
        ``(directions * num_layers, batch_size, hidden_size)`` on ``self.device``.
        """
        return torch.zeros(self.directions * self.num_layers, batch_size, self.hidden_size, device=self.device)

class MyRNNLayer(nn.Module):
    """Thin wrapper that owns an ``nn.RNN`` (attribute ``rnn``) and delegates to it.

    ``batch_first`` is not set, so PyTorch's default tensor layout applies.
    """

    def __init__(self, input_size, hidden_size, num_layers, bidirectional, dropout_prob):
        super().__init__()
        # Collect the constructor options once, then build the vanilla RNN stack.
        recurrent_options = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=bidirectional,
            dropout=dropout_prob,
        )
        self.rnn = nn.RNN(**recurrent_options)

    def forward(self, input_seq, hidden_state):
        """Run ``input_seq`` through the RNN; return ``(output, final_hidden)``."""
        output_seq, final_hidden = self.rnn(input_seq, hidden_state)
        return output_seq, final_hidden

class MyGRULayer(nn.Module):
    """Thin wrapper that owns an ``nn.GRU`` (attribute ``gru``) and delegates to it.

    ``batch_first`` is not set, so PyTorch's default tensor layout applies.
    """

    def __init__(self, input_size, hidden_size, num_layers, bidirectional, dropout_prob):
        super().__init__()
        # Collect the constructor options once, then build the GRU stack.
        recurrent_options = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=bidirectional,
            dropout=dropout_prob,
        )
        self.gru = nn.GRU(**recurrent_options)

    def forward(self, input_seq, hidden_state):
        """Run ``input_seq`` through the GRU; return ``(output, final_hidden)``."""
        output_seq, final_hidden = self.gru(input_seq, hidden_state)
        return output_seq, final_hidden

class MyLSTMLayer(nn.Module):
    """Thin wrapper that owns an ``nn.LSTM`` (attribute ``lstm``) and delegates to it.

    ``batch_first`` is not set, so PyTorch's default tensor layout applies.
    """

    def __init__(self, input_size, hidden_size, num_layers, bidirectional, dropout_prob):
        super().__init__()
        # Collect the constructor options once, then build the LSTM stack.
        recurrent_options = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=bidirectional,
            dropout=dropout_prob,
        )
        self.lstm = nn.LSTM(**recurrent_options)

    def forward(self, input_seq, hidden_state):
        """Run ``input_seq`` through the LSTM.

        ``hidden_state`` is handed to ``nn.LSTM`` unchanged (the encoder passes
        a ``(hidden, cell)`` pair); the result is ``(output, final_state)``
        exactly as ``nn.LSTM`` produces it.
        """
        output_seq, final_state = self.lstm(input_seq, hidden_state)
        return output_seq, final_state


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MyDecoder(nn.Module):
    """Sequence decoder: embedding -> dropout -> [attention] -> RNN -> linear.

    Args:
        input_size: Number of unique target symbols; sizes both the embedding
            table and the output logits.
        embedding_size: Width of each embedding vector.
        hidden_size: Hidden width of the recurrent layers.
        cell_type: ``'lstm'`` or ``'gru'``; any other value selects ``nn.RNN``.
        num_layers: Number of stacked recurrent layers.
        use_attention: Whether to compute an additive attention context over
            ``encoder_outputs`` and concatenate it to the embedded input.
        attention_size: Width of the attention scoring projection.  Defaults
            to ``hidden_size`` when ``None``.
        dropout: Probability used by the embedding dropout and passed on to
            the recurrent stack.
        bidirectional: Whether the recurrent stack runs in both directions.
        device: Recorded for ``get_params``; the constructor does not move the
            module's parameters there.

    Note:
        ``self.dropout`` is the ``nn.Dropout`` module; the raw probability is
        kept separately in ``self.dropout_prob`` because the RNN constructors
        require a number, not a module.
    """

    def __init__(self, input_size=26, embedding_size=64, hidden_size=256, cell_type='lstm', num_layers=2, use_attention=False,
                 attention_size=None, dropout=0, bidirectional=True,
                 device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
        super(MyDecoder, self).__init__()

        # Decoder parameters
        self.input_size = input_size  # Number of unique input symbols
        self.embedding_size = embedding_size  # Dimensionality of the embedding space
        self.hidden_size = hidden_size  # Dimensionality of the hidden state
        self.cell_type = cell_type  # Type of recurrent cell (RNN, GRU, LSTM)
        self.num_layers = num_layers  # Number of recurrent layers
        self.use_attention = use_attention  # Flag indicating whether to use attention mechanism
        self.attention_size = attention_size  # Width of the attention scoring projection
        self.dropout_prob = dropout  # Raw dropout probability (a number)
        self.bidirectional = bidirectional  # Flag indicating bidirectional RNN
        self.directions = 2 if bidirectional else 1  # Feature multiplier of the RNN output
        self.device = device  # Device recorded in get_params

        # Components initialization (order kept stable so parameter
        # initialisation consumes the RNG in a fixed sequence)
        self._build_embedding_layer()
        self._build_dropout_layer()
        self._build_decoder_rnn()
        if use_attention:
            self._build_attention_mechanism()
        self._build_output_layer()

    def forward(self, input_seq, hidden_state, cell_state=None, encoder_outputs=None):
        """Run one decoding call.

        Args:
            input_seq: Integer tensor of token indices fed to the embedding.
                With attention enabled the embedded tensor must have length 1
                on dim 0 so it can be concatenated with the context
                (i.e. one decoding step per call).
            hidden_state: Initial hidden state for the RNN,
                ``(num_layers * directions, batch, hidden_size)``.
            cell_state: Initial LSTM cell state; defaults to zeros shaped like
                ``hidden_state`` when omitted.  Ignored for non-LSTM cells.
            encoder_outputs: Required when ``use_attention`` is true; see
                ``_apply_attention`` for the expected layout.

        Returns:
            ``(output, hidden_state, cell_state, attention_weights)`` where
            ``output`` holds ``input_size`` logits per position,
            ``cell_state`` is ``None`` for non-LSTM cells and
            ``attention_weights`` is ``None`` when attention is disabled.
        """
        # Embed input sequence and apply dropout
        embedded_seq = self.dropout(self.embedding(input_seq))

        # Apply attention mechanism if enabled
        attention_weights = None
        if self.use_attention:
            context, attention_weights = self._apply_attention(hidden_state, encoder_outputs)
            embedded_seq = torch.cat((embedded_seq, context), 2)

        # Pass through decoder RNN.  nn.LSTM returns (output, (h, c)) while
        # nn.GRU / nn.RNN return (output, h), so the two cases unpack differently.
        if self.cell_type == 'lstm':
            if cell_state is None:
                cell_state = torch.zeros_like(hidden_state)
            output, (hidden_state, cell_state) = self.decoder_rnn(embedded_seq, (hidden_state, cell_state))
        else:
            output, hidden_state = self.decoder_rnn(embedded_seq, hidden_state)
            cell_state = None

        # Project RNN features to one logit per output symbol
        output = self.output_layer(output)

        return output, hidden_state, cell_state, attention_weights

    # Method to build embedding layer
    def _build_embedding_layer(self):
        self.embedding = nn.Embedding(self.input_size, self.embedding_size)

    # Method to build dropout layer
    def _build_dropout_layer(self):
        self.dropout = nn.Dropout(self.dropout_prob)

    # Method to build decoder RNN
    def _build_decoder_rnn(self):
        rnn_input_size = self.embedding_size
        if self.use_attention:
            # The context vector is a weighted sum of encoder outputs, so it is
            # hidden_size wide (the attention layers take hidden_size inputs).
            rnn_input_size += self.hidden_size

        if self.cell_type == 'lstm':
            rnn_cls = nn.LSTM
        elif self.cell_type == 'gru':
            rnn_cls = nn.GRU
        else:
            rnn_cls = nn.RNN

        # The RNN constructors need the numeric probability, not the Dropout module.
        self.decoder_rnn = rnn_cls(input_size=rnn_input_size, hidden_size=self.hidden_size,
                                   num_layers=self.num_layers, bidirectional=self.bidirectional,
                                   dropout=self.dropout_prob)

    # Method to build attention mechanism
    def _build_attention_mechanism(self):
        projection_size = self.hidden_size if self.attention_size is None else self.attention_size
        self.attention_W = nn.Linear(self.hidden_size, projection_size)  # projects encoder outputs
        self.attention_U = nn.Linear(self.hidden_size, projection_size)  # projects decoder state
        self.attention_V = nn.Linear(projection_size, 1)  # scores each source position

    # Method to build output layer
    def _build_output_layer(self):
        # A bidirectional RNN emits 2 * hidden_size features per position; the
        # logits always have one entry per output symbol.
        self.output_layer = nn.Linear(self.hidden_size * self.directions, self.input_size)

    # Method to apply attention mechanism
    def _apply_attention(self, hidden_state, encoder_outputs):
        """Additive attention over the encoder outputs.

        Args:
            hidden_state: ``(num_layers * directions, batch, hidden_size)``;
                the last slice along dim 0 is used as the query.
            encoder_outputs: Expected as ``(batch, src_len, hidden_size)``
                because softmax and the weighted sum both run over dim 1.
                NOTE(review): the encoder in this file is sequence-first and
                may emit ``2 * hidden_size`` features; confirm the caller
                reshapes accordingly.

        Returns:
            ``(context, attention_weights)`` with shapes
            ``(1, batch, hidden_size)`` and ``(batch, src_len, 1)``.

        Raises:
            ValueError: If ``encoder_outputs`` is ``None``.
        """
        if encoder_outputs is None:
            raise ValueError("encoder_outputs is required when use_attention=True")

        # (batch, 1, hidden) so the query broadcasts over source positions.
        query = hidden_state[-1].unsqueeze(1)
        encoder_transform = self.attention_W(encoder_outputs)
        hidden_transform = self.attention_U(query)
        score = self.attention_V(torch.tanh(encoder_transform + hidden_transform))
        attention_weights = F.softmax(score, dim=1)
        context_vector = torch.sum(attention_weights * encoder_outputs, dim=1)

        # Leading length-1 dim lets the context be concatenated to the embedded step.
        return context_vector.unsqueeze(0), attention_weights

    # Method to get decoder parameters
    def get_params(self):
        """Return the constructor arguments as a plain dict."""
        return {
            'input_size': self.input_size,
            'embedding_size': self.embedding_size,
            'hidden_size': self.hidden_size,
            'attention_size': self.attention_size,
            'cell_type': self.cell_type,
            'num_layers': self.num_layers,
            'dropout': self.dropout.p,
            'bidirectional': self.bidirectional,
            'device': self.device.type,
            'use_attention': self.use_attention
        }

# Note: this is only the functional part of the code, not the whole program, so running it as-is will raise errors.
