In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

2023-06-07 11:13:08.683257: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Define encoder and decoder layers:

In [2]:
class EncoderLayer(keras.layers.Layer):
    """One encoder block: self-attention followed by a feed-forward network.

    Each of the two sub-blocks is followed by dropout, a residual addition
    and layer normalization, in that order.

    Args:
        emb_dim: Output width of the feed-forward sub-block; also passed as
            ``key_dim`` to the attention layer.
        num_heads: Number of attention heads.
        hid_dim: Width of the hidden ReLU layer of the feed-forward sub-block.
        dropout: Dropout rate applied after each sub-block.
    """

    def __init__(self, emb_dim, num_heads, hid_dim, dropout):
        super().__init__()

        # --- self-attention sub-block ---
        # NOTE(review): Keras' `key_dim` is the size of each head, so every head
        # is emb_dim wide here (not emb_dim // num_heads) -- confirm intended.
        self.attention = keras.layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=emb_dim,
        )
        self.dropout1 = keras.layers.Dropout(dropout)
        self.norm1 = keras.layers.LayerNormalization(epsilon=1e-6)

        # --- position-wise feed-forward sub-block ---
        self.dense1 = keras.layers.Dense(hid_dim, activation='relu')
        self.dense2 = keras.layers.Dense(emb_dim)
        self.dropout2 = keras.layers.Dropout(dropout)
        self.norm2 = keras.layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs, training=True):
        """Run the block on ``inputs`` and return a tensor of the same width.

        Args:
            inputs: Sequence representations. The residual additions assume the
                last dimension equals ``emb_dim`` -- TODO confirm with callers.
            training: Forwarded to the dropout layers. NOTE(review): defaults
                to True, so a direct call without this argument may run with
                dropout enabled -- confirm intended.
        """
        # Self-attention: the sequence is used as both query and value.
        attended = self.attention(inputs, inputs)
        attended = self.dropout1(attended, training=training)
        attended = self.norm1(inputs + attended)

        # Feed-forward network with its own residual connection.
        projected = self.dense2(self.dense1(attended))
        projected = self.dropout2(projected, training=training)
        return self.norm2(attended + projected)


class DecoderLayer(keras.layers.Layer):
    """One decoder block: self-attention, cross-attention, feed-forward.

    Each of the three sub-blocks is followed by dropout, a residual addition
    and layer normalization, in that order.

    Args:
        emb_dim: Output width of the feed-forward sub-block; also passed as
            ``key_dim`` to both attention layers.
        num_heads: Number of attention heads in each attention layer.
        hid_dim: Width of the hidden ReLU layer of the feed-forward sub-block.
        dropout: Dropout rate applied after each sub-block.
    """

    def __init__(self, emb_dim, num_heads, hid_dim, dropout):
        super().__init__()

        # --- self-attention over the decoder sequence ---
        self.attention1 = keras.layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=emb_dim,
        )
        self.dropout1 = keras.layers.Dropout(dropout)
        self.norm1 = keras.layers.LayerNormalization(epsilon=1e-6)

        # --- cross-attention over the encoder outputs ---
        self.attention2 = keras.layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=emb_dim,
        )
        self.dropout2 = keras.layers.Dropout(dropout)
        self.norm2 = keras.layers.LayerNormalization(epsilon=1e-6)

        # --- position-wise feed-forward sub-block ---
        self.dense1 = keras.layers.Dense(hid_dim, activation='relu')
        self.dense2 = keras.layers.Dense(emb_dim)
        self.dropout3 = keras.layers.Dropout(dropout)
        self.norm3 = keras.layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs, encoder_outputs, training=True):
        """Run the block and return the updated decoder representations.

        Args:
            inputs: Decoder-side sequence representations.
            encoder_outputs: Encoder representations used as the value (and
                implicit key) of the cross-attention.
            training: Forwarded to the dropout layers.
        """
        # Self-attention. No attention mask is passed here.
        # NOTE(review): confirm whether a causal mask is needed if this layer is
        # ever used for autoregressive decoding.
        self_attended = self.attention1(inputs, inputs)
        self_attended = self.dropout1(self_attended, training=training)
        self_attended = self.norm1(inputs + self_attended)

        # Cross-attention: decoder states query the encoder outputs.
        cross_attended = self.attention2(self_attended, encoder_outputs)
        cross_attended = self.dropout2(cross_attended, training=training)
        cross_attended = self.norm2(self_attended + cross_attended)

        # Feed-forward network with its own residual connection.
        projected = self.dense2(self.dense1(cross_attended))
        projected = self.dropout3(projected, training=training)
        return self.norm3(cross_attended + projected)


Define encoder and decoder models:

In [3]:
class Encoder(keras.layers.Layer):
    """Token embedding followed by a stack of ``EncoderLayer`` blocks.

    Args:
        num_layers: Number of ``EncoderLayer`` blocks to stack.
        emb_dim: Embedding width, also used to scale the embeddings.
        num_heads: Number of attention heads per block.
        hid_dim: Hidden width of each block's feed-forward network.
        input_vocab_size: Size of the embedding table (number of token ids).
        dropout: Dropout rate for the embeddings and for every block.
    """

    def __init__(self, num_layers, emb_dim, num_heads, hid_dim, input_vocab_size, dropout):
        super().__init__()

        self.emb_dim = emb_dim
        self.embedding = keras.layers.Embedding(input_vocab_size, emb_dim)
        self.dropout = keras.layers.Dropout(dropout)
        self.encoder_layers = [
            EncoderLayer(emb_dim, num_heads, hid_dim, dropout)
            for _ in range(num_layers)
        ]

    def call(self, inputs, training=True):
        """Embed the token ids in ``inputs`` and pass them through every block."""
        hidden = self.embedding(inputs)
        # Scale the embeddings by sqrt(emb_dim) before dropout.
        # NOTE(review): no positional encoding is added in this block -- confirm
        # that is intended.
        scale = tf.math.sqrt(tf.cast(self.emb_dim, tf.float32))
        hidden = hidden * scale
        hidden = self.dropout(hidden, training=training)

        for block in self.encoder_layers:
            hidden = block(hidden, training=training)

        return hidden


class Decoder(keras.layers.Layer):
    """Token embedding followed by a stack of ``DecoderLayer`` blocks.

    Args:
        num_layers: Number of ``DecoderLayer`` blocks to stack.
        emb_dim: Embedding width, also used to scale the embeddings.
        num_heads: Number of attention heads per block.
        hid_dim: Hidden width of each block's feed-forward network.
        output_vocab_size: Size of the embedding table (number of token ids).
        dropout: Dropout rate for the embeddings and for every block.
    """

    def __init__(self, num_layers, emb_dim, num_heads, hid_dim, output_vocab_size, dropout):
        super().__init__()

        self.emb_dim = emb_dim
        self.embedding = keras.layers.Embedding(output_vocab_size, emb_dim)
        self.dropout = keras.layers.Dropout(dropout)
        self.decoder_layers = [
            DecoderLayer(emb_dim, num_heads, hid_dim, dropout)
            for _ in range(num_layers)
        ]

    def call(self, inputs, encoder_outputs, training=True):
        """Embed ``inputs`` and run every block against ``encoder_outputs``."""
        hidden = self.embedding(inputs)
        # Scale the embeddings by sqrt(emb_dim) before dropout.
        # NOTE(review): no positional encoding is added in this block -- confirm
        # that is intended.
        scale = tf.math.sqrt(tf.cast(self.emb_dim, tf.float32))
        hidden = hidden * scale
        hidden = self.dropout(hidden, training=training)

        # Every block receives the same encoder outputs.
        for block in self.decoder_layers:
            hidden = block(hidden, encoder_outputs, training=training)

        return hidden


Define the EncoderDecoder model:

In [4]:

class EncoderDecoder(keras.Model):
    """Encoder-decoder model with a final per-position vocabulary projection.

    Args:
        num_layers: Number of blocks in both the encoder and the decoder.
        emb_dim: Embedding width shared by encoder and decoder.
        num_heads: Number of attention heads per block.
        hid_dim: Hidden width of each block's feed-forward network.
        input_vocab_size: Vocabulary size of the encoder embedding.
        output_vocab_size: Vocabulary size of the decoder embedding and of the
            final projection.
        dropout: Dropout rate used throughout.
    """

    def __init__(self, num_layers, emb_dim, num_heads, hid_dim, input_vocab_size, output_vocab_size, dropout):
        super().__init__()

        self.encoder = Encoder(num_layers, emb_dim, num_heads, hid_dim, input_vocab_size, dropout)
        self.decoder = Decoder(num_layers, emb_dim, num_heads, hid_dim, output_vocab_size, dropout)
        # No activation: the model outputs one raw score per vocabulary entry.
        self.final_dense = keras.layers.Dense(output_vocab_size)

    def call(self, inputs, training=True):
        """Encode ``inputs``, decode against the encoding, and project to the vocabulary."""
        memory = self.encoder(inputs, training=training)
        # The decoder is fed the same `inputs` as the encoder rather than the targets.
        decoded = self.decoder(inputs, memory, training=training)
        return self.final_dense(decoded)



Build, compile, and train the model on your input data:

In [5]:
# Define your input data: two input sequences and two target sequences,
# each holding five integer token ids.
input_data = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
target_data = np.array([[11, 12, 13, 14, 15], [16, 17, 18, 19, 20]])

# Define model hyperparameters
num_layers = 2
emb_dim = 32
num_heads = 4
hid_dim = 64
input_vocab_size = 100
output_vocab_size = 100
dropout = 0.1

# Create an instance of the EncoderDecoder model
model = EncoderDecoder(num_layers, emb_dim, num_heads, hid_dim, input_vocab_size, output_vocab_size, dropout)

# Compile the model.
# The model's final Dense layer has no activation, so it emits unnormalized
# logits. The string alias 'sparse_categorical_crossentropy' defaults to
# from_logits=False and would treat those logits as probabilities, so the loss
# is constructed explicitly with from_logits=True.
model.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)

# Train the model
model.fit(input_data, target_data, epochs=10, batch_size=2)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x13503a460>

Check for overfitting or underfitting by running the trained model on new input data:

In [6]:
# Input sequences to run through the model: token ids 1..10 arranged as two
# rows of five. NOTE(review): these are the same values as `input_data` in the
# training cell, so this does not exercise unseen data -- confirm intended.
new_input_data = np.arange(1, 11).reshape(2, 5)

# Run inference with the trained model
predictions = model.predict(new_input_data)

# Show the raw model outputs
print(predictions)


[[[-8.27740490e-01 -1.55389166e+00 -3.35158110e-01 -9.63812828e-01
    1.11858284e+00 -2.11791079e-02 -8.62942636e-01  1.99557707e-01
   -2.77550668e-01  2.97810942e-01  4.03611958e-01  1.64057410e+00
    1.20691919e+00  1.60742998e+00  1.07484889e+00  5.69986105e-01
    1.13409579e+00 -4.89958018e-01  7.66841650e-01  1.29768044e-01
   -3.31402600e-01  1.11744978e-01 -5.56424707e-02 -1.29184353e+00
   -6.95902407e-01 -1.30048144e+00  2.18241930e-01 -4.97535676e-01
   -2.19486877e-02 -1.15205102e-01 -7.17603862e-01 -2.17971280e-01
   -9.64068845e-02 -1.58504725e-01 -6.76932812e-01  5.80210984e-01
   -5.33658862e-01 -4.40996259e-01 -1.09522903e+00 -4.04753566e-01
   -9.63765621e-01  1.61780524e+00 -5.39909780e-01  4.63281944e-02
    1.47211459e-02 -8.38227749e-01  3.38538200e-01  3.65014225e-01
   -6.79947197e-01 -7.31582493e-02  3.88835371e-02  1.28456247e+00
   -6.53105557e-01 -1.20700312e+00 -1.52032030e+00 -1.57009542e-01
   -1.45438939e-01 -1.60966128e-01 -2.59902179e-01 -9.44886267