In [41]:
import tensorflow as tf
from tensorflow import keras
import numpy as np



In [38]:
class InputEmbeddings(tf.keras.layers.Layer):
    """Token-id embedding lookup whose output is multiplied by sqrt(d_model).

    Attributes:
        d_model: Size of each embedding vector.
        vocab_size: Number of rows in the embedding table.
        embedding: The underlying ``tf.keras.layers.Embedding`` layer.
        scale: Scalar float32 tensor holding ``sqrt(d_model)``.
    """

    def __init__(self, d_model: int, vocab_size: int) -> None:
        """Create the embedding table and precompute the scaling factor.

        Args:
            d_model: Size of each embedding vector.
            vocab_size: Number of distinct token ids; passed to the Embedding
                layer as its input dimension.
        """
        super(InputEmbeddings, self).__init__()
        self.d_model = d_model
        self.vocab_size = vocab_size
        self.embedding = tf.keras.layers.Embedding(vocab_size, d_model)
        self.scale = tf.math.sqrt(tf.cast(d_model, tf.float32))

    def call(self, x):
        """Look up embeddings for the ids in ``x`` and scale them.

        Args:
            x: Integer tensor of token ids.

        Returns:
            Tensor with one extra trailing axis of size ``d_model``, equal to
            the embedding lookup multiplied by ``sqrt(d_model)``.
        """
        embedded = self.embedding(x)
        # TensorFlow does not promote dtypes on multiplication, so a float32
        # scale would raise if the embedding output has another float dtype
        # (e.g. float16 under a mixed-precision policy). For float32 outputs
        # this cast is a no-op.
        return embedded * tf.cast(self.scale, embedded.dtype)

In [75]:
# Smoke test: embed a toy German sentence with InputEmbeddings.

sentence = "ein Auto ist ein Fahrzeug"

# Toy vocabulary and hyperparameters. No word is mapped to id 0, so the
# embedding table needs len(vocab) + 1 rows to cover the largest id.
vocab = {"ein": 1, "Auto": 2, "ist": 3, "Fahrzeug": 4}
d_model = 4  # Dimension of the embeddings
vocab_size = len(vocab) + 1  # Size of the vocabulary

# Map every whitespace-separated word to its id, then pack the ids into an
# int32 tensor.
tokens = list(map(vocab.__getitem__, sentence.split()))
token_tensor = tf.constant(tokens, dtype=tf.int32)

# Build the layer defined above and invoke it on the tokens.
embedding_layer = InputEmbeddings(d_model, vocab_size)
embeddings = embedding_layer(token_tensor)

# Print the sentence and its embeddings, separated by a blank line.
print("Sentence: ", sentence, end="\n\n")
print("Embeddings: ", embeddings)

Sentence:  ein Auto ist ein Fahrzeug

Embeddings:  tf.Tensor(
[[ 0.09785492 -0.03037474 -0.09098981  0.04843877]
 [ 0.00960038 -0.02764299 -0.00800388 -0.02333073]
 [-0.09946199  0.05157394  0.01916969 -0.05175848]
 [ 0.09785492 -0.03037474 -0.09098981  0.04843877]
 [-0.04503598 -0.08140297  0.02909774  0.06131718]], shape=(5, 4), dtype=float32)


In [77]:
class PositionalEncoding(tf.keras.layers.Layer):
    """Adds fixed sinusoidal positional encodings to its input, then applies dropout.

    The encoding table is computed once in ``__init__`` for ``seq_len``
    positions and stored in ``self.pe`` as a float32 constant of shape
    ``(1, seq_len, d_model)``. Even feature columns hold sines and odd feature
    columns hold cosines.
    """

    def __init__(self, d_model: int, seq_len: int, dropout: float) -> None:
        """Precompute the encoding table and create the dropout layer.

        Args:
            d_model: Size of the feature (last) axis of the inputs. May be odd.
            seq_len: Number of positions to precompute. Inputs to ``call``
                must not be longer than this along axis 1, because the table
                has no rows beyond ``seq_len``.
            dropout: Rate passed to ``tf.keras.layers.Dropout``.
        """
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model
        self.seq_len = seq_len
        self.dropout = tf.keras.layers.Dropout(dropout)

        pe = np.zeros((seq_len, d_model))

        # Column vector of positions times a row vector of frequencies gives
        # a (seq_len, ceil(d_model / 2)) grid of angles.
        position = np.arange(seq_len)[:, np.newaxis]
        div_term = np.exp(np.arange(0, d_model, 2) * -(np.log(10000.0) / d_model))
        angles = position * div_term

        pe[:, 0::2] = np.sin(angles)
        # When d_model is odd there is one fewer odd column than even columns,
        # so trim the angles to the number of odd columns; for even d_model
        # the slice keeps every column.
        pe[:, 1::2] = np.cos(angles[:, : d_model // 2])

        # Leading axis of size 1 lets the table broadcast over the batch axis.
        self.pe = tf.constant(pe[np.newaxis, ...], dtype=tf.float32)

    def call(self, x, training=None):
        """Add positional encodings to ``x`` and apply dropout.

        Args:
            x: Float tensor whose axis 1 is the sequence axis and whose last
                axis has size ``d_model``; it must broadcast against a
                ``(1, length, d_model)`` table slice.
            training: Forwarded to the dropout layer; ``None`` leaves the
                decision to Keras.

        Returns:
            A 2-tuple ``(encoded, pe_slice)``: the input plus encodings after
            dropout, and the float32 slice of the table that was added.
        """
        pe_slice = self.pe[:, :tf.shape(x)[1], :]
        # Cast only for the addition so inputs that are not float32 do not
        # raise a dtype mismatch; the returned slice keeps the table's dtype.
        x = x + tf.cast(pe_slice, x.dtype)
        return self.dropout(x, training=training), pe_slice

In [78]:
# Smoke test: add positional encodings to the embeddings from the previous cell.

dropout_rate = 0.1
seq_len = len(tokens)

# Create the PositionalEncoding object.
pos_encoding_layer = PositionalEncoding(d_model, seq_len, dropout_rate)

# Apply the positional encoding to the embeddings. The layer slices its table
# by axis 1 of the input, so a leading batch axis of size 1 is added first.
# The call returns a tuple: index 0 is the encoded result, index 1 is the
# positional-encoding slice that was added.
batched_embeddings = tf.expand_dims(embeddings, axis=0)
encoded_embeddings = pos_encoding_layer(batched_embeddings)

# Print the sum as "embeddings + positions = result", with a blank line
# between every part.
print("Sentence: ", sentence, end="\n\n")
print("Embeddings: ", embeddings, end="\n\n")
print("+", end="\n\n")
print("Position Encodes: ", encoded_embeddings[1], end="\n\n")
print("=", end="\n\n")
print("Encoded Embeddings: ", encoded_embeddings[0])


Sentence:  ein Auto ist ein Fahrzeug

Embeddings:  tf.Tensor(
[[ 0.09785492 -0.03037474 -0.09098981  0.04843877]
 [ 0.00960038 -0.02764299 -0.00800388 -0.02333073]
 [-0.09946199  0.05157394  0.01916969 -0.05175848]
 [ 0.09785492 -0.03037474 -0.09098981  0.04843877]
 [-0.04503598 -0.08140297  0.02909774  0.06131718]], shape=(5, 4), dtype=float32)

+

Position Encodes:  tf.Tensor(
[[[ 0.          1.          0.          1.        ]
  [ 0.84147096  0.5403023   0.00999983  0.99995   ]
  [ 0.9092974  -0.41614684  0.01999867  0.9998    ]
  [ 0.14112    -0.9899925   0.0299955   0.99955004]
  [-0.7568025  -0.6536436   0.03998933  0.9992001 ]]], shape=(1, 5, 4), dtype=float32)

=

Encoded Embeddings:  tf.Tensor(
[[[ 0.09785492  0.96962523 -0.09098981  1.0484388 ]
  [ 0.85107136  0.5126593   0.00199596  0.97661924]
  [ 0.80983543 -0.3645729   0.03916835  0.94804156]
  [ 0.23897493 -1.0203673  -0.0609943   1.0479888 ]
  [-0.80183846 -0.73504657  0.06908707  1.0605173 ]]], shape=(1, 5, 4), dtype=float32)

In [16]:
class LayerNormalization(tf.keras.layers.Layer):
    """Normalizes the last axis of its input, then applies a learnable scale and shift.

    Computes ``alpha * (x - mean) / (std + eps) + bias`` where ``mean`` and
    ``std`` are taken over the last axis.

    Attributes:
        eps: Constant added to the standard deviation to avoid division by zero.
        alpha: Learnable per-feature scale, initialised to ones.
        bias: Learnable per-feature shift, initialised to zeros.
    """

    def __init__(self, features: int, eps:float=10**-6) -> None:
        """Create the scale and shift parameters.

        Args:
            features: Size of the last axis of the inputs.
            eps: Constant added to the standard deviation in the denominator.
        """
        super(LayerNormalization, self).__init__()
        self.eps = eps
        # add_weight registers the variables with Keras as trainable weights
        # of this layer.
        self.alpha = self.add_weight(
            name="alpha", shape=(features,), initializer="ones", trainable=True
        )  # learnable parameter
        self.bias = self.add_weight(
            name="bias", shape=(features,), initializer="zeros", trainable=True
        )  # learnable parameter

    def call(self, x):
        """Normalize ``x`` over its last axis.

        Keras dispatches ``layer(x)`` to ``call``, so the computation must
        live here rather than in a method named ``forward``.

        Args:
            x: Float tensor whose last axis has size ``features``.

        Returns:
            Tensor of the same shape as ``x``.
        """
        mean = tf.reduce_mean(x, axis=-1, keepdims=True)
        # reduce_std is the population standard deviation (divides by N).
        std = tf.math.reduce_std(x, axis=-1, keepdims=True)
        return self.alpha * (x - mean) / (std + self.eps) + self.bias

    def forward(self, x):
        """Alias of ``call`` kept for code that invokes ``forward`` directly."""
        return self.call(x)


In [17]:
class FeedForwardBlock(tf.keras.layers.Layer):
    """Two dense layers with ReLU and dropout in between.

    Computes ``linear_2(dropout(linear_1(x)))`` where ``linear_1`` has
    ``d_ff`` units with ReLU activation and ``linear_2`` has ``d_model`` units.
    """

    def __init__(self, d_model: int, d_ff: int, dropout: float): 
        """Create the sub-layers.

        Args:
            d_model: Number of output units of the second dense layer.
            d_ff: Number of units of the first (hidden) dense layer.
            dropout: Rate passed to ``tf.keras.layers.Dropout``.
        """
        super(FeedForwardBlock, self).__init__()
        self.linear_1 = tf.keras.layers.Dense(d_ff, activation="relu")
        self.dropout = tf.keras.layers.Dropout(dropout)
        self.linear_2 = tf.keras.layers.Dense(d_model)

    def call(self, x, training=None):
        """Apply the feed-forward block to ``x``.

        Keras dispatches ``layer(x)`` to ``call``, so the computation must
        live here rather than in a method named ``forward``.

        Args:
            x: Input tensor passed to the first dense layer.
            training: Forwarded to the dropout layer; ``None`` leaves the
                decision to Keras.

        Returns:
            Tensor whose last axis has size ``d_model``.
        """
        hidden = self.linear_1(x)
        hidden = self.dropout(hidden, training=training)
        return self.linear_2(hidden)

    def forward(self, x):
        """Alias of ``call`` kept for code that invokes ``forward`` directly."""
        return self.call(x)

