<a href="https://colab.research.google.com/github/mateusz-kacpura/exercises-python/blob/main/Sieci_neuronowe_Transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalization (post-norm arrangement).

    Args:
        n_heads: Number of attention heads.
        ff_dim: Width of the hidden layer of the feed-forward network.
        emb_dim: Size of the last axis of the input. It is used as the
            per-head ``key_dim`` of the attention layer and as the output
            width of the feed-forward network, so the residual additions
            line up.
        dropout_rate: Dropout rate applied after each sub-layer.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, n_heads, ff_dim, emb_dim, dropout_rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.n_heads = n_heads
        self.ff_dim = ff_dim
        self.emb_dim = emb_dim
        self.dropout_rate = dropout_rate
        self.attention = layers.MultiHeadAttention(num_heads=n_heads, key_dim=emb_dim)
        self.ff = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(emb_dim)]
        )
        self.normalizer_1 = layers.LayerNormalization(epsilon=1e-6)
        self.normalizer_2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout_1 = layers.Dropout(dropout_rate)
        self.dropout_2 = layers.Dropout(dropout_rate)

    def call(self, x):
        """Apply the block to ``x`` and return a tensor of the same shape."""
        # Self-attention: the input serves as both query and value.
        attention_output = self.attention(x, x)
        attention_output = self.dropout_1(attention_output)
        # First residual connection + normalization.
        bottleneck = self.normalizer_1(attention_output + x)
        output = self.ff(bottleneck)
        output = self.dropout_2(output)
        # Second residual connection + normalization.
        output = self.normalizer_2(output + bottleneck)
        return output

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "n_heads": self.n_heads,
                "ff_dim": self.ff_dim,
                "emb_dim": self.emb_dim,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config

class TransformerPE(layers.Layer):
    """Token embedding summed with a learned position embedding.

    Args:
        maxlen: Number of rows in the position-embedding table. Inputs whose
            last axis is longer than this would index past the table.
        vocab_size: Number of rows in the token-embedding table.
        embed_dim: Size of each embedding vector.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.embedding_1 = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.embedding_2 = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed the token ids in ``x`` and add one position vector per step."""
        # Positions are derived from the actual length of the last axis.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        position_vectors = self.embedding_2(positions)
        token_vectors = self.embedding_1(x)
        # The position vectors broadcast over the leading axes of the tokens.
        return token_vectors + position_vectors

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "maxlen": self.maxlen,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

# --- Data: IMDB reviews, limited to the `vocabularysize` most frequent words ---
vocabularysize = 20000
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=vocabularysize)
print(len(x_train), "Training sequences")
print(len(x_test), "Test sequences")

# Pad/truncate every review to the same length so they can be batched.
maxlen = 200
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)

# --- Model hyperparameters ---
n_heads = 2   # attention heads in the Transformer block
emb_dim = 32  # embedding size for each token
ff_dim = 32   # hidden width of the block's feed-forward network

# --- Model: embeddings -> Transformer block -> pooled classifier head ---
inputs = layers.Input(shape=(maxlen,))
embedding = TransformerPE(maxlen, vocabularysize, emb_dim)(inputs)
x = TransformerBlock(n_heads, ff_dim, emb_dim)(embedding)
x = layers.GlobalAveragePooling1D()(x)  # average over the sequence axis
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
# Single sigmoid unit: the labels are binary (0/1).
outputs = layers.Dense(1, activation="sigmoid")(x)

model = keras.Model(inputs=inputs, outputs=outputs)
# A single sigmoid output with 0/1 labels calls for binary cross-entropy.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# NOTE: the test split doubles as validation data here, so the reported
# val_* metrics are not an independent hold-out estimate.
history = model.fit(
    x_train, y_train, batch_size=32, epochs=2, validation_data=(x_test, y_test)
)


25000 Training sequences
25000 Test sequences
Epoch 1/2
782/782 ━━━━━━━━━━━━━━━━━━━━ 120s 148ms/step - accuracy: 0.7144 - loss: 0.5157 - val_accuracy: 0.8803 - val_loss: 0.2823
Epoch 2/2
782/782 ━━━━━━━━━━━━━━━━━━━━ 144s 150ms/step - accuracy: 0.9322 - loss: 0.1889 - val_accuracy: 0.8728 - val_loss: 0.3079
