<a href="https://colab.research.google.com/github/owilli38/DSBA-6162/blob/main/DSBA6162_Transformer_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Optional environment setup: uncomment to install the packages imported below.
# Note that tf-keras is left unpinned here while transformers is pinned to 4.42.0.
#!pip install tf-keras
#!pip install transformers==4.42.0

In [None]:
import tf_keras
# Print the installed tf_keras version so the saved run records which release
# produced the outputs below.
print(tf_keras.__version__)

2.19.0


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import transformers

# Hyperparameters shared by the model-building, data-loading and training cells.
vocab_size = 20000  # number of distinct token ids kept (passed to imdb.load_data as num_words)
max_length = 200    # every review is padded or truncated to this many tokens
embed_dim = 32      # width of the token and position embeddings
ff_dim = 32         # hidden width of the transformer feed-forward sub-layer
num_heads = 2       # attention heads in the transformer block
# The IMDB sentiment labels used below are binary (0 / 1), so the softmax head
# needs exactly two units; a third unit would be a class that never occurs.
num_labels = 2

In [None]:
class TokenAndPositionEmbedding(tf_keras.layers.Layer):
    """Embed token ids and add a learned embedding of each token's position.

    Args:
        sequence_length: Number of rows in the position-embedding table, i.e.
            the largest size of the last input axis that has a position entry.
        vocabulary_size: Number of rows in the token-embedding table.
        embedding_dim: Width of both embedding tables.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``,
            ``trainable``) forwarded to the base ``Layer``.
    """

    def __init__(self, sequence_length, vocabulary_size, embedding_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.sequence_length = sequence_length
        self.vocabulary_size = vocabulary_size
        self.embedding_dim = embedding_dim
        self.token_embedding = tf_keras.layers.Embedding(
            input_dim=vocabulary_size, output_dim=embedding_dim
        )
        self.position_embedding = tf_keras.layers.Embedding(
            input_dim=sequence_length, output_dim=embedding_dim
        )

    def call(self, inputs):
        """Return token embeddings plus position embeddings for ``inputs``.

        Positions are generated as 0..length-1 from the size of the last input
        axis, so the same position vectors are broadcast-added to every sequence.
        NOTE(review): inputs longer than ``sequence_length`` would index past
        the position table -- confirm callers always pad/truncate first.
        """
        length = tf.shape(inputs)[-1]
        position_indices = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embedding(inputs)
        embedded_positions = self.position_embedding(position_indices)
        return embedded_tokens + embedded_positions

    def get_config(self):
        """Return the constructor arguments so the layer can be rebuilt with from_config()."""
        config = super().get_config()
        config.update(
            {
                "sequence_length": self.sequence_length,
                "vocabulary_size": self.vocabulary_size,
                "embedding_dim": self.embedding_dim,
            }
        )
        return config

In [None]:
class TransformerBlock(tf_keras.layers.Layer):
    """One transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalisation.

    Args:
        embed_dim: Output width of the feed-forward net (must match the input
            width for the residual additions); also used as the attention
            ``key_dim``.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward net.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``,
            ``trainable``) forwarded to the base ``Layer``.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = tf_keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf_keras.Sequential(
            [tf_keras.layers.Dense(ff_dim, activation="relu"), tf_keras.layers.Dense(embed_dim),]
        )
        self.layernorm1 = tf_keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf_keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf_keras.layers.Dropout(rate)
        self.dropout2 = tf_keras.layers.Dropout(rate)

    def call(self, inputs):
        """Apply attention and feed-forward sub-layers; returns the second layer-norm output."""
        # Self-attention: the same tensor is passed as both query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output)
        # Residual connection around attention, then normalise.
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        # Residual connection around the feed-forward net, then normalise.
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be rebuilt with from_config()."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config


# Seed Python, NumPy and TensorFlow before any weights are created so that
# weight initialisation, batch shuffling and dropout are repeatable across
# Restart & Run All (some GPU kernels may still add small nondeterminism).
tf_keras.utils.set_random_seed(42)

# Token ids -> token + position embeddings -> one transformer block.
input_layer = tf_keras.layers.Input(shape=(max_length,))
embedding_layer = TokenAndPositionEmbedding(max_length, vocab_size, embed_dim)
x = embedding_layer(input_layer)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)

# Average over the sequence axis, then a small dense head ending in a softmax
# with one unit per label.
x = tf_keras.layers.GlobalAveragePooling1D()(x)
x = tf_keras.layers.Dropout(0.1)(x)
x = tf_keras.layers.Dense(20, activation="relu")(x)
output_layer = tf_keras.layers.Dense(num_labels, activation="softmax")(x)

classifier_model = tf_keras.Model(inputs=input_layer, outputs=output_layer)

In [None]:
# `vocab_size` and `max_length` come from the configuration cell. They are not
# re-declared here: a second copy could silently drift from the values the
# model was built with.
(x_train, y_train), (x_test, y_test) = tf_keras.datasets.imdb.load_data(num_words=vocab_size)

# Pad (or truncate) every review to exactly `max_length` token ids.
x_train = tf_keras.utils.pad_sequences(x_train, maxlen=max_length)
x_test = tf_keras.utils.pad_sequences(x_test, maxlen=max_length)

# Fail fast if the data does not match what the model expects.
assert x_train.shape[1] == max_length and x_test.shape[1] == max_length
assert 0 <= y_train.min() and y_train.max() < num_labels, "labels outside the model's output range"

print(len(x_train), "Training sequences after padding")
print(len(x_test), "Testing sequences after padding")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 Training sequences after padding
25000 Testing sequences after padding


In [None]:
# Training schedule.
epochs = 2
batch_size = 32

# Integer class labels with softmax outputs -> sparse categorical cross-entropy.
classifier_model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# NOTE(review): the test split is also used as validation data here, so the
# validation metrics are not measured on an independent hold-out set.
history = classifier_model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
)

Epoch 1/2
Epoch 2/2


In [None]:
# The IMDB data was already downloaded and padded before training. Loading it a
# second time here was a copy-paste of that cell and produced identical arrays,
# so only the sizes of the arrays already in memory are reported.
print(len(x_train), "Training sequences after padding")
print(len(x_test), "Testing sequences after padding")

25000 Training sequences after padding
25000 Testing sequences after padding


In [None]:
# Per-epoch summary of the metrics recorded by fit().
metrics_log = history.history

print("Training History:")
for index, train_loss in enumerate(metrics_log['loss']):
    print(f"Epoch {index + 1}:")
    print(f"  Training Loss: {train_loss:.4f}")
    print(f"  Training Accuracy: {metrics_log['accuracy'][index]:.4f}")
    # Validation metrics exist only when fit() was given validation data.
    if 'val_loss' not in metrics_log:
        continue
    print(f"  Validation Loss: {metrics_log['val_loss'][index]:.4f}")
    print(f"  Validation Accuracy: {metrics_log['val_accuracy'][index]:.4f}")

Training History:
Epoch 1:
  Training Loss: 0.3942
  Training Accuracy: 0.8043
  Validation Loss: 0.2838
  Validation Accuracy: 0.8803
Epoch 2:
  Training Loss: 0.1925
  Training Accuracy: 0.9270
  Validation Loss: 0.3080
  Validation Accuracy: 0.8746
