In [26]:
# ==============================
# COMPONENT–II: TRANSFORMER LANGUAGE MODEL
# ==============================

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import re
from tensorflow.keras.preprocessing.sequence import pad_sequences

# ------------------------------
# 1. Load and Clean Dataset
# ------------------------------
# Toy training corpus: ten short sentences, one per line.
text = """
artificial intelligence is transforming modern society.
machine learning allows systems to improve automatically with experience.
deep learning uses multi layer neural networks.
neural networks are inspired by biological neurons.
training a neural network requires optimization techniques.
natural language processing helps computers understand human language.
transformer models changed the field of nlp.
education is being improved using artificial intelligence.
ethical considerations are important in artificial intelligence.
continuous learning is essential in the field of ai.
"""

# Normalise in a single expression: lowercase first, then delete every
# character that is neither a word character nor whitespace (this strips
# the sentence-ending periods while keeping the newlines).
text = re.sub(r'[^\w\s]', '', text.lower())

# ------------------------------
# 2. Tokenization
# ------------------------------
# Cap on the vectorizer's vocabulary (passed as max_tokens below).
vocab_size = 1000
# Length, in tokens, of every training window.
sequence_length = 20

# Word-level tokenizer that maps each token to an integer id.
vectorizer = layers.TextVectorization(max_tokens=vocab_size, output_mode="int")

# The whole cleaned corpus is treated as ONE document: the vocabulary is
# learned from it, and then the same document is encoded into a single
# flat array of token ids.
corpus = [text]
vectorizer.adapt(corpus)
tokens = vectorizer(corpus).numpy()[0]

# ------------------------------
# 3. Create Training Sequences
# ------------------------------
# Slide a window of `sequence_length` tokens over the corpus. Each input
# window is paired with the same window shifted right by one token, so the
# target at every position is the next token.
window_starts = range(len(tokens) - sequence_length)

X = np.array(
    [tokens[start:start + sequence_length] for start in window_starts]
).astype("int32")
y = np.array(
    [tokens[start + 1:start + sequence_length + 1] for start in window_starts]
).astype("int32")

print("X shape:", X.shape)
print("y shape:", y.shape)

# ------------------------------
# 4. Token + Positional Embedding
# ------------------------------
class TokenAndPositionEmbedding(layers.Layer):
    """Embed token ids and add a learned absolute-position embedding.

    Args:
        maxlen: Number of rows in the position table, i.e. the longest
            sequence length this layer can embed.
        vocab_size: Number of rows in the token table.
        embed_dim: Width of both embedding tables.
        **kwargs: Standard ``layers.Layer`` arguments (``name``, ``dtype``,
            ...), forwarded to the base class.
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(vocab_size, embed_dim)
        self.pos_emb = layers.Embedding(maxlen, embed_dim)

    def call(self, x):
        """Return token embeddings plus position embeddings.

        Positions 0..seq_len-1 are looked up once and added to every
        sequence in the batch via broadcasting.
        assumes x holds token ids with the sequence on the last axis,
        e.g. (batch, seq_len) -- TODO confirm for other callers.
        """
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        return self.token_emb(x) + self.pos_emb(positions)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "maxlen": self.maxlen,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

# ------------------------------
# 5. Transformer Decoder Block
# ------------------------------
class TransformerBlock(layers.Layer):
    """Causal self-attention block followed by a position-wise feed-forward net.

    Each sub-layer is wrapped as ``LayerNorm(x + Dropout(sublayer(x)))``.

    Args:
        embed_dim: Width of the block's input and output features.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate applied to each sub-layer's output.
        **kwargs: Standard ``layers.Layer`` arguments (``name``, ``dtype``,
            ...), forwarded to the base class.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        # NOTE: key_dim is the per-head size, so every head here is
        # embed_dim wide (not embed_dim // num_heads). Kept as in the
        # original so the parameter count is unchanged.
        self.att = layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=embed_dim
        )
        self.ffn = keras.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization()
        self.layernorm2 = layers.LayerNormalization()
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        """Apply masked self-attention and the feed-forward net to ``inputs``.

        assumes inputs is (batch, seq_len, embed_dim) -- TODO confirm for
        callers other than the model built in this file.
        """
        # Let the attention layer build the lower-triangular mask itself:
        # position i may only attend to positions <= i.
        attn_output = self.att(
            inputs, inputs,
            use_causal_mask=True,
            training=training
        )
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)

        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

# ------------------------------
# 6. Build Transformer Model
# ------------------------------
# Hyperparameters: feature width, attention heads, feed-forward hidden
# width, and number of stacked decoder blocks.
embed_dim, num_heads, ff_dim, num_layers = 128, 4, 256, 2

# Token ids in, one fixed-length window per example.
inputs = layers.Input(shape=(sequence_length,))

embedding = TokenAndPositionEmbedding(sequence_length, vocab_size, embed_dim)
x = embedding(inputs)

# Stack the decoder blocks; each one is created and applied in turn.
for _ in range(num_layers):
    decoder_block = TransformerBlock(embed_dim, num_heads, ff_dim)
    x = decoder_block(x)

# Per-position probability distribution over the vocabulary.
lm_head = layers.Dense(vocab_size, activation="softmax")
outputs = lm_head(x)

model = keras.Model(inputs=inputs, outputs=outputs)

# Targets are integer ids, hence the sparse variant of cross-entropy.
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

model.summary()

# ------------------------------
# 7. Train Model
# ------------------------------
model.fit(X, y, epochs=40, batch_size=32)

# ------------------------------
# 8. Text Generation (Temperature Sampling)
# ------------------------------
def generate_text(seed_text, num_words=20, temperature=0.8):
    """Extend ``seed_text`` by sampling ``num_words`` tokens from the model.

    Args:
        seed_text: Prompt string; it is tokenized with the module-level
            ``vectorizer``.
        num_words: Number of words to append. Zero or a negative value
            returns ``seed_text`` unchanged.
        temperature: Softmax temperature. Values below 1 sharpen the
            distribution, values above 1 flatten it. A value <= 0 selects
            the most probable word (greedy decoding) instead of dividing
            by zero.

    Returns:
        ``seed_text`` followed by the generated words, space-separated.

    Raises:
        ValueError: If the vectorizer's vocabulary holds no real words.
    """
    if num_words <= 0:
        return seed_text

    # Hoisted out of the loop: the vocabulary does not change while sampling.
    vocabulary = vectorizer.get_vocabulary()
    # In "int" mode TextVectorization reserves id 0 for padding ('') and
    # id 1 for out-of-vocabulary ('[UNK]'); neither is a real word.
    first_real_id = 2
    if len(vocabulary) <= first_real_id:
        raise ValueError("vectorizer vocabulary contains no real tokens")

    # Tokenize the prompt once and then extend the id list directly. Every
    # sampled word comes from the vocabulary, so re-tokenizing the growing
    # string each step would yield the same ids.
    token_ids = [int(t) for t in vectorizer([seed_text]).numpy()[0]]
    new_words = []

    for _ in range(num_words):
        # Keep only the most recent tokens that fit the model's window.
        context = token_ids[-sequence_length:]
        # Pad on the RIGHT so real tokens sit at positions 0..n-1, as in
        # training. The blocks are causally masked, so the output at the
        # last real position cannot see the padding that follows it.
        model_input = pad_sequences(
            [context],
            maxlen=sequence_length,
            padding="post"
        )
        last_pos = max(len(context), 1) - 1

        probs = model.predict(model_input, verbose=0)[0][last_pos]

        # The output layer may be wider than the learned vocabulary, so
        # restrict sampling to ids that map to a real word. This also
        # excludes the padding and [UNK] ids.
        candidates = probs[first_real_id:len(vocabulary)].astype("float64")

        if temperature <= 0:
            offset = int(np.argmax(candidates))
        else:
            scaled = np.log(candidates + 1e-8) / temperature
            scaled -= scaled.max()  # keep exp() well inside float range
            weights = np.exp(scaled)
            offset = int(
                np.random.choice(len(weights), p=weights / weights.sum())
            )

        next_word_id = first_real_id + offset
        token_ids.append(next_word_id)
        new_words.append(vocabulary[next_word_id])

    return " ".join([seed_text, *new_words])

# Demo: print a heading, then one sample continuation of a short prompt.
print("\nGenerated Text:\n")
demo_output = generate_text("artificial intelligence")
print(demo_output)


X shape: (54, 20)
y shape: (54, 20)


Epoch 1/40
2/2 ━━━━━━━━━━━━━━━━━━━━ 21s 7s/step - accuracy: 0.0062 - loss: 7.0000
Epoch 2/40
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 45ms/step - accuracy: 0.1760 - loss: 5.8651
Epoch 3/40
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 42ms/step - accuracy: 0.4460 - loss: 5.0844
Epoch 4/40
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 43ms/step - accuracy: 0.8245 - loss: 4.3350
Epoch 5/40
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 46ms/step - accuracy: 0.8988 - loss: 3.6532
Epoch 6/40
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 47ms/step - accuracy: 0.9076 - loss: 3.0836
Epoch 7/40
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 41ms/step - accuracy: 0.9063 - loss: 2.6227
Epoch 8/40
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 55ms/step - accuracy: 0.9082 - loss: 2.1983
Epoch 9/40
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [