In [19]:
# ==========================================
# COMPONENT–I: LSTM TEXT GENERATION
# ==========================================

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import re
from tensorflow.keras.preprocessing.sequence import pad_sequences

# ------------------------------------------
# 1. Load and Preprocess Text
# ------------------------------------------
text = """
artificial intelligence is transforming modern society.
machine learning allows systems to improve automatically with experience.
deep learning uses multi layer neural networks.
neural networks are inspired by biological neurons.
training a neural network requires optimization techniques.
natural language processing helps computers understand human language.
transformer models changed the field of nlp.
education is being improved using artificial intelligence.
ethical considerations are important in artificial intelligence.
continuous learning is essential in the field of ai.
"""

text = text.lower()
text = re.sub(r'[^\w\s]', '', text)

# ------------------------------------------
# 2. Word-Level Tokenization
# ------------------------------------------
vocab_size = 1000
sequence_length = 10

vectorizer = layers.TextVectorization(
    max_tokens=vocab_size,
    output_mode="int"
)

vectorizer.adapt([text])

tokens = vectorizer([text]).numpy()[0]

# ------------------------------------------
# 3. Create Input-Output Sequences
# ------------------------------------------
X = []
y = []

for i in range(len(tokens) - sequence_length):
    X.append(tokens[i:i+sequence_length])
    y.append(tokens[i+sequence_length])

X = np.array(X).astype("int32")
y = np.array(y).astype("int32")

print("X shape:", X.shape)
print("y shape:", y.shape)

# ------------------------------------------
# 4. Design LSTM Architecture
# ------------------------------------------
embed_dim = 64

model = keras.Sequential([
    layers.Embedding(vocab_size, embed_dim, input_length=sequence_length),
    layers.LSTM(128),
    layers.Dense(vocab_size, activation="softmax")
])

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()

# ------------------------------------------
# 5. Train Model
# ------------------------------------------
model.fit(X, y, epochs=50, batch_size=32)

# ------------------------------------------
# 6. Generate Text (Temperature Sampling)
# ------------------------------------------
def generate_text(seed_text, num_words=15, temperature=0.8):
    for _ in range(num_words):

        tokenized = vectorizer([seed_text]).numpy()[0]
        tokenized = pad_sequences(
            [tokenized],
            maxlen=sequence_length,
            padding="pre"
        )

        prediction = model.predict(tokenized, verbose=0)[0]

        # Temperature sampling
        prediction = np.log(prediction + 1e-8) / temperature
        exp_preds = np.exp(prediction)
        prediction = exp_preds / np.sum(exp_preds)

        next_word_id = np.random.choice(len(prediction), p=prediction)
        next_word = vectorizer.get_vocabulary()[next_word_id]

        seed_text += " " + next_word

    return seed_text


print("\nGenerated Text:\n")
print(generate_text("artificial intelligence"))


X shape: (64, 10)
y shape: (64,)


Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - accuracy: 0.0104 - loss: 6.9070  
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.1562 - loss: 6.8996 
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.1979 - loss: 6.8910
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.2188 - loss: 6.8805
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.2083 - loss: 6.8652
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.1458 - loss: 6.8414
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.0833 - loss: 6.8026
Epoch 8/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.0833 - loss: 6.7299
Epoch 9/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m