<a href="https://colab.research.google.com/github/moupriya20/mou_m/blob/main/LSTM_text_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Step 1: Import Libraries

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Activation


Step 2: Load Text Data

In [2]:
text = """
Artificial Intelligence and Machine Learning are transforming the world.
LSTMs are powerful models for learning sequences and generating new text.
"""

# Convert to lowercase
text = text.lower()
print("Length of text:", len(text))


Length of text: 149


Step 3: Create Character Mappings

In [3]:
chars = sorted(list(set(text)))
char_to_idx = {c:i for i,c in enumerate(chars)}
idx_to_char = {i:c for i,c in enumerate(chars)}

vocab_size = len(chars)
print("Total unique characters:", vocab_size)


Total unique characters: 23


Step 4: Prepare Training Sequences

In [4]:
SEQ_LENGTH = 40
step = 3

sentences = []
next_chars = []

for i in range(0, len(text) - SEQ_LENGTH, step):
    sentences.append(text[i: i + SEQ_LENGTH])
    next_chars.append(text[i + SEQ_LENGTH])

print("Number of sequences:", len(sentences))


Number of sequences: 37


Step 5: One-Hot Encode the Data

In [5]:
X = np.zeros((len(sentences), SEQ_LENGTH, vocab_size), dtype=np.float32)
y = np.zeros((len(sentences), vocab_size), dtype=np.float32)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_to_idx[char]] = 1.
    y[i, char_to_idx[next_chars[i]]] = 1.


Step 6: Build the LSTM Model

In [6]:
model = Sequential()
model.add(LSTM(128, input_shape=(SEQ_LENGTH, vocab_size)))
model.add(Dense(vocab_size))
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam")
model.summary()


  super().__init__(**kwargs)


Step 7: Train the Model

In [7]:
model.fit(X, y, batch_size=128, epochs=20)


Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 3.1398
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - loss: 3.1231
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step - loss: 3.1060
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step - loss: 3.0878
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - loss: 3.0674
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step - loss: 3.0434
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step - loss: 3.0132
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step - loss: 2.9718
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step - loss: 2.9103
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 306ms/step - loss: 2.8150
Epoch 11/20
[1m

<keras.src.callbacks.history.History at 0x7f58fbcc8620>

Step 8: Generate New Text


In [8]:
def generate_text(seed_text, length=200):
    generated = seed_text
    sentence = seed_text.lower()

    for _ in range(length):
        x_pred = np.zeros((1, SEQ_LENGTH, vocab_size))

        for t, char in enumerate(sentence):
            if char in char_to_idx:
                x_pred[0, t, char_to_idx[char]] = 1.

        preds = model.predict(x_pred, verbose=0)[0]
        next_idx = np.argmax(preds)
        next_char = idx_to_char[next_idx]

        generated += next_char
        sentence = sentence[1:] + next_char

    return generated

print(generate_text("machine learning is"))


machine learning iseeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee
