<a href="https://colab.research.google.com/github/sanjibsinha/Machine-Learning-Primer/blob/main/Generating_Text_with_Deep_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import TensorFlow and Keras
import tensorflow as tf
from tensorflow import keras

# Import numpy for numerical operations
import numpy as np

# Import layers we'll need for our text model
from keras.layers import TextVectorization, Embedding, LSTM, Dense

In [2]:
# Our training text
text = """
Shall I compare thee to a summer's day?
Thou art more lovely and more temperate:
Rough winds do shake the darling buds of May,
And summer's lease hath all too short a date:
"""

# Create a vocabulary of all unique characters in the text
vocab = sorted(list(set(text)))
print(f"Vocabulary: {''.join(vocab)}")
print(f"Number of unique characters: {len(vocab)}")

# Create a mapping from characters to integers (and vice-versa)
char_to_int = {char: i for i, char in enumerate(vocab)}
int_to_char = {i: char for i, char in enumerate(vocab)}

# Convert our entire text to integers
encoded_text = np.array([char_to_int[char] for char in text])

Vocabulary: 
 ',:?AIMRSTabcdefghiklmnoprstuvwy
Number of unique characters: 34


In [4]:
# Create sequences and targets
seq_length = 20
sequences = []
targets = []

for i in range(len(encoded_text) - seq_length):
    sequences.append(encoded_text[i:i+seq_length])
    targets.append(encoded_text[i+seq_length])

X = np.array(sequences)
y = np.array(targets)

In [5]:
# Define the model architecture
model = keras.Sequential([
    # Embedding layer
    Embedding(input_dim=len(vocab), output_dim=256, input_length=seq_length),

    # LSTM layer
    LSTM(512),

    # Output layer (one neuron for each character in our vocabulary)
    Dense(len(vocab), activation='softmax')
])



In [6]:
# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
# Note: For good results, text models often need 50-100 epochs or more.
# We'll use 50 here for a demonstration.
history = model.fit(X, y, epochs=50, verbose=2)

Epoch 1/50
5/5 - 4s - 791ms/step - accuracy: 0.1234 - loss: 3.4753
Epoch 2/50
5/5 - 1s - 257ms/step - accuracy: 0.1688 - loss: 3.3763
Epoch 3/50
5/5 - 3s - 510ms/step - accuracy: 0.1299 - loss: 3.3038
Epoch 4/50
5/5 - 3s - 589ms/step - accuracy: 0.1364 - loss: 3.1936
Epoch 5/50
5/5 - 1s - 269ms/step - accuracy: 0.1234 - loss: 3.0123
Epoch 6/50
5/5 - 1s - 278ms/step - accuracy: 0.1688 - loss: 2.9966
Epoch 7/50
5/5 - 3s - 504ms/step - accuracy: 0.1688 - loss: 2.9470
Epoch 8/50
5/5 - 1s - 262ms/step - accuracy: 0.1883 - loss: 2.8843
Epoch 9/50
5/5 - 2s - 326ms/step - accuracy: 0.2273 - loss: 2.9131
Epoch 10/50
5/5 - 3s - 578ms/step - accuracy: 0.1883 - loss: 2.8440
Epoch 11/50
5/5 - 2s - 392ms/step - accuracy: 0.1818 - loss: 2.8053
Epoch 12/50
5/5 - 2s - 487ms/step - accuracy: 0.1883 - loss: 2.7603
Epoch 13/50
5/5 - 3s - 517ms/step - accuracy: 0.2403 - loss: 2.6846
Epoch 14/50
5/5 - 3s - 504ms/step - accuracy: 0.2597 - loss: 2.6117
Epoch 15/50
5/5 - 1s - 271ms/step - accuracy: 0.2662 - lo

In [9]:
# Function to generate text
def generate_text(model, start_string, num_generate=200):
    # Convert the start string to integers
    input_eval = [char_to_int[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Empty string to store our results
    text_generated = []

    for i in range(num_generate):
        predictions = model(input_eval)
        # Remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # Use a categorical distribution to predict the character returned by the model
        predicted_id = tf.random.categorical(tf.expand_dims(predictions, 0), num_samples=1)[0,0].numpy()

        # Pass the predicted character as the next input to the model
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(int_to_char[predicted_id])

    return (start_string + ''.join(text_generated))

# Generate text starting with "Thou art"
print(generate_text(model, start_string="Thou art"))

Thou artk uAcoyfTo u?gfdbaff,flgMI
dRRTcr:gvdT  uTwwlAknyrRkrd'  sfrlMAuhdMofTiSohvs
cRoR?bgwv,o?Iy:wdct'Ssh
bSkSvv,,itTvr s
rpl?T:afsn?M?I'yReefl:wbR?fasniatuvnovyvb,ycl
SvMd
yScgoR:werlSIuc:h
aIrpwhMTo:hifI
