In [None]:
import tensorflow as tf
import numpy as np

# 1. Load data — Shakespeare
# get_file downloads the corpus and returns the path of the local copy.
path_to_file = tf.keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt'
)
# Read raw bytes and decode explicitly so the text does not depend on the
# platform's default encoding or newline translation. The context manager
# closes the file handle as soon as the read finishes.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode('utf-8')
# Character-level vocabulary: every distinct character of the corpus, sorted.
vocab = sorted(set(text))

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1us/step


In [None]:
# Show the sorted list of distinct characters found in the corpus.
print(vocab)

['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [None]:

# 2. Character <-> integer-ID lookup layers
# Forward direction: character -> ID, with no mask token reserved.
ids_from_chars = tf.keras.layers.StringLookup(mask_token=None, vocabulary=list(vocab))

# Reverse direction: ID -> character. It is built from the forward layer's own
# vocabulary so that both layers agree on every index.
chars_from_ids = tf.keras.layers.StringLookup(
    invert=True,
    mask_token=None,
    vocabulary=ids_from_chars.get_vocabulary(),
)

def text_from_ids(ids):
    """Decode integer IDs to characters and join them along the last axis.

    Args:
        ids: Tensor (or nested sequence) of character IDs understood by
            `chars_from_ids`.

    Returns:
        A string tensor in which the last axis of `ids` has been collapsed
        into a single string.
    """
    chars = chars_from_ids(ids)
    return tf.strings.reduce_join(chars, axis=-1)

# Split the corpus into single UTF-8 characters, then encode each one as an ID.
corpus_chars = tf.strings.unicode_split(text, 'UTF-8')
all_ids = ids_from_chars(corpus_chars)

# Display the encoded corpus as the cell output.
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1])>

In [None]:
# 3. Prepare dataset: input–target sequences
seq_length = 100
# Each chunk carries one extra ID so it can later be split into an input and a
# target that are both seq_length long.
chunk_length = seq_length + 1

# Stream of single character IDs, regrouped into fixed-size chunks. A trailing
# chunk that is too short is discarded.
examples = tf.data.Dataset.from_tensor_slices(all_ids)
sequences = examples.batch(chunk_length, drop_remainder=True)

def split_input_target(sequence):
    """Split a sequence into (input, target) slices offset by one position.

    The input drops the final element and the target drops the first one, so
    target[i] is the element that follows input[i] in `sequence`.

    Args:
        sequence: Any object supporting slicing (list, string, tensor, ...).

    Returns:
        Tuple `(sequence[:-1], sequence[1:])`.
    """
    return sequence[:-1], sequence[1:]

# Demonstrate the one-step shift on a plain Python list of characters.
print(split_input_target(list("Tariq Yousef")))

(['T', 'a', 'r', 'i', 'q', ' ', 'Y', 'o', 'u', 's', 'e'], ['a', 'r', 'i', 'q', ' ', 'Y', 'o', 'u', 's', 'e', 'f'])


In [None]:
# Turn every (seq_length + 1)-long chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

# tf.data shuffles inside a buffer of this many elements rather than loading
# and shuffling the whole dataset at once.
BUFFER_SIZE = 10000
BATCH_SIZE = 64
dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    # drop_remainder keeps every batch at exactly BATCH_SIZE examples.
    .batch(BATCH_SIZE, drop_remainder=True)
    # tf.data.AUTOTUNE is the current name of the constant; the
    # tf.data.experimental.AUTOTUNE spelling is a legacy alias.
    .prefetch(tf.data.AUTOTUNE)
)

![](https://www.tensorflow.org/static/text/tutorials/images/text_generation_training.png)

In [None]:
# 4. Build the model
# The lookup layer's vocabulary size fixes both the embedding input range and
# the number of output logits.
vocab_size = len(ids_from_chars.get_vocabulary())
embedding_dim = 256
rnn_units = 1024

# Layers are stacked in order: embedding -> LSTM -> LSTM -> per-step logits.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim))
# return_sequences=True makes each LSTM emit an output at every time step.
model.add(tf.keras.layers.LSTM(rnn_units, return_sequences=True))
model.add(tf.keras.layers.LSTM(rnn_units, return_sequences=True))
# No activation here: the layer produces raw logits over the vocabulary.
model.add(tf.keras.layers.Dense(vocab_size))

model.summary()

In [None]:
# 5. Compile and train
EPOCHS = 20

# from_logits=True because the final Dense layer applies no softmax.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(loss=loss, optimizer='adam')

# Left as the last expression so the returned History object is the cell output.
model.fit(dataset, epochs=EPOCHS)



Epoch 1/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 44ms/step - loss: 3.2106
Epoch 2/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 44ms/step - loss: 1.9540
Epoch 3/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 44ms/step - loss: 1.6027
Epoch 4/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 44ms/step - loss: 1.4414
Epoch 5/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 44ms/step - loss: 1.3461
Epoch 6/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 44ms/step - loss: 1.2787
Epoch 7/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 45ms/step - loss: 1.2209
Epoch 8/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 45ms/step - loss: 1.1668
Epoch 9/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 45ms/step - loss: 1.1106
Epoch 10/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 45m

<keras.src.callbacks.history.History at 0x7e23497cb0e0>

![](https://www.tensorflow.org/static/text/tutorials/images/text_generation_sampling.png)

## Temperature in Text Generation

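When generating text, the model outputs one logit (an unnormalized score) for each possible next character (or word, in a word-level model); applying a softmax to these logits yields a probability distribution over the next character.
The temperature parameter controls how deterministic or creative the sampling from that distribution is.

We divide the logits by the temperature, which must be positive, before sampling: `scaled_logits = logits / temperature`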

| Temperature         | Behavior             | Output Style                       |
| ------------------- | -------------------- | ---------------------------------- |
| **0 → 0.5 (low)**   | More deterministic   | Safe, repetitive, predictable      |
| **0.7 → 1.0 (mid)** | Balanced exploration | Natural text with creativity       |
| **> 1.0 (high)**    | More random          | Surprising, chaotic, more mistakes |
​


In [None]:
# 6. Text generation (sampling)
def generate_text(start_string, num_chars=500, temperature=1.0, context_length=None):
    """Continue `start_string` by sampling characters from the trained model.

    The model keeps no recurrent state between calls, so each step feeds it
    the running context (prompt plus everything generated so far, cropped to
    the last `context_length` characters). Feeding only the most recently
    sampled character would condition every prediction on a single character.

    Args:
        start_string: Non-empty prompt that seeds the generation.
        num_chars: Number of characters to generate; values <= 0 return the
            prompt unchanged.
        temperature: Divisor applied to the logits before sampling. Lower
            values sharpen the distribution; 0 selects the highest-scoring
            character at every step (greedy decoding).
        context_length: Maximum number of trailing characters fed to the
            model at each step. Defaults to the notebook's `seq_length`.

    Returns:
        `start_string` followed by the generated characters, as a Python str.

    Raises:
        ValueError: If `start_string` is empty, `temperature` is negative, or
            `context_length` is not positive.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature < 0:
        raise ValueError(f"temperature must be >= 0, got {temperature}")
    if context_length is None:
        context_length = seq_length
    if context_length < 1:
        raise ValueError(f"context_length must be >= 1, got {context_length}")

    # Batch of one prompt: shape (1, number of prompt characters).
    context_ids = ids_from_chars(
        tf.strings.unicode_split([start_string], 'UTF-8')
    ).to_tensor()[:, -context_length:]

    generated_ids = []
    for _ in range(num_chars):
        # Only the logits at the last position predict the next character.
        logits = model(context_ids)[:, -1, :]
        if temperature == 0:
            # Greedy decoding; also avoids dividing the logits by zero.
            next_id = tf.argmax(logits, axis=-1)[:, tf.newaxis]
        else:
            next_id = tf.random.categorical(logits / temperature, num_samples=1)
        generated_ids.append(next_id[0, 0])
        # Append the new character and keep only the trailing window.
        context_ids = tf.concat([context_ids, next_id], axis=-1)[:, -context_length:]

    if not generated_ids:
        return start_string
    # Decode all generated IDs in one pass instead of once per character.
    generated_text = text_from_ids(tf.stack(generated_ids)).numpy().decode('utf-8')
    return start_string + generated_text


In [None]:
# Generate 1000 characters after the prompt "ROMEO:" with temperature 1.0,
# i.e. the logits are divided by 1 and therefore sampled unscaled.
print(generate_text("ROMEO:", num_chars=1000, temperature=1.0))

ROMEO:
vime that bouin me;
Buch heth CO, s m d, ge wal y furwis mathin pros th myorm,

O:
Whaiviof f if hsef at bapreatee vinds mbriolAthal r an, f; mio che'd here! des je XU
Ash celd att So e t if s athy boup If scakisllousthasuploubupllllelot rouicor s y inon
H:
F f ve wanorast o,
Beat d dllise, ar'd gh sins swat ierberw nat anein:
K ndacuprthycelfoup atirelingnood.
Cot l te is;
s bell latainck y RGimm:
Se:

Yorond m cak hanond nons yot, mete wamy.
SI nos
TRDO:
Whath m tcouthinienoulf drwasenonkiomards fo?
Nor-w sly's ocoure iserge y nojulyoucovith, a
UCUSoue'ouise t woniscad pretheat teld.
IO:

Yo satheyenioman thye arw ckinow t hed atilizeno t y ak ged!

HAMy g.
Therord
THERENCAn:
Top ba, sou ounforaf w me thive jeanthe s athal hin,
He s g yon w'l sthea RIORE a whathelfrorie the m, ouldounoul O:

As an
Pand y h n houce bllan,
O,
Tis m s ly gu thind heroury gheathef Myor is othathe KE:


Therin me I:
HA:
ULAnchencare m ca o ICI ches mat by, beathorand thouth,
IN3D gncaks Meslatr ata

In [None]:
# Same prompt with temperature 0.1: the logits are divided by 0.1 (multiplied
# by 10) before sampling, which concentrates probability on the top choices.
print(generate_text("ROMEO:", num_chars=1000, temperature=0.1))

ROMEO:

Anouthe the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the 