In [None]:
import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, GRU, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np

In [None]:
# Show the installed TensorFlow version (the recorded run used 2.14.0).
tf.__version__

'2.14.0'

# Dataset 1

In [None]:
# sonnets.txt
!gdown --id 108jAePKK4R3BVYBbYJZ32JWUwxeMg20K

Downloading...
From: https://drive.google.com/uc?id=108jAePKK4R3BVYBbYJZ32JWUwxeMg20K
To: /content/sonnets.txt
100% 93.6k/93.6k [00:00<00:00, 76.1MB/s]


In [None]:
# Read the whole sonnets corpus into memory. The context manager closes the
# file handle, and the explicit encoding keeps decoding independent of the
# platform default (the corpus is later split as UTF-8 as well).
with open('./sonnets.txt', encoding='utf-8') as corpus_file:
    data = corpus_file.read()
print(data[0:100])

FROM fairest creatures we desire increase,
That thereby beauty's rose might never die,
But as the ri


# Dataset 2

In [None]:
# irish-lyrics-eof.txt
!gdown --id 15UqmiIm0xwh9mt0IYq2z3jHaauxQSTQT

In [None]:
# Read the Irish lyrics corpus into memory.
# NOTE: this rebinds `data`, replacing the sonnets corpus loaded above. Run
# only one of the two dataset sections; the outputs recorded further down
# (93578 characters, text starting "FROM fairest") come from the sonnets.
# The context manager closes the file handle and the encoding is explicit.
with open('./irish-lyrics-eof.txt', encoding='utf-8') as corpus_file:
    data = corpus_file.read()
print(data[0:100])

In [None]:
# Sorted list of the distinct characters in the corpus: the character-level vocabulary.
vocab = sorted(set(data))

In [None]:
# Inspect the vocabulary and its size.
print(vocab)
print(len(vocab))

['\n', ' ', '!', "'", ',', '-', '.', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
60


In [None]:
# Character -> integer-id lookup layer.
# `vocab` is already a list, so it is passed straight through. With
# mask_token=None no mask entry is reserved; the layer still adds one
# out-of-vocabulary entry, so its vocabulary is one longer than `vocab`.
ids_from_chars = tf.keras.layers.StringLookup(vocabulary=vocab, mask_token=None)


In [None]:
# Size of the lookup layer's vocabulary (one more than len(vocab) in the recorded run).
len(ids_from_chars.get_vocabulary())

61

In [None]:
# Integer-id -> character lookup layer: the inverse of ids_from_chars, built
# from that layer's own vocabulary so both directions share one id assignment.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True,
    mask_token=None,
)

In [None]:
# Rebuilding text from character ids

def text_from_ids(id):
  """Convert character ids back into text.

  The ids are mapped to characters with `chars_from_ids`, and the characters
  are then concatenated along the last axis with tf.strings.reduce_join.
  """
  characters = chars_from_ids(id)
  return tf.strings.reduce_join(characters, axis=-1)

In [None]:
# Building (input, target) pairs for next-character prediction

def split_input_target(sequence):
  """Split a sequence into an input and a target shifted by one position.

  The input is everything except the last element and the target is
  everything except the first, so target[i] is the element that follows
  input[i] in the original sequence.
  """
  return sequence[:-1], sequence[1:]

In [None]:
#Step 1 : Splitting the corpus into individual characters (UTF-8 decoded)

chars = tf.strings.unicode_split(data, input_encoding='UTF-8')
print(chars.numpy())

[b'F' b'R' b'O' ... b'e' b'.' b'\n']


In [None]:
# Number of characters in the corpus.
len(chars)

93578

In [None]:
#Step 2 : Converting characters to id form
# The bare `ids` on the last line displays the resulting tensor.

ids = ids_from_chars(chars)
ids

<tf.Tensor: shape=(93578,), dtype=int64, numpy=array([16, 27, 25, ..., 39,  7,  1])>

In [None]:
#Creating a dataset that yields one character id per element

id_dataset = tf.data.Dataset.from_tensor_slices(ids)

In [None]:
# Length (in characters) of each training input; chunks of seq_length+1 are
# cut from the corpus so input and target can be offset by one character.
seq_length = 100

In [None]:
# Group consecutive ids into chunks of seq_length+1; the shorter trailing chunk is dropped.
sequences = id_dataset.batch(seq_length+1, drop_remainder= True)

In [None]:
# Sanity check: show the first chunk as ids, as characters, and as joined text.
for seq in sequences.take(1):
  print(seq)
  print(chars_from_ids(seq))
  print(text_from_ids(seq))

tf.Tensor(
[16 27 25 23  2 40 35 43 52 39 53 54  2 37 52 39 35 54 55 52 39 53  2 57
 39  2 38 39 53 43 52 39  2 43 48 37 52 39 35 53 39  5  1 29 42 35 54  2
 54 42 39 52 39 36 59  2 36 39 35 55 54 59  4 53  2 52 49 53 39  2 47 43
 41 42 54  2 48 39 56 39 52  2 38 43 39  5  1 12 55 54  2 35 53  2 54 42
 39  2 52 43 50], shape=(101,), dtype=int64)
tf.Tensor(
[b'F' b'R' b'O' b'M' b' ' b'f' b'a' b'i' b'r' b'e' b's' b't' b' ' b'c'
 b'r' b'e' b'a' b't' b'u' b'r' b'e' b's' b' ' b'w' b'e' b' ' b'd' b'e'
 b's' b'i' b'r' b'e' b' ' b'i' b'n' b'c' b'r' b'e' b'a' b's' b'e' b','
 b'\n' b'T' b'h' b'a' b't' b' ' b't' b'h' b'e' b'r' b'e' b'b' b'y' b' '
 b'b' b'e' b'a' b'u' b't' b'y' b"'" b's' b' ' b'r' b'o' b's' b'e' b' '
 b'm' b'i' b'g' b'h' b't' b' ' b'n' b'e' b'v' b'e' b'r' b' ' b'd' b'i'
 b'e' b',' b'\n' b'B' b'u' b't' b' ' b'a' b's' b' ' b't' b'h' b'e' b' '
 b'r' b'i' b'p'], shape=(101,), dtype=string)
tf.Tensor(b"FROM fairest creatures we desire increase,\nThat thereby beauty's rose might never d

In [None]:
# The models below fix their batch size through batch_input_shape, and text
# generation feeds a single sequence at a time, hence a batch size of 1.
batch_size = 1
# Number of elements held in the shuffle buffer.
buffer_size = 1000
# LSTM width for Model 2 (assigned again in the Model 2 cell).
lstm_units = 1024

In [None]:
#Creating inputs and targets: each chunk becomes an (input, target) pair offset by one character
training_data = sequences.map(split_input_target)

In [None]:
# Shuffle the (input, target) pairs, group them into fixed-size batches and
# prefetch so that input preparation can overlap with training.
# Note: this rebinds `training_data`, so run the cell once per pipeline build.
training_data = (
    training_data
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Sanity check: print one batch's shapes and decode its input and target text;
# the target is the input shifted one character to the left.
for x, target in training_data.take(1):
  print(x.shape, target.shape)
  print(text_from_ids(x))
  print(text_from_ids(target))

(1, 100) (1, 100)
tf.Tensor([b'e spirit of love with a perpetual dullness.\nLet this sad interim like the ocean be\nWhich parts the s'], shape=(1,), dtype=string)
tf.Tensor([b' spirit of love with a perpetual dullness.\nLet this sad interim like the ocean be\nWhich parts the sh'], shape=(1,), dtype=string)


In [None]:
# Model input/output size: the lookup layer's vocabulary length.
vocab_size = len(ids_from_chars.get_vocabulary())
print(vocab_size)
# Embedding width for Model 1 (the Model 2 cell assigns 256 instead).
embedding_dim = 100


61


# Model 1

In [None]:
# Model 1: embedding -> one stateful LSTM -> batch normalisation -> Dense.
# The Dense layer has no activation, so the model outputs raw logits.
model = Sequential([
    Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
    LSTM(150, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
    BatchNormalization(),
    Dense(vocab_size),
])

# Model 2

In [None]:
def build_model(vocab_size, embedding_dim=256, rnn_units=1024, batch_size=batch_size):
    """Build Model 2: an embedding followed by two stacked stateful LSTM blocks.

    Each block is LSTM -> Dropout(0.1) -> BatchNormalization. The final Dense
    layer has no activation, so the model outputs raw logits per character.

    Args:
        vocab_size: Number of distinct ids; used for both the embedding input
            size and the width of the output layer.
        embedding_dim: Width of the character embedding.
        rnn_units: Number of units in each of the two LSTM layers.
        batch_size: Fixed batch size baked into the model through
            batch_input_shape. The default is the value the global
            `batch_size` had when this function was defined.

    Returns:
        An uncompiled tf.keras.Sequential model.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
        # Both LSTM layers are sized by the `rnn_units` argument. The first one
        # previously read the global `lstm_units`, which ignored the argument.
        tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model

In [None]:
#If using model 2 run this block

embedding_dim = 256  # Replace with your desired embedding dimension
lstm_units = 1024  # Replace with your desired number of LSTM units

# Build Model 2 (a Sequential model); this rebinds `model`
model = build_model(vocab_size, embedding_dim, lstm_units)

# Display the model summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (1, None, 256)            15616     
                                                                 
 lstm (LSTM)                 (1, None, 1024)           5246976   
                                                                 
 dropout (Dropout)           (1, None, 1024)           0         
                                                                 
 batch_normalization (Batch  (1, None, 1024)           4096      
 Normalization)                                                  
                                                                 
 lstm_1 (LSTM)               (1, None, 1024)           8392704   
                                                                 
 dropout_1 (Dropout)         (1, None, 1024)           0         
                                                        

# Common block for all models

In [None]:
def loss(y_true, y_pred):
  """Sparse categorical cross-entropy between integer targets and logits.

  from_logits=True because the models' final Dense layer applies no
  activation, so `y_pred` holds unnormalised scores.
  """
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true,
      y_pred,
      from_logits=True,
  )


In [None]:
# Configure training: custom logits-based loss, Adam with default settings,
# and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss=loss,
    metrics=['accuracy'],
)

In [None]:
# Print the architecture of whichever model is currently bound to `model`.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (1, None, 100)            6100      
                                                                 
 lstm (LSTM)                 (1, None, 150)            150600    
                                                                 
 batch_normalization (Batch  (1, None, 150)            600       
 Normalization)                                                  
                                                                 
 dense (Dense)               (1, None, 61)             9211      
                                                                 
Total params: 166511 (650.43 KB)
Trainable params: 166211 (649.26 KB)
Non-trainable params: 300 (1.17 KB)
_________________________________________________________________


In [None]:
# Number of passes over the training dataset.
epochs = 10

In [None]:
# Train the model.
# NOTE(review): the LSTM layers are stateful while the dataset is shuffled, so
# state carried from one batch to the next comes from an unrelated sequence —
# confirm this is intended, or reset states / use stateful=False for training.
model.fit(training_data, epochs=epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7cf04b4f3af0>

In [None]:
# Persist the trained model.
# NOTE(review): the filename looks like a typo of "text_generation"; it is left
# unchanged in case something else loads this exact path. The model was compiled
# with a custom `loss` function, so loading it may need custom_objects — TODO confirm.
model.save('text_generaion.keras')

In [None]:
# Ask for the seed text interactively; this cell blocks until input is entered.
user_input = input("Write the beginning of the text, the program will complete it. Your input is: ")

Write the beginning of the text, the program will complete it. Your input is: Help me Obi Wan Kenobi, you're my only hope


In [None]:
def generate_text(model, start_string, generate_char_num=1000, temperature=1.0, states=None):
    """Generate text one character at a time, continuing ``start_string``.

    The seed is fed through the model once to prime its recurrent state; after
    that only the most recently sampled character is fed back in, relying on
    the stateful layers to remember the context.

    Args:
        model: Trained stateful character model. It is called with a batch of
            one sequence and is expected to return per-position logits over
            the lookup layer's vocabulary.
        start_string: Non-empty seed text. It is returned as the prefix of the
            generated text.
        generate_char_num: Number of characters to sample after the seed.
        temperature: Positive divisor applied to the logits. Low values give
            more predictable text, high values more surprising text.
        states: Unused. Kept so existing callers that pass it keep working;
            the recurrent state lives inside the model's stateful layers.

    Returns:
        ``start_string`` followed by ``generate_char_num`` sampled characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
    """
    if not start_string:
        # An empty seed gives a zero-length sequence with no "last" prediction.
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Vectorise the seed: characters -> ids, plus a leading batch axis.
    input_chars = tf.strings.unicode_split(start_string, 'UTF-8')
    input_ids = tf.expand_dims(ids_from_chars(input_chars), 0)

    # Id 0 is the lookup layer's out-of-vocabulary slot. Adding -inf to its
    # logit guarantees the sampler never emits the "[UNK]" token.
    vocab_len = len(ids_from_chars.get_vocabulary())
    oov_mask = tf.constant([[-float('inf')] + [0.0] * (vocab_len - 1)], dtype=tf.float32)

    result = [start_string]  # Seed first, then one entry per sampled character

    # Start from a clean recurrent state so earlier calls do not leak in.
    model.reset_states()

    for _ in range(generate_char_num):
        # Call the model directly: model.predict() carries per-call overhead
        # and progress output, which is wasteful for a one-step input in a loop.
        logits = model(input_ids, training=False)
        logits = tf.cast(logits[:, -1, :], tf.float32)  # Only the last position matters
        logits = logits / temperature + oov_mask

        # Sample the next character id from the categorical distribution.
        next_ids = tf.random.categorical(logits, num_samples=1)
        next_ids = tf.squeeze(next_ids, axis=-1)

        # Record the sampled character as a Python string.
        next_character = chars_from_ids(next_ids)
        result.append(next_character.numpy()[0].decode('utf-8'))

        # Feed the sampled id straight back in as the next one-step input;
        # there is no need to round-trip through the character lookup.
        input_ids = tf.expand_dims(next_ids, 0)

    return ''.join(result)


In [None]:
# Continue the user's seed text with 1000 sampled characters.
generated_text= generate_text(model, start_string=user_input, generate_char_num=1000)



In [None]:
# Show the seed followed by the generated continuation.
print(generated_text)

Help me Obi Wan Kenobi, you're my only hope,
On stall things parth thee vories
For I mation time is it nowly writh passess.
Levof of death, ratched the worth,
Unounter of love she more till thy stang,
Whilst me love fines's ear drow appever this, thou garts of woulds,
Ex bleass in thine more with leads and true lept.

It should gans, of thy fire I'er it from the wore,s now toud
Shap you aming thiness engrows the worth, bling desulfior.
Let him thou art contly other date's espend!
Uthan to the make squeck loow? in I fate,
That thou corid poen'd give then mine alwert,
Peauins forget's deeshust my heart likin of thy astring:
To line, me is not withs thou wretch seakles greate.
But, of for such and mine own state
Thou pericil false with wonto of shall every hate;
But sin, in tongue crost ghown when knowly rideivest;
As, it mut all the pretion? womang the lease spent
Within for they somorl ever! not the grouds fair chide,
And but is that owals more newore of supt;
And yet soplet it his dead