In [2]:
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow import keras
import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
from ast import literal_eval
import keras.utils as utils
from tensorflow.keras.layers import TextVectorization

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [3]:
data_dir = 'data'

folder, dirs, filenames = next(os.walk(data_dir))
filenames = [os.path.join(data_dir, name) for name in filenames] 
filenames[:5], len(filenames)

(['data\\001.txt',
  'data\\002.txt',
  'data\\003.txt',
  'data\\004.txt',
  'data\\005.txt'],
 209)

In [4]:
def get_vocabulary_size(filenames): 
    doc = []
    for filename in filenames:
        n_vocab = 0
        with open(filename, 'r') as f:
            for row in f:
                if len(row) == 0:
                    continue
                row = row.lower().replace("\r","").replace("\n"," ").replace("/[^a-zA-z0-9 ]/g", "")
                row = row.replace("'", "").replace("-","")
                doc.append(row)

    return doc
all_text = get_vocabulary_size(filenames)
all_text = ' '.join(all_text)

In [5]:
n_vocal = len(set(all_text))
n_vocal, all_text[:1000]


(36,
 'in olden times when wishing still helped one, there lived a king  whose daughters were all beautiful, but the youngest was so beautiful  that the sun itself, which has seen so much, was astonished whenever  it shone in her face.  close by the kings castle lay a great dark  forest, and under an old limetree in the forest was a well, and when  the day was very warm, the kings child went out into the forest and  sat down by the side of the cool fountain, and when she was bored she  took a golden ball, and threw it up on high and caught it, and this  ball was her favorite plaything.    now it so happened that on one occasion the princesss golden ball  did not fall into the little hand which she was holding up for it,  but on to the ground beyond, and rolled straight into the water.  the  kings daughter followed it with her eyes, but it vanished, and the  well was deep, so deep that the bottom could not be seen.  at this  she began to cry, and cried louder and louder, and could not b

In [6]:
corpus = all_text.split()
corpus[:10]

['in',
 'olden',
 'times',
 'when',
 'wishing',
 'still',
 'helped',
 'one,',
 'there',
 'lived']

In [7]:
# Generate tokenizer 
tokenizer = Tokenizer()

def get_sequence_of_tokens(corpus):
    tokenizer.fit_on_texts(corpus)
    total_words = len(tokenizer.word_index) + 1
    return total_words
total_words = get_sequence_of_tokens(corpus)
total_words, tokenizer.texts_to_sequences(corpus[:10])

(8032, [[10], [2728], [692], [22], [1413], [114], [933], [35], [37], [292]])

In [8]:
u = utils.to_categorical([[1], [2], [3], [4], [5]], num_classes=11)
u

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)

In [9]:
tokenizer.texts_to_sequences(['in', 'olden'])

[[10], [2728]]

In [10]:
def sequence_tokenizer(doc):
    return tokenizer.texts_to_sequences(doc)

In [11]:
window_size = 10
def generate_window_dataset(documents, window_size=10, shuffle=True, batch_size=32):
    
    doc_dataset = tf.data.Dataset.from_tensor_slices(documents) # load data into tensor from
    
#     # make generate windows from list documents 
#     # E.x. ['A', 'B', 'C', 'D', 'E', 'F', 'G']
    doc_dataset = doc_dataset.window(size=window_size + 1, shift=1, drop_remainder=True)
    
# #     # E.x. [['A', 'B', 'C', 'D', 'E', 'F', 'G']]
    doc_dataset = doc_dataset.flat_map(lambda x: x.batch(window_size + 1))
    
# #     # E.x. ['A', 'B', 'C', 'D', 'E', 'F'] => [G]    
# #     #      ['B', 'C', 'D', 'E', 'F', 'G'] => [K]
    doc_dataset = doc_dataset.map(lambda x: (x[:-1] , x[-1:]))
    
#     # shuffle the dataset with window size
    if shuffle:
        doc_dataset = doc_dataset.shuffle(len(list(doc_dataset)))
    
    doc_dataset = doc_dataset.batch(batch_size)
    doc_dataset = doc_dataset.prefetch(batch_size)
    
    return doc_dataset


In [12]:
dataset = generate_window_dataset(corpus)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [13]:
text_tokenizers = [x[0] for x in tokenizer.texts_to_sequences(corpus) if len(x) == 1]

In [14]:
text_tokenizers

[10,
 2728,
 692,
 22,
 1413,
 114,
 933,
 35,
 37,
 292,
 5,
 55,
 590,
 659,
 45,
 38,
 146,
 18,
 1,
 450,
 6,
 30,
 146,
 13,
 1,
 339,
 538,
 49,
 104,
 274,
 30,
 150,
 6,
 858,
 1053,
 7,
 672,
 10,
 16,
 313,
 746,
 54,
 1,
 105,
 191,
 166,
 5,
 81,
 557,
 120,
 2,
 268,
 92,
 73,
 2094,
 10,
 1,
 120,
 6,
 5,
 82,
 2,
 22,
 1,
 112,
 6,
 141,
 693,
 1,
 105,
 145,
 39,
 34,
 36,
 1,
 120,
 2,
 170,
 58,
 54,
 1,
 340,
 8,
 1,
 1961,
 2095,
 2,
 22,
 12,
 6,
 3552,
 12,
 67,
 5,
 175,
 985,
 2,
 304,
 7,
 51,
 26,
 400,
 2,
 494,
 7,
 2,
 56,
 985,
 6,
 16,
 2729,
 3553,
 68,
 7,
 30,
 280,
 13,
 26,
 35,
 2464,
 1,
 3086,
 175,
 985,
 64,
 21,
 368,
 36,
 1,
 50,
 189,
 49,
 12,
 6,
 1962,
 51,
 25,
 7,
 18,
 26,
 3,
 1,
 266,
 2262,
 2,
 777,
 525,
 36,
 1,
 147,
 1,
 105,
 94,
 572,
 7,
 20,
 16,
 179,
 18,
 7,
 859,
 2,
 1,
 82,
 6,
 602,
 30,
 602,
 13,
 1,
 760,
 65,
 21,
 32,
 274,
 31,
 56,
 12,
 144,
 3,
 673,
 2,
 100,
 2263,
 2,
 2263,
 2,
 65,
 21,
 32,
 960,
 2,
 

### Building model
##### Vectorization layer

In [15]:
vectorizer_layer = TextVectorization(max_tokens=total_words,
                                    output_mode="int",
                                    output_sequence_length=window_size)

vectorizer_layer.adapt(corpus)

In [16]:
embedding_dim = 16
embedding_layer = tf.keras.layers.Embedding(total_words, embedding_dim)

In [17]:
input_layer = tf.keras.Input(shape=(1,), dtype=tf.string)


In [18]:
model = tf.keras.models.Sequential()

In [19]:
# model.add(input_layer)
# model.add(vectorizer_layer.input_layer())
# model.add(embedding_layer.)

In [20]:
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# x = (tf.keras.layers.Dense(4))
# outputs = tf.keras.layers.Dense(total_words, activation="softmax")(x)


In [21]:
text_vector = vectorizer_layer(input_layer)
embedd = embedding_layer(text_vector)
embedd_outputs = layers.Dense(128, activation="relu")(embedd)
outputs = layers.Dense(total_words, activation="softmax")(embedd_outputs)
model = tf.keras.Model(input_layer, outputs)

# Compile
model.compile(loss="categorical_crossentropy", # if your labels are integer form (not one hot) use sparse_categorical_crossentropy
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])

In [22]:
# Fit the token, char and positional embedding model
history_model = model.fit(dataset, epochs=3)

Epoch 1/3


ValueError: in user code:

    File "c:\users\ntviet5\desktop\tensorflow-certificate\env\lib\site-packages\keras\engine\training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "c:\users\ntviet5\desktop\tensorflow-certificate\env\lib\site-packages\keras\engine\training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\users\ntviet5\desktop\tensorflow-certificate\env\lib\site-packages\keras\engine\training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "c:\users\ntviet5\desktop\tensorflow-certificate\env\lib\site-packages\keras\engine\training.py", line 1024, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\users\ntviet5\desktop\tensorflow-certificate\env\lib\site-packages\keras\engine\training.py", line 1083, in compute_loss
        y, y_pred, sample_weight, regularization_losses=self.losses
    File "c:\users\ntviet5\desktop\tensorflow-certificate\env\lib\site-packages\keras\engine\compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\users\ntviet5\desktop\tensorflow-certificate\env\lib\site-packages\keras\losses.py", line 152, in __call__
        losses = call_fn(y_true, y_pred)
    File "c:\users\ntviet5\desktop\tensorflow-certificate\env\lib\site-packages\keras\losses.py", line 284, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\users\ntviet5\desktop\tensorflow-certificate\env\lib\site-packages\keras\losses.py", line 2005, in categorical_crossentropy
        y_true, y_pred, from_logits=from_logits, axis=axis
    File "c:\users\ntviet5\desktop\tensorflow-certificate\env\lib\site-packages\keras\backend.py", line 5532, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, None) and (None, 10, 8032) are incompatible


In [None]:
model.summary()

In [None]:
# Plot the token, char, positional embedding model
from tensorflow.keras.utils import plot_model
plot_model(model)

In [None]:
input_data[1]

In [None]:
input_data = [("in olden times when wishing still helped one"),("true")]
# model.predict(input_data)

In [None]:
c = model.predict(input_data)
c.shape

In [None]:
test_data = 'in olden times when wishing still helped one, there lived a king  whose daughters were all beautiful, but the youngest was so beautiful  that the sun itself, which has seen so much, was astonished whenever  it shone in her face.  close by the kings castle lay a great dark  forest, and under an old limetree in the forest was a well, and when  the day was very warm, the kings child went out into the forest and  sat down by the side of the cool fountain, and when she was bored she  took a golden ball, and threw it up on high and caught it, and this  ball was her favorite plaything.    now it so happened that on one occasion the princesss golden ball  did not fall into the little hand which she was holding up for it,  but on to the ground beyond, and rolled straight into the water.  the  kings daughter followed it with her eyes, but it vanished, and the  well was deep, so deep that the bottom could not be seen.  at this  she began to cry, and cried louder and louder, and could not be  comf'

In [None]:
text_tokenizers[:10]

In [None]:
def split_input_target(chunk):
    input_text, target_text = chunk[:-1], chunk[1:]
    return input_text, target_text


def create_dataset(text_as_int, seq_length=100, batch_size=64, buffer_size=10000):
    char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
    dataset = char_dataset.batch(seq_length + 1, drop_remainder=True).map(split_input_target)
    dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder=True)
    return dataset


In [None]:
dataset2 = create_dataset(text_tokenizers)

In [None]:
zx = create_dataset([1,3,4,5])

In [None]:
[x for x in dataset2.take(1)]

In [184]:
inp_sequences = [[73, 313],
 [73, 313, 616],
 [73, 313, 616, 3],
 [73, 313, 616, 3, 617],
 [73, 313, 616, 3, 617, 205],
 [73, 313, 616, 3, 617, 205, 314],
 [618, 38],
 [618, 38, 619],
 [618, 38, 619, 1],
 [618, 38, 619, 1, 206]]

In [180]:
predictors, label = input_sequences[:,:-1],input_sequences[:,-1]

TypeError: list indices must be integers or slices, not tuple

In [186]:
from keras.preprocessing.sequence import pad_sequences
def generate_padded_sequences(input_sequences):
    max_sequence_len = max([len(x) for x in input_sequences])
    input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
    
    predictors, label = input_sequences[:,:-1],input_sequences[:,-1]
    label = ku.to_categorical(label, num_classes=total_words)
    return predictors, label, max_sequence_len

predictors, label, max_sequence_len = generate_padded_sequences(inp_sequences)

ImportError: cannot import name 'pad_sequences' from 'keras.preprocessing.sequence' (c:\users\ntviet5\desktop\tensorflow-certificate\env\lib\site-packages\keras\preprocessing\sequence.py)

In [97]:
import numpy as np
import tensorflow as tf


def process_text(text):
#     text = open(file_path, 'rb').read().decode(encoding='utf-8')  # Read, then decode for py2 compat.
    vocab = sorted(set(text))  # The unique characters in the file
    # Creating a mapping from unique characters to indices and vice versa
    char2idx = {u: i for i, u in enumerate(vocab)}
    idx2char = np.array(vocab)
    text_as_int = np.array([char2idx[c] for c in text])
    return text_as_int, vocab, char2idx, idx2char


def split_input_target(chunk):
    input_text, target_text = chunk[:-1], chunk[1:]
    return input_text, target_text


def create_dataset(text_as_int, seq_length=100, batch_size=64, buffer_size=10000):
    char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
    dataset = char_dataset.batch(seq_length + 1, drop_remainder=True).map(split_input_target)
    dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder=True)
    return dataset


def build_model(vocab_size, embedding_dim=256, rnn_units=1024, batch_size=64):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
        tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model


def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)


def generate_text(model, char2idx, idx2char, start_string, generate_char_num=1000, temperature=1.0):
    # Evaluation step (generating text using the learned model)
    # Low temperatures results in more predictable text, higher temperatures results in more surprising text.
    # Converting our start string to numbers (vectorizing)
    input_eval = [char2idx[s] for s in start_string]
    start_eval = input_eval[:]
#     print(input_eval)
    input_eval = tf.expand_dims(input_eval, 0)
#     print(input_eval)
    text_generated = []  # Empty string to store our results
    model.reset_states()
    for i in range(generate_char_num):
        predictions = model(input_eval)
#         print(predictions)
        predictions = tf.squeeze(predictions, 0)    # remove the batch dimension
#         print(predictions)
        predictions /= temperature
#         print(predictions)
        # using a categorical distribution to predict the character returned by the model
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
#         print(predicted_id)
        start_eval.append(predicted_id)
        # We pass the predicted character as the next input to the model along with the previous hidden state
        input_eval = tf.expand_dims(start_eval[-100:], axis=0)
#         input_eval = tf.expand_dims([predicted_id], axis=0)
#         print(input_eval)
#         print(idx2char[predicted_id])
#         print('---------------')
        text_generated.append(idx2char[predicted_id])
    return start_string + ''.join(text_generated)


# path_to_file = tf.keras.utils.get_file('nietzsche.txt', 'https://s3.amazonaws.com/text-datasets/nietzsche.txt')
# path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

# text_as_int, vocab, char2idx, idx2char = process_text(path_to_file)
# dataset = create_dataset(text_as_int)
# model = build_model(vocab_size=len(vocab))
# model.compile(optimizer='adam', loss=loss)
# model.summary()
# history = model.fit(dataset, epochs=50)
# model.save_weights("gen_text_weights.h5", save_format='h5')
# # To keep this prediction step simple, use a batch size of 1
# model = build_model(vocab_size=len(vocab), batch_size=1)
# model.load_weights("gen_text_weights.h5")
# model.summary()

# user_input = input("Write the beginning of the text, the program will complete it. Your input is: ")
# generated_text = generate_text(model, char2idx, idx2char, start_string=user_input, generate_char_num=2000)
# print(generated_text)

In [23]:
text_as_int, vocab, char2idx, idx2char = process_text(all_text)

In [96]:
user_input = input("Write the beginning of the text, the program will complete it. Your input is: ")
generated_text = generate_text(model, char2idx, idx2char, start_string=user_input, generate_char_num=100)
print(generated_text)


Write the beginning of the text, the program will complete it. Your input is: test
test companied only by one  servant, rode full gallop to the forest.  the servant fell with  his horse, 


In [99]:
user_input = input("Write the beginning of the text, the program will complete it. Your input is: ")
generated_text = generate_text(model, char2idx, idx2char, start_string=user_input, generate_char_num=100)
print(generated_text)


Write the beginning of the text, the program will complete it. Your input is: the kings daughter was sitting
the kings daughter was sitting, when he went in  through the basket, the youth became king and  queen, and went some you.  my wish


In [100]:
user_input = input("Write the beginning of the text, the program will complete it. Your input is: ")
generated_text = generate_text(model, char2idx, idx2char, start_string=user_input, generate_char_num=500)
print(generated_text)


Write the beginning of the text, the program will complete it. Your input is: kings daughter was sitting
kings daughter was sitting upon the  heart.  that is the morning, said the jew, and fell asleep.  cock  she had no children.  once when the prince was riding forth the  birds   wondered at this shape and looked at her, so that she could not refuse.    after that the aged grandmother had already was a powerful golden feather orest three kings daughter come, and if you dont know her, i have  got the cow  for your daughter as your rame more place.  on this dress, and when they he window that she  had vanished, and now the b


In [260]:
dataset = create_dataset(text_as_int)
model = build_model(vocab_size=len(vocab))
model.compile(optimizer='adam', loss=loss)
model.summary()
history = model.fit(dataset, epochs=50)
# model.save_weights("gen_text_weights.h5", save_format='h5')
# To keep this prediction step simple, use a batch size of 1
model = build_model(vocab_size=len(vocab), batch_size=1)
model.load_weights("gen_text_weights.h5")
model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (64, None, 256)           9216      
                                                                 
 lstm_2 (LSTM)               (64, None, 1024)          5246976   
                                                                 
 dropout_2 (Dropout)         (64, None, 1024)          0         
                                                                 
 batch_normalization_2 (Batc  (64, None, 1024)         4096      
 hNormalization)                                                 
                                                                 
 lstm_3 (LSTM)               (64, None, 1024)          8392704   
                                                                 
 dropout_3 (Dropout)         (64, None, 1024)          0         
                                                     

In [95]:
# user_input = input("Write the beginning of the text, the program will complete it. Your input is: ")
# generated_text = generate_text(model, char2idx, idx2char, start_string=user_input, generate_char_num=2000)
# print(generated_text)

In [90]:
a = [39, 24, 38, 39, 11]
a[-4:]

[24, 38, 39, 11]

In [None]:

# def process_textx(text):
# #     text = open(file_path, 'rb').read().decode(encoding='utf-8')  # Read, then decode for py2 compat.
#     vocab = sorted(set(text))  # The unique characters in the file
#     # Creating a mapping from unique characters to indices and vice versa
#     char2idx = {u: i for i, u in enumerate(vocab)}
#     idx2char = np.array(vocab)
#     text_as_int = np.array([char2idx[c] for c in text])
#     return text_as_int, vocab, char2idx, idx2char


# def split_input_targetx(chunk):
#     input_text, target_text = chunk[:-1], chunk[1:]
#     return input_text, target_text


# def create_datasetx(text_as_int, seq_length=100, batch_size=64, buffer_size=10000):
#     char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
#     dataset = char_dataset.batch(seq_length + 1, drop_remainder=True).map(split_input_target)
# #     dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder=True)
#     return dataset

In [51]:
tt = create_dataset(text_as_int)

---------------------------------------------------------
Tensor("args_0:0", shape=(101,), dtype=int32)
Tensor("strided_slice:0", shape=(100,), dtype=int32) Tensor("strided_slice_1:0", shape=(100,), dtype=int32)


In [49]:
tt

<BatchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int32, name=None), TensorSpec(shape=(64, 100), dtype=tf.int32, name=None))>

[(<tf.Tensor: shape=(64, 100), dtype=int32, numpy=
  array([[25, 27, 24, ..., 34, 24, 30],
         [ 1, 34, 24, ..., 21, 13,  1],
         [ 6,  1,  1, ..., 13,  1, 29],
         ...,
         [ 5,  1, 11, ..., 28,  1, 10],
         [17, 10, 29, ..., 17, 10, 29],
         [14, 29,  1, ...,  1, 32, 17]])>,
  <tf.Tensor: shape=(64, 100), dtype=int32, numpy=
  array([[27, 24, 25, ..., 24, 30, 27],
         [34, 24, 30, ..., 13,  1, 23],
         [ 1,  1, 29, ...,  1, 29, 17],
         ...,
         [ 1, 11, 30, ...,  1, 10, 27],
         [10, 29,  1, ..., 10, 29,  1],
         [29,  1, 22, ..., 32, 17, 18]])>)]

<MapDataset element_spec=(TensorSpec(shape=(None,), dtype=tf.int32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(9, shape=(), dtype=int32)
tf.Tensor(10, shape=(), dtype=int32)


[None, None, None, None, None, None, None, None, None, None]

In [46]:
model = build_model(vocab_size=len(vocab))
model.compile(optimizer='adam', loss=loss)
model.load_weights("gen_text_weights.h5")
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (64, None, 256)           9216      
                                                                 
 lstm_6 (LSTM)               (64, None, 1024)          5246976   
                                                                 
 dropout_6 (Dropout)         (64, None, 1024)          0         
                                                                 
 batch_normalization_6 (Batc  (64, None, 1024)         4096      
 hNormalization)                                                 
                                                                 
 lstm_7 (LSTM)               (64, None, 1024)          8392704   
                                                                 
 dropout_7 (Dropout)         (64, None, 1024)          0         
                                                      

In [61]:
user_input = input("Write the beginning of the text, the program will complete it. Your input is: ")
generated_text = generate_text(model, char2idx, idx2char, start_string=user_input, generate_char_num=1)
print(generated_text)


Write the beginning of the text, the program will complete it. Your input is: test
tf.Tensor(
[[[ -5.5037856    4.7511535   -4.579504    -1.3326935   -5.997266
     3.8355663    1.6708739   -3.6531289   -1.9414227   -7.3208733
     2.4477441   -4.4496236   -2.0152292   -4.228157     2.819153
    -4.323568    -2.8293428    6.5398674    1.5040026   -5.153716
    -3.8815207    2.9991596   -2.0816274   -4.3387246    4.2962947
    -5.3018045   -7.454533     1.2362175   -0.20588052   0.41155413
     1.0430781   -4.949195     3.6240258   -4.062361     0.03601163
    -4.9055543 ]
  [ -7.05791      3.8161633   -6.500891    -3.6414137   -5.3209558
     1.1802993    0.5248253   -5.892267    -3.1630824   -8.50098
     1.1969467    0.66362655  -0.6623765    3.1048393    1.2327492
    -2.3829548   -2.680851    -2.771061    -0.8635484   -4.319003
    -1.6912453    4.0789       2.1742647    3.0265334   -1.1597085
     0.623724    -5.1353116    5.432199     1.9001968   -0.32841235
    -1.924179    -1.0

In [52]:
dataset = create_dataset(text_as_int)
history = model.fit(dataset, epochs=5)
model.summary()

---------------------------------------------------------
Tensor("args_0:0", shape=(101,), dtype=int32)
Tensor("strided_slice:0", shape=(100,), dtype=int32) Tensor("strided_slice_1:0", shape=(100,), dtype=int32)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (64, None, 256)           9216      
                                                                 
 lstm_6 (LSTM)               (64, None, 1024)          5246976   
                                                                 
 dropout_6 (Dropout)         (64, None, 1024)          0         
                                                                 
 batch_normalization_6 (Batc  (64, None, 1024)         4096      
 hNormalization)                                                 
                                                        

In [55]:
model.save_weights("gen_text_weights.h5", save_format='h5')
# To keep this prediction step simple, use a batch size of 1
model = build_model(vocab_size=len(vocab), batch_size=1)
model.load_weights("gen_text_weights.h5")

In [45]:
[print(x) for x in dataset.take(3)]

(<tf.Tensor: shape=(64, 100), dtype=int32, numpy=
array([[16, 28,  1, ..., 17, 10, 28],
       [17, 14,  1, ..., 22, 34, 28],
       [14, 23,  1, ..., 16, 24, 21],
       ...,
       [28, 10, 29, ..., 29,  1, 28],
       [28,  5,  1, ..., 31, 14, 27],
       [29, 24,  1, ..., 17, 14, 27]])>, <tf.Tensor: shape=(64, 100), dtype=int32, numpy=
array([[28,  1, 13, ..., 10, 28,  1],
       [14,  1, 25, ..., 34, 28, 14],
       [23,  1, 18, ..., 24, 21, 13],
       ...,
       [10, 29,  1, ...,  1, 28, 17],
       [ 5,  1,  3, ..., 14, 27,  5],
       [24,  1, 20, ..., 14, 27, 28]])>)
(<tf.Tensor: shape=(64, 100), dtype=int32, numpy=
array([[14, 10, 27, ..., 14, 13,  1],
       [27, 22, 14, ...,  1,  1, 10],
       [26, 30, 14, ..., 25, 10, 21],
       ...,
       [24, 23, 16, ..., 10, 21, 21],
       [29, 24,  1, ...,  1, 13, 24],
       [21, 21, 14, ..., 31, 18, 12]])>, <tf.Tensor: shape=(64, 100), dtype=int32, numpy=
array([[10, 27, 14, ..., 13,  1, 29],
       [22, 14, 27, ...,  1, 10,  1

[None, None, None]