# 1. Rede Neural Recorrente para geração de texto

## Disciplina: Redes Neurais Artificiais

### Universidade Federal Rural do Semi-Árido

1.1 Importando as bibliotecas necessárias

In [10]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

1.2 Definição do nosso texto ou sentença

In [11]:
# Dados de exemplo (trecho de texto)
corpus = "Este é um exemplo de texto para treinar uma RNN."

Os modelos de aprendizado de máquina recebem vetores (matrizes de números) como entrada. Ao trabalhar com texto, a primeira coisa que devemos fazer é criar uma estratégia para converter seqüências de caracteres em números (ou "vetorizar" o texto) antes de alimentá-lo no modelo.

In [12]:
# Tokenização simples
tokenizer = tf.keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts([corpus])
sequence_data = tokenizer.texts_to_sequences([corpus])[0]
vocab_size = len(tokenizer.word_index) + 1


Codificamos cada palavra/letra usando um número único

In [13]:
# print sequence
print(corpus)
print(tokenizer.word_index)

Este é um exemplo de texto para treinar uma RNN.
{' ': 1, 'e': 2, 't': 3, 'a': 4, 'r': 5, 'm': 6, 'n': 7, 'u': 8, 'x': 9, 'p': 10, 'o': 11, 's': 12, 'é': 13, 'l': 14, 'd': 15, 'i': 16, '.': 17}


In [14]:
# Preparar dados de entrada e saída
seq_length = 5
sequences = []
for i in range(seq_length, len(sequence_data)):
    sequences.append(sequence_data[i-seq_length:i+1])

sequences = np.array(sequences)
X, y = sequences[:,:-1], sequences[:,-1]

In [15]:
# Criar o modelo
model = Sequential([
    Embedding(vocab_size, 10, input_length=seq_length),
    LSTM(50, return_sequences=False),
    Dense(vocab_size, activation='softmax')
])
model.summary()



In [16]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [17]:
# Treinar o modelo
model.fit(X, y, epochs=300)

Epoch 1/300
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 2.8905
Epoch 2/300
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 2.8867
Epoch 3/300
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 2.8838
Epoch 4/300
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 2.8796
Epoch 5/300
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 2.8759 
Epoch 6/300
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 2.8730 
Epoch 7/300
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 2.8674
Epoch 8/300
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 2.8642 
Epoch 9/300
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 2.8612 
Epoch 10/300
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 2.8574
Epoch 11/

<keras.src.callbacks.history.History at 0x783fd6710460>

In [18]:
# Função para gerar texto
def gerar_texto(modelo, seed_text, next_chars, tokenizer, seq_length):
    result = seed_text
    for _ in range(next_chars):
        encoded = tokenizer.texts_to_sequences([seed_text])[0]
        encoded = np.array(encoded[-seq_length:]).reshape(1, seq_length)
        predicted = np.argmax(modelo.predict(encoded), axis=-1)
        out_char = tokenizer.sequences_to_texts([[predicted[0]]])[0]
        seed_text += out_char
        result += out_char
    return result

# Testar a geração de texto
seed_text = "Este "
generated = gerar_texto(model, seed_text, 50, tokenizer, seq_length)
print(generated)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 214ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3