<a href="https://colab.research.google.com/github/rubenguerra/hello_world/blob/master/generar_texto.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

PROGRAMA DE RED NEURONAL PARA GENERAR TEXTO TOMANDO COMO BASE LA OBRA DE W. SHAKESPEARE

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

!pip install -q tensorflow-gpu==2.0.0-beta1
import tensorflow as tf

import numpy as np
import os
import time

[K     |████████████████████████████████| 348.9MB 52kB/s 
[K     |████████████████████████████████| 3.1MB 66.6MB/s 
[K     |████████████████████████████████| 501kB 50.1MB/s 
[?25h

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Fetch the Shakespeare corpus through Keras' get_file helper and keep the
# value it returns, which is used below as the path of the file to open.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [3]:
# Read the corpus as bytes and decode it as UTF-8. The context manager closes
# the file handle as soon as the read is done instead of leaking it.
with open(path_to_file, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')

# The length of the text is the number of characters in it.
print('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


In [4]:
# Show a sample: the first 250 characters of the text.
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [5]:
# Collect the distinct characters of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()
print('{} caracteres únicos'.format(len(vocab)))

65 caracteres únicos


In [0]:
# Build both directions of the character <-> index mapping:
#   idx2char: array whose position i holds the i-th vocabulary character
#   char2idx: dict from each vocabulary character to its position
idx2char = np.array(vocab)
char2idx = dict(zip(vocab, range(len(vocab))))

# Encode the whole text as an array of character indices.
text_as_int = np.array([char2idx[ch] for ch in text])

In [7]:
# Every character now has an integer assigned; the relation lives in the
# 'char2idx' dictionary. Preview its first 20 entries as a dict-like listing.
print('{')
for char in list(char2idx)[:20]:
  print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
# Close the brace opened above so the printed preview is balanced.
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '$' :   3,
  '&' :   4,
  "'" :   5,
  ',' :   6,
  '-' :   7,
  '.' :   8,
  '3' :   9,
  ':' :  10,
  ';' :  11,
  '?' :  12,
  'A' :  13,
  'B' :  14,
  'C' :  15,
  'D' :  16,
  'E' :  17,
  'F' :  18,
  'G' :  19,
  ...



In [8]:
# Show how the first 13 characters of the text are represented as integers.
print('{}----caracteres representados como enteros---->{}'.format(repr(text[:13]), text_as_int[:13]))

'First Citizen'----caracteres representados como enteros---->[18 47 56 57 58  1 15 47 58 47 64 43 52]


In [9]:
# Maximum length, in characters, of a single input sequence.
seq_length = 250
# Each example consumes seq_length + 1 characters (the input plus the target
# shifted by one position), so that is the divisor for the example count.
examples_per_epoch = len(text) // (seq_length + 1)

# Build the training stream: tf.data.Dataset.from_tensor_slices turns the
# encoded text vector into a sequence of individual character indices.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for i in char_dataset.take(5):
  print(idx2char[i.numpy()])

F
i
r
s
t


In [10]:
# Group the character stream into chunks of seq_length + 1 indices
# (drop_remainder=True), then print the first five chunks decoded back to text.
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

for chunk in sequences.take(5):
  decoded = ''.join(idx2char[chunk.numpy()])
  print(repr(decoded))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you know Caius Marcius is chief enemy to the people.\n\n'
"All:\nWe know't, we know't.\n\nFirst Citizen:\nLet us kill him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be done: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor c"
'itizens, the patricians good.\nWhat authority surfeits on would relieve us: if they\nwould yield us but the superfluity, while it were\nwholesome, we might guess they relieved us humanely;\nbut they think we are too dear: the leanness that\nafflicts us, th'
'e object of our misery, is as an\ninventory to particularise their abundance; our\nsufferance is a gain to them Let us revenge this with\nour pikes, ere we become rakes: for the gods know I\nspeak this i

In [0]:
def split_input_target(chunk):
  """Split a chunk into an (input, target) pair offset by one position.

  Args:
    chunk: any sliceable sequence.

  Returns:
    A tuple (chunk without its last element, chunk without its first element).
  """
  return chunk[:-1], chunk[1:]

# Apply split_input_target to every chunk in `sequences`, producing
# (input, target) pairs.
dataset = sequences.map(split_input_target)

In [12]:
# Print the first input example and its corresponding target, decoded to text.
for input_example, target_example in dataset.take(1):
  input_as_text = ''.join(idx2char[input_example.numpy()])
  target_as_text = ''.join(idx2char[target_example.numpy()])
  print('Dato de entrada: ', repr(input_as_text))
  print('Dato resultante: ', repr(target_as_text))

Dato de entrada:  'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you know Caius Marcius is chief enemy to the people.\n'
Dato resultante:  'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you know Caius Marcius is chief enemy to the people.\n\n'


In [13]:
# Walk the first five positions of the example pair and print, for each step,
# the input index and the index expected as output, each next to its character.
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
  print('Step {:4d}'.format(i))
  print('    input: {} ({:s})'.format(input_idx, repr(idx2char[input_idx])))
  print('    expected output: {} ({:s})'.format(target_idx, repr(idx2char[target_idx])))

Step    0
    input: 18 ('F')
    expected output: 47 ('i')
Step    1
    input: 47 ('i')
    expected output: 56 ('r')
Step    2
    input: 56 ('r')
    expected output: 57 ('s')
Step    3
    input: 57 ('s')
    expected output: 58 ('t')
Step    4
    input: 58 ('t')
    expected output: 1 (' ')


In [14]:
# Create the training batches.
# Batch size.
BATCH_SIZE = 64

# Buffer size used to shuffle the dataset
# (tf.data is designed to work with possibly infinite sequences).
BUFFER_SIZE = 10000

# Derive the batched dataset from `sequences` rather than from `dataset`
# itself, so re-running this cell does not batch already-batched data.
dataset = (
    sequences.map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True))

dataset

<BatchDataset shapes: ((64, 250), (64, 250)), types: (tf.int64, tf.int64)>

In [0]:
## BUILDING THE MODEL
# tf.keras.Sequential is used to define the model. This simple example
# uses three layers:
# tf.keras.layers.Embedding: the input layer, a trainable table that maps the
#   integer of each character to a vector with embedding_dim dimensions
#   (256 here).
# tf.keras.layers.LSTM: the recurrent layer used by build_model, with
#   units=rnn_units (a GRU layer could be used here instead).
# tf.keras.layers.Dense: the output layer, with vocab_size outputs.


# Size of the vocabulary, in characters
vocab_size = len(vocab)

# Embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the Embedding -> LSTM -> Dense character model.

  Args:
    vocab_size: size of the embedding table and of the Dense output.
    embedding_dim: width of each embedding vector.
    rnn_units: number of units in the LSTM layer.
    batch_size: batch dimension fixed through batch_input_shape; the
      sequence dimension is left as None.

  Returns:
    A tf.keras.Sequential model whose last layer is a Dense layer of
    vocab_size outputs with no activation specified.
  """
  embedding = tf.keras.layers.Embedding(
      vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
  recurrent = tf.keras.layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  output_layer = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, recurrent, output_layer])


In [0]:
# Instantiate the training model with the training batch size.
model = build_model(len(vocab), embedding_dim, rnn_units, BATCH_SIZE)

In [18]:
# Run the model once to check that it behaves as expected.
# First check the shape of the output. The batch tensors and the predictions
# stay bound after the loop and are reused by later cells.
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, '#(batch_size, sequence_length, vocab_size)')

(64, 250, 65) #(batch_size, sequence_length, vocab_size)


In [19]:
# In the example above the input sequence length is 250 (seq_length), but the
# model leaves the sequence dimension as None, so it can be fed inputs of any
# length.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
lstm (LSTM)                  (64, None, 1024)          5246976   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Sample one index per timestep from the predictions of the first example in
# the batch, drop the trailing num_samples axis and convert to a NumPy array.
sampled_indices = tf.squeeze(
    tf.random.categorical(example_batch_predictions[0], num_samples=1),
    axis=-1).numpy()

In [21]:
# Display the sampled character indices.
sampled_indices

array([29, 10, 44, 53,  1, 19, 64, 35,  5, 56, 11, 42, 62,  8, 42, 21, 29,
       20,  4, 10, 41, 49, 42,  3, 54,  8,  4, 31,  0, 16, 51, 40, 23, 11,
       58, 54, 23, 32, 11, 11, 20, 64, 45, 57, 58, 21, 56, 63, 10, 29, 20,
       56, 23, 18, 34, 39, 20, 11, 46, 61, 40, 57,  6, 33,  8, 40, 13, 49,
       30, 57, 60,  8, 13, 37, 38, 32, 22, 20, 22, 43, 36, 20, 21, 58, 64,
       20, 52, 30, 13, 10, 36, 47,  7, 51,  4, 15, 48, 33,  6, 32, 57,  8,
        2, 18, 40, 59, 64, 27, 22, 53, 51, 59, 45, 45, 64,  0, 33, 56, 64,
       25,  1, 31, 32,  4, 53, 10, 25, 48, 27, 54, 33, 52, 62, 45, 49, 55,
       58, 15, 44,  6,  3, 18, 14, 23, 45, 56, 46, 35, 51, 29, 28, 11, 34,
       63, 30, 43, 32, 45,  3, 27,  7, 36, 46, 22, 40, 16, 56, 34,  8, 52,
       45, 13, 12, 63, 51, 37, 27, 24,  6, 10, 37, 56,  2, 32, 62,  6, 47,
       26,  9, 29, 11, 63, 18,  1, 16, 21,  2, 22,  9, 37, 29,  1, 24, 41,
       39, 59, 31, 14, 48, 36, 31, 59,  6, 48, 38, 20, 21, 55, 50, 10, 42,
       22, 19, 56, 17, 46

In [22]:
# Decode the indices to see the text predicted by the untrained model.
print('Input: \n', repr(''.join(idx2char[input_example_batch[0]])))
print()
print('Siguientes caracteres predichos: \n', repr(''.join(idx2char[sampled_indices ])))

Input: 
 "le\nMust have their voices; neither will they bate\nOne jot of ceremony.\n\nMENENIUS:\nPut them not to't:\nPray you, go fit you to the custom and\nTake to you, as your predecessors have,\nYour honour with your form.\n\nCORIOLANUS:\nIt is apart\nThat I shall blus"

Siguientes caracteres predichos: 
 "Q:fo GzW'r;dx.dIQH&:ckd$p.&S\nDmbK;tpKT;;HzgstIry:QHrKFVaH;hwbs,U.bAkRsv.AYZTJHJeXHItzHnRA:Xi-m&CjU,Ts.!FbuzOJomuggz\nUrzM ST&o:MjOpUnxgkqtCf,$FBKgrhWmQP;VyReTg$O-XhJbDrV.ngA?ymYOL,:Yr!Tx,iN3Q;yF DI!J3YQ LcauSBjXSu,jZHIql:dJGrEhQLv\nQYYbwYpUrgZRi3t,G3RH"


In [23]:
# TRAINING THE MODEL

# The standard tf.keras.losses.sparse_categorical_crossentropy loss is used
# here; it is applied across the last dimension of the predictions.

# Because the model returns logits, the from_logits flag has to be set.

def loss(labels, logits):
  """Sparse categorical cross-entropy between labels and raw logits.

  Args:
    labels: target character indices.
    logits: model outputs, passed with from_logits=True.

  Returns:
    Whatever tf.keras.losses.sparse_categorical_crossentropy returns for
    these arguments (no reduction is applied here).
  """
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels, y_pred=logits, from_logits=True)

# Evaluate the loss on the example batch from the untrained model and print
# the prediction shape together with the mean of the loss values.
example_batch_loss = loss(target_example_batch, example_batch_predictions)
print('Forma de la predicción: ', example_batch_predictions.shape, ' # (batch_size, sequence_length, vocab_size)')
print('scalar_loss:            ', example_batch_loss.numpy().mean())

Forma de la predicción:  (64, 250, 65)  # (batch_size, sequence_length, vocab_size)
scalar_loss:             4.1742063


In [0]:
# Configure the training procedure with tf.keras.Model.compile, using the
# 'adam' optimizer with its default arguments and the `loss` function.
model.compile(optimizer='adam', loss=loss)

In [0]:
# tf.keras.callbacks.ModelCheckpoint is used so that checkpoints are saved
# during training.
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; '{epoch}' is a placeholder filled in per epoch
checkpoint_prefix = os.path.join(checkpoint_dir, 'chpt_{epoch}')

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
  filepath=checkpoint_prefix,
  save_weights_only=True)

In [0]:
# RUN THE TRAINING
# The number of epochs is raised to 30.
EPOCHS = 30

In [27]:
# Train for EPOCHS epochs, passing the checkpoint callback to fit.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [28]:
## TEXT GENERATION
# To keep this prediction step simple, a batch_size of 1 is used.
# To run the model with a different batch size, the model has to be rebuilt
# and its weights restored from the checkpoint.
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints/chpt_30'

In [0]:
# Rebuild the same architecture with batch_size=1 for generation.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Restore the trained weights from the most recent checkpoint.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

# Build the model for inputs of batch size 1 and unspecified sequence length.
model.build(tf.TensorShape([1, None]))

In [30]:
# Print the layer summary of the rebuilt batch_size=1 model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            16640     
_________________________________________________________________
lstm_1 (LSTM)                (1, None, 1024)           5246976   
_________________________________________________________________
dense_1 (Dense)              (1, None, 65)             66625     
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


In [0]:
# THE PREDICTION LOOP

# The following block of code generates the text:
# - It starts from a seed string, resets the RNN state and fixes the number of
#   characters to generate.
# - It gets the prediction distribution for the next character from the seed
#   string and the RNN state.
# - It samples the index of the predicted character from a categorical
#   distribution and uses that character as the next input to the model.
# - The model is stateful, so its recurrent state carries over between calls
#   and each prediction has the context of everything generated so far,
#   rather than of a single character.

def generate_text(model, start_string, num_generate=1000, temperature=1.0):
  """Generate text one character at a time, seeded with `start_string`.

  Args:
    model: stateful character model built for batch size 1; it is called with
      a batch containing one sequence of character indices.
    start_string: non-empty seed text; every character must be a key of
      the notebook-level `char2idx` mapping.
    num_generate: number of characters to generate (default 1000).
    temperature: positive divisor applied to the predictions before sampling.
      Low temperatures give more predictable text, high temperatures more
      surprising text (default 1.0).

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: if `start_string` is empty or `temperature` is not positive.
    KeyError: if `start_string` contains a character missing from `char2idx`.
  """
  if not start_string:
    raise ValueError('start_string must contain at least one character')
  if temperature <= 0:
    raise ValueError('temperature must be positive')

  # Convert the seed string to numbers (vectorize) and add a batch dimension.
  input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

  # Generated characters are collected here and joined at the end.
  text_generated = []

  # Here batch_size == 1.
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # Remove the batch dimension.
    predictions = tf.squeeze(predictions, 0)

    # Sample from a categorical distribution over the scaled predictions and
    # keep the sample drawn for the last timestep.
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

    # The predicted character becomes the next input to the model.
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return start_string + ''.join(text_generated)

In [33]:
# Generate and print text seeded with 'LEAR: '.
print(generate_text(model, start_string=u'LEAR: '))

LEAR: MXEjTNHEPSj; FIIEENJW-ZIZ&FKbxENVAFMNISH$UkNXNXNCKPIT: KIMNISGOIODIULWFANDLIN&EHO!SF
VANW:
ces now, the rock is at the
dukes; and as bett in sinsing armour rentine.

SICINIUS:
Hark! here impimen'd: ho, honest think, be guibtles which the
house of Mine cut o'er the supposion: have you heard'st?
I god! by Marsouse is mine honour's will.

HORTENSIO:
This is's tround-time is a friar or he,
Out of the fortune's scholer of the base,
But not inferio is rounwally;
I am the sungear from the deather's love,
And with divension, cousin depose,
A priest he best within.

LUCENTIO:
Trit Lords of York looks of weeping souls,
Fillwing of mine own virtues,
They not rigning-man denied unkindly; and full
proils inteer'd father; for no such fairest springs
of these sorrow hath disperse these man of foul hours:
Thou declatise have put
zefold his witdor, and how fou'd itself?
Then sees, to follow us: here, since away'd
Uncomes a penshous and forget all.

GLOUCESTER:
Then fish surels, bid me bitience; m

In [0]:
###
### ALSO TRY ANOTHER START STRING
### OR ADD ANOTHER LAYER TO THE RNN

### tf.GradientTape WILL BE USED

# First the RNN state is initialised by calling the
# tf.keras.Model.reset_states method.
# Then the dataset is iterated batch by batch and the predictions for each
# batch are computed.
# A tf.GradientTape is opened, and the predictions and the loss are computed
# in that context.
# The gradients of the loss with respect to the model variables are computed
# with tape.gradient.
# Finally, one optimisation step is taken with the optimizer's
# apply_gradients method.


model = build_model(
  vocab_size = len(vocab),
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

In [0]:
# Adam optimizer with default arguments, used by train_step.
optimizer = tf.keras.optimizers.Adam()

In [0]:
@tf.function
def train_step(inp, target):
  """Run one optimisation step on a batch and return its mean loss.

  Uses the notebook-level `model` and `optimizer`.

  Args:
    inp: batch of input character indices fed to the model.
    target: batch of target character indices compared with the predictions.

  Returns:
    The mean sparse categorical cross-entropy (from logits) over the batch.
  """
  with tf.GradientTape() as tape:
    logits = model(inp)
    per_position_loss = tf.keras.losses.sparse_categorical_crossentropy(
        target, logits, from_logits=True)
    mean_loss = tf.reduce_mean(per_position_loss)

  # Differentiate the loss with respect to the trainable variables and apply
  # the resulting update.
  variables = model.trainable_variables
  gradients = tape.gradient(mean_loss, variables)
  optimizer.apply_gradients(zip(gradients, variables))

  return mean_loss

In [37]:
# Training loop
EPOCHS = 10

for epoch in range(EPOCHS):
  start = time.time()

  # Reset the recurrent state at the start of every epoch. The return value
  # of reset_states() is not needed, so it is not bound to a name.
  model.reset_states()

  for (batch_n, (inp, target)) in enumerate(dataset):
    # Named batch_loss (not `loss`) so the notebook-level loss() function
    # defined earlier is not overwritten by a tensor.
    batch_loss = train_step(inp, target)

    if batch_n % 100 == 0:
      template = 'Epoch {} Batch {} Loss {}'
      print(template.format(epoch+1, batch_n, batch_loss))

  # Save a checkpoint every 5 epochs; the file name carries the zero-based
  # epoch index.
  if (epoch + 1) % 5 == 0:
    model.save_weights(checkpoint_prefix.format(epoch=epoch))

  # Report the loss of the last batch of the epoch and the elapsed time.
  print('Epoca {} Pérdida {:.4f}'.format(epoch+1, batch_loss))
  print('Tiempo tomado por 1 época {} sec\n'.format(time.time() - start))

# Save the final weights once training is done.
model.save_weights(checkpoint_prefix.format(epoch=epoch))
  

Epoch 1 Batch 0 Loss 4.174488544464111
Epoca 1 Pérdida 2.5963
Tiempo tomado por 1 época 11.757416248321533 sec

Epoch 2 Batch 0 Loss 2.569549798965454
Epoca 2 Pérdida 2.2150
Tiempo tomado por 1 época 9.44547414779663 sec

Epoch 3 Batch 0 Loss 2.2055587768554688
Epoca 3 Pérdida 1.9882
Tiempo tomado por 1 época 9.398260593414307 sec

Epoch 4 Batch 0 Loss 1.9997055530548096
Epoca 4 Pérdida 1.8254
Tiempo tomado por 1 época 9.339365482330322 sec

Epoch 5 Batch 0 Loss 1.8505524396896362
Epoca 5 Pérdida 1.7047
Tiempo tomado por 1 época 9.409043550491333 sec

Epoch 6 Batch 0 Loss 1.7243235111236572
Epoca 6 Pérdida 1.6084
Tiempo tomado por 1 época 9.36434030532837 sec

Epoch 7 Batch 0 Loss 1.626670002937317
Epoca 7 Pérdida 1.5279
Tiempo tomado por 1 época 9.359623193740845 sec

Epoch 8 Batch 0 Loss 1.551697850227356
Epoca 8 Pérdida 1.4710
Tiempo tomado por 1 época 9.358097791671753 sec

Epoch 9 Batch 0 Loss 1.4890209436416626
Epoca 9 Pérdida 1.4143
Tiempo tomado por 1 época 9.334773540496826 se

In [0]:
# The model trained by the custom loop was built with batch_size=BATCH_SIZE
# and is stateful, while generate_text feeds a single sequence at a time.
# Rebuild the architecture with batch_size=1 and restore the most recently
# saved weights before generating.
generation_model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
generation_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
generation_model.build(tf.TensorShape([1, None]))

print(generate_text(generation_model, start_string=u'LEAR: '))