# Red neuronal recurrente

### Importando las librerías

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

### Importando los datos de entrenamiento

In [2]:
# Read the whole training corpus into memory; the context manager guarantees
# the file handle is closed even if reading fails.
with open("grimms.txt", 'r', encoding='utf8') as corpus_file:
    dataset_train = corpus_file.read()

# Vocabulary: every distinct character in the corpus. It is sorted because
# plain set iteration order for strings changes between interpreter runs
# (hash randomisation), which would silently change the index assigned to each
# character and make previously saved weights inconsistent with the mapping.
characters = sorted(set(dataset_train))
# Lookup tables between a character and its one-hot position.
char_to_index = { ch:i for i,ch in enumerate(characters) }
index_to_char = { i:ch for i,ch in enumerate(characters) }

### Constantes

In [3]:
SEQ_LENGTH = 200 # time steps (characters) per training sequence
VOCAB_SIZE = len(characters) # number of distinct characters (one-hot width)
# The target of every sequence is the same window shifted one character to the
# right, so the last sequence needs one extra character after it. Counting
# windows over len - 1 characters keeps that shifted window inside the text
# even when the text length is an exact multiple of SEQ_LENGTH. Integer floor
# division avoids the float round-trip of int(a / b); max() keeps the count at
# zero for an empty text.
N_SEQUENCES = max(len(dataset_train) - 1, 0) // SEQ_LENGTH
HIDDEN_DIM = 700 # units per LSTM layer
LSTM_LAYERS = 3 # number of stacked LSTM layers
DROPOUT_RATIO = 0.3 # dropout rate applied after the first LSTM layer

### Creando la estructura de datos con [SEQ_LENGTH] pasos de tiempo y una salida

In [4]:
# Build one-hot inputs and next-character targets.
#   X_train[s, t] is the one-hot vector of character number s*SEQ_LENGTH + t
#   y_train[s, t] is the one-hot vector of the character that follows it
# Both arrays have shape (N_SEQUENCES, SEQ_LENGTH, VOCAB_SIZE).
n_input_chars = N_SEQUENCES * SEQ_LENGTH

# The target of the very last time step is one character past the inputs.
# Fail with an explicit message instead of an index error deep in the encoding.
if N_SEQUENCES > 0 and len(dataset_train) < n_input_chars + 1:
    raise ValueError(
        "dataset_train needs at least N_SEQUENCES * SEQ_LENGTH + 1 characters "
        "so that the last sequence has a next-character target"
    )

# Encode the text once as integer vocabulary indices (inputs plus the single
# extra character used as the final target).
encoded = np.array(
    [char_to_index[ch] for ch in dataset_train[:n_input_chars + 1]],
    dtype=np.intp
)
X_index = encoded[:n_input_chars].reshape(N_SEQUENCES, SEQ_LENGTH)
y_index = encoded[1:n_input_chars + 1].reshape(N_SEQUENCES, SEQ_LENGTH)

X_train = np.zeros((N_SEQUENCES, SEQ_LENGTH, VOCAB_SIZE))
y_train = np.zeros((N_SEQUENCES, SEQ_LENGTH, VOCAB_SIZE))

# Broadcasted fancy indexing sets exactly one entry per (sequence, step) pair,
# replacing the per-character Python loops and temporary per-sequence arrays.
sequence_ids = np.arange(N_SEQUENCES)[:, None]
step_ids = np.arange(SEQ_LENGTH)[None, :]
X_train[sequence_ids, step_ids, X_index] = 1
y_train[sequence_ids, step_ids, y_index] = 1

### Construyendo la red neuronal recurrente

In [5]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, TimeDistributed, Activation

def build_rnn():
    """Build and compile the stacked-LSTM character model.

    The network is a Sequential stack of LSTM_LAYERS LSTM layers with
    HIDDEN_DIM units each, all returning full sequences. Dropout with rate
    DROPOUT_RATIO is applied after the first LSTM only. A time-distributed
    dense layer of width VOCAB_SIZE followed by a softmax produces one
    distribution over the vocabulary per time step.

    Returns:
        The model, compiled with the rmsprop optimizer and categorical
        cross-entropy loss.
    """
    network = Sequential()

    # Input LSTM: accepts sequences of any length (None) of one-hot vectors
    # and forwards the full output sequence to the next stacked layer.
    input_lstm = LSTM(
        units = HIDDEN_DIM,
        return_sequences = True,
        input_shape = (None, VOCAB_SIZE)
    )
    network.add(input_lstm)
    network.add(Dropout(DROPOUT_RATIO))

    # Remaining stacked LSTM layers (no dropout between them).
    extra_layers = LSTM_LAYERS - 1
    for _ in range(extra_layers):
        network.add(LSTM(units = HIDDEN_DIM, return_sequences = True))

    # Per-time-step projection onto the vocabulary, normalised by softmax.
    output_projection = TimeDistributed(Dense(VOCAB_SIZE))
    network.add(output_projection)
    network.add(Activation("softmax"))

    network.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy')
    return network

Using TensorFlow backend.


### Entrenamiento

In [6]:
import os

from keras.callbacks import ModelCheckpoint

# Make sure the checkpoint directory exists before training starts, so the
# first save (after 10 epochs) cannot fail on a missing folder.
os.makedirs("ckpt", exist_ok = True)

# Save the model every 10 epochs. The file name is formatted from the epoch
# number and the metrics logged by fit(); fit() below is called without
# validation data, so only `loss` is logged. Referencing `val_loss` in the
# template would fail when the file name is formatted at the first save.
checkpoint = ModelCheckpoint(
    filepath = "ckpt/weights.{epoch:02d}-{loss:.2f}.hdf5",
    period = 10
)

model = build_rnn()
model.fit(X_train, y_train, epochs = 100, batch_size = 32, callbacks = [checkpoint])

TypeError: __init__() missing 1 required positional argument: 'filepath'

### Haciendo predicciones

In [26]:
# Greedy character-by-character text generation with the trained model.
length = 200  # number of characters to generate

# Start from one random character index.
ix = [np.random.randint(VOCAB_SIZE)]
y_char = [index_to_char[ix[-1]]]

# One-hot buffer for the generated sequence, filled one time step per iteration.
X = np.zeros((1, length, VOCAB_SIZE))
for i in range(length):
    current = ix[-1]
    X[0, i, current] = 1
    print(index_to_char[current], end="")

    # Predict on the prefix generated so far and take the most likely
    # character at every time step; only the last entry is used as the
    # next character.
    step_probs = model.predict(X[:, :i+1, :])[0]
    ix = np.argmax(step_probs, axis=1)
    y_char.append(index_to_char[ix[-1]])

maro
Matamoras
Matimol Martanterrosisio
Miguel de Altanoda
Ma Mileo
Mattián Moxiahia
San Mireo Tebasto
Sena Mintepec
Sititla
Matimár de Tatenaca
San Pedro de Sal Parto de Marta de Aruanda
San Pedro Jacantía de Samiapo
San Jerónimo Tilocuepo
Santiago Tomaconotuahicca
Santiago Sololtepec
San albo
Samadol
Santiago de Sal Pablo Atlapan
San Pedro Amiatlán
San Dien de la Solinar de lo SelosanoSan Parla del Protengo
San Pedro Mataparo de las Sinás
San Anto Tuiapan
San Anantan
Santa María de la Salina
Santa Laca Yuautlan
San Juan Atatinta Atalta
Santa Matía Mateneses Yitonas
Santa Catarina Tlaláxtla
Santiago Tlacotetlahca
Santa María Tlacotepec
Santa Tlamixtacan
Santiago Sochitlán
San Miguel de Siranto
San Pedro Totopen
San Pedro Toxtlahua
San Mateo Santo Do irba
Teoticao
Telima
Temimputitlán
Temotepec

Tamáx
Bampa
Vala
Mazapa
Matampa
Matampa
Ramarena
Ajacutli
Aoca
Atera
Atia
Migael de Altaa
Laringo Lavinho
Lagonis
Laginidado Loso Ma Matoa
Rosas Lo Panabaro
Lucabár
Puz
Dranisto Domisgo Ixtapat