In [1]:
# Read the whole corpus into memory as one lower-cased string.
with open("../data/nietzsche.txt", encoding="utf8") as corpus_file:
    text = corpus_file.read().lower()

# Corpus size in characters.
print(len(text))

600901


In [2]:
import numpy as np

# Length of each input window and the stride between consecutive windows.
maxlen = 60
step = 3

# Slide a window of `maxlen` characters over the text in strides of `step`.
# Each window is one training input; the character right after it is the target.
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]

In [3]:
# Show the first five (window, next character) training pairs.
for idx in range(5):
    print(repr(sentences[idx]), " : ", next_chars[idx])

'preface\n\n\nsupposing that truth is a woman--what then? is the'  :  r
'face\n\n\nsupposing that truth is a woman--what then? is there '  :  n
'e\n\n\nsupposing that truth is a woman--what then? is there not'  :   
'\nsupposing that truth is a woman--what then? is there not gr'  :  o
'pposing that truth is a woman--what then? is there not groun'  :  d


In [4]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
print(chars)

['\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '?', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '¤', '¦', '©', '«', 'ã', '†']


In [5]:
# Map each character to its position in `chars`.
# enumerate() yields the index directly, avoiding a linear chars.index() scan
# for every character.
char_indices = {char: index for index, char in enumerate(chars)}
print(char_indices)

{'\n': 0, ' ': 1, '!': 2, '"': 3, "'": 4, '(': 5, ')': 6, ',': 7, '-': 8, '.': 9, '0': 10, '1': 11, '2': 12, '3': 13, '4': 14, '5': 15, '6': 16, '7': 17, '8': 18, '9': 19, ':': 20, ';': 21, '=': 22, '?': 23, '[': 24, ']': 25, '_': 26, 'a': 27, 'b': 28, 'c': 29, 'd': 30, 'e': 31, 'f': 32, 'g': 33, 'h': 34, 'i': 35, 'j': 36, 'k': 37, 'l': 38, 'm': 39, 'n': 40, 'o': 41, 'p': 42, 'q': 43, 'r': 44, 's': 45, 't': 46, 'u': 47, 'v': 48, 'w': 49, 'x': 50, 'y': 51, 'z': 52, '¤': 53, '¦': 54, '©': 55, '«': 56, 'ã': 57, '†': 58}


In [6]:
# One-hot encode the dataset:
#   x[i, t, c] is True when character index c sits at position t of sentence i;
#   y[i, c]    is True when character index c is the one following sentence i.
# The builtin `bool` is used as dtype because the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in 1.24 (where it raises AttributeError).
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = True
    y[i, char_indices[next_chars[i]]] = True

In [7]:
# One-hot row for the first character of the first window.
x[0, 0]

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False])

In [8]:
import tensorflow as tf

# Small character-level model: one GRU layer reads the one-hot encoded window
# and a softmax layer outputs a distribution over the vocabulary.
model = tf.keras.Sequential([
    tf.keras.layers.GRU(16, input_shape=(maxlen, len(chars))),
    tf.keras.layers.Dense(units=len(chars), activation='softmax')
])

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, 16)                3696      
_________________________________________________________________
dense (Dense)                (None, 59)                1003      
Total params: 4,699
Trainable params: 4,699
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train on the one-hot windows; the returned History object is the cell's output.
# NOTE(review): the saved output of this cell shows "Epoch 1/2" / "Epoch 2/2",
# which does not match epochs=1 -- presumably from a stale run; confirm the
# intended number of epochs.
model.fit(x=x, y=y, batch_size=128, epochs=1)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x1be7dd30308>

In [10]:
import random

# Pick a random window of `maxlen` characters from the corpus as the seed text.
# randint is inclusive at both ends, so this is the largest valid start offset.
last_start = len(text) - maxlen - 1
start_index = random.randint(0, last_start)
base_text = text[start_index: start_index + maxlen]
base_text

'-he forces it to say nay, where he would like\nto affirm, lov'

In [11]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector rescaled by `temperature`.

    The probabilities are moved to log space, divided by `temperature`,
    renormalised with a softmax, and a single index is then drawn from the
    resulting distribution. Temperatures below 1 sharpen the distribution
    towards its largest entry; temperatures above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: positive scaling factor applied to the log-probabilities.

    Returns:
        The sampled index (as returned by ``np.argmax``).

    Raises:
        ValueError: if `temperature` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    preds = np.asarray(preds).astype('float64')
    # log(0) is -inf, which correctly becomes probability 0 after exp();
    # silence the divide-by-zero warning NumPy would otherwise emit for it.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0 before exponentiating. Without this,
    # small temperatures make every exp() underflow to 0 and the
    # normalisation below turns into 0/0 (NaN).
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    # One multinomial trial yields a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [12]:
import sys
# Sampling temperatures to compare and the number of characters to generate for each.
temperatures = [0.2, 0.5, 1.0, 1.2]
gen_characters = 200

for temp in temperatures:
    print("Temp: ", temp)
    # Every temperature starts again from the same seed window.
    generated_text = base_text
    print(generated_text)
    for char_no in range(gen_characters):
        # One-hot encode the current window as a batch of one.
        sampled = np.zeros((1, maxlen, len(chars)))
        for position, symbol in enumerate(generated_text):
            sampled[0, position, char_indices[symbol]] = 1.

        # Predict the next-character distribution and sample from it.
        preds = model.predict(sampled, verbose=0)[0]
        next_index = sample(preds, temp)
        next_char = chars[next_index]

        # Slide the window: append the new character, drop the oldest one.
        generated_text = (generated_text + next_char)[1:]

        # Stream the character immediately, without a trailing newline.
        print(next_char, end="", flush=True)
    print()

Temp:  0.2
-he forces it to say nay, where he would like
to affirm, lov
e and the and as and the preastion of the and the such and the of the and and the greation and and and and and the might of the and and of of the and the the the more the the for of one and of the of 
Temp:  0.5
f the and the the the more the the for of one and of the of 
want and the of the in the suth and of in has of preation the preast suth is of the of
beself and ploust of the man of as and indersenster as a the conthing are have and the mand of the the and attos 
Temp:  1.0
s a the conthing are have and the mand of the the and attos 
ean abtionsly aldion axes. of has of whis, our of of
mar,
(wabition--a for ar
and of hanaltion the maistorenes and thoubshe winell
is to depion of fee romophioug
resannsise his to the co
casinwenw he 
Temp:  1.2
ion of fee romophioug
resannsise his to the co
casinwenw he 
elcentisonttanity extedloctpod imtimur; veudiceniture,
whail, stpith to, indnom feless
groy relil buply of si