In [1]:
!pip3 install --upgrade tensorflow
!pip3 install keras

Requirement already up-to-date: tensorflow in c:\users\wynnm\anaconda3\lib\site-packages (2.4.1)


In [2]:
import numpy as np

from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

INPUT_FILE = 'alice-in-wonderland.txt'

In [3]:
print('Extracting text from input')
file = open(INPUT_FILE, 'rb')
lines = []
for line in file:
    line = line.strip().lower()
    line = line.decode('ascii', 'ignore')
    if len(line) == 0:
        continue
    lines.append(line)
file.close()
text = ' '.join(lines)

Extracting text from input


In [4]:
chars = set([c for c in text])
nb_chars = len(chars)
char2index = dict((c, i) for i, c in enumerate(chars))
index2char = dict((i, c) for i, c in enumerate(chars))

print(index2char)

{0: 'b', 1: ',', 2: 'i', 3: "'", 4: '@', 5: '9', 6: '$', 7: 'n', 8: 'g', 9: 'd', 10: 'h', 11: 'y', 12: 'k', 13: '6', 14: '4', 15: '8', 16: 'a', 17: '.', 18: ' ', 19: '7', 20: 'w', 21: '%', 22: 'u', 23: 't', 24: 'p', 25: '_', 26: 'e', 27: '*', 28: '3', 29: ':', 30: '#', 31: '"', 32: '0', 33: '/', 34: '[', 35: ';', 36: '?', 37: 'x', 38: 'l', 39: '5', 40: '!', 41: 'f', 42: 'v', 43: 'r', 44: 'z', 45: '2', 46: '(', 47: ')', 48: 'q', 49: 'm', 50: '-', 51: ']', 52: 'j', 53: 'c', 54: 's', 55: 'o', 56: '1'}


In [5]:
print('Creating input and label')
SEQ_LEN = 10
STEP = 1
print('The table is ready')

Creating input and label
The table is ready


In [6]:
input_chars = []
label_chars = []
for i in range(0, len(text) - SEQ_LEN, STEP):
    input_chars.append(text[i:i + SEQ_LEN])
    label_chars.append(text[i + SEQ_LEN])

print(input_chars)



In [7]:
print("Vectorizing input and label text...")
x = np.zeros((len(input_chars), SEQ_LEN, nb_chars), dtype=np.bool)
y = np.zeros((len(input_chars), nb_chars), dtype=np.bool)

for i, input_char in enumerate(input_chars):
    for j, ch in enumerate(input_char):
        x[i, j, char2index[ch]] = 1
    y[i, char2index[label_chars[i]]] = 1


Vectorizing input and label text...


In [8]:
HIDDEN_SIZE = 128
BATCH_SIZE = 128
NUM_ITERATIONS = 25
NUM_EPOCHS_PER_ITERATION = 1
NUM_PREDS_PER_EPOCH = 100

model = Sequential()
model.add(SimpleRNN(HIDDEN_SIZE, return_sequences=False,
                    input_shape=(SEQ_LEN, nb_chars),
                    unroll=True))
model.add(Dense(nb_chars))
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

In [9]:
for iteration in range(NUM_ITERATIONS):
    print("=" * 50)
    print("Iteration #: %d" % (iteration))
    model.fit(x, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)
    
    # testing model
    # randomly choose a row from input_chars
    # generate text from model for next 100 chars
    test_idx = np.random.randint(len(input_chars))
    test_chars = input_chars[test_idx]
    print("Generating from seed: %s" % (test_chars))
    print(test_chars, end="")
    for i in range(NUM_PREDS_PER_EPOCH):
        xtest = np.zeros((1, SEQ_LEN, nb_chars))
        for i, ch in enumerate(test_chars):
            xtest[0, i, char2index[ch]] = 1
        pred = model.predict(xtest, verbose=0)[0]
        ypred = index2char[np.argmax(pred)]
        print(ypred, end="")
        # move forward with test_chars + ypred
        test_chars = test_chars[1:] + ypred
    print()


Iteration #: 0
Generating from seed: jaws are t
jaws are the the said the said the said the said the said the said the said the said the said the said the sai
Iteration #: 1
Generating from seed: ll me that
ll me that the marthe the har here the the her alle the the her alle the the her alle the the her alle the the
Iteration #: 2
Generating from seed:  those tar
 those tare the was so the was her hat her har head the moure so the was her hat her har head the moure so the
Iteration #: 3
Generating from seed: se directl
se directle the had the growher gote the was so the was so the was so the was so the was so the was so the was
Iteration #: 4
Generating from seed: zy thing!'
zy thing!' said the doust of the dored and the dored and the dored and the dored and the dored and the dored a
Iteration #: 5
Generating from seed:  his toes.
 his toes. 'i mad to the work out of the word and the was souse on the was souse on the was souse on the was s
Iteration #: 6
Generating from seed: ing to en