https://github.com/ageron/handson-ml2/blob/master/16_nlp_with_rnns_and_attention.ipynb

In [1]:
import numpy as np

In [2]:
import tensorflow as tf

tf.enable_eager_execution()




In [3]:
import tensorflow.keras as keras

# Tokenizing

In [4]:
shakespeare_url = "https://homl.info/shakespeare" # shortcut URL
filepath = keras.utils.get_file("shakespeare.txt", shakespeare_url)
with open(filepath) as f:
    shakespeare_text = f.read()

In [5]:
tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts([shakespeare_text])

In [6]:
tokenizer.texts_to_sequences(["First"])

[[20, 6, 9, 8, 3]]

In [7]:
tokenizer.sequences_to_texts([[20, 6, 9, 8, 3]])

['f i r s t']

In [8]:
tokenizer.word_index['p']

23

In [9]:
max_id = len(tokenizer.word_index) # number of distinct characters
dataset_size = tokenizer.document_count # total number of characters

In [10]:
[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text])) - 1
encoded.shape

(1115394,)

# Create a dataset

In [15]:
train_size = encoded.shape[0] * 90 // 100
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])

In [16]:
for d in dataset.take(2):
    print(d)

tf.Tensor(19, shape=(), dtype=int64)
tf.Tensor(5, shape=(), dtype=int64)


In [12]:
n_steps = 100
window_length = n_steps + 1 # target = input shifted 1 character ahead
dataset = dataset.repeat().window(window_length, shift=1, drop_remainder=True)

In [13]:
dataset = dataset.flat_map(lambda window: window.batch(window_length))

In [14]:
for d in dataset.take(2):
    print(d)

tf.Tensor(
[19  5  8  7  2  0 18  5  2  5 35  1  9 23 10 21  1 19  3  8  1  0 16  1
  0 22  8  3 18  1  1 12  0  4  9 15  0 19 13  8  2  6  1  8 17  0  6  1
  4  8  0 14  1  0  7 22  1  4 24 26 10 10  4 11 11 23 10  7 22  1  4 24
 17  0  7 22  1  4 24 26 10 10 19  5  8  7  2  0 18  5  2  5 35  1  9 23
 10 15  3 13  0], shape=(101,), dtype=int64)
tf.Tensor(
[ 5  8  7  2  0 18  5  2  5 35  1  9 23 10 21  1 19  3  8  1  0 16  1  0
 22  8  3 18  1  1 12  0  4  9 15  0 19 13  8  2  6  1  8 17  0  6  1  4
  8  0 14  1  0  7 22  1  4 24 26 10 10  4 11 11 23 10  7 22  1  4 24 17
  0  7 22  1  4 24 26 10 10 19  5  8  7  2  0 18  5  2  5 35  1  9 23 10
 15  3 13  0  4], shape=(101,), dtype=int64)


In [39]:
batch_size = 128
dataset = dataset.shuffle(10000).batch(batch_size)
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))

In [30]:
print_ds(dataset)

(<tf.Tensor: id=1004, shape=(128, 100), dtype=int64, numpy=
array([[15,  0, 11, ...,  7,  2,  0],
       [ 9,  0, 13, ...,  1,  4,  8],
       [ 5, 13,  7, ...,  4,  2,  0],
       ...,
       [ 9,  1, 14, ...,  3,  8,  9],
       [13,  8,  2, ...,  9,  0,  2],
       [ 8, 24, 27, ...,  5,  8,  7]])>, <tf.Tensor: id=1005, shape=(128, 100), dtype=int64, numpy=
array([[ 0, 11,  5, ...,  2,  0, 18],
       [ 0, 13, 22, ...,  4,  8,  0],
       [13,  7, 23, ...,  2,  0,  4],
       ...,
       [ 1, 14, 15, ...,  8,  9,  0],
       [ 8,  2,  6, ...,  0,  2,  3],
       [24, 27,  7, ...,  8,  7,  2]])>)


In [40]:
dataset = dataset.map(
    lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))

In [41]:
dataset = dataset.prefetch(1)

In [42]:
train_size //128.

7842.0

# Stateless RNN

In [19]:
model = keras.models.Sequential([
    keras.layers.GRU(128, return_sequences=True, input_shape=[None, max_id],
                     dropout=0.2, recurrent_dropout=0.2),
    keras.layers.GRU(128, return_sequences=True,
                     dropout=0.2, recurrent_dropout=0.2),
    keras.layers.TimeDistributed(keras.layers.Dense(max_id, activation="softmax"))
])
#model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
#history = model.fit(dataset, steps_per_epoch=train_size // batch_size, epochs=4)
#model.save("model.h5")

In [27]:
model = keras.models.load_model("model.h5")

In [4]:
def preprocess(texts):
    X = np.array(tokenizer.texts_to_sequences(texts)) - 1
    return tf.one_hot(X, max_id)

In [29]:
X_new = preprocess(["Thus the country to the str"])

In [38]:
Y_pred = model.predict_classes(X_new)
tokenizer.sequences_to_texts(Y_pred + 1)[0]

TypeError: predict_classes() got an unexpected keyword argument 'steps'

In [5]:
def next_char(text, model, temperature=1):
    X_new = preprocess([text])
    y_proba = model.predict(X_new, batch_size=1, steps=1)[0, -1:, :]
    rescaled_logits = tf.math.log(y_proba) / temperature
    char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
     
    id_int = char_id.eval(session=tf.compat.v1.Session())
    return tokenizer.sequences_to_texts(id_int)
         #tokenizer.sequences_to_texts(char_id.numpy())[0] # version 2.0

In [115]:
r = next_char("You are  ss")

In [7]:
def complete_text(text, model, n_chars=10, temperature=1):
    for _ in range(n_chars):
        text += next_char(text, model, temperature)[0]
    return text

In [123]:
complete_text("I am")

'I am perharded'

# Statefull RNN

In [13]:
stateless_model_size = 1 
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])
dataset = dataset.window(window_length, shift=n_steps, drop_remainder=True)
dataset = dataset.flat_map(lambda window: window.batch(window_length))
dataset = dataset.repeat().batch(batch_size)
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))

dataset = dataset.map(
    lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))

# for el in dataset:
#     print(el)
#     break
dataset = dataset.prefetch(1)

In [77]:
def print_ds(ds, count = 1):
    c = 0
    for el in ds:
        print(el)
        c +=1
        if c >= count:
            break

In [52]:
batch_size = 32

slice_size = train_size // batch_size
window_length = 101
n_steps = 100


datasets = []
for i in range(batch_size):
    data_slice = tf.data.Dataset.from_tensor_slices(encoded[i*slice_size:(i+1)*slice_size])
    data_slice = data_slice.window(window_length, shift=n_steps, drop_remainder=True)
    data_slice = data_slice.flat_map(lambda window: window.batch(window_length))
    datasets.append(data_slice)
    
dataset = tf.data.Dataset.zip(tuple(datasets)).map(lambda *windows: tf.stack(windows))
dataset = dataset.repeat().map(lambda windows: (windows[:, :-1], windows[:, 1:]))
dataset = dataset.map(
    lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))

dataset = dataset.prefetch(1)

In [113]:
print_ds(dataset, 1)

(<tf.Tensor: id=27560, shape=(32, 100, 39), dtype=float32, numpy=
array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
     

In [53]:
model = keras.models.Sequential([
    keras.layers.GRU(128, return_sequences=True, stateful=True,
                     dropout=0.2, recurrent_dropout=0.2,
                     batch_input_shape=[batch_size, None, max_id]),
    keras.layers.GRU(128, return_sequences=True, stateful=True,
                     dropout=0.2, recurrent_dropout=0.2),
    keras.layers.TimeDistributed(keras.layers.Dense(max_id,
                                                    activation="softmax"))
])

class ResetStatesCallback(keras.callbacks.Callback):
    def on_epoch_begin(self, epoch, logs):
        self.model.reset_states()
        
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")

steps_per_epoch = train_size // batch_size // n_steps
model.fit(dataset, steps_per_epoch=steps_per_epoch, epochs=40,
                   callbacks=[ResetStatesCallback()])

Train on 313 steps
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f18f0235be0>

In [54]:
stateless_model = keras.models.Sequential([
    keras.layers.GRU(128, return_sequences=True,
                     dropout=0.2, recurrent_dropout=0.2,
                     input_shape=[None, max_id]),
    keras.layers.GRU(128, return_sequences=True,
                     dropout=0.2, recurrent_dropout=0.2),
    keras.layers.TimeDistributed(keras.layers.Dense(max_id, activation="softmax"))
])

In [55]:
weights = model.get_weights()

In [56]:
stateless_model.build(tf.TensorShape([None, None, max_id]))
stateless_model.set_weights(weights)

In [57]:
stateless_model.save('stateless.h5')

In [58]:
stateless_model.load_weights('stateless.h5')

In [59]:
X_new = preprocess(["Thus the country to the str"])

In [60]:
complete_text("I am", stateless_model, n_chars=50)

'I am i have noble.\n\nhomes:\ngo thee and never foundc sh'