In [None]:
import os
import time

In [None]:
import tensorflow as tf

In [None]:
# Read the whole training corpus into memory as one UTF-8 decoded string.
try:
    # The context manager closes the file handle even if read/decode fails;
    # the bare open(...).read() form left that to the garbage collector.
    with open("dataset.txt", 'rb') as dataset_file:
        text = dataset_file.read().decode(encoding='utf-8')
except FileNotFoundError as err:
    # Same exception type and message as before, but chained so the
    # traceback still shows the underlying FileNotFoundError.
    raise ValueError("can not find dataset") from err
print(f'Length of text: {len(text)} characters')
# Preview the first 250 characters of the corpus.
print(text[:250])

In [None]:
# Distinct characters of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()
print(f'{len(vocab)} unique characters')

In [None]:
# Small demo strings used to exercise the lookup layers below.
example_texts = ['abcdefg', 'xyz']
# Split each string into its individual UTF-8 characters.
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
# Forward lookup layer built from the corpus vocabulary (no mask token).
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)
ids = ids_from_chars(chars)
# Inverse lookup layer; it reuses the forward layer's vocabulary so both
# directions agree on the id assignment.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)
# Round trip: ids back to characters, then joined along the last axis.
chars = chars_from_ids(ids)
tf.strings.reduce_join(chars, axis=-1).numpy()

In [None]:
def text_from_ids(ids_):
    """Convert ids back into joined text.

    Looks the ids up through the notebook-level ``chars_from_ids`` layer and
    joins the resulting characters along the last axis.
    """
    characters = chars_from_ids(ids_)
    return tf.strings.reduce_join(characters, axis=-1)

In [None]:
# Split the whole corpus into characters and map them to ids.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
print(all_ids)
# Dataset that yields the corpus one id at a time.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [None]:
# Print the first ten elements of the dataset as characters.
# Note: this rebinds the notebook-level name `ids`.
for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))

In [None]:
# Number of characters in each model input.
seq_length = 100
# Number of whole (seq_length + 1)-character chunks in the corpus.
examples_per_epoch = len(text) // (seq_length + 1)
# Chunks are one id longer than seq_length because split_input_target drops
# one id from each end to build the input and the target; a trailing
# partial chunk is dropped.
sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

In [None]:
# Show the first chunk as a tensor of individual characters.
for seq in sequences.take(1):
    print(chars_from_ids(seq))

In [None]:
# Show the first five chunks joined back into text.
for seq in sequences.take(5):
    print(text_from_ids(seq).numpy())

In [None]:
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair shifted by one step.

    The input is every element except the last; the target is every element
    except the first, so target[i] is the element that follows input[i].
    """
    return sequence[:-1], sequence[1:]

In [None]:
# Quick call on a plain Python list; the result is neither stored nor
# displayed because this is not the cell's last expression.
split_input_target(list("Tensorflow"))
# Turn every chunk of ids into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [None]:
# Print the first (input, target) pair as text to check the one-step shift.
for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

In [None]:
# Number of (input, target) pairs per training batch.
BATCH_SIZE = 64
# Size of the buffer passed to Dataset.shuffle.
BUFFER_SIZE = 10000

In [None]:
# Shuffle the pairs, group them into full batches only, and prefetch.
# This cell rebinds `dataset` to its own output, so running it a second time
# without re-running the earlier cells would batch the batches.
dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [None]:
# Number of distinct characters in the corpus. The model cells below size
# the model from len(ids_from_chars.get_vocabulary()) instead of this value.
vocab_size = len(vocab)
# Size of each embedding vector.
embedding_dim = 256
# Number of GRU units.
rnn_units = 1024

In [None]:
class MyModel(tf.keras.Model):
    """Character-level language model: Embedding -> GRU -> Dense logits."""

    def __init__(self, vocabsize, embeddingdim, rnnunits):
        """Create the three layers of the model.

        Args:
            vocabsize: Number of token ids; used as the embedding input size
                and as the width of the output logits.
            embeddingdim: Size of each embedding vector.
            rnnunits: Number of units in the GRU layer.
        """
        # The base constructor takes no model argument; passing `self` again
        # forwarded a stray positional argument to tf.keras.Model.__init__.
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocabsize, embeddingdim)
        self.gru = tf.keras.layers.GRU(rnnunits,
                                       return_sequences=True,
                                       return_state=True)
        self.dense = tf.keras.layers.Dense(vocabsize)

    def call(self, inputs, states_=None, return_state=False, training=False):
        """Run the model on a batch of token ids.

        Args:
            inputs: Token ids fed to the embedding layer.
            states_: Optional GRU state to start from; when None the GRU's
                own initial state is used.
            return_state: When True, also return the GRU state after this call.
            training: Forwarded to every layer.

        Returns:
            The dense layer's output, or an ``(output, state)`` tuple when
            ``return_state`` is True.
        """
        x = self.embedding(inputs, training=training)
        if states_ is None:
            states_ = self.gru.get_initial_state(x)
        x, states_ = self.gru(x, initial_state=states_, training=training)
        x = self.dense(x, training=training)
        # Return the state produced by this call. The previous code returned
        # the notebook-level global `states`, so the updated GRU state was
        # never handed back to the caller.
        return (x, states_) if return_state else x

In [None]:
# Size the model from the lookup layer's vocabulary rather than from `vocab`.
model = MyModel(
    vocabsize=len(ids_from_chars.get_vocabulary()),
    embeddingdim=embedding_dim,
    rnnunits=rnn_units)

In [None]:
# Pre-declare the names that the next cell's loop assigns, so they exist
# even if that loop body never runs.
example_batch_predictions = None
input_example_batch = target_example_batch = None

In [None]:
# Run the untrained model on a single batch and print the output shape.
for input_example_batch, target_example_batch in dataset.take(1):
    # noinspection PyCallingNonCallable
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

In [None]:
model.summary()
# Sample one id per position from the first example's predictions.
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
# Drop the trailing num_samples axis and convert to a NumPy array.
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [None]:
# Compare the first input sequence with the text sampled from the model's
# predictions for it.
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

In [None]:
# from_logits=True: the loss is told the model outputs are logits.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
# Loss of the example batch computed in the earlier prediction cell.
example_batch_loss = loss(target_example_batch, example_batch_predictions)
mean_loss = example_batch_loss.numpy().mean()

In [None]:
# Report the prediction shape and the mean loss on the example batch.
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", mean_loss)

In [None]:
# Exponential of the mean loss. The value is not displayed because this is
# not the last expression of the cell.
tf.exp(mean_loss).numpy()
model.compile(optimizer='adam', loss=loss)
# Directory and file-name pattern for the weight checkpoints; the {epoch}
# placeholder is filled in when a checkpoint is written.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
# Weights only: the callback does not save the full model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)
STEPS = 172  # steps to perform
# Number of training epochs; reused by the custom training loop further down.
EPOCHS = 40
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback], steps_per_epoch=STEPS)

In [None]:
class OneStep(tf.keras.Model):
    """Wraps a trained model to sample one next character per call."""

    def __init__(self, model_, charsfromids, idsfromchars, temperature=1.0):
        """Store the model and lookup layers and build the sampling mask.

        Args:
            model_: Model called with ``inputs``, ``states_`` and
                ``return_state=True``; must return ``(logits, state)``.
            charsfromids: Layer mapping ids back to characters.
            idsfromchars: Layer mapping characters to ids.
            temperature: Divisor applied to the logits before sampling.
        """
        super().__init__()
        self.temperature = temperature
        self.model = model_
        self.chars_from_ids = charsfromids
        self.ids_from_chars = idsfromchars
        # Put -inf on the id of '[UNK]' so it can never be sampled.
        skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
        sparse_mask = tf.SparseTensor(
            values=[-float('inf')] * len(skip_ids),
            indices=skip_ids,
            # Size the mask from the lookup layer given to this instance, not
            # from the notebook-level `ids_from_chars` global.
            dense_shape=[len(self.ids_from_chars.get_vocabulary())])
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)

    @tf.function
    def generate_one_step(self, inputs, states_=None):
        """Sample the next character for each input string.

        Args:
            inputs: String tensor of seed text.
            states_: Model state returned by the previous call, or None.

        Returns:
            ``(predicted_chars, states_)``: the sampled characters and the
            model state to pass into the next call.
        """
        input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
        input_ids = self.ids_from_chars(input_chars).to_tensor()
        predicted_logits, states_ = self.model(inputs=input_ids, states_=states_,
                                               return_state=True)
        # Only the prediction for the last position is used.
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits / self.temperature
        # Adding the mask makes the '[UNK]' logit -inf.
        predicted_logits = predicted_logits + self.prediction_mask
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)
        predicted_chars = self.chars_from_ids(predicted_ids)
        # Return the state produced by this step. The previous code returned
        # the notebook-level global `states`, so the state was never advanced.
        return predicted_chars, states_

In [None]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)
# Start of the timing window that is closed after the generated text is joined.
start = time.time()
# No previous state on the first step.
states = None
# Seed text for generation (a batch of one string).
next_char = tf.constant(['seks'])
# Collect the seed and every generated character; joined after the loop.
result = [next_char]

In [None]:
# Generate 1000 characters, feeding each step's output and state into the next.
for n in range(1000):
    next_char, states = one_step_model.generate_one_step(next_char, states_=states)
    result.append(next_char)

In [None]:
# Concatenate the seed and all generated pieces element-wise into one string tensor.
result = tf.strings.join(result)
end = time.time()

In [None]:
# Show the generated text followed by a separator line, then the elapsed time.
print(result[0].numpy().decode('utf-8'), '\n\n' + '_' * 80)
print('\nRun time:', end - start)

In [None]:
# Same generation run as above, but with a batch of five identical seeds.
start = time.time()
states = None
next_char = tf.constant(['seks', 'seks', 'seks', 'seks', 'seks'])
result = [next_char]

In [None]:
# Generate 1000 characters for every seed in the batch.
for n in range(1000):
    next_char, states = one_step_model.generate_one_step(next_char, states_=states)
    result.append(next_char)

In [None]:
# Concatenate the seeds and generated pieces element-wise.
result = tf.strings.join(result)
end = time.time()

In [None]:
# Print the whole string tensor (all batch entries), then the elapsed time.
print(result, '\n\n' + '_' * 80)
print('\nRun time:', end - start)

In [None]:
# Export the one-step generator to the 'one_step' directory and load it back.
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')
# Reset the generation inputs for a run with the reloaded model.
states = None
next_char = tf.constant(['seks'])
result = [next_char]

In [None]:
# Generate 100 characters with the reloaded model.
for n in range(100):
    next_char, states = one_step_reloaded.generate_one_step(next_char, states_=states)
    result.append(next_char)

In [None]:
# Join the pieces and print the first (only) batch entry as text.
print(tf.strings.join(result)[0].numpy().decode("utf-8"))

In [None]:
class CustomTraining(MyModel):
    """MyModel with a hand-written ``train_step``."""

    @tf.function
    def train_step(self, inputs):
        """Run one optimisation step on a single batch.

        Args:
            inputs: An ``(inputs, labels)`` pair.

        Returns:
            A dict with the batch loss under the key ``'loss'``.
        """
        inputs, labels = inputs
        with tf.GradientTape() as tape:
            # noinspection PyCallingNonCallable
            predictions = self(inputs, training=True)
            loss_ = self.loss(labels, predictions)
        # Differentiate and update this instance's own variables. The previous
        # code used the notebook-level global `model`, which only worked while
        # that global happened to be bound to this same object.
        trainable_vars = self.trainable_variables
        grads = tape.gradient(loss_, trainable_vars)
        self.optimizer.apply_gradients(zip(grads, trainable_vars))
        return {'loss': loss_}

In [None]:
# Rebinds `model` to a fresh CustomTraining instance; the previously trained
# MyModel instance is no longer reachable through this name.
model = CustomTraining(
    vocabsize=len(ids_from_chars.get_vocabulary()),
    embeddingdim=embedding_dim,
    rnnunits=rnn_units)

In [None]:
# Attach the optimizer and loss that CustomTraining.train_step reads via
# self.optimizer and self.loss.
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In [None]:
# Train for a single epoch of STEPS batches through the standard fit API.
model.fit(dataset, epochs=1, steps_per_epoch=STEPS)

In [None]:
# Running mean of the per-batch loss; reset at the start of every epoch.
mean = tf.metrics.Mean()
# Pre-declared so the final save_weights cell can read `epoch` even if the
# training loop body never runs.
epoch = None

In [None]:
# Fail fast, once, before any training. The check used to sit inside the batch
# loop, where it was re-evaluated for every batch and never ran at all for an
# empty dataset.
if len(dataset) < STEPS:
    raise ValueError("dataset is not large enough to do this many steps")

for epoch in range(EPOCHS):
    start = time.time()
    mean.reset_state()
    # take(STEPS) caps every epoch at exactly STEPS batches. The previous
    # `if batch_n == STEPS: break` was tested after the training step with a
    # zero-based counter, so it ran STEPS + 1 batches on longer datasets.
    for (batch_n, (inp, target)) in enumerate(dataset.take(STEPS)):
        logs = model.train_step([inp, target])
        mean.update_state(logs['loss'])
        if batch_n % 50 == 0:
            template = f"Epoch {epoch + 1} Batch {batch_n} Loss {logs['loss']:.4f}"
            print(template)
    # Checkpoint every fifth epoch; the file name uses the zero-based epoch index.
    if (epoch + 1) % 5 == 0:
        model.save_weights(checkpoint_prefix.format(epoch=epoch))
    print()
    print(f'Epoch {epoch + 1} Loss: {mean.result().numpy():.4f}')
    print(f'Time taken for 1 epoch {time.time() - start:.2f} sec')
    print("_" * 80)

In [None]:
# Save the final weights under the last (zero-based) epoch index of the loop.
model.save_weights(checkpoint_prefix.format(epoch=epoch))