In [4]:
import sys
import sklearn

import tensorflow as tf
from tensorflow import keras

import numpy as np
import os

# Seed NumPy and TensorFlow with the same value so that runs are repeatable.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)


# Char-RNN

## Splitting a sequence into batches of shuffled windows

In [5]:
# Reseed both libraries so the shuffled order printed by this cell is repeatable.
np.random.seed(42)
tf.random.set_seed(42)

n_steps = 5


def _split_window(window):
    """Return (inputs, targets): the window without its last / first element."""
    return window[:-1], window[1:]


# Toy pipeline over the integers 0..14:
#   1. cut the sequence into overlapping windows of n_steps items (stride 2),
#   2. turn every nested window dataset into a single tensor,
#   3. shuffle the windows, then split each one into inputs and shifted targets,
#   4. group the pairs into batches of 3.
dataset = (
    tf.data.Dataset.from_tensor_slices(tf.range(15))
    .window(n_steps, shift=2, drop_remainder=True)
    .flat_map(lambda nested_window: nested_window.batch(n_steps))
    .shuffle(10)
    .map(_split_window)
    .batch(3)
    .prefetch(1)
)

# Print each batch so the input/target alignment can be checked by eye.
for index, (X_batch, Y_batch) in enumerate(dataset):
    print("_" * 20, "Batch", index, "\nX_batch")
    print(X_batch.numpy())
    print("=" * 5, "\nY_batch")
    print(Y_batch.numpy())

____________________ Batch 0 
X_batch
[[6 7 8 9]
 [2 3 4 5]
 [4 5 6 7]]
===== 
Y_batch
[[ 7  8  9 10]
 [ 3  4  5  6]
 [ 5  6  7  8]]
____________________ Batch 1 
X_batch
[[ 0  1  2  3]
 [ 8  9 10 11]
 [10 11 12 13]]
===== 
Y_batch
[[ 1  2  3  4]
 [ 9 10 11 12]
 [11 12 13 14]]


## Loading the Data and Preparing the Dataset

In [9]:
# Fetch the Tiny Shakespeare corpus and read it into memory as one string.
shakespeare_url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
filepath = keras.utils.get_file("shakespeare.txt", shakespeare_url)
# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform's locale encoding, which makes the result machine-dependent.
with open(filepath, encoding="utf-8") as f:
    shakespeare_text = f.read()

In [10]:
# Show the first 148 characters of the corpus as a sanity check on the download.
preview = shakespeare_text[:148]
print(preview)

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?



In [11]:
# List every distinct character of the lower-cased corpus, in sorted order.
distinct_chars = set(shakespeare_text.lower())
"".join(sorted(distinct_chars))

"\n !$&',-.3:;?abcdefghijklmnopqrstuvwxyz"

In [12]:
# Character-level tokenizer: each distinct character is mapped to an integer ID.
tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)
# Fit on the corpus loaded above. The raw string (not a one-element list) is
# passed; `tokenizer.document_count` is later used as the corpus length, so
# presumably each character is counted as one "document" -- keep this form.
tokenizer.fit_on_texts(shakespeare_text)

In [13]:
# Encode a sample word to inspect the character -> ID mapping.
tokenizer.texts_to_sequences(texts=["First"])

[[20, 6, 9, 8, 3]]

In [14]:
# Decode the same IDs back to text to confirm the mapping round-trips.
tokenizer.sequences_to_texts(sequences=[[20, 6, 9, 8, 3]])

['f i r s t']

In [15]:
# Number of items the tokenizer counted while fitting; used below as the
# length of the corpus when computing the training split.
dataset_size = tokenizer.document_count
# Size of the vocabulary, i.e. the number of distinct character IDs.
max_id = len(tokenizer.word_index)

In [16]:
# Encode the whole corpus as one sequence of character IDs. The list wrapper
# yields a single row, which the `[encoded] = ...` unpacking extracts.
# Subtracting 1 shifts the IDs down by one so that they fit the
# `depth=max_id` one-hot encoding applied further down.
[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text])) - 1
# Keep the first 90% of the sequence for training.
train_size = dataset_size * 90 // 100
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])

In [17]:
n_steps = 100
# One extra element per window: the targets are the inputs shifted by one step.
window_length = n_steps + 1
# Slide a window over the sequence one element at a time, dropping the
# trailing windows that would be shorter than window_length.
dataset = dataset.window(size=window_length, shift=1, drop_remainder=True)

In [18]:
def _window_to_tensor(window):
    """Collapse one nested window dataset into a single batch of window_length items."""
    return window.batch(window_length)


# window() yields a dataset of datasets; flatten it into a dataset of tensors.
dataset = dataset.flat_map(_window_to_tensor)

In [19]:
# Reseed right before shuffling so the batch order below is repeatable.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

In [20]:
batch_size = 32


def _split_inputs_targets(windows):
    """Split a batch of windows into (all but last step, all but first step)."""
    return windows[:, :-1], windows[:, 1:]


# Shuffle the windows, group them into batches, then derive the input/target
# pair from each batch.
shuffled_windows = dataset.shuffle(10000)
dataset = shuffled_windows.batch(batch_size).map(_split_inputs_targets)

In [21]:
def _one_hot_inputs(X_batch, Y_batch):
    """One-hot encode the input IDs (depth max_id); targets stay as integer IDs."""
    return tf.one_hot(X_batch, depth=max_id), Y_batch


dataset = dataset.map(_one_hot_inputs)

In [22]:
# Prefetch one batch ahead of the consumer.
dataset = dataset.prefetch(buffer_size=1)

In [23]:
# Pull a single batch and print the input and target shapes as a sanity check.
for X_batch, Y_batch in dataset.take(count=1):
    input_shape, target_shape = X_batch.shape, Y_batch.shape
    print(input_shape, target_shape)

(32, 100, 39) (32, 100)


## Creating and Training the Model