# PART 1 
# reading the data 

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import tensorflow as tf 

In [2]:
# Load the whole corpus into memory as a single string.
path_to_file = 'shakespeare.txt'
# Pin the encoding: the default used by open() depends on the platform and
# locale, so the same file could decode differently on another machine and
# shift the character-to-index mapping derived from it.
with open(path_to_file, 'r', encoding='utf-8') as f:
    text = f.read()

# Vocabulary: every distinct character in the corpus, sorted so that the index
# assigned to each character is the same on every run.
vocab = sorted(set(text))


In [3]:
len(vocab) # number of distinct characters found in the corpus

84

# PART 2 
# text processing

In [4]:
# Two-way lookup between characters and integer ids.
# char -> id: a character's id is its position in the sorted vocabulary.
char_to_ind = dict(zip(vocab, range(len(vocab))))
# id -> char: kept as a NumPy array so a whole array of ids can be decoded at
# once by indexing into it.
ind_to_char = np.array(vocab)


In [5]:
# Encode the full corpus as an array of integer ids, one per character.
encoded_text = np.array(list(map(char_to_ind.__getitem__, text)))

In [6]:
# Number of input characters in each training example.
seq_len = 120 
# Examples are cut from chunks of seq_len + 1 characters (the extra character
# allows the target to be the input shifted by one position), so this is the
# number of whole chunks that fit in the corpus.
total_num_seq = len(text)  // (seq_len + 1)
total_num_seq


45005

In [7]:
# Wrap the encoded corpus in a tf.data.Dataset; each element is one character id.
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)

In [8]:
#char_dataset.batch()

In [9]:
#for item in char_dataset.take(500):
#    print(ind_to_char[item.numpy()])

# Group consecutive character ids into chunks of seq_len + 1 elements; a final
# chunk that comes up short is discarded (drop_remainder=True).
sequences = char_dataset.batch(seq_len + 1, drop_remainder=True)

def create_seq_targets(seq):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is every element except the last and the target is every element
    except the first, so target[i] is the element that follows input[i].

    Example: "Hello my name" -> ("Hello my nam", "ello my name").
    """
    return seq[:-1], seq[1:]

# Turn every chunk into an (input, target) training pair.
dataset = sequences.map(create_seq_targets) 

# Sanity check: slice the first few chunks straight out of encoded_text so they
# can be compared by eye with what the `sequences` dataset yields. Only a small
# preview is printed -- looping over all total_num_seq chunks would dump tens of
# thousands of arrays into the cell output.
num_preview = min(3, total_num_seq)
for i in range(num_preview):
    start = i * (seq_len + 1)
    end = start + seq_len + 1
    print(encoded_text[start:end])

In [10]:
# Show the first (input, target) pair, each as raw ids and then decoded text,
# to confirm the target is the input shifted by one character.
for input_txt, target_txt in dataset.take(1):
    for ids in (input_txt.numpy(), target_txt.numpy()):
        print(ids)
        print("".join(ind_to_char[ids]))


[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75]

                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But
[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0  1
  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74  1
 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45 63
 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74 60
  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75  1]
                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But 


In [11]:
# Training pipeline settings.
batch_size = 128      # examples per training batch
buffer_size = 10000   # number of examples held in the shuffle buffer

# Shuffle the (input, target) pairs, then group them into batches; a trailing
# partial batch is dropped so every batch has exactly batch_size rows.
dataset1 = (
    dataset
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)


In [12]:
# Number of distinct characters; passed to create_model as the embedding input
# size and the width of the output layer.
vocab_size = len(vocab)
vocab_size

84

In [13]:
# Length of the embedding vector learned for each character.
embed_dim = 64 
# Number of units in the recurrent (GRU) layer.
# NOTE(review): 1026 is an unusual width (1024 may have been intended), but the
# weights file loaded later in this notebook must match it -- do not change
# this without retraining.
rnn_neurons = 1026

from tensorflow.keras.losses import sparse_categorical_crossentropy 

In [14]:
def sparse_cat_loss(y_true, y_pred):
    """Sparse categorical cross-entropy for predictions given as raw logits.

    A thin wrapper that fixes from_logits=True so the function can be handed
    to model.compile as a two-argument loss.

    Args:
        y_true: integer class ids.
        y_pred: unnormalised scores (logits) for each class.

    Returns:
        Whatever sparse_categorical_crossentropy returns for these arguments.
    """
    loss = sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    return loss

In [15]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Embedding, LSTM, GRU 

In [16]:
def create_model(vocab_size, embed_dim, rnn_neurons, batch_size):
    """Build and compile the character-level language model.

    Layers, in order: an Embedding over the vocabulary, a stateful GRU that
    returns an output at every time step, and a Dense layer that produces one
    unnormalised score (logit) per vocabulary entry.

    Args:
        vocab_size: number of distinct characters; used as the embedding input
            size and as the width of the output layer.
        embed_dim: length of each embedding vector.
        rnn_neurons: number of units in the GRU.
        batch_size: fixed batch dimension of the model's input shape (the
            sequence length is left unspecified).

    Returns:
        The compiled Sequential model (Adam optimizer, sparse_cat_loss loss).
    """
    layers = [
        Embedding(vocab_size, embed_dim, batch_input_shape=[batch_size, None]),
        GRU(
            rnn_neurons,
            return_sequences=True,
            stateful=True,
            recurrent_initializer='glorot_uniform',
        ),
        Dense(vocab_size),
    ]
    model = Sequential(layers)
    model.compile(optimizer='adam', loss=sparse_cat_loss)
    return model

In [17]:
# Instantiate the training model with the same batch size that dataset1 uses,
# then print its layers and parameter counts.
model = create_model(vocab_size=vocab_size, embed_dim=embed_dim, rnn_neurons=rnn_neurons, batch_size=batch_size)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (128, None, 64)           5376      
_________________________________________________________________
gru (GRU)                    (128, None, 1026)         3361176   
_________________________________________________________________
dense (Dense)                (128, None, 84)           86268     
Total params: 3,452,820
Trainable params: 3,452,820
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Push a single batch through the freshly built model as a smoke test. The loop
# leaves the batch and its predictions in scope for the inspection cells below.
for input_example_batch, target_example_batch in dataset1.take(1):
    example_batch_predictions = model(input_example_batch)

In [37]:
# First input sequence of the example batch, as character ids.
input_example_batch[0]

<tf.Tensor: shape=(120,), dtype=int32, numpy=
array([59, 77, 64, 74, 60,  1, 80, 70, 76,  1, 61, 76, 73, 75, 63, 60, 73,
       22,  1, 57, 76, 75,  1, 34,  0,  1,  1,  1,  1, 63, 70, 71, 60,  1,
       80, 70, 76, 73,  1, 70, 78, 69,  1, 62, 73, 56, 58, 60,  1, 78, 64,
       67, 67,  1, 66, 60, 60, 71,  1, 80, 70, 76,  1, 78, 63, 60, 73, 60,
        1, 80, 70, 76,  1, 56, 73, 60,  8,  1, 75, 63, 70, 76, 62, 63,  1,
       75, 63, 60, 73, 60,  0,  1,  1,  1,  1, 78, 60, 73, 60,  1, 69, 70,
        1, 61, 76, 73, 75, 63, 60, 73,  1, 59, 56, 69, 62, 60, 73,  1, 66,
       69])>

In [20]:
# Expected layout: (batch_size, seq_len, vocab_size).
example_batch_predictions.shape

TensorShape([128, 120, 84])

In [21]:
# Highest-scoring character id at each time step of the first sequence.
example_batch_predictions[0].numpy().argmax(axis = 1)

array([42, 37,  5, 42,  9,  1, 24, 77,  0,  1, 75, 75, 13, 60,  4,  9, 13,
       50,  1, 49, 13, 60,  1,  1, 40, 15,  1,  1,  1,  7, 77, 10,  9, 15,
       24, 77,  0, 13,  1, 77, 32,  4,  1,  1, 13, 26, 26, 55, 24, 67, 70,
       67,  8, 38, 79,  0,  0, 11, 49, 24, 77,  0,  1, 32, 67,  9, 13,  9,
       34, 24, 77,  0,  1,  7, 13,  9, 16, 47, 49,  4, 77, 83, 40,  4,  1,
       49,  4,  9, 13,  9, 70, 15, 15,  1,  1, 49,  8, 13, 55, 34, 47, 77,
        1,  4, 75, 13, 60,  4,  9, 13,  1, 24, 37,  4, 54,  9, 13, 13, 58,
       59], dtype=int64)

In [22]:
# The same highest-scoring ids decoded back to characters.
ind_to_char[example_batch_predictions[0].numpy().argmax(axis = 1)]

array(['Q', 'L', "'", 'Q', '-', ' ', '>', 'v', '\n', ' ', 't', 't', '2',
       'e', '&', '-', '2', 'Y', ' ', 'X', '2', 'e', ' ', ' ', 'O', '4',
       ' ', ' ', ' ', ')', 'v', '.', '-', '4', '>', 'v', '\n', '2', ' ',
       'v', 'G', '&', ' ', ' ', '2', 'A', 'A', '`', '>', 'l', 'o', 'l',
       ',', 'M', 'x', '\n', '\n', '0', 'X', '>', 'v', '\n', ' ', 'G', 'l',
       '-', '2', '-', 'I', '>', 'v', '\n', ' ', ')', '2', '-', '5', 'V',
       'X', '&', 'v', '}', 'O', '&', ' ', 'X', '&', '-', '2', '-', 'o',
       '4', '4', ' ', ' ', 'X', ',', '2', '`', 'I', 'V', 'v', ' ', '&',
       't', '2', 'e', '&', '-', '2', ' ', '>', 'L', '&', '_', '-', '2',
       '2', 'c', 'd'], dtype='<U1')

In [23]:
#epochs = 30
#model.fit(dataset1, epochs=epochs)

In [31]:
from tensorflow.keras.models import load_model 
# Rebuild the same architecture with batch_size=1 for text generation, which
# feeds the model one sequence at a time.
# NOTE: this rebinds `model`, replacing the batch_size=128 model created earlier.
model = create_model(vocab_size=vocab_size, embed_dim=embed_dim, rnn_neurons=rnn_neurons, batch_size=1)
# Load previously saved weights from disk (the model.fit call in this notebook
# is commented out, so nothing is trained here).
model.load_weights('shakespeare_gen.h5')
# NOTE(review): create_model already passes an input shape to the first layer,
# so this build call may be redundant -- confirm before removing.
model.build(tf.TensorShape([1,None]))

In [32]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (1, None, 64)             5376      
_________________________________________________________________
gru_6 (GRU)                  (1, None, 1026)           3361176   
_________________________________________________________________
dense_6 (Dense)              (1, None, 84)             86268     
Total params: 3,452,820
Trainable params: 3,452,820
Non-trainable params: 0
_________________________________________________________________


In [103]:
def generate_text(model, start_seed, gen_size = 500, temp = 1.0):
    # Placeholder only: the body is empty, so calling this returns None. The
    # statements that follow this stub run at cell level, not inside the
    # function. A working generate_text is defined in a later cell and
    # replaces this one.
    pass 
# Scratch walk-through of the generation loop, run at cell level so that the
# intermediate values (input_eval, predicted_id, text_generated) stay available
# for inspection in the cells that follow.
start_seed = """So is it not with me as with that muse,
Stirred by a painted beauty to his verse,"""
gen_size = 500
temp = 1.0

num_generate = gen_size  # number of characters to generate

# Encode the seed and add a leading batch dimension of size 1.
input_eval = [char_to_ind[char] for char in start_seed]
input_eval = tf.expand_dims(input_eval, 0)

text_generated = []

# The logits are divided by this value before sampling.
temperature = temp

# The recurrent layer is stateful: reset it so this run does not continue from
# whatever state earlier model calls left behind.
model.reset_states()

for i in range(num_generate):
    # One generated character per iteration.
    predictions = model(input_eval)
    predictions = tf.squeeze(predictions, axis=0)  # drop the batch dimension

    predictions = predictions / temperature
    # Draw one id per time step from the logits and keep the draw for the last
    # time step, which is the prediction for the next character.
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
    next_char = ind_to_char[predicted_id]
    # Stream the text on one line instead of printing one character per line.
    print(next_char, end='')
    # Feed only the new character back in; the stateful layer carries the context.
    input_eval = tf.expand_dims([predicted_id], 0)
    text_generated.append(next_char)
print()






 
 
A
s
 
t
o
 
t
h
y
 
h
a
n
d
s
 
l
o
r
d
 
o
f
 
h
e
r
e
 
w
o
r
d
s
,
 
b
u
t
 
k
i
l
l
'
d
 
g
i
v
i
n
g


 
 
 
 
 
F
r
o
m
 
t
h
e
 
r
e
m
e
g
h
a
i
t
 
o
n
'
t
.


 
 
L
e
o
n
.
 
N
o
,
 
m
y
 
l
o
r
d
,
 
t
h
i
s
 
s
o
r
r
o
w
 
s
t
a
n
d
s
y
.


 
 
I
f
 
t
h
o
u
 
a
r
t
 
f
e
a
r
f
u
l
 
m
o
t
h
e
r
,
 
a
n
d
 
t
h
e
 
y
o
u
t
h
b
u
l
s
t


 
t
h
o
u
 
w
i
l
t
 
n
o
t
 
a
t
 
w
h
a
t
 
p
o
s
s
i
b
i
l
i
t
y


 
 
 
 
I
s
 
g
o
o
d
 
t
o
 
h
a
r
b
o
u
r
 
h
i
s
 
t
r
i
c
k
s
 
i
n
 
t
h
e
i
r
 
f
a
c
e
s
.


 
 
C
A
S
S
I
O
.
 
N
o
,
 
m
y
 
o
l
d
 
s
t
r
e
n
g
t
h
 
o
f
 
w
i
s
d
o
m
s
 
c
r
i
e
s
 
'
F
o
,
 
a
n
d
 
t
h
e
r
e
f
o
r
e


 
 
 
 
I
 
c
a
r
e
 
n
o
t
 
w
h
a
t
'
s
 
a
 
r
a
i
l
i
n
g
 
a
t
 
h
e
r


 
 
 
 
p
a
s
s
i
o
n
 
i
n
 
t
h
e
 
c
l
o
c
k
 
t
h
a
t
 
I
 
d
i
d
 
t
h
i
n
k
 
o
f
 
h
i
m
;
 
y
o
u
 
s
h
a
l
l
 
s
e
e
 
h
e
r
s
u
r
d


 
 
 
 
A
g
a
i
n
s
t
 
h
i
s
 
f
r
i
e
n
d
.
 
G
o
o
d
 
w
i
l
l
a
y
,
 
w
e
r
e
 
y
o
u
 
a
l
l
;


 
 
 
 
A
n
d
,
 


In [102]:
# The seed followed by everything generated so far, as a single string.
start_seed + "".join(text_generated)

"So is it not with me as with that muse,\nStirred by a painted beauty to his verse,\n  And fools doth this but bid ARATED OTHERS\nPERSONAL USE ONLY, AND (2) ARE NOT DISTRIBUTED OR USED\nCOMMERCIALLY.  PROHIBITED COMMERCIAL DISTRIBUTION INCLUDES BY ANY\nSERVICE THAT CHARGES FOR DOWNLOAD TIME OR FOR MEMBERSHIP.>>\n\n\n\nSCENE 2\n\nThe Lord's Rode?\n  AJAX. Do you but perceive to pieces?\n  EVANS. Is 'tes you?\n  CLOWN. Nay, but notople her.\n  VOLUMNIA. He says he, sir, but hear me swear, contrives no  ve not speak a\n    cockagent, and there's courage and a braver one. If I live press, and sp"

In [86]:
# Id most recently sampled by the generation loop.
predicted_id

0

In [87]:
# Rebuild the single-step model input from the last sampled id: a batch of one
# sequence containing one character id.
input_eval = tf.expand_dims([predicted_id],0)
input_eval

<tf.Tensor: shape=(1, 1), dtype=int32, numpy=array([[0]])>

In [88]:
# Character that the last sampled id stands for.
ind_to_char[predicted_id]

'\n'

In [93]:
# Sample one next-character id from the model's output for the single-step input.
tf.random.categorical(tf.squeeze(model(input_eval),0), num_samples=1)

<tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[44]], dtype=int64)>

In [94]:
# Decode a single id by hand (44 here) to see which character it maps to.
ind_to_char[44]

'S'

In [124]:
def generate_text(model, start_seed, gen_size = 500, temp = 1.0):
    """Generate text by sampling one character at a time from the model.

    Args:
        model: stateful model that accepts a batch containing one sequence of
            character ids and returns logits over the vocabulary for every
            time step.
        start_seed: non-empty string used to prime the model. Every character
            must be a key of char_to_ind.
        gen_size: number of characters to generate.
        temp: sampling temperature, must be > 0. The logits are divided by it
            before sampling, so values below 1 make the output more
            predictable and values above 1 make it more random.

    Returns:
        start_seed followed by the generated characters, as one string.

    Raises:
        ValueError: if start_seed is empty or temp is not positive.
        KeyError: if start_seed contains a character missing from char_to_ind.
    """
    if not start_seed:
        raise ValueError("start_seed must contain at least one character")
    if temp <= 0:
        raise ValueError("temp must be a positive number")

    # Encode the seed and add a leading batch dimension of size 1.
    input_eval = tf.expand_dims([char_to_ind[char] for char in start_seed], 0)

    text_generated = []

    # The recurrent layer is stateful: reset it so this call does not continue
    # from the state left behind by a previous one.
    model.reset_states()

    for _ in range(gen_size):
        predictions = model(input_eval)
        predictions = tf.squeeze(predictions, axis=0)  # drop the batch dimension
        predictions = predictions / temp
        # Sample from the logits rather than taking the argmax: the argmax is
        # unaffected by dividing by temp, which made the parameter a no-op and
        # the output fully deterministic. Keep the draw for the last time step,
        # which is the prediction for the next character.
        prediction_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        # Feed only the new character back in; the stateful layer carries the context.
        input_eval = tf.expand_dims([prediction_id], 0)
        text_generated.append(ind_to_char[prediction_id])

    return start_seed + "".join(text_generated)





In [127]:
# Prime the model with a short prompt and generate 500 more characters; the
# returned string is the cell's displayed result.
start_seed = "romeo and juliet"
generate_text(
    model,
    start_seed,
    gen_size=500,
    temp=1.0,
)