In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
tf.enable_eager_execution()




import numpy as np
import os
import time
import pandas as pd

In [2]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8. The context
# manager closes the file handle as soon as the read has finished.
with open('vinci.txt', 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
length = len(text)
print(f'length of text: {length} characters')

length of text: 8875 characters


In [3]:
# Printing the first 300 characters in the text.
print(text[: 300])

Knowledge of the past and of the places of the earth is the ornament and food of the mind of man.
The noblest pleasure is the joy of understanding.
As a well-spent day brings happy sleep, so a life well spent brings happy death.
Learning never exhausts the mind.
To such an extent does nature delight


In [4]:
# Collecting the unique characters in the text (sorted) and counting them.
unique = sorted(set(text))
print(f'There are {len(unique)} unique characters')

There are 60 unique characters


# Vectorizing the text

In [5]:
# Forward lookup: each unique character -> its position in the sorted vocabulary.
char_idx = {ch: pos for pos, ch in enumerate(unique)}
# Reverse lookup: a NumPy array indexed by integer id, so an array of ids can be
# decoded back to characters in one indexing operation.
idx_char = np.array(unique)

In [6]:
# Preview the first 20 (character : id) pairs of the char_idx mapping.
for charc in list(char_idx)[:20]:
    print(f'{repr(charc)} : {char_idx[charc]}')

'\n' : 0
' ' : 1
'!' : 2
"'" : 3
',' : 4
'-' : 5
'.' : 6
'0' : 7
'1' : 8
'3' : 9
':' : 10
';' : 11
'?' : 12
'A' : 13
'B' : 14
'C' : 15
'D' : 16
'E' : 17
'F' : 18
'G' : 19


In [7]:
# idx_char is the reverse lookup (index -> character); it is a NumPy array rather than a dictionary.

In [8]:
integer_text = np.array([char_idx[c] for c in text])  
# It is better to put it in an array than to leave it in form of a list.


In [9]:
# so ...
print(f'{repr(text[:16])} --- in integer form is --- -> {integer_text[:16]}')

'Knowledge of the' --- in integer form is --- -> [23 47 48 56 45 38 37 40 38  1 48 39  1 53 41 38]


In [10]:
# Number of characters in each chunk cut from the text. Every chunk is later
# split into an input and a target that are each one character shorter.
sequence_length = 103

In [11]:
# creating training examples and targets
character_dataset = tf.data.Dataset.from_tensor_slices(integer_text)

In [12]:
# How it looks
for i in character_dataset.take(5):
    print(idx_char[i.numpy()])

K
n
o
w
l


In [13]:
sequences = character_dataset.batch(sequence_length, drop_remainder=True)
# drop_remainder=True discards the final chunk when it is shorter than
# sequence_length: e.g. for 120 characters and a sequence_length of 103, the
# remaining 17 characters are dropped.
#
# Batching example: [1 2 3 4 5 6 7 8 9 10] with a batch size of 3 gives
# ['1 2 3' '4 5 6' '7 8 9']; the leftover 10 is dropped (whitespace is ignored
# in this illustration).

# Show the first three chunks decoded back to text.
for chunk in sequences.take(3):
    print(repr(''.join(idx_char[chunk.numpy()])))

'Knowledge of the past and of the places of the earth is the ornament and food of the mind of man.\nThe n'
'oblest pleasure is the joy of understanding.\nAs a well-spent day brings happy sleep, so a life well spe'
'nt brings happy death.\nLearning never exhausts the mind.\nTo such an extent does nature delight and abou'


In [14]:
def split_input_target(portion):
    """Turn one chunk into an (input, target) pair shifted by one position.

    The input is the chunk without its last element and the target is the
    chunk without its first element, so target[k] is the element that
    follows input[k].
    """
    return portion[:-1], portion[1:]

dataset = sequences.map(split_input_target)

In [15]:
BATCH_SIZE = 3
# For a uniform shuffle the buffer should be at least as large as the dataset.
# A smaller buffer is used here on purpose because the dataset is small.
BUFFER_SIZE = 30
# Note: this rebinds `dataset`, so re-running this cell on its own would batch
# the already batched dataset a second time.
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder = True)

In [16]:
vocabulary_size = len(unique)
embedding_dim = 256
rnn_units = 1024

In [17]:
def build_model(vocabulary_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character-level network: Embedding -> stateful LSTM -> Dense.

    Args:
        vocabulary_size: number of distinct characters; used as the embedding
            input size and as the width of the final Dense layer.
        embedding_dim: size of each character embedding vector.
        rnn_units: number of units in the LSTM layer.
        batch_size: fixed batch dimension baked into the input shape
            (the LSTM is created with stateful=True).

    Returns:
        An uncompiled tf.keras.Sequential model. The Dense layer is created
        without an activation argument.
    """
    layers = tf.keras.layers
    embedding = layers.Embedding(vocabulary_size, embedding_dim, batch_input_shape=[batch_size, None])
    recurrent = layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='orthogonal')
    projection = layers.Dense(vocabulary_size)
    return tf.keras.Sequential([embedding, recurrent, projection])

In [18]:
#Sequential(): Used to create a linear stack of layers.
#Embedding(): Turns integer ids into dense vectors. This link explains it better: "https://stats.stackexchange.com/questions/270546/how-does-keras-embedding-layer-work"
#LSTM(): A variant of the RNN model. This link explains it well: "https://www.analyticsvidhya.com/blog/2017/12/fundamentals-of-deep-learning-introduction-to-lstm/"
#Dense(): The standard fully connected layer.


In [19]:
model = build_model(
    vocabulary_size = len(unique),
    embedding_dim = embedding_dim,
    rnn_units = rnn_units,
    batch_size = BATCH_SIZE

)

In [20]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (3, None, 256)            15360     
_________________________________________________________________
lstm (LSTM)                  (3, None, 1024)           5246976   
_________________________________________________________________
dense (Dense)                (3, None, 60)             61500     
Total params: 5,323,836
Trainable params: 5,323,836
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Measures the difference between the predicted target values and the actual target values.

def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  from_logits=True tells Keras that `logits` are unnormalised scores, not
  probabilities.
  """
  crossentropy = tf.keras.losses.sparse_categorical_crossentropy
  return crossentropy(labels, logits, from_logits=True)


In [22]:
# Adam is used for this run; RMSProp is tried as an alternative in a later cell.
model.compile(optimizer='adam', loss=loss)

In [23]:
# Checkpointing: save the model's weights while training.

#---------------------------------------------
# Directory where the checkpoints are written.
checkpoint_directory = './Training_checkpoints'

# File-name template for the checkpoints; Keras fills in {epoch} with the epoch number.
checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt_{epoch}')

# Callback handed to fit(); save_weights_only=True stores only the weights, not the full model.
checkpoint_call_back = tf.keras.callbacks.ModelCheckpoint(filepath = checkpoint_prefix, save_weights_only = True)

In [24]:
Epochs = 25

In [25]:
learning = model.fit(dataset, epochs = Epochs, callbacks = [checkpoint_call_back])

Epoch 1/25


W1217 16:57:47.046068  8568 deprecation.py:323] From C:\Users\Kenechi\Anaconda3\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [26]:
# this finds the latest saved check point
trained_weights= tf.train.latest_checkpoint(checkpoint_directory)

In [27]:
# Build a fresh copy of the network with batch_size = 1 and load the latest
# training checkpoint into it.
# Note: this rebinds the name `model`, which until now referred to the network
# that was trained with batch_size = BATCH_SIZE.
model = build_model( vocabulary_size = len(unique),embedding_dim = embedding_dim, rnn_units = rnn_units,batch_size = 1)
model.load_weights(trained_weights)
model.build(tf.TensorShape([1,None]))  # input shape: batch of 1, sequence length left unspecified

In [28]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            15360     
_________________________________________________________________
lstm_1 (LSTM)                (1, None, 1024)           5246976   
_________________________________________________________________
dense_1 (Dense)              (1, None, 60)             61500     
Total params: 5,323,836
Trainable params: 5,323,836
Non-trainable params: 0
_________________________________________________________________


In [29]:
def generating_the_text(model, starting_string, number_generated=1500, temperature=0.9):
    """Generate text one character at a time from a trained model.

    Args:
        model: network built with a batch size of 1. It is called on a
            (1, sequence) tensor of character ids and must provide
            reset_states(). It is expected to be stateful, because after the
            seed only the newest character is fed back in.
        starting_string: non-empty seed text. Every character must be a key of
            char_idx, otherwise the lookup raises KeyError.
        number_generated: how many characters to sample (default 1500).
        temperature: positive value the model's scores are divided by before
            sampling (default 0.9). The closer it is to zero, the more
            predictable the text; the closer to one, the more unpredictable.

    Returns:
        starting_string followed by the generated characters.

    Raises:
        ValueError: if starting_string is empty or temperature is not positive.
    """
    # Fail early with a clear message instead of an obscure shape or
    # division error from deep inside TensorFlow.
    if not starting_string:
        raise ValueError('starting_string must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be greater than zero')

    # Convert the seed to character ids and add a dimension at index 0,
    # giving shape (1, len(starting_string)).
    s_s_vector = tf.expand_dims([char_idx[s] for s in starting_string], 0)

    # Generated characters are collected here.
    text_generated = []

    # Clear any recurrent state left over from a previous call.
    model.reset_states()
    for _ in range(number_generated):
        predict = model(s_s_vector)

        # Remove the dimension at index 0 that was added above.
        predict = tf.squeeze(predict, 0)

        predict = predict / temperature

        # Draw one sample per row from a categorical distribution over the
        # scores and keep the sample of the last row. See
        # 'https://stackoverflow.com/questions/55063120/can-anyone-give-a-tiny-example-to-explain-the-params-of-tf-random-categorical'
        predicted_idx = tf.random.categorical(predict, num_samples=1)[-1, 0].numpy()

        # The sampled character becomes the next input.
        s_s_vector = tf.expand_dims([predicted_idx], 0)

        text_generated.append(idx_char[predicted_idx])

    return starting_string + ''.join(text_generated)
        
        

In [30]:
print(generating_the_text(model, starting_string ='The'))

The prainses in the representation of visible things and can eDflucts for dishowly which exws core dille purcains and domin of muscle is to pull and not to push, except in the caseare work and Porivion, Dore and Powit, an, ther whose heart is firm, things is the chief not is not enough.
Ye who boy sf sence not and propinquity, Motion and Rest.
Man is formett of being hap is may be istenale object so arr in erecting the brain.
I love those who can smile in trouble, who can gal and end to be pest descting haus not from her what is not is neoush is the manot of what body is dependant on Heaven and Heaven on the Spirit.
Time stays long enough for anyone who will user a the dinks.
The I manks of which are: it love it only little or not at all.
Weight, force and casual immore deminging the haman body as compared with the bodies of the painter must resemble a mirror, which always takes the colour of the object it reflects and is completely occupied by the images of as many objects as are in f

In [31]:
#  ..............changing the optimizer used ............................

In [32]:
def build_model(vocabulary_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character-level network: Embedding -> stateful LSTM -> Dense.

    This re-declares the same LSTM architecture as the earlier build_model
    cell; only the optimizer chosen at compile time differs for this run.

    Args:
        vocabulary_size: number of distinct characters; used as the embedding
            input size and as the width of the final Dense layer.
        embedding_dim: size of each character embedding vector.
        rnn_units: number of units in the LSTM layer.
        batch_size: fixed batch dimension baked into the input shape
            (the LSTM is created with stateful=True).

    Returns:
        An uncompiled tf.keras.Sequential model.
    """
    layers = tf.keras.layers
    embedding = layers.Embedding(vocabulary_size, embedding_dim, batch_input_shape=[batch_size, None])
    recurrent = layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='orthogonal')
    projection = layers.Dense(vocabulary_size)
    return tf.keras.Sequential([embedding, recurrent, projection])

In [33]:
model_1 = build_model(
    vocabulary_size = len(unique),
    embedding_dim = embedding_dim,
    rnn_units = rnn_units,
    batch_size = BATCH_SIZE

)

In [34]:
model_1.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (3, None, 256)            15360     
_________________________________________________________________
lstm_2 (LSTM)                (3, None, 1024)           5246976   
_________________________________________________________________
dense_2 (Dense)              (3, None, 60)             61500     
Total params: 5,323,836
Trainable params: 5,323,836
Non-trainable params: 0
_________________________________________________________________


In [35]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  from_logits=True tells Keras that `logits` are unnormalised scores, not
  probabilities.
  """
  crossentropy = tf.keras.losses.sparse_categorical_crossentropy
  return crossentropy(labels, logits, from_logits=True)


In [36]:
model_1.compile(optimizer = 'RMSProp', loss = loss)

In [37]:
# Checkpointing for the RMSProp run: save the model's weights while training.

#---------------------------------------------
# Directory where the checkpoints are written.
# Note: this rebinds checkpoint_directory, which earlier pointed at the first run's folder.
checkpoint_directory = './Training_checkpoints_2'

# File-name template for the checkpoints; Keras fills in {epoch} with the epoch number.
checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt_{epoch}')

# Callback handed to fit(); save_weights_only=True stores only the weights, not the full model.
checkpoint_call_back = tf.keras.callbacks.ModelCheckpoint(filepath = checkpoint_prefix, save_weights_only = True)

In [38]:
Epochs = 25

In [39]:
learning_2 = model_1.fit(dataset, epochs = Epochs, callbacks = [checkpoint_call_back])

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [40]:
trained_weights_2= tf.train.latest_checkpoint(checkpoint_directory)

In [41]:
# Build a batch_size = 1 copy of the network and load into it the latest
# checkpoint written while training model_1 (the RMSProp run).
model_2 = build_model( vocabulary_size = len(unique),embedding_dim = embedding_dim, rnn_units = rnn_units,batch_size = 1)
model_2.load_weights(trained_weights_2)
model_2.build(tf.TensorShape([1,None]))  # input shape: batch of 1, sequence length left unspecified

In [42]:
model_2.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (1, None, 256)            15360     
_________________________________________________________________
lstm_3 (LSTM)                (1, None, 1024)           5246976   
_________________________________________________________________
dense_3 (Dense)              (1, None, 60)             61500     
Total params: 5,323,836
Trainable params: 5,323,836
Non-trainable params: 0
_________________________________________________________________


In [43]:
print(generating_the_text(model_2, starting_string ='The'))

The mant astion breater or less than that ave is not in her in hor do is like, in the hope of pulling o than a of sence in the cast and laves by as mame is out pesile is the joy of understanding is not becedsand of the tinds, which we will prove by the ie co dasterpiece of engineerinc and a worke is loys is auterilut, and fout all mer stals and the forition of the mind.
Taus the corrace of the earth is the misclear purts in e uster ince infer in haver and inventrention not appayse in the air, it is first necessary to acquire knowledge of the winds, which we will prove by the death of the painter must resemble a mirror, which always takes the colour of the object it reflects and is completely occupied by the images of as many objects; the third, and thet at ats.
Teer, a to attritage are the tont antion whece ho dound the least work are most active.
The truth of things is the chief nutriment of superior intellect, a mistle is the thiencoulite without theory ace the thuth of things is the

In [44]:
# Data used was too small.

In [None]:
#.......................... using a GRU .............................

In [45]:
def build_model(vocabulary_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character-level network: Embedding -> stateful GRU -> Dense.

    Defining this replaces the LSTM-based build_model from the earlier cells.

    Args:
        vocabulary_size: number of distinct characters; used as the embedding
            input size and as the width of the final Dense layer.
        embedding_dim: size of each character embedding vector.
        rnn_units: number of units in the GRU layer.
        batch_size: fixed batch dimension baked into the input shape
            (the GRU is created with stateful=True).

    Returns:
        An uncompiled tf.keras.Sequential model.
    """
    layers = tf.keras.layers
    embedding = layers.Embedding(vocabulary_size, embedding_dim, batch_input_shape=[batch_size, None])
    recurrent = layers.GRU(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform')
    projection = layers.Dense(vocabulary_size)
    return tf.keras.Sequential([embedding, recurrent, projection])

In [46]:
model_3= build_model(
    vocabulary_size = len(unique),
    embedding_dim = embedding_dim,
    rnn_units = rnn_units,
    batch_size = BATCH_SIZE

)

In [47]:
model_3.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (3, None, 256)            15360     
_________________________________________________________________
gru (GRU)                    (3, None, 1024)           3935232   
_________________________________________________________________
dense_4 (Dense)              (3, None, 60)             61500     
Total params: 4,012,092
Trainable params: 4,012,092
Non-trainable params: 0
_________________________________________________________________


In [48]:
model_3.compile(optimizer = 'adam', loss = loss)

In [49]:
# Checkpointing for the GRU run: save the model's weights while training.

#---------------------------------------------
# Directory where the checkpoints are written.
# Note: this rebinds checkpoint_directory, which earlier pointed at a previous run's folder.
checkpoint_directory = './Training_checkpoints_3'

# File-name template for the checkpoints; Keras fills in {epoch} with the epoch number.
checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt_{epoch}')

# Callback handed to fit(); save_weights_only=True stores only the weights, not the full model.
checkpoint_call_back = tf.keras.callbacks.ModelCheckpoint(filepath = checkpoint_prefix, save_weights_only = True)

In [50]:
Epochs = 25

In [51]:
learning_3 = model_3.fit(dataset, epochs = Epochs, callbacks = [checkpoint_call_back])

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [52]:
trained_weights_3= tf.train.latest_checkpoint(checkpoint_directory)

In [53]:
# Build a batch_size = 1 copy of the GRU network and load into it the latest
# checkpoint written while training model_3.
model_4 = build_model( vocabulary_size = len(unique),embedding_dim = embedding_dim, rnn_units = rnn_units,batch_size = 1)
model_4.load_weights(trained_weights_3)
model_4.build(tf.TensorShape([1,None]))  # input shape: batch of 1, sequence length left unspecified

In [54]:
model_4.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (1, None, 256)            15360     
_________________________________________________________________
gru_1 (GRU)                  (1, None, 1024)           3935232   
_________________________________________________________________
dense_5 (Dense)              (1, None, 60)             61500     
Total params: 4,012,092
Trainable params: 4,012,092
Non-trainable params: 0
_________________________________________________________________


In [55]:
print(generating_the_text(model_4, starting_string ='The'))

The mind of the painter must resemble a mirror, which always takes the colour of the object it reflects and is completely occupied by the images of as many objects as are in front of it.
The mind of the painter must resemble a mirror, which always takes the colour of the object it reflects and is completely occupied by the images of as many objects as are in front of it.
The mind of the painter must reselbal to a lifb, how many and which sinews, expanded into the thinnest cartilage, surround and the first of that which copies every thing placed in front of it without bearu wall the joy of undanst one well spent is long.
He who wishes to be rich in a day will ne a soe woll whe boly eyrs the painter in the representation of visible things, and far below the painter in the representation of visible things, and far below the painter in the representation of visible things, and far below the painter in the representation of visible things, and far below the painter in the representation of 

In [56]:
# looks like the GRU model over-fitted.

In [57]:
# reference : https://www.tensorflow.org/tutorials/text/text_generation