<a href="https://colab.research.google.com/github/mahadanso/text-generator/blob/master/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import tensorflow as tf
import datetime
from sklearn.model_selection import train_test_split

"""
    Shape(row, column)
"""

path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

"""
    Load data and get all the chars in text.
"""
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
chars = sorted(list(set(text)))
char_size = len(chars)
print('char_size : ' + str(char_size))
"""
    create dictionary to link each char to an id, and vice versa
"""
char2id = dict((c, i) for i, c in enumerate(chars))
id2char = dict((i, c) for i, c in enumerate(chars))


"""
    Text data have to be arranged into set of sections of {len_per_section} characters text 
    with the next character following the section as the output of the section.
    
    Then from the starting of the previous section, {skip} characters are skipped to start the next
    section that will form the input of the following input.
    
    Considering {len_per_section} to be 50 and skip = 2.
    section_1 = text[n:n+50]        =>      next_char_1 = text[n+50]
    section_2 = text[n+2:n+2+50]    =>      next_char_2 = text[n+2+50]
    ......
    ....
"""
len_per_section = 5
skip = 2
sections = []
next_chars = []


for i in range(0, len(text) - len_per_section, skip):
    sections.append(text[i: i + len_per_section])
    next_chars.append(text[i + len_per_section])

"""
    Create two vectors of zeros to
    
    X   => to store sections, 3 dimension.
            1-D to store a char,
            2-D to store a specific section of characters
            3-D to store all the sections
    
    y   =>  to store the next chars, 2 dimension.
            1-D to store a char,
            2-D to store all the next chars each for a section
            
    dtype   =>  int
"""
X = np.zeros((len(sections), len_per_section, char_size), dtype=int)
y = np.zeros((len(sections), char_size), dtype=int)


"""
    Go through the sections, grab the characters and one-hot encode them.
"""
for i, section in enumerate(sections):
    for j, char in enumerate(section):
        X[i, j, char2id[char]] = 1
    y[i, char2id[next_chars[i]]] = 1
    

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

"""
Var
"""
batch_size = 500
max_steps = 10
log_every = 1
save_every = 5
hidden_nodes = 100
"""
    Directory to store a trained model
"""
checkpoint_directory = 'ckpt/model'  # + datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H:%M:%S')

if tf.gfile.Exists(checkpoint_directory):
    tf.gfile.DeleteRecursively(checkpoint_directory)
tf.gfile.MakeDirs(checkpoint_directory)

"""
    Directory to store tensorboard summaries
"""
tensorboard_directory = 'temp/tensorboard'

if tf.gfile.Exists(tensorboard_directory):
    tf.gfile.DeleteRecursively(tensorboard_directory)
tf.gfile.MakeDirs(tensorboard_directory)

"""
    Define variable needed for the operations and outline the flow of data through this operations of computations.
    Basically outlining a computation graph to be later used.  
"""
graph = tf.Graph()
with graph.as_default():

    global_step = tf.Variable(0)

    with tf.name_scope('train_input'):
        # placeholders (no data in it during graph const), but will hold data during a session
        """
        1D  :   Store batch
        2D  :   Store input chars section
        3D  :   Store char
        """
        data = tf.placeholder(tf.float32, [batch_size, len_per_section, char_size], name="X")    # input data

        """
            1D  :   Store batch
            3D  :   Store output char
            """
        labels = tf.placeholder(tf.float32, [batch_size, char_size], name="Y")    # output data

    with tf.name_scope("WeightsLayer1"):
        """
            Initialise weights and biases.
            Weights initialised with random values from a truncated normal distribution.
            Biases initialised to zero.
        """
        with tf.name_scope("update"):
            w_ii = tf.Variable(tf.truncated_normal([char_size, hidden_nodes], -0.1, 0.1, tf.float32), name='wui')
            tf.summary.histogram('weights_i', w_ii)
            w_io = tf.Variable(tf.truncated_normal([hidden_nodes, hidden_nodes], -0.1, 0.1, tf.float32), name='wio')
            tf.summary.histogram('weights_o', w_io)
            b_i = tf.Variable(tf.zeros([1, hidden_nodes], tf.float32), name='bi')
            tf.summary.histogram('bias', b_i)
        with tf.name_scope("reset"):
            w_fi = tf.Variable(tf.truncated_normal([char_size, hidden_nodes], -0.1, 0.1, tf.float32), name='wri')
            tf.summary.histogram('weights_i', w_fi)
            w_fo = tf.Variable(tf.truncated_normal([hidden_nodes, hidden_nodes], -0.1, 0.1, tf.float32), name='wfo')
            tf.summary.histogram('weights_o', w_fo)
            b_f = tf.Variable(tf.zeros([1, hidden_nodes], tf.float32), name='bf')
            tf.summary.histogram('bias', b_f)
        with tf.name_scope("output"):
            w_oi = tf.Variable(tf.truncated_normal([char_size, hidden_nodes], -0.1, 0.1, tf.float32), name='woi')
            tf.summary.histogram('weights_i', w_oi)
            w_oo = tf.Variable(tf.truncated_normal([hidden_nodes, hidden_nodes], -0.1, 0.1, tf.float32), name='woo')
            tf.summary.histogram('weights_o', w_oo)
            b_o = tf.Variable(tf.zeros([1, hidden_nodes], tf.float32), name='bo')
            tf.summary.histogram('bias', b_o)
        with tf.name_scope("Cell"):
            w_ci = tf.Variable(tf.truncated_normal([char_size, hidden_nodes], -0.1, 0.1, tf.float32), name='wci')
            tf.summary.histogram('weights_i', w_ci)
            w_co = tf.Variable(tf.truncated_normal([hidden_nodes, hidden_nodes], -0.1, 0.1, tf.float32), name='wco')
            tf.summary.histogram('weights_o', w_co)
            b_c = tf.Variable(tf.zeros([1, hidden_nodes], tf.float32), name='bc')
            tf.summary.histogram('bias', b_c)

    """
        lstm(i, o, s):
        Take in Inputs i and Outpus o, and Previous State and compute a new
        state and output.
        _____________________________________________________        
            Input   i   :   shape=(batch_size, char_size)
            Output  o   :   shape=(batch_size, hidden_layers)
            State   s   :   shape=(batch_size, hidden_layers)
        _____________________________________________________
    """
    def lstm(i, o, s, name):
        with tf.name_scope(str(name) + "LSTMLayer1"):
            # the scalars are all between zero and one inclusive.
            with tf.name_scope("Gates"):
                # compute a scalar that decides what to remember about the previously seen characters
                # and what to include about the new character.
                with tf.name_scope("update_gate"):
                    update_gate = tf.sigmoid(tf.matmul(i, w_ii) + tf.matmul(o, w_io) + b_i)
                    # tf.summary.scalar('input_gate', input_gate)

                # compute a scalar that decides what to discard about the previous output.
                with tf.name_scope("reset_gate"):
                    reset_gate = tf.sigmoid(tf.matmul(i, w_fi) + tf.matmul(o, w_fo) + b_f)
                    # tf.summary.scalar('forget_gate', forget_gate)

                    # compute a scalar that decides what to include in the new output.
                with tf.name_scope("output_gate"):
                    output_gate = tf.sigmoid(tf.matmul(i, w_oi) + tf.matmul(o, w_oo) + b_o)
                    # tf.summary.scalar('output_gate', output_gate)
            with tf.name_scope("compute_cell"):
                memory_cell = tf.matmul(i, w_ci) + reset_gate * tf.matmul(o, w_co) + b_c

            with tf.name_scope("Update_state"):
                s = update_gate * s + (1 - update_gate) * memory_cell

            o = output_gate * tf.tanh(s)
            # tf.summary.scalar('state', s)

            return o, s

    with tf.name_scope("WeightsLayer2"):
        """
            Initialise weights and biases.
            Weights initialised with random values from a truncated normal distribution.
            Biases initialised to zero.
        """
        with tf.name_scope("update"):
            w_ii1 = tf.Variable(tf.truncated_normal([char_size, hidden_nodes], -0.1, 0.1, tf.float32), name='wui')
            tf.summary.histogram('weights_i', w_ii1)
            w_io1 = tf.Variable(tf.truncated_normal([hidden_nodes, hidden_nodes], -0.1, 0.1, tf.float32), name='wio')
            tf.summary.histogram('weights_o', w_io1)
            b_i1 = tf.Variable(tf.zeros([1, hidden_nodes], tf.float32), name='bi')
            tf.summary.histogram('bias', b_i1)
        with tf.name_scope("reset"):
            w_fi1 = tf.Variable(tf.truncated_normal([char_size, hidden_nodes], -0.1, 0.1, tf.float32), name='wri')
            tf.summary.histogram('weights_i', w_fi1)
            w_fo1 = tf.Variable(tf.truncated_normal([hidden_nodes, hidden_nodes], -0.1, 0.1, tf.float32), name='wfo')
            tf.summary.histogram('weights_o', w_fo1)
            b_f1 = tf.Variable(tf.zeros([1, hidden_nodes], tf.float32), name='bf')
            tf.summary.histogram('bias', b_f1)
        with tf.name_scope("output"):
            w_oi1 = tf.Variable(tf.truncated_normal([char_size, hidden_nodes], -0.1, 0.1, tf.float32), name='woi')
            tf.summary.histogram('weights_i', w_oi1)
            w_oo1 = tf.Variable(tf.truncated_normal([hidden_nodes, hidden_nodes], -0.1, 0.1, tf.float32), name='woo')
            tf.summary.histogram('weights_o', w_oo1)
            b_o1 = tf.Variable(tf.zeros([1, hidden_nodes], tf.float32), name='bo')
            tf.summary.histogram('bias', b_o1)
        with tf.name_scope("Cell"):
            w_ci1 = tf.Variable(tf.truncated_normal([char_size, hidden_nodes], -0.1, 0.1, tf.float32), name='wci')
            tf.summary.histogram('weights_i', w_ci1)
            w_co1 = tf.Variable(tf.truncated_normal([hidden_nodes, hidden_nodes], -0.1, 0.1, tf.float32), name='wco')
            tf.summary.histogram('weights_o', w_co1)
            b_c1 = tf.Variable(tf.zeros([1, hidden_nodes], tf.float32), name='bc')
            tf.summary.histogram('bias', b_c1)    
  
    """
        lstm(i, o, s):
        Take in Inputs i and Outpus o, and Previous State and compute a new
        state and output.
        _____________________________________________________        
            Input   i   :   shape=(batch_size, char_size)
            Output  o   :   shape=(batch_size, hidden_layers)
            State   s   :   shape=(batch_size, hidden_layers)
        _____________________________________________________
    """
    def lstm_1(i, o, s, name):
        with tf.name_scope(str(name) + "LSTMLayer2"):
            # the scalars are all between zero and one inclusive.
            with tf.name_scope("Gates"):
                # compute a scalar that decides what to remember about the previously seen characters
                # and what to include about the new character.
                with tf.name_scope("update_gate"):
                    update_gate = tf.sigmoid(tf.matmul(i, w_ii1) + tf.matmul(o, w_io1) + b_i1)
                    # tf.summary.scalar('input_gate', input_gate)

                # compute a scalar that decides what to discard about the previous output.
                with tf.name_scope("reset_gate"):
                    reset_gate = tf.sigmoid(tf.matmul(i, w_fi1) + tf.matmul(o, w_fo1) + b_f1)
                    # tf.summary.scalar('forget_gate', forget_gate)

                    # compute a scalar that decides what to include in the new output.
                with tf.name_scope("output_gate"):
                    output_gate = tf.sigmoid(tf.matmul(i, w_oi1) + tf.matmul(o, w_oo1) + b_o1)
                    # tf.summary.scalar('output_gate', output_gate)
            with tf.name_scope("compute_cell"):
                memory_cell = tf.matmul(i, w_ci1) + reset_gate * tf.matmul(o, w_co1) + b_c1

            with tf.name_scope("Update_state"):
                s = update_gate * s + (1 - update_gate) * memory_cell

            o = output_gate * tf.tanh(s)
            # tf.summary.scalar('state', s)

            return o, s      
          
    # initial output and state to zero
    output = tf.zeros([batch_size, hidden_nodes])
    state = tf.zeros([batch_size, hidden_nodes])

    # loop through all the sections.
    for i in range(len_per_section):

        """
            data[:, i, :]   :   i(th) section from the given batch
        """
        output, state = lstm(data[:, i, :], output, state, str(i))

        """
            outputs_all_i   :   stores the outputs from the lstm
            labels_all_i    :   stores the characters that follow, Which are the correct labels
        """

        if i == 0:  # if first section
            outputs_all_i = output  # make current output the start
            labels_all_i = data[:, i + 1, :]    # make next input as the start

        elif i != len_per_section - 1:  # not first or last section
            outputs_all_i = tf.concat([outputs_all_i, output], 0)   # append the current output
            labels_all_i = tf.concat([labels_all_i, data[:, i + 1, :]], 0)  # append the next input

        else:   # the last section
            outputs_all_i = tf.concat([outputs_all_i, output], 0)   # append the current output
            labels_all_i = tf.concat([labels_all_i, labels], 0)     # append the final labels

    for i in range(len_per_section):

        """
            data[:, i, :]   :   i(th) section from the given batch
        """
        output, state = lstm_1(data[:, i, :], output, state, str(i))

        """
            outputs_all_i   :   stores the outputs from the lstm
            labels_all_i    :   stores the characters that follow, Which are the correct labels
        """

        if i == 0:  # if first section
            outputs_all_i = output  # make current output the start
            labels_all_i = data[:, i + 1, :]    # make next input as the start

        elif i != len_per_section - 1:  # not first or last section
            outputs_all_i = tf.concat([outputs_all_i, output], 0)   # append the current output
            labels_all_i = tf.concat([labels_all_i, data[:, i + 1, :]], 0)  # append the next input

        else:   # the last section
            outputs_all_i = tf.concat([outputs_all_i, output], 0)   # append the current output
            labels_all_i = tf.concat([labels_all_i, labels], 0)     # append the final labels

    with tf.name_scope('ouput_weight_bias'):
        w = tf.Variable(tf.truncated_normal([hidden_nodes, char_size], -0.1, 0.1), name='w')
        tf.summary.histogram('W', w)
        b = tf.Variable(tf.zeros([char_size]), name='b')
        tf.summary.histogram('B', w_co)

    with tf.name_scope('logits'):
        logits = tf.matmul(outputs_all_i, w) + b
    with tf.name_scope('cross_entropy'):
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels_all_i))
    tf.summary.scalar('cross_entropy', loss)
    with tf.name_scope('train'):
        optimizer = tf.train.GradientDescentOptimizer(10.).minimize(loss, global_step=global_step)
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_predictions'):
            correct_predictions = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_all_i, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
    tf.summary.scalar('accuracy', accuracy)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(tensorboard_directory, graph)
"""
    create a session and train the model on the data
"""
with tf.Session(graph=graph) as sess:
    # initialise all variables
    tf.global_variables_initializer().run()

    offset = 0

    saver = tf.train.Saver()

    X_length = len(X_train)

    # retrain on the data every epoch
    for step in range(max_steps):

        # pass the data to the model in batches
        for batch in range(X_length//batch_size):

            offset = offset % X_length

            if offset <= (X_length - batch_size):
                batch_data = X_train[offset: offset + batch_size]
                batch_labels = y_train[offset: offset + batch_size]
                offset += batch_size
            else:
                to_add = batch_size - (X_length - offset)
                batch_data = np.concatenate((X_train[offset:X_length], X_train[0: to_add]))
                batch_labels = np.concatenate((y_train[offset:X_length], y_train[0: to_add]))
                offset = to_add

            _, acc, summary, training_loss = sess.run([optimizer, accuracy, merged, loss], feed_dict={data: batch_data, labels: batch_labels})

            if step % log_every == 0:
                print('training loss at step %d - batch %d: %.2f (%s)' % (step, batch, training_loss, datetime.datetime.now()))
                print('Accuracy at step %d - batch %s: %s' % (step, batch, acc))

                if batch % save_every == 0:
                    saver.save(sess, checkpoint_directory + '/model', global_step=step)

        train_writer.add_summary(summary, step)
        
    X_test_length = len(X_test)
      
    # pass the data to the model in batches
     
    for batch in range(X_test_length//batch_size):

        offset = offset % X_test_length

        if offset <= (X_test_length - batch_size):
          batch_data = X_test[offset: offset + batch_size]
          batch_labels = y_test[offset: offset + batch_size]
          offset += batch_size
        else:
            to_add = batch_size - (X_test_length - offset)
            batch_data = np.concatenate((X_test[offset:X_test_length], X_test[0: to_add]))
            batch_labels = np.concatenate((y_test[offset:X_test_length], y_test[0: to_add]))
            offset = to_add

        acc = sess.run([accuracy], feed_dict={data: batch_data, labels: batch_labels})

        if step % log_every == 0:
          print('Accuracy at step %d - batch %s: %s' % (step, batch, acc))


Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
char_size : 65
training loss at step 0 - batch 0: 4.18 (2019-08-04 13:33:49.743027)
Accuracy at step 0 - batch 0: 0.012
training loss at step 0 - batch 1: 3.75 (2019-08-04 13:33:49.903178)
Accuracy at step 0 - batch 1: 0.1604
training loss at step 0 - batch 2: 3.56 (2019-08-04 13:33:49.916193)
Accuracy at step 0 - batch 2: 0.1504
training loss at step 0 - batch 3: 3.48 (2019-08-04 13:33:49.930927)
Accuracy at step 0 - batch 3: 0.1476
training loss at step 0 - batch 4: 3.42 (2019-08-04 13:33:49.943366)
Accuracy at step 0 - batch 4: 0.1504
training loss at step 0 - batch 5: 3.40 (2019-08-04 13:33:49.963696)
Accuracy at step 0 - batch 5: 0.07
training loss at step 0 - batch 6: 3.44 (2019-08-04 13:33:50.076217)
Accuracy at step 0 - batch 6: 0.156
training loss at step 0 - batch 7: 3.69 (2019-08-04 13:33:50.094020)
Accuracy at step 0 - batch 7: 0.0848
training loss at step 0 - batch 8: 3.41 (20

W0804 13:36:10.036100 140386591647616 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py:960: remove_checkpoint (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to delete files with this prefix.


training loss at step 4 - batch 775: 0.31 (2019-08-04 13:36:09.853616)
Accuracy at step 4 - batch 775: 0.9104
training loss at step 4 - batch 776: 0.33 (2019-08-04 13:36:09.974821)
Accuracy at step 4 - batch 776: 0.908
training loss at step 4 - batch 777: 0.32 (2019-08-04 13:36:09.989787)
Accuracy at step 4 - batch 777: 0.9024
training loss at step 4 - batch 778: 0.32 (2019-08-04 13:36:10.002552)
Accuracy at step 4 - batch 778: 0.904
training loss at step 4 - batch 779: 0.32 (2019-08-04 13:36:10.015345)
Accuracy at step 4 - batch 779: 0.9032
training loss at step 5 - batch 0: 0.33 (2019-08-04 13:36:10.031236)
Accuracy at step 5 - batch 0: 0.9024
training loss at step 5 - batch 1: 0.34 (2019-08-04 13:36:10.168518)
Accuracy at step 5 - batch 1: 0.9036
training loss at step 5 - batch 2: 0.33 (2019-08-04 13:36:10.182008)
Accuracy at step 5 - batch 2: 0.9036
training loss at step 5 - batch 3: 0.32 (2019-08-04 13:36:10.195097)
Accuracy at step 5 - batch 3: 0.9048
training loss at step 5 - ba

In [0]:
101 % 100