# Example of a seq2seq architecture for handwriting recognition
    - Train a seq2seq architecture over a sequence of digits generated by the MNIST dataset.
    - The same model can be applied to the general handwritten text recognition problem
    - paper: Offline continuous handwriting recognition using sequence to sequence neural networks
    - https://www.sciencedirect.com/science/article/pii/S0925231218301371 

In [None]:
# Header

from __future__ import print_function

import tensorflow as tf
import numpy as np
import random
import cv2
import os
import time


#Limit GPU cards
# Environment variables read by the CUDA runtime: order the devices by PCI
# bus id and make only device '0' visible to this process.
# NOTE(review): presumably these must be set before TensorFlow first touches
# the GPU for them to take effect - confirm if this cell is moved.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = '0'


In [None]:
# Parameters
# ==================================================
# Every tunable value of the notebook is declared as a command-line flag, so
# the same code can run as a script. Values are read later through FLAGS.

# Path to the data 
#data_path = '/tmp'


import argparse

args = argparse.ArgumentParser()


# Data loading params
#args.add_argument("--data_path", type=str, default='/tmp', help="data_path")
args.add_argument("--x_shape", type=int, default=192, help="x_shape (default: 192)")
args.add_argument("--y_shape", type=int, default=48, help="y_shape (default: 48)")
args.add_argument("--x_slide_size", type=int, default=28, help="x_slide_size (default: 28)")
args.add_argument("--slides_stride", type=int, default=2, help="slides_stride (default: 2)")
args.add_argument("--seq_decoder_len", type=int, default=19, help="max_length of a word (default: 19)")
args.add_argument("--size", type=int, default=10, help="size")
args.add_argument("--seq_length", type=int, default=10, help="seq_length")


# Model Hyperparameters

# Convolutional part parameters
args.add_argument("--num_classes_char_model", type=int, default=10, help="num_classes_char_model")
args.add_argument("--dense_size_char_model", type=int, default=1024, help="dense size of the char model (default: 1024)")

# RNN parameters
args.add_argument("--dim_lstm", type=int, default=256, help="dim_lstm (default: 256)")
args.add_argument("--keep_prob", type=float, default=0.5, help="keep_prob (default: 0.5)")
args.add_argument("--lambda_l2_reg", type=float, default=0, help="lambda_l2_reg (default: 0)")


# Training parameters
args.add_argument("--experiment", type=str, default='/tmp/mnist_sequence/exp01', help="Experiment absolute path")
args.add_argument("--load_model_name", type=str, default='',
                  help="If continue training, name of the model to load (default <BLANK> no continue training)")
# The help text states the value actually used as default (0.002).
args.add_argument("--learning_rate", type=float, default=0.002, help="learning rate (default: 0.002)")
args.add_argument("--pct_lr_char_model", type=float, default=0.1, help="Percent of learning rate applied to the char model part (default: 0.1)")
args.add_argument("--batch_size", type=int, default=256, help="Batch Size (default: 256)")
args.add_argument("--exponential_decay_step", type=int, default=100, help="exponential_decay_step (defaults 100)")
args.add_argument("--exponential_decay_rate", type=float, default=0.95, help="exponential_decay_rate (default 0.95)")
args.add_argument("--min_steps", type=int, default=10, help="min_steps (defaults 10 - min 10)")
args.add_argument("--max_steps", type=int, default=1000, help="max_steps (defaults 1000 - min 1)")



# parse_known_args (instead of parse_args) leaves arguments that are not
# declared above in `unparsed` rather than aborting on them.
FLAGS, unparsed = args.parse_known_args()
print("\nParameters:", FLAGS)




## Data generation
    - Build a generator of variable-length digit sequences from the MNIST dataset

In [None]:
# Data generator parameters

# Ids of the special symbols; the digits '0'..'9' take the ids 0..9.
PAD_ID = 10
GO_ID = 11
EOL_ID = 12
char_list = '0123456789'

# Lookup tables: character -> id and id -> character.
encode_dict = {char: idx for idx, char in enumerate(char_list)}
decode_dict = {idx: char for idx, char in enumerate(char_list)}

# The special symbols only exist in the id -> text direction.
decode_dict.update({PAD_ID: '-PAD', GO_ID: 'GO', EOL_ID: '-EOL'})





In [None]:

def decode_text(text_array, decoder_dict):
    '''
    Translate a sequence of symbol ids into text.

    Every id is looked up in decoder_dict and the labels are concatenated in
    order. The end-of-line id is taken to be len(decoder_dict)-1: its own
    label is still emitted and everything after it is ignored.
    '''
    eol_code = len(decoder_dict) - 1
    pieces = []
    for code in text_array:
        pieces.append(decoder_dict[code])
        if code == eol_code:
            # Nothing after the end-of-line symbol belongs to the text
            break
    return ''.join(pieces)
#Test
#print(decode_text(np.array([2, 1, 12, 11], dtype=np.uint8), decode_dict))


def data_generator(X, y, batch_size=256, size_word=3, size=3):
    ''' Build one batch of images made of MNIST digits placed side by side.

    Every sample draws between 1 and size_word digits and writes them left to
    right on a blank canvas. The digits come from consecutive positions of a
    single random permutation of X, so no digit image is used twice in a batch.
        - X: digit images; each entry is reshaped to 28x28
        - y: labels aligned with X
        - size: width of the output canvas, measured in 28-pixel digits
        - size_word: max number of digits in a sequence
    Returns (img, img_l, target, target_l): the canvases, the used width in
    pixels, the digit labels and the number of digits of every sample.
    '''
    digit_px = 28
    img = np.zeros([batch_size, digit_px, digit_px*size])
    img_l = np.zeros([batch_size])
    target = np.zeros([batch_size, size_word])
    target_l = np.zeros([batch_size])

    # One shuffle per batch; the samples consume it front to back
    shuffled = np.random.permutation(range(0, X.shape[0]))
    cursor = 0
    for sample in range(batch_size):
        n_digits = np.random.randint(1, size_word+1)
        img_l[sample] = n_digits * digit_px
        target_l[sample] = n_digits
        for slot in range(n_digits):
            source = shuffled[cursor + slot]
            left = slot * digit_px
            img[sample, :, left:left + digit_px] = np.reshape(X[source], [digit_px, digit_px])
            target[sample, slot] = y[source]
        cursor += n_digits
    return img, img_l, target, target_l


def img_augmented(img1, angle=0.0):
    ''' Data augmentation: slant the image.

    Builds a 2x3 affine matrix whose x row is [1, -angle, 0.5*height*angle]
    and whose y row is the identity, then warps img1 with it. The output has
    the same width and height as the input.
    '''
    height, width = img1.shape[0], img1.shape[1]
    slant = np.float32([[1, -angle, 0.5*height*angle],
                        [0, 1, 0]])
    # WARP_INVERSE_MAP: the matrix is handed over as the inverse transform
    return cv2.warpAffine(img1, slant, (width, height),
                          flags=cv2.WARP_INVERSE_MAP|cv2.INTER_LINEAR)
# plt.imshow(img_augmented(img[0], angle=-0.5))



#Generate slides of the image
def generate_slides(img_batch, img_len_batch, x_slide_size = 10, slides_stride = 2):
    ''' Cut every image of the batch into overlapping vertical windows.

        - img_batch: images, each indexed as [row, column]
        - img_len_batch: used width (in pixels) of every image
        - x_slide_size: width of a window
        - slides_stride: horizontal step between consecutive windows
    Returns an array with, per image, a [rows, x_slide_size, max_slides]
    volume (windows not filled stay at zero) and an array with the number of
    windows filled for every image (always at least 2).
    '''
    stride = float(slides_stride)
    slides_batch, slides_len_batch = [], []

    # No normalization nor augmentation is applied: pixels are copied as they come
    for position, frame in enumerate(img_batch):
        height, width = frame.shape[0], frame.shape[1]

        # Capacity of the volume vs. windows needed to cover the used width
        max_slides = int((width - x_slide_size)/stride)
        wanted = 1 + int((img_len_batch[position] - x_slide_size)/stride)
        num_slides = max(2, min(max_slides, wanted))

        windows = np.zeros([height, x_slide_size, max_slides])
        for k in range(num_slides):
            left = k*slides_stride
            windows[:, :, k] = frame[:, left:left + x_slide_size]

        slides_batch.append(windows)
        slides_len_batch.append(num_slides)

    return np.array(slides_batch), np.array(slides_len_batch)



def generate_target(y_ini, y_len, seq_length=3, num_classes=13):
    ''' Build the decoder tensors of a batch from its labels.

        - y_ini: labels, indexed as [sample, position]
        - y_len: number of valid labels of every sample
        - seq_length: the decoder runs seq_length+1 steps
        - num_classes: depth of the one-hot decoder inputs
    Returns (decoder_inputs, targets, weights):
        - decoder_inputs: one-hot of GO at step 0, of the previous label on
          the following valid steps and of PAD afterwards
        - targets: the label of every step, then EOL, then PAD
        - weights: 1 up to and including the EOL step, 0 on padding
    '''
    n_samples = y_ini.shape[0]
    n_steps = seq_length + 1
    decoder_inputs = np.zeros([n_samples, n_steps, num_classes], dtype=np.float32)
    weights = np.zeros([n_samples, n_steps], dtype=np.float32)
    targets = np.zeros([n_samples, n_steps], dtype=np.uint16)

    for row in range(n_samples):
        n_chars = y_len[row]
        for pos in range(n_steps):
            # Step 0 is never padding, whatever the declared length
            if pos > 0 and not (pos < n_chars or pos == n_chars):
                decoder_inputs[row, pos, PAD_ID] = 1
                targets[row, pos] = PAD_ID
                continue  # the weight of a padding step stays at 0

            weights[row, pos] = 1
            previous = GO_ID if pos == 0 else int(y_ini[row, pos-1])
            decoder_inputs[row, pos, previous] = 1
            if pos > 0 and pos == n_chars:
                # The step right after the last label closes the sequence
                targets[row, pos] = EOL_ID
            else:
                targets[row, pos] = int(y_ini[row, pos])

    return decoder_inputs, targets, weights



def batch_generator_epoch(X, y, batch_size=256, size_word=3, size=3, 
                          slides_stride=2, x_slide_size=10, num_classes=10+3, num_batches=100):
    '''Generator that yields the num_batches batches of one epoch.

    Every batch is generated from scratch: digit-sequence images, their
    slides and the decoder tensors. Each item is the tuple
    (slides, slides_len, decoder_inputs, targets, weights, images, labels).
    Note that the decoder tensors are sized with `size`, not `size_word`.
    '''
    for _ in range(num_batches):
        images, images_len, labels, labels_len = data_generator(
            X, y, batch_size=batch_size, size_word=size_word, size=size)

        slides, slides_len = generate_slides(
            images, images_len, x_slide_size=x_slide_size, slides_stride=slides_stride)

        dec_inputs, dec_targets, dec_weights = generate_target(
            labels, labels_len, seq_length=size, num_classes=num_classes)

        yield slides, slides_len, dec_inputs, dec_targets, dec_weights, images, labels

#Test
'''
seq = batch_generator_epoch(mnist.train.images, mnist.train.labels, batch_size = 2)    
next_seq = seq.next()

print('slides_batch: '    , next_seq[0].shape, next_seq[0][0])
print('slides_len_batch: ', next_seq[1].shape, next_seq[1][0])
print('decoder_inputs: '  , next_seq[2].shape, next_seq[2][0])
print('targets: '         , next_seq[3].shape, next_seq[3][0])
print('weights: '         , next_seq[4].shape, next_seq[4][0])
print('X: '               , next_seq[5].shape, next_seq[5][0])
print('y_ini: '           , next_seq[6].shape, next_seq[6][0])

b = 0
plt.imshow(next_seq[5][b])
'''

In [None]:
# Check sequence generator
# Visual sanity check of data_generator on real MNIST digits.
import matplotlib.pyplot as plt
%matplotlib inline

# The four arrays loaded here are module-level names that the training code
# further down reads directly (x_train/y_train and x_test/y_test).
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Two samples with the default size_word/size; print lengths and labels and
# show the first generated image.
img, img_l, target, target_l = data_generator(x_train, y_train, batch_size=2)
print(img_l, target, target_l)
plt.imshow(img[0], cmap='gray')



# Model definition

In [None]:
def lenet_over_seq(img_seq, dropout_keep_prob):
    ''' Convolutional part
    Runs one LeNet-style network over every image of a sequence. The
    variables are created once, so all the steps share the same weights.

        - img_seq: iterable of image tensors, one per step (1 input channel)
        - dropout_keep_prob: keep probability of the dropout applied after
          the first dense layer
    Returns a list with one tensor per step: the ReLU output of the second
    dense layer (FLAGS.num_classes_char_model units).
    '''
    def weight_var(shape):
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

    def bias_var(units):
        return tf.Variable(tf.constant(0.1, shape=[units]))

    def conv_relu(steps, kernel, bias):
        return [tf.nn.relu(tf.nn.conv2d(step, kernel, strides=[1, 1, 1, 1], padding='SAME') + bias)
                for step in steps]

    def pool_2x2(steps):
        return [tf.nn.max_pool(step, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
                for step in steps]

    def dense_relu(steps, weights, bias):
        return [tf.nn.relu(tf.matmul(step, weights) + bias) for step in steps]

    # Variables and ops are created in the same order as a plain top-down
    # layer listing: conv 1, pool 1, conv 2, pool 2, flatten, dense 1,
    # dropout, dense 2.

    #First convolution: 5x5, 1 -> 20 channels
    kernel_1 = weight_var([5, 5, 1, 20])
    bias_1 = bias_var(20)
    pooled_1 = pool_2x2(conv_relu(img_seq, kernel_1, bias_1))

    #Second convolution: 5x5, 20 -> 50 channels
    kernel_2 = weight_var([5, 5, 20, 50])
    bias_2 = bias_var(50)
    pooled_2 = pool_2x2(conv_relu(pooled_1, kernel_2, bias_2))

    #First dense layer. The flat size is fixed to 7*7*50
    flat_units = 7*7*50
    flattened = [tf.reshape(step, [-1, flat_units]) for step in pooled_2]

    dense_1_w = weight_var([flat_units, FLAGS.dense_size_char_model])
    dense_1_b = bias_var(FLAGS.dense_size_char_model)
    hidden = dense_relu(flattened, dense_1_w, dense_1_b)

    #Dropout over the first dense layer
    hidden_drop = [tf.nn.dropout(step, dropout_keep_prob) for step in hidden]

    #Second dense layer
    dense_2_w = weight_var([FLAGS.dense_size_char_model, FLAGS.num_classes_char_model])
    dense_2_b = bias_var(FLAGS.num_classes_char_model)
    return dense_relu(hidden_drop, dense_2_w, dense_2_b)


def variable_summaries(var, name):
    """Register summaries of a tensor: mean, deviation, max, min and histogram.

    All of them are created inside the 'summaries' name scope and tagged with
    `name`.
    """
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean/'   + name, mean)

        # Root of the mean squared distance to the mean
        deviation = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('sttdev/' + name, deviation)

        for prefix, reducer in (('max/', tf.reduce_max), ('min/', tf.reduce_min)):
            tf.summary.scalar(prefix + name, reducer(var))

        tf.summary.histogram(name, var)


In [None]:
### Create model

# Model parameters, copied from the command-line flags into module-level
# names that the graph definition and the training loop read.

# Units of every LSTM cell
dim_lstm = FLAGS.dim_lstm

# The decoder placeholders hold seq_length+1 steps
seq_length = FLAGS.seq_length

# X stride of slides
slides_stride = FLAGS.slides_stride

#X length of slide
x_slide_size = FLAGS.x_slide_size

# Width, in digits, of the generated images. (remove?)
# NOTE: the batch generator sizes the decoder tensors with `size` while the
# placeholders use seq_length, so both are expected to hold the same value.
size = FLAGS.size


# Calculated

# Symbols of char_list plus the three special ids
vocab_size = len(char_list) + 3

# Width in pixels of an image of seq_length digits
x_size = seq_length*28

# Num slides
seq_input_len = int((x_size - x_slide_size)/float(slides_stride))



# Build the whole seq2seq model in its own graph:
#   slides -> shared LeNet (char model) -> bidirectional LSTM encoder
#          -> attention decoder -> per-step logits -> sequence loss.
# The names created here (placeholders, loss, train_op, output, merged, saver)
# are module-level and are read by the training code.
graph = tf.Graph()
with graph.as_default():

    #Placeholders
    with tf.name_scope('inputs') as scope:
        # Slides of every image; the last axis indexes the slide and is split
        # into one [batch, 28, 28, 1] tensor per slide for the convolutions.
        input_slides = tf.placeholder(tf.float32, shape=(None, 28, 28, seq_input_len), name='input_slides')
        input_convolution = tf.split(input_slides, seq_input_len, axis=3)

        # Number of valid slides of every image
        input_slides_len = tf.placeholder(tf.int32, shape=(None), name='input_word_len')

        # One-hot decoder inputs, split into a list with one tensor per step
        input_word_chars = tf.placeholder(tf.float32, shape=(None, seq_length+1, vocab_size), name="input_word_chars") 
        input_decoder = [tf.reshape(t, [-1, vocab_size]) for t in tf.split(input_word_chars, seq_length+1, axis=1)]

        input_targets = tf.placeholder(tf.int32  , shape=[None, seq_length+1], name='input_targets')
        input_weights = tf.placeholder(tf.float32, shape=[None, seq_length+1], name='input_weights')

        # Per-step lists, the format expected by the legacy seq2seq loss
        weights = [tf.reshape(t, [-1]) for t in tf.split(input_weights, seq_length+1, axis=1)]
        targets = [tf.reshape(t, [-1]) for t in tf.split(input_targets, seq_length+1, axis=1)]


    ##Encoder
    with tf.name_scope('encoder') as scope:
        #Transform images to input to the LSTM
        keep_prob = tf.placeholder(tf.float32)
        input_encoder = lenet_over_seq(input_convolution, keep_prob)    
        # Stack the per-slide outputs along axis 1 so the tensor is
        # [batch, slides, features]: bidirectional_dynamic_rnn takes axis 1 as
        # time and sequence_length (input_slides_len) counts slides. Stacking
        # on the last axis would make the RNN iterate over the features.
        input_encoder = tf.stack(input_encoder, axis=1)
        variable_summaries(input_encoder, 'input_encoder')

        
        # LSTM
        cell_fw = tf.nn.rnn_cell.LSTMCell(dim_lstm,
                      initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123),
                                               state_is_tuple=False)
        cell_bw = tf.nn.rnn_cell.LSTMCell(dim_lstm,
                      initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
                                               state_is_tuple=False)
        
        (enc_outputs_list, enc_state_list) = tf.nn.bidirectional_dynamic_rnn(
                          cell_fw, cell_bw, input_encoder, dtype=tf.float32,
                          sequence_length=input_slides_len) 
        enc_outputs = tf.concat(enc_outputs_list, axis=2)
                        
        enc_state_b = enc_state_list[1] # state of the bw layer
        # Forward and backward outputs of every step, attended by the decoder
        attention_states = tf.concat(enc_outputs_list, axis=2)


        
    ##Decoder
    with tf.name_scope('decoder') as scope:
        # Projection from the decoder output to the vocabulary; it is shared
        # by loop_function and by the output layer below.
        W_decoder = tf.Variable(tf.truncated_normal([dim_lstm, vocab_size], stddev=0.1), name='W_decoder')
        b_decoder = tf.Variable(tf.constant(0.1, shape=[vocab_size]), name='b_decoder')

        cell_dec = tf.nn.rnn_cell.LSTMCell(dim_lstm,
                                           initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
                                           state_is_tuple=False)
        cell_dec = tf.nn.rnn_cell.DropoutWrapper(cell_dec, output_keep_prob=keep_prob)


        def loop_function(prev, _):
            # The next input is the softmax of the projected previous output
            relu_prev = tf.nn.relu(tf.matmul(prev, W_decoder) + b_decoder)
            return tf.nn.softmax(relu_prev)    

        def decoder(feed_previous_bool):
            # attention_decoder feeds its own previous output (and ignores
            # the decoder inputs after GO) only when a loop_function is
            # given, so it must be passed exactly when feeding previous.
            loop_f = loop_function if feed_previous_bool else None
            # tf.cond builds the True branch first: that one creates the
            # variables and the False branch reuses them.
            reuse = None if feed_previous_bool else True
            with tf.variable_scope(
                tf.get_variable_scope(), reuse=reuse) as scope:
                dec_outputs, _ = tf.contrib.legacy_seq2seq.attention_decoder(input_decoder, enc_state_b,
                                                                      attention_states, cell_dec, num_heads=1,
                                                                      loop_function=loop_f)
                            
            return dec_outputs

        # If feed_previous = True --> TEST: use the previous predicted output for the next output
        # If feed_previous = False -->  TRAIN: use the real previous output to predict the next output
        feed_previous = tf.placeholder(tf.bool)
        dec_outputs = tf.cond(feed_previous, lambda: decoder(True), lambda: decoder(False))    
        print('dec_outputs', dec_outputs)



    with tf.name_scope('outputs') as scope:
        # Per-step logits over the vocabulary
        dense_outputs = [tf.nn.relu(tf.matmul(dec_o, W_decoder) + b_decoder) for dec_o in dec_outputs]
        variable_summaries(dense_outputs, 'dense_outputs')
        
        output_proba = tf.stack(dense_outputs, axis=-1, name='stack_output')
                
        #Prediction probs, with the decoder steps on axis 1
        output = tf.concat([tf.expand_dims(tf.nn.softmax(t),1) for t in dense_outputs], 1)
        print('output', output)


    #Loss
    with tf.name_scope('loss') as scope:
        loss = tf.contrib.legacy_seq2seq.sequence_loss(dense_outputs, targets, weights, name='seq2seq')
        loss_summary = tf.summary.scalar("loss", loss)

        



    #Trainer: different learning rate over the encoder part.
    with tf.name_scope('trainer') as scope:
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')
        
        #Lists of encoder vs other vars, told apart by the variable name prefix.
        # NOTE(review): presumably only the tf.Variable weights of the char
        # model carry the 'encoder' prefix (variables the LSTM cells create
        # through get_variable would not) - confirm.
        encoder_vars=[]
        other_vars = []
        for t in tf.trainable_variables():
            if t.name[:7] == 'encoder':
                encoder_vars += [t] 
            else:
                other_vars += [t] 

        
        # The reduced rate comes from the --pct_lr_char_model flag (default
        # 0.1) instead of a hard-coded factor.
        opt_encoder = tf.train.AdamOptimizer(learning_rate*FLAGS.pct_lr_char_model, beta1=0.9, beta2=0.999, epsilon=1e-08)
        opt_other   = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08)
        
        
        # A single gradient computation, split between the two optimizers
        grads = tf.gradients(loss, encoder_vars + other_vars)
        grads_encoder = grads[:len(encoder_vars)]
        grads_other   = grads[len(encoder_vars):]
        
        train_encoder = opt_encoder.apply_gradients(zip(grads_encoder, encoder_vars))
        train_other   = opt_other.apply_gradients(zip(grads_other, other_vars))
        train_op      = tf.group(train_encoder, train_other)
        
        
        # If no different learning rates.
        #optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08)
        #optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        #train_op = optimizer.minimize(loss)


    # Saver 
    saver = tf.train.Saver(max_to_keep=0)

    # Summaries
    with tf.name_scope('summaries') as scope:
        merged = tf.summary.merge_all()

    # Add to collection, so the tensors can be fetched by name from the graph
    tf.add_to_collection('input_slides', input_slides)
    tf.add_to_collection('input_slides_len', input_slides_len)
    tf.add_to_collection('input_word_chars', input_word_chars)
    tf.add_to_collection('input_targets', input_targets)
    tf.add_to_collection('input_weights', input_weights)
    tf.add_to_collection('output_proba', output_proba)
    tf.add_to_collection('output', output)
    tf.add_to_collection('keep_prob', keep_prob)
    tf.add_to_collection('feed_previous', feed_previous)
    tf.add_to_collection('loss', loss)
    tf.add_to_collection('train_op', train_op)
    tf.add_to_collection('merged', merged)

print('MODEL CREATED!')

## Model training

In [None]:

def decode_text(text_array, decoder_dict):
    '''Turn a sequence of symbol ids into text.

    Labels are taken from decoder_dict and appended in order. The id
    len(decoder_dict)-1 marks the end of line: its label is included and the
    decoding stops right after it.
    '''
    eol_code = len(decoder_dict) - 1
    text = ''
    for symbol in text_array:
        text = text + decoder_dict[symbol]
        if symbol == eol_code:
            return text
    return text
#Test
#print(decode_text(np.array([2, 1, 12, 11], dtype=np.uint8), decode_dict))


def decode_response(response_array):
    '''Decode the model response into symbol ids.

    response_array is indexed as [sample, step, class]. Returns a list with,
    per sample, the list of the highest-scoring class of every step.
    '''
    n_samples = response_array.shape[0]
    return [list(np.argmax(response_array[i, :, :], axis=-1)) for i in range(n_samples)]



def train_batch(n_epochs, batch_size, lr=0.001, size_word=1, num_batches_epoch_trn=100, num_batches_epoch_tst=20):
    '''Train the model several epochs, evaluating on test data after each one.

    Relies on module-level names: the session (sess), the graph placeholders
    and ops, the summary writers, x_train/y_train/x_test/y_test, FLAGS and the
    generator settings (size, slides_stride, x_slide_size).

    n_epochs: number of epochs to run
    batch_size: sequences per batch
    lr: learning rate fed to the optimizers
    size_word: max number of digits of the generated sequences
    num_batches_epoch_trn: training batches per epoch
    num_batches_epoch_tst: test batches per epoch

    Returns the loss of the last training batch and the model output of the
    last test batch.
    '''
    print('TRAIN STEP. Epochs: ', n_epochs, ' - size word: ', size_word)
    # NOTE: the summary step restarts at 0 on every call, so successive calls
    # write their summaries to the same step numbers.
    step_summary = 0
    for epoch in range(n_epochs):
        # Wall-clock timer. time.clock() no longer exists in Python >= 3.8
        # (and measured CPU time on Unix); time.time() works everywhere.
        tic = time.time()
        loss_cumm = []
        seq = batch_generator_epoch(x_train, y_train, batch_size=batch_size,
                                    size_word=size_word,
                                    size=size, 
                                    slides_stride=slides_stride,
                                    x_slide_size=x_slide_size,
                                    num_batches=num_batches_epoch_trn)
        for s in seq:
            feed_dict = {input_slides: s[0],
                         input_slides_len: s[1],
                         input_word_chars: s[2],
                         input_targets: s[3],
                         input_weights: s[4],
                         keep_prob: FLAGS.keep_prob, # dropout from --keep_prob (default 0.5)
                         feed_previous: False, # False feed_previous in the training process.
                         learning_rate: lr } 
            _, loss_t = sess.run([train_op, loss], feed_dict)
            loss_cumm += [loss_t]
            
        # Summaries train, computed on the last training batch
        summary_str= sess.run(merged, feed_dict)
        train_writer.add_summary(summary_str, step_summary)
        
        #Test
        loss_cumm_tst = []
        correct = 0
        num_cases = 0
        seq_tst = batch_generator_epoch(x_test, y_test, batch_size=batch_size,
                                        size_word=size_word,
                                        size=size, 
                                        slides_stride=slides_stride,
                                        x_slide_size=x_slide_size,
                                        num_batches=num_batches_epoch_tst)
        for s_tst in seq_tst:
            feed_dict_tst = {input_slides: s_tst[0],
                         input_slides_len: s_tst[1],
                         input_word_chars: s_tst[2],
                         input_targets: s_tst[3],
                         input_weights: s_tst[4],
                         keep_prob: 1,
                         feed_previous: True} # True feed_previous in the test process.
            loss_tst, out_tst = sess.run([loss, output], feed_dict_tst)
            loss_cumm_tst += [loss_tst]
            
            # Count the sequences whose decoded text matches the target exactly
            response_predict_text = decode_response(out_tst)
            for resp in range(len(out_tst)):
                num_cases += 1
                if decode_text(s_tst[3][resp], decode_dict) == decode_text(response_predict_text[resp], decode_dict):
                    correct += 1

        # Summaries test, computed on the last test batch
        summary_str = sess.run(merged, feed_dict_tst)
        test_writer.add_summary(summary_str, step_summary)
        step_summary += 1
           
        print('Epoch: ',epoch, '- Loss trn: ', np.mean(loss_cumm), ' - Loss tst: ', np.mean(loss_cumm_tst))
        # The value printed is the fraction of sequences decoded exactly right
        print('Correct count: ', correct, ' - Correct percent (WER): ', float(correct)/float(num_cases))
        print('Time', time.time()-tic)
        
        # Print some results from the last test batch: up to 10 examples, so
        # batches smaller than 10 do not index out of range.
        print('Real vs pred examples:')
        for resp in range(min(10, len(response_predict_text))):
            print('     ', decode_text(s_tst[3][resp], decode_dict) ,
                  ' vs ',  decode_text(response_predict_text[resp], decode_dict))
            
        
    return loss_t, out_tst

In [None]:
# Run the training: open a session on the model graph, train with a
# curriculum of increasing sequence length and save the final model.
# allow_growth makes TensorFlow take GPU memory as needed.
gpu_options = tf.GPUOptions(allow_growth = True)
with tf.Session(graph=graph, config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    
    # Summaries and the final model are written under the experiment path
    summaries_dir = FLAGS.experiment

    # Module-level writers: train_batch() uses them to record summaries
    train_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'train'), sess.graph)
    test_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'test'))

    try:
        # Initialize vars    
        tf.global_variables_initializer().run()
        print('vars initialized!')

        # Curriculum learning
        # Increment the word size - decrement the learning rate
        print('\nCurriculum learning words of size 1:')
        loss_t, out_tst = train_batch(5, FLAGS.batch_size, lr=FLAGS.learning_rate, size_word=1)

        print('\nCurriculum learning words of size <=2:')
        loss_t, out_tst = train_batch(15, FLAGS.batch_size, lr=FLAGS.learning_rate*0.8, size_word=2)

        #print('\nCurriculum learning words of size <=3:')
        #loss_t, out_tst = train_batch(15, FLAGS.batch_size, lr=FLAGS.learning_rate*0.6, size_word=3)


        # Save final model
        savefile = saver.save(sess, os.path.join(summaries_dir, 'final_model'))
        print('Model saved in ', savefile)
    finally:
        # Flush and close the event files even if training fails, so the
        # summaries still buffered are not lost.
        train_writer.close()
        test_writer.close()