In [None]:
import tensorflow as tf
from keras import backend as K

import numpy as np
from numpy.random import randint
import os
import time
import math

Using TensorFlow backend.


In [None]:
path_to_file = '/content/processed_texts.csv'
# Read the raw bytes and decode them as UTF-8. The context manager closes the
# file handle as soon as the read is done instead of leaving it open for the
# lifetime of the kernel.
with open(path_to_file, 'rb') as text_file:
    text = text_file.read().decode(encoding='utf-8')
# The unique characters in the file, sorted so the vocabulary order is deterministic
vocab = sorted(set(text))

In [None]:
# Lookup tables between characters and integer ids:
#   char2idx: character -> its position in the sorted vocabulary
#   idx2char: array indexed by id, giving the character back
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

# The whole text encoded as an array of character ids
text_as_int = np.array(list(map(char2idx.__getitem__, text)))

In [None]:
# Number of input characters per training example
seq_length = 100
# Every example consumes seq_length + 1 characters of the text
examples_per_epoch = len(text) // (seq_length + 1)

# Dataset that yields the encoded text one character id at a time
char_dataset = tf.data.Dataset.from_tensor_slices(tensors=text_as_int)


In [None]:
def split_input_target(chunk):
    """Split one chunk into an (input, target) pair shifted by one position.

    The input is every element except the last and the target is every
    element except the first, so target[i] is the element following input[i].
    """
    return chunk[:-1], chunk[1:]

# Group the character stream into chunks of seq_length + 1 ids (an incomplete
# trailing chunk is dropped), then turn each chunk into an (input, target) pair.
sequences = char_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)
dataset = sequences.map(map_func=split_input_target)

In [None]:
# Default number of sequences per training batch
BATCH_SIZE = 64

# Size of the shuffle buffer.
# tf.data is built for possibly unbounded streams, so it does not shuffle the
# whole dataset in memory; it keeps a buffer of this many elements and draws
# shuffled elements from it.
BUFFER_SIZE = 10_000

# Note: `dataset` is intentionally left unshuffled and unbatched here;
# run_model() shuffles it and batches it with the batch size of each setting.


In [None]:
# Default embedding dimension
embedding_dim = 256

# Default number of units in the recurrent layer
rnn_units = 1024

# Number of distinct characters in the vocabulary
vocab_size = len(vocab)

In [None]:
def build_model(vocab_size, embedding_dim = 256, rnn_units = 1024, batch_size = 64, hidden_layer = None, softmax=False):
    """Assemble the Embedding -> GRU -> [Dense] -> Dense character model.

    Args:
        vocab_size: Input dimension of the embedding layer and number of
            units of the output Dense layer.
        embedding_dim: Output dimension of the embedding layer.
        rnn_units: Number of units of the (stateful) GRU layer.
        batch_size: Batch dimension fixed in the model's input shape.
        hidden_layer: Optional dict with the keys 'size' and 'act' describing
            an extra Dense layer placed before the output layer; None skips it.
        softmax: If True, the output layer uses a softmax activation,
            otherwise it has no activation.

    Returns:
        The uncompiled tf.keras.Sequential model.
    """
    layers = tf.keras.layers

    stack = [
        layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
        layers.GRU(
            rnn_units,
            return_sequences=True,
            stateful=True,
            recurrent_initializer='glorot_uniform',
        ),
    ]

    # Optional extra fully connected layer between the GRU and the output layer
    if hidden_layer is not None:
        stack.append(layers.Dense(hidden_layer['size'], activation=hidden_layer['act']))

    output_activation = 'softmax' if softmax else None
    stack.append(layers.Dense(vocab_size, activation=output_activation))

    return tf.keras.Sequential(stack)

In [None]:
# Instantiate the network with the notebook-level default hyperparameters
model = build_model(vocab_size=len(vocab), embedding_dim=embedding_dim, rnn_units=rnn_units, batch_size=BATCH_SIZE)

# Draw the layer graph top-to-bottom, with tensor shapes and without layer
# names, and write it to original_gru_model.png
tf.keras.utils.plot_model(model, to_file="original_gru_model.png", show_shapes=True, show_layer_names=False, rankdir="TB")

In [None]:
# `model` was built with build_model's default softmax=False, so its output
# layer has no activation and emits raw logits. The string shortcut
# 'sparse_categorical_crossentropy' would interpret those logits as
# probabilities, so the loss object is created explicitly with from_logits=True.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In [None]:
def run_model(dataset, setting, EPOCHS, p = 0.8, callbacks=None):
    """Build a fresh model from `setting`, train it on `dataset` and return the result.

    Args:
        dataset: Unbatched dataset of (input, target) pairs supporting
            shuffle/take/skip/batch (a tf.data.Dataset in this notebook).
        setting: Dict with the keys 'BATCH_SIZE', 'embedding_dim', 'rnn_units',
            'hidden_layer', 'softmax' and 'learning_rate'. An optional
            'optimizer' key names the Keras optimizer (default 'adam').
        EPOCHS: Number of epochs to train for.
        p: Fraction of the examples used for training; the remainder is used
            for validation. None or a value outside [0, 1] disables the split:
            the model is then trained on all examples without validation data.
        callbacks: Optional list of Keras callbacks forwarded to fit().

    Returns:
        A tuple (loss, model): the training loss of the last completed epoch
        and the trained model.
    """
    # Fresh list per call; a mutable default argument would be shared between calls.
    callbacks = [] if callbacks is None else list(callbacks)
    batch_size = setting['BATCH_SIZE']

    # Shuffle with a frozen order before splitting. take() and skip() each
    # iterate the shuffled dataset on their own (and again on every epoch);
    # if the order changed between iterations, training and validation
    # examples would overlap.
    shuffled = dataset.shuffle(BUFFER_SIZE, reshuffle_each_iteration=False)

    # Split dataset in training and validation data
    validation_data = None
    if p is not None and 0 <= p <= 1:
        # Count by iterating, without keeping every element in memory
        num_examples = sum(1 for _ in dataset)
        num_train = math.floor(num_examples * p)
        train_data = shuffled.take(num_train)
        # Only validate when at least one full batch is left over
        if num_examples - num_train >= batch_size:
            validation_data = shuffled.skip(num_train).batch(batch_size, drop_remainder=True)
    else:
        train_data = shuffled

    # The membership of the training split is fixed above; its order may
    # still be reshuffled on every epoch.
    train_data = train_data.shuffle(BUFFER_SIZE).batch(batch_size, drop_remainder=True)

    # Build model
    model = build_model(
        vocab_size=len(vocab),
        embedding_dim=setting['embedding_dim'],
        rnn_units=setting['rnn_units'],
        batch_size=batch_size,
        hidden_layer=setting['hidden_layer'],
        softmax=setting['softmax'])

    # Without a softmax output layer the model emits logits, and the loss
    # must be told so; with softmax it receives probabilities.
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=not setting['softmax'])
    model.compile(optimizer=str(setting.get('optimizer', 'adam')), loss=loss)
    # The model is a tf.keras model, so use the tf.keras backend to set the rate
    tf.keras.backend.set_value(model.optimizer.learning_rate, setting['learning_rate'])

    # Train model
    history = model.fit(train_data, epochs=EPOCHS, validation_data=validation_data, callbacks=callbacks)

    # [-1] instead of [EPOCHS-1]: still valid if a callback stops training early
    return history.history['loss'][-1], model

In [None]:
def random_search_hyperparameters(num_tries=10):
    """Sample random hyperparameter settings and train each one for one epoch.

    Args:
        num_tries: Number of random settings to sample and evaluate.

    Returns:
        A tuple (losses, settings): `losses` is a NumPy array of length
        `num_tries` holding the training loss returned by run_model() for
        each try, and `settings` is the list of sampled setting dicts in the
        same order.
    """
    losses = np.zeros(num_tries)
    settings = []
    # Each candidate is trained for a single epoch
    epochs = 1

    for i in range(num_tries):
        # The NumPy samplers return NumPy scalar types; convert them to
        # native Python values so the settings print and compare cleanly.
        rnn_units = int(randint(500, 1500))
        embedding_dim = int(randint(200, 400))
        batch_size = int(np.random.choice([32, 64, 128, 256, 512]))
        softmax = bool(np.random.choice([True, False]))
        optimizer = str(np.random.choice(['adam', 'nadam', 'rmsprop']))
        # Bounds given as (low, high)
        learning_rate = float(np.random.uniform(0.0001, 0.01))

        use_hidden_layer = bool(np.random.choice([True, False]))
        hidden_layer_size = int(randint(100, 400))
        hidden_layer_activation = str(np.random.choice(['sigmoid', 'tanh', 'relu', 'elu']))

        if use_hidden_layer:
            hidden_layer = {'size': hidden_layer_size, 'act': hidden_layer_activation}
        else:
            hidden_layer = None

        setting = {'rnn_units': rnn_units, 'embedding_dim': embedding_dim, 'BATCH_SIZE': batch_size, 'softmax': softmax, 'hidden_layer': hidden_layer, 'optimizer': optimizer, 'learning_rate': learning_rate}
        print(setting)
        settings.append(setting)

        # run_model returns (final_loss, model); only the loss is recorded.
        # Assigning the whole tuple to losses[i] would raise.
        final_loss, _ = run_model(dataset, setting, epochs)
        losses[i] = final_loss

    return losses, settings

# Run the random search with its default number of tries (num_tries=10) and
# keep the per-try losses together with the sampled settings.
losses, settings = random_search_hyperparameters()


In [None]:
# Best found settings, hard-coded so they can be reused without repeating the search.
# NOTE: this rebinds `settings`, replacing the list returned by
# random_search_hyperparameters() above.
settings = [{'rnn_units': 1334, 'embedding_dim': 331, 'BATCH_SIZE': 32, 'softmax': True, 'hidden_layer': None, 'optimizer': 'adam', 'learning_rate': 0.0018792239351562903},
            {'rnn_units': 1203, 'embedding_dim': 318, 'BATCH_SIZE': 64, 'softmax': True, 'hidden_layer': None, 'optimizer': 'adam', 'learning_rate': 0.00249201743489572},
            {'rnn_units': 1312, 'embedding_dim': 289, 'BATCH_SIZE': 64, 'softmax': True, 'hidden_layer': None, 'optimizer': 'adam', 'learning_rate': 0.0007947486669195956},
            {'rnn_units': 1036, 'embedding_dim': 311, 'BATCH_SIZE': 64, 'softmax': True, 'hidden_layer': None, 'optimizer': 'adam', 'learning_rate': 0.0034466245761176593}]

In [None]:
# Save the model to RNN_model_new.hdf5 whenever the validation loss reaches a
# new minimum, so the file always holds the version that generalizes best
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='RNN_model_new.hdf5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Train the best of the 4 settings above for 10 epochs on a 75% training split
s = settings[2]
losses, best_model = run_model(dataset, s, EPOCHS=10, p=0.75, callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 00001: val_loss improved from inf to 1.23736, saving model to RNN_model_new.hdf5
Epoch 2/10
Epoch 00002: val_loss improved from 1.23736 to 1.19505, saving model to RNN_model_new.hdf5
Epoch 3/10
Epoch 00003: val_loss improved from 1.19505 to 1.18378, saving model to RNN_model_new.hdf5
Epoch 4/10
Epoch 00004: val_loss improved from 1.18378 to 1.17852, saving model to RNN_model_new.hdf5
Epoch 5/10
Epoch 00005: val_loss improved from 1.17852 to 1.17456, saving model to RNN_model_new.hdf5
Epoch 6/10
Epoch 00006: val_loss did not improve from 1.17456
Epoch 7/10
Epoch 00007: val_loss did not improve from 1.17456
Epoch 8/10
Epoch 00008: val_loss did not improve from 1.17456
Epoch 9/10
Epoch 00009: val_loss did not improve from 1.17456
Epoch 10/10
Epoch 00010: val_loss did not improve from 1.17456


In [None]:
# Rebuild the chosen architecture with a batch size of 1 and load the
# checkpointed weights into it
setting = settings[2]
best_model = build_model(
    vocab_size=len(vocab),
    embedding_dim=setting['embedding_dim'],
    rnn_units=setting['rnn_units'],
    batch_size=1,
    hidden_layer=setting['hidden_layer'],
    softmax=setting['softmax'],
)
best_model.load_weights('RNN_model_new.hdf5')
best_model.build(tf.TensorShape([1, None]))

In [None]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
    """Generate text character by character using the learned model.

    Args:
        model: Stateful model built with batch size 1 that maps a batch of
            character ids to per-position scores over the vocabulary.
        start_string: Non-empty seed text; every character must be a key of
            `char2idx`.
        num_generate: Number of characters to generate after the seed.
        temperature: Positive sampling temperature. Low temperatures result
            in more predictable text, higher temperatures in more surprising
            text.

    Returns:
        `start_string` followed by the generated characters.

    Raises:
        ValueError: If `start_string` is empty or `temperature` is not positive.
        KeyError: If `start_string` contains a character missing from `char2idx`.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # tf.random.categorical expects unnormalized log-probabilities (logits).
    # A model whose last layer applies softmax emits probabilities instead,
    # which must be converted back to log space before sampling; otherwise
    # the sampled distribution is nearly uniform.
    last_activation = getattr(model.layers[-1], 'activation', None)
    outputs_probabilities = getattr(last_activation, '__name__', None) == 'softmax'

    # Converting our start string to numbers (vectorizing) and adding the batch dimension
    input_eval = np.expand_dims([char2idx[s] for s in start_string], 0)

    # Generated characters, joined at the end
    text_generated = []

    # Here batch size == 1
    model.reset_states()
    for _ in range(num_generate):
        predictions = model.predict(input_eval)

        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        if outputs_probabilities:
            # epsilon keeps log() finite for zero probabilities
            predictions = tf.math.log(predictions + tf.keras.backend.epsilon())

        # using a categorical distribution to predict the character returned by the model
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # We pass the predicted character as the next input to the model;
        # the stateful layers carry the previous hidden state along
        input_eval = np.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

In [None]:
# Sample text from the restored model, seeded with a short prompt
generate_text(best_model, u"Climate change is")