In [1]:
# Structurally Constrained Recurrent Network (SCRN) Model
#
# This gives an implementation of the SCRN model given in Mikolov et al. 2015, arXiv:1412.7753 [cs.NE], 
# https://arxiv.org/abs/1412.7753 using Python and Tensorflow.
#
# This IPython Notebook provides an example of how to call the associated library of Python scripts.  
# Mikolov et al. should be consulted to make sure of the correct hyperparameter values.
#
# Stuart Hagler, 2017

In [2]:
# Imports
import math
import sys

# Local Imports
sys.path.insert(0, 'python')
from lstm import lstm_graph
from read_data import read_data
from scrn import scrn_graph
from srn import srn_graph
from tokens import text_elements_to_tokens

In [3]:
# Flags
#
# rnn_flg selects the recurrent network architecture:
#     1 -> SRN
#     2 -> LSTM
#     3 -> SCRN
rnn_flg = 2

# usecase_flg selects the prediction task:
#     1 -> predicting letters
#     2 -> predicting words with cutoff for infrequent words
usecase_flg = 1

In [4]:
# Network-specific hyperparameters
#
# Every branch defines hidden_size and num_unfoldings. The SCRN branch also
# defines alpha and state_size, which are only passed to scrn_graph.
if rnn_flg == 1:

    # Network hyperparameters (SRN)
    hidden_size = 110         # Dimension of the hidden vector

    # Training hyperparameters
    num_unfoldings = 10       # Total number of unfoldings

elif rnn_flg == 2:

    # Network hyperparameters (LSTM)
    hidden_size = 110         # Dimension of the hidden vector

    # Training hyperparameters
    num_unfoldings = 10       # Total number of unfoldings

elif rnn_flg == 3:

    # Network hyperparameters (SCRN)
    alpha = 0.95              # SCRN alpha parameter (presumably the context-state weight
                              # from Mikolov et al. -- TODO confirm against scrn.py)
    hidden_size = 100         # Dimension of the hidden vector
    state_size = 10           # Dimension of the state vector

    # Training hyperparameters
    num_unfoldings = 50       # Total number of unfoldings

else:
    # Fail here rather than later with a NameError on hidden_size / num_unfoldings.
    raise ValueError('rnn_flg must be 1 (SRN), 2 (LSTM) or 3 (SCRN); got %r' % (rnn_flg,))

# General network hyperparameters
word_frequency_cutoff = 50    # Cutoff for infrequent words for usecase_flg = 2

# General training hyperparameters
batch_size = 32               # Batch size for each tower
learning_decay = 1/1.5        # Multiplier to decay the learn rate when required
learning_rate = 0.05          # Initial learning rate
num_epochs = 100              # Total number of epochs to run the algorithm
num_gpus = 1                  # Number of GPUs (towers) available
optimization_frequency = 5    # Number of unfoldings before optimization step
summary_frequency = 500       # Summary information is displayed after training this many batches

# Data file
filename = 'data/text8.zip'

In [None]:
# Prepare training and validation batches
#
# Reads the raw text, converts it to tokens, then splits the token sequence
# 10:1 into training and validation text, with one contiguous slice per tower.
raw_data = read_data(usecase_flg, filename)
data, dictionary, reverse_dictionary, vocabulary_size = text_elements_to_tokens(
    usecase_flg, raw_data, word_frequency_cutoff)

# Sizes come from `data`, the sequence that is actually sliced below, so they
# cannot overshoot it if tokenization changes the length of `raw_data`.
# Floor division keeps them exact integers with no floating-point rounding.
num_tokens = len(data)
training_size = (10 * num_tokens) // (11 * num_gpus)
validation_size = num_tokens // (11 * num_gpus)

# Validation slices begin right after the last tower's training slice.
validation_start = num_gpus * training_size

training_text = [data[i * training_size:(i + 1) * training_size]
                 for i in range(num_gpus)]
validation_text = [data[validation_start + i * validation_size:
                        validation_start + (i + 1) * validation_size]
                   for i in range(num_gpus)]

In [None]:
print('Vocabulary Size: %d' % vocabulary_size)

# Initiate graph
#
# Build the graph object for the architecture selected by rnn_flg. All three
# constructors share the trailing arguments; SCRN additionally takes alpha
# and state_size.
if rnn_flg == 1:
    # Use SRN
    graph = srn_graph(num_gpus, hidden_size, vocabulary_size, num_unfoldings,
                      optimization_frequency, batch_size)
elif rnn_flg == 2:
    # Use LSTM
    graph = lstm_graph(num_gpus, hidden_size, vocabulary_size, num_unfoldings,
                       optimization_frequency, batch_size)
elif rnn_flg == 3:
    # Use SCRN
    graph = scrn_graph(num_gpus, alpha, hidden_size, state_size, vocabulary_size, num_unfoldings,
                       optimization_frequency, batch_size)
else:
    # Without this branch `graph` would be undefined and the call below would
    # fail with a NameError that does not point at the real cause.
    raise ValueError('rnn_flg must be 1 (SRN), 2 (LSTM) or 3 (SCRN); got %r' % (rnn_flg,))

# Optimize graph
graph.optimization(learning_rate, learning_decay, num_epochs, summary_frequency, training_text, validation_text)

Vocabulary Size: 28
Training Batch Generator:
     Tower: 0
          Input Text Size: 9090909
          Cut Text Size: 9090880
          Subtext Size: 284090
          Dropped Text Size: 29
          Effective Batch Size: 320
          Number of Batches: 28409
Validation Batch Generator:
     Tower: 0
          Input Text Size: 9090909
          Cut Text Size: 9090909
          Subtext Size: 9090909
          Dropped Text Size: 0
          Effective Batch Size: 1
          Number of Batches: 9090909
Initialized
Epoch: 1  Learning Rate: 0.05
     Total Batches: 500  Current Batch: 500  Cost: 2.88
     Total Batches: 1000  Current Batch: 1000  Cost: 2.83
     Total Batches: 1500  Current Batch: 1500  Cost: 2.91
     Total Batches: 2000  Current Batch: 2000  Cost: 2.85
     Total Batches: 2500  Current Batch: 2500  Cost: 2.78
     Total Batches: 3000  Current Batch: 3000  Cost: 2.77
     Total Batches: 3500  Current Batch: 3500  Cost: 2.80
     Total Batches: 4000  Current Batch: 4000  C