## Characterwise Double-Stacked LSTM as Author


In [1]:
import numpy
import theano
from theano import tensor

from blocks.bricks import Tanh
from blocks.bricks.recurrent import GatedRecurrent
from blocks.bricks.sequence_generators import (SequenceGenerator, Readout, SoftmaxEmitter, LookupFeedback)
from blocks.graph import ComputationGraph

from blocks.algorithms import GradientDescent, Scale
from blocks.initialization import Orthogonal, IsotropicGaussian, Constant
from blocks.model import Model
from blocks.monitoring import aggregation
from blocks.extensions import FinishAfter, Printing
from blocks.extensions.saveload import Checkpoint
from blocks.extensions.monitoring import TrainingDataMonitoring
from blocks.main_loop import MainLoop
from blocks.select import Selector


In [16]:
# Character vocabulary: every printable ASCII character plus one extra
# '<UNK>' entry appended at the end.
import string

all_chars = list(string.printable) + ['<UNK>']

# Two lookup tables built from the same enumeration:
#   code2char: integer code -> character
#   char2code: character    -> integer code
code2char = {}
char2code = {}
for code, char in enumerate(all_chars):
    code2char[code] = char
    char2code[char] = code

In [17]:
#rng = numpy.random.RandomState(1)

# Mini-batch geometry: `seq_len` is passed as the chunk length of the
# character dataset, `batch_size` as the batch size of the iteration scheme.
seq_len = 100
batch_size = 50

# Model sizes: `dim` is the recurrent transition's dimension, `feedback_dim`
# the size of the feedback lookup, and `num_states` (one per dictionary
# entry) the size of the readout.
feedback_dim = 8
dim = 10
num_states = len(char2code)

In [43]:
#from fuel.datasets import Dataset
from fuel.streams import DataStream
from fuel.schemes import ConstantScheme

from fuel.datasets import Dataset


# Path of the training corpus; being relative, it is resolved against the
# working directory the notebook is run from.
data_file = '../data/Shakespeare.poetry.txt'

# Earlier attempt based on Fuel's built-in TextFile dataset, left disabled
# (presumably superseded by the CharacterTextFile class defined below).
#from fuel.datasets import TextFile
#dataset = TextFile([data_file], bos_token=None, eos_token=None, level="character", dictionary=char2code)
#data_stream = DataStream(dataset, iteration_scheme=ConstantScheme(batch_size))


class CharacterTextFile(Dataset):
    """Serve a text file as consecutive fixed-length chunks of character codes.

    A request for ``n`` examples reads ``n`` consecutive chunks of
    ``chunk_len`` characters from the file and returns them as the columns
    of one integer matrix.

    Parameters
    ----------
    fname : str
        Path of the text file to read.
    chunk_len : int
        Number of characters per chunk (rows of the returned matrix).
    dictionary : dict
        Maps single characters to integer codes. If it contains the key
        ``'<UNK>'``, that code is substituted for characters missing from
        the dictionary; otherwise a missing character raises ``KeyError``.

    """
    provides_sources = ("data",)

    def __init__(self, fname, chunk_len, dictionary, **kwargs):
        self.fname = fname
        self.chunk_len = chunk_len
        self.dictionary = dictionary
        super(CharacterTextFile, self).__init__(**kwargs)

    def open(self):
        """Return a freshly opened, read-only handle on the text file."""
        return open(self.fname, 'r')

    def get_data(self, state, request):
        """Read ``request`` chunks from ``state`` and encode them.

        Parameters
        ----------
        state : file object
            Handle previously returned by :meth:`open`.
        request : int
            Number of chunks (columns) to return.

        Returns
        -------
        tuple
            One-element tuple holding an ``int64`` array of shape
            ``(chunk_len, request)``; column ``i`` contains the codes of the
            ``i``-th chunk read.

        Raises
        ------
        StopIteration
            When the file cannot supply another complete chunk. Any chunks
            already read for the current request are discarded.
        KeyError
            For a character missing from the dictionary when the dictionary
            has no ``'<UNK>'`` entry.

        """
        assert isinstance(request, int)
        unk_code = self.dictionary.get('<UNK>')
        x = numpy.zeros((self.chunk_len, request), dtype='int64')
        for i in range(request):
            txt = state.read(self.chunk_len)
            if len(txt) < self.chunk_len:
                # Short read means end of file: signal the end of the epoch
                # instead of failing on a shape mismatch in the assignment.
                raise StopIteration
            if unk_code is None:
                codes = [self.dictionary[c] for c in txt]
            else:
                codes = [self.dictionary.get(c, unk_code) for c in txt]
            x[:, i] = codes
        return (x,)

    def close(self, state):
        """Close the file handle that :meth:`open` returned."""
        # File objects are closed through their own method; there is no
        # global ``close`` function.
        state.close()
dataset = CharacterTextFile(data_file, chunk_len=seq_len, dictionary=char2code)
# Wrap the dataset built above rather than constructing a second, identical
# instance just for the stream.
data_stream = DataStream(dataset,
                         iteration_scheme=ConstantScheme(batch_size))

# Sanity check: pull one batch of 10 chunks and decode its first column back
# into text.
a = data_stream.get_data(10)
#[ code2char[v] for v in [94, 27, 21, 94, 16, 14, 54, 23, 14, 12] ]      # Horizontally
#[ code2char[v] for v in [94, 94,95,36,94,47,50,57,40,53,68,54,94,38] ]  # Vertically
''.join(code2char[v] for v in a[0][:, 0])

In [3]:
# Recurrent core of the generator.
# NOTE(review): the notebook title mentions a double-stacked LSTM, but this
# cell builds a single GatedRecurrent transition.
transition = GatedRecurrent(name="transition", dim=dim, activation=Tanh())

# Readout: a softmax emitter over `num_states` symbols, with each emitted
# symbol fed back through a lookup of size `feedback_dim`.
emitter = SoftmaxEmitter(name="emitter")
feedback = LookupFeedback(num_states, feedback_dim, name='feedback')
readout = Readout(
    readout_dim=num_states,
    source_names=["states"],
    emitter=emitter,
    feedback_brick=feedback,
    name="readout")

generator = SequenceGenerator(
    readout,
    transition,
    weights_init=IsotropicGaussian(0.01),
    biases_init=Constant(0),
    name="generator")

# Push the generator-wide initialisation settings down to the children first,
# then override the transition's weights with an orthogonal scheme before
# anything is actually initialised.
generator.push_initialization_config()
transition.weights_init = Orthogonal()
generator.initialize()

# Give an idea of what's going on: log the name and shape of every parameter
# of the generator.
import logging
import pprint

# NOTE: INFO records are only displayed if logging has been configured to
# show them (e.g. via logging.basicConfig(level=logging.INFO)).
logger = logging.getLogger(__name__)
logger.info("Parameters:\n" + pprint.pformat(
    [(key, value.get_value().shape) for key, value in Selector(generator).get_params().items()],
    width=120))
# The Markov-chain entropy messages that used to follow were removed: they
# referenced a MarkovChainDataset that this notebook never defines.

# Build the cost computation graph.
# `x` is named 'data' to match the source provided by the character dataset;
# the summed cost is averaged over x.shape[1], the number of columns
# (sequences) in the batch.
x = tensor.lmatrix('data')
cost = aggregation.mean(generator.cost_matrix(x[:, :]).sum(),
                        x.shape[1])
cost.name = "sequence_log_likelihood"

# Training-run settings. These names were previously used without ever being
# defined; the values below are placeholders.
# TODO(review): confirm the intended number of batches and checkpoint path.
num_batches = 1000
save_path = 'shakespeare_chars.pkl'

algorithm = GradientDescent(
    cost=cost, params=list(Selector(generator).get_params().values()),
    step_rule=Scale(0.001))
main_loop = MainLoop(
    algorithm=algorithm,
    # Train on the character dataset built earlier in the notebook (the
    # previous MarkovChainDataset/rng pair is not defined anywhere here).
    data_stream=DataStream(
        dataset,
        iteration_scheme=ConstantScheme(batch_size)),
    model=Model(cost),
    extensions=[FinishAfter(after_n_batches=num_batches),
                TrainingDataMonitoring([cost], prefix="this_step",
                                       after_batch=True),
                TrainingDataMonitoring([cost], prefix="average",
                                       every_n_batches=100),
                Checkpoint(save_path, every_n_batches=500),
                Printing(every_n_batches=100)])
main_loop.run()
