In [18]:
"""
The bi-directional RNN that will form the basis of all our future dialogue
models.

The goal is to create a bi-directional encoder-decoder that can be either used independently
for next-response generation, or integrated into a hierarchical (or more complicated)
model.

To that end, the BidirectionalRNN class should support the same interface as our
other dialogue models: it should take a Config() object, it should take a DialogueCorpus()
object, and it should support the same fit() and predict() methods that (along with
scikit-learn classifiers) all our models support.
"""

"""
TODO:
1. we currently ignore encoder/decoder depth -- fix this
"""

"""
TODO:
x  finish build-network function for encoder
x  copy and write decoder build-network function on same pattern
*3. give both individual fit and predict functions
4. write compose of both as BidirectionalEncoderDecoderRNN
5. alter DialogueCorpus so it takes into account whether or not the model being trained is hierarchical
   (i.e., wants whole dialogues) or flat (i.e., wants only adjacency pairs)
"""

# Keras packages
from keras.models import Model
from keras.layers import Input, LSTM, GRU, Dense, Bidirectional

# Our packages
from config import Config
from dialogue_corpus import DialogueCorpus

class EmbedOHE(Dense):
    """
    Semantic alias for a Dense layer used as a word embedding.

    The class adds no behaviour of its own. Applied to one-hot word vectors
    with no bias and no activation (as the encoder does), the Dense weight
    matrix acts as an embedding table: each one-hot input selects one row.
    """

class BidirectionalEncoderRNN:
    """
    Encoder half of the dialogue model.

    Embeds one-hot word sequences and feeds them through an LSTM or GRU
    (optionally wrapped in Bidirectional), exposing the final recurrent
    state(s) as the model output.

    Attributes set by build():
        encoder_input         the Keras Input node, shape (None, vocab_size)
        encoder_rnn           the recurrent layer (the Bidirectional wrapper
                              when 'encoding-layer-bidirectional' is truthy)
        encoder_hidden_state  the final state(s) of the recurrent layer; a
                              single tensor for a unidirectional GRU,
                              otherwise a list of tensors
        training_model        Model(encoder_input -> encoder_hidden_state)
    """

    def __init__(self, config=None, corpus=None):
        """
        config -- a Config() object; a fresh Config() is created when omitted
        corpus -- a DialogueCorpus() object; built from the config when omitted
        """
        # Create the defaults per instance: a `config=Config()` default would
        # be evaluated once at definition time and shared by every instance.
        self.config = Config() if config is None else config
        self.corpus = DialogueCorpus(self.config) if corpus is None else corpus

        self.build() # this will be a Keras model for now

    def build(self):
        """
        The encoder computational graph consists of three components:
        (1) the input node            `encoder_input`
        (2) the Recurrent part        `encoder_rnn`
        (3) the hidden state output   `encoder_hidden_state`

        Raises Exception if 'recurrent-unit-type' is neither 'lstm' nor 'gru'.
        """

        # Grab hyperparameters from self.config:
        hidden_dim = self.config['encoding-layer-width']
        recurrent_unit = self.config['recurrent-unit-type']
        bidirectional = self.config['encoding-layer-bidirectional']
        embedding_dim = self.config['embedding-dim']
        vocab_size = self.config['vocab-size']

        # Reject unknown unit types before creating any layers.
        if recurrent_unit not in ('lstm', 'gru'):
            raise Exception('Invalid recurrent unit type: {}'.format(recurrent_unit))

        # Assemble the network components:
        encoder_input = Input(shape=(None, vocab_size))
        encoder_embed = EmbedOHE(embedding_dim, activation=None, use_bias=False)(encoder_input)

        rnn_class = LSTM if recurrent_unit == 'lstm' else GRU
        encoder_rnn = rnn_class(hidden_dim, return_state=True)

        # Make the RNN component bidirectional, if desired. The wrapping has
        # to happen BEFORE the layer is applied to the embedding, otherwise
        # the wrapper never becomes part of the graph.
        if bidirectional:
            encoder_rnn = Bidirectional(encoder_rnn)

        # With return_state=True the layer yields [output, state, ...];
        # discard the output, keeping only the state(s). For a Bidirectional
        # layer the forward states come first, then the backward states.
        rnn_outputs = encoder_rnn(encoder_embed)
        encoder_states = list(rnn_outputs[1:])
        if len(encoder_states) == 1:
            encoder_hidden_state = encoder_states[0]
        else:
            encoder_hidden_state = encoder_states

        # save the three Encoder components as class state
        self.encoder_input = encoder_input
        self.encoder_rnn = encoder_rnn
        self.encoder_hidden_state = encoder_hidden_state

        # finally, build the training model
        self.training_model = Model(self.encoder_input, self.encoder_hidden_state)

    def fit(self):
        """Placeholder: stand-alone encoder training is not implemented yet."""
        pass

    def predict(self):
        """Placeholder: stand-alone encoder prediction is not implemented yet."""
        pass
        
        
        
class BidirectionalDecoderRNN:
    """
    Decoder half of the dialogue model.

    Runs one-hot word sequences through an LSTM or GRU (optionally wrapped
    in Bidirectional) that starts from externally supplied state(s), then
    maps every timestep to a softmax distribution over the vocabulary.

    Attributes set by build():
        decoder_input         the Keras Input node, shape (None, vocab_size)
        decoder_state_inputs  list of Input nodes for the initial state(s):
                              2 per direction for an LSTM, 1 per direction
                              for a GRU (forward states first, then backward)
        decoder_rnn           the recurrent layer (the Bidirectional wrapper
                              when 'encoding-layer-bidirectional' is truthy)
        decoder_dense         the softmax projection onto the vocabulary
        decoder_output        the per-timestep output of `decoder_dense`
        training_model        Model([decoder_input] + decoder_state_inputs
                                    -> decoder_output)
    """

    def __init__(self, config=None, corpus=None):
        """
        config -- a Config() object; a fresh Config() is created when omitted
        corpus -- a DialogueCorpus() object; built from the config when omitted
        """
        # Create the defaults per instance: a `config=Config()` default would
        # be evaluated once at definition time and shared by every instance.
        self.config = Config() if config is None else config
        self.corpus = DialogueCorpus(self.config) if corpus is None else corpus

        self.build() # this will be a Keras model for now

    def build(self):
        """
        The decoder computational graph consists of these components:
        (1) the input node            `decoder_input`
        (2) the initial-state inputs  `decoder_state_inputs`
        (3) the Recurrent part        `decoder_rnn`
        (4) the softmax projection    `decoder_dense`
        (5) the decoder output        `decoder_output`

        Raises Exception if 'recurrent-unit-type' is neither 'lstm' nor 'gru'.
        """

        # Grab hyperparameters from self.config:
        hidden_dim = self.config['encoding-layer-width']
        recurrent_unit = self.config['recurrent-unit-type']
        bidirectional = self.config['encoding-layer-bidirectional']
        vocab_size = self.config['vocab-size']

        # An LSTM carries two state tensors (h and c), a GRU only one.
        # Reject unknown unit types before creating any layers.
        states_per_direction = {'lstm': 2, 'gru': 1}.get(recurrent_unit)
        if states_per_direction is None:
            raise Exception('Invalid recurrent unit type: {}'.format(recurrent_unit))

        # Assemble the network components:
        decoder_input = Input(shape=(None, vocab_size))

        rnn_class = LSTM if recurrent_unit == 'lstm' else GRU
        decoder_rnn = rnn_class(hidden_dim, return_sequences=True, return_state=True)
        num_states = states_per_direction

        # Make the RNN component bidirectional, if desired. The wrapping has
        # to happen BEFORE the layer is applied, otherwise the wrapper never
        # becomes part of the graph.
        if bidirectional:
            decoder_rnn = Bidirectional(decoder_rnn)
            num_states *= 2

        # The initial state is an explicit model input, so the decoder can be
        # built without reaching for another model's tensors.
        decoder_state_inputs = [Input(shape=(hidden_dim,)) for _ in range(num_states)]

        # With return_state=True the layer yields [sequence_output, state, ...];
        # only the sequence output feeds the softmax layer.
        rnn_outputs = decoder_rnn(decoder_input, initial_state=decoder_state_inputs)

        decoder_dense = Dense(vocab_size, activation='softmax')
        decoder_output = decoder_dense(rnn_outputs[0])

        # save the Decoder components as class state
        self.decoder_input = decoder_input
        self.decoder_state_inputs = decoder_state_inputs
        self.decoder_rnn = decoder_rnn
        self.decoder_dense = decoder_dense
        self.decoder_output = decoder_output

        # build the training model
        self.training_model = Model([decoder_input] + decoder_state_inputs, decoder_output)
    
class BidirectionalEncoderDecoderRNN:
    """
    Composes a BidirectionalEncoderRNN and a BidirectionalDecoderRNN into a
    single next-response model: the encoder's final state(s) initialise the
    decoder's recurrent layer.
    """

    def __init__(self, config=None, corpus=None):
        """
        config -- a Config() object; a fresh Config() is created when omitted
        corpus -- a DialogueCorpus() object; built from the config when omitted
        """
        # Create the defaults per instance: a `config=Config()` default would
        # be evaluated once at definition time and shared by every instance.
        self.config = Config() if config is None else config
        self.corpus = DialogueCorpus(self.config) if corpus is None else corpus

        # build the encoder and decoder
        self.encoder = BidirectionalEncoderRNN(self.config, self.corpus)
        self.decoder = BidirectionalDecoderRNN(self.config, self.corpus)

        # combine them into training and inference models
        self.build_training_model()
        self.build_inference_model()

    def build_training_model(self):
        """
        Build `self.training_model`, mapping [encoder input, decoder input]
        to the decoder's per-timestep output.

        Reads `encoder_input` and `encoder_hidden_state` from the encoder and
        `decoder_input`, `decoder_rnn` and `decoder_dense` from the decoder.

        Raises NotImplementedError when config['hierarchical'] is truthy.
        """
        if self.config['hierarchical']:
            # Fail loudly: a silent no-op would leave `training_model` unset
            # and only surface later inside fit().
            raise NotImplementedError('Hierarchical training model is not implemented yet')

        # The encoder exposes either one state tensor or a list of them;
        # normalise to a list before handing it to the decoder RNN.
        encoder_state = self.encoder.encoder_hidden_state
        if not isinstance(encoder_state, (list, tuple)):
            encoder_state = [encoder_state]

        # Re-apply the decoder's layers so they start from the encoder state.
        # The RNN yields [sequence_output, state, ...]; keep only the sequence.
        rnn_outputs = self.decoder.decoder_rnn(self.decoder.decoder_input,
                                               initial_state=list(encoder_state))
        decoder_output = self.decoder.decoder_dense(rnn_outputs[0])

        self.training_model = Model([self.encoder.encoder_input, self.decoder.decoder_input],
                                    decoder_output)

    def build_inference_model(self):
        """Placeholder: the inference model is not implemented yet."""
        pass

    def fit(self):
        """
        Compile `self.training_model` and train it on the corpus, using the
        optimizer, loss, batch size, epoch count and validation split from
        the config.
        """
        # grab some hyperparameters from our config
        optimizer = self.config['optimizer']
        loss = self.config['loss']
        batch_size = self.config['batch-size']
        num_epochs = self.config['num-epochs']
        validation_split = self.config['validation-split']

        # grab the training and validation data
        encoder_x = self.corpus.get_encoder_x()
        # NOTE(review): the decoder input was previously loaded with
        # get_decoder_y(), i.e. the targets themselves. This assumes
        # DialogueCorpus exposes get_decoder_x() alongside get_decoder_y()
        # -- confirm against dialogue_corpus.py.
        decoder_x = self.corpus.get_decoder_x()
        decoder_y = self.corpus.get_decoder_y()

        self.training_model.compile(optimizer=optimizer, loss=loss)
        self.training_model.fit([encoder_x, decoder_x], decoder_y,
                                batch_size=batch_size,
                                epochs=num_epochs,
                                validation_split=validation_split)

    def response(self, input_sentence):
        """Placeholder: generating a response to `input_sentence` is not implemented yet."""
        pass

    def converse(self):
        """Placeholder for a loop that lets the user converse with the machine."""
        pass
        

In [21]:
# Instantiate the encoder with its default arguments; the constructor calls
# build(), which assembles the Keras model.
enc = BidirectionalEncoderRNN()



In [23]:
# print the layer-by-layer summary of the encoder's Keras model
enc.training_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         (None, None, 15000)       0         
_________________________________________________________________
embed_ohe_8 (EmbedOHE)       (None, None, 512)         7680000   
_________________________________________________________________
gru_6 (GRU)                  [(None, 512), (None, 512) 1574400   
Total params: 9,254,400
Trainable params: 9,254,400
Non-trainable params: 0
_________________________________________________________________


In [24]:
# instantiate the decoder with its default arguments
dec = BidirectionalDecoderRNN()

In [None]:
dec.bu