In [0]:
import random
import tensorflow as tf

from sklearn.model_selection import train_test_split

  return f(*args, **kwds)


In [0]:
# --- Synthetic dataset settings -------------------------------------------
DATASET_SIZE = 100_000      # number of (expression, result) pairs to generate
OPERATORS = ["+", "-"]      # operators sampled when building an expression
MIN, MAX = 1, 9             # inclusive bounds for each randomly drawn operand

# --- Special symbols and vocabulary ---------------------------------------
START, END, PAD = '^', '$', '#'
# NOTE(review): presumably the padded length handed to sentence2ids;
# it is not referenced in the visible cells -- confirm.
SEQ_LEN = 15
# The position of a symbol in this string is used as its integer id.
VOCAB = '#^$+-1234567890'
SPECIAL_VOCAB = {START, END, PAD}

In [0]:
def expression(operators, min_value, max_value, nobs):
    """Lazily generate random two-operand arithmetic samples.

    Args:
        operators: sequence of operator symbols to choose from (e.g. "+").
        min_value: smallest operand value (inclusive).
        max_value: largest operand value (inclusive).
        nobs: number of samples to produce.

    Yields:
        (expr, result) where ``expr`` is the text "<x><op><y>" and
        ``result`` is the value Python computes for that text.
    """
    for _ in range(nobs):
        # Draw order (left operand, right operand, operator) is kept fixed so
        # a seeded ``random`` produces the same stream of samples.
        left = random.randint(a=min_value, b=max_value)
        right = random.randint(a=min_value, b=max_value)
        operator = random.choice(operators)
        text = "{}{}{}".format(left, operator, right)
        # eval() only ever sees the string assembled just above; the
        # operator symbols therefore have to come from trusted code.
        yield text, eval(text)

In [0]:
# Materialise the whole synthetic dataset, then hold out 20% of it for testing.
dfull = [sample for sample in expression(OPERATORS, MIN, MAX, DATASET_SIZE)]
dtrain, dtest = train_test_split(dfull, test_size=0.2)

# Symbol <-> integer-id lookup tables; an id is the symbol's position in VOCAB.
word2id = dict(zip(VOCAB, range(len(VOCAB))))
id2word = {idx: symbol for symbol, idx in word2id.items()}

In [0]:
def sentence2ids(sentence, mapping, sequence_len):
    """Encode an expression as a fixed-length list of symbol ids.

    The sentence is truncated to ``sequence_len - 1`` symbols, terminated
    with the END symbol and right-padded with PAD up to ``sequence_len``.

    Args:
        sentence: expression to encode (a sliceable sequence of symbols,
            e.g. a string).
        mapping: dictionary that maps every symbol, including END and PAD,
            to its integer id.
        sequence_len: common length of all encoded sequences.

    Returns:
        (id_seq, seq_len): the padded list of ids and the number of
        meaningful ids in it (symbols plus END, padding excluded).

    Raises:
        KeyError: if a symbol (or END / PAD) is missing from ``mapping``.
    """
    # Always leave one slot for END; never slice with a negative bound.
    keep = max(sequence_len - 1, 0)
    id_seq = [mapping[symbol] for symbol in sentence[:keep]]
    id_seq.append(mapping[END])

    # Record the length BEFORE padding: reporting the padded length would
    # make every sequence look ``sequence_len`` long and defeat any
    # length-based masking downstream.
    seq_len = len(id_seq)
    id_seq.extend([mapping[PAD]] * (sequence_len - seq_len))
    return id_seq, seq_len

def id2sentence(sequence, mapping):
    """Decode a sequence of ids back into text.

    Args:
        sequence: iterable of integer ids.
        mapping: dictionary that maps an id to its symbol.

    Returns:
        The decoded symbols joined into one string, with every symbol that
        belongs to SPECIAL_VOCAB left out.
    """
    symbols = []
    for idx in sequence:
        symbol = mapping[idx]
        if symbol in SPECIAL_VOCAB:
            continue
        symbols.append(symbol)
    return "".join(symbols)

def generate_batches(samples, batch_size=64):
    """Group (x, y) pairs into consecutive batches.

    Args:
        samples: iterable of two-element (x, y) pairs.
        batch_size: number of pairs per batch.

    Yields:
        (X, Y): two parallel lists holding the x and y values of one batch.
        The last batch may be shorter than ``batch_size``.
    """
    inputs, targets = [], []
    seen = 0
    for x, y in samples:
        seen += 1
        inputs.append(x)
        targets.append(y)
        if seen % batch_size != 0:
            continue
        yield inputs, targets
        # Fresh lists, so a batch already handed out is never mutated.
        inputs, targets = [], []
    # Flush the trailing partial batch; both lists always grow together,
    # so checking one of them is enough.
    if inputs:
        yield inputs, targets

In [0]:
class Seq2Seq:
    """GRU encoder-decoder model written against the TensorFlow 1.x graph API.

    Usage: ``Seq2Seq()`` only declares the placeholders; call ``init_model``
    once to build the rest of the graph, then use ``train_on_batch`` and the
    ``predict_*`` methods with a ``tf.Session``.

    The misspelled public attribute names ``droputh_ph`` and
    ``train_predicion`` are kept as they are so that existing code reading
    them keeps working.
    """

    def __init__(self):
        """Declare the placeholders through which data enters the graph."""
        self.input_batch = tf.placeholder(
            shape=[None, None],  # [batch_size, sequence_len]
            dtype=tf.int32,
            name='input_batch'
        )
        self.input_batch_lengths = tf.placeholder(
            shape=[None],  # [batch_size]
            dtype=tf.int32,
            name="input_batch_lengths"
        )
        # Rank 2: it is concatenated with a [batch_size, 1] column of start
        # tokens along axis 1 and used as the per-step loss targets.
        self.ground_truth = tf.placeholder(
            shape=[None, None],  # [batch_size, sequence_len]
            dtype=tf.int32,
            name="ground_truth"
        )
        self.ground_truth_lengths = tf.placeholder(
            shape=[None],  # [batch_size]
            dtype=tf.int32,
            name="ground_truth_lengths"
        )
        # Keep probability for the encoder's DropoutWrapper. It defaults to
        # 1.0 (no dropout) because the predict_* methods do not feed it.
        self.droputh_ph = tf.placeholder_with_default(
            tf.constant(1.0, dtype=tf.float32),
            shape=[],  # scalar
            name="dropout"
        )
        # shape=[] declares a true scalar; shape=None would mean "unknown
        # shape", which optimize_loss does not accept as a learning rate.
        self.learning_rate_ph = tf.placeholder(
            shape=[],  # scalar
            dtype=tf.float32,
            name="learning_rate"
        )

    def __create_embeddings(self, vocab_size, embeddings_size):
        """Create the embedding matrix and embed the encoder input."""
        init_embeddings = tf.random_uniform(
            (vocab_size, embeddings_size),
            -1.,
            1.
        )
        # [vocab_size, embedding_size]
        self.embeddings = tf.Variable(
            init_embeddings,
            name="embedding_matrix",
            dtype=tf.float32
        )
        self.input_batch_embedded = tf.nn.embedding_lookup(
            params=self.embeddings,
            ids=self.input_batch
        )

    def __build_encoder(self, hidden_size):
        """Run a dropout-wrapped GRU over the embedded input.

        Only the final state is kept (``self.final_encoder_state``); it is
        used as the initial state of the decoder.
        """
        encoder_cell = tf.nn.rnn_cell.DropoutWrapper(
            cell=tf.nn.rnn_cell.GRUCell(num_units=hidden_size),
            input_keep_prob=self.droputh_ph,
            state_keep_prob=self.droputh_ph,
            output_keep_prob=self.droputh_ph
        )
        _, self.final_encoder_state = tf.nn.dynamic_rnn(
            cell=encoder_cell,
            inputs=self.input_batch_embedded,
            sequence_length=self.input_batch_lengths,
            dtype=tf.float32
        )

    def __build_decoder(
        self,
        hidden_size,
        vocab_size,
        max_iter,
        start_symbol_id,
        end_symbol_id
    ):
        """Build the training and inference decoders over shared weights.

        Sets ``self.train_outputs`` (decoder fed with the ground truth) and
        ``self.infer_outputs`` (decoder fed with its own greedy predictions).
        """
        def decode(helper, scope, reuse=None):
            """Decode with ``helper`` inside variable scope ``scope``."""
            with tf.variable_scope(scope, reuse=reuse):
                decoder_cell = tf.contrib.rnn.OutputProjectionWrapper(
                    cell=tf.nn.rnn_cell.GRUCell(
                        num_units=hidden_size,
                        reuse=reuse
                    ),
                    output_size=vocab_size,
                    reuse=reuse
                )
                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=decoder_cell,
                    helper=helper,
                    initial_state=self.final_encoder_state
                )
                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder=decoder,
                    maximum_iterations=max_iter,
                    output_time_major=False,
                    impute_finished=True
                )
                return outputs

        batch_size = tf.shape(self.input_batch)[0]
        start_tokens = tf.fill([batch_size], start_symbol_id)

        # convert to shape [batch_size, 1] (column-vector)
        start_tokens_col = tf.expand_dims(start_tokens, 1)

        # prepend the start token to every ground-truth sequence
        ground_truth_as_input = tf.concat(
            [start_tokens_col, self.ground_truth],
            1  # concatenate along the time axis
        )
        self.ground_truth_embedded = tf.nn.embedding_lookup(
            params=self.embeddings,
            ids=ground_truth_as_input
        )
        # feeds the ground truth to the decoder at every step (training)
        train_helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=self.ground_truth_embedded,
            sequence_length=self.ground_truth_lengths
        )
        # feeds the decoder its own argmax prediction (inference)
        infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=self.embeddings,
            start_tokens=start_tokens,
            end_token=end_symbol_id
        )
        # Both decoders must live in the SAME variable scope: the second
        # call reuses the weights the first call created.
        self.train_outputs = decode(train_helper, "decode")
        self.infer_outputs = decode(infer_helper, "decode", reuse=True)

    def __compute_loss(self):
        """Define ``self.loss``: sequence loss masked by target lengths."""
        # The mask has to follow the TARGET lengths so that padded target
        # positions do not contribute to the loss.
        weights = tf.cast(
            tf.sequence_mask(self.ground_truth_lengths),
            dtype=tf.float32
        )
        self.loss = tf.contrib.seq2seq.sequence_loss(
            logits=self.train_outputs.rnn_output,
            targets=self.ground_truth,
            weights=weights
        )

    def __perform_optimisation(self):
        """Define ``self.train_op``: Adagrad with gradient clipping at 1."""
        self.train_op = tf.contrib.layers.optimize_loss(
            loss=self.loss,
            # Returns the graph's global-step tensor, or None when the
            # graph has none.
            global_step=tf.train.get_global_step(),
            learning_rate=self.learning_rate_ph,
            optimizer=tf.train.AdagradOptimizer(
                learning_rate=self.learning_rate_ph
            ),
            clip_gradients=1.
        )

    def init_model(
        self,
        vocab_size,
        embeddings_size,
        hidden_size,
        max_iter,
        start_symbol_id,
        end_symbol_id,
        padding_symbol_id
    ):
        """Build the full graph: embeddings, encoder, decoders, loss, train op.

        Args:
            vocab_size: number of symbols in the vocabulary.
            embeddings_size: width of each embedding vector.
            hidden_size: number of GRU units in the encoder and the decoder.
            max_iter: maximum number of decoding steps.
            start_symbol_id: id fed to the decoder as its first input.
            end_symbol_id: id that stops greedy decoding.
            padding_symbol_id: accepted for interface compatibility;
                it is not used while building the graph.
        """
        self.__create_embeddings(vocab_size, embeddings_size)
        self.__build_encoder(hidden_size)
        self.__build_decoder(
            hidden_size,
            vocab_size,
            max_iter,
            start_symbol_id,
            end_symbol_id
        )
        self.__compute_loss()
        self.__perform_optimisation()

        self.train_predicion = self.train_outputs.sample_id
        self.infer_prediction = self.infer_outputs.sample_id

    def train_on_batch(
        self,
        session,
        X, X_seq_len,
        Y, Y_seq_len,
        learning_rate,
        dropout_keep_probability
    ):
        """Run one optimisation step.

        Returns:
            (pred, loss): the training-decoder predictions and the loss
            fetched in the same ``session.run`` call as the train op.
        """
        feed_dict = {
            self.input_batch: X,
            self.input_batch_lengths: X_seq_len,
            self.ground_truth: Y,
            self.ground_truth_lengths: Y_seq_len,
            self.droputh_ph: dropout_keep_probability,
            self.learning_rate_ph: learning_rate
        }
        pred, loss, _ = session.run(
            fetches=[self.train_predicion, self.loss, self.train_op],
            feed_dict=feed_dict
        )
        return pred, loss

    def predict_for_batch(
        self,
        session,
        X,
        X_seq_len
    ):
        """Greedy-decode a batch.

        Returns:
            The result of ``session.run`` for a one-element fetch list,
            i.e. a one-element list holding the predictions.
        """
        feed_dict = {
            self.input_batch: X,
            self.input_batch_lengths: X_seq_len
        }
        pred = session.run(
            fetches=[self.infer_prediction],
            feed_dict=feed_dict
        )
        return pred

    def predict_for_batch_with_loss(
        self,
        session,
        X, X_seq_len,
        Y, Y_seq_len
    ):
        """Greedy-decode a batch and evaluate the loss against ``Y``.

        Returns:
            (pred, loss): inference predictions and the loss value.
        """
        feed_dict = {
            self.input_batch: X,
            self.input_batch_lengths: X_seq_len,
            self.ground_truth: Y,
            self.ground_truth_lengths: Y_seq_len,
        }
        # The feed_dict must be passed on; without it the placeholders
        # have no values.
        pred, loss = session.run(
            fetches=[self.infer_prediction, self.loss],
            feed_dict=feed_dict
        )
        return pred, loss

In [0]:
# Clear the default graph before building the model in it.
tf.reset_default_graph()

# The class defined in this notebook is Seq2Seq. Its constructor takes no
# hyperparameters: it only declares placeholders, and init_model() builds
# the rest of the graph from the settings below.
model = Seq2Seq()
model.init_model(
    vocab_size=len(word2id),
    embeddings_size=20,
    hidden_size=512,
    max_iter=7,
    start_symbol_id=word2id[START],
    end_symbol_id=word2id[END],
    padding_symbol_id=word2id[PAD]
)

In [1]:
# Scratch arithmetic: a bare expression whose value the notebook displays.
# Neither operand is referenced elsewhere in the visible cells.
8400+4304

12704