## General Framework:

In [None]:
# Sequence to Sequence encoder decorder Bi-LSTM LATER
# Multilingual Word Embeddings obtained from the supervised expert dictionary to initialize the embeddings layer 
# CNN Bidirectional LSTM to encode sentences in source language
# CNN Bidirectional LSTM to encode sentences in target language
# Take the average of the encoded states using attention

## Imports:

In [1]:
# -*- coding: utf-8 -*-
import os
import sys
import csv
import time
import json
import datetime
import pickle as pkl
import tensorflow as tf
from tensorflow.contrib import learn
from tqdm import tqdm
import numpy as np

#from data_helper
#from rnn_classifier import rnn_clf
#from cnn_classifier import cnn_clf
#from clstm_classifier import clstm_clf

try:
    from sklearn.model_selection import train_test_split
except ImportError as e:
    error = "Please install scikit-learn."
    print(str(e) + ': ' + error)
    sys.exit()
    
from nltk.tokenize import RegexpTokenizer
# Show warnings and errors only
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

## Parameters:

In [2]:
# Model choices
tf.flags.DEFINE_string('clf', 'cnn', "Type of classifiers. Default: cnn. You have four choices: [cnn, lstm, blstm, clstm]")

# Data parameters
tf.flags.DEFINE_string('data_file', "/Users/meryemmhamdi/Documents/rig9/ParallelCorpora/Europarl/sample_", 'Data file path')
tf.flags.DEFINE_string('emb_path', "/Users/meryemmhamdi/Documents/rig9/MultilingualEmbeddings/sample_emb.txt", 'Data file path')
tf.flags.DEFINE_string('stop_word_file', None, 'Stop word file path')
tf.flags.DEFINE_string('language', 'en', "Language of the data file. You have two choices: [ch, en]")
tf.flags.DEFINE_integer('min_frequency', 0, 'Minimal word frequency')
tf.flags.DEFINE_integer('num_classes', 2, 'Number of classes')
tf.flags.DEFINE_integer('max_length', 0, 'Max document length')
tf.flags.DEFINE_integer('vocab_size', 0, 'Vocabulary size')
tf.flags.DEFINE_float('test_size', 0.1, 'Cross validation test size')

# Model hyperparameters
tf.flags.DEFINE_integer('embedding_size', 300, 'Word embedding size. For CNN, C-LSTM.')
tf.flags.DEFINE_string('filter_sizes', '3, 4, 5', 'CNN filter sizes. For CNN, C-LSTM.')
tf.flags.DEFINE_integer('num_filters', 128, 'Number of filters per filter size. For CNN, C-LSTM.')
tf.flags.DEFINE_integer('hidden_size', 128, 'Number of hidden units in the LSTM cell. For LSTM, Bi-LSTM')
tf.flags.DEFINE_integer('num_layers', 2, 'Number of the LSTM cells. For LSTM, Bi-LSTM, C-LSTM')
tf.flags.DEFINE_float('keep_prob', 0.5, 'Dropout keep probability')  # All
tf.flags.DEFINE_float('learning_rate', 1e-3, 'Learning rate')  # All
tf.flags.DEFINE_float('l2_reg_lambda', 0.001, 'L2 regularization lambda')  # All

# Training parameters
tf.flags.DEFINE_integer('batch_size', 32, 'Batch size')
tf.flags.DEFINE_integer('num_epochs', 50, 'Number of epochs')
tf.flags.DEFINE_integer('evaluate_every_steps', 100, 'Evaluate the model on validation set after this many steps')
tf.flags.DEFINE_integer('save_every_steps', 1000, 'Save the model after this many steps')
tf.flags.DEFINE_integer('num_checkpoint', 10, 'Number of models to store')
tf.flags.DEFINE_string('src', "fr", 'source language')
tf.flags.DEFINE_string('trg', "en", 'source language')

FLAGS = tf.flags.FLAGS

if FLAGS.clf == 'lstm':
    FLAGS.embedding_size = FLAGS.hidden_size
elif FLAGS.clf == 'clstm':
    FLAGS.hidden_size = len(FLAGS.filter_sizes.split(",")) * FLAGS.num_filters

# Output files directory
timestamp = str(int(time.time()))
outdir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
if not os.path.exists(outdir):
    os.makedirs(outdir)


params = FLAGS.__flags

# Print parameters
model = params['clf']
if model == 'cnn':
    del params['hidden_size']
    del params['num_layers']
elif model == 'lstm' or model == 'blstm':
    del params['num_filters']
    del params['filter_sizes']
    params['embedding_size'] = params['hidden_size']
elif model == 'clstm':
    params['hidden_size'] = len(list(map(int, params['filter_sizes'].split(",")))) * params['num_filters']

params_dict = sorted(params.items(), key=lambda x: x[0])
print('Parameters:')
for item in params_dict:
    print('{}: {}'.format(item[0], item[1]))
print('')

# Save parameters to file
params_file = open(os.path.join(outdir, 'params.pkl'), 'wb')
pkl.dump(params, params_file, True)
params_file.close()

Parameters:
batch_size: 32
clf: cnn
data_file: /Users/meryemmhamdi/Documents/rig9/ParallelCorpora/Europarl/sample_
emb_path: /Users/meryemmhamdi/Documents/rig9/MultilingualEmbeddings/sample_emb.txt
embedding_size: 300
evaluate_every_steps: 100
filter_sizes: 3, 4, 5
keep_prob: 0.5
l2_reg_lambda: 0.001
language: en
learning_rate: 0.001
max_length: 0
min_frequency: 0
num_checkpoint: 10
num_classes: 2
num_epochs: 50
num_filters: 128
save_every_steps: 1000
src: fr
stop_word_file: None
test_size: 0.1
trg: en
vocab_size: 0



## Data Preprocessing:

In [26]:
def load_data(file_path, src_lang, trg_lang, shuffle=True):
    with open(file_path+ src_lang+ "_" +trg_lang) as file:
        sent_src_trg = file.readlines()
    sent_src = []
    sent_trg = []
    for sent in sent_src_trg:
        parts = sent.split(" ||| ")
        sent_src.append(parts[0])
        sent_trg.append(parts[1])
        
    # Tokenizing the sentences source
    tokens_src = []  
    tokenizer = RegexpTokenizer("[\w']+")
    for i in tqdm(range(0, len(sent_src))):
        tokens = tokenizer.tokenize(sent_src[i])

        tokens_src.append([src_lang+token for token in tokens])
        
    # Tokenizing the sentences target
    tokens_trg = []  
    tokenizer = RegexpTokenizer("[\w']+")
    for i in tqdm(range(0, len(sent_trg))):
        tokens = tokenizer.tokenize(sent_trg[i])

        tokens_trg.append([trg_lang+token for token in tokens])

    max_src = max([len(tokens) for tokens in tokens_src])
    max_trg = max([len(tokens) for tokens in tokens_trg])
    max_length = max(max_src, max_trg)
    
    
    x_all = tokens_src + tokens_trg
    vocab, vocab_dict = create_vocabulary(x_all)
    sequences_src = []
    for tokens in tokens_src:
        list_ids_sub = []
        for token in tokens:
            #token = token.decode("utf-8", errors='ignore')
            list_ids_sub.append(vocab[token])
        to_pad = max_src - len(tokens)
        for index in range(0, to_pad):
            list_ids_sub.append(len(vocab))
        sequences_src.append(list_ids_sub)
        
    sequences_trg = []
    for tokens in tokens_trg:
        list_ids_sub = []
        for token in tokens:
            #token = token.decode("utf-8", errors='ignore')
            list_ids_sub.append(vocab[token])
        to_pad = max_src - len(tokens)
        for index in range(0, to_pad):
            list_ids_sub.append(len(vocab))
        sequences_trg.append(list_ids_sub)
    pad = len(vocab)
    vocab["UNK"] = pad
    return sequences_src, sequences_trg, vocab, max_length

def create_vocabulary(x_all): #, save_path
    vocab_dict = {}
    for doc in x_all:
        for token in doc:
            #token = token.decode("utf-8", errors='ignore')
            # #print(token)
            if token in vocab_dict:
                vocab_dict[token] += 1
            else:
                vocab_dict[token] = 1
    vocab_list = sorted(vocab_dict)
    vocab = dict([x, y] for (y, x) in enumerate(vocab_list))

    return vocab, vocab_dict

def batch_iter(data, labels, max_length, batch_size, num_epochs):
    """
    A mini-batch iterator to generate mini-batches for training neural network
    :param data: a list of sentences. each sentence is a vector of integers
    :param labels: a list of labels
    :param batch_size: the size of mini-batch
    :param num_epochs: number of epochs
    :return: a mini-batch iterator
    """
    data_size = len(data)
    epoch_length = data_size // batch_size
    
    print("epoch_length:", epoch_length)

    for _ in range(num_epochs):
        for i in range(epoch_length):
            start_index = i * batch_size
            end_index = start_index + batch_size

            xdata = data[start_index: end_index]
            ydata = labels[start_index: end_index]
            sequence_length = (end_index-start_index) *[max_length] 

            yield xdata, ydata, sequence_length
            
def load_embeddings(emb_path, vocab):
    vocab_list = list(vocab.keys())
    with open(emb_path) as file_model:
        data = file_model.readlines()

    model_dict = {}
    model = np.zeros([len(vocab_list), FLAGS.embedding_size])
    for i in tqdm(range(0, len(data))):
        lang = data[i].split(" ")[0].split("_")[0]
        word = data[i].split(" ")[0]
        vectors = [float(vector) for vector in data[i].split(" ")[1:]]
        model_dict.update({word: vectors})
        if vocab_list[i] == word:
            model[i] = vectors
            
    embed_dim = len(model_dict[list(model_dict.keys())[0]])
    return model, model_dict, embed_dim

# MultiSkipGram:

## CNN Model:

In [27]:
class cnn_clf(object):
    """
    A CNN classifier for text classification
    """
    def __init__(self, config):
        self.max_length = config.max_length
        self.num_classes = config.num_classes
        self.vocab_size = config.vocab_size
        self.embedding_size = config.embedding_size
        self.filter_sizes = list(map(int, config.filter_sizes.split(",")))
        self.num_filters = config.num_filters
        self.l2_reg_lambda = config.l2_reg_lambda

        # Placeholders
        self.input_source = tf.placeholder(dtype=tf.int32, shape=[None, self.max_length])
        self.input_target = tf.placeholder(dtype=tf.int32, shape=[None, self.max_length])
        self.keep_prob = tf.placeholder(dtype=tf.float32)

        # L2 loss
        self.l2_loss = tf.constant(0.0)

        # Word embedding
        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            embedding = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size], 0, 1.0), trainable= True, name="embedding") # One shared embedding layer
            self.embedding_placeholder = tf.placeholder(tf.float32, [self.vocab_size, self.embedding_size])
            self.embedding_init = embedding.assign(self.embedding_placeholder)
            embed_source = tf.nn.embedding_lookup(embedding, self.input_source)
            inputs_sources = tf.expand_dims(embed_source, -1)
            
            embed_target = tf.nn.embedding_lookup(embedding, self.input_target)
            inputs_targets = tf.expand_dims(embed_target, -1)
            

        # Convolution & Maxpool
        ## Source
        pooled_outputs_source = []
        for i, filter_size in enumerate(self.filter_sizes):
            with tf.variable_scope("conv-maxpool-%s" % filter_size):
                # Convolution
                filter_shape = [filter_size, self.embedding_size, 1, self.num_filters]
                W = tf.get_variable("weights", filter_shape, initializer=tf.truncated_normal_initializer(stddev=0.1))
                b = tf.get_variable("biases", [self.num_filters], initializer=tf.constant_initializer(0.0))

                conv = tf.nn.conv2d(inputs_sources,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding='VALID',
                                    name='conv')
                # Activation function
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')

                # Maxpool
                pooled = tf.nn.max_pool(h,
                                        ksize=[1, self.max_length - filter_size + 1, 1, 1],
                                        strides=[1, 1, 1, 1],
                                        padding='VALID',
                                        name='pool')
                pooled_outputs_source.append(pooled)
                
        num_filters_total = self.num_filters * len(self.filter_sizes)
        h_pool = tf.concat(pooled_outputs_source, 3)
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
        # Add dropout
        h_drop_source = tf.nn.dropout(h_pool_flat, keep_prob=self.keep_prob)
                
        ## Target
        pooled_outputs_target = []
        for i, filter_size in enumerate(self.filter_sizes):
            with tf.variable_scope("conv-maxpool-%s" % filter_size, reuse=True):
                # Convolution
                filter_shape = [filter_size, self.embedding_size, 1, self.num_filters]
                W = tf.get_variable("weights", filter_shape, initializer=tf.truncated_normal_initializer(stddev=0.1))
                b = tf.get_variable("biases", [self.num_filters], initializer=tf.constant_initializer(0.0))

                conv = tf.nn.conv2d(inputs_targets,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding='VALID',
                                    name='conv')
                # Activation function
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')

                # Maxpool
                pooled = tf.nn.max_pool(h,
                                        ksize=[1, self.max_length - filter_size + 1, 1, 1],
                                        strides=[1, 1, 1, 1],
                                        padding='VALID',
                                        name='pool')
                pooled_outputs_target.append(pooled)

        num_filters_total = self.num_filters * len(self.filter_sizes)
        h_pool = tf.concat(pooled_outputs_target, 3)
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
        # Add dropout
        h_drop_target = tf.nn.dropout(h_pool_flat, keep_prob=self.keep_prob)
        
        """#No Softmax
        # Softmax
        with tf.name_scope('softmax'):
            softmax_w = tf.Variable(tf.truncated_normal([num_filters_total, self.num_classes], stddev=0.1), name='softmax_w')
            softmax_b = tf.Variable(tf.constant(0.1, shape=[self.num_classes]), name='softmax_b')

            # Add L2 regularization to output layer
            self.l2_loss += tf.nn.l2_loss(softmax_w)
            self.l2_loss += tf.nn.l2_loss(softmax_b)

            self.logits = tf.matmul(h_drop, softmax_w) + softmax_b
            predictions = tf.nn.softmax(self.logits)
            self.predictions = tf.argmax(predictions, 1)
            
        """

        # Loss
        with tf.name_scope('loss'):
            #losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y, logits=self.logits)
            losses = tf.losses.mean_squared_error(h_drop_source, h_drop_target, weights=1.0, scope=None, loss_collection=tf.GraphKeys.LOSSES)
            # Add L2 losses
            self.cost = tf.reduce_mean(losses) # + self.l2_reg_lambda * self.l2_loss
        

        """
        # Accuracy
        with tf.name_scope('accuracy'):
            correct_predictions = tf.equal(self.predictions, self.input_y)
            self.correct_num = tf.reduce_sum(tf.cast(correct_predictions, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name='accuracy')
        """

## Bi-LSTM:

In [None]:
class rnn_clf(object):
    """"
    LSTM and Bi-LSTM classifiers for text classification
    """
    def __init__(self, config):
        self.max_length = config.max_length
        self.num_classes = config.num_classes
        self.vocab_size = config.vocab_size
        self.hidden_size = config.hidden_size
        self.num_layers = config.num_layers
        self.l2_reg_lambda = config.l2_reg_lambda

        # Placeholders
        self.batch_size = tf.placeholder(dtype=tf.int32, shape=[])
        self.input_source = tf.placeholder(dtype=tf.int32, shape=[None, self.max_length])
        self.input_target = tf.placeholder(dtype=tf.int32, shape=[None, self.max_length])
        self.keep_prob = tf.placeholder(dtype=tf.float32, shape=[])
        self.sequence_length = tf.placeholder(dtype=tf.int32, shape=[None])

        # L2 loss
        self.l2_loss = tf.constant(0.0)

        # Word embedding
        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            embedding = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size], 0, 1.0), trainable= True, name="embedding") # One shared embedding layer
            self.embedding_placeholder = tf.placeholder(tf.float32, [self.vocab_size, self.embedding_size])
            self.embedding_init = embedding.assign(self.embedding_placeholder)
            embed_source = tf.nn.embedding_lookup(embedding, self.input_source)
            inputs_sources = tf.expand_dims(embed_source, -1)

            embed_target = tf.nn.embedding_lookup(embedding, self.input_target)
            inputs_targets = tf.expand_dims(embed_target, -1)

        # Input dropout
        self.input_source = tf.nn.dropout(inputs_sources, keep_prob=self.keep_prob)
        self.input_target = tf.nn.dropout(inputs_targets, keep_prob=self.keep_prob)

        # LSTM
        if config.clf == 'lstm':
            self.final_state = self.normal_lstm()
        else:
            self.final_state = self.bi_lstm()
        
        """ 
        # Softmax output layer
        with tf.name_scope('softmax'):
            # softmax_w = tf.get_variable('softmax_w', shape=[self.hidden_size, self.num_classes], dtype=tf.float32)
            if config.clf == 'lstm':
                softmax_w = tf.get_variable('softmax_w', shape=[self.hidden_size, self.num_classes], dtype=tf.float32)
            else:
                softmax_w = tf.get_variable('softmax_w', shape=[2 * self.hidden_size, self.num_classes], dtype=tf.float32)
            softmax_b = tf.get_variable('softmax_b', shape=[self.num_classes], dtype=tf.float32)

            # L2 regularization for output layer
            self.l2_loss += tf.nn.l2_loss(softmax_w)
            self.l2_loss += tf.nn.l2_loss(softmax_b)

            # self.logits = tf.matmul(self.final_state[self.num_layers - 1].h, softmax_w) + softmax_b
            if config.clf == 'lstm':
                self.logits = tf.matmul(self.final_state[self.num_layers - 1].h, softmax_w) + softmax_b
            else:
                self.logits = tf.matmul(self.final_state, softmax_w) + softmax_b
            predictions = tf.nn.softmax(self.logits)
            self.predictions = tf.argmax(predictions, 1)
        """

        # Loss
        with tf.name_scope('loss'):
            tvars = tf.trainable_variables()

            # L2 regularization for LSTM weights
            for tv in tvars:
                if 'kernel' in tv.name:
                    self.l2_loss += tf.nn.l2_loss(tv)

            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y,
                                                                    logits=self.logits)
            self.cost = tf.reduce_mean(losses) + self.l2_reg_lambda * self.l2_loss

        # Accuracy
        with tf.name_scope('accuracy'):
            correct_predictions = tf.equal(self.predictions, self.input_y)
            self.correct_num = tf.reduce_sum(tf.cast(correct_predictions, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name='accuracy')

    def normal_lstm(self):
        # LSTM Cell
        cell = tf.contrib.rnn.LSTMCell(self.hidden_size,
                                       forget_bias=1.0,
                                       state_is_tuple=True,
                                       reuse=tf.get_variable_scope().reuse)
        # Add dropout to cell output
        cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)

        # Stacked LSTMs
        cell = tf.contrib.rnn.MultiRNNCell([cell] * self.num_layers, state_is_tuple=True)

        self._initial_state = cell.zero_state(self.batch_size, dtype=tf.float32)

        # Dynamic LSTM
        with tf.variable_scope('LSTM'):
            outputs, state = tf.nn.dynamic_rnn(cell,
                                               inputs=self.inputs,
                                               initial_state=self._initial_state,
                                               sequence_length=self.sequence_length)

        final_state = state

        return final_state


    def bi_lstm(self):
        cell_fw = tf.contrib.rnn.LSTMCell(self.hidden_size,
                                          forget_bias=1.0,
                                          state_is_tuple=True,
                                          reuse=tf.get_variable_scope().reuse)
        cell_bw = tf.contrib.rnn.LSTMCell(self.hidden_size,
                                          forget_bias=1.0,
                                          state_is_tuple=True,
                                          reuse=tf.get_variable_scope().reuse)

        # Add dropout to cell output
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, output_keep_prob=self.keep_prob)
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, output_keep_prob=self.keep_prob)

        # Stacked LSTMs
        cell_fw = tf.contrib.rnn.MultiRNNCell([cell_fw] * self.num_layers, state_is_tuple=True)
        cell_bw = tf.contrib.rnn.MultiRNNCell([cell_bw] * self.num_layers, state_is_tuple=True)

        self._initial_state_fw = cell_fw.zero_state(self.batch_size, dtype=tf.float32)
        self._initial_state_bw = cell_bw.zero_state(self.batch_size, dtype=tf.float32)

        # Dynamic Bi-LSTM
        with tf.variable_scope('Bi-LSTM'):
            _, state = tf.nn.bidirectional_dynamic_rnn(cell_fw,
                                                       cell_bw,
                                                       inputs=self.inputs,
                                                       initial_state_fw=self._initial_state_fw,
                                                       initial_state_bw=self._initial_state_bw,
                                                       sequence_length=self.sequence_length)

        state_fw = state[0]
        state_bw = state[1]
        output = tf.concat([state_fw[self.num_layers - 1].h, state_bw[self.num_layers - 1].h], 1)

        return output

## Train

In [28]:
# Load and save data
# =============================================================================

sources, targets, vocab, max_length = load_data(FLAGS.data_file, "fr", "en", shuffle=True)

FLAGS.vocab_size = len(vocab)
FLAGS.max_length = max_length

embedding, emb_dict, FLAGS.embedding_size = load_embeddings(FLAGS.emb_path, vocab)

# Simple Cross validation
# TODO use k-fold cross validation
train_src, valid_src, train_trg, valid_trg = train_test_split(sources, targets, test_size=FLAGS.test_size, random_state=22)


# Batch iterator
#FLAGS.batch_size
train_data = batch_iter(train_src, train_trg, max_length, 10, FLAGS.num_epochs)

# Train
# =============================================================================

with tf.Graph().as_default():
    with tf.Session() as sess:
        if FLAGS.clf == 'cnn':
            classifier = cnn_clf(FLAGS)
        elif FLAGS.clf == 'lstm' or FLAGS.clf == 'blstm':
            classifier = rnn_clf(FLAGS)
        elif FLAGS.clf == 'clstm':
            classifier = clstm_clf(FLAGS)
        else:
            raise ValueError('clf should be one of [cnn, lstm, blstm, clstm]')

        # Train procedure
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
        grads_and_vars = optimizer.compute_gradients(classifier.cost)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Summaries
        loss_summary = tf.summary.scalar('Loss', classifier.cost)
        #accuracy_summary = tf.summary.scalar('Accuracy', classifier.accuracy)

        # Train summary
        train_summary_op = tf.summary.merge_all()
        train_summary_dir = os.path.join(outdir, 'summaries', 'train')
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Validation summary
        valid_summary_op = tf.summary.merge_all()
        valid_summary_dir = os.path.join(outdir, 'summaries', 'valid')
        valid_summary_writer = tf.summary.FileWriter(valid_summary_dir, sess.graph)

        saver = tf.train.Saver(max_to_keep=FLAGS.num_checkpoint)

        sess.run(classifier.embedding_init, feed_dict={classifier.embedding_placeholder: embedding})
        sess.run(tf.global_variables_initializer())


        def run_step(input_data, is_training=True):
            """Run one step of the training process."""
            input_src, input_trg, sequence_length = input_data

            fetches = {'step': global_step,
                       'cost': classifier.cost}#,
                       #'accuracy': classifier.accuracy}
            feed_dict = {classifier.input_source: input_src,
                         classifier.input_target: input_trg}

            if FLAGS.clf != 'cnn':
                fetches['final_state'] = classifier.final_state
                feed_dict[classifier.batch_size] = len(input_src)
                feed_dict[classifier.sequence_length] = sequence_length

            if is_training:
                fetches['train_op'] = train_op
                fetches['summaries'] = train_summary_op
                feed_dict[classifier.keep_prob] = FLAGS.keep_prob
            else:
                fetches['summaries'] = valid_summary_op
                feed_dict[classifier.keep_prob] = 1.0

            vars = sess.run(fetches, feed_dict)
            step = vars['step']
            cost = vars['cost']
            #accuracy = vars['accuracy']
            summaries = vars['summaries']

            # Write summaries to file
            if is_training:
                train_summary_writer.add_summary(summaries, step)
            else:
                valid_summary_writer.add_summary(summaries, step)

            time_str = datetime.datetime.now().isoformat()
            print("{}: step: {}, loss: {:g}".format(time_str, step, cost))


        print('Start training ...')

        for train_input in train_data:
            #print(train_input)
            run_step(train_input, is_training=True)
            current_step = tf.train.global_step(sess, global_step)

            if current_step % FLAGS.evaluate_every_steps == 0:
                print('\nValidation')
                run_step((x_valid, y_valid, valid_lengths), is_training=False)
                print('')

            if current_step % FLAGS.save_every_steps == 0:
                save_path = saver.save(sess, os.path.join(outdir, 'model/clf'), current_step)

        print('\nAll the files have been saved to {}\n'.format(outdir))

100%|██████████| 19/19 [00:00<00:00, 35230.67it/s]
100%|██████████| 19/19 [00:00<00:00, 22834.32it/s]
100%|██████████| 5/5 [00:00<00:00, 5313.28it/s]


Start training ...
epoch_length: 1
2018-05-25T15:14:28.081260: step: 1, loss: 9.61059
2018-05-25T15:14:28.201663: step: 2, loss: 5.85256
2018-05-25T15:14:28.325846: step: 3, loss: 3.62878
2018-05-25T15:14:28.486504: step: 4, loss: 2.00446
2018-05-25T15:14:28.639046: step: 5, loss: 1.13188
2018-05-25T15:14:28.756389: step: 6, loss: 0.644091
2018-05-25T15:14:28.886344: step: 7, loss: 0.327571
2018-05-25T15:14:29.011215: step: 8, loss: 0.121332
2018-05-25T15:14:29.140364: step: 9, loss: 0.0567066
2018-05-25T15:14:29.294642: step: 10, loss: 0.0307281
2018-05-25T15:14:29.442458: step: 11, loss: 0.0129232
2018-05-25T15:14:29.564117: step: 12, loss: 0.00420781
2018-05-25T15:14:29.690745: step: 13, loss: 0.00167947
2018-05-25T15:14:29.809941: step: 14, loss: 0.00149764
2018-05-25T15:14:29.931064: step: 15, loss: 1.63307e-06
2018-05-25T15:14:30.070684: step: 16, loss: 0
2018-05-25T15:14:30.258684: step: 17, loss: 0
2018-05-25T15:14:30.373956: step: 18, loss: 0
2018-05-25T15:14:30.498939: step: 

## Negative Sampling:

In [42]:
classifier.__dict__

{'accuracy': <tf.Tensor 'accuracy/accuracy:0' shape=() dtype=float32>,
 'correct_num': <tf.Tensor 'accuracy/Sum:0' shape=() dtype=float32>,
 'cost': <tf.Tensor 'loss/add:0' shape=() dtype=float32>,
 'embedding_size': 256,
 'filter_sizes': [3, 4, 5],
 'input_x': <tf.Tensor 'Placeholder:0' shape=(?, 62) dtype=int32>,
 'input_y': <tf.Tensor 'Placeholder_1:0' shape=(?,) dtype=int64>,
 'keep_prob': <tf.Tensor 'Placeholder_2:0' shape=<unknown> dtype=float32>,
 'l2_loss': <tf.Tensor 'softmax/add_1:0' shape=() dtype=float32>,
 'l2_reg_lambda': 0.001,
 'logits': <tf.Tensor 'softmax/add_2:0' shape=(?, 2) dtype=float32>,
 'max_length': 62,
 'num_classes': 2,
 'num_filters': 128,
 'predictions': <tf.Tensor 'softmax/ArgMax:0' shape=(?,) dtype=int64>,
 'vocab_size': 377}