In [2]:
%matplotlib inline
import collections
import math
import os
import random
import zipfile

import numpy as np
from six.moves import urllib
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

import re
import pickle
import pandas as pd
import csv
import os
import time
import datetime
from tensorflow.contrib import learn
from nltk.tokenize import TweetTokenizer
from sklearn.metrics import f1_score
from IPython.display import clear_output, Image, display, HTML

In [6]:
###### Do not modify here ###### 
def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = graph_def
    #strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))
###### Do not modify  here ######

In [7]:
def load_data_and_labels(train_data_file, test_data_file):
    """
    Loads MR polarity data from files, splits the data into words and generates labels.
    Returns split sentences and labels.
    """
    # Load data from files
    train_data = pd.read_csv(train_data_file, sep='\t', quoting=csv.QUOTE_NONE, header=None, names=['id', 'language', 'label', 'text'])
    test_data = pd.read_csv(test_data_file, sep='\t', quoting=csv.QUOTE_NONE, header=None, names=['id', 'language', 'label', 'text'])
    
    x_train = train_data['text'].tolist()
    y_train = train_data['label'].tolist()

    x_test = test_data['text'].tolist()
    y_test = test_data['label'].tolist()
    
    x_train = [s.strip() for s in x_train]
    x_test = [s.strip() for s in x_test]
    
    label_encoding = {'neutral':0, 'positive':1, 'negative':2}
    
    y_train_encoding = [label_encoding[label] for label in y_train]    
    y_test_encoding = [label_encoding[label] for label in y_test]

    
    return [x_train, y_train_encoding, x_test, y_test_encoding]

def transform_data_and_labels(data):
    """
    Loads MR polarity data from files, splits the data into words and generates labels.
    Returns split sentences and labels.
    """
    
    x = np.array(data['text'].tolist())
    y = data['label'].tolist()
    
    # encoding label
    label_encoding = {'neutral':0, 'positive':1, 'negative':2}
    y = [label_encoding[label] for label in y]    
    
    
    y = np.array(y)
    
    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # maybe we can use cross-validation to improve
    dev_sample_index = -1 * int(0.1 * float(len(y)))
    x_train, x_test = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    y_train, y_test = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    print("Train/Test split: {:d}/{:d}".format(len(y_train), len(y_test)))
    
    return [x_train, y_train, x_test, y_test]


"""
    This function assumes that the last word in the word embedding is a zero vector, and will use it as padding.
    The input 'num_voc' equals to the shape[0] of the word embedding.
"""
def process_tweet(train_tweets, test_tweets, num_voc):
    # max_document_length = max([len(x.split(" ")) for x in x_train_sentence])
    ppl_re = re.compile(r'@\S*')
    url_re = re.compile(r'http\S+')
    tknzr = TweetTokenizer()
    # tknzr = TweetTokenizer(reduce_len=True)
    
    tokenized_tweets_all = []
    max_document_length = 0
    
    for tweets in [train_tweets, test_tweets]:
        tweets = [url_re.sub('URLTOK', ppl_re.sub('USRTOK', tweet.lower())) for tweet in tweets]
        tokenized_tweets = [tknzr.tokenize(tweet) for tweet in tweets]
        tokenized_tweets_all.append(tokenized_tweets)
        max_document_length = max(max_document_length, max([len(tweet) for tweet in tokenized_tweets]))
    print(max_document_length)
    
    x = []
    
    for tokenized_tweets in tokenized_tweets_all:
        x_curr = []
        for tokenized_tweet in tokenized_tweets:
            if len(tokenized_tweet) == max_document_length:
                print(tokenized_tweet)
            """Not sure if original paper does this, but since index 0 means USRTOK, padding should be a number
            higher than total word count, so tf.nn.embedding_lookup will return a tensor of 0 insted of USRTOK."""
        #     temp = np.zeros(max_document_length, dtype=np.int).tolist()
            temp = (np.ones(max_document_length, dtype=np.int)*(num_voc-1)).tolist()

            for index, word in enumerate(tokenized_tweet):
                if word in word_dict:
                    temp[index] = word_dict[word][0]
            x_curr.append(temp)
        x_curr = np.array(x_curr)
        x.append(x_curr)
    
    return x[0], x[1]


def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """
    Generates a batch iterator for a dataset.
    """
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((len(data)-1)/batch_size) + 1
    for epoch in range(num_epochs):
        print("Current epoch: ", epoch)
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]

# Load pre-train word embeddings

In [4]:
final_embeddings = np.load('./data/embed_tweets_en_200M_200D/embedding_matrix.npy')
word_dict = {}
with open('./data/embed_tweets_en_200M_200D/vocabulary.pickle', 'rb') as myfile:
    word_dict = pickle.load(myfile)

In [5]:
# shit
for key, val in word_dict.items():
    if val[0] == 94:
        print(key, val)

> (94, 3868680)


In [6]:
final_embeddings.shape

(1859185, 200)

# Distant Supervision phase

In [7]:
distance_supervised_tweets = pd.read_csv('./data/distant_data/distance_supervised_tweets_corrected', names=["id","lan", "label", "text"], sep="\t", header=None, usecols=["lan", "label", "text"])
distance_supervised_tweets_2 = pd.read_csv('./data/distant_data/distance_supervised_tweets_2_corrected', names=["id","lan", "label", "text"], sep="\t", header=None, usecols=["lan", "label", "text"])
distance_supervised_tweets_3 = pd.read_csv('./data/distant_data/distance_supervised_tweets_3_corrected', names=["id","lan", "label", "text"], sep="\t", header=None, usecols=["lan", "label", "text"])
distance_supervised_tweets = distance_supervised_tweets.append(distance_supervised_tweets_2).append(distance_supervised_tweets_3)

In [8]:
distance_supervised_tweets

Unnamed: 0,lan,label,text
0,en,positive,Boston Bruins Morning Thoughtefense Exceeding ...
1,en,positive,Bol Bachchan!. #AeZindagiGaleLagale. Aata Majh...
2,en,negative,"(17) karena lagi sakit, aku lagi gelisah terus..."
3,en,positive,Telkomsel: Thary422 Terima kasih telah berpart...
4,en,positive,@parisa_khania آخر سر هم از جزوه ی محترم عکس گ...
5,en,positive,kyristcl: XL123: frungnarikvnn Bisa ajak selai...
6,en,positive,Telkomsel: daff_01 Terima kasih telah berparti...
7,en,positive,@chris_randall Just the usual disclaimer that ...
8,en,positive,@Lin_Manuel Congrats from me and all my friend...
9,en,negative,"@nicaaaji hahahaha magtext ako beb. ""Hi lola! ..."


In [9]:
x_train_sentence, y_train, x_test_sentence, y_test = transform_data_and_labels(distance_supervised_tweets)


Train/Test split: 183161/20351


In [10]:
# TODO Load distant-supervised data
# train it with two-layer CNN model
# pass the weight to next two layer CNN model
print(len(x_test_sentence))
x_train_distance, x_test_distance = process_tweet(x_train_sentence, x_test_sentence, final_embeddings.shape[0])


20351
2063
['people', 'come', 'and', 'people', 'go', '...', "that's", 'life', '...', '#aldubmistakenidentity', '33877', 'en', 'positive', "michiganon't", 'sell', 'USRTOK', '100m', 'gallons', 'of', 'groundwater', 'for', '$', '200', 'and', '20', 'jobs', '.', "that's", 'bananas', '.', 'URLTOK', '33878', 'en', 'positive', 'jio', 'has', 'touched', 'the', 'hearts', 'of', '50', 'millions', 'users', 'with', 'their', 'network', 'really', 'happy', 'for', 'jio', '!', '#jio50million', '33879', 'en', 'positive', 'every', 'second', ',', 'minute', '&', 'hour', 'of', 'our', 'life', 'must', 'be', 'filled', 'with', 'passion', ',', 'dedication', 'and', 'restlessness', 'to', 'make', 'it', 'the', 'best', 'possible', 'life', 'ever', '.', '33880', 'en', 'positive', 'USRTOK', 'USRTOK', 'USRTOK', 'whys', 'this', 'yous', '33881', 'en', 'positive', 'thanks', 'for', 'the', 'recent', 'follow', 'USRTOK', 'USRTOK', 'USRTOK', 'happy', 'to', 'connect', 'have', 'a', 'great', 'tuesday', '.', '>', '>', 'URLTOK', '33882',

In [11]:
# To save memory
del x_train_sentence
del x_test_sentence

In [12]:
# put Word2Vec on 590 million English Tweets using 52 dimensions.
vocabulary_size = final_embeddings.shape[0]
embedding_size = 200  # Dimension of the embedding vector.
graph = tf.Graph()

sequence_length=x_train_distance.shape[1]
num_classes=3

# filter_sizes: The number of words we want our convolutional filters to cover. 
# We will have num_filters for each size specified here. 
# For example, [3, 4, 5] means that we will have filters that slide over 3, 4 and 5 words respectively, for a total of 3 * num_filters filters.
first_filter_sizes = [4]
first_pool_window_sizes = [4]
first_pool_strides = [2]


second_filter_window_sizes = [3]
num_filters = 200

# No L2 norm
l2_reg_lambda=0.0

with graph.as_default():
    input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
    input_y = tf.placeholder(tf.int64, [None], name="input_y")
    with tf.device('/cpu:0'):
        embeddings = tf.Variable(tf.constant(0.0, shape=[vocabulary_size, embedding_size]),
                        trainable=False, name="embedding")

        embedding_placeholder = tf.placeholder(tf.float32, [vocabulary_size, embedding_size], name='word_embedding_placeholder')
        embedding_init = embeddings.assign(embedding_placeholder)  # assign exist word embeddings

        embedded_chars = tf.nn.embedding_lookup(embeddings, input_x)
    embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)
    print(input_x.shape)
    print(embedded_chars_expanded.shape)
    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)
    
     # Add variable initializer.
    init = tf.global_variables_initializer()
    
    
    # Create first cnn : a convolution + maxpool layer for each filter size    
    # 1st Convolution Layer
    for i, first_filter_size in enumerate(first_filter_sizes):
        with tf.name_scope("conv-maxpool-1"):
            # Convolution Layer
            filter_shape = [first_filter_size, embedding_size, 1, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            print("CNN filter", W.shape)
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(
                embedded_chars_expanded,
                W,
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs and transform for next layer, so the "channel" of convolution 
            # will become "input_width" for next layer
#             pooled = tf.transpose(tf.nn.max_pool(
#                 h,
#                 ksize=[1, first_pool_window_sizes[i], 1, 1],
#                 strides=[1, first_pool_strides[i], 1, 1],
#                 padding='VALID',
#                 name="pool"), perm=[0, 1, 3, 2])
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, first_pool_window_sizes[i], 1, 1],
                strides=[1, first_pool_strides[i], 1, 1],
                padding='VALID',
                name="pool")
#     print("conv1", conv.shape)
#     print("h1", h.shape)
#     print("pooled1", pooled_1.shape)
    
    # 2nd Convolutional Layer
#     for i, second_filter_size in enumerate(second_filter_window_sizes):
#         with tf.name_scope("conv-maxpool-2"):
#             # Convolution Layer
#             filter_shape = [second_filter_size, num_filters, 1, num_filters]
#             W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
#             print("CNN filter", W.shape)
#             b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
#             conv = tf.nn.conv2d(
#                 pooled,
#                 W,
#                 strides=[1, 1, 1, 1],
#                 padding="VALID",
#                 name="conv")
#             # Apply nonlinearity
#             h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
#             # Maxpooling over the outputs and transform for next layer, so the "channel" of convolution 
#             # will become "input_width" for next layer
#             pooled = tf.nn.max_pool(
#                 h,
#                 ksize=[1, h.shape[1], 1, 1],
#                 strides=[1, 1, 1, 1],
#                 padding='VALID',
#                 name="pool")
    for i, second_filter_size in enumerate(second_filter_window_sizes):
        with tf.name_scope("conv-maxpool-2"):
            # Convolution Layer
            filter_shape = [second_filter_size, 1, num_filters, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            print("CNN filter", W.shape)
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(
                pooled,
                W,
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            print(h.shape)
            # Maxpooling over the outputs and transform for next layer, so the "channel" of convolution 
            # will become "input_width" for next layer
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, h.shape[1], 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
 

    h_pool_flat = tf.reshape(pooled, [-1, num_filters])  # flatten pooling layers
    print("h_pool_flat", h_pool_flat.shape)
    
    # Add dropout
#     with tf.name_scope("dropout"):
#         self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

    # Final (unnormalized) scores and predictions
    
    # Fully connected hidden layer
    with tf.name_scope("hidden"):
        with tf.variable_scope("hidden"):
            W = tf.get_variable(
                "W",
                shape=[num_filters, num_filters],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            out = tf.nn.relu(tf.nn.xw_plus_b(h_pool_flat, W, b))
        
    
    with tf.name_scope("output"):
        with tf.variable_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            scores = tf.nn.xw_plus_b(out, W, b, name="scores")
            print("scores", scores.shape)
            predictions = tf.argmax(scores, 1, name="predictions")
            print("predictions", predictions.shape)


    # Calculate mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=scores, labels=input_y)
        print("losses", losses.shape)
        loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(predictions, input_y)
        accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

(?, 2063)
(?, 2063, 200, 1)
CNN filter (4, 200, 1, 200)
CNN filter (3, 1, 200, 200)
(?, 1027, 1, 200)
h_pool_flat (?, 200)
scores (?, 3)
predictions (?,)
losses (?,)


In [22]:
batch_size = 512
num_epochs = 1

num_checkpoints = 5
print_train_every = 5
evaluate_every = 50
checkpoint_every = 10000000
allow_soft_placement=True
log_device_placement=False

with graph.as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=allow_soft_placement,
      log_device_placement=log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        
        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdadeltaOptimizer(1.0)
        grads_and_vars = optimizer.compute_gradients(loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", loss)
        acc_summary = tf.summary.scalar("accuracy", accuracy)

        # Train Summaries
        train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Dev summaries
        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=num_checkpoints)

#         # Write vocabulary
#         vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables
        sess.run(tf.global_variables_initializer())


        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
              input_x: x_batch,
              input_y: y_batch,
            }
            _, step, summaries, cur_loss, cur_accuracy, y_pred = sess.run(
                [train_op, global_step, train_summary_op, loss, accuracy, predictions],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
#             print(y_pred)
#             print(y_batch)
            if step % print_train_every == 0:
                f1 = f1_score(y_batch, y_pred, average = 'weighted')
                print("{}: step {}, loss {:g}, acc {:g}, f1 {:g}".format(time_str, step, cur_loss, cur_accuracy,
                                                                     f1))
            train_summary_writer.add_summary(summaries, step)



        def dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            feed_dict = {
              input_x: x_batch,
              input_y: y_batch,
            }
            step, summaries, cur_loss, cur_accuracy, y_pred = sess.run(
                [global_step, dev_summary_op, loss, accuracy, predictions],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            f1 = f1_score(y_batch, y_pred, average = 'weighted')
            print("Test")
            print("{}: step {}, loss {:g}, acc {:g}, f1 {:g}".format(time_str, step, cur_loss, cur_accuracy,
                                                                    f1))
            if writer:
                writer.add_summary(summaries, step)
        
        def dev_step_batch(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            feed_dict = {
              input_x: x_batch,
              input_y: y_batch,
            }
            step, summaries, cur_loss, cur_accuracy, y_pred = sess.run(
                [global_step, dev_summary_op, loss, accuracy, predictions],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            f1 = f1_score(y_batch, y_pred, average = 'weighted')
#             print("{}: step {}, loss {:g}, acc {:g}, f1 {:g}".format(time_str, step, cur_loss, cur_accuracy,
#                                                                     f1))
            if writer:
                writer.add_summary(summaries, step)
            return cur_loss, cur_accuracy, f1
        
        
        sess.run(embedding_init, feed_dict={embedding_placeholder: final_embeddings})
        # Generate batches
        batches = batch_iter(
            list(zip(x_train_distance, y_train_)), batch_size, num_epochs)
        
        batches_test = list(batch_iter(
            list(zip(x_test_distance, y_test)), batch_size, 1))
        
        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch)
            
            current_step = tf.train.global_step(sess, global_step)
            if current_step % evaluate_every == 0:
                print("\nEvaluation:")
                total_loss=0
                total_f1=0
                total_accuracy=0
                len_of_batch = int(len(batches_test))
                for batch_test in batches_test:
                    x_batch_test, y_batch_test = zip(*batch_test)
                    cur_loss, cur_accuracy, cur_f1 = dev_step_batch(x_batch_test, y_batch_test, writer=dev_summary_writer)
                    total_loss+=cur_loss
                    total_accuracy+=cur_accuracy
                    total_f1+=cur_f1
                print("loss {:g}, acc {:g}, f1 {:g}".format(total_loss/len_of_batch, total_accuracy/len_of_batch, total_f1/len_of_batch))
                print("")
            if current_step % checkpoint_every == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))
        final_embeddings = embeddings.eval()

INFO:tensorflow:Summary name conv-maxpool-1/W:0/grad/hist is illegal; using conv-maxpool-1/W_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-1/W:0/grad/sparsity is illegal; using conv-maxpool-1/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-1/b:0/grad/hist is illegal; using conv-maxpool-1/b_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-1/b:0/grad/sparsity is illegal; using conv-maxpool-1/b_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-2/W:0/grad/hist is illegal; using conv-maxpool-2/W_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-2/W:0/grad/sparsity is illegal; using conv-maxpool-2/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-2/b:0/grad/hist is illegal; using conv-maxpool-2/b_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-2/b:0/grad/sparsity is illegal; using conv-maxpool-2/b_0/grad/sparsity instead.
INFO:tensorflow:Summary name hidden/W:0/grad/hist is ill

  'precision', 'predicted', average, warn_for)


2017-11-17T16:01:04.244588: step 5, loss 0.495323, acc 0.845703, f1 0.775004
2017-11-17T16:01:09.962854: step 10, loss 0.466426, acc 0.824219, f1 0.748574
2017-11-17T16:01:15.749512: step 15, loss 0.380859, acc 0.863281, f1 0.803654
2017-11-17T16:01:21.526436: step 20, loss 0.407394, acc 0.835938, f1 0.7684
2017-11-17T16:01:27.289647: step 25, loss 0.428285, acc 0.824219, f1 0.753678
2017-11-17T16:01:33.035461: step 30, loss 0.341089, acc 0.876953, f1 0.848807
2017-11-17T16:01:38.798762: step 35, loss 0.398212, acc 0.861328, f1 0.830692
2017-11-17T16:01:44.532571: step 40, loss 0.369266, acc 0.849609, f1 0.803052
2017-11-17T16:01:50.255929: step 45, loss 0.426645, acc 0.832031, f1 0.830584
2017-11-17T16:01:56.039244: step 50, loss 0.397883, acc 0.851562, f1 0.796418

Evaluation:
loss 0.359244, acc 0.867456, f1 0.843437

2017-11-17T16:02:23.728139: step 55, loss 0.291955, acc 0.908203, f1 0.896704
2017-11-17T16:02:29.470398: step 60, loss 0.385201, acc 0.855469, f1 0.792562
2017-11-17T1

In [24]:
final_embeddings

array([[-0.10009229,  0.08579876, -0.12731791, ..., -0.15652488,
        -0.10116389,  0.08370614],
       [-0.02642334,  0.03184305, -0.1160032 , ..., -0.07465456,
        -0.10345571,  0.12367946],
       [-0.08056928,  0.03620725, -0.11454398, ..., -0.15909833,
        -0.10029007,  0.11134482],
       ..., 
       [ 0.17794977, -0.06076148,  0.00221153, ...,  0.2420754 ,
         0.16043946, -0.24765149],
       [ 0.05399185, -0.01826661, -0.06147144, ..., -0.03653212,
        -0.21923123,  0.2452819 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]], dtype=float32)

In [25]:
# Save embeddings
# np.save('final_embeddings', final_embeddings)

# Supervised Training

In [3]:
# Load embeddings from previous work
final_embeddings = np.load('./final_embeddings.npy')
word_dict = {}
with open('./data/embed_tweets_en_200M_200D/vocabulary.pickle', 'rb') as myfile:
    word_dict = pickle.load(myfile)

In [8]:
#Load label data
x_train_sentence, y_train, x_test_sentence, y_test = load_data_and_labels('./data/supervised_data/en_full.tsv.txt', './data/supervised_data/en_test.tsv')
print(len(x_test_sentence))
x_train, x_test = process_tweet(x_train_sentence, x_test_sentence, final_embeddings.shape[0])

20632
53
["there's", 'a', 'lot', 'of', 'stupid', '$', 'h', '!', 't', 'out', 'there', ',', 'but', 'polling', 'trump', 'v', 'kanye', 'west', 'may', 'take', 'the', 'cake', '.', 'all', 'i', 'can', 'think', 'to', 'say', 'is', ':', '#', '$', '%', '#', '$', '%', '$', '#', '%', '#', '$', '%', '#', '$', '%', '#', '$', '#', '$', '#', '$', '%']


In [9]:
# put Word2Vec on 590 million English Tweets using 52 dimensions.
vocabulary_size = final_embeddings.shape[0]
embedding_size = 200  # Dimension of the embedding vector.
graph = tf.Graph()

sequence_length=x_train.shape[1]
num_classes=3

# filter_sizes: The number of words we want our convolutional filters to cover. 
# We will have num_filters for each size specified here. 
# For example, [3, 4, 5] means that we will have filters that slide over 3, 4 and 5 words respectively, for a total of 3 * num_filters filters.
first_filter_sizes = [4]
first_pool_window_sizes = [4]
first_pool_strides = [2]


second_filter_window_sizes = [3]
num_filters = 200

# No L2 norm
l2_reg_lambda=0.0

with graph.as_default():
    input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
    input_y = tf.placeholder(tf.int64, [None], name="input_y")
    with tf.device('/cpu:0'):
        embeddings = tf.Variable(tf.constant(0.0, shape=[vocabulary_size, embedding_size]),
                        trainable=False, name="embedding")

        embedding_placeholder = tf.placeholder(tf.float32, [vocabulary_size, embedding_size], name='word_embedding_placeholder')
        embedding_init = embeddings.assign(embedding_placeholder)  # assign exist word embeddings

        embedded_chars = tf.nn.embedding_lookup(embeddings, input_x)
    embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)
    print(input_x.shape)
    print(embedded_chars_expanded.shape)
    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)
    
     # Add variable initializer.
    init = tf.global_variables_initializer()
    
    
    # Create first cnn : a convolution + maxpool layer for each filter size    
    # 1st Convolution Layer
    for i, first_filter_size in enumerate(first_filter_sizes):
        with tf.name_scope("conv-maxpool-1"):
            # Convolution Layer
            filter_shape = [first_filter_size, embedding_size, 1, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            print("CNN filter", W.shape)
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(
                embedded_chars_expanded,
                W,
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs and transform for next layer, so the "channel" of convolution 
            # will become "input_width" for next layer
#             pooled = tf.transpose(tf.nn.max_pool(
#                 h,
#                 ksize=[1, first_pool_window_sizes[i], 1, 1],
#                 strides=[1, first_pool_strides[i], 1, 1],
#                 padding='VALID',
#                 name="pool"), perm=[0, 1, 3, 2])
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, first_pool_window_sizes[i], 1, 1],
                strides=[1, first_pool_strides[i], 1, 1],
                padding='VALID',
                name="pool")
#     print("conv1", conv.shape)
#     print("h1", h.shape)
#     print("pooled1", pooled_1.shape)
    
    # 2nd Convolutional Layer
#     for i, second_filter_size in enumerate(second_filter_window_sizes):
#         with tf.name_scope("conv-maxpool-2"):
#             # Convolution Layer
#             filter_shape = [second_filter_size, num_filters, 1, num_filters]
#             W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
#             print("CNN filter", W.shape)
#             b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
#             conv = tf.nn.conv2d(
#                 pooled,
#                 W,
#                 strides=[1, 1, 1, 1],
#                 padding="VALID",
#                 name="conv")
#             # Apply nonlinearity
#             h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
#             # Maxpooling over the outputs and transform for next layer, so the "channel" of convolution 
#             # will become "input_width" for next layer
#             pooled = tf.nn.max_pool(
#                 h,
#                 ksize=[1, h.shape[1], 1, 1],
#                 strides=[1, 1, 1, 1],
#                 padding='VALID',
#                 name="pool")
    for i, second_filter_size in enumerate(second_filter_window_sizes):
        with tf.name_scope("conv-maxpool-2"):
            # Convolution Layer
            filter_shape = [second_filter_size, 1, num_filters, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            print("CNN filter", W.shape)
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(
                pooled,
                W,
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            print(h.shape)
            # Maxpooling over the outputs and transform for next layer, so the "channel" of convolution 
            # will become "input_width" for next layer
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, h.shape[1], 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
 

    h_pool_flat = tf.reshape(pooled, [-1, num_filters])  # flatten pooling layers
    print("h_pool_flat", h_pool_flat.shape)
    
    # Add dropout
#     with tf.name_scope("dropout"):
#         self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

    # Final (unnormalized) scores and predictions
    
    # Fully connected hidden layer
    with tf.name_scope("hidden"):
        with tf.variable_scope("hidden"):
            W = tf.get_variable(
                "W",
                shape=[num_filters, num_filters],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            out = tf.nn.relu(tf.nn.xw_plus_b(h_pool_flat, W, b))
        
    
    with tf.name_scope("output"):
        with tf.variable_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            scores = tf.nn.xw_plus_b(out, W, b, name="scores")
            print("scores", scores.shape)
            predictions = tf.argmax(scores, 1, name="predictions")
            print("predictions", predictions.shape)


    # Calculate mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=scores, labels=input_y)
        print("losses", losses.shape)
        loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(predictions, input_y)
        accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
    

(?, 53)
(?, 53, 200, 1)
CNN filter (4, 200, 1, 200)
CNN filter (3, 1, 200, 200)
(?, 22, 1, 200)
h_pool_flat (?, 200)
scores (?, 3)
predictions (?,)
losses (?,)


In [12]:
show_graph(graph.as_graph_def())

In [None]:
batch_size = 1024
num_epochs = 200

num_checkpoints = 5
print_train_every = 5
evaluate_every = 50
checkpoint_every = 10000000
allow_soft_placement=True
log_device_placement=False

with graph.as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=allow_soft_placement,
      log_device_placement=log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        
        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdadeltaOptimizer(1.0)
        grads_and_vars = optimizer.compute_gradients(loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", loss)
        acc_summary = tf.summary.scalar("accuracy", accuracy)

        # Train Summaries
        train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Dev summaries
        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=num_checkpoints)

#         # Write vocabulary
#         vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables
        sess.run(tf.global_variables_initializer())


        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
              input_x: x_batch,
              input_y: y_batch,
            }
            _, step, summaries, cur_loss, cur_accuracy, y_pred = sess.run(
                [train_op, global_step, train_summary_op, loss, accuracy, predictions],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
#             print(y_pred)
#             print(y_batch)
            if step % print_train_every == 0:
                f1 = f1_score(y_batch, y_pred, average = 'weighted')
                print("{}: step {}, loss {:g}, acc {:g}, f1 {:g}".format(time_str, step, cur_loss, cur_accuracy,
                                                                     f1))
            train_summary_writer.add_summary(summaries, step)



        def dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            feed_dict = {
              input_x: x_batch,
              input_y: y_batch,
            }
            step, summaries, cur_loss, cur_accuracy, y_pred = sess.run(
                [global_step, dev_summary_op, loss, accuracy, predictions],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            f1 = f1_score(y_batch, y_pred, average = 'weighted')
            print("{}: step {}, loss {:g}, acc {:g}, f1 {:g}".format(time_str, step, cur_loss, cur_accuracy,
                                                                    f1))
            if writer:
                writer.add_summary(summaries, step)
                
        def dev_step_batch(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            feed_dict = {
              input_x: x_batch,
              input_y: y_batch,
            }
            step, summaries, cur_loss, cur_accuracy, y_pred = sess.run(
                [global_step, dev_summary_op, loss, accuracy, predictions],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            f1 = f1_score(y_batch, y_pred, average = 'weighted')
#             print("{}: step {}, loss {:g}, acc {:g}, f1 {:g}".format(time_str, step, cur_loss, cur_accuracy,
#                                                                     f1))
            if writer:
                writer.add_summary(summaries, step)
            return cur_loss, cur_accuracy, f1

        
        sess.run(embedding_init, feed_dict={embedding_placeholder: final_embeddings})
        # Generate batches
        batches = batch_iter(
            list(zip(x_train, y_train)), batch_size, num_epochs)
        
        batches_test = list(batch_iter(
            list(zip(x_test, y_test)), batch_size, 1))
        
        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch)
            
            current_step = tf.train.global_step(sess, global_step)
            if current_step % evaluate_every == 0:
                print("\nEvaluation:")
                dev_step(x_test, y_test, writer=dev_summary_writer)
                print("")
            if current_step % checkpoint_every == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))

INFO:tensorflow:Summary name conv-maxpool-1/W:0/grad/hist is illegal; using conv-maxpool-1/W_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-1/W:0/grad/sparsity is illegal; using conv-maxpool-1/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-1/b:0/grad/hist is illegal; using conv-maxpool-1/b_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-1/b:0/grad/sparsity is illegal; using conv-maxpool-1/b_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-2/W:0/grad/hist is illegal; using conv-maxpool-2/W_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-2/W:0/grad/sparsity is illegal; using conv-maxpool-2/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-2/b:0/grad/hist is illegal; using conv-maxpool-2/b_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-2/b:0/grad/sparsity is illegal; using conv-maxpool-2/b_0/grad/sparsity instead.
INFO:tensorflow:Summary name hidden/W:0/grad/hist is ill

  'precision', 'predicted', average, warn_for)


2017-11-17T21:21:51.801606: step 5, loss 1.10649, acc 0.426758, f1 0.374303
2017-11-17T21:21:52.311735: step 10, loss 1.00613, acc 0.461914, f1 0.419631
2017-11-17T21:21:52.803264: step 15, loss 0.989656, acc 0.481445, f1 0.44152
Current epoch:  1
2017-11-17T21:21:53.310189: step 20, loss 0.964587, acc 0.498047, f1 0.455241
2017-11-17T21:21:53.828340: step 25, loss 0.962968, acc 0.499023, f1 0.428204
2017-11-17T21:21:54.332194: step 30, loss 0.983904, acc 0.484375, f1 0.40822
2017-11-17T21:21:54.820674: step 35, loss 0.984431, acc 0.472656, f1 0.376734
Current epoch:  2
2017-11-17T21:21:55.293309: step 40, loss 0.973821, acc 0.498047, f1 0.443258
2017-11-17T21:21:55.773916: step 45, loss 0.931473, acc 0.544922, f1 0.490308
2017-11-17T21:21:56.295527: step 50, loss 0.955362, acc 0.53418, f1 0.526567

Evaluation:
2017-11-17T21:21:56.897350: step 50, loss 1.01432, acc 0.456087, f1 0.394521

Current epoch:  3
2017-11-17T21:21:57.420306: step 55, loss 0.983732, acc 0.496094, f1 0.406664
201

2017-11-17T21:22:41.437832: step 450, loss 0.866366, acc 0.587388, f1 0.557674

Current epoch:  25
2017-11-17T21:22:41.938905: step 455, loss 0.722706, acc 0.650391, f1 0.622195
2017-11-17T21:22:42.411744: step 460, loss 0.684525, acc 0.682617, f1 0.676811
2017-11-17T21:22:42.927911: step 465, loss 0.750825, acc 0.62793, f1 0.60354
Current epoch:  26
2017-11-17T21:22:43.380740: step 470, loss 0.707442, acc 0.68457, f1 0.681439
2017-11-17T21:22:43.879450: step 475, loss 0.83658, acc 0.633789, f1 0.577025
2017-11-17T21:22:44.394479: step 480, loss 0.725508, acc 0.651367, f1 0.642725
2017-11-17T21:22:44.886278: step 485, loss 0.722703, acc 0.655273, f1 0.626066
Current epoch:  27
2017-11-17T21:22:45.389441: step 490, loss 0.726678, acc 0.636719, f1 0.617678
2017-11-17T21:22:45.897512: step 495, loss 0.672432, acc 0.697266, f1 0.680786
2017-11-17T21:22:46.428056: step 500, loss 0.727178, acc 0.651367, f1 0.638572

Evaluation:
2017-11-17T21:22:47.017582: step 500, loss 0.926851, acc 0.51744

2017-11-17T21:23:31.951616: step 895, loss 0.646985, acc 0.745117, f1 0.691622
2017-11-17T21:23:32.437415: step 900, loss 0.476841, acc 0.841195, f1 0.840763

Evaluation:
2017-11-17T21:23:33.032629: step 900, loss 0.854142, acc 0.600281, f1 0.590337

Current epoch:  50
2017-11-17T21:23:33.584335: step 905, loss 0.696942, acc 0.632812, f1 0.592
2017-11-17T21:23:34.081560: step 910, loss 0.532751, acc 0.761719, f1 0.73683
2017-11-17T21:23:34.574437: step 915, loss 0.478487, acc 0.839844, f1 0.842472
Current epoch:  51
2017-11-17T21:23:35.064998: step 920, loss 0.55448, acc 0.71582, f1 0.705
2017-11-17T21:23:35.535843: step 925, loss 0.545979, acc 0.74707, f1 0.736941
2017-11-17T21:23:36.080489: step 930, loss 0.446002, acc 0.853516, f1 0.854415
2017-11-17T21:23:36.616720: step 935, loss 0.526118, acc 0.760742, f1 0.737764
Current epoch:  52
2017-11-17T21:23:37.084629: step 940, loss 0.625942, acc 0.711914, f1 0.677932
2017-11-17T21:23:37.606222: step 945, loss 0.430777, acc 0.856445, f1 

2017-11-17T21:24:21.559894: step 1330, loss 0.8446, acc 0.584961, f1 0.522448
Current epoch:  74
2017-11-17T21:24:22.086863: step 1335, loss 0.217387, acc 0.962891, f1 0.96284
2017-11-17T21:24:22.612490: step 1340, loss 0.190191, acc 0.959961, f1 0.959903
2017-11-17T21:24:23.078064: step 1345, loss 0.174227, acc 0.97168, f1 0.971673
2017-11-17T21:24:23.526398: step 1350, loss 0.174447, acc 0.974843, f1 0.974855

Evaluation:
2017-11-17T21:24:24.147657: step 1350, loss 0.939492, acc 0.600475, f1 0.598522

Current epoch:  75
2017-11-17T21:24:24.640644: step 1355, loss 0.179285, acc 0.96582, f1 0.965891
2017-11-17T21:24:25.146564: step 1360, loss 0.73615, acc 0.644531, f1 0.563617
2017-11-17T21:24:25.676461: step 1365, loss 0.20562, acc 0.960938, f1 0.960832
Current epoch:  76
2017-11-17T21:24:26.184282: step 1370, loss 0.182361, acc 0.964844, f1 0.964826
2017-11-17T21:24:26.692212: step 1375, loss 0.159454, acc 0.96875, f1 0.96876
2017-11-17T21:24:27.247530: step 1380, loss 0.22826, acc 0

Current epoch:  98
2017-11-17T21:25:11.239276: step 1765, loss 0.0506572, acc 0.998047, f1 0.998047
2017-11-17T21:25:11.757683: step 1770, loss 0.0506874, acc 0.996094, f1 0.996092
2017-11-17T21:25:12.280556: step 1775, loss 0.0459265, acc 0.996094, f1 0.996094
2017-11-17T21:25:12.787856: step 1780, loss 0.0469388, acc 0.995117, f1 0.995118
Current epoch:  99
2017-11-17T21:25:13.278394: step 1785, loss 0.0470566, acc 0.99707, f1 0.997071
2017-11-17T21:25:13.796884: step 1790, loss 0.0491562, acc 0.994141, f1 0.994142
2017-11-17T21:25:14.298429: step 1795, loss 0.0549314, acc 0.993164, f1 0.993164
2017-11-17T21:25:14.754546: step 1800, loss 0.044419, acc 0.995283, f1 0.99528

Evaluation:
2017-11-17T21:25:15.353828: step 1800, loss 1.18435, acc 0.591702, f1 0.591225

Current epoch:  100
2017-11-17T21:25:15.836215: step 1805, loss 0.0430611, acc 0.99707, f1 0.99707
2017-11-17T21:25:16.313769: step 1810, loss 0.0437556, acc 0.996094, f1 0.996095
2017-11-17T21:25:16.833370: step 1815, loss 

2017-11-17T21:26:00.974203: step 2200, loss 1.34488, acc 0.590393, f1 0.58995

2017-11-17T21:26:01.488317: step 2205, loss 0.0211679, acc 0.998047, f1 0.998047
2017-11-17T21:26:01.994865: step 2210, loss 0.022337, acc 0.99707, f1 0.997071
Current epoch:  123
2017-11-17T21:26:02.466049: step 2215, loss 0.019806, acc 0.998047, f1 0.998047
2017-11-17T21:26:03.003446: step 2220, loss 0.0205626, acc 0.996094, f1 0.996094
2017-11-17T21:26:03.497322: step 2225, loss 0.0192976, acc 0.999023, f1 0.999023
2017-11-17T21:26:04.009480: step 2230, loss 0.0298275, acc 0.995117, f1 0.995119
Current epoch:  124
2017-11-17T21:26:04.488355: step 2235, loss 1.20765, acc 0.615234, f1 0.531036
2017-11-17T21:26:04.992419: step 2240, loss 0.0286385, acc 1, f1 1
2017-11-17T21:26:05.485936: step 2245, loss 0.0243099, acc 0.99707, f1 0.997072
2017-11-17T21:26:05.971732: step 2250, loss 0.0229762, acc 0.998428, f1 0.998428

Evaluation:
2017-11-17T21:26:06.544169: step 2250, loss 1.30806, acc 0.589036, f1 0.588374

2017-11-17T21:26:50.832246: step 2645, loss 0.0135304, acc 0.999023, f1 0.999023
Current epoch:  147
2017-11-17T21:26:51.341985: step 2650, loss 0.0142829, acc 0.999023, f1 0.999023

Evaluation:
2017-11-17T21:26:51.944744: step 2650, loss 1.44084, acc 0.587873, f1 0.588105

2017-11-17T21:26:52.490154: step 2655, loss 0.0126971, acc 0.998047, f1 0.998048
2017-11-17T21:26:52.983662: step 2660, loss 0.0147691, acc 0.996094, f1 0.99609
Current epoch:  148
2017-11-17T21:26:53.453062: step 2665, loss 0.0108472, acc 1, f1 1
2017-11-17T21:26:53.963987: step 2670, loss 0.0138781, acc 0.99707, f1 0.997071
2017-11-17T21:26:54.451973: step 2675, loss 0.0130161, acc 0.998047, f1 0.998051
2017-11-17T21:26:54.956900: step 2680, loss 0.0114469, acc 0.998047, f1 0.998047
Current epoch:  149
2017-11-17T21:26:55.439220: step 2685, loss 0.0125751, acc 0.998047, f1 0.998047
2017-11-17T21:26:55.924000: step 2690, loss 0.0148008, acc 0.996094, f1 0.996098
2017-11-17T21:26:56.467580: step 2695, loss 0.0101745

2017-11-17T21:27:40.977315: step 3085, loss 0.00862041, acc 1, f1 1
2017-11-17T21:27:41.480189: step 3090, loss 0.0136617, acc 0.99707, f1 0.997067
2017-11-17T21:27:41.983979: step 3095, loss 0.00722829, acc 1, f1 1
Current epoch:  172
2017-11-17T21:27:42.504887: step 3100, loss 0.00799536, acc 0.999023, f1 0.999024

Evaluation:
2017-11-17T21:27:43.083573: step 3100, loss 1.51283, acc 0.588648, f1 0.588307

2017-11-17T21:27:43.592921: step 3105, loss 0.00953233, acc 0.998047, f1 0.998047
2017-11-17T21:27:44.125382: step 3110, loss 0.00968772, acc 0.998047, f1 0.998048
Current epoch:  173
2017-11-17T21:27:44.637324: step 3115, loss 0.00893983, acc 0.999023, f1 0.999023
2017-11-17T21:27:45.153345: step 3120, loss 0.0119802, acc 0.99707, f1 0.99707
2017-11-17T21:27:45.675480: step 3125, loss 0.00897494, acc 0.998047, f1 0.998047
2017-11-17T21:27:46.193141: step 3130, loss 0.00995636, acc 0.996094, f1 0.996095
Current epoch:  174
2017-11-17T21:27:46.681295: step 3135, loss 0.00726756, acc 

2017-11-17T21:28:30.543820: step 3520, loss 0.0115942, acc 0.998047, f1 0.998047
2017-11-17T21:28:31.068212: step 3525, loss 0.0123883, acc 0.998047, f1 0.998047
Current epoch:  196
2017-11-17T21:28:31.567611: step 3530, loss 0.0121059, acc 0.996094, f1 0.996094
2017-11-17T21:28:32.060940: step 3535, loss 0.00841089, acc 0.999023, f1 0.999023
2017-11-17T21:28:32.563623: step 3540, loss 0.0068251, acc 1, f1 1
2017-11-17T21:28:33.096238: step 3545, loss 0.0108673, acc 0.99707, f1 0.99707
Current epoch:  197
2017-11-17T21:28:33.568608: step 3550, loss 0.00778201, acc 0.999023, f1 0.999023

Evaluation:
2017-11-17T21:28:34.180251: step 3550, loss 1.57104, acc 0.587388, f1 0.587219

2017-11-17T21:28:34.706973: step 3555, loss 0.0125152, acc 0.994141, f1 0.994149
2017-11-17T21:28:35.233373: step 3560, loss 0.00674903, acc 0.999023, f1 0.999025
Current epoch:  198
2017-11-17T21:28:35.708241: step 3565, loss 0.00573806, acc 1, f1 1
2017-11-17T21:28:36.204267: step 3570, loss 0.00668056, acc 0.9

2017-11-15T23:45:49.310977: step 28650, loss 2.2659, acc 0.591896, f1 0.590577

2017-11-15T23:45:49.825484: step 28655, loss 0.00386814, acc 0.996094, f1 0.996094
Current epoch:  1592
2017-11-15T23:45:50.310022: step 28660, loss 0.00153759, acc 0.999023, f1 0.999023
2017-11-15T23:45:50.822744: step 28665, loss 0.00258661, acc 0.998047, f1 0.998047
2017-11-15T23:45:51.352169: step 28670, loss 0.00137868, acc 0.999023, f1 0.999023
Current epoch:  1593
2017-11-15T23:45:51.842602: step 28675, loss 0.000639613, acc 1, f1 1
2017-11-15T23:45:52.360281: step 28680, loss 0.00294478, acc 0.99707, f1 0.997082
2017-11-15T23:45:52.875169: step 28685, loss 0.00427626, acc 0.99707, f1 0.997069
2017-11-15T23:45:53.398795: step 28690, loss 0.00130105, acc 0.999023, f1 0.999022
Current epoch:  1594
2017-11-15T23:45:53.891669: step 28695, loss 0.00191334, acc 0.999023, f1 0.999024
2017-11-15T23:45:54.417607: step 28700, loss 5.42638e-05, acc 1, f1 1

Evaluation:
2017-11-15T23:45:55.096665: step 28700, lo

2017-11-15T23:46:40.067980: step 29085, loss 0.00184323, acc 0.999023, f1 0.999023
Current epoch:  1616
2017-11-15T23:46:40.556816: step 29090, loss 0.00191371, acc 1, f1 1
2017-11-15T23:46:41.092781: step 29095, loss 0.00320513, acc 0.996094, f1 0.996094
2017-11-15T23:46:41.608201: step 29100, loss 0.00118186, acc 0.999023, f1 0.999023

Evaluation:
2017-11-15T23:46:42.262991: step 29100, loss 2.27078, acc 0.59112, f1 0.590283

2017-11-15T23:46:42.781910: step 29105, loss 0.00250555, acc 0.99707, f1 0.997071
Current epoch:  1617
2017-11-15T23:46:43.274756: step 29110, loss 0.0033572, acc 0.996094, f1 0.996096
2017-11-15T23:46:43.793173: step 29115, loss 6.6246e-05, acc 1, f1 1
2017-11-15T23:46:44.310588: step 29120, loss 0.00123017, acc 0.999023, f1 0.999023
Current epoch:  1618
2017-11-15T23:46:44.805440: step 29125, loss 0.000822068, acc 0.999023, f1 0.999022
2017-11-15T23:46:45.320347: step 29130, loss 0.00328387, acc 0.99707, f1 0.997072
2017-11-15T23:46:45.838765: step 29135, loss

2017-11-15T23:47:30.286891: step 29510, loss 0.00205923, acc 0.998047, f1 0.998047
2017-11-15T23:47:30.807905: step 29515, loss 0.00250676, acc 0.998047, f1 0.998046
2017-11-15T23:47:31.302672: step 29520, loss 6.4664e-05, acc 1, f1 1
Current epoch:  1640
2017-11-15T23:47:31.828511: step 29525, loss 0.000496725, acc 1, f1 1
2017-11-15T23:47:32.363044: step 29530, loss 7.05721e-05, acc 1, f1 1
2017-11-15T23:47:32.900029: step 29535, loss 0.00557571, acc 0.995117, f1 0.995118
Current epoch:  1641
2017-11-15T23:47:33.418057: step 29540, loss 0.00222187, acc 0.998047, f1 0.998047
2017-11-15T23:47:33.948509: step 29545, loss 0.00401678, acc 0.998047, f1 0.998047
2017-11-15T23:47:34.497008: step 29550, loss 0.00285274, acc 0.998047, f1 0.998047

Evaluation:
2017-11-15T23:47:35.260093: step 29550, loss 2.30217, acc 0.577695, f1 0.579102

2017-11-15T23:47:35.792047: step 29555, loss 8.26115e-05, acc 1, f1 1
Current epoch:  1642
2017-11-15T23:47:36.355086: step 29560, loss 0.00165775, acc 0.999

2017-11-15T23:48:20.732432: step 29945, loss 0.00222861, acc 0.998047, f1 0.998047
2017-11-15T23:48:21.250221: step 29950, loss 0.00244983, acc 0.998047, f1 0.998047

Evaluation:
2017-11-15T23:48:21.929530: step 29950, loss 2.29064, acc 0.585401, f1 0.585109

Current epoch:  1664
2017-11-15T23:48:22.421052: step 29955, loss 0.00289479, acc 0.998047, f1 0.998047
2017-11-15T23:48:22.943479: step 29960, loss 0.00271034, acc 0.998047, f1 0.998043
2017-11-15T23:48:23.472147: step 29965, loss 0.00134792, acc 1, f1 1
2017-11-15T23:48:23.965653: step 29970, loss 0.00267524, acc 0.998428, f1 0.998427
Current epoch:  1665
2017-11-15T23:48:24.494298: step 29975, loss 0.00256825, acc 0.998047, f1 0.998047
2017-11-15T23:48:25.012842: step 29980, loss 0.00324159, acc 0.99707, f1 0.997069
2017-11-15T23:48:25.547297: step 29985, loss 0.00471571, acc 0.996094, f1 0.996094
Current epoch:  1666
2017-11-15T23:48:26.052196: step 29990, loss 0.000851692, acc 0.999023, f1 0.999022
2017-11-15T23:48:26.587744:

2017-11-15T23:49:10.407259: step 30370, loss 0.00117094, acc 0.999023, f1 0.999023
2017-11-15T23:49:10.927773: step 30375, loss 0.00201438, acc 0.998047, f1 0.998047
2017-11-15T23:49:11.448195: step 30380, loss 0.00127567, acc 0.999023, f1 0.999023
Current epoch:  1688
2017-11-15T23:49:11.940540: step 30385, loss 0.00151738, acc 0.998047, f1 0.998047
2017-11-15T23:49:12.457955: step 30390, loss 0.00311132, acc 0.99707, f1 0.997069
2017-11-15T23:49:12.981385: step 30395, loss 0.00214663, acc 0.998047, f1 0.998046
2017-11-15T23:49:13.509843: step 30400, loss 0.00352712, acc 0.99707, f1 0.99707

Evaluation:
2017-11-15T23:49:14.168666: step 30400, loss 2.28842, acc 0.590248, f1 0.58809

Current epoch:  1689
2017-11-15T23:49:14.660511: step 30405, loss 0.00616244, acc 0.995117, f1 0.995115
2017-11-15T23:49:15.193500: step 30410, loss 0.002057, acc 0.998047, f1 0.998046
2017-11-15T23:49:15.719434: step 30415, loss 0.000653468, acc 1, f1 1
2017-11-15T23:49:16.217295: step 30420, loss 0.003793

2017-11-15T23:49:59.898750: step 30800, loss 0.00159446, acc 0.998047, f1 0.998047

Evaluation:
2017-11-15T23:50:00.560059: step 30800, loss 2.37724, acc 0.5807, f1 0.577226

2017-11-15T23:50:01.075969: step 30805, loss 0.00206911, acc 0.998047, f1 0.998047
2017-11-15T23:50:01.603927: step 30810, loss 0.00260104, acc 0.99707, f1 0.997069
2017-11-15T23:50:02.122846: step 30815, loss 0.000810922, acc 0.999023, f1 0.999023
Current epoch:  1712
2017-11-15T23:50:02.610178: step 30820, loss 0.00172141, acc 0.999023, f1 0.999023
2017-11-15T23:50:03.125600: step 30825, loss 0.000596868, acc 1, f1 1
2017-11-15T23:50:03.649031: step 30830, loss 0.00309252, acc 0.99707, f1 0.997071
Current epoch:  1713
2017-11-15T23:50:04.141878: step 30835, loss 0.00235275, acc 0.998047, f1 0.998047
2017-11-15T23:50:04.672339: step 30840, loss 0.00107542, acc 1, f1 1
2017-11-15T23:50:05.190757: step 30845, loss 0.00040276, acc 1, f1 1
2017-11-15T23:50:05.705163: step 30850, loss 0.00139275, acc 0.999023, f1 0.99

2017-11-15T23:50:50.490649: step 31230, loss 0.0015668, acc 0.998428, f1 0.998428
Current epoch:  1735
2017-11-15T23:50:51.012576: step 31235, loss 0.00153637, acc 0.999023, f1 0.999024
2017-11-15T23:50:51.528988: step 31240, loss 0.00087301, acc 0.999023, f1 0.999025
2017-11-15T23:50:52.055427: step 31245, loss 0.0017022, acc 0.998047, f1 0.998047
Current epoch:  1736
2017-11-15T23:50:52.555317: step 31250, loss 0.00179216, acc 0.999023, f1 0.999024

Evaluation:
2017-11-15T23:50:53.218186: step 31250, loss 2.32424, acc 0.576483, f1 0.578607

2017-11-15T23:50:53.757224: step 31255, loss 0.00256009, acc 0.99707, f1 0.997067
2017-11-15T23:50:54.290786: step 31260, loss 0.00146377, acc 0.999023, f1 0.999023
2017-11-15T23:50:54.809704: step 31265, loss 0.00167324, acc 0.998047, f1 0.998047
Current epoch:  1737
2017-11-15T23:50:55.294028: step 31270, loss 0.00256034, acc 0.99707, f1 0.997071
2017-11-15T23:50:55.810142: step 31275, loss 0.00205763, acc 0.999023, f1 0.999024
2017-11-15T23:50:

2017-11-15T23:51:40.364835: step 31655, loss 0.00384289, acc 0.996094, f1 0.996096
2017-11-15T23:51:40.893799: step 31660, loss 0.00213699, acc 0.998047, f1 0.998047
Current epoch:  1759
2017-11-15T23:51:41.389204: step 31665, loss 0.000770643, acc 0.999023, f1 0.999023
2017-11-15T23:51:41.915656: step 31670, loss 0.0029624, acc 0.99707, f1 0.99707
2017-11-15T23:51:42.443641: step 31675, loss 0.00173988, acc 0.998047, f1 0.998048
2017-11-15T23:51:42.933980: step 31680, loss 0.00152629, acc 0.998428, f1 0.998425
Current epoch:  1760
2017-11-15T23:51:43.475462: step 31685, loss 0.00220178, acc 0.998047, f1 0.998047
2017-11-15T23:51:43.999894: step 31690, loss 0.00140655, acc 0.999023, f1 0.999023
2017-11-15T23:51:44.515302: step 31695, loss 0.00366319, acc 0.99707, f1 0.997066
Current epoch:  1761
2017-11-15T23:51:45.007673: step 31700, loss 0.000468343, acc 1, f1 1

Evaluation:
2017-11-15T23:51:45.646420: step 31700, loss 2.28277, acc 0.59272, f1 0.591959

2017-11-15T23:51:46.161327: st

2017-11-15T23:52:30.934799: step 32090, loss 0.000914494, acc 0.999023, f1 0.999023
Current epoch:  1783
2017-11-15T23:52:31.434665: step 32095, loss 0.00212047, acc 0.998047, f1 0.998048
2017-11-15T23:52:31.968625: step 32100, loss 0.00207465, acc 0.999023, f1 0.999022

Evaluation:
2017-11-15T23:52:32.635585: step 32100, loss 2.29561, acc 0.588455, f1 0.587431

2017-11-15T23:52:33.175041: step 32105, loss 5.96303e-05, acc 1, f1 1
2017-11-15T23:52:33.701481: step 32110, loss 0.00319382, acc 0.99707, f1 0.997069
Current epoch:  1784
2017-11-15T23:52:34.201424: step 32115, loss 0.000825372, acc 0.999023, f1 0.999023
2017-11-15T23:52:34.722348: step 32120, loss 0.00182351, acc 0.999023, f1 0.999024
2017-11-15T23:52:35.239762: step 32125, loss 0.000595488, acc 1, f1 1
2017-11-15T23:52:35.726093: step 32130, loss 0.00153949, acc 0.998428, f1 0.998428
Current epoch:  1785
2017-11-15T23:52:36.261557: step 32135, loss 4.81049e-05, acc 1, f1 1
2017-11-15T23:52:36.786490: step 32140, loss 4.8833

2017-11-15T23:53:21.264558: step 32515, loss 0.00325604, acc 0.99707, f1 0.997069
2017-11-15T23:53:21.802626: step 32520, loss 0.00222876, acc 0.99707, f1 0.99707
2017-11-15T23:53:22.335603: step 32525, loss 0.00281493, acc 0.99707, f1 0.997069
Current epoch:  1807
2017-11-15T23:53:22.838478: step 32530, loss 0.00275699, acc 0.998047, f1 0.998046
2017-11-15T23:53:23.376852: step 32535, loss 0.00250701, acc 0.99707, f1 0.99707
2017-11-15T23:53:23.903792: step 32540, loss 0.00147774, acc 0.999023, f1 0.999023
Current epoch:  1808
2017-11-15T23:53:24.405779: step 32545, loss 0.00231538, acc 0.998047, f1 0.998047
2017-11-15T23:53:24.934223: step 32550, loss 0.000838263, acc 0.999023, f1 0.999023

Evaluation:
2017-11-15T23:53:25.609128: step 32550, loss 2.29915, acc 0.584723, f1 0.58574

2017-11-15T23:53:26.130555: step 32555, loss 0.00278418, acc 0.998047, f1 0.998048
2017-11-15T23:53:26.650475: step 32560, loss 0.00239347, acc 0.998047, f1 0.998047
Current epoch:  1809
2017-11-15T23:53:27

2017-11-15T23:54:11.902907: step 32950, loss 0.00196327, acc 0.998047, f1 0.998047

Evaluation:
2017-11-15T23:54:12.589784: step 32950, loss 2.34388, acc 0.579924, f1 0.579687

2017-11-15T23:54:13.111041: step 32955, loss 0.00109506, acc 0.999023, f1 0.999023
Current epoch:  1831
2017-11-15T23:54:13.611408: step 32960, loss 0.00149799, acc 0.999023, f1 0.999022
2017-11-15T23:54:14.135841: step 32965, loss 0.00099666, acc 0.999023, f1 0.999022
2017-11-15T23:54:14.666793: step 32970, loss 0.00284557, acc 0.998047, f1 0.998047
2017-11-15T23:54:15.213788: step 32975, loss 0.00274356, acc 0.99707, f1 0.997071
Current epoch:  1832
2017-11-15T23:54:15.713830: step 32980, loss 0.00156528, acc 0.999023, f1 0.999023
2017-11-15T23:54:16.244824: step 32985, loss 0.00153846, acc 0.998047, f1 0.998048
2017-11-15T23:54:16.774292: step 32990, loss 0.00486877, acc 0.995117, f1 0.995114
Current epoch:  1833
2017-11-15T23:54:17.278183: step 32995, loss 0.00228159, acc 0.999023, f1 0.999023
2017-11-15T23:

2017-11-15T23:55:02.559112: step 33375, loss 0.00346669, acc 0.99707, f1 0.99707
2017-11-15T23:55:03.084074: step 33380, loss 0.00399803, acc 0.996094, f1 0.996094
2017-11-15T23:55:03.609009: step 33385, loss 0.00363328, acc 0.99707, f1 0.99707
2017-11-15T23:55:04.101353: step 33390, loss 0.00329851, acc 0.996855, f1 0.996855
Current epoch:  1855
2017-11-15T23:55:04.631804: step 33395, loss 0.00143517, acc 0.999023, f1 0.999023
2017-11-15T23:55:05.162856: step 33400, loss 0.00204742, acc 0.999023, f1 0.999022

Evaluation:
2017-11-15T23:55:05.807115: step 33400, loss 2.39319, acc 0.577695, f1 0.577218

2017-11-15T23:55:06.321041: step 33405, loss 0.00204316, acc 0.998047, f1 0.998046
Current epoch:  1856
2017-11-15T23:55:06.808899: step 33410, loss 0.00185929, acc 1, f1 1
2017-11-15T23:55:07.326813: step 33415, loss 0.00179409, acc 0.999023, f1 0.999023
2017-11-15T23:55:07.850248: step 33420, loss 0.00103028, acc 0.999023, f1 0.999023
2017-11-15T23:55:08.380196: step 33425, loss 0.00578

2017-11-15T23:55:52.823198: step 33800, loss 2.30864, acc 0.585304, f1 0.58531

Current epoch:  1878
2017-11-15T23:55:53.320055: step 33805, loss 0.000579561, acc 1, f1 1
2017-11-15T23:55:53.849048: step 33810, loss 0.000525835, acc 1, f1 1
2017-11-15T23:55:54.374985: step 33815, loss 0.00365831, acc 0.996094, f1 0.996094
2017-11-15T23:55:54.899993: step 33820, loss 0.00395094, acc 0.996094, f1 0.996094
Current epoch:  1879
2017-11-15T23:55:55.396350: step 33825, loss 0.000656922, acc 1, f1 1
2017-11-15T23:55:55.924292: step 33830, loss 0.00214968, acc 0.998047, f1 0.998047
2017-11-15T23:55:56.459756: step 33835, loss 0.00265433, acc 0.99707, f1 0.997066
2017-11-15T23:55:56.967219: step 33840, loss 0.00176286, acc 0.998428, f1 0.99843
Current epoch:  1880
2017-11-15T23:55:57.496165: step 33845, loss 5.54171e-05, acc 1, f1 1
2017-11-15T23:55:58.019595: step 33850, loss 0.00512765, acc 0.995117, f1 0.995117

Evaluation:
2017-11-15T23:55:58.695068: step 33850, loss 2.30297, acc 0.588067, 

2017-11-15T23:56:43.690551: step 34235, loss 0.00275748, acc 0.99707, f1 0.99707
Current epoch:  1902
2017-11-15T23:56:44.183898: step 34240, loss 0.00204252, acc 0.999023, f1 0.999023
2017-11-15T23:56:44.708401: step 34245, loss 0.00382085, acc 0.99707, f1 0.997074
2017-11-15T23:56:45.232923: step 34250, loss 0.00297496, acc 0.99707, f1 0.997069

Evaluation:
2017-11-15T23:56:45.912799: step 34250, loss 2.3715, acc 0.59209, f1 0.587197

Current epoch:  1903
2017-11-15T23:56:46.417224: step 34255, loss 0.000832774, acc 0.999023, f1 0.999022
2017-11-15T23:56:46.941658: step 34260, loss 0.00109041, acc 0.999023, f1 0.999023
2017-11-15T23:56:47.476139: step 34265, loss 0.00201358, acc 0.998047, f1 0.998046
2017-11-15T23:56:48.007591: step 34270, loss 0.00113494, acc 0.999023, f1 0.999025
Current epoch:  1904
2017-11-15T23:56:48.510465: step 34275, loss 4.54996e-05, acc 1, f1 1
2017-11-15T23:56:49.045011: step 34280, loss 0.000774745, acc 0.999023, f1 0.999023
2017-11-15T23:56:49.574508: st

2017-11-15T23:57:34.175152: step 34660, loss 0.00188707, acc 0.998047, f1 0.998046
2017-11-15T23:57:34.702596: step 34665, loss 0.00175806, acc 0.998047, f1 0.998047
Current epoch:  1926
2017-11-15T23:57:35.195943: step 34670, loss 0.00167867, acc 1, f1 1
2017-11-15T23:57:35.740932: step 34675, loss 4.38102e-05, acc 1, f1 1
2017-11-15T23:57:36.268894: step 34680, loss 0.00164087, acc 0.998047, f1 0.998048
2017-11-15T23:57:36.800436: step 34685, loss 0.000974823, acc 0.999023, f1 0.999022
Current epoch:  1927
2017-11-15T23:57:37.300805: step 34690, loss 0.00263804, acc 0.999023, f1 0.999024
2017-11-15T23:57:37.824735: step 34695, loss 0.00122397, acc 1, f1 1
2017-11-15T23:57:38.346161: step 34700, loss 0.00256147, acc 0.99707, f1 0.997071

Evaluation:
2017-11-15T23:57:39.016511: step 34700, loss 2.32407, acc 0.584044, f1 0.584504

Current epoch:  1928
2017-11-15T23:57:39.517441: step 34705, loss 0.00268897, acc 0.999023, f1 0.999023
2017-11-15T23:57:40.049395: step 34710, loss 4.29851e-

2017-11-15T23:58:25.107635: step 35095, loss 0.000995744, acc 0.999023, f1 0.999024
2017-11-15T23:58:25.610054: step 35100, loss 0.00443643, acc 0.995283, f1 0.995283

Evaluation:
2017-11-15T23:58:26.287908: step 35100, loss 2.3926, acc 0.574641, f1 0.574447

Current epoch:  1950
2017-11-15T23:58:26.812356: step 35105, loss 0.00232507, acc 0.999023, f1 0.999023
2017-11-15T23:58:27.339797: step 35110, loss 0.00133155, acc 1, f1 1
2017-11-15T23:58:27.863229: step 35115, loss 0.00213038, acc 0.998047, f1 0.998047
Current epoch:  1951
2017-11-15T23:58:28.357078: step 35120, loss 0.0039972, acc 0.999023, f1 0.999023
2017-11-15T23:58:28.888531: step 35125, loss 6.42001e-05, acc 1, f1 1
2017-11-15T23:58:29.406948: step 35130, loss 4.24269e-05, acc 1, f1 1
2017-11-15T23:58:29.942470: step 35135, loss 0.00151535, acc 0.999023, f1 0.999023
Current epoch:  1952
2017-11-15T23:58:30.451362: step 35140, loss 0.00317315, acc 0.99707, f1 0.997069
2017-11-15T23:58:30.981309: step 35145, loss 0.00167158

2017-11-15T23:59:16.047441: step 35525, loss 0.003397, acc 0.996094, f1 0.996097
2017-11-15T23:59:16.573172: step 35530, loss 0.00261285, acc 0.99707, f1 0.99707
Current epoch:  1974
2017-11-15T23:59:17.070532: step 35535, loss 0.00128627, acc 1, f1 1
2017-11-15T23:59:17.598976: step 35540, loss 0.00305306, acc 0.99707, f1 0.99707
2017-11-15T23:59:18.122408: step 35545, loss 0.00219351, acc 0.998047, f1 0.998051
2017-11-15T23:59:18.618262: step 35550, loss 4.03077e-05, acc 1, f1 1

Evaluation:
2017-11-15T23:59:19.275559: step 35550, loss 2.32959, acc 0.583268, f1 0.58375

Current epoch:  1975
2017-11-15T23:59:19.792471: step 35555, loss 0.000779976, acc 0.999023, f1 0.999023
2017-11-15T23:59:20.317908: step 35560, loss 0.00468215, acc 0.995117, f1 0.995117
2017-11-15T23:59:20.836826: step 35565, loss 0.00181298, acc 0.998047, f1 0.998048
Current epoch:  1976
2017-11-15T23:59:21.328169: step 35570, loss 0.000673392, acc 1, f1 1
2017-11-15T23:59:21.843578: step 35575, loss 0.000552767, a

2017-11-16T00:00:06.181691: step 35955, loss 0.00277737, acc 0.99707, f1 0.99707
2017-11-16T00:00:06.701612: step 35960, loss 4.48798e-05, acc 1, f1 1
Current epoch:  1998
2017-11-16T00:00:07.179440: step 35965, loss 0.000729787, acc 0.999023, f1 0.999023
2017-11-16T00:00:07.696354: step 35970, loss 0.00219639, acc 0.998047, f1 0.998046
2017-11-16T00:00:08.209256: step 35975, loss 0.00266371, acc 0.99707, f1 0.997069
2017-11-16T00:00:08.727171: step 35980, loss 4.40683e-05, acc 1, f1 1
Current epoch:  1999
2017-11-16T00:00:09.208988: step 35985, loss 0.00210667, acc 0.998047, f1 0.998047
2017-11-16T00:00:09.730914: step 35990, loss 0.00159984, acc 0.998047, f1 0.998048
2017-11-16T00:00:10.246324: step 35995, loss 0.00105196, acc 0.999023, f1 0.999023
2017-11-16T00:00:10.738167: step 36000, loss 0.00331172, acc 0.996855, f1 0.996856

Evaluation:
2017-11-16T00:00:11.392957: step 36000, loss 2.36229, acc 0.580215, f1 0.580538

