# MBTI Parallel Classification Model with Neural BOW (T/F Axis)

First, load libraries and useful functions from class:

In [1]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division


import os, sys, re, json, time, datetime, shutil
from importlib import reload
import collections, itertools

# NumPy and TensorFlow
import numpy as np
import pandas as pd
import tensorflow as tf
import patched_numpy_io
assert(tf.__version__.startswith("1."))

# Utils and Helper libraries
# import nltk
import utils, vocabulary
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import math
from nltk.corpus import stopwords

  from ._conv import register_converters as _register_converters


## Specifications for Binary Classification NBOW for MBTI

In this baseline, the task is to predict the first MBTI axis (T vs. F) given a text string. We will model after the A2 assignment, with Architecture and Parameters defined below.

### Pre-Processing:
* Minimial pre-processing, only separating punctuation from text and lower-case all text
* Assigning words to numerical indices based on a fixed Vocab size, defined by word frequency in training set
* Pulled out first axis of all target labels, assigned to binary (T = 0, F = 1)

### Architecture:
* Encoder: Bag of Words 
* Decoder: Softmax
* Classification: Binary (2 MBTI types - T or F)

### Parameters
* Batch Size: 25 
* Text length: 100
* Vocabulary size (V): ~328K - removed stopwords
* Embedding Size: 50
* Hidden Dimensions: 25

### Training:
* Epochs = 10 
* 80% train, 20% test
* Loss: Sparse Softmax Cross Entropy 
* Optimizers: Adagrad Optimizer

## Load Corpus & Pre-Process

In [2]:
#load data
df = pd.read_csv('./mbti_1.csv')
df.head(5)

Unnamed: 0,type,posts
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...
1,ENTP,'I'm finding the lack of me in these posts ver...
2,INTP,'Good one _____ https://www.youtube.com/wat...
3,INTJ,"'Dear INTP, I enjoyed our conversation the o..."
4,ENTJ,'You're fired.|||That's another silly misconce...


In [3]:
# function to clean and tokenize sentence ["Hello world."] into list of words ["hello world"]
def clean(sentence):
    ignore_words = ['a']
    words = re.sub("[^\w]", " ",  sentence).split() #nltk.word_tokenize(sentence)
    words_cleaned = [w.lower() for w in words if w not in ignore_words]
    stop_words = set(stopwords.words('english'))
    words_cleaned = [w for w in words_cleaned if not w in stop_words]
    words_string = ''.join(words_cleaned)
    return words_string

In [4]:
# split posts per users into separate sentences
post = []
utype = []
user = []

for index, row in df.iterrows():
    posts = row['posts'].split('|||')
    posts_clean = []
    for sentence in posts:
        posts_clean.append(clean(sentence))
    post.extend(posts_clean)
#     post.extend(posts)
    utype.extend([row['type'] for i in range(len(posts))])
    user.extend([index for i in range(len(posts))])
    
short_posts = pd.DataFrame({"user": user,"type": utype,"post": post})
print(short_posts.shape)
short_posts.head(5)

(422845, 3)


Unnamed: 0,user,type,post
0,0,INFJ,httpwwwyoutubecomwatchvqsxhcwe3krw
1,0,INFJ,http41mediatumblrcomtumblr_lfouy03pma1qa1rooo1...
2,0,INFJ,enfpintjmomentshttpswwwyoutubecomwatchviz7le1g...
3,0,INFJ,lifechangingexperiencelife
4,0,INFJ,httpwwwyoutubecomwatchvvxzeywwrdw8httpwwwyoutu...


In [5]:
# Split data: 80% train, 20% test
post_train, post_test, label_train, label_test = train_test_split(np.array(short_posts['post']), 
                                                    np.array(short_posts['type']), 
                                                    test_size=0.2, 
                                                    random_state=88)


print("MBIT posts", post_train[:5])
print('')
print("MBTI Labels: ",label_train[:5])

MBIT posts ['httpswwwyoutubecomwatchvbxvkaah2d7m'
 'isfjsinfpsbalancereallywellthinklearncommunicatesjschoosingwordstonevoicecarefullynpslearningtakethingslesspersonallysj'
 'seekrecognitionfame'
 'honestmaybegivingvibesselfassuredthinkbullygoinggosomeonethinkfightbacksomeoneweak'
 'probablythinkingreallypersonallypreferaxbcdyfunctionstackcomparedgrantallowspartsgrantgetsrightstillconsistentjunge']

MBTI Labels:  ['INTP' 'INFP' 'INTP' 'ENFP' 'INTJ']


In [6]:
# Build a vocabulary (V size is defaulted to full text) for train corpus
vocab_mbti = vocabulary.Vocabulary((utils.canonicalize_word(w) for w in post_train))
vocab_mbti.size

328905

In [7]:
# tokenize and canonicalize train and test sets
x_train = []
for post in post_train:
    x_train.append(vocab_mbti.words_to_ids(post.split()))

x_test = []
for post in post_test:
    x_test.append(vocab_mbti.words_to_ids(post.split()))

In [8]:
print("Original Text: ",post_train[88])
print("Canonicalized Text: ", x_train[88])
print("Max lengths of texts: ", max([len(x) for x in x_train+x_test]))

Original Text:  agreebrutal
Canonicalized Text:  [2305]
Max lengths of texts:  1


In [9]:
print(len(label_train[0]))


4


In [10]:
def binary_mbti(string):
    label_bin = []
    if string[0]=="E":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[1]=="N":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[2]=="F":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[3]=="J":
        label_bin.append(0)
    else:
        label_bin.append(1)
        
    assert len(label_bin) == 4,"Not a valid MBTI type"
    return label_bin

In [11]:
print(label_train[0])
print(binary_mbti(label_train[0]))

INTP
[1, 0, 1, 1]


In [12]:
y_train_id = list(map(lambda x: binary_mbti(x), label_train))
y_test_id = list(map(lambda x: binary_mbti(x), label_test))

print(y_train_id[0:5])
print(label_train[0:5])
print(y_test_id[0:5])
print(label_test[0:5])

[[1, 0, 1, 1], [1, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1], [1, 0, 1, 0]]
['INTP' 'INFP' 'INTP' 'ENFP' 'INTJ']
[[1, 0, 0, 1], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 1]]
['INFP' 'INFJ' 'INFJ' 'INFJ' 'INFP']


## Bulid the NBOW Model

In [13]:
def pad_np_array(example_ids, max_len=35, pad_id=0):
    """Pad a list of lists of ids into a rectangular NumPy array.

    """
    arr = np.full([len(example_ids), max_len], pad_id, dtype=np.int32)
    ns = np.zeros([len(example_ids)], dtype=np.int32)
    for i, ids in enumerate(example_ids):
        cpy_len = min(len(ids), max_len)
        arr[i,:cpy_len] = ids[:cpy_len]
        ns[i] = cpy_len
    return arr, ns

def tokenize_post(post_string):
    return vocab_mbti.words_to_ids(post_string)

In [14]:
def as_padded_array(post_ids, targets, max_len=40, pad_id=0,
                    root_only=False, df_idxs=None):
    """Return the dataset as a (padded) NumPy array.

    """
    #needs to put together the pad_np_array output with  the target labels in dataframe
    
    
    x, ns = pad_np_array(post_ids, max_len=max_len, pad_id=pad_id)
    return x, ns, np.array(targets)

In [15]:
y_train_3 = []
for i in range(len(y_train_id)):
    y_train_3.append(y_train_id[i][2])

y_test_3 = []
for i in range(len(y_test_id)):
    y_test_3.append(y_test_id[i][2])

In [16]:
train_x, train_ns, train_y = as_padded_array(x_train, y_train_3)
test_x, test_ns, test_y = as_padded_array(x_test, y_test_3)

In [17]:
y_test_id[0:5]

[[1, 0, 0, 1], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 1]]

In [18]:
#set up model using tf.estimator

import MBTI_BOW_model; reload(MBTI_BOW_model)

# Specify model hyperparameters as used by model
model_params = dict(V=vocab_mbti.size, embed_dim=50, hidden_dims=[25], num_classes=2,
                    encoder_type='bow',
                    lr=0.1, optimizer='adagrad', beta=0.01)

checkpoint_dir = "/tmp/tf_bow_sst_" + datetime.datetime.now().strftime("%Y%m%d-%H%M")
if os.path.isdir(checkpoint_dir):
    shutil.rmtree(checkpoint_dir)

vocab_mbti.write_projector_config(checkpoint_dir, "Encoder/Embedding_Layer/W_embed")

model = tf.estimator.Estimator(model_fn=MBTI_BOW_model.classifier_model_fn, 
                               params=model_params,
                               model_dir=checkpoint_dir)
print("")
print("To view training (once it starts), run:\n")
print("    tensorboard --logdir='{:s}' --port 6006".format(checkpoint_dir))
print("\nThen in your browser, open: http://localhost:6006")

Vocabulary (328,905 words) written to '/tmp/tf_bow_sst_20181209-0108/metadata.tsv'
Projector config written to /tmp/tf_bow_sst_20181209-0108/projector_config.pbtxt
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tf_bow_sst_20181209-0108', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd044461b38>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}

To view training (once it starts), run:

    tensorboard --logdir='/tmp/tf_bow_sst_20181209-0108' --port 6006

Then in your browser, open: htt

## Train Model

In [19]:
#start training

train_params = dict(batch_size=25, total_epochs=10, eval_every=2)
assert(train_params['total_epochs'] % train_params['eval_every'] == 0)

train_input_fn = patched_numpy_io.numpy_input_fn(
                    x={"ids": train_x, "ns": train_ns}, y=train_y,
                    batch_size=train_params['batch_size'], 
                    num_epochs=train_params['eval_every'], shuffle=True, seed=42
                 )


test_input_fn = tf.estimator.inputs.numpy_input_fn(
                    x={"ids": test_x, "ns": test_ns}, y=test_y,
                    batch_size=25, num_epochs=1, shuffle=False
                )

for _ in range(train_params['total_epochs'] // train_params['eval_every']):
    model.train(input_fn=train_input_fn)


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tf_bow_sst_20181209-0108/model.ckpt.
INFO:tensorflow:loss = 1.0793855, step = 1
INFO:tensorflow:global_step/sec: 306.594
INFO:tensorflow:loss = 0.860101, step = 101 (0.330 sec)
INFO:tensorflow:global_step/sec: 367.076
INFO:tensorflow:loss = 0.80247533, step = 201 (0.272 sec)
INFO:tensorflow:global_step/sec: 361.157
INFO:tensorflow:loss = 0.70512277, step = 301 (0.277 sec)
INFO:tensorflow:global_step/sec: 361.933
INFO:tensorflow:loss = 0.72612745, step = 401 (0.276 sec)
INFO:tensorflow:global_step/sec: 367.919
INFO:tensorflow:loss = 0.7153385, step = 501 (0.272 sec)
INFO:tensorflow:global_step/sec: 350.07
INFO:tensorflow:loss = 0.6884944, step = 601 (0.286 sec)
INFO:tensorflow:global_step/sec: 360.954
IN

INFO:tensorflow:global_step/sec: 378.192
INFO:tensorflow:loss = 0.70048016, step = 8001 (0.264 sec)
INFO:tensorflow:global_step/sec: 385.431
INFO:tensorflow:loss = 0.6995514, step = 8101 (0.259 sec)
INFO:tensorflow:global_step/sec: 379.934
INFO:tensorflow:loss = 0.6992834, step = 8201 (0.263 sec)
INFO:tensorflow:global_step/sec: 379.335
INFO:tensorflow:loss = 0.7056831, step = 8301 (0.265 sec)
INFO:tensorflow:global_step/sec: 376.529
INFO:tensorflow:loss = 0.6999474, step = 8401 (0.265 sec)
INFO:tensorflow:global_step/sec: 378.159
INFO:tensorflow:loss = 0.68634576, step = 8501 (0.264 sec)
INFO:tensorflow:global_step/sec: 376.929
INFO:tensorflow:loss = 0.707908, step = 8601 (0.265 sec)
INFO:tensorflow:global_step/sec: 373.01
INFO:tensorflow:loss = 0.6661285, step = 8701 (0.269 sec)
INFO:tensorflow:global_step/sec: 363.697
INFO:tensorflow:loss = 0.67906964, step = 8801 (0.274 sec)
INFO:tensorflow:global_step/sec: 370.773
INFO:tensorflow:loss = 0.7076591, step = 8901 (0.269 sec)
INFO:tens

INFO:tensorflow:global_step/sec: 368.887
INFO:tensorflow:loss = 0.6939223, step = 16201 (0.271 sec)
INFO:tensorflow:global_step/sec: 366.531
INFO:tensorflow:loss = 0.70190585, step = 16301 (0.273 sec)
INFO:tensorflow:global_step/sec: 379.996
INFO:tensorflow:loss = 0.71561724, step = 16401 (0.263 sec)
INFO:tensorflow:global_step/sec: 379.581
INFO:tensorflow:loss = 0.67442954, step = 16501 (0.264 sec)
INFO:tensorflow:global_step/sec: 389.436
INFO:tensorflow:loss = 0.6993908, step = 16601 (0.256 sec)
INFO:tensorflow:global_step/sec: 376.632
INFO:tensorflow:loss = 0.7189654, step = 16701 (0.266 sec)
INFO:tensorflow:global_step/sec: 378.911
INFO:tensorflow:loss = 0.68685263, step = 16801 (0.264 sec)
INFO:tensorflow:global_step/sec: 383.702
INFO:tensorflow:loss = 0.6524663, step = 16901 (0.261 sec)
INFO:tensorflow:global_step/sec: 377.981
INFO:tensorflow:loss = 0.69339204, step = 17001 (0.265 sec)
INFO:tensorflow:global_step/sec: 371.877
INFO:tensorflow:loss = 0.6549692, step = 17101 (0.269 

INFO:tensorflow:global_step/sec: 366.107
INFO:tensorflow:loss = 0.6741902, step = 24401 (0.273 sec)
INFO:tensorflow:global_step/sec: 377.824
INFO:tensorflow:loss = 0.6869344, step = 24501 (0.265 sec)
INFO:tensorflow:global_step/sec: 373.764
INFO:tensorflow:loss = 0.69338113, step = 24601 (0.268 sec)
INFO:tensorflow:global_step/sec: 371.255
INFO:tensorflow:loss = 0.6865555, step = 24701 (0.269 sec)
INFO:tensorflow:global_step/sec: 367.327
INFO:tensorflow:loss = 0.7010492, step = 24801 (0.272 sec)
INFO:tensorflow:global_step/sec: 376.691
INFO:tensorflow:loss = 0.72944546, step = 24901 (0.265 sec)
INFO:tensorflow:global_step/sec: 378.029
INFO:tensorflow:loss = 0.6722513, step = 25001 (0.265 sec)
INFO:tensorflow:global_step/sec: 374.691
INFO:tensorflow:loss = 0.6797397, step = 25101 (0.267 sec)
INFO:tensorflow:global_step/sec: 375.209
INFO:tensorflow:loss = 0.7301177, step = 25201 (0.266 sec)
INFO:tensorflow:global_step/sec: 366.603
INFO:tensorflow:loss = 0.6790989, step = 25301 (0.273 sec

INFO:tensorflow:global_step/sec: 379.862
INFO:tensorflow:loss = 0.7143968, step = 32064 (0.263 sec)
INFO:tensorflow:global_step/sec: 384.407
INFO:tensorflow:loss = 0.7345164, step = 32164 (0.260 sec)
INFO:tensorflow:global_step/sec: 379.323
INFO:tensorflow:loss = 0.70597327, step = 32264 (0.264 sec)
INFO:tensorflow:global_step/sec: 362.525
INFO:tensorflow:loss = 0.6795871, step = 32364 (0.276 sec)
INFO:tensorflow:global_step/sec: 371.114
INFO:tensorflow:loss = 0.67923915, step = 32464 (0.269 sec)
INFO:tensorflow:global_step/sec: 374.972
INFO:tensorflow:loss = 0.7019055, step = 32564 (0.267 sec)
INFO:tensorflow:global_step/sec: 374.129
INFO:tensorflow:loss = 0.69402045, step = 32664 (0.267 sec)
INFO:tensorflow:global_step/sec: 378.73
INFO:tensorflow:loss = 0.7148095, step = 32764 (0.264 sec)
INFO:tensorflow:global_step/sec: 371.309
INFO:tensorflow:loss = 0.6735117, step = 32864 (0.269 sec)
INFO:tensorflow:global_step/sec: 382.421
INFO:tensorflow:loss = 0.6935436, step = 32964 (0.262 sec

INFO:tensorflow:global_step/sec: 373.693
INFO:tensorflow:loss = 0.673001, step = 40264 (0.268 sec)
INFO:tensorflow:global_step/sec: 378.768
INFO:tensorflow:loss = 0.6796161, step = 40364 (0.264 sec)
INFO:tensorflow:global_step/sec: 375.426
INFO:tensorflow:loss = 0.67231333, step = 40464 (0.266 sec)
INFO:tensorflow:global_step/sec: 380.425
INFO:tensorflow:loss = 0.67897636, step = 40564 (0.263 sec)
INFO:tensorflow:global_step/sec: 379.35
INFO:tensorflow:loss = 0.69410354, step = 40664 (0.264 sec)
INFO:tensorflow:global_step/sec: 371.731
INFO:tensorflow:loss = 0.69380426, step = 40764 (0.269 sec)
INFO:tensorflow:global_step/sec: 361.332
INFO:tensorflow:loss = 0.7163058, step = 40864 (0.277 sec)
INFO:tensorflow:global_step/sec: 377.905
INFO:tensorflow:loss = 0.7012946, step = 40964 (0.265 sec)
INFO:tensorflow:global_step/sec: 375.296
INFO:tensorflow:loss = 0.68037206, step = 41064 (0.267 sec)
INFO:tensorflow:global_step/sec: 372.829
INFO:tensorflow:loss = 0.6689276, step = 41164 (0.269 se

INFO:tensorflow:global_step/sec: 368.811
INFO:tensorflow:loss = 0.6796386, step = 48464 (0.271 sec)
INFO:tensorflow:global_step/sec: 368.749
INFO:tensorflow:loss = 0.6801269, step = 48564 (0.271 sec)
INFO:tensorflow:global_step/sec: 369.726
INFO:tensorflow:loss = 0.6671539, step = 48664 (0.270 sec)
INFO:tensorflow:global_step/sec: 368.328
INFO:tensorflow:loss = 0.7129902, step = 48764 (0.272 sec)
INFO:tensorflow:global_step/sec: 362.991
INFO:tensorflow:loss = 0.6804148, step = 48864 (0.275 sec)
INFO:tensorflow:global_step/sec: 367.889
INFO:tensorflow:loss = 0.67448086, step = 48964 (0.272 sec)
INFO:tensorflow:global_step/sec: 369.015
INFO:tensorflow:loss = 0.686684, step = 49064 (0.271 sec)
INFO:tensorflow:global_step/sec: 371.481
INFO:tensorflow:loss = 0.6527797, step = 49164 (0.269 sec)
INFO:tensorflow:global_step/sec: 371.533
INFO:tensorflow:loss = 0.6866185, step = 49264 (0.269 sec)
INFO:tensorflow:global_step/sec: 367.899
INFO:tensorflow:loss = 0.6731099, step = 49364 (0.272 sec)


INFO:tensorflow:global_step/sec: 359.892
INFO:tensorflow:loss = 0.680759, step = 56127 (0.278 sec)
INFO:tensorflow:global_step/sec: 355.175
INFO:tensorflow:loss = 0.6679222, step = 56227 (0.282 sec)
INFO:tensorflow:global_step/sec: 361.375
INFO:tensorflow:loss = 0.6869202, step = 56327 (0.277 sec)
INFO:tensorflow:global_step/sec: 355.106
INFO:tensorflow:loss = 0.67487574, step = 56427 (0.281 sec)
INFO:tensorflow:global_step/sec: 369.262
INFO:tensorflow:loss = 0.686942, step = 56527 (0.271 sec)
INFO:tensorflow:global_step/sec: 373.931
INFO:tensorflow:loss = 0.70022005, step = 56627 (0.267 sec)
INFO:tensorflow:global_step/sec: 368.632
INFO:tensorflow:loss = 0.7144582, step = 56727 (0.271 sec)
INFO:tensorflow:global_step/sec: 357.152
INFO:tensorflow:loss = 0.69361246, step = 56827 (0.280 sec)
INFO:tensorflow:global_step/sec: 366.58
INFO:tensorflow:loss = 0.7150031, step = 56927 (0.273 sec)
INFO:tensorflow:global_step/sec: 371.908
INFO:tensorflow:loss = 0.6582054, step = 57027 (0.269 sec)


INFO:tensorflow:global_step/sec: 372.567
INFO:tensorflow:loss = 0.71213627, step = 64327 (0.268 sec)
INFO:tensorflow:global_step/sec: 368.75
INFO:tensorflow:loss = 0.6434433, step = 64427 (0.271 sec)
INFO:tensorflow:global_step/sec: 354.601
INFO:tensorflow:loss = 0.7059342, step = 64527 (0.282 sec)
INFO:tensorflow:global_step/sec: 359.406
INFO:tensorflow:loss = 0.6995029, step = 64627 (0.278 sec)
INFO:tensorflow:global_step/sec: 358.506
INFO:tensorflow:loss = 0.6741433, step = 64727 (0.279 sec)
INFO:tensorflow:global_step/sec: 352.709
INFO:tensorflow:loss = 0.6932423, step = 64827 (0.283 sec)
INFO:tensorflow:global_step/sec: 345.596
INFO:tensorflow:loss = 0.7166521, step = 64927 (0.290 sec)
INFO:tensorflow:global_step/sec: 307.634
INFO:tensorflow:loss = 0.6990027, step = 65027 (0.324 sec)
INFO:tensorflow:global_step/sec: 363.671
INFO:tensorflow:loss = 0.6868525, step = 65127 (0.275 sec)
INFO:tensorflow:global_step/sec: 361.527
INFO:tensorflow:loss = 0.712798, step = 65227 (0.277 sec)
I

INFO:tensorflow:global_step/sec: 178.556
INFO:tensorflow:loss = 0.65164053, step = 72527 (0.560 sec)
INFO:tensorflow:global_step/sec: 177.287
INFO:tensorflow:loss = 0.7075442, step = 72627 (0.564 sec)
INFO:tensorflow:global_step/sec: 175.901
INFO:tensorflow:loss = 0.71411985, step = 72727 (0.568 sec)
INFO:tensorflow:global_step/sec: 178.568
INFO:tensorflow:loss = 0.6997208, step = 72827 (0.560 sec)
INFO:tensorflow:global_step/sec: 168.892
INFO:tensorflow:loss = 0.66130304, step = 72927 (0.595 sec)
INFO:tensorflow:global_step/sec: 149.633
INFO:tensorflow:loss = 0.7150238, step = 73027 (0.668 sec)
INFO:tensorflow:global_step/sec: 134.249
INFO:tensorflow:loss = 0.71571535, step = 73127 (0.744 sec)
INFO:tensorflow:global_step/sec: 123.143
INFO:tensorflow:loss = 0.6640828, step = 73227 (0.812 sec)
INFO:tensorflow:global_step/sec: 126.686
INFO:tensorflow:loss = 0.6864556, step = 73327 (0.789 sec)
INFO:tensorflow:global_step/sec: 164.638
INFO:tensorflow:loss = 0.7004739, step = 73427 (0.607 s

INFO:tensorflow:global_step/sec: 202.833
INFO:tensorflow:loss = 0.6664867, step = 80727 (0.492 sec)
INFO:tensorflow:global_step/sec: 201.699
INFO:tensorflow:loss = 0.6933006, step = 80827 (0.496 sec)
INFO:tensorflow:global_step/sec: 198.759
INFO:tensorflow:loss = 0.68664163, step = 80927 (0.503 sec)
INFO:tensorflow:global_step/sec: 200.433
INFO:tensorflow:loss = 0.70736635, step = 81027 (0.499 sec)
INFO:tensorflow:global_step/sec: 199.215
INFO:tensorflow:loss = 0.69367903, step = 81127 (0.502 sec)
INFO:tensorflow:Saving checkpoints for 81189 into /tmp/tf_bow_sst_20181209-0108/model.ckpt.
INFO:tensorflow:Loss for final step: 0.79315704.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0108/model.ckpt-81189
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpo

INFO:tensorflow:global_step/sec: 156.136
INFO:tensorflow:loss = 0.70667636, step = 88390 (0.641 sec)
INFO:tensorflow:global_step/sec: 154.441
INFO:tensorflow:loss = 0.67357755, step = 88490 (0.647 sec)
INFO:tensorflow:global_step/sec: 187.799
INFO:tensorflow:loss = 0.6867254, step = 88590 (0.532 sec)
INFO:tensorflow:global_step/sec: 195.526
INFO:tensorflow:loss = 0.6997169, step = 88690 (0.511 sec)
INFO:tensorflow:global_step/sec: 199.986
INFO:tensorflow:loss = 0.6933116, step = 88790 (0.500 sec)
INFO:tensorflow:global_step/sec: 201.568
INFO:tensorflow:loss = 0.6867424, step = 88890 (0.496 sec)
INFO:tensorflow:global_step/sec: 192.696
INFO:tensorflow:loss = 0.6933415, step = 88990 (0.518 sec)
INFO:tensorflow:global_step/sec: 196.853
INFO:tensorflow:loss = 0.7133511, step = 89090 (0.508 sec)
INFO:tensorflow:global_step/sec: 198.862
INFO:tensorflow:loss = 0.7000815, step = 89190 (0.505 sec)
INFO:tensorflow:global_step/sec: 194.831
INFO:tensorflow:loss = 0.699831, step = 89290 (0.512 sec)

INFO:tensorflow:global_step/sec: 195.346
INFO:tensorflow:loss = 0.6802866, step = 96590 (0.512 sec)
INFO:tensorflow:global_step/sec: 195.303
INFO:tensorflow:loss = 0.71872044, step = 96690 (0.512 sec)
INFO:tensorflow:global_step/sec: 196.869
INFO:tensorflow:loss = 0.69320947, step = 96790 (0.508 sec)
INFO:tensorflow:global_step/sec: 195.98
INFO:tensorflow:loss = 0.6867934, step = 96890 (0.511 sec)
INFO:tensorflow:global_step/sec: 198.125
INFO:tensorflow:loss = 0.6930355, step = 96990 (0.505 sec)
INFO:tensorflow:global_step/sec: 199.557
INFO:tensorflow:loss = 0.6869968, step = 97090 (0.501 sec)
INFO:tensorflow:global_step/sec: 199.996
INFO:tensorflow:loss = 0.68669164, step = 97190 (0.500 sec)
INFO:tensorflow:global_step/sec: 204.096
INFO:tensorflow:loss = 0.72748375, step = 97290 (0.490 sec)
INFO:tensorflow:global_step/sec: 199.985
INFO:tensorflow:loss = 0.6935384, step = 97390 (0.500 sec)
INFO:tensorflow:global_step/sec: 194.504
INFO:tensorflow:loss = 0.7008166, step = 97490 (0.514 se

INFO:tensorflow:loss = 0.6808781, step = 104690 (0.504 sec)
INFO:tensorflow:global_step/sec: 199.184
INFO:tensorflow:loss = 0.6869046, step = 104790 (0.502 sec)
INFO:tensorflow:global_step/sec: 204.627
INFO:tensorflow:loss = 0.68673295, step = 104890 (0.489 sec)
INFO:tensorflow:global_step/sec: 201.521
INFO:tensorflow:loss = 0.69326365, step = 104990 (0.496 sec)
INFO:tensorflow:global_step/sec: 201.598
INFO:tensorflow:loss = 0.6811634, step = 105090 (0.496 sec)
INFO:tensorflow:global_step/sec: 201.225
INFO:tensorflow:loss = 0.6743197, step = 105190 (0.497 sec)
INFO:tensorflow:global_step/sec: 202
INFO:tensorflow:loss = 0.66214293, step = 105290 (0.495 sec)
INFO:tensorflow:global_step/sec: 199.616
INFO:tensorflow:loss = 0.6932039, step = 105390 (0.501 sec)
INFO:tensorflow:global_step/sec: 201.985
INFO:tensorflow:loss = 0.66246057, step = 105490 (0.495 sec)
INFO:tensorflow:global_step/sec: 198.044
INFO:tensorflow:loss = 0.67434835, step = 105590 (0.505 sec)
INFO:tensorflow:global_step/se

INFO:tensorflow:loss = 0.6570921, step = 112253 (0.521 sec)
INFO:tensorflow:global_step/sec: 210.056
INFO:tensorflow:loss = 0.7004556, step = 112353 (0.476 sec)
INFO:tensorflow:global_step/sec: 210.13
INFO:tensorflow:loss = 0.7068947, step = 112453 (0.480 sec)
INFO:tensorflow:global_step/sec: 198.4
INFO:tensorflow:loss = 0.6996654, step = 112553 (0.500 sec)
INFO:tensorflow:global_step/sec: 189.399
INFO:tensorflow:loss = 0.68662447, step = 112653 (0.528 sec)
INFO:tensorflow:global_step/sec: 210.625
INFO:tensorflow:loss = 0.6799234, step = 112753 (0.475 sec)
INFO:tensorflow:global_step/sec: 207.31
INFO:tensorflow:loss = 0.68665785, step = 112853 (0.483 sec)
INFO:tensorflow:global_step/sec: 205.408
INFO:tensorflow:loss = 0.6932872, step = 112953 (0.487 sec)
INFO:tensorflow:global_step/sec: 196.837
INFO:tensorflow:loss = 0.6866692, step = 113053 (0.508 sec)
INFO:tensorflow:global_step/sec: 192.265
INFO:tensorflow:loss = 0.69346035, step = 113153 (0.521 sec)
INFO:tensorflow:global_step/sec:

INFO:tensorflow:loss = 0.6937561, step = 120353 (0.498 sec)
INFO:tensorflow:global_step/sec: 199.377
INFO:tensorflow:loss = 0.68637836, step = 120453 (0.502 sec)
INFO:tensorflow:global_step/sec: 199.651
INFO:tensorflow:loss = 0.7082687, step = 120553 (0.501 sec)
INFO:tensorflow:global_step/sec: 199.96
INFO:tensorflow:loss = 0.7006477, step = 120653 (0.499 sec)
INFO:tensorflow:global_step/sec: 196.896
INFO:tensorflow:loss = 0.6724554, step = 120753 (0.508 sec)
INFO:tensorflow:global_step/sec: 203.259
INFO:tensorflow:loss = 0.66592234, step = 120853 (0.492 sec)
INFO:tensorflow:global_step/sec: 203.234
INFO:tensorflow:loss = 0.6866003, step = 120953 (0.492 sec)
INFO:tensorflow:global_step/sec: 205.489
INFO:tensorflow:loss = 0.6934847, step = 121053 (0.486 sec)
INFO:tensorflow:global_step/sec: 208.985
INFO:tensorflow:loss = 0.693434, step = 121153 (0.478 sec)
INFO:tensorflow:global_step/sec: 209.824
INFO:tensorflow:loss = 0.65935844, step = 121253 (0.476 sec)
INFO:tensorflow:global_step/se

INFO:tensorflow:global_step/sec: 206.57
INFO:tensorflow:loss = 0.6656848, step = 128553 (0.484 sec)
INFO:tensorflow:global_step/sec: 203.303
INFO:tensorflow:loss = 0.67961997, step = 128653 (0.492 sec)
INFO:tensorflow:global_step/sec: 200.795
INFO:tensorflow:loss = 0.67345774, step = 128753 (0.499 sec)
INFO:tensorflow:global_step/sec: 198.041
INFO:tensorflow:loss = 0.68677616, step = 128853 (0.504 sec)
INFO:tensorflow:global_step/sec: 200.798
INFO:tensorflow:loss = 0.68010825, step = 128953 (0.498 sec)
INFO:tensorflow:global_step/sec: 199.597
INFO:tensorflow:loss = 0.713105, step = 129053 (0.501 sec)
INFO:tensorflow:global_step/sec: 197.571
INFO:tensorflow:loss = 0.6865842, step = 129153 (0.506 sec)
INFO:tensorflow:global_step/sec: 201.607
INFO:tensorflow:loss = 0.6867865, step = 129253 (0.496 sec)
INFO:tensorflow:global_step/sec: 179.252
INFO:tensorflow:loss = 0.69323266, step = 129353 (0.558 sec)
INFO:tensorflow:global_step/sec: 201.933
INFO:tensorflow:loss = 0.6671498, step = 129453

## Evaluation

In [20]:
#Evaluation on test data

eval_metrics = model.evaluate(input_fn=test_input_fn, name="test")  

print ("Perplexity on test set: {:.03}".format(math.exp(eval_metrics['cross_entropy_loss'])))
print("Accuracy on test set: {:.02%}".format(eval_metrics['accuracy']))

eval_metrics

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-12-09-01:17:29
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0108/model.ckpt-135315
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-12-09-01:17:36
INFO:tensorflow:Saving dict for global step 135315: accuracy = 0.5405172, cross_entropy_loss = 0.68990886, global_step = 135315, loss = 0.6900018
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 135315: /tmp/tf_bow_sst_20181209-0108/model.ckpt-135315
Perplexity on test set: 1.99
Accuracy on test set: 54.05%


{'accuracy': 0.5405172,
 'cross_entropy_loss': 0.68990886,
 'loss': 0.6900018,
 'global_step': 135315}

In [21]:
#Evaluation on training data

eval_metrics = model.evaluate(input_fn=train_input_fn, name="train")  

print ("Perplexity on train set: {:.03}".format(math.exp(eval_metrics['cross_entropy_loss'])))
print("Accuracy on train set: {:.02%}".format(eval_metrics['accuracy']))
eval_metrics

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-12-09-01:17:37
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0108/model.ckpt-135315
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-12-09-01:18:39
INFO:tensorflow:Saving dict for global step 135315: accuracy = 0.542755, cross_entropy_loss = 0.68948305, global_step = 135315, loss = 0.68955386
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 135315: /tmp/tf_bow_sst_20181209-0108/model.ckpt-135315
Perplexity on train set: 1.99
Accuracy on train set: 54.28%


{'accuracy': 0.542755,
 'cross_entropy_loss': 0.68948305,
 'loss': 0.68955386,
 'global_step': 135315}