# MBTI Parallel Classification Model with Neural BOW (S/N Axis)

First, load libraries and useful functions from class:

In [1]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division


import os, sys, re, json, time, datetime, shutil
from importlib import reload
import collections, itertools

# NumPy and TensorFlow
import numpy as np
import pandas as pd
import tensorflow as tf
import patched_numpy_io
assert(tf.__version__.startswith("1."))

# Utils and Helper libraries
# import nltk
import utils, vocabulary
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import math
from nltk.corpus import stopwords

  from ._conv import register_converters as _register_converters


## Specifications for Binary Classification NBOW for MBTI

In this baseline, the task is to predict the first MBTI axis (S vs. N) given a text string. We will model after the A2 assignment, with Architecture and Parameters defined below.

### Pre-Processing:
* Minimial pre-processing, only separating punctuation from text and lower-case all text
* Assigning words to numerical indices based on a fixed Vocab size, defined by word frequency in training set
* Pulled out first axis of all target labels, assigned to binary (S = 0, N = 1)

### Architecture:
* Encoder: Bag of Words 
* Decoder: Softmax
* Classification: Binary (2 MBTI types - S or N)

### Parameters
* Batch Size: 25 
* Text length: 100
* Vocabulary size (V): ~328K - removed stopwords
* Embedding Size: 50
* Hidden Dimensions: 25

### Training:
* Epochs = 10 
* 80% train, 20% test
* Loss: Sparse Softmax Cross Entropy 
* Optimizers: Adagrad Optimizer

## Load Corpus & Pre-Process

In [2]:
#load data
df = pd.read_csv('./mbti_1.csv')
df.head(5)

Unnamed: 0,type,posts
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...
1,ENTP,'I'm finding the lack of me in these posts ver...
2,INTP,'Good one _____ https://www.youtube.com/wat...
3,INTJ,"'Dear INTP, I enjoyed our conversation the o..."
4,ENTJ,'You're fired.|||That's another silly misconce...


In [3]:
# function to clean and tokenize sentence ["Hello world."] into list of words ["hello world"]
def clean(sentence):
    ignore_words = ['a']
    words = re.sub("[^\w]", " ",  sentence).split() #nltk.word_tokenize(sentence)
    words_cleaned = [w.lower() for w in words if w not in ignore_words]
    stop_words = set(stopwords.words('english'))
    words_cleaned = [w for w in words_cleaned if not w in stop_words]
    words_string = ''.join(words_cleaned)
    return words_string

In [4]:
# split posts per users into separate sentences
post = []
utype = []
user = []

for index, row in df.iterrows():
    posts = row['posts'].split('|||')
    posts_clean = []
    for sentence in posts:
        posts_clean.append(clean(sentence))
    post.extend(posts_clean)
#     post.extend(posts)
    utype.extend([row['type'] for i in range(len(posts))])
    user.extend([index for i in range(len(posts))])
    
short_posts = pd.DataFrame({"user": user,"type": utype,"post": post})
print(short_posts.shape)
short_posts.head(5)

(422845, 3)


Unnamed: 0,user,type,post
0,0,INFJ,httpwwwyoutubecomwatchvqsxhcwe3krw
1,0,INFJ,http41mediatumblrcomtumblr_lfouy03pma1qa1rooo1...
2,0,INFJ,enfpintjmomentshttpswwwyoutubecomwatchviz7le1g...
3,0,INFJ,lifechangingexperiencelife
4,0,INFJ,httpwwwyoutubecomwatchvvxzeywwrdw8httpwwwyoutu...


In [5]:
# Split data: 80% train, 20% test
post_train, post_test, label_train, label_test = train_test_split(np.array(short_posts['post']), 
                                                    np.array(short_posts['type']), 
                                                    test_size=0.2, 
                                                    random_state=88)


print("MBIT posts", post_train[:5])
print('')
print("MBTI Labels: ",label_train[:5])

MBIT posts ['httpswwwyoutubecomwatchvbxvkaah2d7m'
 'isfjsinfpsbalancereallywellthinklearncommunicatesjschoosingwordstonevoicecarefullynpslearningtakethingslesspersonallysj'
 'seekrecognitionfame'
 'honestmaybegivingvibesselfassuredthinkbullygoinggosomeonethinkfightbacksomeoneweak'
 'probablythinkingreallypersonallypreferaxbcdyfunctionstackcomparedgrantallowspartsgrantgetsrightstillconsistentjunge']

MBTI Labels:  ['INTP' 'INFP' 'INTP' 'ENFP' 'INTJ']


In [6]:
# Build a vocabulary (V size is defaulted to full text) for train corpus
vocab_mbti = vocabulary.Vocabulary((utils.canonicalize_word(w) for w in post_train))
vocab_mbti.size

328905

In [7]:
print (vocab_mbti.words_to_ids(['a','what','and','the']))
print (vocab_mbti.ids_to_words([202, 147565, 317206, 159348])) 

[2, 2, 2, 2]
['always', 'fearfeelingbadwantothersfeelbadsomethingsaiddonenoncommunicationconveysspnegativemessagebetter', 'forgotmentioningcluwellknowthoughtlastpostmentionedblushed', 'hiflyincavemanthinksoundsinfpdistractionlacksensoryawarenessconsistentnfpronouncedinfpsfeelsorryactivities']


In [8]:
# tokenize and canonicalize train and test sets
x_train = []
for post in post_train:
    x_train.append(vocab_mbti.words_to_ids(post.split()))

x_test = []
for post in post_test:
    x_test.append(vocab_mbti.words_to_ids(post.split()))

In [9]:
print("Original Text: ",post_train[88])
print("Canonicalized Text: ", x_train[88])
print("Max lengths of texts: ", max([len(x) for x in x_train+x_test]))

Original Text:  agreebrutal
Canonicalized Text:  [2305]
Max lengths of texts:  1


In [10]:
print(len(label_train[0]))


4


In [11]:
def binary_mbti(string):
    label_bin = []
    if string[0]=="E":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[1]=="N":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[2]=="F":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[3]=="J":
        label_bin.append(0)
    else:
        label_bin.append(1)
        
    assert len(label_bin) == 4,"Not a valid MBTI type"
    return label_bin

In [12]:
print(label_train[0])
print(binary_mbti(label_train[0]))

INTP
[1, 0, 1, 1]


In [13]:
y_train_id = list(map(lambda x: binary_mbti(x), label_train))
y_test_id = list(map(lambda x: binary_mbti(x), label_test))

print(y_train_id[0:5])
print(label_train[0:5])
print(y_test_id[0:5])
print(label_test[0:5])

[[1, 0, 1, 1], [1, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1], [1, 0, 1, 0]]
['INTP' 'INFP' 'INTP' 'ENFP' 'INTJ']
[[1, 0, 0, 1], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 1]]
['INFP' 'INFJ' 'INFJ' 'INFJ' 'INFP']


## Bulid the NBOW Model

In [14]:
def pad_np_array(example_ids, max_len=35, pad_id=0):

    arr = np.full([len(example_ids), max_len], pad_id, dtype=np.int32)
    ns = np.zeros([len(example_ids)], dtype=np.int32)
    for i, ids in enumerate(example_ids):
        cpy_len = min(len(ids), max_len)
        arr[i,:cpy_len] = ids[:cpy_len]
        ns[i] = cpy_len
    return arr, ns

def tokenize_post(post_string):
    return vocab_mbti.words_to_ids(post_string)

In [15]:
def as_padded_array(post_ids, targets, max_len=40, pad_id=0,
                    root_only=False, df_idxs=None):

    #needs to put together the pad_np_array output with  the target labels in dataframe
    
    
    x, ns = pad_np_array(post_ids, max_len=max_len, pad_id=pad_id)
    return x, ns, np.array(targets)

In [16]:
y_train_2 = []
for i in range(len(y_train_id)):
    y_train_2.append(y_train_id[i][1])

y_test_2 = []
for i in range(len(y_test_id)):
    y_test_2.append(y_test_id[i][1])

In [17]:
train_x, train_ns, train_y = as_padded_array(x_train, y_train_2)
test_x, test_ns, test_y = as_padded_array(x_test, y_test_2)

In [18]:
y_test_id[0:5]

[[1, 0, 0, 1], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 1]]

In [19]:
#set up model using tf.estimator

import MBTI_BOW_model; reload(MBTI_BOW_model)

# Specify model hyperparameters as used by model_fn
model_params = dict(V=vocab_mbti.size, embed_dim=50, hidden_dims=[25], num_classes=2,
                    encoder_type='bow',
                    lr=0.1, optimizer='adagrad', beta=0.01)

checkpoint_dir = "/tmp/tf_bow_sst_" + datetime.datetime.now().strftime("%Y%m%d-%H%M")
if os.path.isdir(checkpoint_dir):
    shutil.rmtree(checkpoint_dir)
# Write vocabulary to file, so TensorBoard can label embeddings.
# creates checkpoint_dir/projector_config.pbtxt and checkpoint_dir/metadata.tsv
vocab_mbti.write_projector_config(checkpoint_dir, "Encoder/Embedding_Layer/W_embed")

model = tf.estimator.Estimator(model_fn=MBTI_BOW_model.classifier_model_fn, 
                               params=model_params,
                               model_dir=checkpoint_dir)
print("")
print("To view training (once it starts), run:\n")
print("    tensorboard --logdir='{:s}' --port 6006".format(checkpoint_dir))
print("\nThen in your browser, open: http://localhost:6006")

Vocabulary (328,905 words) written to '/tmp/tf_bow_sst_20181209-0039/metadata.tsv'
Projector config written to /tmp/tf_bow_sst_20181209-0039/projector_config.pbtxt
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tf_bow_sst_20181209-0039', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1362ea2780>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}

To view training (once it starts), run:

    tensorboard --logdir='/tmp/tf_bow_sst_20181209-0039' --port 6006

Then in your browser, open: htt

## Train Model

In [20]:
#start training


train_params = dict(batch_size=25, total_epochs=10, eval_every=2)
assert(train_params['total_epochs'] % train_params['eval_every'] == 0)


train_input_fn = patched_numpy_io.numpy_input_fn(
                    x={"ids": train_x, "ns": train_ns}, y=train_y,
                    batch_size=train_params['batch_size'], 
                    num_epochs=train_params['eval_every'], shuffle=True, seed=42
                 )


test_input_fn = tf.estimator.inputs.numpy_input_fn(
                    x={"ids": test_x, "ns": test_ns}, y=test_y,
                    batch_size=25, num_epochs=1, shuffle=False
                )

for _ in range(train_params['total_epochs'] // train_params['eval_every']):
    model.train(input_fn=train_input_fn)


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tf_bow_sst_20181209-0039/model.ckpt.
INFO:tensorflow:loss = 1.0797739, step = 1
INFO:tensorflow:global_step/sec: 306.316
INFO:tensorflow:loss = 0.57583416, step = 101 (0.332 sec)
INFO:tensorflow:global_step/sec: 368.694
INFO:tensorflow:loss = 0.42626342, step = 201 (0.270 sec)
INFO:tensorflow:global_step/sec: 371.023
INFO:tensorflow:loss = 0.36406192, step = 301 (0.270 sec)
INFO:tensorflow:global_step/sec: 371.152
INFO:tensorflow:loss = 0.42025545, step = 401 (0.269 sec)
INFO:tensorflow:global_step/sec: 364.644
INFO:tensorflow:loss = 0.32429466, step = 501 (0.274 sec)
INFO:tensorflow:global_step/sec: 368.717
INFO:tensorflow:loss = 0.6148126, step = 601 (0.271 sec)
INFO:tensorflow:global_step/sec: 366.67

INFO:tensorflow:global_step/sec: 365.185
INFO:tensorflow:loss = 0.3781145, step = 8001 (0.274 sec)
INFO:tensorflow:global_step/sec: 373.524
INFO:tensorflow:loss = 0.5902865, step = 8101 (0.268 sec)
INFO:tensorflow:global_step/sec: 361.994
INFO:tensorflow:loss = 0.377673, step = 8201 (0.276 sec)
INFO:tensorflow:global_step/sec: 376.094
INFO:tensorflow:loss = 0.37706584, step = 8301 (0.266 sec)
INFO:tensorflow:global_step/sec: 377.693
INFO:tensorflow:loss = 0.519986, step = 8401 (0.265 sec)
INFO:tensorflow:global_step/sec: 380.923
INFO:tensorflow:loss = 0.44745973, step = 8501 (0.263 sec)
INFO:tensorflow:global_step/sec: 372.372
INFO:tensorflow:loss = 0.5906318, step = 8601 (0.269 sec)
INFO:tensorflow:global_step/sec: 368.776
INFO:tensorflow:loss = 0.3779748, step = 8701 (0.271 sec)
INFO:tensorflow:global_step/sec: 367.975
INFO:tensorflow:loss = 0.37628815, step = 8801 (0.271 sec)
INFO:tensorflow:global_step/sec: 365.433
INFO:tensorflow:loss = 0.72672725, step = 8901 (0.274 sec)
INFO:ten

INFO:tensorflow:global_step/sec: 358.535
INFO:tensorflow:loss = 0.4488726, step = 16201 (0.279 sec)
INFO:tensorflow:global_step/sec: 366.187
INFO:tensorflow:loss = 0.23445642, step = 16301 (0.274 sec)
INFO:tensorflow:global_step/sec: 363.326
INFO:tensorflow:loss = 0.30613354, step = 16401 (0.274 sec)
INFO:tensorflow:global_step/sec: 366.178
INFO:tensorflow:loss = 0.44772717, step = 16501 (0.273 sec)
INFO:tensorflow:global_step/sec: 366.644
INFO:tensorflow:loss = 0.5197558, step = 16601 (0.273 sec)
INFO:tensorflow:global_step/sec: 365.554
INFO:tensorflow:loss = 0.44752774, step = 16701 (0.273 sec)
INFO:tensorflow:global_step/sec: 374.961
INFO:tensorflow:loss = 0.37721267, step = 16801 (0.267 sec)
INFO:tensorflow:global_step/sec: 381.092
INFO:tensorflow:loss = 0.44846842, step = 16901 (0.262 sec)
INFO:tensorflow:global_step/sec: 376.323
INFO:tensorflow:loss = 0.5180098, step = 17001 (0.266 sec)
INFO:tensorflow:global_step/sec: 374.539
INFO:tensorflow:loss = 0.37738943, step = 17101 (0.26

INFO:tensorflow:global_step/sec: 382.616
INFO:tensorflow:loss = 0.3767781, step = 24401 (0.262 sec)
INFO:tensorflow:global_step/sec: 386.318
INFO:tensorflow:loss = 0.51640916, step = 24501 (0.259 sec)
INFO:tensorflow:global_step/sec: 389.355
INFO:tensorflow:loss = 0.44796607, step = 24601 (0.257 sec)
INFO:tensorflow:global_step/sec: 384.073
INFO:tensorflow:loss = 0.3773504, step = 24701 (0.260 sec)
INFO:tensorflow:global_step/sec: 374.367
INFO:tensorflow:loss = 0.23631242, step = 24801 (0.267 sec)
INFO:tensorflow:global_step/sec: 393.499
INFO:tensorflow:loss = 0.5181453, step = 24901 (0.254 sec)
INFO:tensorflow:global_step/sec: 382.506
INFO:tensorflow:loss = 0.37707213, step = 25001 (0.262 sec)
INFO:tensorflow:global_step/sec: 372.534
INFO:tensorflow:loss = 0.37651363, step = 25101 (0.269 sec)
INFO:tensorflow:global_step/sec: 376.555
INFO:tensorflow:loss = 0.30273637, step = 25201 (0.265 sec)
INFO:tensorflow:global_step/sec: 378.499
INFO:tensorflow:loss = 0.16294836, step = 25301 (0.26

INFO:tensorflow:global_step/sec: 371.529
INFO:tensorflow:loss = 0.51749104, step = 32064 (0.269 sec)
INFO:tensorflow:global_step/sec: 380.472
INFO:tensorflow:loss = 0.44777077, step = 32164 (0.263 sec)
INFO:tensorflow:global_step/sec: 377.543
INFO:tensorflow:loss = 0.23397379, step = 32264 (0.265 sec)
INFO:tensorflow:global_step/sec: 380.898
INFO:tensorflow:loss = 0.30348054, step = 32364 (0.263 sec)
INFO:tensorflow:global_step/sec: 378.997
INFO:tensorflow:loss = 0.37691867, step = 32464 (0.264 sec)
INFO:tensorflow:global_step/sec: 382.136
INFO:tensorflow:loss = 0.37796694, step = 32564 (0.262 sec)
INFO:tensorflow:global_step/sec: 384.294
INFO:tensorflow:loss = 0.51820254, step = 32664 (0.260 sec)
INFO:tensorflow:global_step/sec: 384.653
INFO:tensorflow:loss = 0.4481752, step = 32764 (0.260 sec)
INFO:tensorflow:global_step/sec: 385.852
INFO:tensorflow:loss = 0.23655199, step = 32864 (0.259 sec)
INFO:tensorflow:global_step/sec: 383.689
INFO:tensorflow:loss = 0.37683347, step = 32964 (0.

INFO:tensorflow:global_step/sec: 305.151
INFO:tensorflow:loss = 0.37773687, step = 40264 (0.327 sec)
INFO:tensorflow:global_step/sec: 307.762
INFO:tensorflow:loss = 0.30783272, step = 40364 (0.325 sec)
INFO:tensorflow:global_step/sec: 308.226
INFO:tensorflow:loss = 0.23866004, step = 40464 (0.325 sec)
INFO:tensorflow:global_step/sec: 305.894
INFO:tensorflow:loss = 0.51877683, step = 40564 (0.326 sec)
INFO:tensorflow:global_step/sec: 309.038
INFO:tensorflow:loss = 0.4473119, step = 40664 (0.323 sec)
INFO:tensorflow:global_step/sec: 321.05
INFO:tensorflow:loss = 0.3050431, step = 40764 (0.312 sec)
INFO:tensorflow:global_step/sec: 309.066
INFO:tensorflow:loss = 0.4484674, step = 40864 (0.323 sec)
INFO:tensorflow:global_step/sec: 322.411
INFO:tensorflow:loss = 0.5202761, step = 40964 (0.310 sec)
INFO:tensorflow:global_step/sec: 318.138
INFO:tensorflow:loss = 0.4475763, step = 41064 (0.314 sec)
INFO:tensorflow:global_step/sec: 309.444
INFO:tensorflow:loss = 0.37682843, step = 41164 (0.323 s

INFO:tensorflow:global_step/sec: 377.452
INFO:tensorflow:loss = 0.4486736, step = 48464 (0.265 sec)
INFO:tensorflow:global_step/sec: 373.377
INFO:tensorflow:loss = 0.37721026, step = 48564 (0.268 sec)
INFO:tensorflow:global_step/sec: 378.744
INFO:tensorflow:loss = 0.3768283, step = 48664 (0.264 sec)
INFO:tensorflow:global_step/sec: 360.554
INFO:tensorflow:loss = 0.37749878, step = 48764 (0.278 sec)
INFO:tensorflow:global_step/sec: 371.409
INFO:tensorflow:loss = 0.30607834, step = 48864 (0.269 sec)
INFO:tensorflow:global_step/sec: 374.977
INFO:tensorflow:loss = 0.44893852, step = 48964 (0.267 sec)
INFO:tensorflow:global_step/sec: 386.634
INFO:tensorflow:loss = 0.37731773, step = 49064 (0.259 sec)
INFO:tensorflow:global_step/sec: 380.005
INFO:tensorflow:loss = 0.3081348, step = 49164 (0.263 sec)
INFO:tensorflow:global_step/sec: 379.872
INFO:tensorflow:loss = 0.5888393, step = 49264 (0.263 sec)
INFO:tensorflow:global_step/sec: 375.304
INFO:tensorflow:loss = 0.66392887, step = 49364 (0.266

INFO:tensorflow:global_step/sec: 380.614
INFO:tensorflow:loss = 0.30528387, step = 56127 (0.263 sec)
INFO:tensorflow:global_step/sec: 380.527
INFO:tensorflow:loss = 0.5947784, step = 56227 (0.263 sec)
INFO:tensorflow:global_step/sec: 379.154
INFO:tensorflow:loss = 0.44846365, step = 56327 (0.264 sec)
INFO:tensorflow:global_step/sec: 382.24
INFO:tensorflow:loss = 0.23592983, step = 56427 (0.262 sec)
INFO:tensorflow:global_step/sec: 376.979
INFO:tensorflow:loss = 0.37646037, step = 56527 (0.265 sec)
INFO:tensorflow:global_step/sec: 370.252
INFO:tensorflow:loss = 0.23170573, step = 56627 (0.270 sec)
INFO:tensorflow:global_step/sec: 372.777
INFO:tensorflow:loss = 0.2332441, step = 56727 (0.268 sec)
INFO:tensorflow:global_step/sec: 369.491
INFO:tensorflow:loss = 0.4491146, step = 56827 (0.271 sec)
INFO:tensorflow:global_step/sec: 376.632
INFO:tensorflow:loss = 0.30601442, step = 56927 (0.265 sec)
INFO:tensorflow:global_step/sec: 350.104
INFO:tensorflow:loss = 0.4490052, step = 57027 (0.286 

INFO:tensorflow:global_step/sec: 375.568
INFO:tensorflow:loss = 0.37713432, step = 64327 (0.265 sec)
INFO:tensorflow:global_step/sec: 376.799
INFO:tensorflow:loss = 0.30389988, step = 64427 (0.265 sec)
INFO:tensorflow:global_step/sec: 374.289
INFO:tensorflow:loss = 0.37647557, step = 64527 (0.267 sec)
INFO:tensorflow:global_step/sec: 383.062
INFO:tensorflow:loss = 0.59355795, step = 64627 (0.261 sec)
INFO:tensorflow:global_step/sec: 392.668
INFO:tensorflow:loss = 0.30495542, step = 64727 (0.254 sec)
INFO:tensorflow:global_step/sec: 392.924
INFO:tensorflow:loss = 0.3072975, step = 64827 (0.255 sec)
INFO:tensorflow:global_step/sec: 386.388
INFO:tensorflow:loss = 0.5896778, step = 64927 (0.259 sec)
INFO:tensorflow:global_step/sec: 384.643
INFO:tensorflow:loss = 0.52030873, step = 65027 (0.260 sec)
INFO:tensorflow:global_step/sec: 382.586
INFO:tensorflow:loss = 0.3772629, step = 65127 (0.261 sec)
INFO:tensorflow:global_step/sec: 385.344
INFO:tensorflow:loss = 0.7318384, step = 65227 (0.259

INFO:tensorflow:global_step/sec: 372.482
INFO:tensorflow:loss = 0.30675015, step = 72527 (0.268 sec)
INFO:tensorflow:global_step/sec: 377.606
INFO:tensorflow:loss = 0.3061233, step = 72627 (0.265 sec)
INFO:tensorflow:global_step/sec: 378.78
INFO:tensorflow:loss = 0.30739126, step = 72727 (0.264 sec)
INFO:tensorflow:global_step/sec: 371.126
INFO:tensorflow:loss = 0.5182818, step = 72827 (0.270 sec)
INFO:tensorflow:global_step/sec: 371.69
INFO:tensorflow:loss = 0.5207221, step = 72927 (0.269 sec)
INFO:tensorflow:global_step/sec: 381.676
INFO:tensorflow:loss = 0.305428, step = 73027 (0.262 sec)
INFO:tensorflow:global_step/sec: 384.495
INFO:tensorflow:loss = 0.44825277, step = 73127 (0.260 sec)
INFO:tensorflow:global_step/sec: 388.261
INFO:tensorflow:loss = 0.44768938, step = 73227 (0.258 sec)
INFO:tensorflow:global_step/sec: 385.4
INFO:tensorflow:loss = 0.51837724, step = 73327 (0.260 sec)
INFO:tensorflow:global_step/sec: 373.634
INFO:tensorflow:loss = 0.23628537, step = 73427 (0.268 sec)

INFO:tensorflow:global_step/sec: 374.837
INFO:tensorflow:loss = 0.37698096, step = 80727 (0.267 sec)
INFO:tensorflow:global_step/sec: 370.13
INFO:tensorflow:loss = 0.37765804, step = 80827 (0.270 sec)
INFO:tensorflow:global_step/sec: 385.25
INFO:tensorflow:loss = 0.3070857, step = 80927 (0.259 sec)
INFO:tensorflow:global_step/sec: 377.421
INFO:tensorflow:loss = 0.5175595, step = 81027 (0.265 sec)
INFO:tensorflow:global_step/sec: 379.034
INFO:tensorflow:loss = 0.517519, step = 81127 (0.264 sec)
INFO:tensorflow:Saving checkpoints for 81189 into /tmp/tf_bow_sst_20181209-0039/model.ckpt.
INFO:tensorflow:Loss for final step: 0.16484013.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0039/model.ckpt-81189
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints

INFO:tensorflow:loss = 0.37710288, step = 88290 (0.264 sec)
INFO:tensorflow:global_step/sec: 376.087
INFO:tensorflow:loss = 0.5180641, step = 88390 (0.266 sec)
INFO:tensorflow:global_step/sec: 368.495
INFO:tensorflow:loss = 0.5208692, step = 88490 (0.272 sec)
INFO:tensorflow:global_step/sec: 385.302
INFO:tensorflow:loss = 0.3766867, step = 88590 (0.259 sec)
INFO:tensorflow:global_step/sec: 396.563
INFO:tensorflow:loss = 0.30561963, step = 88690 (0.253 sec)
INFO:tensorflow:global_step/sec: 393.803
INFO:tensorflow:loss = 0.3764153, step = 88790 (0.254 sec)
INFO:tensorflow:global_step/sec: 390.712
INFO:tensorflow:loss = 0.37632456, step = 88890 (0.256 sec)
INFO:tensorflow:global_step/sec: 387.71
INFO:tensorflow:loss = 0.22997048, step = 88990 (0.258 sec)
INFO:tensorflow:global_step/sec: 401.076
INFO:tensorflow:loss = 0.4490995, step = 89090 (0.249 sec)
INFO:tensorflow:global_step/sec: 381.811
INFO:tensorflow:loss = 0.37707007, step = 89190 (0.262 sec)
INFO:tensorflow:global_step/sec: 376.

INFO:tensorflow:loss = 0.3055031, step = 96490 (0.265 sec)
INFO:tensorflow:global_step/sec: 380.318
INFO:tensorflow:loss = 0.15768689, step = 96590 (0.263 sec)
INFO:tensorflow:global_step/sec: 379.828
INFO:tensorflow:loss = 0.51943856, step = 96690 (0.263 sec)
INFO:tensorflow:global_step/sec: 368.43
INFO:tensorflow:loss = 0.3768156, step = 96790 (0.271 sec)
INFO:tensorflow:global_step/sec: 371.174
INFO:tensorflow:loss = 0.23107995, step = 96890 (0.269 sec)
INFO:tensorflow:global_step/sec: 370.226
INFO:tensorflow:loss = 0.3770382, step = 96990 (0.270 sec)
INFO:tensorflow:global_step/sec: 387.538
INFO:tensorflow:loss = 0.3060808, step = 97090 (0.258 sec)
INFO:tensorflow:global_step/sec: 389.068
INFO:tensorflow:loss = 0.5222225, step = 97190 (0.257 sec)
INFO:tensorflow:global_step/sec: 389.247
INFO:tensorflow:loss = 0.37648064, step = 97290 (0.257 sec)
INFO:tensorflow:global_step/sec: 378.397
INFO:tensorflow:loss = 0.44917125, step = 97390 (0.265 sec)
INFO:tensorflow:global_step/sec: 395.

INFO:tensorflow:global_step/sec: 369.102
INFO:tensorflow:loss = 0.3769059, step = 104690 (0.270 sec)
INFO:tensorflow:global_step/sec: 371.319
INFO:tensorflow:loss = 0.37663054, step = 104790 (0.269 sec)
INFO:tensorflow:global_step/sec: 375.054
INFO:tensorflow:loss = 0.23458184, step = 104890 (0.267 sec)
INFO:tensorflow:global_step/sec: 349.306
INFO:tensorflow:loss = 0.4486593, step = 104990 (0.286 sec)
INFO:tensorflow:global_step/sec: 375.058
INFO:tensorflow:loss = 0.2318525, step = 105090 (0.267 sec)
INFO:tensorflow:global_step/sec: 371.538
INFO:tensorflow:loss = 0.23299433, step = 105190 (0.269 sec)
INFO:tensorflow:global_step/sec: 369.583
INFO:tensorflow:loss = 0.3050742, step = 105290 (0.271 sec)
INFO:tensorflow:global_step/sec: 374.284
INFO:tensorflow:loss = 0.5203057, step = 105390 (0.267 sec)
INFO:tensorflow:global_step/sec: 381.39
INFO:tensorflow:loss = 0.30636075, step = 105490 (0.262 sec)
INFO:tensorflow:global_step/sec: 383.048
INFO:tensorflow:loss = 0.37689322, step = 10559

INFO:tensorflow:global_step/sec: 383.436
INFO:tensorflow:loss = 0.51858795, step = 112253 (0.261 sec)
INFO:tensorflow:global_step/sec: 383.459
INFO:tensorflow:loss = 0.44760075, step = 112353 (0.261 sec)
INFO:tensorflow:global_step/sec: 372.832
INFO:tensorflow:loss = 0.37787613, step = 112453 (0.268 sec)
INFO:tensorflow:global_step/sec: 386.664
INFO:tensorflow:loss = 0.16595472, step = 112553 (0.259 sec)
INFO:tensorflow:global_step/sec: 380.307
INFO:tensorflow:loss = 0.3770598, step = 112653 (0.263 sec)
INFO:tensorflow:global_step/sec: 376.293
INFO:tensorflow:loss = 0.37696368, step = 112753 (0.266 sec)
INFO:tensorflow:global_step/sec: 382.371
INFO:tensorflow:loss = 0.5200371, step = 112853 (0.261 sec)
INFO:tensorflow:global_step/sec: 375.652
INFO:tensorflow:loss = 0.6604436, step = 112953 (0.266 sec)
INFO:tensorflow:global_step/sec: 378.683
INFO:tensorflow:loss = 0.30693117, step = 113053 (0.264 sec)
INFO:tensorflow:global_step/sec: 371.835
INFO:tensorflow:loss = 0.23601441, step = 11

INFO:tensorflow:global_step/sec: 381.088
INFO:tensorflow:loss = 0.5891936, step = 120353 (0.262 sec)
INFO:tensorflow:global_step/sec: 382.783
INFO:tensorflow:loss = 0.66511893, step = 120453 (0.261 sec)
INFO:tensorflow:global_step/sec: 368.351
INFO:tensorflow:loss = 0.59496623, step = 120553 (0.271 sec)
INFO:tensorflow:global_step/sec: 376.325
INFO:tensorflow:loss = 0.5927222, step = 120653 (0.266 sec)
INFO:tensorflow:global_step/sec: 373.197
INFO:tensorflow:loss = 0.44864056, step = 120753 (0.268 sec)
INFO:tensorflow:global_step/sec: 378.673
INFO:tensorflow:loss = 0.44804102, step = 120853 (0.264 sec)
INFO:tensorflow:global_step/sec: 381.095
INFO:tensorflow:loss = 0.30703953, step = 120953 (0.262 sec)
INFO:tensorflow:global_step/sec: 372.476
INFO:tensorflow:loss = 0.3061849, step = 121053 (0.269 sec)
INFO:tensorflow:global_step/sec: 378.373
INFO:tensorflow:loss = 0.5188748, step = 121153 (0.264 sec)
INFO:tensorflow:global_step/sec: 381.291
INFO:tensorflow:loss = 0.37695873, step = 121

INFO:tensorflow:global_step/sec: 386.365
INFO:tensorflow:loss = 0.66212666, step = 128453 (0.259 sec)
INFO:tensorflow:global_step/sec: 383.526
INFO:tensorflow:loss = 0.30761537, step = 128553 (0.261 sec)
INFO:tensorflow:global_step/sec: 376.867
INFO:tensorflow:loss = 0.37749517, step = 128653 (0.266 sec)
INFO:tensorflow:global_step/sec: 383.632
INFO:tensorflow:loss = 0.37733427, step = 128753 (0.261 sec)
INFO:tensorflow:global_step/sec: 374.481
INFO:tensorflow:loss = 0.65931916, step = 128853 (0.267 sec)
INFO:tensorflow:global_step/sec: 375.101
INFO:tensorflow:loss = 0.3768244, step = 128953 (0.267 sec)
INFO:tensorflow:global_step/sec: 373.626
INFO:tensorflow:loss = 0.5907517, step = 129053 (0.268 sec)
INFO:tensorflow:global_step/sec: 380.918
INFO:tensorflow:loss = 0.661831, step = 129153 (0.263 sec)
INFO:tensorflow:global_step/sec: 386.441
INFO:tensorflow:loss = 0.37675777, step = 129253 (0.259 sec)
INFO:tensorflow:global_step/sec: 381.96
INFO:tensorflow:loss = 0.23136412, step = 1293

## Evaluation

In [21]:
#Evaluation on test data

eval_metrics = model.evaluate(input_fn=test_input_fn, name="test")  

print ("Perplexity on test set: {:.03}".format(math.exp(eval_metrics['cross_entropy_loss'])))
print("Accuracy on test set: {:.02%}".format(eval_metrics['accuracy']))

eval_metrics

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-12-09-00:45:43
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0039/model.ckpt-135315
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-12-09-00:45:47
INFO:tensorflow:Saving dict for global step 135315: accuracy = 0.86298764, cross_entropy_loss = 0.3997835, global_step = 135315, loss = 0.40732667
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 135315: /tmp/tf_bow_sst_20181209-0039/model.ckpt-135315
Perplexity on test set: 1.49
Accuracy on test set: 86.30%


{'accuracy': 0.86298764,
 'cross_entropy_loss': 0.3997835,
 'loss': 0.40732667,
 'global_step': 135315}

In [22]:
#Evaluation on training data

eval_metrics = model.evaluate(input_fn=train_input_fn, name="train")  

print ("Perplexity on train set: {:.03}".format(math.exp(eval_metrics['cross_entropy_loss'])))
print("Accuracy on train set: {:.02%}".format(eval_metrics['accuracy']))
eval_metrics

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-12-09-00:45:47
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0039/model.ckpt-135315
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-12-09-00:46:20
INFO:tensorflow:Saving dict for global step 135315: accuracy = 0.86272746, cross_entropy_loss = 0.4002523, global_step = 135315, loss = 0.40780616
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 135315: /tmp/tf_bow_sst_20181209-0039/model.ckpt-135315
Perplexity on train set: 1.49
Accuracy on train set: 86.27%


{'accuracy': 0.86272746,
 'cross_entropy_loss': 0.4002523,
 'loss': 0.40780616,
 'global_step': 135315}