# MBTI Parallel Classification Model with Neural BOW (I/E Axis)

First, load libraries and useful functions from class:

In [1]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division


import os, sys, re, json, time, datetime, shutil
from importlib import reload
import collections, itertools

# NumPy and TensorFlow
import numpy as np
import pandas as pd
import tensorflow as tf
import patched_numpy_io
assert(tf.__version__.startswith("1."))

# Utils and Helper libraries
# import nltk
import utils, vocabulary
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import math
from nltk.corpus import stopwords

  from ._conv import register_converters as _register_converters


## Specifications for Binary Classification NBOW for MBTI

In this baseline, the task is to predict the first MBTI axis (I vs. E) given a text string. We will model after the A2 assignment, with Architecture and Parameters defined below.

### Pre-Processing:
* Minimial pre-processing, only separating punctuation from text and lower-case all text
* Assigning words to numerical indices based on a fixed Vocab size, defined by word frequency in training set
* Pulled out first axis of all target labels, assigned to binary (E = 0, I = 1)

### Architecture:
* Encoder: Bag of Words 
* Decoder: Softmax
* Classification: Binary (2 MBTI types - I or E)

### Parameters
* Batch Size: 25 
* Text length: 100
* Vocabulary size (V): ~328K - removed stopwords
* Embedding Size: 50
* Hidden Dimensions: 25

### Training:
* Epochs = 10 
* 80% train, 20% test
* Loss: Sparse Softmax Cross Entropy 
* Optimizers: Adagrad Optimizer

## Load Corpus & Pre-Process

In [2]:
#load data
df = pd.read_csv('./mbti_1.csv')
df.head(5)

Unnamed: 0,type,posts
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...
1,ENTP,'I'm finding the lack of me in these posts ver...
2,INTP,'Good one _____ https://www.youtube.com/wat...
3,INTJ,"'Dear INTP, I enjoyed our conversation the o..."
4,ENTJ,'You're fired.|||That's another silly misconce...


In [3]:
# function to tokenize and clean sentence ["Hello world."] into list of words ["hello","world"]
def clean(sentence):
    ignore_words = ['a']
    words = re.sub("[^\w]", " ",  sentence).split() #nltk.word_tokenize(sentence)
    words_cleaned = [w.lower() for w in words if w not in ignore_words]
    stop_words = set(stopwords.words('english'))
    words_cleaned = [w for w in words_cleaned if not w in stop_words]
    words_string = ''.join(words_cleaned)
    return words_string

In [4]:
# split posts per users into separate sentences
post = []
utype = []
user = []

for index, row in df.iterrows():
    posts = row['posts'].split('|||')
    posts_clean = []
    for sentence in posts:
        posts_clean.append(clean(sentence))
    post.extend(posts_clean)
#     post.extend(posts)
    utype.extend([row['type'] for i in range(len(posts))])
    user.extend([index for i in range(len(posts))])
    
short_posts = pd.DataFrame({"user": user,"type": utype,"post": post})
print(short_posts.shape)
short_posts.head(5)

(422845, 3)


Unnamed: 0,user,type,post
0,0,INFJ,httpwwwyoutubecomwatchvqsxhcwe3krw
1,0,INFJ,http41mediatumblrcomtumblr_lfouy03pma1qa1rooo1...
2,0,INFJ,enfpintjmomentshttpswwwyoutubecomwatchviz7le1g...
3,0,INFJ,lifechangingexperiencelife
4,0,INFJ,httpwwwyoutubecomwatchvvxzeywwrdw8httpwwwyoutu...


In [5]:
# Split data: 80% train, 20% test
post_train, post_test, label_train, label_test = train_test_split(np.array(short_posts['post']), 
                                                    np.array(short_posts['type']), 
                                                    test_size=0.2, 
                                                    random_state=88)


print("MBIT posts", post_train[:5])
print('')
print("MBTI Labels: ",label_train[:5])

MBIT posts ['httpswwwyoutubecomwatchvbxvkaah2d7m'
 'isfjsinfpsbalancereallywellthinklearncommunicatesjschoosingwordstonevoicecarefullynpslearningtakethingslesspersonallysj'
 'seekrecognitionfame'
 'honestmaybegivingvibesselfassuredthinkbullygoinggosomeonethinkfightbacksomeoneweak'
 'probablythinkingreallypersonallypreferaxbcdyfunctionstackcomparedgrantallowspartsgrantgetsrightstillconsistentjunge']

MBTI Labels:  ['INTP' 'INFP' 'INTP' 'ENFP' 'INTJ']


In [6]:
# Build a vocabulary (V size is defaulted to full text) for train corpus
vocab_mbti = vocabulary.Vocabulary((utils.canonicalize_word(w) for w in post_train))
vocab_mbti.size

328905

In [7]:
print (vocab_mbti.words_to_ids(['a','what','and','the']))
print (vocab_mbti.ids_to_words([202, 147565, 317206, 159348])) 

[2, 2, 2, 2]
['always', 'fearfeelingbadwantothersfeelbadsomethingsaiddonenoncommunicationconveysspnegativemessagebetter', 'forgotmentioningcluwellknowthoughtlastpostmentionedblushed', 'hiflyincavemanthinksoundsinfpdistractionlacksensoryawarenessconsistentnfpronouncedinfpsfeelsorryactivities']


In [8]:
# tokenize and canonicalize train and test sets
x_train = []
for post in post_train:
    x_train.append(vocab_mbti.words_to_ids(post.split()))

x_test = []
for post in post_test:
    x_test.append(vocab_mbti.words_to_ids(post.split()))

In [9]:
print("Original Text: ",post_train[88])
print("Canonicalized Text: ", x_train[88])
print("Max lengths of texts: ", max([len(x) for x in x_train+x_test]))

Original Text:  agreebrutal
Canonicalized Text:  [2305]
Max lengths of texts:  1


In [10]:
print(len(label_train[0]))


4


In [11]:
def binary_mbti(string):
    label_bin = []
    if string[0]=="E":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[1]=="N":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[2]=="F":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[3]=="J":
        label_bin.append(0)
    else:
        label_bin.append(1)
        
    assert len(label_bin) == 4,"Not a valid MBTI type"
    return label_bin

In [12]:
print(label_train[0])
print(binary_mbti(label_train[0]))

INTP
[1, 0, 1, 1]


In [13]:
y_train_id = list(map(lambda x: binary_mbti(x), label_train))
y_test_id = list(map(lambda x: binary_mbti(x), label_test))

print(y_train_id[0:5])
print(label_train[0:5])

[[1, 0, 1, 1], [1, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1], [1, 0, 1, 0]]
['INTP' 'INFP' 'INTP' 'ENFP' 'INTJ']


## Build the NBOW Model

In [14]:
def pad_np_array(example_ids, max_len=100, pad_id=0):

    arr = np.full([len(example_ids), max_len], pad_id, dtype=np.int32)
    ns = np.zeros([len(example_ids)], dtype=np.int32)
    for i, ids in enumerate(example_ids):
        cpy_len = min(len(ids), max_len)
        arr[i,:cpy_len] = ids[:cpy_len]
        ns[i] = cpy_len
    return arr, ns

def tokenize_post(post_string):
    return vocab_mbti.words_to_ids(post_string)

In [15]:
def as_padded_array(post_ids, targets, max_len=100, pad_id=0,
                    root_only=False, df_idxs=None):

    x, ns = pad_np_array(post_ids, max_len=max_len, pad_id=pad_id)
    return x, ns, np.array(targets)

In [16]:
len(y_train_id)

338276

In [17]:
y_train_1 = []
for i in range(len(y_train_id)):
    y_train_1.append(y_train_id[i][0])

y_test_1 = []
for i in range(len(y_test_id)):
    y_test_1.append(y_test_id[i][0])

In [18]:
train_x, train_ns, train_y = as_padded_array(x_train, y_train_1)
test_x, test_ns, test_y = as_padded_array(x_test, y_test_1)

In [19]:
len(y_train_1)
print(len(y_test_1))

84569


In [20]:
#set up model using tf.estimator

import MBTI_BOW_model; reload(MBTI_BOW_model)

# Specify model hyperparameters as used by model
model_params = dict(V=vocab_mbti.size, embed_dim=50, hidden_dims=[25], num_classes=2,
                    encoder_type='bow',
                    lr=0.1, optimizer='adagrad', beta=0.01)

checkpoint_dir = "/tmp/tf_bow_sst_" + datetime.datetime.now().strftime("%Y%m%d-%H%M")
if os.path.isdir(checkpoint_dir):
    shutil.rmtree(checkpoint_dir)

vocab_mbti.write_projector_config(checkpoint_dir, "Encoder/Embedding_Layer/W_embed")

model = tf.estimator.Estimator(model_fn=MBTI_BOW_model.classifier_model_fn, 
                               params=model_params,
                               model_dir=checkpoint_dir)
print("")
print("To view training (once it starts), run:\n")
print("    tensorboard --logdir='{:s}' --port 6006".format(checkpoint_dir))
print("\nThen in your browser, open: http://localhost:6006")

Vocabulary (328,905 words) written to '/tmp/tf_bow_sst_20181209-0002/metadata.tsv'
Projector config written to /tmp/tf_bow_sst_20181209-0002/projector_config.pbtxt
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tf_bow_sst_20181209-0002', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f0769620cf8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}

To view training (once it starts), run:

    tensorboard --logdir='/tmp/tf_bow_sst_20181209-0002' --port 6006

Then in your browser, open: htt

## Train Model

In [21]:
#start training


train_params = dict(batch_size=25, total_epochs=10, eval_every=2)
assert(train_params['total_epochs'] % train_params['eval_every'] == 0)


train_input_fn = patched_numpy_io.numpy_input_fn(
                    x={"ids": train_x, "ns": train_ns}, y=train_y,
                    batch_size=train_params['batch_size'], 
                    num_epochs=train_params['eval_every'], shuffle=True, seed=42
                 )


test_input_fn = tf.estimator.inputs.numpy_input_fn(
                    x={"ids": test_x, "ns": test_ns}, y=test_y,
                    batch_size=25, num_epochs=1, shuffle=False
                )

for _ in range(train_params['total_epochs'] // train_params['eval_every']):
  
    model.train(input_fn=train_input_fn)


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tf_bow_sst_20181209-0002/model.ckpt.
INFO:tensorflow:loss = 1.0795684, step = 1
INFO:tensorflow:global_step/sec: 178.156
INFO:tensorflow:loss = 0.70701534, step = 101 (0.566 sec)
INFO:tensorflow:global_step/sec: 198.395
INFO:tensorflow:loss = 0.61591583, step = 201 (0.504 sec)
INFO:tensorflow:global_step/sec: 204.111
INFO:tensorflow:loss = 0.6153555, step = 301 (0.490 sec)
INFO:tensorflow:global_step/sec: 203.871
INFO:tensorflow:loss = 0.72303694, step = 401 (0.491 sec)
INFO:tensorflow:global_step/sec: 202.42
INFO:tensorflow:loss = 0.66030294, step = 501 (0.494 sec)
INFO:tensorflow:global_step/sec: 200.409
INFO:tensorflow:loss = 0.5244156, step = 601 (0.499 sec)
INFO:tensorflow:global_step/sec: 202.135


INFO:tensorflow:global_step/sec: 212.813
INFO:tensorflow:loss = 0.5082699, step = 8001 (0.470 sec)
INFO:tensorflow:global_step/sec: 206.813
INFO:tensorflow:loss = 0.46109566, step = 8101 (0.484 sec)
INFO:tensorflow:global_step/sec: 210.383
INFO:tensorflow:loss = 0.74759746, step = 8201 (0.475 sec)
INFO:tensorflow:global_step/sec: 203.883
INFO:tensorflow:loss = 0.45686802, step = 8301 (0.491 sec)
INFO:tensorflow:global_step/sec: 201.403
INFO:tensorflow:loss = 0.650445, step = 8401 (0.496 sec)
INFO:tensorflow:global_step/sec: 199.565
INFO:tensorflow:loss = 0.5556252, step = 8501 (0.501 sec)
INFO:tensorflow:global_step/sec: 200.748
INFO:tensorflow:loss = 0.36592674, step = 8601 (0.498 sec)
INFO:tensorflow:global_step/sec: 199.535
INFO:tensorflow:loss = 0.506722, step = 8701 (0.501 sec)
INFO:tensorflow:global_step/sec: 200.303
INFO:tensorflow:loss = 0.6005305, step = 8801 (0.500 sec)
INFO:tensorflow:global_step/sec: 210.016
INFO:tensorflow:loss = 0.5544501, step = 8901 (0.476 sec)
INFO:ten

INFO:tensorflow:global_step/sec: 217.21
INFO:tensorflow:loss = 0.5546702, step = 16201 (0.461 sec)
INFO:tensorflow:global_step/sec: 218.279
INFO:tensorflow:loss = 0.6482405, step = 16301 (0.458 sec)
INFO:tensorflow:global_step/sec: 214.525
INFO:tensorflow:loss = 0.46078235, step = 16401 (0.466 sec)
INFO:tensorflow:global_step/sec: 217.293
INFO:tensorflow:loss = 0.7416355, step = 16501 (0.460 sec)
INFO:tensorflow:global_step/sec: 223.176
INFO:tensorflow:loss = 0.41394937, step = 16601 (0.448 sec)
INFO:tensorflow:global_step/sec: 224.729
INFO:tensorflow:loss = 0.41251975, step = 16701 (0.445 sec)
INFO:tensorflow:global_step/sec: 224.434
INFO:tensorflow:loss = 0.50783294, step = 16801 (0.446 sec)
INFO:tensorflow:global_step/sec: 222.199
INFO:tensorflow:loss = 0.5545066, step = 16901 (0.450 sec)
INFO:tensorflow:global_step/sec: 221.522
INFO:tensorflow:loss = 0.41307467, step = 17001 (0.452 sec)
INFO:tensorflow:global_step/sec: 219.505
INFO:tensorflow:loss = 0.60112005, step = 17101 (0.456 

INFO:tensorflow:global_step/sec: 227.544
INFO:tensorflow:loss = 0.32179785, step = 24401 (0.439 sec)
INFO:tensorflow:global_step/sec: 232.996
INFO:tensorflow:loss = 0.55453223, step = 24501 (0.429 sec)
INFO:tensorflow:global_step/sec: 233.971
INFO:tensorflow:loss = 0.41362163, step = 24601 (0.427 sec)
INFO:tensorflow:global_step/sec: 233.889
INFO:tensorflow:loss = 0.5547997, step = 24701 (0.427 sec)
INFO:tensorflow:global_step/sec: 237.261
INFO:tensorflow:loss = 0.4589513, step = 24801 (0.422 sec)
INFO:tensorflow:global_step/sec: 228.752
INFO:tensorflow:loss = 0.36194873, step = 24901 (0.437 sec)
INFO:tensorflow:global_step/sec: 234.328
INFO:tensorflow:loss = 0.65078145, step = 25001 (0.427 sec)
INFO:tensorflow:global_step/sec: 231.462
INFO:tensorflow:loss = 0.45822152, step = 25101 (0.432 sec)
INFO:tensorflow:global_step/sec: 233.699
INFO:tensorflow:loss = 0.6028294, step = 25201 (0.428 sec)
INFO:tensorflow:global_step/sec: 226.041
INFO:tensorflow:loss = 0.6486658, step = 25301 (0.443

INFO:tensorflow:global_step/sec: 234.759
INFO:tensorflow:loss = 0.6483323, step = 32064 (0.426 sec)
INFO:tensorflow:global_step/sec: 228.738
INFO:tensorflow:loss = 0.5545218, step = 32164 (0.437 sec)
INFO:tensorflow:global_step/sec: 231.507
INFO:tensorflow:loss = 0.6471501, step = 32264 (0.432 sec)
INFO:tensorflow:global_step/sec: 230.151
INFO:tensorflow:loss = 0.554489, step = 32364 (0.434 sec)
INFO:tensorflow:global_step/sec: 236.131
INFO:tensorflow:loss = 0.60117453, step = 32464 (0.424 sec)
INFO:tensorflow:global_step/sec: 233.357
INFO:tensorflow:loss = 0.36555082, step = 32564 (0.428 sec)
INFO:tensorflow:global_step/sec: 232.92
INFO:tensorflow:loss = 0.50759214, step = 32664 (0.429 sec)
INFO:tensorflow:global_step/sec: 238.231
INFO:tensorflow:loss = 0.695165, step = 32764 (0.420 sec)
INFO:tensorflow:global_step/sec: 238.736
INFO:tensorflow:loss = 0.6471168, step = 32864 (0.419 sec)
INFO:tensorflow:global_step/sec: 241.025
INFO:tensorflow:loss = 0.6001372, step = 32964 (0.415 sec)


INFO:tensorflow:global_step/sec: 214.294
INFO:tensorflow:loss = 0.5072996, step = 40264 (0.466 sec)
INFO:tensorflow:global_step/sec: 231.235
INFO:tensorflow:loss = 0.60189253, step = 40364 (0.432 sec)
INFO:tensorflow:global_step/sec: 241.714
INFO:tensorflow:loss = 0.46061546, step = 40464 (0.414 sec)
INFO:tensorflow:global_step/sec: 232.666
INFO:tensorflow:loss = 0.3687992, step = 40564 (0.429 sec)
INFO:tensorflow:global_step/sec: 235.423
INFO:tensorflow:loss = 0.5076045, step = 40664 (0.425 sec)
INFO:tensorflow:global_step/sec: 237.787
INFO:tensorflow:loss = 0.414974, step = 40764 (0.421 sec)
INFO:tensorflow:global_step/sec: 219.871
INFO:tensorflow:loss = 0.41439453, step = 40864 (0.455 sec)
INFO:tensorflow:global_step/sec: 232.513
INFO:tensorflow:loss = 0.60108584, step = 40964 (0.430 sec)
INFO:tensorflow:global_step/sec: 236.863
INFO:tensorflow:loss = 0.64852405, step = 41064 (0.422 sec)
INFO:tensorflow:global_step/sec: 240.42
INFO:tensorflow:loss = 0.6005396, step = 41164 (0.416 se

INFO:tensorflow:global_step/sec: 228.151
INFO:tensorflow:loss = 0.6014952, step = 48464 (0.439 sec)
INFO:tensorflow:global_step/sec: 228.815
INFO:tensorflow:loss = 0.5076641, step = 48564 (0.437 sec)
INFO:tensorflow:global_step/sec: 227.928
INFO:tensorflow:loss = 0.5546255, step = 48664 (0.440 sec)
INFO:tensorflow:global_step/sec: 201.882
INFO:tensorflow:loss = 0.69677764, step = 48764 (0.494 sec)
INFO:tensorflow:global_step/sec: 229.32
INFO:tensorflow:loss = 0.5547513, step = 48864 (0.436 sec)
INFO:tensorflow:global_step/sec: 235.434
INFO:tensorflow:loss = 0.5071211, step = 48964 (0.425 sec)
INFO:tensorflow:global_step/sec: 229.001
INFO:tensorflow:loss = 0.6020499, step = 49064 (0.437 sec)
INFO:tensorflow:global_step/sec: 231.225
INFO:tensorflow:loss = 0.5548776, step = 49164 (0.432 sec)
INFO:tensorflow:global_step/sec: 242.873
INFO:tensorflow:loss = 0.6501301, step = 49264 (0.412 sec)
INFO:tensorflow:global_step/sec: 233.042
INFO:tensorflow:loss = 0.4114922, step = 49364 (0.429 sec)


INFO:tensorflow:global_step/sec: 232.201
INFO:tensorflow:loss = 0.46002012, step = 56127 (0.430 sec)
INFO:tensorflow:global_step/sec: 232.578
INFO:tensorflow:loss = 0.5545423, step = 56227 (0.430 sec)
INFO:tensorflow:global_step/sec: 223.679
INFO:tensorflow:loss = 0.46069056, step = 56327 (0.447 sec)
INFO:tensorflow:global_step/sec: 236.297
INFO:tensorflow:loss = 0.6009747, step = 56427 (0.423 sec)
INFO:tensorflow:global_step/sec: 230.095
INFO:tensorflow:loss = 0.64718616, step = 56527 (0.435 sec)
INFO:tensorflow:global_step/sec: 231.1
INFO:tensorflow:loss = 0.601244, step = 56627 (0.433 sec)
INFO:tensorflow:global_step/sec: 231.631
INFO:tensorflow:loss = 0.60142064, step = 56727 (0.432 sec)
INFO:tensorflow:global_step/sec: 228.777
INFO:tensorflow:loss = 0.5076368, step = 56827 (0.437 sec)
INFO:tensorflow:global_step/sec: 217.614
INFO:tensorflow:loss = 0.41299036, step = 56927 (0.460 sec)
INFO:tensorflow:global_step/sec: 219.334
INFO:tensorflow:loss = 0.50765663, step = 57027 (0.456 se

INFO:tensorflow:global_step/sec: 232.022
INFO:tensorflow:loss = 0.6919487, step = 64327 (0.431 sec)
INFO:tensorflow:global_step/sec: 229.946
INFO:tensorflow:loss = 0.647534, step = 64427 (0.435 sec)
INFO:tensorflow:global_step/sec: 230.38
INFO:tensorflow:loss = 0.5077979, step = 64527 (0.434 sec)
INFO:tensorflow:global_step/sec: 230.608
INFO:tensorflow:loss = 0.6928401, step = 64627 (0.434 sec)
INFO:tensorflow:global_step/sec: 241.491
INFO:tensorflow:loss = 0.5544222, step = 64727 (0.414 sec)
INFO:tensorflow:global_step/sec: 244.287
INFO:tensorflow:loss = 0.6938545, step = 64827 (0.410 sec)
INFO:tensorflow:global_step/sec: 239.086
INFO:tensorflow:loss = 0.5076585, step = 64927 (0.418 sec)
INFO:tensorflow:global_step/sec: 241.942
INFO:tensorflow:loss = 0.5081335, step = 65027 (0.413 sec)
INFO:tensorflow:global_step/sec: 235.451
INFO:tensorflow:loss = 0.6473126, step = 65127 (0.425 sec)
INFO:tensorflow:global_step/sec: 218.164
INFO:tensorflow:loss = 0.46119595, step = 65227 (0.459 sec)
I

INFO:tensorflow:global_step/sec: 230.232
INFO:tensorflow:loss = 0.5545682, step = 72527 (0.435 sec)
INFO:tensorflow:global_step/sec: 226.151
INFO:tensorflow:loss = 0.55450493, step = 72627 (0.442 sec)
INFO:tensorflow:global_step/sec: 235.252
INFO:tensorflow:loss = 0.55451775, step = 72727 (0.425 sec)
INFO:tensorflow:global_step/sec: 236.5
INFO:tensorflow:loss = 0.6009689, step = 72827 (0.422 sec)
INFO:tensorflow:global_step/sec: 243.643
INFO:tensorflow:loss = 0.41490847, step = 72927 (0.410 sec)
INFO:tensorflow:global_step/sec: 237.68
INFO:tensorflow:loss = 0.5076568, step = 73027 (0.421 sec)
INFO:tensorflow:global_step/sec: 234.525
INFO:tensorflow:loss = 0.46068406, step = 73127 (0.426 sec)
INFO:tensorflow:global_step/sec: 235.773
INFO:tensorflow:loss = 0.55456173, step = 73227 (0.424 sec)
INFO:tensorflow:global_step/sec: 239.999
INFO:tensorflow:loss = 0.6014203, step = 73327 (0.417 sec)
INFO:tensorflow:global_step/sec: 235.51
INFO:tensorflow:loss = 0.4148014, step = 73427 (0.425 sec)

INFO:tensorflow:global_step/sec: 219.286
INFO:tensorflow:loss = 0.6488777, step = 80727 (0.456 sec)
INFO:tensorflow:global_step/sec: 223
INFO:tensorflow:loss = 0.41349825, step = 80827 (0.449 sec)
INFO:tensorflow:global_step/sec: 233.813
INFO:tensorflow:loss = 0.6972075, step = 80927 (0.428 sec)
INFO:tensorflow:global_step/sec: 234.449
INFO:tensorflow:loss = 0.601773, step = 81027 (0.426 sec)
INFO:tensorflow:global_step/sec: 235.533
INFO:tensorflow:loss = 0.41382885, step = 81127 (0.425 sec)
INFO:tensorflow:Saving checkpoints for 81189 into /tmp/tf_bow_sst_20181209-0002/model.ckpt.
INFO:tensorflow:Loss for final step: 0.27570623.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0002/model.ckpt-81189
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints f

INFO:tensorflow:global_step/sec: 228.788
INFO:tensorflow:loss = 0.46026888, step = 88390 (0.437 sec)
INFO:tensorflow:global_step/sec: 217.582
INFO:tensorflow:loss = 0.5545407, step = 88490 (0.460 sec)
INFO:tensorflow:global_step/sec: 219.176
INFO:tensorflow:loss = 0.41541716, step = 88590 (0.456 sec)
INFO:tensorflow:global_step/sec: 220.651
INFO:tensorflow:loss = 0.55447745, step = 88690 (0.453 sec)
INFO:tensorflow:global_step/sec: 235.45
INFO:tensorflow:loss = 0.31997675, step = 88790 (0.425 sec)
INFO:tensorflow:global_step/sec: 234.847
INFO:tensorflow:loss = 0.7906066, step = 88890 (0.426 sec)
INFO:tensorflow:global_step/sec: 221.794
INFO:tensorflow:loss = 0.46033207, step = 88990 (0.451 sec)
INFO:tensorflow:global_step/sec: 216.658
INFO:tensorflow:loss = 0.5545267, step = 89090 (0.462 sec)
INFO:tensorflow:global_step/sec: 215.579
INFO:tensorflow:loss = 0.5076721, step = 89190 (0.464 sec)
INFO:tensorflow:global_step/sec: 204.41
INFO:tensorflow:loss = 0.46061558, step = 89290 (0.489 s

INFO:tensorflow:global_step/sec: 235.66
INFO:tensorflow:loss = 0.8816463, step = 96590 (0.424 sec)
INFO:tensorflow:global_step/sec: 233.242
INFO:tensorflow:loss = 0.5545594, step = 96690 (0.429 sec)
INFO:tensorflow:global_step/sec: 231.063
INFO:tensorflow:loss = 0.554547, step = 96790 (0.433 sec)
INFO:tensorflow:global_step/sec: 229.208
INFO:tensorflow:loss = 0.64836395, step = 96890 (0.436 sec)
INFO:tensorflow:global_step/sec: 225.524
INFO:tensorflow:loss = 0.60128313, step = 96990 (0.444 sec)
INFO:tensorflow:global_step/sec: 224.2
INFO:tensorflow:loss = 0.6474568, step = 97090 (0.446 sec)
INFO:tensorflow:global_step/sec: 226.347
INFO:tensorflow:loss = 0.46150205, step = 97190 (0.442 sec)
INFO:tensorflow:global_step/sec: 233.826
INFO:tensorflow:loss = 0.507779, step = 97290 (0.427 sec)
INFO:tensorflow:global_step/sec: 227.132
INFO:tensorflow:loss = 0.55455345, step = 97390 (0.441 sec)
INFO:tensorflow:global_step/sec: 235.61
INFO:tensorflow:loss = 0.6481557, step = 97490 (0.424 sec)
IN

INFO:tensorflow:loss = 0.60074663, step = 104690 (0.425 sec)
INFO:tensorflow:global_step/sec: 233.73
INFO:tensorflow:loss = 0.4623939, step = 104790 (0.428 sec)
INFO:tensorflow:global_step/sec: 231.421
INFO:tensorflow:loss = 0.5082789, step = 104890 (0.432 sec)
INFO:tensorflow:global_step/sec: 233.431
INFO:tensorflow:loss = 0.46201965, step = 104990 (0.428 sec)
INFO:tensorflow:global_step/sec: 240.792
INFO:tensorflow:loss = 0.55443704, step = 105090 (0.415 sec)
INFO:tensorflow:global_step/sec: 239.902
INFO:tensorflow:loss = 0.5544389, step = 105190 (0.417 sec)
INFO:tensorflow:global_step/sec: 236.308
INFO:tensorflow:loss = 0.6471301, step = 105290 (0.423 sec)
INFO:tensorflow:global_step/sec: 239.637
INFO:tensorflow:loss = 0.4615341, step = 105390 (0.417 sec)
INFO:tensorflow:global_step/sec: 233.435
INFO:tensorflow:loss = 0.5078486, step = 105490 (0.429 sec)
INFO:tensorflow:global_step/sec: 230.989
INFO:tensorflow:loss = 0.3220248, step = 105590 (0.433 sec)
INFO:tensorflow:global_step/s

INFO:tensorflow:loss = 0.55445296, step = 112253 (0.455 sec)
INFO:tensorflow:global_step/sec: 235.337
INFO:tensorflow:loss = 0.6010827, step = 112353 (0.425 sec)
INFO:tensorflow:global_step/sec: 235.862
INFO:tensorflow:loss = 0.64780515, step = 112453 (0.424 sec)
INFO:tensorflow:global_step/sec: 238.391
INFO:tensorflow:loss = 0.74152845, step = 112553 (0.419 sec)
INFO:tensorflow:global_step/sec: 234.301
INFO:tensorflow:loss = 0.64718163, step = 112653 (0.427 sec)
INFO:tensorflow:global_step/sec: 231.845
INFO:tensorflow:loss = 0.41440138, step = 112753 (0.431 sec)
INFO:tensorflow:global_step/sec: 231.911
INFO:tensorflow:loss = 0.50772053, step = 112853 (0.431 sec)
INFO:tensorflow:global_step/sec: 231.227
INFO:tensorflow:loss = 0.27520725, step = 112953 (0.433 sec)
INFO:tensorflow:global_step/sec: 232.83
INFO:tensorflow:loss = 0.64826477, step = 113053 (0.429 sec)
INFO:tensorflow:global_step/sec: 232.787
INFO:tensorflow:loss = 0.46086547, step = 113153 (0.430 sec)
INFO:tensorflow:global_

INFO:tensorflow:loss = 0.4134285, step = 120353 (0.453 sec)
INFO:tensorflow:global_step/sec: 222.485
INFO:tensorflow:loss = 0.3666113, step = 120453 (0.450 sec)
INFO:tensorflow:global_step/sec: 226.401
INFO:tensorflow:loss = 0.46017757, step = 120553 (0.441 sec)
INFO:tensorflow:global_step/sec: 233.869
INFO:tensorflow:loss = 0.4601356, step = 120653 (0.427 sec)
INFO:tensorflow:global_step/sec: 230.556
INFO:tensorflow:loss = 0.64886606, step = 120753 (0.434 sec)
INFO:tensorflow:global_step/sec: 235.897
INFO:tensorflow:loss = 0.5546464, step = 120853 (0.424 sec)
INFO:tensorflow:global_step/sec: 234.003
INFO:tensorflow:loss = 0.55465776, step = 120953 (0.427 sec)
INFO:tensorflow:global_step/sec: 235.885
INFO:tensorflow:loss = 0.5072898, step = 121053 (0.424 sec)
INFO:tensorflow:global_step/sec: 240.814
INFO:tensorflow:loss = 0.55468935, step = 121153 (0.415 sec)
INFO:tensorflow:global_step/sec: 224.494
INFO:tensorflow:loss = 0.554713, step = 121253 (0.446 sec)
INFO:tensorflow:global_step/

INFO:tensorflow:loss = 0.5078509, step = 128453 (0.416 sec)
INFO:tensorflow:global_step/sec: 240.557
INFO:tensorflow:loss = 0.6013434, step = 128553 (0.416 sec)
INFO:tensorflow:global_step/sec: 234.837
INFO:tensorflow:loss = 0.41300112, step = 128653 (0.426 sec)
INFO:tensorflow:global_step/sec: 235.475
INFO:tensorflow:loss = 0.5545611, step = 128753 (0.425 sec)
INFO:tensorflow:global_step/sec: 223.534
INFO:tensorflow:loss = 0.60126704, step = 128853 (0.447 sec)
INFO:tensorflow:global_step/sec: 195.974
INFO:tensorflow:loss = 0.6485547, step = 128953 (0.510 sec)
INFO:tensorflow:global_step/sec: 225.842
INFO:tensorflow:loss = 0.648381, step = 129053 (0.443 sec)
INFO:tensorflow:global_step/sec: 237.307
INFO:tensorflow:loss = 0.36791083, step = 129153 (0.422 sec)
INFO:tensorflow:global_step/sec: 227.56
INFO:tensorflow:loss = 0.55445695, step = 129253 (0.439 sec)
INFO:tensorflow:global_step/sec: 233.973
INFO:tensorflow:loss = 0.32098943, step = 129353 (0.427 sec)
INFO:tensorflow:global_step/

## Evaluation

In [22]:
#Evaluation on test data

eval_metrics = model.evaluate(input_fn=test_input_fn, name="test")  

print ("Perplexity on test set: {:.03}".format(math.exp(eval_metrics['cross_entropy_loss'])))
print("Accuracy on test set: {:.02%}".format(eval_metrics['accuracy']))

eval_metrics

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-12-09-00:12:54
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0002/model.ckpt-135315
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-12-09-00:13:00
INFO:tensorflow:Saving dict for global step 135315: accuracy = 0.7689815, cross_entropy_loss = 0.54061544, global_step = 135315, loss = 0.5440164
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 135315: /tmp/tf_bow_sst_20181209-0002/model.ckpt-135315
Perplexity on test set: 1.72
Accuracy on test set: 76.90%


{'accuracy': 0.7689815,
 'cross_entropy_loss': 0.54061544,
 'loss': 0.5440164,
 'global_step': 135315}

In [23]:
#Evaluation on training data

eval_metrics = model.evaluate(input_fn=train_input_fn, name="train")  

print ("Perplexity on train set: {:.03}".format(math.exp(eval_metrics['cross_entropy_loss'])))
print("Accuracy on train set: {:.02%}".format(eval_metrics['accuracy']))
eval_metrics

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-12-09-00:13:01
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0002/model.ckpt-135315
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-12-09-00:13:43
INFO:tensorflow:Saving dict for global step 135315: accuracy = 0.76928604, cross_entropy_loss = 0.54030526, global_step = 135315, loss = 0.54366934
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 135315: /tmp/tf_bow_sst_20181209-0002/model.ckpt-135315
Perplexity on train set: 1.72
Accuracy on train set: 76.93%


{'accuracy': 0.76928604,
 'cross_entropy_loss': 0.54030526,
 'loss': 0.54366934,
 'global_step': 135315}