# MBTI Parallel Classification Model with Neural BOW (J/P Axis)

First, load libraries and useful functions from class:

In [1]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division


import os, sys, re, json, time, datetime, shutil
from importlib import reload
import collections, itertools

# NumPy and TensorFlow
import numpy as np
import pandas as pd
import tensorflow as tf
import patched_numpy_io
assert(tf.__version__.startswith("1."))

# Utils and Helper libraries
# import nltk
import utils, vocabulary
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import math
from nltk.corpus import stopwords

  from ._conv import register_converters as _register_converters


## Specifications for Binary NBOW for MBTI

In this baseline, the task is to predict the first MBTI axis (J vs. P) given a text string. We will model after the A2 assignment, with Architecture and Parameters defined below.

### Pre-Processing:
* Minimial pre-processing, only separating punctuation from text and lower-case all text
* Assigning words to numerical indices based on a fixed Vocab size, defined by word fre-quency in training set

### Architecture:
* Encoder: Bag of Words 
* Decoder: Softmax
* Classification: Binary (2 MBTI types - J or P)

### Parameters
* Batch Size: 25 
* Text length: 100
* Vocabulary size (V): ~328K - removed stopwords
* Embedding Size: 50
* Hidden Dimensions: 25

### Training:
* Epochs = 25 
* 80% train, 20% test
* Loss: Softmax Cross Entropy
* Optimizers: Adagrad

## Load Corpus & Pre-Process

In [2]:
#load data
df = pd.read_csv('./mbti_1.csv')
df.head(5)

Unnamed: 0,type,posts
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...
1,ENTP,'I'm finding the lack of me in these posts ver...
2,INTP,'Good one _____ https://www.youtube.com/wat...
3,INTJ,"'Dear INTP, I enjoyed our conversation the o..."
4,ENTJ,'You're fired.|||That's another silly misconce...


In [3]:
# function to clean and tokenize sentence ["Hello world."] into list of words ["hello world"]
def clean(sentence):
    ignore_words = ['a']
    words = re.sub("[^\w]", " ",  sentence).split() #nltk.word_tokenize(sentence)
    words_cleaned = [w.lower() for w in words if w not in ignore_words]
    stop_words = set(stopwords.words('english'))
    words_cleaned = [w for w in words_cleaned if not w in stop_words]
    words_string = ''.join(words_cleaned)
    return words_string

In [4]:
# split posts per users into separate sentences
post = []
utype = []
user = []

for index, row in df.iterrows():
    posts = row['posts'].split('|||')
    posts_clean = []
    for sentence in posts:
        posts_clean.append(clean(sentence))
    post.extend(posts_clean)
#     post.extend(posts)
    utype.extend([row['type'] for i in range(len(posts))])
    user.extend([index for i in range(len(posts))])
    
short_posts = pd.DataFrame({"user": user,"type": utype,"post": post})
print(short_posts.shape)
short_posts.head(5)

(422845, 3)


Unnamed: 0,user,type,post
0,0,INFJ,httpwwwyoutubecomwatchvqsxhcwe3krw
1,0,INFJ,http41mediatumblrcomtumblr_lfouy03pma1qa1rooo1...
2,0,INFJ,enfpintjmomentshttpswwwyoutubecomwatchviz7le1g...
3,0,INFJ,lifechangingexperiencelife
4,0,INFJ,httpwwwyoutubecomwatchvvxzeywwrdw8httpwwwyoutu...


In [5]:
# Split data: 80% train, 20% test
post_train, post_test, label_train, label_test = train_test_split(np.array(short_posts['post']), 
                                                    np.array(short_posts['type']), 
                                                    test_size=0.2, 
                                                    random_state=88)


print("MBIT posts", post_train[:5])
print('')
print("MBTI Labels: ",label_train[:5])

MBIT posts ['httpswwwyoutubecomwatchvbxvkaah2d7m'
 'isfjsinfpsbalancereallywellthinklearncommunicatesjschoosingwordstonevoicecarefullynpslearningtakethingslesspersonallysj'
 'seekrecognitionfame'
 'honestmaybegivingvibesselfassuredthinkbullygoinggosomeonethinkfightbacksomeoneweak'
 'probablythinkingreallypersonallypreferaxbcdyfunctionstackcomparedgrantallowspartsgrantgetsrightstillconsistentjunge']

MBTI Labels:  ['INTP' 'INFP' 'INTP' 'ENFP' 'INTJ']


In [6]:
# Build a vocabulary (V size is defaulted to full text) for train corpus
vocab_mbti = vocabulary.Vocabulary((utils.canonicalize_word(w) for w in post_train))
vocab_mbti.size

328905

In [7]:
print (vocab_mbti.words_to_ids(['basically', 'internet','the']))
print (vocab_mbti.ids_to_words([1233,799,2])) 

[1233, 799, 2]
['basically', 'internet', '<unk>']


In [8]:
# tokenize and canonicalize train and test sets
x_train = []
for post in post_train:
    x_train.append(vocab_mbti.words_to_ids(post.split()))

x_test = []
for post in post_test:
    x_test.append(vocab_mbti.words_to_ids(post.split()))

In [9]:
print("Original Text: ",post_train[88])
print("Canonicalized Text: ", x_train[88])
print("Max lengths of texts: ", max([len(x) for x in x_train+x_test]))

Original Text:  agreebrutal
Canonicalized Text:  [2305]
Max lengths of texts:  1


In [10]:
print(len(label_train[0]))


4


In [11]:
def binary_mbti(string):
    label_bin = []
    if string[0]=="E":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[1]=="N":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[2]=="F":
        label_bin.append(0)
    else:
        label_bin.append(1)
    if string[3]=="J":
        label_bin.append(0)
    else:
        label_bin.append(1)
        
    assert len(label_bin) == 4,"Not a valid MBTI type"
    return label_bin

In [12]:
print(label_train[0])
print(binary_mbti(label_train[0]))

INTP
[1, 0, 1, 1]


In [13]:
y_train_id = list(map(lambda x: binary_mbti(x), label_train))
y_test_id = list(map(lambda x: binary_mbti(x), label_test))

print(y_train_id[0:5])
print(label_train[0:5])
print(y_test_id[0:5])
print(label_test[0:5])

[[1, 0, 1, 1], [1, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1], [1, 0, 1, 0]]
['INTP' 'INFP' 'INTP' 'ENFP' 'INTJ']
[[1, 0, 0, 1], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 1]]
['INFP' 'INFJ' 'INFJ' 'INFJ' 'INFP']


## Bulid the NBOW Model

In [14]:
def pad_np_array(example_ids, max_len=35, pad_id=0):
    """Pad a list of lists of ids into a rectangular NumPy array.

    """
    arr = np.full([len(example_ids), max_len], pad_id, dtype=np.int32)
    ns = np.zeros([len(example_ids)], dtype=np.int32)
    for i, ids in enumerate(example_ids):
        cpy_len = min(len(ids), max_len)
        arr[i,:cpy_len] = ids[:cpy_len]
        ns[i] = cpy_len
    return arr, ns

def tokenize_post(post_string):
    return vocab_mbti.words_to_ids(post_string)

In [15]:
def as_padded_array(post_ids, targets, max_len=40, pad_id=0,
                    root_only=False, df_idxs=None):
    """Return the dataset as a (padded) NumPy array.
  
    """
    #needs to put together the pad_np_array output with  the target labels in dataframe
    
    
    x, ns = pad_np_array(post_ids, max_len=max_len, pad_id=pad_id)
    return x, ns, np.array(targets)

In [16]:
y_train_4 = []
for i in range(len(y_train_id)):
    y_train_4.append(y_train_id[i][3])

y_test_4 = []
for i in range(len(y_test_id)):
    y_test_4.append(y_test_id[i][3])

In [17]:
train_x, train_ns, train_y = as_padded_array(x_train, y_train_4)
test_x, test_ns, test_y = as_padded_array(x_test, y_test_4)

In [18]:
y_test_id[0:5]

[[1, 0, 0, 1], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 1]]

In [19]:
#set up model using tf.estimator

import MBTI_BOW_model; reload(MBTI_BOW_model)


model_params = dict(V=vocab_mbti.size, embed_dim=50, hidden_dims=[25], num_classes=2,
                    encoder_type='bow',
                    lr=0.1, optimizer='adagrad', beta=0.01)

checkpoint_dir = "/tmp/tf_bow_sst_" + datetime.datetime.now().strftime("%Y%m%d-%H%M")
if os.path.isdir(checkpoint_dir):
    shutil.rmtree(checkpoint_dir)

vocab_mbti.write_projector_config(checkpoint_dir, "Encoder/Embedding_Layer/W_embed")

model = tf.estimator.Estimator(model_fn=MBTI_BOW_model.classifier_model_fn, 
                               params=model_params,
                               model_dir=checkpoint_dir)
print("")
print("To view training (once it starts), run:\n")
print("    tensorboard --logdir='{:s}' --port 6006".format(checkpoint_dir))
print("\nThen in your browser, open: http://localhost:6006")

Vocabulary (328,905 words) written to '/tmp/tf_bow_sst_20181209-0126/metadata.tsv'
Projector config written to /tmp/tf_bow_sst_20181209-0126/projector_config.pbtxt
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tf_bow_sst_20181209-0126', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f8e7b161748>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}

To view training (once it starts), run:

    tensorboard --logdir='/tmp/tf_bow_sst_20181209-0126' --port 6006

Then in your browser, open: htt

## Train Model

In [20]:
#start training

# Training params, just used in this cell for the input_fn-s
train_params = dict(batch_size=25, total_epochs=10, eval_every=2)
assert(train_params['total_epochs'] % train_params['eval_every'] == 0)

train_input_fn = patched_numpy_io.numpy_input_fn(
                    x={"ids": train_x, "ns": train_ns}, y=train_y,
                    batch_size=train_params['batch_size'], 
                    num_epochs=train_params['eval_every'], shuffle=True, seed=42
                 )


test_input_fn = tf.estimator.inputs.numpy_input_fn(
                    x={"ids": test_x, "ns": test_ns}, y=test_y,
                    batch_size=25, num_epochs=1, shuffle=False
                )

for _ in range(train_params['total_epochs'] // train_params['eval_every']):
    model.train(input_fn=train_input_fn)


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tf_bow_sst_20181209-0126/model.ckpt.
INFO:tensorflow:loss = 1.0790395, step = 1
INFO:tensorflow:global_step/sec: 127.899
INFO:tensorflow:loss = 0.8309624, step = 101 (0.784 sec)
INFO:tensorflow:global_step/sec: 164.574
INFO:tensorflow:loss = 0.747859, step = 201 (0.608 sec)
INFO:tensorflow:global_step/sec: 169.385
INFO:tensorflow:loss = 0.71720934, step = 301 (0.591 sec)
INFO:tensorflow:global_step/sec: 169.375
INFO:tensorflow:loss = 0.7075082, step = 401 (0.590 sec)
INFO:tensorflow:global_step/sec: 174.625
INFO:tensorflow:loss = 0.6763199, step = 501 (0.573 sec)
INFO:tensorflow:global_step/sec: 164.793
INFO:tensorflow:loss = 0.66607106, step = 601 (0.607 sec)
INFO:tensorflow:global_step/sec: 164.046
IN

INFO:tensorflow:global_step/sec: 169.92
INFO:tensorflow:loss = 0.6563321, step = 8001 (0.589 sec)
INFO:tensorflow:global_step/sec: 167.761
INFO:tensorflow:loss = 0.706841, step = 8101 (0.596 sec)
INFO:tensorflow:global_step/sec: 170.264
INFO:tensorflow:loss = 0.688788, step = 8201 (0.587 sec)
INFO:tensorflow:global_step/sec: 167.756
INFO:tensorflow:loss = 0.65972, step = 8301 (0.596 sec)
INFO:tensorflow:global_step/sec: 173.01
INFO:tensorflow:loss = 0.64202285, step = 8401 (0.578 sec)
INFO:tensorflow:global_step/sec: 174.449
INFO:tensorflow:loss = 0.70666355, step = 8501 (0.573 sec)
INFO:tensorflow:global_step/sec: 171.707
INFO:tensorflow:loss = 0.7438603, step = 8601 (0.583 sec)
INFO:tensorflow:global_step/sec: 173.57
INFO:tensorflow:loss = 0.67344105, step = 8701 (0.576 sec)
INFO:tensorflow:global_step/sec: 173.273
INFO:tensorflow:loss = 0.6901567, step = 8801 (0.577 sec)
INFO:tensorflow:global_step/sec: 169.392
INFO:tensorflow:loss = 0.69074047, step = 8901 (0.590 sec)
INFO:tensorfl

INFO:tensorflow:loss = 0.6734922, step = 16201 (0.589 sec)
INFO:tensorflow:global_step/sec: 170.189
INFO:tensorflow:loss = 0.7085162, step = 16301 (0.587 sec)
INFO:tensorflow:global_step/sec: 171.577
INFO:tensorflow:loss = 0.6561131, step = 16401 (0.583 sec)
INFO:tensorflow:global_step/sec: 174.506
INFO:tensorflow:loss = 0.63760436, step = 16501 (0.573 sec)
INFO:tensorflow:global_step/sec: 169.812
INFO:tensorflow:loss = 0.6900512, step = 16601 (0.589 sec)
INFO:tensorflow:global_step/sec: 174.296
INFO:tensorflow:loss = 0.6734976, step = 16701 (0.575 sec)
INFO:tensorflow:global_step/sec: 171.726
INFO:tensorflow:loss = 0.7574185, step = 16801 (0.582 sec)
INFO:tensorflow:global_step/sec: 170.018
INFO:tensorflow:loss = 0.7062669, step = 16901 (0.588 sec)
INFO:tensorflow:global_step/sec: 170.214
INFO:tensorflow:loss = 0.7223044, step = 17001 (0.587 sec)
INFO:tensorflow:global_step/sec: 168.766
INFO:tensorflow:loss = 0.67342097, step = 17101 (0.596 sec)
INFO:tensorflow:global_step/sec: 172.90

INFO:tensorflow:loss = 0.63954806, step = 24401 (0.505 sec)
INFO:tensorflow:global_step/sec: 200.885
INFO:tensorflow:loss = 0.72581863, step = 24501 (0.498 sec)
INFO:tensorflow:global_step/sec: 190.94
INFO:tensorflow:loss = 0.65628815, step = 24601 (0.524 sec)
INFO:tensorflow:global_step/sec: 192.917
INFO:tensorflow:loss = 0.65638, step = 24701 (0.519 sec)
INFO:tensorflow:global_step/sec: 256.15
INFO:tensorflow:loss = 0.6908159, step = 24801 (0.390 sec)
INFO:tensorflow:global_step/sec: 319.895
INFO:tensorflow:loss = 0.7921475, step = 24901 (0.313 sec)
INFO:tensorflow:global_step/sec: 184.189
INFO:tensorflow:loss = 0.67347354, step = 25001 (0.543 sec)
INFO:tensorflow:global_step/sec: 172.899
INFO:tensorflow:loss = 0.67344, step = 25101 (0.578 sec)
INFO:tensorflow:global_step/sec: 171.094
INFO:tensorflow:loss = 0.6735015, step = 25201 (0.585 sec)
INFO:tensorflow:global_step/sec: 175.679
INFO:tensorflow:loss = 0.6907953, step = 25301 (0.570 sec)
INFO:tensorflow:global_step/sec: 178.349
IN

INFO:tensorflow:loss = 0.63894176, step = 32064 (0.587 sec)
INFO:tensorflow:global_step/sec: 172.062
INFO:tensorflow:loss = 0.74428236, step = 32164 (0.581 sec)
INFO:tensorflow:global_step/sec: 165.816
INFO:tensorflow:loss = 0.7422382, step = 32264 (0.603 sec)
INFO:tensorflow:global_step/sec: 173.584
INFO:tensorflow:loss = 0.6391429, step = 32364 (0.576 sec)
INFO:tensorflow:global_step/sec: 168.559
INFO:tensorflow:loss = 0.6399873, step = 32464 (0.593 sec)
INFO:tensorflow:global_step/sec: 175.628
INFO:tensorflow:loss = 0.7237217, step = 32564 (0.569 sec)
INFO:tensorflow:global_step/sec: 157.503
INFO:tensorflow:loss = 0.6734275, step = 32664 (0.632 sec)
INFO:tensorflow:global_step/sec: 176.774
INFO:tensorflow:loss = 0.6574809, step = 32764 (0.568 sec)
INFO:tensorflow:global_step/sec: 176.928
INFO:tensorflow:loss = 0.6409271, step = 32864 (0.565 sec)
INFO:tensorflow:global_step/sec: 170.997
INFO:tensorflow:loss = 0.6238606, step = 32964 (0.585 sec)
INFO:tensorflow:global_step/sec: 166.52

INFO:tensorflow:loss = 0.65770483, step = 40264 (0.577 sec)
INFO:tensorflow:global_step/sec: 172.304
INFO:tensorflow:loss = 0.6574648, step = 40364 (0.581 sec)
INFO:tensorflow:global_step/sec: 169.49
INFO:tensorflow:loss = 0.6734282, step = 40464 (0.590 sec)
INFO:tensorflow:global_step/sec: 176.274
INFO:tensorflow:loss = 0.690143, step = 40564 (0.567 sec)
INFO:tensorflow:global_step/sec: 176.289
INFO:tensorflow:loss = 0.6737018, step = 40664 (0.568 sec)
INFO:tensorflow:global_step/sec: 166.544
INFO:tensorflow:loss = 0.6735627, step = 40764 (0.600 sec)
INFO:tensorflow:global_step/sec: 169.5
INFO:tensorflow:loss = 0.70789653, step = 40864 (0.590 sec)
INFO:tensorflow:global_step/sec: 174.033
INFO:tensorflow:loss = 0.67355186, step = 40964 (0.575 sec)
INFO:tensorflow:global_step/sec: 177.032
INFO:tensorflow:loss = 0.6399868, step = 41064 (0.565 sec)
INFO:tensorflow:global_step/sec: 163.201
INFO:tensorflow:loss = 0.6398736, step = 41164 (0.613 sec)
INFO:tensorflow:global_step/sec: 171.267
I

INFO:tensorflow:loss = 0.72398376, step = 48464 (0.578 sec)
INFO:tensorflow:global_step/sec: 175.97
INFO:tensorflow:loss = 0.6901633, step = 48564 (0.569 sec)
INFO:tensorflow:global_step/sec: 174.902
INFO:tensorflow:loss = 0.68971753, step = 48664 (0.572 sec)
INFO:tensorflow:global_step/sec: 172.31
INFO:tensorflow:loss = 0.7228846, step = 48764 (0.580 sec)
INFO:tensorflow:global_step/sec: 172.231
INFO:tensorflow:loss = 0.70492095, step = 48864 (0.580 sec)
INFO:tensorflow:global_step/sec: 170.713
INFO:tensorflow:loss = 0.6413772, step = 48964 (0.586 sec)
INFO:tensorflow:global_step/sec: 171.73
INFO:tensorflow:loss = 0.62484604, step = 49064 (0.583 sec)
INFO:tensorflow:global_step/sec: 166.773
INFO:tensorflow:loss = 0.591073, step = 49164 (0.599 sec)
INFO:tensorflow:global_step/sec: 170.071
INFO:tensorflow:loss = 0.72268456, step = 49264 (0.588 sec)
INFO:tensorflow:global_step/sec: 170.731
INFO:tensorflow:loss = 0.7068066, step = 49364 (0.586 sec)
INFO:tensorflow:global_step/sec: 173.772

INFO:tensorflow:loss = 0.68952185, step = 56127 (0.585 sec)
INFO:tensorflow:global_step/sec: 173.12
INFO:tensorflow:loss = 0.64092666, step = 56227 (0.578 sec)
INFO:tensorflow:global_step/sec: 170.757
INFO:tensorflow:loss = 0.7230266, step = 56327 (0.586 sec)
INFO:tensorflow:global_step/sec: 178.246
INFO:tensorflow:loss = 0.6901617, step = 56427 (0.561 sec)
INFO:tensorflow:global_step/sec: 173.544
INFO:tensorflow:loss = 0.6734213, step = 56527 (0.576 sec)
INFO:tensorflow:global_step/sec: 176.294
INFO:tensorflow:loss = 0.6415445, step = 56627 (0.567 sec)
INFO:tensorflow:global_step/sec: 171.61
INFO:tensorflow:loss = 0.6402266, step = 56727 (0.583 sec)
INFO:tensorflow:global_step/sec: 177.402
INFO:tensorflow:loss = 0.59001297, step = 56827 (0.563 sec)
INFO:tensorflow:global_step/sec: 177.054
INFO:tensorflow:loss = 0.65637594, step = 56927 (0.565 sec)
INFO:tensorflow:global_step/sec: 173.5
INFO:tensorflow:loss = 0.6228364, step = 57027 (0.576 sec)
INFO:tensorflow:global_step/sec: 177.571


INFO:tensorflow:loss = 0.65673316, step = 64327 (0.631 sec)
INFO:tensorflow:global_step/sec: 171.414
INFO:tensorflow:loss = 0.67344403, step = 64427 (0.584 sec)
INFO:tensorflow:global_step/sec: 172.674
INFO:tensorflow:loss = 0.62332815, step = 64527 (0.579 sec)
INFO:tensorflow:global_step/sec: 172.686
INFO:tensorflow:loss = 0.6235215, step = 64627 (0.580 sec)
INFO:tensorflow:global_step/sec: 172.089
INFO:tensorflow:loss = 0.70702, step = 64727 (0.581 sec)
INFO:tensorflow:global_step/sec: 173.538
INFO:tensorflow:loss = 0.63993317, step = 64827 (0.577 sec)
INFO:tensorflow:global_step/sec: 169.859
INFO:tensorflow:loss = 0.70721287, step = 64927 (0.588 sec)
INFO:tensorflow:global_step/sec: 177.792
INFO:tensorflow:loss = 0.7079136, step = 65027 (0.563 sec)
INFO:tensorflow:global_step/sec: 175.669
INFO:tensorflow:loss = 0.6735259, step = 65127 (0.569 sec)
INFO:tensorflow:global_step/sec: 160.49
INFO:tensorflow:loss = 0.6909821, step = 65227 (0.623 sec)
INFO:tensorflow:global_step/sec: 175.90

INFO:tensorflow:loss = 0.6734913, step = 72527 (0.577 sec)
INFO:tensorflow:global_step/sec: 174.56
INFO:tensorflow:loss = 0.62322783, step = 72627 (0.573 sec)
INFO:tensorflow:global_step/sec: 152.84
INFO:tensorflow:loss = 0.7249605, step = 72727 (0.652 sec)
INFO:tensorflow:global_step/sec: 171.044
INFO:tensorflow:loss = 0.67353946, step = 72827 (0.587 sec)
INFO:tensorflow:global_step/sec: 169.875
INFO:tensorflow:loss = 0.724683, step = 72927 (0.588 sec)
INFO:tensorflow:global_step/sec: 171.987
INFO:tensorflow:loss = 0.60593325, step = 73027 (0.582 sec)
INFO:tensorflow:global_step/sec: 169.833
INFO:tensorflow:loss = 0.64009607, step = 73127 (0.588 sec)
INFO:tensorflow:global_step/sec: 173.491
INFO:tensorflow:loss = 0.6898306, step = 73227 (0.576 sec)
INFO:tensorflow:global_step/sec: 170.515
INFO:tensorflow:loss = 0.67346275, step = 73327 (0.586 sec)
INFO:tensorflow:global_step/sec: 172.358
INFO:tensorflow:loss = 0.6734339, step = 73427 (0.580 sec)
INFO:tensorflow:global_step/sec: 166.65

INFO:tensorflow:loss = 0.6734288, step = 80727 (0.610 sec)
INFO:tensorflow:global_step/sec: 176.054
INFO:tensorflow:loss = 0.6574967, step = 80827 (0.568 sec)
INFO:tensorflow:global_step/sec: 169.368
INFO:tensorflow:loss = 0.67341626, step = 80927 (0.590 sec)
INFO:tensorflow:global_step/sec: 170.46
INFO:tensorflow:loss = 0.68937045, step = 81027 (0.587 sec)
INFO:tensorflow:global_step/sec: 165.38
INFO:tensorflow:loss = 0.7728852, step = 81127 (0.605 sec)
INFO:tensorflow:Saving checkpoints for 81189 into /tmp/tf_bow_sst_20181209-0126/model.ckpt.
INFO:tensorflow:Loss for final step: 0.7155154.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0126/model.ckpt-81189
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 81189 into /tmp/tf_bow_sst_20181209-

INFO:tensorflow:loss = 0.60928684, step = 88390 (0.608 sec)
INFO:tensorflow:global_step/sec: 164.917
INFO:tensorflow:loss = 0.6895485, step = 88490 (0.606 sec)
INFO:tensorflow:global_step/sec: 165.908
INFO:tensorflow:loss = 0.69019663, step = 88590 (0.603 sec)
INFO:tensorflow:global_step/sec: 164.884
INFO:tensorflow:loss = 0.6568183, step = 88690 (0.608 sec)
INFO:tensorflow:global_step/sec: 165.452
INFO:tensorflow:loss = 0.77354276, step = 88790 (0.603 sec)
INFO:tensorflow:global_step/sec: 171.437
INFO:tensorflow:loss = 0.6568157, step = 88890 (0.583 sec)
INFO:tensorflow:global_step/sec: 165.847
INFO:tensorflow:loss = 0.72336775, step = 88990 (0.603 sec)
INFO:tensorflow:global_step/sec: 166.859
INFO:tensorflow:loss = 0.7067519, step = 89090 (0.600 sec)
INFO:tensorflow:global_step/sec: 162.039
INFO:tensorflow:loss = 0.65671134, step = 89190 (0.617 sec)
INFO:tensorflow:global_step/sec: 166.232
INFO:tensorflow:loss = 0.70671004, step = 89290 (0.602 sec)
INFO:tensorflow:global_step/sec: 16

INFO:tensorflow:loss = 0.64085007, step = 96590 (0.603 sec)
INFO:tensorflow:global_step/sec: 163.692
INFO:tensorflow:loss = 0.6895349, step = 96690 (0.611 sec)
INFO:tensorflow:global_step/sec: 166.476
INFO:tensorflow:loss = 0.7217554, step = 96790 (0.600 sec)
INFO:tensorflow:global_step/sec: 167.459
INFO:tensorflow:loss = 0.6734515, step = 96890 (0.597 sec)
INFO:tensorflow:global_step/sec: 166.254
INFO:tensorflow:loss = 0.6900662, step = 96990 (0.602 sec)
INFO:tensorflow:global_step/sec: 168.963
INFO:tensorflow:loss = 0.6409262, step = 97090 (0.592 sec)
INFO:tensorflow:global_step/sec: 162.287
INFO:tensorflow:loss = 0.62517303, step = 97190 (0.617 sec)
INFO:tensorflow:global_step/sec: 161.084
INFO:tensorflow:loss = 0.7226846, step = 97290 (0.621 sec)
INFO:tensorflow:global_step/sec: 160.913
INFO:tensorflow:loss = 0.6734555, step = 97390 (0.621 sec)
INFO:tensorflow:global_step/sec: 166.039
INFO:tensorflow:loss = 0.70729166, step = 97490 (0.602 sec)
INFO:tensorflow:global_step/sec: 166.6

INFO:tensorflow:global_step/sec: 170.42
INFO:tensorflow:loss = 0.6735217, step = 104790 (0.587 sec)
INFO:tensorflow:global_step/sec: 165.487
INFO:tensorflow:loss = 0.7067957, step = 104890 (0.605 sec)
INFO:tensorflow:global_step/sec: 165.498
INFO:tensorflow:loss = 0.64026684, step = 104990 (0.604 sec)
INFO:tensorflow:global_step/sec: 165.14
INFO:tensorflow:loss = 0.6901763, step = 105090 (0.605 sec)
INFO:tensorflow:global_step/sec: 172.668
INFO:tensorflow:loss = 0.6734714, step = 105190 (0.579 sec)
INFO:tensorflow:global_step/sec: 168.564
INFO:tensorflow:loss = 0.606276, step = 105290 (0.594 sec)
INFO:tensorflow:global_step/sec: 175.433
INFO:tensorflow:loss = 0.6398595, step = 105390 (0.570 sec)
INFO:tensorflow:global_step/sec: 171.488
INFO:tensorflow:loss = 0.62263393, step = 105490 (0.583 sec)
INFO:tensorflow:global_step/sec: 174.896
INFO:tensorflow:loss = 0.63973725, step = 105590 (0.572 sec)
INFO:tensorflow:global_step/sec: 173.653
INFO:tensorflow:loss = 0.724859, step = 105690 (0.

INFO:tensorflow:global_step/sec: 162.5
INFO:tensorflow:loss = 0.6252273, step = 112353 (0.615 sec)
INFO:tensorflow:global_step/sec: 161.753
INFO:tensorflow:loss = 0.6734246, step = 112453 (0.618 sec)
INFO:tensorflow:global_step/sec: 162.834
INFO:tensorflow:loss = 0.57780224, step = 112553 (0.614 sec)
INFO:tensorflow:global_step/sec: 164.043
INFO:tensorflow:loss = 0.6734389, step = 112653 (0.610 sec)
INFO:tensorflow:global_step/sec: 156.529
INFO:tensorflow:loss = 0.6734367, step = 112753 (0.640 sec)
INFO:tensorflow:global_step/sec: 146.768
INFO:tensorflow:loss = 0.6899601, step = 112853 (0.681 sec)
INFO:tensorflow:global_step/sec: 146.723
INFO:tensorflow:loss = 0.73991686, step = 112953 (0.681 sec)
INFO:tensorflow:global_step/sec: 149.499
INFO:tensorflow:loss = 0.62327784, step = 113053 (0.670 sec)
INFO:tensorflow:global_step/sec: 145.843
INFO:tensorflow:loss = 0.6734545, step = 113153 (0.685 sec)
INFO:tensorflow:global_step/sec: 147.266
INFO:tensorflow:loss = 0.63956004, step = 113253 

INFO:tensorflow:global_step/sec: 179.144
INFO:tensorflow:loss = 0.6567505, step = 120453 (0.558 sec)
INFO:tensorflow:global_step/sec: 173.373
INFO:tensorflow:loss = 0.6401558, step = 120553 (0.577 sec)
INFO:tensorflow:global_step/sec: 177.403
INFO:tensorflow:loss = 0.67349076, step = 120653 (0.564 sec)
INFO:tensorflow:global_step/sec: 172.085
INFO:tensorflow:loss = 0.6401695, step = 120753 (0.581 sec)
INFO:tensorflow:global_step/sec: 174.561
INFO:tensorflow:loss = 0.6734297, step = 120853 (0.572 sec)
INFO:tensorflow:global_step/sec: 173.887
INFO:tensorflow:loss = 0.5927015, step = 120953 (0.575 sec)
INFO:tensorflow:global_step/sec: 164.356
INFO:tensorflow:loss = 0.59154516, step = 121053 (0.609 sec)
INFO:tensorflow:global_step/sec: 161.334
INFO:tensorflow:loss = 0.73950577, step = 121153 (0.620 sec)
INFO:tensorflow:global_step/sec: 163.573
INFO:tensorflow:loss = 0.68989736, step = 121253 (0.611 sec)
INFO:tensorflow:global_step/sec: 162.658
INFO:tensorflow:loss = 0.6896707, step = 12135

INFO:tensorflow:global_step/sec: 163.744
INFO:tensorflow:loss = 0.7228166, step = 128553 (0.611 sec)
INFO:tensorflow:global_step/sec: 166.302
INFO:tensorflow:loss = 0.64111155, step = 128653 (0.601 sec)
INFO:tensorflow:global_step/sec: 164.152
INFO:tensorflow:loss = 0.6734206, step = 128753 (0.609 sec)
INFO:tensorflow:global_step/sec: 167.957
INFO:tensorflow:loss = 0.72176886, step = 128853 (0.595 sec)
INFO:tensorflow:global_step/sec: 167.669
INFO:tensorflow:loss = 0.7050496, step = 128953 (0.596 sec)
INFO:tensorflow:global_step/sec: 169.07
INFO:tensorflow:loss = 0.6250672, step = 129053 (0.592 sec)
INFO:tensorflow:global_step/sec: 168.445
INFO:tensorflow:loss = 0.6898643, step = 129153 (0.594 sec)
INFO:tensorflow:global_step/sec: 168.28
INFO:tensorflow:loss = 0.6236697, step = 129253 (0.594 sec)
INFO:tensorflow:global_step/sec: 167.353
INFO:tensorflow:loss = 0.7731864, step = 129353 (0.598 sec)
INFO:tensorflow:global_step/sec: 166.068
INFO:tensorflow:loss = 0.73828584, step = 129453 (

## Evaluation

In [21]:
#Evaluation on test data

eval_metrics = model.evaluate(input_fn=test_input_fn, name="test")  

print ("Perplexity on test set: {:.03}".format(math.exp(eval_metrics['cross_entropy_loss'])))
print("Accuracy on test set: {:.02%}".format(eval_metrics['accuracy']))

eval_metrics

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-12-09-01:40:04
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0126/model.ckpt-135315
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-12-09-01:40:08
INFO:tensorflow:Saving dict for global step 135315: accuracy = 0.6067945, cross_entropy_loss = 0.6701861, global_step = 135315, loss = 0.6706374
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 135315: /tmp/tf_bow_sst_20181209-0126/model.ckpt-135315
Perplexity on test set: 1.95
Accuracy on test set: 60.68%


{'accuracy': 0.6067945,
 'cross_entropy_loss': 0.6701861,
 'loss': 0.6706374,
 'global_step': 135315}

In [22]:
#Evaluation on training data

eval_metrics = model.evaluate(input_fn=train_input_fn, name="train")  

print ("Perplexity on train set: {:.03}".format(math.exp(eval_metrics['cross_entropy_loss'])))
print("Accuracy on train set: {:.02%}".format(eval_metrics['accuracy']))
eval_metrics

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-12-09-01:40:09
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20181209-0126/model.ckpt-135315
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-12-09-01:40:40
INFO:tensorflow:Saving dict for global step 135315: accuracy = 0.6042965, cross_entropy_loss = 0.67118233, global_step = 135315, loss = 0.67170286
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 135315: /tmp/tf_bow_sst_20181209-0126/model.ckpt-135315
Perplexity on train set: 1.96
Accuracy on train set: 60.43%


{'accuracy': 0.6042965,
 'cross_entropy_loss': 0.67118233,
 'loss': 0.67170286,
 'global_step': 135315}