In [0]:
# Reference: https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub

#Predicting Sentiment with BERT on TF Hub

In [0]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

In addition to the standard libraries we imported above, we'll need to install BERT's python package.

In [0]:
!pip install bert-tensorflow



In [0]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

In [0]:
# Set the output directory for saving model file
# Optionally, set a GCP bucket location

OUTPUT_DIR = 'Output/'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = False #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}

if USE_BUCKET:
  OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
  from google.colab import auth
  auth.authenticate_user()

if DO_DELETE:
  try:
    tf.gfile.DeleteRecursively(OUTPUT_DIR)
  except:
    # Doesn't matter if the directory didn't exist
    pass
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


***** Model output directory: Output/ *****


#Data

In [0]:
import pandas as pd
import numpy as np
company = 'AMAZON'
time = 1440

df_news = pd.read_csv('/content/'+company+'_'+str(time)+'_labeled_news.csv')[['Text','Date','Time','Positive_Sentiment']]
msk = np.random.rand(len(df_news)) < 0.8
train = df_news[msk]
test = df_news[~msk]

In [0]:
train.columns

Index(['Text', 'Date', 'Time', 'Positive_Sentiment'], dtype='object')

For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respecitvely)

In [0]:
DATA_COLUMN = 'Text'
LABEL_COLUMN = 'Positive_Sentiment'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]

##Data PreProcessing:

In [0]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):


1. Lowercase our text (if we're using a BERT lowercase model)
2. Tokenize it (i.e. "sally says hi" -> ["sally", "says", "hi"])
3. Break words into WordPieces (i.e. "calling" -> ["call", "##ing"])
4. Map our words to indexes using a vocab file that BERT provides
5. Add special "CLS" and "SEP" tokens (see the [readme](https://github.com/google-research/bert))
6. Append "index" and "segment" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))





In [0]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  with tf.Graph().as_default():
    bert_module = hub.Module(BERT_MODEL_HUB)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    with tf.Session() as sess:
      vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                            tokenization_info["do_lower_case"]])
      
  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0422 18:58:49.148531 140115933247360 saver.py:1483] Saver not created because there are no variables in the graph to restore


Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands.

In [0]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

INFO:tensorflow:Writing example 0 of 10503


I0422 18:58:49.793099 140115933247360 run_classifier.py:774] Writing example 0 of 10503


INFO:tensorflow:*** Example ***


I0422 18:58:49.826216 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0422 18:58:49.828205 140115933247360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] [ " ve ##ris ##ign , inc . vr ##s ##n dec . 12 catalyst pharmaceuticals , inc . cp ##r ##x dec . 12 f ##5 networks , inc . ff ##iv dec . 11 best buy co . bb ##y dec . 11 lowe ' s companies , inc . low dec . 11 p ##nc financial services group , inc . p ##nc dec . 11 bank of america corp . ba ##c dec . 8 kansas city southern ks ##u dec . 8 ve ##riz ##on communications , inc . v ##z dec . 7 bed bath & beyond , inc . bb ##by dec . 7 regions financial corp . rf dec . 7 key ##corp key dec . 6 ko [SEP]


I0422 18:58:49.833632 140115933247360 run_classifier.py:464] tokens: [CLS] [ " ve ##ris ##ign , inc . vr ##s ##n dec . 12 catalyst pharmaceuticals , inc . cp ##r ##x dec . 12 f ##5 networks , inc . ff ##iv dec . 11 best buy co . bb ##y dec . 11 lowe ' s companies , inc . low dec . 11 p ##nc financial services group , inc . p ##nc dec . 11 bank of america corp . ba ##c dec . 8 kansas city southern ks ##u dec . 8 ve ##riz ##on communications , inc . v ##z dec . 7 bed bath & beyond , inc . bb ##by dec . 7 regions financial corp . rf dec . 7 key ##corp key dec . 6 ko [SEP]


INFO:tensorflow:input_ids: 101 1031 1000 2310 6935 23773 1010 4297 1012 27830 2015 2078 11703 1012 2260 16771 24797 1010 4297 1012 18133 2099 2595 11703 1012 2260 1042 2629 6125 1010 4297 1012 21461 12848 11703 1012 2340 2190 4965 2522 1012 22861 2100 11703 1012 2340 14086 1005 1055 3316 1010 4297 1012 2659 11703 1012 2340 1052 12273 3361 2578 2177 1010 4297 1012 1052 12273 11703 1012 2340 2924 1997 2637 13058 1012 8670 2278 11703 1012 1022 5111 2103 2670 29535 2226 11703 1012 1022 2310 21885 2239 4806 1010 4297 1012 1058 2480 11703 1012 1021 2793 7198 1004 3458 1010 4297 1012 22861 3762 11703 1012 1021 4655 3361 13058 1012 21792 11703 1012 1021 3145 24586 3145 11703 1012 1020 12849 102


I0422 18:58:49.839701 140115933247360 run_classifier.py:465] input_ids: 101 1031 1000 2310 6935 23773 1010 4297 1012 27830 2015 2078 11703 1012 2260 16771 24797 1010 4297 1012 18133 2099 2595 11703 1012 2260 1042 2629 6125 1010 4297 1012 21461 12848 11703 1012 2340 2190 4965 2522 1012 22861 2100 11703 1012 2340 14086 1005 1055 3316 1010 4297 1012 2659 11703 1012 2340 1052 12273 3361 2578 2177 1010 4297 1012 1052 12273 11703 1012 2340 2924 1997 2637 13058 1012 8670 2278 11703 1012 1022 5111 2103 2670 29535 2226 11703 1012 1022 2310 21885 2239 4806 1010 4297 1012 1058 2480 11703 1012 1021 2793 7198 1004 3458 1010 4297 1012 22861 3762 11703 1012 1021 4655 3361 13058 1012 21792 11703 1012 1021 3145 24586 3145 11703 1012 1020 12849 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0422 18:58:49.844464 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 18:58:49.848189 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: False (id = 0)


I0422 18:58:49.852575 140115933247360 run_classifier.py:468] label: False (id = 0)


INFO:tensorflow:*** Example ***


I0422 18:58:49.858301 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0422 18:58:49.862620 140115933247360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] [ ' consumers now experience ai mostly through image recognition to help cat ##ego ##rize digital photographs and speech recognition that helps power digital voice assistants such as apple inc ’ s ( aa ##pl . o ) sir ##i or amazon . com inc ’ s ( am ##z ##n . o ) alexa . ' ] [SEP]


I0422 18:58:49.867922 140115933247360 run_classifier.py:464] tokens: [CLS] [ ' consumers now experience ai mostly through image recognition to help cat ##ego ##rize digital photographs and speech recognition that helps power digital voice assistants such as apple inc ’ s ( aa ##pl . o ) sir ##i or amazon . com inc ’ s ( am ##z ##n . o ) alexa . ' ] [SEP]


INFO:tensorflow:input_ids: 101 1031 1005 10390 2085 3325 9932 3262 2083 3746 5038 2000 2393 4937 20265 25709 3617 7008 1998 4613 5038 2008 7126 2373 3617 2376 16838 2107 2004 6207 4297 1521 1055 1006 9779 24759 1012 1051 1007 2909 2072 2030 9733 1012 4012 4297 1521 1055 1006 2572 2480 2078 1012 1051 1007 24969 1012 1005 1033 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 18:58:49.872734 140115933247360 run_classifier.py:465] input_ids: 101 1031 1005 10390 2085 3325 9932 3262 2083 3746 5038 2000 2393 4937 20265 25709 3617 7008 1998 4613 5038 2008 7126 2373 3617 2376 16838 2107 2004 6207 4297 1521 1055 1006 9779 24759 1012 1051 1007 2909 2072 2030 9733 1012 4012 4297 1521 1055 1006 2572 2480 2078 1012 1051 1007 24969 1012 1005 1033 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 18:58:49.877099 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 18:58:49.882422 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: True (id = 1)


I0422 18:58:49.887708 140115933247360 run_classifier.py:468] label: True (id = 1)


INFO:tensorflow:*** Example ***


I0422 18:58:49.897058 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0422 18:58:49.901010 140115933247360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] [ " if you don ' t think there was a group of sophisticated people that worked the numbers on whole foods at its 2016 low , identified amazon ( am ##z ##n ) as a likely target and bought accordingly then you shouldn ' t be in the game . sell your stocks and buy some bit ##co ##in . " , " here are several quick lessons when it comes to deals that were shared at the deal economy conference on thursday . the conference is hosted each year by the ##st ##ree ##t ' s sister publication the deal . of course , these helpful ideas came from the top mind that is the ##st ##ree ##t ' s founder and action alert [SEP]


I0422 18:58:49.904452 140115933247360 run_classifier.py:464] tokens: [CLS] [ " if you don ' t think there was a group of sophisticated people that worked the numbers on whole foods at its 2016 low , identified amazon ( am ##z ##n ) as a likely target and bought accordingly then you shouldn ' t be in the game . sell your stocks and buy some bit ##co ##in . " , " here are several quick lessons when it comes to deals that were shared at the deal economy conference on thursday . the conference is hosted each year by the ##st ##ree ##t ' s sister publication the deal . of course , these helpful ideas came from the top mind that is the ##st ##ree ##t ' s founder and action alert [SEP]


INFO:tensorflow:input_ids: 101 1031 1000 2065 2017 2123 1005 1056 2228 2045 2001 1037 2177 1997 12138 2111 2008 2499 1996 3616 2006 2878 9440 2012 2049 2355 2659 1010 4453 9733 1006 2572 2480 2078 1007 2004 1037 3497 4539 1998 4149 11914 2059 2017 5807 1005 1056 2022 1999 1996 2208 1012 5271 2115 15768 1998 4965 2070 2978 3597 2378 1012 1000 1010 1000 2182 2024 2195 4248 8220 2043 2009 3310 2000 9144 2008 2020 4207 2012 1996 3066 4610 3034 2006 9432 1012 1996 3034 2003 4354 2169 2095 2011 1996 3367 9910 2102 1005 1055 2905 4772 1996 3066 1012 1997 2607 1010 2122 14044 4784 2234 2013 1996 2327 2568 2008 2003 1996 3367 9910 2102 1005 1055 3910 1998 2895 9499 102


I0422 18:58:49.907829 140115933247360 run_classifier.py:465] input_ids: 101 1031 1000 2065 2017 2123 1005 1056 2228 2045 2001 1037 2177 1997 12138 2111 2008 2499 1996 3616 2006 2878 9440 2012 2049 2355 2659 1010 4453 9733 1006 2572 2480 2078 1007 2004 1037 3497 4539 1998 4149 11914 2059 2017 5807 1005 1056 2022 1999 1996 2208 1012 5271 2115 15768 1998 4965 2070 2978 3597 2378 1012 1000 1010 1000 2182 2024 2195 4248 8220 2043 2009 3310 2000 9144 2008 2020 4207 2012 1996 3066 4610 3034 2006 9432 1012 1996 3034 2003 4354 2169 2095 2011 1996 3367 9910 2102 1005 1055 2905 4772 1996 3066 1012 1997 2607 1010 2122 14044 4784 2234 2013 1996 2327 2568 2008 2003 1996 3367 9910 2102 1005 1055 3910 1998 2895 9499 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0422 18:58:49.911729 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 18:58:49.915476 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: False (id = 0)


I0422 18:58:49.919123 140115933247360 run_classifier.py:468] label: False (id = 0)


INFO:tensorflow:*** Example ***


I0422 18:58:49.930542 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0422 18:58:49.934234 140115933247360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] [ " some investors sitting on shares of the iphone maker , which are up over 51 % so far in 2017 , gr ##umble that this lead - story distinction means a kiss of death has been planted on apple aa ##pl , + 0 . 00 % . today : apple ' s golden era , how the new tax law affects you , investment advisors ' view of 2018 , bit ##co ##in , pg & e , embracing vol ##ati ##lity , and more . https : / / t . co / v ##vn ##9 ##lo ##j ##jn ##f pic . twitter . com / 8 ##ed ##iz ##nc ##gm ##r — barron ' s ( @ barron ##son ##line ) [SEP]


I0422 18:58:49.938385 140115933247360 run_classifier.py:464] tokens: [CLS] [ " some investors sitting on shares of the iphone maker , which are up over 51 % so far in 2017 , gr ##umble that this lead - story distinction means a kiss of death has been planted on apple aa ##pl , + 0 . 00 % . today : apple ' s golden era , how the new tax law affects you , investment advisors ' view of 2018 , bit ##co ##in , pg & e , embracing vol ##ati ##lity , and more . https : / / t . co / v ##vn ##9 ##lo ##j ##jn ##f pic . twitter . com / 8 ##ed ##iz ##nc ##gm ##r — barron ' s ( @ barron ##son ##line ) [SEP]


INFO:tensorflow:input_ids: 101 1031 1000 2070 9387 3564 2006 6661 1997 1996 18059 9338 1010 2029 2024 2039 2058 4868 1003 2061 2521 1999 2418 1010 24665 26607 2008 2023 2599 1011 2466 7835 2965 1037 3610 1997 2331 2038 2042 8461 2006 6207 9779 24759 1010 1009 1014 1012 4002 1003 1012 2651 1024 6207 1005 1055 3585 3690 1010 2129 1996 2047 4171 2375 13531 2017 1010 5211 18934 1005 3193 1997 2760 1010 2978 3597 2378 1010 18720 1004 1041 1010 23581 5285 10450 18605 1010 1998 2062 1012 16770 1024 1013 1013 1056 1012 2522 1013 1058 16022 2683 4135 3501 22895 2546 27263 1012 10474 1012 4012 1013 1022 2098 10993 12273 21693 2099 1517 23594 1005 1055 1006 1030 23594 3385 4179 1007 102


I0422 18:58:49.943023 140115933247360 run_classifier.py:465] input_ids: 101 1031 1000 2070 9387 3564 2006 6661 1997 1996 18059 9338 1010 2029 2024 2039 2058 4868 1003 2061 2521 1999 2418 1010 24665 26607 2008 2023 2599 1011 2466 7835 2965 1037 3610 1997 2331 2038 2042 8461 2006 6207 9779 24759 1010 1009 1014 1012 4002 1003 1012 2651 1024 6207 1005 1055 3585 3690 1010 2129 1996 2047 4171 2375 13531 2017 1010 5211 18934 1005 3193 1997 2760 1010 2978 3597 2378 1010 18720 1004 1041 1010 23581 5285 10450 18605 1010 1998 2062 1012 16770 1024 1013 1013 1056 1012 2522 1013 1058 16022 2683 4135 3501 22895 2546 27263 1012 10474 1012 4012 1013 1022 2098 10993 12273 21693 2099 1517 23594 1005 1055 1006 1030 23594 3385 4179 1007 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0422 18:58:49.947186 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 18:58:49.951019 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: True (id = 1)


I0422 18:58:49.954289 140115933247360 run_classifier.py:468] label: True (id = 1)


INFO:tensorflow:*** Example ***


I0422 18:58:49.994911 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0422 18:58:49.998641 140115933247360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] [ ' 2017 年 12 月 29 日 （ 金 ） 07 : 47 : 11 「 シ ##カ ##コ 日 [UNK] 平 [UNK] [UNK] [UNK] （ [UNK] ） 」 （ 28 日 ） [UNK] 229 ##55 （ ト ##ル [UNK] て ） [UNK] [UNK] [UNK] 228 ##25 大 [UNK] 比 + 45 （ イ ##フ ##ニ ##ン ##ク 比 + 25 ） vol [UNK] 229 ##15 （ [UNK] [UNK] て ） [UNK] [UNK] [UNK] 227 ##90 大 [UNK] 比 + 10 （ イ ##フ ##ニ ##ン ##ク 比 - 10 ） vol 140 ##52 「 [UNK] 国 [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] （ [UNK] ） 」 （ 28 日 ） [UNK] 天 堂 、 日 [UNK] [UNK] 金 か 小 安 い [UNK] は [SEP]


I0422 18:58:50.002074 140115933247360 run_classifier.py:464] tokens: [CLS] [ ' 2017 年 12 月 29 日 （ 金 ） 07 : 47 : 11 「 シ ##カ ##コ 日 [UNK] 平 [UNK] [UNK] [UNK] （ [UNK] ） 」 （ 28 日 ） [UNK] 229 ##55 （ ト ##ル [UNK] て ） [UNK] [UNK] [UNK] 228 ##25 大 [UNK] 比 + 45 （ イ ##フ ##ニ ##ン ##ク 比 + 25 ） vol [UNK] 229 ##15 （ [UNK] [UNK] て ） [UNK] [UNK] [UNK] 227 ##90 大 [UNK] 比 + 10 （ イ ##フ ##ニ ##ン ##ク 比 - 10 ） vol 140 ##52 「 [UNK] 国 [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] （ [UNK] ） 」 （ 28 日 ） [UNK] 天 堂 、 日 [UNK] [UNK] 金 か 小 安 い [UNK] は [SEP]


INFO:tensorflow:input_ids: 101 1031 1005 2418 1840 2260 1872 2756 1864 1987 1964 1988 5718 1024 4700 1024 2340 1641 1706 30226 30230 1864 100 1839 100 100 100 1987 100 1988 1642 1987 2654 1864 1988 100 22777 24087 1987 1714 30259 100 1665 1988 100 100 100 22238 17788 1810 100 1890 1009 3429 1987 1695 30246 30242 30263 30228 1890 1009 2423 1988 5285 100 22777 16068 1987 100 100 1665 1988 100 100 100 21489 21057 1810 100 1890 1009 2184 1987 1695 30246 30242 30263 30228 1890 1011 2184 1988 5285 8574 25746 1641 100 1799 100 100 100 100 100 100 1987 100 1988 1642 1987 2654 1864 1988 100 1811 1805 1635 1864 100 100 1964 1651 1829 1820 1647 100 1672 102


I0422 18:58:50.003771 140115933247360 run_classifier.py:465] input_ids: 101 1031 1005 2418 1840 2260 1872 2756 1864 1987 1964 1988 5718 1024 4700 1024 2340 1641 1706 30226 30230 1864 100 1839 100 100 100 1987 100 1988 1642 1987 2654 1864 1988 100 22777 24087 1987 1714 30259 100 1665 1988 100 100 100 22238 17788 1810 100 1890 1009 3429 1987 1695 30246 30242 30263 30228 1890 1009 2423 1988 5285 100 22777 16068 1987 100 100 1665 1988 100 100 100 21489 21057 1810 100 1890 1009 2184 1987 1695 30246 30242 30263 30228 1890 1011 2184 1988 5285 8574 25746 1641 100 1799 100 100 100 100 100 100 1987 100 1988 1642 1987 2654 1864 1988 100 1811 1805 1635 1864 100 100 1964 1651 1829 1820 1647 100 1672 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0422 18:58:50.007895 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 18:58:50.011812 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: True (id = 1)


I0422 18:58:50.014900 140115933247360 run_classifier.py:468] label: True (id = 1)


INFO:tensorflow:Writing example 10000 of 10503


I0422 19:01:14.974750 140115933247360 run_classifier.py:774] Writing example 10000 of 10503


INFO:tensorflow:Writing example 0 of 2590


I0422 19:01:20.291975 140115933247360 run_classifier.py:774] Writing example 0 of 2590


INFO:tensorflow:*** Example ***


I0422 19:01:20.310563 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0422 19:01:20.320602 140115933247360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] [ ' a value stock is one that trades at a lower price relative to fundamental ##s such earnings or sales and is viewed as under ##val ##ue ##d by investors . growth stocks are shares in companies that are expected to grow at a faster rate than the market average . ' , ' a rather violent rotation has been playing out in the stock market in recent sessions and was on display monday , with an early rally to an all - time high by the s & p 500 eventually cut short as investors again dumped previously high - flying tech stocks , among the best performers of the year to date , including the so - called faa ##ng - - for [SEP]


I0422 19:01:20.327585 140115933247360 run_classifier.py:464] tokens: [CLS] [ ' a value stock is one that trades at a lower price relative to fundamental ##s such earnings or sales and is viewed as under ##val ##ue ##d by investors . growth stocks are shares in companies that are expected to grow at a faster rate than the market average . ' , ' a rather violent rotation has been playing out in the stock market in recent sessions and was on display monday , with an early rally to an all - time high by the s & p 500 eventually cut short as investors again dumped previously high - flying tech stocks , among the best performers of the year to date , including the so - called faa ##ng - - for [SEP]


INFO:tensorflow:input_ids: 101 1031 1005 1037 3643 4518 2003 2028 2008 14279 2012 1037 2896 3976 5816 2000 8050 2015 2107 16565 2030 4341 1998 2003 7021 2004 2104 10175 5657 2094 2011 9387 1012 3930 15768 2024 6661 1999 3316 2008 2024 3517 2000 4982 2012 1037 5514 3446 2084 1996 3006 2779 1012 1005 1010 1005 1037 2738 6355 9963 2038 2042 2652 2041 1999 1996 4518 3006 1999 3522 6521 1998 2001 2006 4653 6928 1010 2007 2019 2220 8320 2000 2019 2035 1011 2051 2152 2011 1996 1055 1004 1052 3156 2776 3013 2460 2004 9387 2153 14019 3130 2152 1011 3909 6627 15768 1010 2426 1996 2190 9567 1997 1996 2095 2000 3058 1010 2164 1996 2061 1011 2170 17032 3070 1011 1011 2005 102


I0422 19:01:20.331478 140115933247360 run_classifier.py:465] input_ids: 101 1031 1005 1037 3643 4518 2003 2028 2008 14279 2012 1037 2896 3976 5816 2000 8050 2015 2107 16565 2030 4341 1998 2003 7021 2004 2104 10175 5657 2094 2011 9387 1012 3930 15768 2024 6661 1999 3316 2008 2024 3517 2000 4982 2012 1037 5514 3446 2084 1996 3006 2779 1012 1005 1010 1005 1037 2738 6355 9963 2038 2042 2652 2041 1999 1996 4518 3006 1999 3522 6521 1998 2001 2006 4653 6928 1010 2007 2019 2220 8320 2000 2019 2035 1011 2051 2152 2011 1996 1055 1004 1052 3156 2776 3013 2460 2004 9387 2153 14019 3130 2152 1011 3909 6627 15768 1010 2426 1996 2190 9567 1997 1996 2095 2000 3058 1010 2164 1996 2061 1011 2170 17032 3070 1011 1011 2005 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0422 19:01:20.333535 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:01:20.336989 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: True (id = 1)


I0422 19:01:20.338692 140115933247360 run_classifier.py:468] label: True (id = 1)


INFO:tensorflow:*** Example ***


I0422 19:01:20.346142 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0422 19:01:20.349916 140115933247360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] [ ' tv sales : apple tv is now available for purchase on amazon . com , inc . ’ s ( nas ##da ##q : am ##z ##n ) website , reports 9 ##to ##5 ##mac . the launch of the set - top box comes after the two companies ended a years - long feud . it also resulted in the amazon prime video app coming to the device . those looking to order the tv from amazon are out of luck as it is currently out of stock . check out more recent apple rumors or ' ] [SEP]


I0422 19:01:20.353346 140115933247360 run_classifier.py:464] tokens: [CLS] [ ' tv sales : apple tv is now available for purchase on amazon . com , inc . ’ s ( nas ##da ##q : am ##z ##n ) website , reports 9 ##to ##5 ##mac . the launch of the set - top box comes after the two companies ended a years - long feud . it also resulted in the amazon prime video app coming to the device . those looking to order the tv from amazon are out of luck as it is currently out of stock . check out more recent apple rumors or ' ] [SEP]


INFO:tensorflow:input_ids: 101 1031 1005 2694 4341 1024 6207 2694 2003 2085 2800 2005 5309 2006 9733 1012 4012 1010 4297 1012 1521 1055 1006 17235 2850 4160 1024 2572 2480 2078 1007 4037 1010 4311 1023 3406 2629 22911 1012 1996 4888 1997 1996 2275 1011 2327 3482 3310 2044 1996 2048 3316 3092 1037 2086 1011 2146 13552 1012 2009 2036 4504 1999 1996 9733 3539 2678 10439 2746 2000 1996 5080 1012 2216 2559 2000 2344 1996 2694 2013 9733 2024 2041 1997 6735 2004 2009 2003 2747 2041 1997 4518 1012 4638 2041 2062 3522 6207 11256 2030 1005 1033 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:01:20.357206 140115933247360 run_classifier.py:465] input_ids: 101 1031 1005 2694 4341 1024 6207 2694 2003 2085 2800 2005 5309 2006 9733 1012 4012 1010 4297 1012 1521 1055 1006 17235 2850 4160 1024 2572 2480 2078 1007 4037 1010 4311 1023 3406 2629 22911 1012 1996 4888 1997 1996 2275 1011 2327 3482 3310 2044 1996 2048 3316 3092 1037 2086 1011 2146 13552 1012 2009 2036 4504 1999 1996 9733 3539 2678 10439 2746 2000 1996 5080 1012 2216 2559 2000 2344 1996 2694 2013 9733 2024 2041 1997 6735 2004 2009 2003 2747 2041 1997 4518 1012 4638 2041 2062 3522 6207 11256 2030 1005 1033 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:01:20.362408 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:01:20.367826 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: False (id = 0)


I0422 19:01:20.372014 140115933247360 run_classifier.py:468] label: False (id = 0)


INFO:tensorflow:*** Example ***


I0422 19:01:20.387720 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0422 19:01:20.393334 140115933247360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] [ " the day ' s gains were broad , with all 11 of the s & p 500 ' s primary sectors finishing in positive territory , led by energy shares , which were among the biggest gain ##ers . " , ' " there are still a lot of details we don \ ' t know , but if the corporate tax rate is moved down near 20 % , you \ ' d be adding $ 10 per share to s & p 500 earnings . if you do that , valuation ##s are not out of w ##ha ##ck with what growth rates will be . " ' , ' mega ##cap tech stocks , hard hit on wednesday , rebound ##ed [SEP]


I0422 19:01:20.396888 140115933247360 run_classifier.py:464] tokens: [CLS] [ " the day ' s gains were broad , with all 11 of the s & p 500 ' s primary sectors finishing in positive territory , led by energy shares , which were among the biggest gain ##ers . " , ' " there are still a lot of details we don \ ' t know , but if the corporate tax rate is moved down near 20 % , you \ ' d be adding $ 10 per share to s & p 500 earnings . if you do that , valuation ##s are not out of w ##ha ##ck with what growth rates will be . " ' , ' mega ##cap tech stocks , hard hit on wednesday , rebound ##ed [SEP]


INFO:tensorflow:input_ids: 101 1031 1000 1996 2154 1005 1055 12154 2020 5041 1010 2007 2035 2340 1997 1996 1055 1004 1052 3156 1005 1055 3078 11105 5131 1999 3893 3700 1010 2419 2011 2943 6661 1010 2029 2020 2426 1996 5221 5114 2545 1012 1000 1010 1005 1000 2045 2024 2145 1037 2843 1997 4751 2057 2123 1032 1005 1056 2113 1010 2021 2065 1996 5971 4171 3446 2003 2333 2091 2379 2322 1003 1010 2017 1032 1005 1040 2022 5815 1002 2184 2566 3745 2000 1055 1004 1052 3156 16565 1012 2065 2017 2079 2008 1010 26004 2015 2024 2025 2041 1997 1059 3270 3600 2007 2054 3930 6165 2097 2022 1012 1000 1005 1010 1005 13164 17695 6627 15768 1010 2524 2718 2006 9317 1010 27755 2098 102


I0422 19:01:20.398638 140115933247360 run_classifier.py:465] input_ids: 101 1031 1000 1996 2154 1005 1055 12154 2020 5041 1010 2007 2035 2340 1997 1996 1055 1004 1052 3156 1005 1055 3078 11105 5131 1999 3893 3700 1010 2419 2011 2943 6661 1010 2029 2020 2426 1996 5221 5114 2545 1012 1000 1010 1005 1000 2045 2024 2145 1037 2843 1997 4751 2057 2123 1032 1005 1056 2113 1010 2021 2065 1996 5971 4171 3446 2003 2333 2091 2379 2322 1003 1010 2017 1032 1005 1040 2022 5815 1002 2184 2566 3745 2000 1055 1004 1052 3156 16565 1012 2065 2017 2079 2008 1010 26004 2015 2024 2025 2041 1997 1059 3270 3600 2007 2054 3930 6165 2097 2022 1012 1000 1005 1010 1005 13164 17695 6627 15768 1010 2524 2718 2006 9317 1010 27755 2098 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0422 19:01:20.402359 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:01:20.407484 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: False (id = 0)


I0422 19:01:20.412893 140115933247360 run_classifier.py:468] label: False (id = 0)


INFO:tensorflow:*** Example ***


I0422 19:01:20.444671 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0422 19:01:20.464268 140115933247360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] [ ' 06 . 12 . 2017 - 23 : 16 uh ##r die amazon prime video app ist ab so ##fort auf apple tv in uber 100 land ##ern ve ##rf ##ug ##bar ( nas ##da ##q : am ##z ##n ) – amazon ga ##b he ##ute be ##kan ##nt , das ##s die prime video app auf apple tv 4 ##k und fr ##uh ##ere ##n generation ##en von apple tv in uber 100 land ##ern ve ##rf ##ug ##bar ist . mit ##gli ##ede ##r von prime video ha ##ben nun no ##ch me ##hr mo ##gli ##ch ##kei ##ten , pre ##is ##ge ##kr ##onte und von k ##rit ##ike ##rn ge ##fe ##ier ##te ti ##tel , ein ##sch ##lie [SEP]


I0422 19:01:20.466928 140115933247360 run_classifier.py:464] tokens: [CLS] [ ' 06 . 12 . 2017 - 23 : 16 uh ##r die amazon prime video app ist ab so ##fort auf apple tv in uber 100 land ##ern ve ##rf ##ug ##bar ( nas ##da ##q : am ##z ##n ) – amazon ga ##b he ##ute be ##kan ##nt , das ##s die prime video app auf apple tv 4 ##k und fr ##uh ##ere ##n generation ##en von apple tv in uber 100 land ##ern ve ##rf ##ug ##bar ist . mit ##gli ##ede ##r von prime video ha ##ben nun no ##ch me ##hr mo ##gli ##ch ##kei ##ten , pre ##is ##ge ##kr ##onte und von k ##rit ##ike ##rn ge ##fe ##ier ##te ti ##tel , ein ##sch ##lie [SEP]


INFO:tensorflow:input_ids: 101 1031 1005 5757 1012 2260 1012 2418 1011 2603 1024 2385 7910 2099 3280 9733 3539 2678 10439 21541 11113 2061 13028 21200 6207 2694 1999 19169 2531 2455 11795 2310 12881 15916 8237 1006 17235 2850 4160 1024 2572 2480 2078 1007 1516 9733 11721 2497 2002 10421 2022 9126 3372 1010 8695 2015 3280 3539 2678 10439 21200 6207 2694 1018 2243 6151 10424 27225 7869 2078 4245 2368 3854 6207 2694 1999 19169 2531 2455 11795 2310 12881 15916 8237 21541 1012 10210 25394 14728 2099 3854 3539 2678 5292 10609 16634 2053 2818 2033 8093 9587 25394 2818 29501 6528 1010 3653 2483 3351 21638 28040 6151 3854 1047 14778 17339 6826 16216 7959 3771 2618 14841 9834 1010 16417 11624 8751 102


I0422 19:01:20.469972 140115933247360 run_classifier.py:465] input_ids: 101 1031 1005 5757 1012 2260 1012 2418 1011 2603 1024 2385 7910 2099 3280 9733 3539 2678 10439 21541 11113 2061 13028 21200 6207 2694 1999 19169 2531 2455 11795 2310 12881 15916 8237 1006 17235 2850 4160 1024 2572 2480 2078 1007 1516 9733 11721 2497 2002 10421 2022 9126 3372 1010 8695 2015 3280 3539 2678 10439 21200 6207 2694 1018 2243 6151 10424 27225 7869 2078 4245 2368 3854 6207 2694 1999 19169 2531 2455 11795 2310 12881 15916 8237 21541 1012 10210 25394 14728 2099 3854 3539 2678 5292 10609 16634 2053 2818 2033 8093 9587 25394 2818 29501 6528 1010 3653 2483 3351 21638 28040 6151 3854 1047 14778 17339 6826 16216 7959 3771 2618 14841 9834 1010 16417 11624 8751 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0422 19:01:20.472424 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:01:20.475346 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: False (id = 0)


I0422 19:01:20.478097 140115933247360 run_classifier.py:468] label: False (id = 0)


INFO:tensorflow:*** Example ***


I0422 19:01:20.555504 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0422 19:01:20.557729 140115933247360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] [ ' why amazon and google are du ##king it out news ##fe ##ed ##back @ fool . com ( mac greer ) save ' , " amazon . com ( nas ##da ##q : am ##z ##n ) and google are battling on the streaming side of their businesses , but it looks as if amazon is going to take the br ##unt of the damage . dave & buster ' s ( nas ##da ##q : play ) is up big on an earnings report that wasn ' t all good news . cot ##y ' s ( ny ##se : cot ##y ) european legal battle draws to a close , with big ram ##ification ##s for their european business that don ' [SEP]


I0422 19:01:20.559380 140115933247360 run_classifier.py:464] tokens: [CLS] [ ' why amazon and google are du ##king it out news ##fe ##ed ##back @ fool . com ( mac greer ) save ' , " amazon . com ( nas ##da ##q : am ##z ##n ) and google are battling on the streaming side of their businesses , but it looks as if amazon is going to take the br ##unt of the damage . dave & buster ' s ( nas ##da ##q : play ) is up big on an earnings report that wasn ' t all good news . cot ##y ' s ( ny ##se : cot ##y ) european legal battle draws to a close , with big ram ##ification ##s for their european business that don ' [SEP]


INFO:tensorflow:input_ids: 101 1031 1005 2339 9733 1998 8224 2024 4241 6834 2009 2041 2739 7959 2098 5963 1030 7966 1012 4012 1006 6097 25939 1007 3828 1005 1010 1000 9733 1012 4012 1006 17235 2850 4160 1024 2572 2480 2078 1007 1998 8224 2024 17773 2006 1996 11058 2217 1997 2037 5661 1010 2021 2009 3504 2004 2065 9733 2003 2183 2000 2202 1996 7987 16671 1997 1996 4053 1012 4913 1004 18396 1005 1055 1006 17235 2850 4160 1024 2377 1007 2003 2039 2502 2006 2019 16565 3189 2008 2347 1005 1056 2035 2204 2739 1012 26046 2100 1005 1055 1006 6396 3366 1024 26046 2100 1007 2647 3423 2645 9891 2000 1037 2485 1010 2007 2502 8223 9031 2015 2005 2037 2647 2449 2008 2123 1005 102


I0422 19:01:20.561147 140115933247360 run_classifier.py:465] input_ids: 101 1031 1005 2339 9733 1998 8224 2024 4241 6834 2009 2041 2739 7959 2098 5963 1030 7966 1012 4012 1006 6097 25939 1007 3828 1005 1010 1000 9733 1012 4012 1006 17235 2850 4160 1024 2572 2480 2078 1007 1998 8224 2024 17773 2006 1996 11058 2217 1997 2037 5661 1010 2021 2009 3504 2004 2065 9733 2003 2183 2000 2202 1996 7987 16671 1997 1996 4053 1012 4913 1004 18396 1005 1055 1006 17235 2850 4160 1024 2377 1007 2003 2039 2502 2006 2019 16565 3189 2008 2347 1005 1056 2035 2204 2739 1012 26046 2100 1005 1055 1006 6396 3366 1024 26046 2100 1007 2647 3423 2645 9891 2000 1037 2485 1010 2007 2502 8223 9031 2015 2005 2037 2647 2449 2008 2123 1005 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0422 19:01:20.563010 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:01:20.564714 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: True (id = 1)


I0422 19:01:20.566267 140115933247360 run_classifier.py:468] label: True (id = 1)


#Creating a model

Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning).

In [0]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model."""

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own layer to tune for politeness data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting
    output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
    # If we're predicting, we want predicted labels and the probabiltiies.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction.

In [0]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns `model_fn` closure for TPUEstimator."""
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
    
    # TRAIN and EVAL
    if not is_predicting:

      (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

      # Calculate evaluation metrics. 
      def metric_fn(label_ids, predicted_labels):
        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
        f1_score = tf.contrib.metrics.f1_score(
            label_ids,
            predicted_labels)
        auc = tf.metrics.auc(
            label_ids,
            predicted_labels)
        recall = tf.metrics.recall(
            label_ids,
            predicted_labels)
        precision = tf.metrics.precision(
            label_ids,
            predicted_labels) 
        true_pos = tf.metrics.true_positives(
            label_ids,
            predicted_labels)
        true_neg = tf.metrics.true_negatives(
            label_ids,
            predicted_labels)   
        false_pos = tf.metrics.false_positives(
            label_ids,
            predicted_labels)  
        false_neg = tf.metrics.false_negatives(
            label_ids,
            predicted_labels)
        return {
            "eval_accuracy": accuracy,
            "f1_score": f1_score,
            "auc": auc,
            "precision": precision,
            "recall": recall,
            "true_positives": true_pos,
            "true_negatives": true_neg,
            "false_positives": false_pos,
            "false_negatives": false_neg
        }

      eval_metrics = metric_fn(label_ids, predicted_labels)

      if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          train_op=train_op)
      else:
          return tf.estimator.EstimatorSpec(mode=mode,
            loss=loss,
            eval_metric_ops=eval_metrics)
    else:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Return the actual model function in the closure
  return model_fn


In [0]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [0]:
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [0]:
# Specify outpit directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [0]:
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})


INFO:tensorflow:Using config: {'_model_dir': 'Output/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f6ed8b24550>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


I0422 19:02:10.269200 140115933247360 estimator.py:201] Using config: {'_model_dir': 'Output/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f6ed8b24550>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Next we create an input builder function that takes our training feature set (`train_features`) and produces a generator. This is a pretty standard design pattern for working with Tensorflow [Estimators](https://www.tensorflow.org/guide/estimators).

In [0]:
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.

In [0]:
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
INFO:tensorflow:Skipping training since max_steps has already saved.


I0422 19:02:10.414537 140115933247360 estimator.py:351] Skipping training since max_steps has already saved.


Training took time  0:00:00.017031


Now let's use our test data to see how well our model did:

In [0]:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [0]:
estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.


I0422 19:02:12.429821 140115933247360 estimator.py:1111] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0422 19:02:17.998373 140115933247360 saver.py:1483] Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


I0422 19:02:29.107475 140115933247360 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2019-04-22T19:02:29Z


I0422 19:02:29.135259 140115933247360 evaluation.py:257] Starting evaluation at 2019-04-22T19:02:29Z


INFO:tensorflow:Graph was finalized.


I0422 19:02:30.596773 140115933247360 monitored_session.py:222] Graph was finalized.


INFO:tensorflow:Restoring parameters from Output/model.ckpt-986


I0422 19:02:30.607504 140115933247360 saver.py:1270] Restoring parameters from Output/model.ckpt-986


INFO:tensorflow:Running local_init_op.


I0422 19:02:33.188335 140115933247360 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0422 19:02:33.460125 140115933247360 session_manager.py:493] Done running local_init_op.


INFO:tensorflow:Finished evaluation at 2019-04-22-19:02:57


I0422 19:02:57.732160 140115933247360 evaluation.py:277] Finished evaluation at 2019-04-22-19:02:57


INFO:tensorflow:Saving dict for global step 986: auc = 0.6276554, eval_accuracy = 0.6305019, f1_score = 0.68837506, false_negatives = 259.0, false_positives = 698.0, global_step = 986, loss = 0.897372, precision = 0.6022792, recall = 0.8031915, true_negatives = 576.0, true_positives = 1057.0


I0422 19:02:57.734482 140115933247360 estimator.py:1979] Saving dict for global step 986: auc = 0.6276554, eval_accuracy = 0.6305019, f1_score = 0.68837506, false_negatives = 259.0, false_positives = 698.0, global_step = 986, loss = 0.897372, precision = 0.6022792, recall = 0.8031915, true_negatives = 576.0, true_positives = 1057.0


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 986: Output/model.ckpt-986


I0422 19:02:57.741768 140115933247360 estimator.py:2039] Saving 'checkpoint_path' summary for global step 986: Output/model.ckpt-986


{'auc': 0.6276554,
 'eval_accuracy': 0.6305019,
 'f1_score': 0.68837506,
 'false_negatives': 259.0,
 'false_positives': 698.0,
 'global_step': 986,
 'loss': 0.897372,
 'precision': 0.6022792,
 'recall': 0.8031915,
 'true_negatives': 576.0,
 'true_positives': 1057.0}

Now let's write code to make predictions on new sentences:

In [0]:
def getPrediction(in_sentences):
  labels = ["Negative", "Positive"]
  input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  predictions = estimator.predict(predict_input_fn)
  return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]

In [0]:
pred_sentences = ['Consumers now experience AI mostly through image recognition','Some investors sitting on shares of the iPhone maker, which are up over 51%','']
predictions = getPrediction(pred_sentences)

INFO:tensorflow:Writing example 0 of 2


I0422 19:02:57.772401 140115933247360 run_classifier.py:774] Writing example 0 of 2


INFO:tensorflow:*** Example ***


I0422 19:02:57.775898 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: 


I0422 19:02:57.779614 140115933247360 run_classifier.py:462] guid: 


INFO:tensorflow:tokens: [CLS] consumers now experience ai mostly through image recognition [SEP]


I0422 19:02:57.783572 140115933247360 run_classifier.py:464] tokens: [CLS] consumers now experience ai mostly through image recognition [SEP]


INFO:tensorflow:input_ids: 101 10390 2085 3325 9932 3262 2083 3746 5038 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:02:57.785931 140115933247360 run_classifier.py:465] input_ids: 101 10390 2085 3325 9932 3262 2083 3746 5038 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:02:57.788195 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:02:57.791921 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0422 19:02:57.795777 140115933247360 run_classifier.py:468] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0422 19:02:57.797830 140115933247360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: 


I0422 19:02:57.800013 140115933247360 run_classifier.py:462] guid: 


INFO:tensorflow:tokens: [CLS] some investors sitting on shares of the iphone maker , which are up over 51 % [SEP]


I0422 19:02:57.802043 140115933247360 run_classifier.py:464] tokens: [CLS] some investors sitting on shares of the iphone maker , which are up over 51 % [SEP]


INFO:tensorflow:input_ids: 101 2070 9387 3564 2006 6661 1997 1996 18059 9338 1010 2029 2024 2039 2058 4868 1003 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:02:57.807381 140115933247360 run_classifier.py:465] input_ids: 101 2070 9387 3564 2006 6661 1997 1996 18059 9338 1010 2029 2024 2039 2058 4868 1003 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:02:57.811998 140115933247360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0422 19:02:57.814275 140115933247360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0422 19:02:57.816246 140115933247360 run_classifier.py:468] label: 0 (id = 0)


INFO:tensorflow:Calling model_fn.


I0422 19:02:57.855359 140115933247360 estimator.py:1111] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0422 19:03:01.699200 140115933247360 saver.py:1483] Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


I0422 19:03:01.883387 140115933247360 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Graph was finalized.


I0422 19:03:02.413614 140115933247360 monitored_session.py:222] Graph was finalized.


INFO:tensorflow:Restoring parameters from Output/model.ckpt-986


I0422 19:03:02.424150 140115933247360 saver.py:1270] Restoring parameters from Output/model.ckpt-986


INFO:tensorflow:Running local_init_op.


I0422 19:03:03.291658 140115933247360 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0422 19:03:03.408571 140115933247360 session_manager.py:493] Done running local_init_op.


Voila! We have a sentiment classifier!

In [0]:
predictions

[('Consumers now experience AI mostly through image recognition',
  array([-0.7665952 , -0.62472653], dtype=float32),
  'Positive'),
 ('Some investors sitting on shares of the iPhone maker, which are up over 51%',
  array([-2.114453  , -0.12862833], dtype=float32),
  'Positive')]