In [None]:
# Copyright 2019 Google Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Predicting Movie Review Sentiment with BERT on TF Hub

If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.

Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing TensorFlow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMo and GloVe. Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.

Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in Tensorflow with tf hub. Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!

In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

W0329 08:04:41.413568 4439307712 __init__.py:56] Some hub symbols are not available because TensorFlow version is less than 1.14


In addition to the standard libraries we imported above, we'll need to install BERT's python package.

In [2]:
# !pip install bert-tensorflow

In [2]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

Below, we'll set an output directory location to store our model output and checkpoints. This can be a local directory, in which case you'd set OUTPUT_DIR to the name of the directory you'd like to create. If you're running this code in Google's hosted Colab, the directory won't persist after the Colab session ends.

Alternatively, if you're a GCP user, you can store output in a GCP bucket. To do that, set a directory name in OUTPUT_DIR and the name of the GCP bucket in the BUCKET field.

Set DO_DELETE to rewrite the OUTPUT_DIR if it exists. Otherwise, Tensorflow will load existing model checkpoints from that directory (if they exist).

In [3]:
# NOTE: the Colab/GCP configuration below is disabled (commented out); this
# notebook uses the local directory assigned to OUTPUT_DIR at the end of the cell.
# Un-comment the block to get the DO_DELETE / USE_BUCKET options back.

# # Set the output directory for saving model file
# # Optionally, set a GCP bucket location

# OUTPUT_DIR = 'OUTPUT_DIR_NAME'#@param {type:"string"}
# #@markdown Whether or not to clear/delete the directory and create a new one
# DO_DELETE = False #@param {type:"boolean"}
# #@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
# USE_BUCKET = True #@param {type:"boolean"}
# BUCKET = 'BUCKET_NAME' #@param {type:"string"}

# if USE_BUCKET:
#   OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
#   from google.colab import auth
#   auth.authenticate_user()

# if DO_DELETE:
#   try:
#     tf.gfile.DeleteRecursively(OUTPUT_DIR)
#   except:
#     # Doesn't matter if the directory didn't exist
#     pass
# tf.gfile.MakeDirs(OUTPUT_DIR)
# print('***** Model output directory: {} *****'.format(OUTPUT_DIR))

# Relative, local directory used as the model output directory.
OUTPUT_DIR = 'bert-tfhub/aclImdb_v1'
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))

***** Model output directory: bert-tfhub/aclImdb_v1 *****


# Data

First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).

In [4]:
from tensorflow import keras
import os
import re

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
  """Read every review file in `directory` into a DataFrame.

  Review files are expected to be named `<id>_<rating>.txt`. Entries whose
  name does not start with that pattern (e.g. macOS `.DS_Store`) are skipped
  instead of crashing the whole load.

  Args:
    directory: path of the directory holding the review text files.

  Returns:
    A DataFrame with two columns: "sentence" (the full file contents) and
    "sentiment" (the rating digits taken from the file name, kept as a string).
  """
  # Raw string: "\d" in a normal string literal is an invalid escape sequence.
  filename_pattern = re.compile(r"\d+_(\d+)\.txt")
  data = {"sentence": [], "sentiment": []}
  for file_name in os.listdir(directory):
    # Match before reading so a stray file never leaves the two columns
    # with different lengths.
    match = filename_pattern.match(file_name)
    if match is None:
      continue
    with tf.gfile.GFile(os.path.join(directory, file_name), "r") as f:
      data["sentence"].append(f.read())
    data["sentiment"].append(match.group(1))
  return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
  """Load the "pos" and "neg" sub-directories of `directory` as one shuffled DataFrame.

  Args:
    directory: path of a dataset split containing "pos" and "neg" folders.

  Returns:
    A DataFrame of all reviews with an added "polarity" column (1 for "pos",
    0 for "neg"), rows in random order and the index renumbered from 0.
  """
  labelled_frames = []
  for folder, polarity in (("pos", 1), ("neg", 0)):
    frame = load_directory_data(os.path.join(directory, folder))
    frame["polarity"] = polarity
    labelled_frames.append(frame)
  combined = pd.concat(labelled_frames)
  # sample(frac=1) returns every row in random order, i.e. a shuffle.
  shuffled = combined.sample(frac=1)
  return shuffled.reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False, dataset_dir=None):
  """Load the IMDB train and test splits as DataFrames.

  Args:
    force_download: kept for backward compatibility; currently unused.
    dataset_dir: optional path to an already extracted `aclImdb` directory
      (the one containing `train/` and `test/`). When None, the archive is
      fetched from Stanford with `tf.keras.utils.get_file` and extracted next
      to the downloaded archive.

  Returns:
    A `(train_df, test_df)` tuple, each produced by `load_dataset`.
  """
  if dataset_dir is None:
    # With extract=True the archive is unpacked beside the downloaded file,
    # so the dataset lives in "<archive dir>/aclImdb".
    archive_path = tf.keras.utils.get_file(
        fname="aclImdb.tar.gz",
        origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
        extract=True)
    dataset_dir = os.path.join(os.path.dirname(archive_path), "aclImdb")

  train_df = load_dataset(os.path.join(dataset_dir, "train"))
  test_df = load_dataset(os.path.join(dataset_dir, "test"))

  return train_df, test_df

In [5]:
# Load the shuffled train and test DataFrames (columns: sentence, sentiment, polarity).
train, test = download_and_load_datasets()

To keep training fast, we'll take a random sample of 5,000 examples from each of the train and test sets.

In [6]:
# Keep 5000 randomly chosen rows per split; no random_state is set, so the
# selected rows differ from run to run.
train, test = train.sample(n=5000), test.sample(n=5000)

In [7]:
# Display the column names of the sampled training DataFrame.
train.columns

Index(['sentence', 'sentiment', 'polarity'], dtype='object')

For us, our input data is the 'sentence' column and our label is the 'polarity' column (0 for negative and 1 for positive).

In [8]:
# Name of the DataFrame column holding the review text.
DATA_COLUMN = 'sentence'
# Name of the DataFrame column holding the class label (1 for "pos" reviews, 0 for "neg").
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]

# Data Preprocessing
We'll need to transform our data into a format BERT understands. This involves two steps. First, we create `InputExample`s using the constructor provided in the BERT library.

- `text_a` is the text we want to classify, which in this case is the `sentence` column of our DataFrame.
- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`? Is `text_b` an answer to the question asked by `text_a`?). This doesn't apply to our task, so we can leave `text_b` blank.
- `label` is the label for our example (e.g. True or False); in this case it is the `polarity` column, which is 0 or 1.

In [9]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):


1. Lowercase our text (if we're using a BERT lowercase model)
2. Tokenize it (i.e. "sally says hi" -> ["sally", "says", "hi"])
3. Break words into WordPieces (i.e. "calling" -> ["call", "##ing"])
4. Map our words to indexes using a vocab file that BERT provides
5. Add special "CLS" and "SEP" tokens (see the [readme](https://github.com/google-research/bert))
6. Append "index" and "segment" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))

Happily, we don't have to worry about most of these details.




To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:

In [13]:
# TF Hub handle (URL) of an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Build a BERT `FullTokenizer` from the metadata of the TF Hub module.

  The module named by BERT_MODEL_HUB is loaded in a throw-away graph and its
  "tokenization_info" signature is evaluated to obtain the vocab file and the
  lower-casing flag.

  Returns:
    A `bert.tokenization.FullTokenizer` configured with those two values.
  """
  with tf.Graph().as_default():
    hub_module = hub.Module(BERT_MODEL_HUB)
    info = hub_module(signature="tokenization_info", as_dict=True)
    fetches = [info["vocab_file"], info["do_lower_case"]]
    with tf.Session() as session:
      vocab_path, lower_case = session.run(fetches)

  tokenizer_kwargs = dict(vocab_file=vocab_path, do_lower_case=lower_case)
  return bert.tokenization.FullTokenizer(**tokenizer_kwargs)

# Build the tokenizer once; it is reused for the tokenization demo and the feature conversion below.
tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0329 08:18:30.962333 4439307712 saver.py:1483] Saver not created because there are no variables in the graph to restore


Great--we just learned that the BERT model we're using expects lowercase data (that's what's stored in tokenization_info["do_lower_case"]) and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:

In [12]:
# Tokenize a sample sentence to see the lowercased word pieces the tokenizer produces.
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands.

In [17]:
# Upper bound, in tokens, on the length of every sequence.
MAX_SEQ_LENGTH = 128
# Turn the train and then the test InputExamples into the InputFeatures BERT understands.
train_features, test_features = [
    bert.run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    for examples in (train_InputExamples, test_InputExamples)
]

INFO:tensorflow:Writing example 0 of 5000


I0329 08:21:02.705108 4439307712 run_classifier.py:774] Writing example 0 of 5000


INFO:tensorflow:*** Example ***


I0329 08:21:02.715077 4439307712 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0329 08:21:02.716451 4439307712 run_classifier.py:462] guid: None






INFO:tensorflow:input_ids: 101 1996 27594 2121 5432 2003 2005 2216 2111 2040 2215 2000 2156 2005 3209 2054 4176 1998 12793 3413 2077 2037 2159 1010 2348 1045 2123 1005 1056 5254 2009 1999 2307 6987 1012 1026 7987 1013 1028 1026 7987 1013 1028 1000 3011 1000 2003 2019 22480 1012 3938 3371 5988 2544 2241 2006 1000 4774 3011 1000 2029 1045 3427 2035 2006 4035 2694 1012 1996 2694 2544 2001 17356 2011 2585 2012 6528 10235 1010 1037 14408 17441 12268 1010 2040 1045 2018 6257 2018 2036 2589 2009 2005 1000 3011 1000 2021 2009 2003 4754 5954 1010 2732 10313 1005 1055 2952 27263 4232 1012 2045 2024 5570 7171 1997 1996 3011 2013 2686 2061 2008 1005 1055 2089 2022 6413 1012 102


I0329 08:21:02.719197 4439307712 run_classifier.py:465] input_ids: 101 1996 27594 2121 5432 2003 2005 2216 2111 2040 2215 2000 2156 2005 3209 2054 4176 1998 12793 3413 2077 2037 2159 1010 2348 1045 2123 1005 1056 5254 2009 1999 2307 6987 1012 1026 7987 1013 1028 1026 7987 1013 1028 1000 3011 1000 2003 2019 22480 1012 3938 3371 5988 2544 2241 2006 1000 4774 3011 1000 2029 1045 3427 2035 2006 4035 2694 1012 1996 2694 2544 2001 17356 2011 2585 2012 6528 10235 1010 1037 14408 17441 12268 1010 2040 1045 2018 6257 2018 2036 2589 2009 2005 1000 3011 1000 2021 2009 2003 4754 5954 1010 2732 10313 1005 1055 2952 27263 4232 1012 2045 2024 5570 7171 1997 1996 3011 2013 2686 2061 2008 1005 1055 2089 2022 6413 1012 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0329 08:21:02.720581 4439307712 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:02.721781 4439307712 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0329 08:21:02.722975 4439307712 run_classifier.py:468] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0329 08:21:02.733407 4439307712 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0329 08:21:02.735146 4439307712 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] i was searching through hollywood video last night with a friend trying to find a good - looking horror movie to watch over the new year ' s weekend . as i was looking through the shelves , " severed " spotted my eye , and i grabbed it off the shelf and it looked like it might be a decent b - grade horror movie . the cover looked fairly good . the plot sounded semi - interesting . so i rented it . what a mistake . don ' t be fooled by the cover , which actually looks decent . i ' m thinking that more money was spent on the cover artwork than the movie itself . the film follows two police [SEP]


I0329 08:21:02.736577 4439307712 run_classifier.py:464] tokens: [CLS] i was searching through hollywood video last night with a friend trying to find a good - looking horror movie to watch over the new year ' s weekend . as i was looking through the shelves , " severed " spotted my eye , and i grabbed it off the shelf and it looked like it might be a decent b - grade horror movie . the cover looked fairly good . the plot sounded semi - interesting . so i rented it . what a mistake . don ' t be fooled by the cover , which actually looks decent . i ' m thinking that more money was spent on the cover artwork than the movie itself . the film follows two police [SEP]


INFO:tensorflow:input_ids: 101 1045 2001 6575 2083 5365 2678 2197 2305 2007 1037 2767 2667 2000 2424 1037 2204 1011 2559 5469 3185 2000 3422 2058 1996 2047 2095 1005 1055 5353 1012 2004 1045 2001 2559 2083 1996 15475 1010 1000 16574 1000 7282 2026 3239 1010 1998 1045 4046 2009 2125 1996 11142 1998 2009 2246 2066 2009 2453 2022 1037 11519 1038 1011 3694 5469 3185 1012 1996 3104 2246 7199 2204 1012 1996 5436 5015 4100 1011 5875 1012 2061 1045 12524 2009 1012 2054 1037 6707 1012 2123 1005 1056 2022 25857 2011 1996 3104 1010 2029 2941 3504 11519 1012 1045 1005 1049 3241 2008 2062 2769 2001 2985 2006 1996 3104 8266 2084 1996 3185 2993 1012 1996 2143 4076 2048 2610 102


I0329 08:21:02.737747 4439307712 run_classifier.py:465] input_ids: 101 1045 2001 6575 2083 5365 2678 2197 2305 2007 1037 2767 2667 2000 2424 1037 2204 1011 2559 5469 3185 2000 3422 2058 1996 2047 2095 1005 1055 5353 1012 2004 1045 2001 2559 2083 1996 15475 1010 1000 16574 1000 7282 2026 3239 1010 1998 1045 4046 2009 2125 1996 11142 1998 2009 2246 2066 2009 2453 2022 1037 11519 1038 1011 3694 5469 3185 1012 1996 3104 2246 7199 2204 1012 1996 5436 5015 4100 1011 5875 1012 2061 1045 12524 2009 1012 2054 1037 6707 1012 2123 1005 1056 2022 25857 2011 1996 3104 1010 2029 2941 3504 11519 1012 1045 1005 1049 3241 2008 2062 2769 2001 2985 2006 1996 3104 8266 2084 1996 3185 2993 1012 1996 2143 4076 2048 2610 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0329 08:21:02.739247 4439307712 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:02.740489 4439307712 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0329 08:21:02.741632 4439307712 run_classifier.py:468] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0329 08:21:02.746748 4439307712 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0329 08:21:02.748101 4439307712 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] 9 / 10 - 30 minutes of pure holiday terror . okay , so it ' s not that scary . but it sure is fun . < br / > < br / > the crypt keeper ( john ka ##ssi ##r ) tales a tale of holiday fear , giving us all christmas goose . . . go ##sse ##bu ##mps that is . b ##wa ##ha ##ha ##ha ##hh ##a . you should really be careful what you axe santa for . have a scary christmas and a happy new fear . okay i ' ll stop . < br / > < br / > okay , so in the story , a greedy wife ( best scream ##er in the world [SEP]


I0329 08:21:02.749560 4439307712 run_classifier.py:464] tokens: [CLS] 9 / 10 - 30 minutes of pure holiday terror . okay , so it ' s not that scary . but it sure is fun . < br / > < br / > the crypt keeper ( john ka ##ssi ##r ) tales a tale of holiday fear , giving us all christmas goose . . . go ##sse ##bu ##mps that is . b ##wa ##ha ##ha ##ha ##hh ##a . you should really be careful what you axe santa for . have a scary christmas and a happy new fear . okay i ' ll stop . < br / > < br / > okay , so in the story , a greedy wife ( best scream ##er in the world [SEP]


INFO:tensorflow:input_ids: 101 1023 1013 2184 1011 2382 2781 1997 5760 6209 7404 1012 3100 1010 2061 2009 1005 1055 2025 2008 12459 1012 2021 2009 2469 2003 4569 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 19888 10684 1006 2198 10556 18719 2099 1007 7122 1037 6925 1997 6209 3571 1010 3228 2149 2035 4234 13020 1012 1012 1012 2175 11393 8569 25370 2008 2003 1012 1038 4213 3270 3270 3270 23644 2050 1012 2017 2323 2428 2022 6176 2054 2017 12946 4203 2005 1012 2031 1037 12459 4234 1998 1037 3407 2047 3571 1012 3100 1045 1005 2222 2644 1012 1026 7987 1013 1028 1026 7987 1013 1028 3100 1010 2061 1999 1996 2466 1010 1037 20505 2564 1006 2190 6978 2121 1999 1996 2088 102


I0329 08:21:02.750886 4439307712 run_classifier.py:465] input_ids: 101 1023 1013 2184 1011 2382 2781 1997 5760 6209 7404 1012 3100 1010 2061 2009 1005 1055 2025 2008 12459 1012 2021 2009 2469 2003 4569 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 19888 10684 1006 2198 10556 18719 2099 1007 7122 1037 6925 1997 6209 3571 1010 3228 2149 2035 4234 13020 1012 1012 1012 2175 11393 8569 25370 2008 2003 1012 1038 4213 3270 3270 3270 23644 2050 1012 2017 2323 2428 2022 6176 2054 2017 12946 4203 2005 1012 2031 1037 12459 4234 1998 1037 3407 2047 3571 1012 3100 1045 1005 2222 2644 1012 1026 7987 1013 1028 1026 7987 1013 1028 3100 1010 2061 1999 1996 2466 1010 1037 20505 2564 1006 2190 6978 2121 1999 1996 2088 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0329 08:21:02.752640 4439307712 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:02.754755 4439307712 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0329 08:21:02.755973 4439307712 run_classifier.py:468] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0329 08:21:02.770127 4439307712 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0329 08:21:02.771226 4439307712 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] the world of the 1973 sci - fi drama soy ##lent green is what we could be seeing if we aren ' t careful . it is a world in which new york city ' s population has topped the 40 million mark in the year 202 ##2 . over ##pop ##ulation , air pollution , year - long heat waves , and food shortages are the rule . the only hope comes from a food product called soy ##lent green . but what is this particular food stuff really made of ? that question is at the heart of this admitted ##ly somewhat dated but still intriguing film , based on harry harrison ' s 1966 novel " make room ! make room ! " [SEP]


I0329 08:21:02.772583 4439307712 run_classifier.py:464] tokens: [CLS] the world of the 1973 sci - fi drama soy ##lent green is what we could be seeing if we aren ' t careful . it is a world in which new york city ' s population has topped the 40 million mark in the year 202 ##2 . over ##pop ##ulation , air pollution , year - long heat waves , and food shortages are the rule . the only hope comes from a food product called soy ##lent green . but what is this particular food stuff really made of ? that question is at the heart of this admitted ##ly somewhat dated but still intriguing film , based on harry harrison ' s 1966 novel " make room ! make room ! " [SEP]


INFO:tensorflow:input_ids: 101 1996 2088 1997 1996 3381 16596 1011 10882 3689 25176 16136 2665 2003 2054 2057 2071 2022 3773 2065 2057 4995 1005 1056 6176 1012 2009 2003 1037 2088 1999 2029 2047 2259 2103 1005 1055 2313 2038 9370 1996 2871 2454 2928 1999 1996 2095 16798 2475 1012 2058 16340 9513 1010 2250 10796 1010 2095 1011 2146 3684 5975 1010 1998 2833 22623 2024 1996 3627 1012 1996 2069 3246 3310 2013 1037 2833 4031 2170 25176 16136 2665 1012 2021 2054 2003 2023 3327 2833 4933 2428 2081 1997 1029 2008 3160 2003 2012 1996 2540 1997 2023 4914 2135 5399 6052 2021 2145 23824 2143 1010 2241 2006 4302 6676 1005 1055 3547 3117 1000 2191 2282 999 2191 2282 999 1000 102


I0329 08:21:02.774088 4439307712 run_classifier.py:465] input_ids: 101 1996 2088 1997 1996 3381 16596 1011 10882 3689 25176 16136 2665 2003 2054 2057 2071 2022 3773 2065 2057 4995 1005 1056 6176 1012 2009 2003 1037 2088 1999 2029 2047 2259 2103 1005 1055 2313 2038 9370 1996 2871 2454 2928 1999 1996 2095 16798 2475 1012 2058 16340 9513 1010 2250 10796 1010 2095 1011 2146 3684 5975 1010 1998 2833 22623 2024 1996 3627 1012 1996 2069 3246 3310 2013 1037 2833 4031 2170 25176 16136 2665 1012 2021 2054 2003 2023 3327 2833 4933 2428 2081 1997 1029 2008 3160 2003 2012 1996 2540 1997 2023 4914 2135 5399 6052 2021 2145 23824 2143 1010 2241 2006 4302 6676 1005 1055 3547 3117 1000 2191 2282 999 2191 2282 999 1000 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0329 08:21:02.775548 4439307712 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:02.777034 4439307712 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0329 08:21:02.778736 4439307712 run_classifier.py:468] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0329 08:21:02.789219 4439307712 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0329 08:21:02.790378 4439307712 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] watching the first 30 minutes of sands of oblivion gave me high hopes . it seemed i was in for a cheaper version of the mummy . the setup was promising , in the 1920 ' s cecil b . demi ##lle makes his opus of the ten command ##ments . it seems in using real egyptian artifacts for the movie set they unleashed an ancient and terrible evil ( don ' t they always ? ) . aware of what had been unleashed demi ##lle orders the entire set buried instead of the usual practice of tearing it down . hopefully the evil will be buried with it for all time . then we switch to present day where a team is attempting to ex [SEP]


I0329 08:21:02.791571 4439307712 run_classifier.py:464] tokens: [CLS] watching the first 30 minutes of sands of oblivion gave me high hopes . it seemed i was in for a cheaper version of the mummy . the setup was promising , in the 1920 ' s cecil b . demi ##lle makes his opus of the ten command ##ments . it seems in using real egyptian artifacts for the movie set they unleashed an ancient and terrible evil ( don ' t they always ? ) . aware of what had been unleashed demi ##lle orders the entire set buried instead of the usual practice of tearing it down . hopefully the evil will be buried with it for all time . then we switch to present day where a team is attempting to ex [SEP]


INFO:tensorflow:input_ids: 101 3666 1996 2034 2382 2781 1997 13457 1997 24034 2435 2033 2152 8069 1012 2009 2790 1045 2001 1999 2005 1037 16269 2544 1997 1996 22788 1012 1996 16437 2001 10015 1010 1999 1996 4444 1005 1055 11978 1038 1012 27668 6216 3084 2010 16895 1997 1996 2702 3094 8163 1012 2009 3849 1999 2478 2613 6811 10471 2005 1996 3185 2275 2027 22416 2019 3418 1998 6659 4763 1006 2123 1005 1056 2027 2467 1029 1007 1012 5204 1997 2054 2018 2042 22416 27668 6216 4449 1996 2972 2275 3950 2612 1997 1996 5156 3218 1997 13311 2009 2091 1012 11504 1996 4763 2097 2022 3950 2007 2009 2005 2035 2051 1012 2059 2057 6942 2000 2556 2154 2073 1037 2136 2003 7161 2000 4654 102


I0329 08:21:02.792698 4439307712 run_classifier.py:465] input_ids: 101 3666 1996 2034 2382 2781 1997 13457 1997 24034 2435 2033 2152 8069 1012 2009 2790 1045 2001 1999 2005 1037 16269 2544 1997 1996 22788 1012 1996 16437 2001 10015 1010 1999 1996 4444 1005 1055 11978 1038 1012 27668 6216 3084 2010 16895 1997 1996 2702 3094 8163 1012 2009 3849 1999 2478 2613 6811 10471 2005 1996 3185 2275 2027 22416 2019 3418 1998 6659 4763 1006 2123 1005 1056 2027 2467 1029 1007 1012 5204 1997 2054 2018 2042 22416 27668 6216 4449 1996 2972 2275 3950 2612 1997 1996 5156 3218 1997 13311 2009 2091 1012 11504 1996 4763 2097 2022 3950 2007 2009 2005 2035 2051 1012 2059 2057 6942 2000 2556 2154 2073 1037 2136 2003 7161 2000 4654 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0329 08:21:02.793829 4439307712 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:02.795273 4439307712 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0329 08:21:02.796722 4439307712 run_classifier.py:468] label: 0 (id = 0)


INFO:tensorflow:Writing example 0 of 5000


I0329 08:21:24.996325 4439307712 run_classifier.py:774] Writing example 0 of 5000


INFO:tensorflow:*** Example ***


I0329 08:21:25.002312 4439307712 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0329 08:21:25.003743 4439307712 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] otto prem ##inger ' s " the man with the golden arm " is a reference to heroin addiction - something that must have been rather risky to film back in 1955 , fifty years ago ( the ce ##nsor ##s today still have a problem with drug content in films ! ) . < br / > < br / > the lead role was originally offered to marlon brand ##o , then snatched by frank sinatra before brand ##o could respond . sinatra convincing ##ly portrays a pro card dealer and ex - heroin addict who returns home to the city only to find himself battling the demons of temptation . < br / > < br / > prem ##inger is one of [SEP]


I0329 08:21:25.004791 4439307712 run_classifier.py:464] tokens: [CLS] otto prem ##inger ' s " the man with the golden arm " is a reference to heroin addiction - something that must have been rather risky to film back in 1955 , fifty years ago ( the ce ##nsor ##s today still have a problem with drug content in films ! ) . < br / > < br / > the lead role was originally offered to marlon brand ##o , then snatched by frank sinatra before brand ##o could respond . sinatra convincing ##ly portrays a pro card dealer and ex - heroin addict who returns home to the city only to find himself battling the demons of temptation . < br / > < br / > prem ##inger is one of [SEP]


INFO:tensorflow:input_ids: 101 8064 26563 9912 1005 1055 1000 1996 2158 2007 1996 3585 2849 1000 2003 1037 4431 2000 19690 13449 1011 2242 2008 2442 2031 2042 2738 19188 2000 2143 2067 1999 3982 1010 5595 2086 3283 1006 1996 8292 29577 2015 2651 2145 2031 1037 3291 2007 4319 4180 1999 3152 999 1007 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2599 2535 2001 2761 3253 2000 25861 4435 2080 1010 2059 14177 2011 3581 19643 2077 4435 2080 2071 6869 1012 19643 13359 2135 17509 1037 4013 4003 11033 1998 4654 1011 19690 26855 2040 5651 2188 2000 1996 2103 2069 2000 2424 2370 17773 1996 7942 1997 17232 1012 1026 7987 1013 1028 1026 7987 1013 1028 26563 9912 2003 2028 1997 102


I0329 08:21:25.005974 4439307712 run_classifier.py:465] input_ids: 101 8064 26563 9912 1005 1055 1000 1996 2158 2007 1996 3585 2849 1000 2003 1037 4431 2000 19690 13449 1011 2242 2008 2442 2031 2042 2738 19188 2000 2143 2067 1999 3982 1010 5595 2086 3283 1006 1996 8292 29577 2015 2651 2145 2031 1037 3291 2007 4319 4180 1999 3152 999 1007 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2599 2535 2001 2761 3253 2000 25861 4435 2080 1010 2059 14177 2011 3581 19643 2077 4435 2080 2071 6869 1012 19643 13359 2135 17509 1037 4013 4003 11033 1998 4654 1011 19690 26855 2040 5651 2188 2000 1996 2103 2069 2000 2424 2370 17773 1996 7942 1997 17232 1012 1026 7987 1013 1028 1026 7987 1013 1028 26563 9912 2003 2028 1997 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0329 08:21:25.007292 4439307712 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:25.008492 4439307712 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0329 08:21:25.010102 4439307712 run_classifier.py:468] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0329 08:21:25.013772 4439307712 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0329 08:21:25.015145 4439307712 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] the fun ##nies ##t performance was by sha ##lom harlow , as matt dillon ' s super ##mo ##del girlfriend . she was more interesting to me than all the lead actors . this movie got it all wrong ; even the most depend ##able actress of the century , joan cu ##sas ##k , was not able to rise about the ridiculous ##ness of the plot . i did enjoy hearing " mach ##o man " by the village people over the closing credits . the rest of the movie might have been to ##ler ##able if it were to rise to that level of energy . [SEP]


I0329 08:21:25.016462 4439307712 run_classifier.py:464] tokens: [CLS] the fun ##nies ##t performance was by sha ##lom harlow , as matt dillon ' s super ##mo ##del girlfriend . she was more interesting to me than all the lead actors . this movie got it all wrong ; even the most depend ##able actress of the century , joan cu ##sas ##k , was not able to rise about the ridiculous ##ness of the plot . i did enjoy hearing " mach ##o man " by the village people over the closing credits . the rest of the movie might have been to ##ler ##able if it were to rise to that level of energy . [SEP]


INFO:tensorflow:input_ids: 101 1996 4569 15580 2102 2836 2001 2011 21146 21297 22545 1010 2004 4717 14602 1005 1055 3565 5302 9247 6513 1012 2016 2001 2062 5875 2000 2033 2084 2035 1996 2599 5889 1012 2023 3185 2288 2009 2035 3308 1025 2130 1996 2087 12530 3085 3883 1997 1996 2301 1010 7437 12731 20939 2243 1010 2001 2025 2583 2000 4125 2055 1996 9951 2791 1997 1996 5436 1012 1045 2106 5959 4994 1000 24532 2080 2158 1000 2011 1996 2352 2111 2058 1996 5494 6495 1012 1996 2717 1997 1996 3185 2453 2031 2042 2000 3917 3085 2065 2009 2020 2000 4125 2000 2008 2504 1997 2943 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:25.017712 4439307712 run_classifier.py:465] input_ids: 101 1996 4569 15580 2102 2836 2001 2011 21146 21297 22545 1010 2004 4717 14602 1005 1055 3565 5302 9247 6513 1012 2016 2001 2062 5875 2000 2033 2084 2035 1996 2599 5889 1012 2023 3185 2288 2009 2035 3308 1025 2130 1996 2087 12530 3085 3883 1997 1996 2301 1010 7437 12731 20939 2243 1010 2001 2025 2583 2000 4125 2055 1996 9951 2791 1997 1996 5436 1012 1045 2106 5959 4994 1000 24532 2080 2158 1000 2011 1996 2352 2111 2058 1996 5494 6495 1012 1996 2717 1997 1996 3185 2453 2031 2042 2000 3917 3085 2065 2009 2020 2000 4125 2000 2008 2504 1997 2943 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:25.019587 4439307712 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:25.020916 4439307712 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0329 08:21:25.022037 4439307712 run_classifier.py:468] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0329 08:21:25.026874 4439307712 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0329 08:21:25.028592 4439307712 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] you get an hour and a half of brain ##de ##ad i have to save the world action all the way , one liner ##s and a woman somewhere in the equation . this time he plays a guy who help people in the witness protection program . to quote the cover of the movie . he erase ##s their past to save their future . wo ##pp ##i , this have to be cool : ) i bet younger people will like it though but it is a little over the top for my taste . and the one liner ##s are really something else , and i don ' t mean that in a good way . after killing a crocodile he manage to [SEP]


I0329 08:21:25.029922 4439307712 run_classifier.py:464] tokens: [CLS] you get an hour and a half of brain ##de ##ad i have to save the world action all the way , one liner ##s and a woman somewhere in the equation . this time he plays a guy who help people in the witness protection program . to quote the cover of the movie . he erase ##s their past to save their future . wo ##pp ##i , this have to be cool : ) i bet younger people will like it though but it is a little over the top for my taste . and the one liner ##s are really something else , and i don ' t mean that in a good way . after killing a crocodile he manage to [SEP]


INFO:tensorflow:input_ids: 101 2017 2131 2019 3178 1998 1037 2431 1997 4167 3207 4215 1045 2031 2000 3828 1996 2088 2895 2035 1996 2126 1010 2028 11197 2015 1998 1037 2450 4873 1999 1996 8522 1012 2023 2051 2002 3248 1037 3124 2040 2393 2111 1999 1996 7409 3860 2565 1012 2000 14686 1996 3104 1997 1996 3185 1012 2002 22505 2015 2037 2627 2000 3828 2037 2925 1012 24185 9397 2072 1010 2023 2031 2000 2022 4658 1024 1007 1045 6655 3920 2111 2097 2066 2009 2295 2021 2009 2003 1037 2210 2058 1996 2327 2005 2026 5510 1012 1998 1996 2028 11197 2015 2024 2428 2242 2842 1010 1998 1045 2123 1005 1056 2812 2008 1999 1037 2204 2126 1012 2044 4288 1037 21843 2002 6133 2000 102


I0329 08:21:25.031698 4439307712 run_classifier.py:465] input_ids: 101 2017 2131 2019 3178 1998 1037 2431 1997 4167 3207 4215 1045 2031 2000 3828 1996 2088 2895 2035 1996 2126 1010 2028 11197 2015 1998 1037 2450 4873 1999 1996 8522 1012 2023 2051 2002 3248 1037 3124 2040 2393 2111 1999 1996 7409 3860 2565 1012 2000 14686 1996 3104 1997 1996 3185 1012 2002 22505 2015 2037 2627 2000 3828 2037 2925 1012 24185 9397 2072 1010 2023 2031 2000 2022 4658 1024 1007 1045 6655 3920 2111 2097 2066 2009 2295 2021 2009 2003 1037 2210 2058 1996 2327 2005 2026 5510 1012 1998 1996 2028 11197 2015 2024 2428 2242 2842 1010 1998 1045 2123 1005 1056 2812 2008 1999 1037 2204 2126 1012 2044 4288 1037 21843 2002 6133 2000 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0329 08:21:25.033116 4439307712 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:25.034410 4439307712 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0329 08:21:25.035687 4439307712 run_classifier.py:468] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0329 08:21:25.041631 4439307712 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0329 08:21:25.042956 4439307712 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] i started to take a critical view on this adaptation within the first few minutes but as a dedicated jane austen fan i per ##se ##vered through to the end . . . however , this is not a programme i would recommend to someone unfamiliar with her work as i don ' t think it does the book justice , nor makes particularly entertaining television in its own right . there was something about this adaptation that lacked bel ##ie ##va ##bility - many of the costumes and even the actors did not have an authentic look and i found the acting to be , at times , poor . there is no doubt that the actors were all very good - looking , but [SEP]


I0329 08:21:25.044207 4439307712 run_classifier.py:464] tokens: [CLS] i started to take a critical view on this adaptation within the first few minutes but as a dedicated jane austen fan i per ##se ##vered through to the end . . . however , this is not a programme i would recommend to someone unfamiliar with her work as i don ' t think it does the book justice , nor makes particularly entertaining television in its own right . there was something about this adaptation that lacked bel ##ie ##va ##bility - many of the costumes and even the actors did not have an authentic look and i found the acting to be , at times , poor . there is no doubt that the actors were all very good - looking , but [SEP]


INFO:tensorflow:input_ids: 101 1045 2318 2000 2202 1037 4187 3193 2006 2023 6789 2306 1996 2034 2261 2781 2021 2004 1037 4056 4869 24177 5470 1045 2566 3366 25896 2083 2000 1996 2203 1012 1012 1012 2174 1010 2023 2003 2025 1037 4746 1045 2052 16755 2000 2619 16261 2007 2014 2147 2004 1045 2123 1005 1056 2228 2009 2515 1996 2338 3425 1010 4496 3084 3391 14036 2547 1999 2049 2219 2157 1012 2045 2001 2242 2055 2023 6789 2008 10858 19337 2666 3567 8553 1011 2116 1997 1996 12703 1998 2130 1996 5889 2106 2025 2031 2019 14469 2298 1998 1045 2179 1996 3772 2000 2022 1010 2012 2335 1010 3532 1012 2045 2003 2053 4797 2008 1996 5889 2020 2035 2200 2204 1011 2559 1010 2021 102


I0329 08:21:25.045583 4439307712 run_classifier.py:465] input_ids: 101 1045 2318 2000 2202 1037 4187 3193 2006 2023 6789 2306 1996 2034 2261 2781 2021 2004 1037 4056 4869 24177 5470 1045 2566 3366 25896 2083 2000 1996 2203 1012 1012 1012 2174 1010 2023 2003 2025 1037 4746 1045 2052 16755 2000 2619 16261 2007 2014 2147 2004 1045 2123 1005 1056 2228 2009 2515 1996 2338 3425 1010 4496 3084 3391 14036 2547 1999 2049 2219 2157 1012 2045 2001 2242 2055 2023 6789 2008 10858 19337 2666 3567 8553 1011 2116 1997 1996 12703 1998 2130 1996 5889 2106 2025 2031 2019 14469 2298 1998 1045 2179 1996 3772 2000 2022 1010 2012 2335 1010 3532 1012 2045 2003 2053 4797 2008 1996 5889 2020 2035 2200 2204 1011 2559 1010 2021 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0329 08:21:25.046820 4439307712 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:25.048069 4439307712 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0329 08:21:25.049121 4439307712 run_classifier.py:468] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0329 08:21:25.057547 4439307712 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0329 08:21:25.059109 4439307712 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] my yards ##tick for measuring a movie ' s watch - ability is if i get sq ##ui ##rm ##y . if i start shifting positions and noticing my butt is sore , the film is too long . this movie did not even come close to being boring . predictable in some parts sure , but never boring . < br / > < br / > all of the other military branches have had love notes written about them and seen their recruitment levels go up , why not the coast guard too ? they are definitely under - appreciated , until the day your boat sinks that is . < br / > < br / > the movie was very enjoyable and [SEP]


I0329 08:21:25.060051 4439307712 run_classifier.py:464] tokens: [CLS] my yards ##tick for measuring a movie ' s watch - ability is if i get sq ##ui ##rm ##y . if i start shifting positions and noticing my butt is sore , the film is too long . this movie did not even come close to being boring . predictable in some parts sure , but never boring . < br / > < br / > all of the other military branches have had love notes written about them and seen their recruitment levels go up , why not the coast guard too ? they are definitely under - appreciated , until the day your boat sinks that is . < br / > < br / > the movie was very enjoyable and [SEP]


INFO:tensorflow:input_ids: 101 2026 4210 26348 2005 9854 1037 3185 1005 1055 3422 1011 3754 2003 2065 1045 2131 5490 10179 10867 2100 1012 2065 1045 2707 9564 4460 1998 15103 2026 10007 2003 14699 1010 1996 2143 2003 2205 2146 1012 2023 3185 2106 2025 2130 2272 2485 2000 2108 11771 1012 21425 1999 2070 3033 2469 1010 2021 2196 11771 1012 1026 7987 1013 1028 1026 7987 1013 1028 2035 1997 1996 2060 2510 5628 2031 2018 2293 3964 2517 2055 2068 1998 2464 2037 15680 3798 2175 2039 1010 2339 2025 1996 3023 3457 2205 1029 2027 2024 5791 2104 1011 12315 1010 2127 1996 2154 2115 4049 23462 2008 2003 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 3185 2001 2200 22249 1998 102


I0329 08:21:25.061082 4439307712 run_classifier.py:465] input_ids: 101 2026 4210 26348 2005 9854 1037 3185 1005 1055 3422 1011 3754 2003 2065 1045 2131 5490 10179 10867 2100 1012 2065 1045 2707 9564 4460 1998 15103 2026 10007 2003 14699 1010 1996 2143 2003 2205 2146 1012 2023 3185 2106 2025 2130 2272 2485 2000 2108 11771 1012 21425 1999 2070 3033 2469 1010 2021 2196 11771 1012 1026 7987 1013 1028 1026 7987 1013 1028 2035 1997 1996 2060 2510 5628 2031 2018 2293 3964 2517 2055 2068 1998 2464 2037 15680 3798 2175 2039 1010 2339 2025 1996 3023 3457 2205 1029 2027 2024 5791 2104 1011 12315 1010 2127 1996 2154 2115 4049 23462 2008 2003 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 3185 2001 2200 22249 1998 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0329 08:21:25.062987 4439307712 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0329 08:21:25.064316 4439307712 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0329 08:21:25.065471 4439307712 run_classifier.py:468] label: 1 (id = 1)


# Creating a model

Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning).

In [24]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Builds the BERT classification graph: hub module + one softmax layer.

  Args:
    is_predicting: True when the graph is built for inference. In that case
      no loss is computed and dropout is switched off.
    input_ids: token ids passed to the hub module's "tokens" signature.
    input_mask: input mask passed to the "tokens" signature.
    segment_ids: segment ids passed to the "tokens" signature.
    labels: integer class ids; only read when `is_predicting` is False.
    num_labels: number of output classes (size of the new output layer).

  Returns:
    `(predicted_labels, log_probs)` when `is_predicting` is True, otherwise
    `(loss, predicted_labels, log_probs)`. `log_probs` are log-softmax
    values, not probabilities.
  """

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_output" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own layer to tune for the sentiment data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting, but it must stay off at inference
    # time: otherwise the same sentence can get different predictions on
    # different calls.
    # NOTE(review): this function cannot tell TRAIN from EVAL, so evaluation
    # still runs with dropout, exactly as before.
    if not is_predicting:
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # argmax over the class axis already yields one label per example.
    # Do not squeeze it: a batch holding a single example would collapse to a
    # scalar and lose the batch dimension that the labels (and the Estimator)
    # expect.
    predicted_labels = tf.argmax(log_probs, axis=-1, output_type=tf.int32)

    # If we're predicting, we want predicted labels and the log-probabilities.
    if is_predicting:
      return (predicted_labels, log_probs)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    # If we're train/eval, compute the cross-entropy between the predicted
    # distribution and the actual label, averaged over the batch.
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction.

In [25]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns a `model_fn` closure for `tf.estimator.Estimator`.

  Args:
    num_labels: number of output classes, forwarded to `create_model`.
    learning_rate: forwarded to `bert.optimization.create_optimizer`.
    num_train_steps: total number of training steps, forwarded to the
      optimizer.
    num_warmup_steps: number of warmup steps, forwarded to the optimizer.

  Returns:
    A function `model_fn(features, labels, mode, params)` that builds the
    `EstimatorSpec` for TRAIN, EVAL or PREDICT.
  """
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Builds the `EstimatorSpec` for the requested `mode`.

    Labels are read from `features["label_ids"]`; the `labels` and `params`
    arguments are not used.
    """

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

    # PREDICT
    if is_predicting:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      # NOTE: despite the key name, 'probabilities' holds log-softmax values.
      # The key is kept as-is because callers read it by this name.
      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # TRAIN and EVAL
    (loss, predicted_labels, log_probs) = create_model(
      is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

    if mode == tf.estimator.ModeKeys.TRAIN:
      # Only build the optimizer (and its gradient ops) when actually
      # training; an evaluation graph has no use for it.
      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)
      return tf.estimator.EstimatorSpec(mode=mode,
        loss=loss,
        train_op=train_op)

    # EVAL: calculate evaluation metrics.
    def metric_fn(label_ids, predicted_labels):
      """Returns the dict of streaming metric ops reported by evaluate()."""
      accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
      f1_score = tf.contrib.metrics.f1_score(
          label_ids,
          predicted_labels)
      # NOTE(review): AUC is fed hard 0/1 predictions rather than scores,
      # so it is computed from a single operating point.
      auc = tf.metrics.auc(
          label_ids,
          predicted_labels)
      recall = tf.metrics.recall(
          label_ids,
          predicted_labels)
      precision = tf.metrics.precision(
          label_ids,
          predicted_labels)
      true_pos = tf.metrics.true_positives(
          label_ids,
          predicted_labels)
      true_neg = tf.metrics.true_negatives(
          label_ids,
          predicted_labels)
      false_pos = tf.metrics.false_positives(
          label_ids,
          predicted_labels)
      false_neg = tf.metrics.false_negatives(
          label_ids,
          predicted_labels)
      return {
          "eval_accuracy": accuracy,
          "f1_score": f1_score,
          "auc": auc,
          "precision": precision,
          "recall": recall,
          "true_positives": true_pos,
          "true_negatives": true_neg,
          "false_positives": false_pos,
          "false_negatives": false_neg
      }

    eval_metrics = metric_fn(label_ids, predicted_labels)

    return tf.estimator.EstimatorSpec(mode=mode,
      loss=loss,
      eval_metric_ops=eval_metrics)

  # Return the actual model function in the closure
  return model_fn


In [26]:
# Training hyperparameters.
# These values are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
NUM_TRAIN_EPOCHS = 3.0
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
# Warmup is a period of time where the learning rate
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Estimator run settings: how often (in steps) to write summaries and checkpoints.
SAVE_SUMMARY_STEPS = 100
SAVE_CHECKPOINTS_STEPS = 500

In [27]:
# Total optimizer steps: batches per epoch (examples / batch size) times epochs.
num_train_steps = int(
    len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
# The leading WARMUP_PROPORTION of those steps is spent warming up.
num_warmup_steps = int(WARMUP_PROPORTION * num_train_steps)

In [28]:
# Tell the Estimator where to write its output and how often (in steps)
# to save checkpoints and summaries.
run_config = tf.estimator.RunConfig(
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    model_dir=OUTPUT_DIR)

In [29]:
# Bind the hyperparameters into a model function ...
model_fn = model_fn_builder(
    num_labels=len(label_list),
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    learning_rate=LEARNING_RATE)

# ... and hand it to an Estimator together with the run configuration.
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    params={"batch_size": BATCH_SIZE},
    config=run_config)


INFO:tensorflow:Using config: {'_model_dir': 'bert-tfhub/aclImdb_v1', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a2fed2240>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


I0329 08:37:47.744974 4439307712 estimator.py:201] Using config: {'_model_dir': 'bert-tfhub/aclImdb_v1', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a2fed2240>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Next we create an input builder function that takes our training feature set (`train_features`) and produces an input function, which the Estimator calls to get its training batches. This is a pretty standard design pattern for working with Tensorflow [Estimators](https://www.tensorflow.org/guide/estimators).

In [30]:
# Input function for training. Set drop_remainder=True when using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    drop_remainder=False,
    is_training=True)

Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.

In [31]:
# Train for num_train_steps steps and report the wall-clock duration.
print('Beginning Training!')
current_time = datetime.now()
estimator.train(
    input_fn=train_input_fn,
    max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
INFO:tensorflow:Calling model_fn.


I0329 08:38:02.615426 4439307712 estimator.py:1111] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0329 08:38:05.568967 4439307712 saver.py:1483] Saver not created because there are no variables in the graph to restore


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


W0329 08:38:05.713428 4439307712 deprecation.py:506] From <ipython-input-24-ca03218f28a6>:34: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


W0329 08:38:05.761381 4439307712 deprecation.py:323] From /Users/renato/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/learning_rate_decay_v2.py:321: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


Instructions for updating:
Use tf.cast instead.


W0329 08:38:05.830350 4439307712 deprecation.py:323] From /Users/renato/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Instructions for updating:
Use tf.cast instead.


W0329 08:38:13.058399 4439307712 deprecation.py:323] From /Users/renato/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/metrics_impl.py:455: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.



For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

INFO:tensorflow:Done calling model_fn.


I0329 08:38:14.841753 4439307712 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


I0329 08:38:14.845369 4439307712 basic_session_run_hooks.py:527] Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


I0329 08:38:18.769957 4439307712 monitored_session.py:222] Graph was finalized.


INFO:tensorflow:Running local_init_op.


I0329 08:38:23.078418 4439307712 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0329 08:38:23.394394 4439307712 session_manager.py:493] Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into bert-tfhub/aclImdb_v1/model.ckpt.


I0329 08:38:33.431998 4439307712 basic_session_run_hooks.py:594] Saving checkpoints for 0 into bert-tfhub/aclImdb_v1/model.ckpt.


INFO:tensorflow:loss = 0.7208684, step = 1


I0329 08:39:08.951168 4439307712 basic_session_run_hooks.py:249] loss = 0.7208684, step = 1


INFO:tensorflow:global_step/sec: 0.0508141


I0329 09:11:56.907016 4439307712 basic_session_run_hooks.py:680] global_step/sec: 0.0508141


INFO:tensorflow:loss = 0.41532114, step = 101 (1967.960 sec)


I0329 09:11:56.911420 4439307712 basic_session_run_hooks.py:247] loss = 0.41532114, step = 101 (1967.960 sec)


KeyboardInterrupt: 

Now let's use our test data to see how well our model did:

In [32]:
# Input function for evaluation over the held-out test features.
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    drop_remainder=False,
    is_training=False)

In [None]:
# Evaluate on the test input function; the returned metrics dict is shown as
# the cell output. steps=None puts no step limit on the evaluation.
estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.


I0329 09:29:57.689817 4439307712 estimator.py:1111] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0329 09:30:01.320329 4439307712 saver.py:1483] Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


I0329 09:30:09.264703 4439307712 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2019-03-29T12:30:09Z


I0329 09:30:09.285585 4439307712 evaluation.py:257] Starting evaluation at 2019-03-29T12:30:09Z


INFO:tensorflow:Graph was finalized.


I0329 09:30:10.611688 4439307712 monitored_session.py:222] Graph was finalized.


Instructions for updating:
Use standard file APIs to check for files with this prefix.


W0329 09:30:10.613604 4439307712 deprecation.py:323] From /Users/renato/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


INFO:tensorflow:Restoring parameters from bert-tfhub/aclImdb_v1/model.ckpt-0


I0329 09:30:10.615682 4439307712 saver.py:1270] Restoring parameters from bert-tfhub/aclImdb_v1/model.ckpt-0


INFO:tensorflow:Running local_init_op.


I0329 09:30:13.181838 4439307712 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0329 09:30:13.488112 4439307712 session_manager.py:493] Done running local_init_op.


Now let's write code to make predictions on new sentences:

In [None]:
def getPrediction(in_sentences):
  """Classifies each sentence as "Negative" or "Positive".

  Args:
    in_sentences: iterable of raw sentence strings. May be empty, and may be
      a one-shot iterable such as a generator.

  Returns:
    A list of `(sentence, scores, label_name)` tuples, pairing the sentences
    with the estimator's predictions in the order they are yielded. `scores`
    is whatever the model function stores under the 'probabilities' key
    (log-softmax values in this notebook). Returns `[]` for empty input.
  """
  labels = ["Negative", "Positive"]
  # Materialise once: the sentences are walked twice below (to build the
  # examples and again to pair them with predictions).
  sentences = list(in_sentences)
  if not sentences:
    # Nothing to classify; skip building an input pipeline over zero rows.
    return []
  # guid="" and label=0 are placeholders: InputExample needs them, but they are not used to make the prediction.
  input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in sentences]
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  predictions = estimator.predict(predict_input_fn)
  return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(sentences, predictions)]

In [None]:
# A few hand-written reviews, ranging from negative to positive, to try the
# classifier on.
pred_sentences = [
    "That movie was absolutely awful",
    "The acting was a bit lacking",
    "The film was creative and surprising",
    "Absolutely fantastic!",
]

In [None]:
# One (sentence, scores, label name) tuple per entry in pred_sentences.
predictions = getPrediction(pred_sentences)

Voila! We have a sentiment classifier!

In [None]:
# Bare expression so the notebook displays the list as the cell output.
predictions