In [None]:
# Copyright 2019 Google Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#Predicting Movie Review Sentiment with BERT on TF Hub

If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.

Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing Tensorflow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMO and GloVE. Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.

Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in Tensorflow with tf hub. Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!

In [1]:
# %tensorflow_version 1.x
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

In addition to the standard libraries we imported above, we'll need to install BERT's python package.

In [2]:
!pip install bert-tensorflow



In [3]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization




Below, we'll set an output directory location to store our model output and checkpoints. This can be a local directory, in which case you'd set OUTPUT_DIR to the name of the directory you'd like to create. If you're running this code in Google's hosted Colab, the directory won't persist after the Colab session ends.

Alternatively, if you're a GCP user, you can store output in a GCP bucket. To do that, set a directory name in OUTPUT_DIR and the name of the GCP bucket in the BUCKET field.

Set DO_DELETE to rewrite the OUTPUT_DIR if it exists. Otherwise, Tensorflow will load existing model checkpoints from that directory (if they exist).

In [4]:
# Set the output directory for saving model file
# Optionally, set a GCP bucket location

OUTPUT_DIR = 'output_dir'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = False #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}

if USE_BUCKET:
  OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
  from google.colab import auth
  auth.authenticate_user()

if DO_DELETE:
  try:
    tf.gfile.DeleteRecursively(OUTPUT_DIR)
  except:
    # Doesn't matter if the directory didn't exist
    pass
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


***** Model output directory: output_dir *****


#Data

First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).

In [5]:
from tensorflow import keras
import os
import re

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
  data = {}
  data["sentence"] = []
  data["sentiment"] = []
  for file_path in os.listdir(directory):
    with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
      data["sentence"].append(f.read())
      data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
  return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
  pos_df = load_directory_data(os.path.join(directory, "pos"))
  neg_df = load_directory_data(os.path.join(directory, "neg"))
  pos_df["polarity"] = 1
  neg_df["polarity"] = 0
  return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False):
  dataset = tf.keras.utils.get_file(
      fname="aclImdb.tar.gz", 
      origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", 
      extract=True)
  
  train_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                       "aclImdb", "train"))
  test_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                      "aclImdb", "test"))
  
  return train_df, test_df


In [6]:
train, test = download_and_load_datasets()

In [7]:
train

Unnamed: 0,sentence,sentiment,polarity
0,Salvage is the worst so called horror film I'v...,1,0
1,I fell asleep on my couch at 7:35pm last night...,1,0
2,I was lucky enough to have seen this on a whim...,10,1
3,I'm all for the idea of a grand epic of the Am...,1,0
4,"Boy, this was one lousy movie! While I haven't...",2,0
...,...,...,...
24995,This movie will kick your ass! Powerful acting...,10,1
24996,"First one was much better, I had enjoyed it a ...",2,0
24997,It's a great American martial arts movie. The ...,10,1
24998,Alone in The Dark is one of my favorite role-p...,1,0


To keep training fast, we'll take a sample of 5000 train and test examples, respectively.

In [8]:
train = train.sample(5000)
test = test.sample(5000)

For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respecitvely)

In [9]:
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]

#Data Preprocessing
We'll need to transform our data into a format BERT understands. This involves two steps. First, we create  `InputExample`'s using the constructor provided in the BERT library.

- `text_a` is the text we want to classify, which in this case, is the `Request` field in our Dataframe. 
- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`? Is `text_b` an answer to the question asked by `text_a`?). This doesn't apply to our task, so we can leave `text_b` blank.
- `label` is the label for our example, i.e. True, False

In [10]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):


1. Lowercase our text (if we're using a BERT lowercase model)
2. Tokenize it (i.e. "sally says hi" -> ["sally", "says", "hi"])
3. Break words into WordPieces (i.e. "calling" -> ["call", "##ing"])
4. Map our words to indexes using a vocab file that BERT provides
5. Add special "CLS" and "SEP" tokens (see the [readme](https://github.com/google-research/bert))
6. Append "index" and "segment" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))

Happily, we don't have to worry about most of these details.




To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:

In [11]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  with tf.Graph().as_default():
    bert_module = hub.Module(BERT_MODEL_HUB)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    with tf.Session() as sess:
      vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                            tokenization_info["do_lower_case"]])
      
  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Great--we just learned that the BERT model we're using expects lowercase data (that's what stored in tokenization_info["do_lower_case"]) and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:

In [12]:
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands.

In [13]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)







INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i am glad other people enjoyed this movie , cause i know it doesn ' t have the greatest reputation and it made no money at the box office . i thought it was terrific and there are several reasons why - bog ##dan ##ovich directs with the light ##est of touches , the cast ( especially cole ##en camp ) is perfect and the big bad apple never looked better on film . you ' ve seen worse movies ! [SEP]


INFO:tensorflow:tokens: [CLS] i am glad other people enjoyed this movie , cause i know it doesn ' t have the greatest reputation and it made no money at the box office . i thought it was terrific and there are several reasons why - bog ##dan ##ovich directs with the light ##est of touches , the cast ( especially cole ##en camp ) is perfect and the big bad apple never looked better on film . you ' ve seen worse movies ! [SEP]


INFO:tensorflow:input_ids: 101 1045 2572 5580 2060 2111 5632 2023 3185 1010 3426 1045 2113 2009 2987 1005 1056 2031 1996 4602 5891 1998 2009 2081 2053 2769 2012 1996 3482 2436 1012 1045 2245 2009 2001 27547 1998 2045 2024 2195 4436 2339 1011 22132 7847 12303 23303 2007 1996 2422 4355 1997 12817 1010 1996 3459 1006 2926 5624 2368 3409 1007 2003 3819 1998 1996 2502 2919 6207 2196 2246 2488 2006 2143 1012 2017 1005 2310 2464 4788 5691 999 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1045 2572 5580 2060 2111 5632 2023 3185 1010 3426 1045 2113 2009 2987 1005 1056 2031 1996 4602 5891 1998 2009 2081 2053 2769 2012 1996 3482 2436 1012 1045 2245 2009 2001 27547 1998 2045 2024 2195 4436 2339 1011 22132 7847 12303 23303 2007 1996 2422 4355 1997 12817 1010 1996 3459 1006 2926 5624 2368 3409 1007 2003 3819 1998 1996 2502 2919 6207 2196 2246 2488 2006 2143 1012 2017 1005 2310 2464 4788 5691 999 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this is only related to the first movie by the name . the plot has nothing to do with the first and the whole movie stink ##s ! ! ! i have no idea what they were thinking but this movie is so bad . avoid this at all costs , the first movie in the series is acceptable as a slash ##er flick and so is the fourth but this one and the 3rd are rubbish ! ! [SEP]


INFO:tensorflow:tokens: [CLS] this is only related to the first movie by the name . the plot has nothing to do with the first and the whole movie stink ##s ! ! ! i have no idea what they were thinking but this movie is so bad . avoid this at all costs , the first movie in the series is acceptable as a slash ##er flick and so is the fourth but this one and the 3rd are rubbish ! ! [SEP]


INFO:tensorflow:input_ids: 101 2023 2003 2069 3141 2000 1996 2034 3185 2011 1996 2171 1012 1996 5436 2038 2498 2000 2079 2007 1996 2034 1998 1996 2878 3185 27136 2015 999 999 999 1045 2031 2053 2801 2054 2027 2020 3241 2021 2023 3185 2003 2061 2919 1012 4468 2023 2012 2035 5366 1010 1996 2034 3185 1999 1996 2186 2003 11701 2004 1037 18296 2121 17312 1998 2061 2003 1996 2959 2021 2023 2028 1998 1996 3822 2024 29132 999 999 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 2023 2003 2069 3141 2000 1996 2034 3185 2011 1996 2171 1012 1996 5436 2038 2498 2000 2079 2007 1996 2034 1998 1996 2878 3185 27136 2015 999 999 999 1045 2031 2053 2801 2054 2027 2020 3241 2021 2023 3185 2003 2061 2919 1012 4468 2023 2012 2035 5366 1010 1996 2034 3185 1999 1996 2186 2003 11701 2004 1037 18296 2121 17312 1998 2061 2003 1996 2959 2021 2023 2028 1998 1996 3822 2024 29132 999 999 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this is the best sci - fi that i have seen in my 29 years of watching sci - fi . i also believe that dark angel will become a cult favorite . the action is great but jessica alba is the best and most gorgeous star on tv today . [SEP]


INFO:tensorflow:tokens: [CLS] this is the best sci - fi that i have seen in my 29 years of watching sci - fi . i also believe that dark angel will become a cult favorite . the action is great but jessica alba is the best and most gorgeous star on tv today . [SEP]


INFO:tensorflow:input_ids: 101 2023 2003 1996 2190 16596 1011 10882 2008 1045 2031 2464 1999 2026 2756 2086 1997 3666 16596 1011 10882 1012 1045 2036 2903 2008 2601 4850 2097 2468 1037 8754 5440 1012 1996 2895 2003 2307 2021 8201 18255 2003 1996 2190 1998 2087 9882 2732 2006 2694 2651 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 2023 2003 1996 2190 16596 1011 10882 2008 1045 2031 2464 1999 2026 2756 2086 1997 3666 16596 1011 10882 1012 1045 2036 2903 2008 2601 4850 2097 2468 1037 8754 5440 1012 1996 2895 2003 2307 2021 8201 18255 2003 1996 2190 1998 2087 9882 2732 2006 2694 2651 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] poor robert eng ##lund makes another flop and to the expense of to ##be hooper who usually makes pretty good horror movies but he failed pretty bad at this one . eng ##lund plays the well known mar ##que de sad ##e who in the 17th century was en ##pr ##ison ##ed for his obsession of pain and the pleasure of bringing pain upon himself as well as watching others also be in pain . the story is so confusing with the flip flop from one century to another and i became confused as to what was going on and what was the purpose of this movie . all i saw was a young lady that became en ##tra ##pped by a strange lesbian who des [SEP]


INFO:tensorflow:tokens: [CLS] poor robert eng ##lund makes another flop and to the expense of to ##be hooper who usually makes pretty good horror movies but he failed pretty bad at this one . eng ##lund plays the well known mar ##que de sad ##e who in the 17th century was en ##pr ##ison ##ed for his obsession of pain and the pleasure of bringing pain upon himself as well as watching others also be in pain . the story is so confusing with the flip flop from one century to another and i became confused as to what was going on and what was the purpose of this movie . all i saw was a young lady that became en ##tra ##pped by a strange lesbian who des [SEP]


INFO:tensorflow:input_ids: 101 3532 2728 25540 18028 3084 2178 28583 1998 2000 1996 10961 1997 2000 4783 23717 2040 2788 3084 3492 2204 5469 5691 2021 2002 3478 3492 2919 2012 2023 2028 1012 25540 18028 3248 1996 2092 2124 9388 4226 2139 6517 2063 2040 1999 1996 5550 2301 2001 4372 18098 10929 2098 2005 2010 17418 1997 3255 1998 1996 5165 1997 5026 3255 2588 2370 2004 2092 2004 3666 2500 2036 2022 1999 3255 1012 1996 2466 2003 2061 16801 2007 1996 11238 28583 2013 2028 2301 2000 2178 1998 1045 2150 5457 2004 2000 2054 2001 2183 2006 1998 2054 2001 1996 3800 1997 2023 3185 1012 2035 1045 2387 2001 1037 2402 3203 2008 2150 4372 6494 11469 2011 1037 4326 11690 2040 4078 102


INFO:tensorflow:input_ids: 101 3532 2728 25540 18028 3084 2178 28583 1998 2000 1996 10961 1997 2000 4783 23717 2040 2788 3084 3492 2204 5469 5691 2021 2002 3478 3492 2919 2012 2023 2028 1012 25540 18028 3248 1996 2092 2124 9388 4226 2139 6517 2063 2040 1999 1996 5550 2301 2001 4372 18098 10929 2098 2005 2010 17418 1997 3255 1998 1996 5165 1997 5026 3255 2588 2370 2004 2092 2004 3666 2500 2036 2022 1999 3255 1012 1996 2466 2003 2061 16801 2007 1996 11238 28583 2013 2028 2301 2000 2178 1998 1045 2150 5457 2004 2000 2054 2001 2183 2006 1998 2054 2001 1996 3800 1997 2023 3185 1012 2035 1045 2387 2001 1037 2402 3203 2008 2150 4372 6494 11469 2011 1037 4326 11690 2040 4078 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] jim carr ##ey is one of the fun ##nies ##t and most gifted comedians in film today . with his hyper ##active sp ##ont ##ane ##ity and his rubber face he can just go crazy , and we love him for it . he has the ability to make med ##io ##cre comedies ( ala ace ventura ) , and turn them into decent comedic outing ##s . or , in the case of ' liar liar ' , make them some of the most hilarious contemporary comedies around . carr ##ey has also proven himself capable of tack ##ling dramas . he was excellent in both ' man on the moon ' and ' the truman show . ' the guy is remarkable . < [SEP]


INFO:tensorflow:tokens: [CLS] jim carr ##ey is one of the fun ##nies ##t and most gifted comedians in film today . with his hyper ##active sp ##ont ##ane ##ity and his rubber face he can just go crazy , and we love him for it . he has the ability to make med ##io ##cre comedies ( ala ace ventura ) , and turn them into decent comedic outing ##s . or , in the case of ' liar liar ' , make them some of the most hilarious contemporary comedies around . carr ##ey has also proven himself capable of tack ##ling dramas . he was excellent in both ' man on the moon ' and ' the truman show . ' the guy is remarkable . < [SEP]


INFO:tensorflow:input_ids: 101 3958 12385 3240 2003 2028 1997 1996 4569 15580 2102 1998 2087 12785 25119 1999 2143 2651 1012 2007 2010 23760 19620 11867 12162 7231 3012 1998 2010 8903 2227 2002 2064 2074 2175 4689 1010 1998 2057 2293 2032 2005 2009 1012 2002 2038 1996 3754 2000 2191 19960 3695 16748 22092 1006 21862 9078 21151 1007 1010 1998 2735 2068 2046 11519 21699 26256 2015 1012 2030 1010 1999 1996 2553 1997 1005 16374 16374 1005 1010 2191 2068 2070 1997 1996 2087 26316 3824 22092 2105 1012 12385 3240 2038 2036 10003 2370 5214 1997 26997 2989 16547 1012 2002 2001 6581 1999 2119 1005 2158 2006 1996 4231 1005 1998 1005 1996 15237 2265 1012 1005 1996 3124 2003 9487 1012 1026 102


INFO:tensorflow:input_ids: 101 3958 12385 3240 2003 2028 1997 1996 4569 15580 2102 1998 2087 12785 25119 1999 2143 2651 1012 2007 2010 23760 19620 11867 12162 7231 3012 1998 2010 8903 2227 2002 2064 2074 2175 4689 1010 1998 2057 2293 2032 2005 2009 1012 2002 2038 1996 3754 2000 2191 19960 3695 16748 22092 1006 21862 9078 21151 1007 1010 1998 2735 2068 2046 11519 21699 26256 2015 1012 2030 1010 1999 1996 2553 1997 1005 16374 16374 1005 1010 2191 2068 2070 1997 1996 2087 26316 3824 22092 2105 1012 12385 3240 2038 2036 10003 2370 5214 1997 26997 2989 16547 1012 2002 2001 6581 1999 2119 1005 2158 2006 1996 4231 1005 1998 1005 1996 15237 2265 1012 1005 1996 3124 2003 9487 1012 1026 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] ridiculous , nausea ##ting dogg ##ere ##l with terrible acting ; in ##ept ##ly , superficial ##ly , and conde ##sc ##ending ##ly tr ##aw ##ling all the most ban ##al cl ##iche ##s about tuscany and italy , divorce and mid ##life . the main actor nervously grimace ##s her way through the film , struggling to portray the appropriate level of smug , self - cong ##rat ##ulator ##y self - pity the worthless character and script call for . i ' m sure the book was bad , but it can ' t have been this bad ! the camera is permanently fitted with a vomit - yellow " tu ##scan " lens ##e filter ( perhaps the tu ##scan sun wasn ' [SEP]


INFO:tensorflow:tokens: [CLS] ridiculous , nausea ##ting dogg ##ere ##l with terrible acting ; in ##ept ##ly , superficial ##ly , and conde ##sc ##ending ##ly tr ##aw ##ling all the most ban ##al cl ##iche ##s about tuscany and italy , divorce and mid ##life . the main actor nervously grimace ##s her way through the film , struggling to portray the appropriate level of smug , self - cong ##rat ##ulator ##y self - pity the worthless character and script call for . i ' m sure the book was bad , but it can ' t have been this bad ! the camera is permanently fitted with a vomit - yellow " tu ##scan " lens ##e filter ( perhaps the tu ##scan sun wasn ' [SEP]


INFO:tensorflow:input_ids: 101 9951 1010 19029 3436 28844 7869 2140 2007 6659 3772 1025 1999 23606 2135 1010 23105 2135 1010 1998 24707 11020 18537 2135 19817 10376 2989 2035 1996 2087 7221 2389 18856 17322 2015 2055 23322 1998 3304 1010 8179 1998 3054 15509 1012 1996 2364 3364 12531 25898 2015 2014 2126 2083 1996 2143 1010 8084 2000 17279 1996 6413 2504 1997 20673 1010 2969 1011 26478 8609 20350 2100 2969 1011 12063 1996 22692 2839 1998 5896 2655 2005 1012 1045 1005 1049 2469 1996 2338 2001 2919 1010 2021 2009 2064 1005 1056 2031 2042 2023 2919 999 1996 4950 2003 8642 7130 2007 1037 23251 1011 3756 1000 10722 29378 1000 10014 2063 11307 1006 3383 1996 10722 29378 3103 2347 1005 102


INFO:tensorflow:input_ids: 101 9951 1010 19029 3436 28844 7869 2140 2007 6659 3772 1025 1999 23606 2135 1010 23105 2135 1010 1998 24707 11020 18537 2135 19817 10376 2989 2035 1996 2087 7221 2389 18856 17322 2015 2055 23322 1998 3304 1010 8179 1998 3054 15509 1012 1996 2364 3364 12531 25898 2015 2014 2126 2083 1996 2143 1010 8084 2000 17279 1996 6413 2504 1997 20673 1010 2969 1011 26478 8609 20350 2100 2969 1011 12063 1996 22692 2839 1998 5896 2655 2005 1012 1045 1005 1049 2469 1996 2338 2001 2919 1010 2021 2009 2064 1005 1056 2031 2042 2023 2919 999 1996 4950 2003 8642 7130 2007 1037 23251 1011 3756 1000 10722 29378 1000 10014 2063 11307 1006 3383 1996 10722 29378 3103 2347 1005 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this could have been a really good movie if someone would just have known how to finish the film . < br / > < br / > the story was going along just fine and heading towards that point in every movie like this where the " gray " characters turn " good " and the " bad " guys get their just dessert ##s and * boom * . . . it ' s like they ran out of script and the cast just started to make things up . < br / > < br / > which wouldn ' t have been so bad . . . if the cast had just continued with the character development they had already put in [SEP]


INFO:tensorflow:tokens: [CLS] this could have been a really good movie if someone would just have known how to finish the film . < br / > < br / > the story was going along just fine and heading towards that point in every movie like this where the " gray " characters turn " good " and the " bad " guys get their just dessert ##s and * boom * . . . it ' s like they ran out of script and the cast just started to make things up . < br / > < br / > which wouldn ' t have been so bad . . . if the cast had just continued with the character development they had already put in [SEP]


INFO:tensorflow:input_ids: 101 2023 2071 2031 2042 1037 2428 2204 3185 2065 2619 2052 2074 2031 2124 2129 2000 3926 1996 2143 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2466 2001 2183 2247 2074 2986 1998 5825 2875 2008 2391 1999 2296 3185 2066 2023 2073 1996 1000 3897 1000 3494 2735 1000 2204 1000 1998 1996 1000 2919 1000 4364 2131 2037 2074 18064 2015 1998 1008 8797 1008 1012 1012 1012 2009 1005 1055 2066 2027 2743 2041 1997 5896 1998 1996 3459 2074 2318 2000 2191 2477 2039 1012 1026 7987 1013 1028 1026 7987 1013 1028 2029 2876 1005 1056 2031 2042 2061 2919 1012 1012 1012 2065 1996 3459 2018 2074 2506 2007 1996 2839 2458 2027 2018 2525 2404 1999 102


INFO:tensorflow:input_ids: 101 2023 2071 2031 2042 1037 2428 2204 3185 2065 2619 2052 2074 2031 2124 2129 2000 3926 1996 2143 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2466 2001 2183 2247 2074 2986 1998 5825 2875 2008 2391 1999 2296 3185 2066 2023 2073 1996 1000 3897 1000 3494 2735 1000 2204 1000 1998 1996 1000 2919 1000 4364 2131 2037 2074 18064 2015 1998 1008 8797 1008 1012 1012 1012 2009 1005 1055 2066 2027 2743 2041 1997 5896 1998 1996 3459 2074 2318 2000 2191 2477 2039 1012 1026 7987 1013 1028 1026 7987 1013 1028 2029 2876 1005 1056 2031 2042 2061 2919 1012 1012 1012 2065 1996 3459 2018 2074 2506 2007 1996 2839 2458 2027 2018 2525 2404 1999 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] muscular ' scientists ' , unpleasant ##ly thin females in swim ##suit ##s , lots of beer drinking . . yet it ' s too long to be a beer commercial . oh , okay , there ' s some plot about a big shark - like monster that ' s killing people and stuff . but it ' s nothing you haven ' t seen before . [SEP]


INFO:tensorflow:tokens: [CLS] muscular ' scientists ' , unpleasant ##ly thin females in swim ##suit ##s , lots of beer drinking . . yet it ' s too long to be a beer commercial . oh , okay , there ' s some plot about a big shark - like monster that ' s killing people and stuff . but it ' s nothing you haven ' t seen before . [SEP]


INFO:tensorflow:input_ids: 101 13472 1005 6529 1005 1010 16010 2135 4857 3801 1999 9880 28880 2015 1010 7167 1997 5404 5948 1012 1012 2664 2009 1005 1055 2205 2146 2000 2022 1037 5404 3293 1012 2821 1010 3100 1010 2045 1005 1055 2070 5436 2055 1037 2502 11420 1011 2066 6071 2008 1005 1055 4288 2111 1998 4933 1012 2021 2009 1005 1055 2498 2017 4033 1005 1056 2464 2077 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 13472 1005 6529 1005 1010 16010 2135 4857 3801 1999 9880 28880 2015 1010 7167 1997 5404 5948 1012 1012 2664 2009 1005 1055 2205 2146 2000 2022 1037 5404 3293 1012 2821 1010 3100 1010 2045 1005 1055 2070 5436 2055 1037 2502 11420 1011 2066 6071 2008 1005 1055 4288 2111 1998 4933 1012 2021 2009 1005 1055 2498 2017 4033 1005 1056 2464 2077 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] alright , let me break it down for ya . . . haggard is probably one of the fun ##nies ##t pointless movies you ' ll ever see . it ' s got a mixture of a unique storyline about a guy having girl troubles and everything going backwards for him mixed in with countless humorous scenes that will keep you laughing throughout the whole movie , basically , if you ' ve seen jack ##ass or the ck ##y series , you ' d know what to expect for humor , considering it has most of the people from those movies . overall . . . i just had to give it a 10 / 10 because its one of my favorite movies of all [SEP]


INFO:tensorflow:tokens: [CLS] alright , let me break it down for ya . . . haggard is probably one of the fun ##nies ##t pointless movies you ' ll ever see . it ' s got a mixture of a unique storyline about a guy having girl troubles and everything going backwards for him mixed in with countless humorous scenes that will keep you laughing throughout the whole movie , basically , if you ' ve seen jack ##ass or the ck ##y series , you ' d know what to expect for humor , considering it has most of the people from those movies . overall . . . i just had to give it a 10 / 10 because its one of my favorite movies of all [SEP]


INFO:tensorflow:input_ids: 101 10303 1010 2292 2033 3338 2009 2091 2005 8038 1012 1012 1012 27912 2003 2763 2028 1997 1996 4569 15580 2102 23100 5691 2017 1005 2222 2412 2156 1012 2009 1005 1055 2288 1037 8150 1997 1037 4310 9994 2055 1037 3124 2383 2611 13460 1998 2673 2183 11043 2005 2032 3816 1999 2007 14518 14742 5019 2008 2097 2562 2017 5870 2802 1996 2878 3185 1010 10468 1010 2065 2017 1005 2310 2464 2990 12054 2030 1996 23616 2100 2186 1010 2017 1005 1040 2113 2054 2000 5987 2005 8562 1010 6195 2009 2038 2087 1997 1996 2111 2013 2216 5691 1012 3452 1012 1012 1012 1045 2074 2018 2000 2507 2009 1037 2184 1013 2184 2138 2049 2028 1997 2026 5440 5691 1997 2035 102


INFO:tensorflow:input_ids: 101 10303 1010 2292 2033 3338 2009 2091 2005 8038 1012 1012 1012 27912 2003 2763 2028 1997 1996 4569 15580 2102 23100 5691 2017 1005 2222 2412 2156 1012 2009 1005 1055 2288 1037 8150 1997 1037 4310 9994 2055 1037 3124 2383 2611 13460 1998 2673 2183 11043 2005 2032 3816 1999 2007 14518 14742 5019 2008 2097 2562 2017 5870 2802 1996 2878 3185 1010 10468 1010 2065 2017 1005 2310 2464 2990 12054 2030 1996 23616 2100 2186 1010 2017 1005 1040 2113 2054 2000 5987 2005 8562 1010 6195 2009 2038 2087 1997 1996 2111 2013 2216 5691 1012 3452 1012 1012 1012 1045 2074 2018 2000 2507 2009 1037 2184 1013 2184 2138 2049 2028 1997 2026 5440 5691 1997 2035 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] the efficacy of this picture was best proven on the intended target audience , namely teens . my 14 - year - old son became so eng ##ross ##ed in this film that i rate it considerably higher than its im ##ita ##tor " mad city . " it sparked debate in our household on issues such as peer pressure and loyalty vs . doing the right thing . for that alone , i rate this film a 10 ! parents should watch it with their teens and discuss it afterwards . < br / > < br / > i very much liked the smart dialogue and consistent acting . i thought that james re ##mar was adequate in his role , but the teenage [SEP]


INFO:tensorflow:tokens: [CLS] the efficacy of this picture was best proven on the intended target audience , namely teens . my 14 - year - old son became so eng ##ross ##ed in this film that i rate it considerably higher than its im ##ita ##tor " mad city . " it sparked debate in our household on issues such as peer pressure and loyalty vs . doing the right thing . for that alone , i rate this film a 10 ! parents should watch it with their teens and discuss it afterwards . < br / > < br / > i very much liked the smart dialogue and consistent acting . i thought that james re ##mar was adequate in his role , but the teenage [SEP]


INFO:tensorflow:input_ids: 101 1996 21150 1997 2023 3861 2001 2190 10003 2006 1996 3832 4539 4378 1010 8419 13496 1012 2026 2403 1011 2095 1011 2214 2365 2150 2061 25540 25725 2098 1999 2023 2143 2008 1045 3446 2009 9839 3020 2084 2049 10047 6590 4263 1000 5506 2103 1012 1000 2009 13977 5981 1999 2256 4398 2006 3314 2107 2004 8152 3778 1998 9721 5443 1012 2725 1996 2157 2518 1012 2005 2008 2894 1010 1045 3446 2023 2143 1037 2184 999 3008 2323 3422 2009 2007 2037 13496 1998 6848 2009 5728 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2200 2172 4669 1996 6047 7982 1998 8335 3772 1012 1045 2245 2008 2508 2128 7849 2001 11706 1999 2010 2535 1010 2021 1996 9454 102


INFO:tensorflow:input_ids: 101 1996 21150 1997 2023 3861 2001 2190 10003 2006 1996 3832 4539 4378 1010 8419 13496 1012 2026 2403 1011 2095 1011 2214 2365 2150 2061 25540 25725 2098 1999 2023 2143 2008 1045 3446 2009 9839 3020 2084 2049 10047 6590 4263 1000 5506 2103 1012 1000 2009 13977 5981 1999 2256 4398 2006 3314 2107 2004 8152 3778 1998 9721 5443 1012 2725 1996 2157 2518 1012 2005 2008 2894 1010 1045 3446 2023 2143 1037 2184 999 3008 2323 3422 2009 2007 2037 13496 1998 6848 2009 5728 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2200 2172 4669 1996 6047 7982 1998 8335 3772 1012 1045 2245 2008 2508 2128 7849 2001 11706 1999 2010 2535 1010 2021 1996 9454 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


In [14]:
train_features

[<bert.run_classifier.InputFeatures at 0x7fa3e8945110>,
 <bert.run_classifier.InputFeatures at 0x7fa3e77e92d0>,
 <bert.run_classifier.InputFeatures at 0x7fa3e37d13d0>,
 <bert.run_classifier.InputFeatures at 0x7fa3e37f58d0>,
 <bert.run_classifier.InputFeatures at 0x7fa3e8688190>,
 <bert.run_classifier.InputFeatures at 0x7fa3e37d1ad0>,
 <bert.run_classifier.InputFeatures at 0x7fa3e38450d0>,
 <bert.run_classifier.InputFeatures at 0x7fa3e37f5e90>,
 <bert.run_classifier.InputFeatures at 0x7fa3e37f59d0>,
 <bert.run_classifier.InputFeatures at 0x7fa3e6395c50>,
 <bert.run_classifier.InputFeatures at 0x7fa3e6395510>,
 <bert.run_classifier.InputFeatures at 0x7fa3e37d1b50>,
 <bert.run_classifier.InputFeatures at 0x7fa3e6395690>,
 <bert.run_classifier.InputFeatures at 0x7fa3e89452d0>,
 <bert.run_classifier.InputFeatures at 0x7fa3e37f5850>,
 <bert.run_classifier.InputFeatures at 0x7fa3e8688250>,
 <bert.run_classifier.InputFeatures at 0x7fa3e37fa590>,
 <bert.run_classifier.InputFeatures at 0x7fa46d4

#Creating a model

Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning).

In [15]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model."""

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own layer to tune for politeness data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting
    output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
    # If we're predicting, we want predicted labels and the probabiltiies.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction.

In [16]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns `model_fn` closure for TPUEstimator."""
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
    
    # TRAIN and EVAL
    if not is_predicting:

      (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

      # Calculate evaluation metrics. 
      def metric_fn(label_ids, predicted_labels):
        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
        f1_score = tf.contrib.metrics.f1_score(
            label_ids,
            predicted_labels)
        auc = tf.metrics.auc(
            label_ids,
            predicted_labels)
        recall = tf.metrics.recall(
            label_ids,
            predicted_labels)
        precision = tf.metrics.precision(
            label_ids,
            predicted_labels) 
        true_pos = tf.metrics.true_positives(
            label_ids,
            predicted_labels)
        true_neg = tf.metrics.true_negatives(
            label_ids,
            predicted_labels)   
        false_pos = tf.metrics.false_positives(
            label_ids,
            predicted_labels)  
        false_neg = tf.metrics.false_negatives(
            label_ids,
            predicted_labels)
        return {
            "eval_accuracy": accuracy,
            "f1_score": f1_score,
            "auc": auc,
            "precision": precision,
            "recall": recall,
            "true_positives": true_pos,
            "true_negatives": true_neg,
            "false_positives": false_pos,
            "false_negatives": false_neg
        }

      eval_metrics = metric_fn(label_ids, predicted_labels)

      if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          train_op=train_op)
      else:
          return tf.estimator.EstimatorSpec(mode=mode,
            loss=loss,
            eval_metric_ops=eval_metrics)
    else:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Return the actual model function in the closure
  return model_fn


In [17]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [18]:
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [19]:
# Specify outpit directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [20]:
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})


INFO:tensorflow:Using config: {'_model_dir': 'output_dir', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fa3e033df90>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Using config: {'_model_dir': 'output_dir', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fa3e033df90>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Next we create an input builder function that takes our training feature set (`train_features`) and produces a generator. This is a pretty standard design pattern for working with Tensorflow [Estimators](https://www.tensorflow.org/guide/estimators).

In [21]:
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.

In [None]:
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




















Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into output_dir/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into output_dir/model.ckpt.


Now let's use our test data to see how well our model did:

In [None]:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [None]:
estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-12T21:04:20Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from gs://bert-tfhub/aclImdb_v1/model.ckpt-468
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-02-12-21:06:05
INFO:tensorflow:Saving dict for global step 468: auc = 0.86659324, eval_accuracy = 0.8664, f1_score = 0.8659711, false_negatives = 375.0, false_positives = 293.0, global_step = 468, loss = 0.51870537, precision = 0.880457, recall = 0.8519542, true_negatives = 2174.0, true_positives = 2158.0
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 468: gs://bert-tfhub/aclImdb_v1/model.ckpt-468


{'auc': 0.86659324,
 'eval_accuracy': 0.8664,
 'f1_score': 0.8659711,
 'false_negatives': 375.0,
 'false_positives': 293.0,
 'global_step': 468,
 'loss': 0.51870537,
 'precision': 0.880457,
 'recall': 0.8519542,
 'true_negatives': 2174.0,
 'true_positives': 2158.0}

Now let's write code to make predictions on new sentences:

In [None]:
def getPrediction(in_sentences):
  labels = ["Negative", "Positive"]
  input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  predictions = estimator.predict(predict_input_fn)
  return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]

In [None]:
pred_sentences = [
  "That movie was absolutely awful",
  "The acting was a bit lacking",
  "The film was creative and surprising",
  "Absolutely fantastic!"
]

In [None]:
predictions = getPrediction(pred_sentences)

INFO:tensorflow:Writing example 0 of 4
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: 
INFO:tensorflow:tokens: [CLS] that movie was absolutely awful [SEP]
INFO:tensorflow:input_ids: 101 2008 3185 2001 7078 9643 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Voila! We have a sentiment classifier!

In [None]:
predictions

[('That movie was absolutely awful',
  array([-4.9142293e-03, -5.3180690e+00], dtype=float32),
  'Negative'),
 ('The acting was a bit lacking',
  array([-0.03325794, -3.4200459 ], dtype=float32),
  'Negative'),
 ('The film was creative and surprising',
  array([-5.3589125e+00, -4.7171740e-03], dtype=float32),
  'Positive'),
 ('Absolutely fantastic!',
  array([-5.0434084 , -0.00647258], dtype=float32),
  'Positive')]