<a href="https://colab.research.google.com/github/michaelyao/star_dust/blob/master/notebooks/predicting_movie_reviews_with_bert_on_tf_hub.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://github.com/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb

In [25]:
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [26]:
!ls /content/gdrive/'My Drive'/googlecolab/data

!ln -s '/content/gdrive/My Drive/googlecolab'  /googlecolab

!ls /googlecolab



ln: failed to create symbolic link '/googlecolab/googlecolab': Function not implemented
data


In [0]:
DATA_PATH="/googlecolab/data"

In [0]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime
import os
from os import path
import pathlib

In [5]:
!pip install bert-tensorflow

Collecting bert-tensorflow
[?25l  Downloading https://files.pythonhosted.org/packages/a6/66/7eb4e8b6ea35b7cc54c322c816f976167a43019750279a8473d355800a93/bert_tensorflow-1.0.1-py2.py3-none-any.whl (67kB)
[K     |████████████████████████████████| 71kB 2.6MB/s 
Installing collected packages: bert-tensorflow
Successfully installed bert-tensorflow-1.0.1


In [0]:
import gzip
import shutil

        
def extract_all(archives, extract_path, ziptype = None):
    print(f"extracting {archives} to {extract_path}, type {ziptype}")
    shutil.unpack_archive(archives, extract_path, ziptype)

In [0]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

In [31]:
# Set the output directory for saving model file
# Optionally, set a GCP bucket location

OUTPUT_DIR = DATA_PATH + '/'+'bert_text_output'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = False #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}


if DO_DELETE:
  try:
    tf.gfile.DeleteRecursively(OUTPUT_DIR)
  except:
    # Doesn't matter if the directory didn't exist
    pass
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))

***** Model output directory: /googlecolab/data/bert_text_output *****


In [0]:
from tensorflow import keras
import os
import re

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
  data = {}
  data["sentence"] = []
  data["sentiment"] = []
  for file_path in os.listdir(directory):
    print(f"loading directory data:  directory -> {directory} file_path -> {file_path}")
    with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
      data["sentence"].append(f.read())
      data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
  return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
  print(f"load dataset to directory @ {directory}")
  pos_df = load_directory_data(os.path.join(directory, "pos"))
  neg_df = load_directory_data(os.path.join(directory, "neg"))
  pos_df["polarity"] = 1
  neg_df["polarity"] = 0
  return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False):
  
  ds_path = DATA_PATH + "/aclImdb.tar.gz"
  
  if( not path.exists(ds_path)):
    print(f"The data file {ds_path} does not exist. Download.... ")
    dataset = tf.keras.utils.get_file(
        fname=ds_path, 
        origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", 
        extract=False)
    print(f"dataset {dataset}")
    extract_all(DATA_PATH + "/aclImdb.tar.gz", DATA_PATH, "gztar")
  else:
    print(f"We have dataset already downloaded. {ds_path}")
    dataset = ds_path
  
  dataset_path = os.path.join(os.path.dirname(dataset)
  print("loading dataset  {dataset_path}")
  train_df = load_dataset(dataset_path,"aclImdb", "train"))
  test_df = load_dataset(dataset_path, "aclImdb", "test"))
  
  return train_df, test_df

In [36]:
train, test = download_and_load_datasets()

directory /googlecolab/data/aclImdb/train
directory, file_path ->/googlecolab/data/aclImdb/train/pos 127_7.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 126_10.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 125_7.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 124_10.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 123_10.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 122_9.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 121_10.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 120_8.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 119_10.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 118_8.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 117_10.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 116_10.txt
directory, file_path ->/googlecolab/data/aclImdb/train/pos 115_10.txt
directory, file_path ->/googlecolab/data/aclImdb/trai

In [35]:
train.columns

Index(['sentence', 'sentiment', 'polarity'], dtype='object')

In [0]:
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]

In [0]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

In [0]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  with tf.Graph().as_default():
    bert_module = hub.Module(BERT_MODEL_HUB)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    with tf.Session() as sess:
      vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                            tokenization_info["do_lower_case"]])
      
  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [0]:
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

In [0]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)







INFO:tensorflow:Writing example 0 of 25000


INFO:tensorflow:Writing example 0 of 25000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] wow what can i say . i like shit ##y movies and i go out of my way to watch a corn ##y action flick , but snake eater i would have rather had a nail driven into my pee hole while my grandma gave me a lap dance . lorenzo lama ##s , p ##ff ##t more like lorenzo lame ##ass this guy has as much acting ability as bill clinton has self control . it has all the goods to make a really bad movie even worse . crazed hill ##bill ##es yep ! needles ##s ti ##t shot ( with a real weird scar ) yep ! crap ##py soundtrack yep ! i wish i could give the movie - 10 stars but [SEP]


INFO:tensorflow:tokens: [CLS] wow what can i say . i like shit ##y movies and i go out of my way to watch a corn ##y action flick , but snake eater i would have rather had a nail driven into my pee hole while my grandma gave me a lap dance . lorenzo lama ##s , p ##ff ##t more like lorenzo lame ##ass this guy has as much acting ability as bill clinton has self control . it has all the goods to make a really bad movie even worse . crazed hill ##bill ##es yep ! needles ##s ti ##t shot ( with a real weird scar ) yep ! crap ##py soundtrack yep ! i wish i could give the movie - 10 stars but [SEP]


INFO:tensorflow:input_ids: 101 10166 2054 2064 1045 2360 1012 1045 2066 4485 2100 5691 1998 1045 2175 2041 1997 2026 2126 2000 3422 1037 9781 2100 2895 17312 1010 2021 7488 28496 1045 2052 2031 2738 2018 1037 13774 5533 2046 2026 21392 4920 2096 2026 13055 2435 2033 1037 5001 3153 1012 12484 18832 2015 1010 1052 4246 2102 2062 2066 12484 20342 12054 2023 3124 2038 2004 2172 3772 3754 2004 3021 7207 2038 2969 2491 1012 2009 2038 2035 1996 5350 2000 2191 1037 2428 2919 3185 2130 4788 1012 28343 2940 24457 2229 15624 999 17044 2015 14841 2102 2915 1006 2007 1037 2613 6881 11228 1007 15624 999 10231 7685 6050 15624 999 1045 4299 1045 2071 2507 1996 3185 1011 2184 3340 2021 102


INFO:tensorflow:input_ids: 101 10166 2054 2064 1045 2360 1012 1045 2066 4485 2100 5691 1998 1045 2175 2041 1997 2026 2126 2000 3422 1037 9781 2100 2895 17312 1010 2021 7488 28496 1045 2052 2031 2738 2018 1037 13774 5533 2046 2026 21392 4920 2096 2026 13055 2435 2033 1037 5001 3153 1012 12484 18832 2015 1010 1052 4246 2102 2062 2066 12484 20342 12054 2023 3124 2038 2004 2172 3772 3754 2004 3021 7207 2038 2969 2491 1012 2009 2038 2035 1996 5350 2000 2191 1037 2428 2919 3185 2130 4788 1012 28343 2940 24457 2229 15624 999 17044 2015 14841 2102 2915 1006 2007 1037 2613 6881 11228 1007 15624 999 10231 7685 6050 15624 999 1045 4299 1045 2071 2507 1996 3185 1011 2184 3340 2021 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i wasn ' t a fan of seeing this movie at all , but when my g ##f called me and said she had a free advanced screening pass i tagged along only for the sake of seeing eva long ##oria and laughing at jason big ##gs antics . < br / > < br / > overall it was actually better then i expected but not by much . this was like a hybrid of how to lose a guy in 10 days and just like heaven . a typical romantic comedy with its moments i guess . the movie was quite short though ( around 85 min . ) but it was enough to tell the whole story , build some character development and [SEP]


INFO:tensorflow:tokens: [CLS] i wasn ' t a fan of seeing this movie at all , but when my g ##f called me and said she had a free advanced screening pass i tagged along only for the sake of seeing eva long ##oria and laughing at jason big ##gs antics . < br / > < br / > overall it was actually better then i expected but not by much . this was like a hybrid of how to lose a guy in 10 days and just like heaven . a typical romantic comedy with its moments i guess . the movie was quite short though ( around 85 min . ) but it was enough to tell the whole story , build some character development and [SEP]


INFO:tensorflow:input_ids: 101 1045 2347 1005 1056 1037 5470 1997 3773 2023 3185 2012 2035 1010 2021 2043 2026 1043 2546 2170 2033 1998 2056 2016 2018 1037 2489 3935 11326 3413 1045 26610 2247 2069 2005 1996 8739 1997 3773 9345 2146 11069 1998 5870 2012 4463 2502 5620 27440 1012 1026 7987 1013 1028 1026 7987 1013 1028 3452 2009 2001 2941 2488 2059 1045 3517 2021 2025 2011 2172 1012 2023 2001 2066 1037 8893 1997 2129 2000 4558 1037 3124 1999 2184 2420 1998 2074 2066 6014 1012 1037 5171 6298 4038 2007 2049 5312 1045 3984 1012 1996 3185 2001 3243 2460 2295 1006 2105 5594 8117 1012 1007 2021 2009 2001 2438 2000 2425 1996 2878 2466 1010 3857 2070 2839 2458 1998 102


INFO:tensorflow:input_ids: 101 1045 2347 1005 1056 1037 5470 1997 3773 2023 3185 2012 2035 1010 2021 2043 2026 1043 2546 2170 2033 1998 2056 2016 2018 1037 2489 3935 11326 3413 1045 26610 2247 2069 2005 1996 8739 1997 3773 9345 2146 11069 1998 5870 2012 4463 2502 5620 27440 1012 1026 7987 1013 1028 1026 7987 1013 1028 3452 2009 2001 2941 2488 2059 1045 3517 2021 2025 2011 2172 1012 2023 2001 2066 1037 8893 1997 2129 2000 4558 1037 3124 1999 2184 2420 1998 2074 2066 6014 1012 1037 5171 6298 4038 2007 2049 5312 1045 3984 1012 1996 3185 2001 3243 2460 2295 1006 2105 5594 8117 1012 1007 2021 2009 2001 2438 2000 2425 1996 2878 2466 1010 3857 2070 2839 2458 1998 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] there isn ' t more i can say that saying this film was awful . the whole chinese ##ness is awakened in your being because of the ancestors was a hard sell . but telling the audience that every chinese knows chinese history without even studying it just laugh ##able . that is like saying every american knows american history without studying or every filipino , etc , etc . it just isn ' t bel ##ie ##vable . < br / > < br / > the story is flat out hideous . it talked about shin being from a monastery in china - later identifying it from be ##jing . however , the early sequences of the film show the map focusing in on [SEP]


INFO:tensorflow:tokens: [CLS] there isn ' t more i can say that saying this film was awful . the whole chinese ##ness is awakened in your being because of the ancestors was a hard sell . but telling the audience that every chinese knows chinese history without even studying it just laugh ##able . that is like saying every american knows american history without studying or every filipino , etc , etc . it just isn ' t bel ##ie ##vable . < br / > < br / > the story is flat out hideous . it talked about shin being from a monastery in china - later identifying it from be ##jing . however , the early sequences of the film show the map focusing in on [SEP]


INFO:tensorflow:input_ids: 101 2045 3475 1005 1056 2062 1045 2064 2360 2008 3038 2023 2143 2001 9643 1012 1996 2878 2822 2791 2003 20256 1999 2115 2108 2138 1997 1996 10748 2001 1037 2524 5271 1012 2021 4129 1996 4378 2008 2296 2822 4282 2822 2381 2302 2130 5702 2009 2074 4756 3085 1012 2008 2003 2066 3038 2296 2137 4282 2137 2381 2302 5702 2030 2296 10275 1010 4385 1010 4385 1012 2009 2074 3475 1005 1056 19337 2666 12423 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2466 2003 4257 2041 22293 1012 2009 5720 2055 12277 2108 2013 1037 6408 1999 2859 1011 2101 12151 2009 2013 2022 29518 1012 2174 1010 1996 2220 10071 1997 1996 2143 2265 1996 4949 7995 1999 2006 102


INFO:tensorflow:input_ids: 101 2045 3475 1005 1056 2062 1045 2064 2360 2008 3038 2023 2143 2001 9643 1012 1996 2878 2822 2791 2003 20256 1999 2115 2108 2138 1997 1996 10748 2001 1037 2524 5271 1012 2021 4129 1996 4378 2008 2296 2822 4282 2822 2381 2302 2130 5702 2009 2074 4756 3085 1012 2008 2003 2066 3038 2296 2137 4282 2137 2381 2302 5702 2030 2296 10275 1010 4385 1010 4385 1012 2009 2074 3475 1005 1056 19337 2666 12423 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2466 2003 4257 2041 22293 1012 2009 5720 2055 12277 2108 2013 1037 6408 1999 2859 1011 2101 12151 2009 2013 2022 29518 1012 2174 1010 1996 2220 10071 1997 1996 2143 2265 1996 4949 7995 1999 2006 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] it is fitting on a musical sunday to get your heart a pumping , and no one can do that better than little richard . the man could sing the drawers off the ladies and defined rock and roll . < br / > < br / > look to leon to provide a definitive characterization , as he has done so with david ru ##ffin in the temptation ##s and jackie wilson in mr . rock ' n ' roll : the alan freed story . < br / > < br / > this was a fascinating bio ##pic as we saw little richard struggle with his father , with his church and with himself over just who he was . he won the [SEP]


INFO:tensorflow:tokens: [CLS] it is fitting on a musical sunday to get your heart a pumping , and no one can do that better than little richard . the man could sing the drawers off the ladies and defined rock and roll . < br / > < br / > look to leon to provide a definitive characterization , as he has done so with david ru ##ffin in the temptation ##s and jackie wilson in mr . rock ' n ' roll : the alan freed story . < br / > < br / > this was a fascinating bio ##pic as we saw little richard struggle with his father , with his church and with himself over just who he was . he won the [SEP]


INFO:tensorflow:input_ids: 101 2009 2003 11414 2006 1037 3315 4465 2000 2131 2115 2540 1037 14107 1010 1998 2053 2028 2064 2079 2008 2488 2084 2210 2957 1012 1996 2158 2071 6170 1996 22497 2125 1996 6456 1998 4225 2600 1998 4897 1012 1026 7987 1013 1028 1026 7987 1013 1028 2298 2000 6506 2000 3073 1037 15764 23191 1010 2004 2002 2038 2589 2061 2007 2585 21766 15379 1999 1996 17232 2015 1998 9901 4267 1999 2720 1012 2600 1005 1050 1005 4897 1024 1996 5070 10650 2466 1012 1026 7987 1013 1028 1026 7987 1013 1028 2023 2001 1037 17160 16012 24330 2004 2057 2387 2210 2957 5998 2007 2010 2269 1010 2007 2010 2277 1998 2007 2370 2058 2074 2040 2002 2001 1012 2002 2180 1996 102


INFO:tensorflow:input_ids: 101 2009 2003 11414 2006 1037 3315 4465 2000 2131 2115 2540 1037 14107 1010 1998 2053 2028 2064 2079 2008 2488 2084 2210 2957 1012 1996 2158 2071 6170 1996 22497 2125 1996 6456 1998 4225 2600 1998 4897 1012 1026 7987 1013 1028 1026 7987 1013 1028 2298 2000 6506 2000 3073 1037 15764 23191 1010 2004 2002 2038 2589 2061 2007 2585 21766 15379 1999 1996 17232 2015 1998 9901 4267 1999 2720 1012 2600 1005 1050 1005 4897 1024 1996 5070 10650 2466 1012 1026 7987 1013 1028 1026 7987 1013 1028 2023 2001 1037 17160 16012 24330 2004 2057 2387 2210 2957 5998 2007 2010 2269 1010 2007 2010 2277 1998 2007 2370 2058 2074 2040 2002 2001 1012 2002 2180 1996 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i honestly had somewhat high expectations when i first began to watch this movie , but it turned out to be probably one of the most boring films i have ever seen ! < br / > < br / > first of all , the pace is incredibly slow , so it seems much longer than it is ( and it ' s not short ) . i ' m sure when jane austen wrote the book , she made it several pages long , filled it with description , and didn ' t intend for people to read in in one day , or it might drag and lose it ' s appeal , which " emma " most certainly did . now " [SEP]


INFO:tensorflow:tokens: [CLS] i honestly had somewhat high expectations when i first began to watch this movie , but it turned out to be probably one of the most boring films i have ever seen ! < br / > < br / > first of all , the pace is incredibly slow , so it seems much longer than it is ( and it ' s not short ) . i ' m sure when jane austen wrote the book , she made it several pages long , filled it with description , and didn ' t intend for people to read in in one day , or it might drag and lose it ' s appeal , which " emma " most certainly did . now " [SEP]


INFO:tensorflow:input_ids: 101 1045 9826 2018 5399 2152 10908 2043 1045 2034 2211 2000 3422 2023 3185 1010 2021 2009 2357 2041 2000 2022 2763 2028 1997 1996 2087 11771 3152 1045 2031 2412 2464 999 1026 7987 1013 1028 1026 7987 1013 1028 2034 1997 2035 1010 1996 6393 2003 11757 4030 1010 2061 2009 3849 2172 2936 2084 2009 2003 1006 1998 2009 1005 1055 2025 2460 1007 1012 1045 1005 1049 2469 2043 4869 24177 2626 1996 2338 1010 2016 2081 2009 2195 5530 2146 1010 3561 2009 2007 6412 1010 1998 2134 1005 1056 13566 2005 2111 2000 3191 1999 1999 2028 2154 1010 2030 2009 2453 8011 1998 4558 2009 1005 1055 5574 1010 2029 1000 5616 1000 2087 5121 2106 1012 2085 1000 102


INFO:tensorflow:input_ids: 101 1045 9826 2018 5399 2152 10908 2043 1045 2034 2211 2000 3422 2023 3185 1010 2021 2009 2357 2041 2000 2022 2763 2028 1997 1996 2087 11771 3152 1045 2031 2412 2464 999 1026 7987 1013 1028 1026 7987 1013 1028 2034 1997 2035 1010 1996 6393 2003 11757 4030 1010 2061 2009 3849 2172 2936 2084 2009 2003 1006 1998 2009 1005 1055 2025 2460 1007 1012 1045 1005 1049 2469 2043 4869 24177 2626 1996 2338 1010 2016 2081 2009 2195 5530 2146 1010 3561 2009 2007 6412 1010 1998 2134 1005 1056 13566 2005 2111 2000 3191 1999 1999 2028 2154 1010 2030 2009 2453 8011 1998 4558 2009 1005 1055 5574 1010 2029 1000 5616 1000 2087 5121 2106 1012 2085 1000 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:Writing example 10000 of 25000


INFO:tensorflow:Writing example 10000 of 25000


INFO:tensorflow:Writing example 20000 of 25000


INFO:tensorflow:Writing example 20000 of 25000


INFO:tensorflow:Writing example 0 of 25000


INFO:tensorflow:Writing example 0 of 25000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this is the " battlefield earth " of mini series . it has with a few exceptions , all the disastrous ingredients that doomed that movie and will follow it to the grave in the turkey cemetery . they are both adaptations of books with a endless amount of pages who has been turned to a complete mess by a script writer and a director ( in this case they are the same person . ) who clearly don ' t know what they are doing , they have both a messiah wanna ##be that don ' t really deliver , as a hero ( played in this case by a guy that looks like mark ham ##ill but sadly the force is not with him [SEP]


INFO:tensorflow:tokens: [CLS] this is the " battlefield earth " of mini series . it has with a few exceptions , all the disastrous ingredients that doomed that movie and will follow it to the grave in the turkey cemetery . they are both adaptations of books with a endless amount of pages who has been turned to a complete mess by a script writer and a director ( in this case they are the same person . ) who clearly don ' t know what they are doing , they have both a messiah wanna ##be that don ' t really deliver , as a hero ( played in this case by a guy that looks like mark ham ##ill but sadly the force is not with him [SEP]


INFO:tensorflow:input_ids: 101 2023 2003 1996 1000 11686 3011 1000 1997 7163 2186 1012 2009 2038 2007 1037 2261 11790 1010 2035 1996 16775 12760 2008 20076 2008 3185 1998 2097 3582 2009 2000 1996 6542 1999 1996 4977 4528 1012 2027 2024 2119 17241 1997 2808 2007 1037 10866 3815 1997 5530 2040 2038 2042 2357 2000 1037 3143 6752 2011 1037 5896 3213 1998 1037 2472 1006 1999 2023 2553 2027 2024 1996 2168 2711 1012 1007 2040 4415 2123 1005 1056 2113 2054 2027 2024 2725 1010 2027 2031 2119 1037 22112 10587 4783 2008 2123 1005 1056 2428 8116 1010 2004 1037 5394 1006 2209 1999 2023 2553 2011 1037 3124 2008 3504 2066 2928 10654 8591 2021 13718 1996 2486 2003 2025 2007 2032 102


INFO:tensorflow:input_ids: 101 2023 2003 1996 1000 11686 3011 1000 1997 7163 2186 1012 2009 2038 2007 1037 2261 11790 1010 2035 1996 16775 12760 2008 20076 2008 3185 1998 2097 3582 2009 2000 1996 6542 1999 1996 4977 4528 1012 2027 2024 2119 17241 1997 2808 2007 1037 10866 3815 1997 5530 2040 2038 2042 2357 2000 1037 3143 6752 2011 1037 5896 3213 1998 1037 2472 1006 1999 2023 2553 2027 2024 1996 2168 2711 1012 1007 2040 4415 2123 1005 1056 2113 2054 2027 2024 2725 1010 2027 2031 2119 1037 22112 10587 4783 2008 2123 1005 1056 2428 8116 1010 2004 1037 5394 1006 2209 1999 2023 2553 2011 1037 3124 2008 3504 2066 2928 10654 8591 2021 13718 1996 2486 2003 2025 2007 2032 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i will admit , i had the opportunity in the past to watch this film , and after about 5 - 10 minutes into it , i felt like many did . i was expecting a monkey movie that was similar to the television show , but instead i was given . . . well , i didn ' t know what i was given to be honest . < br / > < br / > however , after finally watching this film , i realized that not only had i had a closed mind to the brilliance it depicts , i also found myself watching it over and over again . it ' s the one movie that never cease ##s to interest me [SEP]


INFO:tensorflow:tokens: [CLS] i will admit , i had the opportunity in the past to watch this film , and after about 5 - 10 minutes into it , i felt like many did . i was expecting a monkey movie that was similar to the television show , but instead i was given . . . well , i didn ' t know what i was given to be honest . < br / > < br / > however , after finally watching this film , i realized that not only had i had a closed mind to the brilliance it depicts , i also found myself watching it over and over again . it ' s the one movie that never cease ##s to interest me [SEP]


INFO:tensorflow:input_ids: 101 1045 2097 6449 1010 1045 2018 1996 4495 1999 1996 2627 2000 3422 2023 2143 1010 1998 2044 2055 1019 1011 2184 2781 2046 2009 1010 1045 2371 2066 2116 2106 1012 1045 2001 8074 1037 10608 3185 2008 2001 2714 2000 1996 2547 2265 1010 2021 2612 1045 2001 2445 1012 1012 1012 2092 1010 1045 2134 1005 1056 2113 2054 1045 2001 2445 2000 2022 7481 1012 1026 7987 1013 1028 1026 7987 1013 1028 2174 1010 2044 2633 3666 2023 2143 1010 1045 3651 2008 2025 2069 2018 1045 2018 1037 2701 2568 2000 1996 28850 2009 11230 1010 1045 2036 2179 2870 3666 2009 2058 1998 2058 2153 1012 2009 1005 1055 1996 2028 3185 2008 2196 13236 2015 2000 3037 2033 102


INFO:tensorflow:input_ids: 101 1045 2097 6449 1010 1045 2018 1996 4495 1999 1996 2627 2000 3422 2023 2143 1010 1998 2044 2055 1019 1011 2184 2781 2046 2009 1010 1045 2371 2066 2116 2106 1012 1045 2001 8074 1037 10608 3185 2008 2001 2714 2000 1996 2547 2265 1010 2021 2612 1045 2001 2445 1012 1012 1012 2092 1010 1045 2134 1005 1056 2113 2054 1045 2001 2445 2000 2022 7481 1012 1026 7987 1013 1028 1026 7987 1013 1028 2174 1010 2044 2633 3666 2023 2143 1010 1045 3651 2008 2025 2069 2018 1045 2018 1037 2701 2568 2000 1996 28850 2009 11230 1010 1045 2036 2179 2870 3666 2009 2058 1998 2058 2153 1012 2009 1005 1055 1996 2028 3185 2008 2196 13236 2015 2000 3037 2033 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i absolutely love promised land . the first episode that i saw , was while i was on my mission from 2003 - 2005 . i really loved the rich family background portrayed in the show . here was a family with struggles of their own , but instead of dwelling on them ; they would reach out with love to others who may have had the same problems , in an effort to forget themselves , and go to work . this is what caused me to fall in love with the show . all of the actors ; especially gerald mc ##rane ##y ; had demonstrated the true meaning of " family " which has left an ind ##eli ##ble mark on my life [SEP]


INFO:tensorflow:tokens: [CLS] i absolutely love promised land . the first episode that i saw , was while i was on my mission from 2003 - 2005 . i really loved the rich family background portrayed in the show . here was a family with struggles of their own , but instead of dwelling on them ; they would reach out with love to others who may have had the same problems , in an effort to forget themselves , and go to work . this is what caused me to fall in love with the show . all of the actors ; especially gerald mc ##rane ##y ; had demonstrated the true meaning of " family " which has left an ind ##eli ##ble mark on my life [SEP]


INFO:tensorflow:input_ids: 101 1045 7078 2293 5763 2455 1012 1996 2034 2792 2008 1045 2387 1010 2001 2096 1045 2001 2006 2026 3260 2013 2494 1011 2384 1012 1045 2428 3866 1996 4138 2155 4281 6791 1999 1996 2265 1012 2182 2001 1037 2155 2007 11785 1997 2037 2219 1010 2021 2612 1997 13160 2006 2068 1025 2027 2052 3362 2041 2007 2293 2000 2500 2040 2089 2031 2018 1996 2168 3471 1010 1999 2019 3947 2000 5293 3209 1010 1998 2175 2000 2147 1012 2023 2003 2054 3303 2033 2000 2991 1999 2293 2007 1996 2265 1012 2035 1997 1996 5889 1025 2926 9659 11338 18053 2100 1025 2018 7645 1996 2995 3574 1997 1000 2155 1000 2029 2038 2187 2019 27427 20806 3468 2928 2006 2026 2166 102


INFO:tensorflow:input_ids: 101 1045 7078 2293 5763 2455 1012 1996 2034 2792 2008 1045 2387 1010 2001 2096 1045 2001 2006 2026 3260 2013 2494 1011 2384 1012 1045 2428 3866 1996 4138 2155 4281 6791 1999 1996 2265 1012 2182 2001 1037 2155 2007 11785 1997 2037 2219 1010 2021 2612 1997 13160 2006 2068 1025 2027 2052 3362 2041 2007 2293 2000 2500 2040 2089 2031 2018 1996 2168 3471 1010 1999 2019 3947 2000 5293 3209 1010 1998 2175 2000 2147 1012 2023 2003 2054 3303 2033 2000 2991 1999 2293 2007 1996 2265 1012 2035 1997 1996 5889 1025 2926 9659 11338 18053 2100 1025 2018 7645 1996 2995 3574 1997 1000 2155 1000 2029 2038 2187 2019 27427 20806 3468 2928 2006 2026 2166 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] not since " harold and maud ##e " has suicide been so successfully used as the theme for a love story . the acting , by a top notch cast is terrific . with a well structured script , things progress at an enjoyable pace . there are moments of black comedy , but i would classify " my suicidal sweetheart " as a unique romantic comedy , with a touch of dark humor . the relationship between two psychological ##ly challenged lovers , is both touching and engaging . the movie actually makes you question , what is a normal relationship ? i highly recommend " my suicidal sweetheart " . seek it out on dvd under the title " crazy for love " . [SEP]


INFO:tensorflow:tokens: [CLS] not since " harold and maud ##e " has suicide been so successfully used as the theme for a love story . the acting , by a top notch cast is terrific . with a well structured script , things progress at an enjoyable pace . there are moments of black comedy , but i would classify " my suicidal sweetheart " as a unique romantic comedy , with a touch of dark humor . the relationship between two psychological ##ly challenged lovers , is both touching and engaging . the movie actually makes you question , what is a normal relationship ? i highly recommend " my suicidal sweetheart " . seek it out on dvd under the title " crazy for love " . [SEP]


INFO:tensorflow:input_ids: 101 2025 2144 1000 7157 1998 21696 2063 1000 2038 5920 2042 2061 5147 2109 2004 1996 4323 2005 1037 2293 2466 1012 1996 3772 1010 2011 1037 2327 18624 3459 2003 27547 1012 2007 1037 2092 14336 5896 1010 2477 5082 2012 2019 22249 6393 1012 2045 2024 5312 1997 2304 4038 1010 2021 1045 2052 26268 1000 2026 26094 12074 1000 2004 1037 4310 6298 4038 1010 2007 1037 3543 1997 2601 8562 1012 1996 3276 2090 2048 8317 2135 8315 10205 1010 2003 2119 7244 1998 11973 1012 1996 3185 2941 3084 2017 3160 1010 2054 2003 1037 3671 3276 1029 1045 3811 16755 1000 2026 26094 12074 1000 1012 6148 2009 2041 2006 4966 2104 1996 2516 1000 4689 2005 2293 1000 1012 102


INFO:tensorflow:input_ids: 101 2025 2144 1000 7157 1998 21696 2063 1000 2038 5920 2042 2061 5147 2109 2004 1996 4323 2005 1037 2293 2466 1012 1996 3772 1010 2011 1037 2327 18624 3459 2003 27547 1012 2007 1037 2092 14336 5896 1010 2477 5082 2012 2019 22249 6393 1012 2045 2024 5312 1997 2304 4038 1010 2021 1045 2052 26268 1000 2026 26094 12074 1000 2004 1037 4310 6298 4038 1010 2007 1037 3543 1997 2601 8562 1012 1996 3276 2090 2048 8317 2135 8315 10205 1010 2003 2119 7244 1998 11973 1012 1996 3185 2941 3084 2017 3160 1010 2054 2003 1037 3671 3276 1029 1045 3811 16755 1000 2026 26094 12074 1000 1012 6148 2009 2041 2006 4966 2104 1996 2516 1000 4689 2005 2293 1000 1012 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] the acting is excellent in this film , with some great actors . it was fun to see fred mc ##mur ##ray as a young man . this is not a comedy . it ' s a drama and the apparently comedic instances are pit ##iful . this is not a comedy . it ' s a drama and the apparently comedic instances are pit ##iful , and some of them appear forced and con ##tri ##ved . it ' s in the script , though , not the fault of the acting . < br / > < br / > the 10 line requirement forces me to write some more . . . hmm ##m . loved carole lombard ' s my man godfrey [SEP]


INFO:tensorflow:tokens: [CLS] the acting is excellent in this film , with some great actors . it was fun to see fred mc ##mur ##ray as a young man . this is not a comedy . it ' s a drama and the apparently comedic instances are pit ##iful . this is not a comedy . it ' s a drama and the apparently comedic instances are pit ##iful , and some of them appear forced and con ##tri ##ved . it ' s in the script , though , not the fault of the acting . < br / > < br / > the 10 line requirement forces me to write some more . . . hmm ##m . loved carole lombard ' s my man godfrey [SEP]


INFO:tensorflow:input_ids: 101 1996 3772 2003 6581 1999 2023 2143 1010 2007 2070 2307 5889 1012 2009 2001 4569 2000 2156 5965 11338 20136 9447 2004 1037 2402 2158 1012 2023 2003 2025 1037 4038 1012 2009 1005 1055 1037 3689 1998 1996 4593 21699 12107 2024 6770 18424 1012 2023 2003 2025 1037 4038 1012 2009 1005 1055 1037 3689 1998 1996 4593 21699 12107 2024 6770 18424 1010 1998 2070 1997 2068 3711 3140 1998 9530 18886 7178 1012 2009 1005 1055 1999 1996 5896 1010 2295 1010 2025 1996 6346 1997 1996 3772 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2184 2240 9095 2749 2033 2000 4339 2070 2062 1012 1012 1012 17012 2213 1012 3866 24348 23441 1005 1055 2026 2158 18238 102


INFO:tensorflow:input_ids: 101 1996 3772 2003 6581 1999 2023 2143 1010 2007 2070 2307 5889 1012 2009 2001 4569 2000 2156 5965 11338 20136 9447 2004 1037 2402 2158 1012 2023 2003 2025 1037 4038 1012 2009 1005 1055 1037 3689 1998 1996 4593 21699 12107 2024 6770 18424 1012 2023 2003 2025 1037 4038 1012 2009 1005 1055 1037 3689 1998 1996 4593 21699 12107 2024 6770 18424 1010 1998 2070 1997 2068 3711 3140 1998 9530 18886 7178 1012 2009 1005 1055 1999 1996 5896 1010 2295 1010 2025 1996 6346 1997 1996 3772 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2184 2240 9095 2749 2033 2000 4339 2070 2062 1012 1012 1012 17012 2213 1012 3866 24348 23441 1005 1055 2026 2158 18238 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:Writing example 10000 of 25000


INFO:tensorflow:Writing example 10000 of 25000


INFO:tensorflow:Writing example 20000 of 25000


INFO:tensorflow:Writing example 20000 of 25000


In [0]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model."""

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own layer to tune for politeness data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting
    output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
    # If we're predicting, we want predicted labels and the probabiltiies.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)

In [0]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns `model_fn` closure for TPUEstimator."""
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
    
    # TRAIN and EVAL
    if not is_predicting:

      (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

      # Calculate evaluation metrics. 
      def metric_fn(label_ids, predicted_labels):
        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
        f1_score = tf.contrib.metrics.f1_score(
            label_ids,
            predicted_labels)
        auc = tf.metrics.auc(
            label_ids,
            predicted_labels)
        recall = tf.metrics.recall(
            label_ids,
            predicted_labels)
        precision = tf.metrics.precision(
            label_ids,
            predicted_labels) 
        true_pos = tf.metrics.true_positives(
            label_ids,
            predicted_labels)
        true_neg = tf.metrics.true_negatives(
            label_ids,
            predicted_labels)   
        false_pos = tf.metrics.false_positives(
            label_ids,
            predicted_labels)  
        false_neg = tf.metrics.false_negatives(
            label_ids,
            predicted_labels)
        return {
            "eval_accuracy": accuracy,
            "f1_score": f1_score,
            "auc": auc,
            "precision": precision,
            "recall": recall,
            "true_positives": true_pos,
            "true_negatives": true_neg,
            "false_positives": false_pos,
            "false_negatives": false_neg
        }

      eval_metrics = metric_fn(label_ids, predicted_labels)

      if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          train_op=train_op)
      else:
          return tf.estimator.EstimatorSpec(mode=mode,
            loss=loss,
            eval_metric_ops=eval_metrics)
    else:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Return the actual model function in the closure
  return model_fn

In [0]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [0]:
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [0]:
# Specify outpit directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [0]:
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})

INFO:tensorflow:Using config: {'_model_dir': 'bert_text_output', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f03ba217908>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Using config: {'_model_dir': 'bert_text_output', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f03ba217908>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Next we create an input builder function that takes our training feature set (train_features) and produces a generator. This is a pretty standard design pattern for working with Tensorflow Estimators.

In [0]:
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

In [0]:
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




















Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into bert_text_output/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into bert_text_output/model.ckpt.


INFO:tensorflow:loss = 0.7526283, step = 0


INFO:tensorflow:loss = 0.7526283, step = 0


INFO:tensorflow:global_step/sec: 0.572738


INFO:tensorflow:global_step/sec: 0.572738


INFO:tensorflow:loss = 0.3414274, step = 100 (174.604 sec)


INFO:tensorflow:loss = 0.3414274, step = 100 (174.604 sec)


INFO:tensorflow:global_step/sec: 0.6418


INFO:tensorflow:global_step/sec: 0.6418


INFO:tensorflow:loss = 0.33880764, step = 200 (155.809 sec)


INFO:tensorflow:loss = 0.33880764, step = 200 (155.809 sec)


INFO:tensorflow:global_step/sec: 0.639564


INFO:tensorflow:global_step/sec: 0.639564


INFO:tensorflow:loss = 0.33057457, step = 300 (156.357 sec)


INFO:tensorflow:loss = 0.33057457, step = 300 (156.357 sec)


INFO:tensorflow:global_step/sec: 0.641719


INFO:tensorflow:global_step/sec: 0.641719


INFO:tensorflow:loss = 0.58380526, step = 400 (155.836 sec)


INFO:tensorflow:loss = 0.58380526, step = 400 (155.836 sec)


INFO:tensorflow:Saving checkpoints for 500 into bert_text_output/model.ckpt.


INFO:tensorflow:Saving checkpoints for 500 into bert_text_output/model.ckpt.


INFO:tensorflow:global_step/sec: 0.614369


INFO:tensorflow:global_step/sec: 0.614369


INFO:tensorflow:loss = 0.15531257, step = 500 (162.763 sec)


INFO:tensorflow:loss = 0.15531257, step = 500 (162.763 sec)


INFO:tensorflow:global_step/sec: 0.641364


INFO:tensorflow:global_step/sec: 0.641364


INFO:tensorflow:loss = 0.23914036, step = 600 (155.919 sec)


INFO:tensorflow:loss = 0.23914036, step = 600 (155.919 sec)


INFO:tensorflow:global_step/sec: 0.641895


INFO:tensorflow:global_step/sec: 0.641895


INFO:tensorflow:loss = 0.3982699, step = 700 (155.794 sec)


INFO:tensorflow:loss = 0.3982699, step = 700 (155.794 sec)


INFO:tensorflow:global_step/sec: 0.640915


INFO:tensorflow:global_step/sec: 0.640915


INFO:tensorflow:loss = 0.2952415, step = 800 (156.021 sec)


INFO:tensorflow:loss = 0.2952415, step = 800 (156.021 sec)


INFO:tensorflow:global_step/sec: 0.640745


INFO:tensorflow:global_step/sec: 0.640745


INFO:tensorflow:loss = 0.47070277, step = 900 (156.069 sec)


INFO:tensorflow:loss = 0.47070277, step = 900 (156.069 sec)


INFO:tensorflow:Saving checkpoints for 1000 into bert_text_output/model.ckpt.


INFO:tensorflow:Saving checkpoints for 1000 into bert_text_output/model.ckpt.


INFO:tensorflow:global_step/sec: 0.616011


INFO:tensorflow:global_step/sec: 0.616011


INFO:tensorflow:loss = 0.12850077, step = 1000 (162.334 sec)


INFO:tensorflow:loss = 0.12850077, step = 1000 (162.334 sec)


INFO:tensorflow:global_step/sec: 0.640065


INFO:tensorflow:global_step/sec: 0.640065


INFO:tensorflow:loss = 0.04715849, step = 1100 (156.237 sec)


INFO:tensorflow:loss = 0.04715849, step = 1100 (156.237 sec)


INFO:tensorflow:global_step/sec: 0.639812


INFO:tensorflow:global_step/sec: 0.639812


INFO:tensorflow:loss = 0.008142305, step = 1200 (156.294 sec)


INFO:tensorflow:loss = 0.008142305, step = 1200 (156.294 sec)


INFO:tensorflow:global_step/sec: 0.639323


INFO:tensorflow:global_step/sec: 0.639323


INFO:tensorflow:loss = 0.1492814, step = 1300 (156.419 sec)


INFO:tensorflow:loss = 0.1492814, step = 1300 (156.419 sec)


In [0]:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [0]:
estimator.evaluate(input_fn=test_input_fn, steps=None)

In [0]:
def getPrediction(in_sentences):
  labels = ["Negative", "Positive"]
  input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  predictions = estimator.predict(predict_input_fn)
  return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]

In [0]:
pred_sentences = [
  "That movie was absolutely awful",
  "The acting was a bit lacking",
  "The film was creative and surprising",
  "Absolutely fantastic!"
]

In [0]:
predictions = getPrediction(pred_sentences)

In [0]:
predictions