In [0]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

In [3]:
!pip install bert-tensorflow

Collecting bert-tensorflow
[?25l  Downloading https://files.pythonhosted.org/packages/a6/66/7eb4e8b6ea35b7cc54c322c816f976167a43019750279a8473d355800a93/bert_tensorflow-1.0.1-py2.py3-none-any.whl (67kB)
[K     |████████████████████████████████| 71kB 3.2MB/s 
Installing collected packages: bert-tensorflow
Successfully installed bert-tensorflow-1.0.1


In [4]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization




In [0]:
OUTPUT_DIR = "output" #@param {type:"string"}
DO_DELETE = False #@param {type:"boolean"}


In [0]:
from tensorflow import keras
import os
import re

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
  
  data = {}
  data["sentence"] = []
  data["sentiment"] = []
  for file_path in os.listdir(directory):
    
    with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
      data["sentence"].append(f.read())
      data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
  return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
  pos_df = load_directory_data(os.path.join(directory, "pos"))
  neg_df = load_directory_data(os.path.join(directory, "neg"))
  pos_df["polarity"] = 1
  neg_df["polarity"] = 0
  return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False):
  dataset = tf.keras.utils.get_file(
      fname="aclImdb.tar.gz", 
      origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", 
      extract=True)
  
  train_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                       "aclImdb", "train"))
  
  test_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                      "aclImdb", "test"))
  
  return train_df, test_df


In [7]:
train, test = download_and_load_datasets()

Downloading data from http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz


In [0]:
train = train.sample(5000)
test = test.sample(5000)

In [0]:
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]

**Data Preprocessing**

We'll need to transform our data into a format BERT understands. This involves two steps. First, we create InputExample's using the constructor provided in the BERT library.

text_a is the text we want to classify, which in this case, is the Request field in our Dataframe.
text_b is used if we're training a model to understand the relationship between sentences (i.e. is text_b a translation of text_a? Is text_b an answer to the question asked by text_a?). This doesn't apply to our task, so we can leave text_b blank.
label is the label for our example, i.e. True, False

In [0]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

In [11]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  with tf.Graph().as_default():
    bert_module = hub.Module(BERT_MODEL_HUB)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    with tf.Session() as sess:
      vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                            tokenization_info["do_lower_case"]])
  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [12]:
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands.

In [13]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)







INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i loved the original . it was brilliant and always will be . strangely though , i actually looked forward to seeing the re - make . i ' m usually a little bit against re - makes because there ' s far too many of them , but somehow this intrigued me . i was really enjoying it to begin with . caine brilliant , as usual , and jude law managing to hold is own next to him . it was quite clever how it was modern ##ised and it was working . what stops this from being really good is the last seven minutes . it goes completely away from the original , so far in fact that is cease ##s to be [SEP]


INFO:tensorflow:tokens: [CLS] i loved the original . it was brilliant and always will be . strangely though , i actually looked forward to seeing the re - make . i ' m usually a little bit against re - makes because there ' s far too many of them , but somehow this intrigued me . i was really enjoying it to begin with . caine brilliant , as usual , and jude law managing to hold is own next to him . it was quite clever how it was modern ##ised and it was working . what stops this from being really good is the last seven minutes . it goes completely away from the original , so far in fact that is cease ##s to be [SEP]


INFO:tensorflow:input_ids: 101 1045 3866 1996 2434 1012 2009 2001 8235 1998 2467 2097 2022 1012 13939 2295 1010 1045 2941 2246 2830 2000 3773 1996 2128 1011 2191 1012 1045 1005 1049 2788 1037 2210 2978 2114 2128 1011 3084 2138 2045 1005 1055 2521 2205 2116 1997 2068 1010 2021 5064 2023 18896 2033 1012 1045 2001 2428 9107 2009 2000 4088 2007 1012 19881 8235 1010 2004 5156 1010 1998 12582 2375 6605 2000 2907 2003 2219 2279 2000 2032 1012 2009 2001 3243 12266 2129 2009 2001 2715 5084 1998 2009 2001 2551 1012 2054 6762 2023 2013 2108 2428 2204 2003 1996 2197 2698 2781 1012 2009 3632 3294 2185 2013 1996 2434 1010 2061 2521 1999 2755 2008 2003 13236 2015 2000 2022 102


INFO:tensorflow:input_ids: 101 1045 3866 1996 2434 1012 2009 2001 8235 1998 2467 2097 2022 1012 13939 2295 1010 1045 2941 2246 2830 2000 3773 1996 2128 1011 2191 1012 1045 1005 1049 2788 1037 2210 2978 2114 2128 1011 3084 2138 2045 1005 1055 2521 2205 2116 1997 2068 1010 2021 5064 2023 18896 2033 1012 1045 2001 2428 9107 2009 2000 4088 2007 1012 19881 8235 1010 2004 5156 1010 1998 12582 2375 6605 2000 2907 2003 2219 2279 2000 2032 1012 2009 2001 3243 12266 2129 2009 2001 2715 5084 1998 2009 2001 2551 1012 2054 6762 2023 2013 2108 2428 2204 2003 1996 2197 2698 2781 1012 2009 3632 3294 2185 2013 1996 2434 1010 2061 2521 1999 2755 2008 2003 13236 2015 2000 2022 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] sap ##s at sea < br / > < br / > aspect ratio : 1 . 37 : 1 < br / > < br / > sound format : mono < br / > < br / > ( black and white ) < br / > < br / > suffering from ' horn ##op ##ho ##bia ' , ollie embark ##s on a ' rest ##ful ' boat trip , but he and stan get mixed up with an escaped convict ( ry ##cha ##rd cramer ) . chaos en ##su ##es . < br / > < br / > this feature length comedy - an ok entry which nonetheless un ##sp ##ool ##s like a mere imitation of laurel and [SEP]


INFO:tensorflow:tokens: [CLS] sap ##s at sea < br / > < br / > aspect ratio : 1 . 37 : 1 < br / > < br / > sound format : mono < br / > < br / > ( black and white ) < br / > < br / > suffering from ' horn ##op ##ho ##bia ' , ollie embark ##s on a ' rest ##ful ' boat trip , but he and stan get mixed up with an escaped convict ( ry ##cha ##rd cramer ) . chaos en ##su ##es . < br / > < br / > this feature length comedy - an ok entry which nonetheless un ##sp ##ool ##s like a mere imitation of laurel and [SEP]


INFO:tensorflow:input_ids: 101 20066 2015 2012 2712 1026 7987 1013 1028 1026 7987 1013 1028 7814 6463 1024 1015 1012 4261 1024 1015 1026 7987 1013 1028 1026 7987 1013 1028 2614 4289 1024 18847 1026 7987 1013 1028 1026 7987 1013 1028 1006 2304 1998 2317 1007 1026 7987 1013 1028 1026 7987 1013 1028 6114 2013 1005 7109 7361 6806 11607 1005 1010 25208 28866 2015 2006 1037 1005 2717 3993 1005 4049 4440 1010 2021 2002 1998 9761 2131 3816 2039 2007 2019 6376 20462 1006 29431 7507 4103 29433 1007 1012 8488 4372 6342 2229 1012 1026 7987 1013 1028 1026 7987 1013 1028 2023 3444 3091 4038 1011 2019 7929 4443 2029 9690 4895 13102 13669 2015 2066 1037 8210 20017 1997 11893 1998 102


INFO:tensorflow:input_ids: 101 20066 2015 2012 2712 1026 7987 1013 1028 1026 7987 1013 1028 7814 6463 1024 1015 1012 4261 1024 1015 1026 7987 1013 1028 1026 7987 1013 1028 2614 4289 1024 18847 1026 7987 1013 1028 1026 7987 1013 1028 1006 2304 1998 2317 1007 1026 7987 1013 1028 1026 7987 1013 1028 6114 2013 1005 7109 7361 6806 11607 1005 1010 25208 28866 2015 2006 1037 1005 2717 3993 1005 4049 4440 1010 2021 2002 1998 9761 2131 3816 2039 2007 2019 6376 20462 1006 29431 7507 4103 29433 1007 1012 8488 4372 6342 2229 1012 1026 7987 1013 1028 1026 7987 1013 1028 2023 3444 3091 4038 1011 2019 7929 4443 2029 9690 4895 13102 13669 2015 2066 1037 8210 20017 1997 11893 1998 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i was one of those " few americans " that grew up with all of gerry andersen ' s marvelous creations . thunder ##birds was a great series for the time and would have made a great action / adventure movie if only the writers could have figured out where to target it . < br / > < br / > i expected it to be a rom ##p , but i did not expect it to aim at such a low age group . like lost in space , this could have been both visually stunning and exciting . it should have focused on more action / adventure and the goal of the original series . . . saving people in trouble . < [SEP]


INFO:tensorflow:tokens: [CLS] i was one of those " few americans " that grew up with all of gerry andersen ' s marvelous creations . thunder ##birds was a great series for the time and would have made a great action / adventure movie if only the writers could have figured out where to target it . < br / > < br / > i expected it to be a rom ##p , but i did not expect it to aim at such a low age group . like lost in space , this could have been both visually stunning and exciting . it should have focused on more action / adventure and the goal of the original series . . . saving people in trouble . < [SEP]


INFO:tensorflow:input_ids: 101 1045 2001 2028 1997 2216 1000 2261 4841 1000 2008 3473 2039 2007 2035 1997 14926 18308 1005 1055 28851 20677 1012 8505 12887 2001 1037 2307 2186 2005 1996 2051 1998 2052 2031 2081 1037 2307 2895 1013 6172 3185 2065 2069 1996 4898 2071 2031 6618 2041 2073 2000 4539 2009 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 3517 2009 2000 2022 1037 17083 2361 1010 2021 1045 2106 2025 5987 2009 2000 6614 2012 2107 1037 2659 2287 2177 1012 2066 2439 1999 2686 1010 2023 2071 2031 2042 2119 17453 14726 1998 10990 1012 2009 2323 2031 4208 2006 2062 2895 1013 6172 1998 1996 3125 1997 1996 2434 2186 1012 1012 1012 7494 2111 1999 4390 1012 1026 102


INFO:tensorflow:input_ids: 101 1045 2001 2028 1997 2216 1000 2261 4841 1000 2008 3473 2039 2007 2035 1997 14926 18308 1005 1055 28851 20677 1012 8505 12887 2001 1037 2307 2186 2005 1996 2051 1998 2052 2031 2081 1037 2307 2895 1013 6172 3185 2065 2069 1996 4898 2071 2031 6618 2041 2073 2000 4539 2009 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 3517 2009 2000 2022 1037 17083 2361 1010 2021 1045 2106 2025 5987 2009 2000 6614 2012 2107 1037 2659 2287 2177 1012 2066 2439 1999 2686 1010 2023 2071 2031 2042 2119 17453 14726 1998 10990 1012 2009 2323 2031 4208 2006 2062 2895 1013 6172 1998 1996 3125 1997 1996 2434 2186 1012 1012 1012 7494 2111 1999 4390 1012 1026 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] " the secretary " is one of those che ##es ##y , cl ##iche ##d , " thriller ##s " that one is subjected to watching on a sunday afternoon , when there is virtually nothing else on . while the plot ( a dem ##ented woman becomes jealous of all who succeed over her in the office and decides to do whatever she can to stop them ) may be one of a kind , i recognized countless plot twists , probably taken from other tv movies that i had been subjected to for the very same reason . < br / > < br / > to make matters worse , i was not wild about the cast . mel harris is one of [SEP]


INFO:tensorflow:tokens: [CLS] " the secretary " is one of those che ##es ##y , cl ##iche ##d , " thriller ##s " that one is subjected to watching on a sunday afternoon , when there is virtually nothing else on . while the plot ( a dem ##ented woman becomes jealous of all who succeed over her in the office and decides to do whatever she can to stop them ) may be one of a kind , i recognized countless plot twists , probably taken from other tv movies that i had been subjected to for the very same reason . < br / > < br / > to make matters worse , i was not wild about the cast . mel harris is one of [SEP]


INFO:tensorflow:input_ids: 101 1000 1996 3187 1000 2003 2028 1997 2216 18178 2229 2100 1010 18856 17322 2094 1010 1000 10874 2015 1000 2008 2028 2003 13532 2000 3666 2006 1037 4465 5027 1010 2043 2045 2003 8990 2498 2842 2006 1012 2096 1996 5436 1006 1037 17183 14088 2450 4150 9981 1997 2035 2040 9510 2058 2014 1999 1996 2436 1998 7288 2000 2079 3649 2016 2064 2000 2644 2068 1007 2089 2022 2028 1997 1037 2785 1010 1045 3858 14518 5436 21438 1010 2763 2579 2013 2060 2694 5691 2008 1045 2018 2042 13532 2000 2005 1996 2200 2168 3114 1012 1026 7987 1013 1028 1026 7987 1013 1028 2000 2191 5609 4788 1010 1045 2001 2025 3748 2055 1996 3459 1012 11463 5671 2003 2028 1997 102


INFO:tensorflow:input_ids: 101 1000 1996 3187 1000 2003 2028 1997 2216 18178 2229 2100 1010 18856 17322 2094 1010 1000 10874 2015 1000 2008 2028 2003 13532 2000 3666 2006 1037 4465 5027 1010 2043 2045 2003 8990 2498 2842 2006 1012 2096 1996 5436 1006 1037 17183 14088 2450 4150 9981 1997 2035 2040 9510 2058 2014 1999 1996 2436 1998 7288 2000 2079 3649 2016 2064 2000 2644 2068 1007 2089 2022 2028 1997 1037 2785 1010 1045 3858 14518 5436 21438 1010 2763 2579 2013 2060 2694 5691 2008 1045 2018 2042 13532 2000 2005 1996 2200 2168 3114 1012 1026 7987 1013 1028 1026 7987 1013 1028 2000 2191 5609 4788 1010 1045 2001 2025 3748 2055 1996 3459 1012 11463 5671 2003 2028 1997 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] have you ever in your life , gone out for a sport ' s activity , tried your best , and then found yourself in an important segment of it , where for a brief moment , you were given a chance to be a hero and a champion and . . . failed ? i believe many of us have had that moment in our lives . this is the premise of the movie , " the best of times . " in this story a middle age banker , named jack dundee ( robin williams ) suffers from the deep melancholy of a football mistake , which happened years ago , is inspired to re - play the game . . again . in [SEP]


INFO:tensorflow:tokens: [CLS] have you ever in your life , gone out for a sport ' s activity , tried your best , and then found yourself in an important segment of it , where for a brief moment , you were given a chance to be a hero and a champion and . . . failed ? i believe many of us have had that moment in our lives . this is the premise of the movie , " the best of times . " in this story a middle age banker , named jack dundee ( robin williams ) suffers from the deep melancholy of a football mistake , which happened years ago , is inspired to re - play the game . . again . in [SEP]


INFO:tensorflow:input_ids: 101 2031 2017 2412 1999 2115 2166 1010 2908 2041 2005 1037 4368 1005 1055 4023 1010 2699 2115 2190 1010 1998 2059 2179 4426 1999 2019 2590 6903 1997 2009 1010 2073 2005 1037 4766 2617 1010 2017 2020 2445 1037 3382 2000 2022 1037 5394 1998 1037 3410 1998 1012 1012 1012 3478 1029 1045 2903 2116 1997 2149 2031 2018 2008 2617 1999 2256 3268 1012 2023 2003 1996 18458 1997 1996 3185 1010 1000 1996 2190 1997 2335 1012 1000 1999 2023 2466 1037 2690 2287 13448 1010 2315 2990 14252 1006 5863 3766 1007 17567 2013 1996 2784 22247 1997 1037 2374 6707 1010 2029 3047 2086 3283 1010 2003 4427 2000 2128 1011 2377 1996 2208 1012 1012 2153 1012 1999 102


INFO:tensorflow:input_ids: 101 2031 2017 2412 1999 2115 2166 1010 2908 2041 2005 1037 4368 1005 1055 4023 1010 2699 2115 2190 1010 1998 2059 2179 4426 1999 2019 2590 6903 1997 2009 1010 2073 2005 1037 4766 2617 1010 2017 2020 2445 1037 3382 2000 2022 1037 5394 1998 1037 3410 1998 1012 1012 1012 3478 1029 1045 2903 2116 1997 2149 2031 2018 2008 2617 1999 2256 3268 1012 2023 2003 1996 18458 1997 1996 3185 1010 1000 1996 2190 1997 2335 1012 1000 1999 2023 2466 1037 2690 2287 13448 1010 2315 2990 14252 1006 5863 3766 1007 17567 2013 1996 2784 22247 1997 1037 2374 6707 1010 2029 3047 2086 3283 1010 2003 4427 2000 2128 1011 2377 1996 2208 1012 1012 2153 1012 1999 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] so far miguel bard ##em ' s career it ' s been one of the more dreadful of recent spanish cinema . he ' s made nothing but rubbish . . . until now . " inca ##uto ##s " has been quite a surprise : it ' s a serious film , with rhythm , with a great cast and very entertaining . < br / > < br / > the art of robb ##ing , that ' s what " inca ##uto ##s " is about . a film much alike to david ma ##met ' s " house of game " and stuff like that . a thousand of twists in the script , and a story where nothing ' s like [SEP]


INFO:tensorflow:tokens: [CLS] so far miguel bard ##em ' s career it ' s been one of the more dreadful of recent spanish cinema . he ' s made nothing but rubbish . . . until now . " inca ##uto ##s " has been quite a surprise : it ' s a serious film , with rhythm , with a great cast and very entertaining . < br / > < br / > the art of robb ##ing , that ' s what " inca ##uto ##s " is about . a film much alike to david ma ##met ' s " house of game " and stuff like that . a thousand of twists in the script , and a story where nothing ' s like [SEP]


INFO:tensorflow:input_ids: 101 2061 2521 8374 22759 6633 1005 1055 2476 2009 1005 1055 2042 2028 1997 1996 2062 21794 1997 3522 3009 5988 1012 2002 1005 1055 2081 2498 2021 29132 1012 1012 1012 2127 2085 1012 1000 27523 16161 2015 1000 2038 2042 3243 1037 4474 1024 2009 1005 1055 1037 3809 2143 1010 2007 6348 1010 2007 1037 2307 3459 1998 2200 14036 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2396 1997 26211 2075 1010 2008 1005 1055 2054 1000 27523 16161 2015 1000 2003 2055 1012 1037 2143 2172 11455 2000 2585 5003 11368 1005 1055 1000 2160 1997 2208 1000 1998 4933 2066 2008 1012 1037 4595 1997 21438 1999 1996 5896 1010 1998 1037 2466 2073 2498 1005 1055 2066 102


INFO:tensorflow:input_ids: 101 2061 2521 8374 22759 6633 1005 1055 2476 2009 1005 1055 2042 2028 1997 1996 2062 21794 1997 3522 3009 5988 1012 2002 1005 1055 2081 2498 2021 29132 1012 1012 1012 2127 2085 1012 1000 27523 16161 2015 1000 2038 2042 3243 1037 4474 1024 2009 1005 1055 1037 3809 2143 1010 2007 6348 1010 2007 1037 2307 3459 1998 2200 14036 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2396 1997 26211 2075 1010 2008 1005 1055 2054 1000 27523 16161 2015 1000 2003 2055 1012 1037 2143 2172 11455 2000 2585 5003 11368 1005 1055 1000 2160 1997 2208 1000 1998 4933 2066 2008 1012 1037 4595 1997 21438 1999 1996 5896 1010 1998 1037 2466 2073 2498 1005 1055 2066 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] that ' s what i kept asking myself during the many fights , screaming matches , swearing and general mayhem that per ##me ##ate the 84 minutes . the comparisons also stand up when you think of the one - dimensional characters , who have so little depth that it is virtually impossible to care what happens to them . they are just badly written cy ##pher ##s for the director to hang his multicultural beliefs on , a topic that has been done much better in other dramas both on tv and the cinema . < br / > < br / > i must confess , i ' m not really one for spotting bad performances during a film , but it must be [SEP]


INFO:tensorflow:tokens: [CLS] that ' s what i kept asking myself during the many fights , screaming matches , swearing and general mayhem that per ##me ##ate the 84 minutes . the comparisons also stand up when you think of the one - dimensional characters , who have so little depth that it is virtually impossible to care what happens to them . they are just badly written cy ##pher ##s for the director to hang his multicultural beliefs on , a topic that has been done much better in other dramas both on tv and the cinema . < br / > < br / > i must confess , i ' m not really one for spotting bad performances during a film , but it must be [SEP]


INFO:tensorflow:input_ids: 101 2008 1005 1055 2054 1045 2921 4851 2870 2076 1996 2116 9590 1010 7491 3503 1010 25082 1998 2236 26865 2008 2566 4168 3686 1996 6391 2781 1012 1996 18539 2036 3233 2039 2043 2017 2228 1997 1996 2028 1011 8789 3494 1010 2040 2031 2061 2210 5995 2008 2009 2003 8990 5263 2000 2729 2054 6433 2000 2068 1012 2027 2024 2074 6649 2517 22330 27921 2015 2005 1996 2472 2000 6865 2010 27135 9029 2006 1010 1037 8476 2008 2038 2042 2589 2172 2488 1999 2060 16547 2119 2006 2694 1998 1996 5988 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2442 18766 1010 1045 1005 1049 2025 2428 2028 2005 27963 2919 4616 2076 1037 2143 1010 2021 2009 2442 2022 102


INFO:tensorflow:input_ids: 101 2008 1005 1055 2054 1045 2921 4851 2870 2076 1996 2116 9590 1010 7491 3503 1010 25082 1998 2236 26865 2008 2566 4168 3686 1996 6391 2781 1012 1996 18539 2036 3233 2039 2043 2017 2228 1997 1996 2028 1011 8789 3494 1010 2040 2031 2061 2210 5995 2008 2009 2003 8990 5263 2000 2729 2054 6433 2000 2068 1012 2027 2024 2074 6649 2517 22330 27921 2015 2005 1996 2472 2000 6865 2010 27135 9029 2006 1010 1037 8476 2008 2038 2042 2589 2172 2488 1999 2060 16547 2119 2006 2694 1998 1996 5988 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2442 18766 1010 1045 1005 1049 2025 2428 2028 2005 27963 2919 4616 2076 1037 2143 1010 2021 2009 2442 2022 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] " one dark night " is a staple in the 1980 ' s low budget horror genre . filled with retro pun ##s , clothing and scenery , " o ##d ##n " transports the viewer to a simpler time , when horror films were just that . . . horror ! < br / > < br / > nothing so intense that you can ' t understand what ##s going on , the film tells a dark fable of what happens when you mess with the dead . well acted by it ' s stable of scream - queens , and a fine directorial job by tom mc ##lou ##gh ##lin , whom rev ##els in the time and makes you believe what he [SEP]


INFO:tensorflow:tokens: [CLS] " one dark night " is a staple in the 1980 ' s low budget horror genre . filled with retro pun ##s , clothing and scenery , " o ##d ##n " transports the viewer to a simpler time , when horror films were just that . . . horror ! < br / > < br / > nothing so intense that you can ' t understand what ##s going on , the film tells a dark fable of what happens when you mess with the dead . well acted by it ' s stable of scream - queens , and a fine directorial job by tom mc ##lou ##gh ##lin , whom rev ##els in the time and makes you believe what he [SEP]


INFO:tensorflow:input_ids: 101 1000 2028 2601 2305 1000 2003 1037 18785 1999 1996 3150 1005 1055 2659 5166 5469 6907 1012 3561 2007 22307 26136 2015 1010 5929 1998 17363 1010 1000 1051 2094 2078 1000 19003 1996 13972 2000 1037 16325 2051 1010 2043 5469 3152 2020 2074 2008 1012 1012 1012 5469 999 1026 7987 1013 1028 1026 7987 1013 1028 2498 2061 6387 2008 2017 2064 1005 1056 3305 2054 2015 2183 2006 1010 1996 2143 4136 1037 2601 28458 1997 2054 6433 2043 2017 6752 2007 1996 2757 1012 2092 6051 2011 2009 1005 1055 6540 1997 6978 1011 8603 1010 1998 1037 2986 21635 3105 2011 3419 11338 23743 5603 4115 1010 3183 7065 9050 1999 1996 2051 1998 3084 2017 2903 2054 2002 102


INFO:tensorflow:input_ids: 101 1000 2028 2601 2305 1000 2003 1037 18785 1999 1996 3150 1005 1055 2659 5166 5469 6907 1012 3561 2007 22307 26136 2015 1010 5929 1998 17363 1010 1000 1051 2094 2078 1000 19003 1996 13972 2000 1037 16325 2051 1010 2043 5469 3152 2020 2074 2008 1012 1012 1012 5469 999 1026 7987 1013 1028 1026 7987 1013 1028 2498 2061 6387 2008 2017 2064 1005 1056 3305 2054 2015 2183 2006 1010 1996 2143 4136 1037 2601 28458 1997 2054 6433 2043 2017 6752 2007 1996 2757 1012 2092 6051 2011 2009 1005 1055 6540 1997 6978 1011 8603 1010 1998 1037 2986 21635 3105 2011 3419 11338 23743 5603 4115 1010 3183 7065 9050 1999 1996 2051 1998 3084 2017 2903 2054 2002 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] aaa is my favorite movie . . . i have seen it a number of times ( don ' t remember the count now ) and every time i just love it . . . . this is the best movie of raj kumar santos ##hi . . the comedy , dialogues , and performance is amazing . . all the actors and actresses have done a superb job . . . you cannot stop laughing while watching this movie . . . its just hilarious . . . amir khan and salman khan have done a great job . . . and acting by par ##esh raw ##al was excellent as always . . . . . the music is inspired from the old hindi [SEP]


INFO:tensorflow:tokens: [CLS] aaa is my favorite movie . . . i have seen it a number of times ( don ' t remember the count now ) and every time i just love it . . . . this is the best movie of raj kumar santos ##hi . . the comedy , dialogues , and performance is amazing . . all the actors and actresses have done a superb job . . . you cannot stop laughing while watching this movie . . . its just hilarious . . . amir khan and salman khan have done a great job . . . and acting by par ##esh raw ##al was excellent as always . . . . . the music is inspired from the old hindi [SEP]


INFO:tensorflow:input_ids: 101 13360 2003 2026 5440 3185 1012 1012 1012 1045 2031 2464 2009 1037 2193 1997 2335 1006 2123 1005 1056 3342 1996 4175 2085 1007 1998 2296 2051 1045 2074 2293 2009 1012 1012 1012 1012 2023 2003 1996 2190 3185 1997 11948 9600 11053 4048 1012 1012 1996 4038 1010 22580 1010 1998 2836 2003 6429 1012 1012 2035 1996 5889 1998 19910 2031 2589 1037 21688 3105 1012 1012 1012 2017 3685 2644 5870 2096 3666 2023 3185 1012 1012 1012 2049 2074 26316 1012 1012 1012 18904 4967 1998 28542 4967 2031 2589 1037 2307 3105 1012 1012 1012 1998 3772 2011 11968 9953 6315 2389 2001 6581 2004 2467 1012 1012 1012 1012 1012 1996 2189 2003 4427 2013 1996 2214 9269 102


INFO:tensorflow:input_ids: 101 13360 2003 2026 5440 3185 1012 1012 1012 1045 2031 2464 2009 1037 2193 1997 2335 1006 2123 1005 1056 3342 1996 4175 2085 1007 1998 2296 2051 1045 2074 2293 2009 1012 1012 1012 1012 2023 2003 1996 2190 3185 1997 11948 9600 11053 4048 1012 1012 1996 4038 1010 22580 1010 1998 2836 2003 6429 1012 1012 2035 1996 5889 1998 19910 2031 2589 1037 21688 3105 1012 1012 1012 2017 3685 2644 5870 2096 3666 2023 3185 1012 1012 1012 2049 2074 26316 1012 1012 1012 18904 4967 1998 28542 4967 2031 2589 1037 2307 3105 1012 1012 1012 1998 3772 2011 11968 9953 6315 2389 2001 6581 2004 2467 1012 1012 1012 1012 1012 1996 2189 2003 4427 2013 1996 2214 9269 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this is a movie that will leave you thinking , is he or isn ' t he ? while many people have complained about the ambiguous ending , it gives room for the audience to think and interpret it from the signs . this is my interpretation and theory , and i believe it is very sound . < br / > < br / > first , here is the plot . one day , pro ##t ( kevin space ##y ) suddenly appears in the midst of a busy train station . after attempting to help a woman from mug ##gers , he is arrested and sent to bellevue , and later transferred to dr . mark powell ' s ( jeff bridges ) [SEP]


INFO:tensorflow:tokens: [CLS] this is a movie that will leave you thinking , is he or isn ' t he ? while many people have complained about the ambiguous ending , it gives room for the audience to think and interpret it from the signs . this is my interpretation and theory , and i believe it is very sound . < br / > < br / > first , here is the plot . one day , pro ##t ( kevin space ##y ) suddenly appears in the midst of a busy train station . after attempting to help a woman from mug ##gers , he is arrested and sent to bellevue , and later transferred to dr . mark powell ' s ( jeff bridges ) [SEP]


INFO:tensorflow:input_ids: 101 2023 2003 1037 3185 2008 2097 2681 2017 3241 1010 2003 2002 2030 3475 1005 1056 2002 1029 2096 2116 2111 2031 10865 2055 1996 20080 4566 1010 2009 3957 2282 2005 1996 4378 2000 2228 1998 17841 2009 2013 1996 5751 1012 2023 2003 2026 7613 1998 3399 1010 1998 1045 2903 2009 2003 2200 2614 1012 1026 7987 1013 1028 1026 7987 1013 1028 2034 1010 2182 2003 1996 5436 1012 2028 2154 1010 4013 2102 1006 4901 2686 2100 1007 3402 3544 1999 1996 12930 1997 1037 5697 3345 2276 1012 2044 7161 2000 2393 1037 2450 2013 14757 15776 1010 2002 2003 4727 1998 2741 2000 26756 1010 1998 2101 4015 2000 2852 1012 2928 8997 1005 1055 1006 5076 7346 1007 102


INFO:tensorflow:input_ids: 101 2023 2003 1037 3185 2008 2097 2681 2017 3241 1010 2003 2002 2030 3475 1005 1056 2002 1029 2096 2116 2111 2031 10865 2055 1996 20080 4566 1010 2009 3957 2282 2005 1996 4378 2000 2228 1998 17841 2009 2013 1996 5751 1012 2023 2003 2026 7613 1998 3399 1010 1998 1045 2903 2009 2003 2200 2614 1012 1026 7987 1013 1028 1026 7987 1013 1028 2034 1010 2182 2003 1996 5436 1012 2028 2154 1010 4013 2102 1006 4901 2686 2100 1007 3402 3544 1999 1996 12930 1997 1037 5697 3345 2276 1012 2044 7161 2000 2393 1037 2450 2013 14757 15776 1010 2002 2003 4727 1998 2741 2000 26756 1010 1998 2101 4015 2000 2852 1012 2928 8997 1005 1055 1006 5076 7346 1007 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


Creating a model
Now that we've prepared our data, let's focus on building a model. create_model does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called fine-tuning.

In [0]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model."""

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own layer to tune for politeness data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting
    output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
    # If we're predicting, we want predicted labels and the probabiltiies.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


In [0]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns `model_fn` closure for TPUEstimator."""
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
    
    # TRAIN and EVAL
    if not is_predicting:

      (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

      # Calculate evaluation metrics. 
      def metric_fn(label_ids, predicted_labels):
        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
        f1_score = tf.contrib.metrics.f1_score(
            label_ids,
            predicted_labels)
        auc = tf.metrics.auc(
            label_ids,
            predicted_labels)
        recall = tf.metrics.recall(
            label_ids,
            predicted_labels)
        precision = tf.metrics.precision(
            label_ids,
            predicted_labels) 
        true_pos = tf.metrics.true_positives(
            label_ids,
            predicted_labels)
        true_neg = tf.metrics.true_negatives(
            label_ids,
            predicted_labels)   
        false_pos = tf.metrics.false_positives(
            label_ids,
            predicted_labels)  
        false_neg = tf.metrics.false_negatives(
            label_ids,
            predicted_labels)
        return {
            "eval_accuracy": accuracy,
            "f1_score": f1_score,
            "auc": auc,
            "precision": precision,
            "recall": recall,
            "true_positives": true_pos,
            "true_negatives": true_neg,
            "false_positives": false_pos,
            "false_negatives": false_neg
        }

      eval_metrics = metric_fn(label_ids, predicted_labels)

      if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          train_op=train_op)
      else:
          return tf.estimator.EstimatorSpec(mode=mode,
            loss=loss,
            eval_metric_ops=eval_metrics)
    else:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Return the actual model function in the closure
  return model_fn


In [0]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [0]:
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [0]:
# Specify outpit directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [19]:
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})


INFO:tensorflow:Using config: {'_model_dir': 'output', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc664d1f4e0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Using config: {'_model_dir': 'output', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc664d1f4e0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [0]:
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

In [0]:
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from output/model.ckpt-0


INFO:tensorflow:Restoring parameters from output/model.ckpt-0


Instructions for updating:
Use standard file utilities to get mtimes.


Instructions for updating:
Use standard file utilities to get mtimes.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into output/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into output/model.ckpt.


INFO:tensorflow:loss = 0.7161311, step = 1


INFO:tensorflow:loss = 0.7161311, step = 1


INFO:tensorflow:global_step/sec: 0.0208333


INFO:tensorflow:global_step/sec: 0.0208333






INFO:tensorflow:loss = 0.47000405, step = 101 (4800.026 sec)


INFO:tensorflow:loss = 0.47000405, step = 101 (4800.026 sec)


















INFO:tensorflow:global_step/sec: 0.0203472


INFO:tensorflow:global_step/sec: 0.0203472


INFO:tensorflow:loss = 0.1891584, step = 201 (4914.676 sec)


INFO:tensorflow:loss = 0.1891584, step = 201 (4914.676 sec)


In [0]:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [23]:
estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2019-10-21T14:10:50Z


INFO:tensorflow:Starting evaluation at 2019-10-21T14:10:50Z


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from output/model.ckpt-0


INFO:tensorflow:Restoring parameters from output/model.ckpt-0


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Finished evaluation at 2019-10-21-14:58:12


INFO:tensorflow:Finished evaluation at 2019-10-21-14:58:12


INFO:tensorflow:Saving dict for global step 0: auc = 0.47967505, eval_accuracy = 0.4798, f1_score = 0.66631097, false_negatives = 1690.0, false_positives = 911.0, global_step = 0, loss = 0.7063751, precision = 0.4700407, recall = 0.32345876, true_negatives = 1591.0, true_positives = 808.0


INFO:tensorflow:Saving dict for global step 0: auc = 0.47967505, eval_accuracy = 0.4798, f1_score = 0.66631097, false_negatives = 1690.0, false_positives = 911.0, global_step = 0, loss = 0.7063751, precision = 0.4700407, recall = 0.32345876, true_negatives = 1591.0, true_positives = 808.0


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 0: output/model.ckpt-0


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 0: output/model.ckpt-0


{'auc': 0.47967505,
 'eval_accuracy': 0.4798,
 'f1_score': 0.66631097,
 'false_negatives': 1690.0,
 'false_positives': 911.0,
 'global_step': 0,
 'loss': 0.7063751,
 'precision': 0.4700407,
 'recall': 0.32345876,
 'true_negatives': 1591.0,
 'true_positives': 808.0}

In [0]:
def getPrediction(in_sentences):
  labels = ["Negative", "Positive"]
  input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  predictions = estimator.predict(predict_input_fn)
  return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]

In [0]:
pred_sentences = [
  "That movie was absolutely awful",
  "The acting was a bit lacking",
  "The film was creative and surprising",
  "Absolutely fantastic!"
]

In [0]:
predictions = getPrediction(pred_sentences)

In [0]:
predictions