In [0]:
# Copyright 2019 Google Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#Predicting Movie Review Sentiment with BERT on TF Hub

If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.

Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing Tensorflow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMO and GloVE. Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.

Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in Tensorflow with tf hub. Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!

In [0]:
#import tensorflow.compat.v1 as tf
#tf.disable_v2_behavior()

%tensorflow_version 1.x

In [0]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

In addition to the standard libraries we imported above, we'll need to install BERT's python package.

In [36]:
!pip install bert-tensorflow



In [0]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

Below, we'll set an output directory location to store our model output and checkpoints. This can be a local directory, in which case you'd set OUTPUT_DIR to the name of the directory you'd like to create. If you're running this code in Google's hosted Colab, the directory won't persist after the Colab session ends.

Alternatively, if you're a GCP user, you can store output in a GCP bucket. To do that, set a directory name in OUTPUT_DIR and the name of the GCP bucket in the BUCKET field.

Set DO_DELETE to rewrite the OUTPUT_DIR if it exists. Otherwise, Tensorflow will load existing model checkpoints from that directory (if they exist).

In [38]:
# Set the output directory for saving model file
# Optionally, set a GCP bucket location

OUTPUT_DIR = 'bert-sentiment'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = True #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = True #@param {type:"boolean"}
BUCKET = 'mybuckettest001' #@param {type:"string"}

if USE_BUCKET:
  OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
  from google.colab import auth
  auth.authenticate_user()

if DO_DELETE:
  try:
    tf.gfile.DeleteRecursively(OUTPUT_DIR)
  except:
    # Doesn't matter if the directory didn't exist
    pass
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


***** Model output directory: gs://mybuckettest001/bert-sentiment *****


#Data

First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).

In [39]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [40]:
!pwd
!rm -rf data
!rm -rf data_zombie
!tar -zxvf /content/gdrive/My\ Drive/NLP_Projects/truthsayer/data.tar.gz

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
./data_combined/train/neg/66ec6ac9a7f6c71b06724356baf640de3d539265_1
./data_combined/train/neg/0f4c035b0bb946a0f0306ff9f056d300bdc9cdad_1
./data_combined/train/neg/be6f260774a56df561370cfcd9c17441e61fd8bd_1
./data_combined/train/neg/8c94bae553029027a85d87c581df002249252b08_1
./data_combined/train/neg/0fc7f612c96789ecba314f4cf944d572fad7cc2f_1
./data_combined/train/neg/285230652482028302b965def6a187608480ae66_1
./data_combined/train/neg/d18141404ed7a53b26e57bdd4ebec9933eea9f92_1
./data_combined/train/neg/17a39003036ddcaa8ec554d46084d55d28233f51_1
./data_combined/train/neg/d1958c4795146b93f0dd034fb74a14d28441dfd4_1
./data_combined/train/neg/cd56eae35dd8d681ae6facee6c3cab37e862ec6b_1
./data_combined/train/neg/0e8c75c458056f82cacbd7501391a31885c98f75_1
./data_combined/train/neg/8ce4e866de7218c749e15b9b7a6e96891a7e6a44_1
./data_combined/train/neg/651ddf29243305a5fdd1544681b1e3a340706159_1
./data_combined/train/neg/a5ab47d7e178

In [41]:
from tensorflow import keras
import os
import re

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
  data = {}
  data["sentence"] = []
  data["sentiment"] = []
  numFails = 0
  numSuccess = 0
  for file_path in os.listdir(directory):
    with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
      try:
        sentence = f.read()
        sentiment = re.match(".*_(\d+)", file_path).group(1)
      except:
        numFails = numFails + 1
        print("failed to decode sentence")
      
      data["sentence"].append(sentence)
      #data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
      data["sentiment"].append(sentiment)
      numSuccess = numSuccess + 1

  print(directory+" success/fails: "+str(numSuccess)+"/"+str(numFails))
  print("data[sentence]: " + str(len(data["sentence"])))
  print("data[sentiment]: " + str(len(data["sentiment"])))
  return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
  pos_df = load_directory_data(os.path.join(directory, "pos"))
  neg_df = load_directory_data(os.path.join(directory, "neg"))
  pos_df["polarity"] = 1
  neg_df["polarity"] = 0
  return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False):
  #dataset = tf.keras.utils.get_file(
  #    fname="aclImdb.tar.gz", 
  #    origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", 
  #    extract=True)
  
  #train_df = load_dataset(os.path.join(os.path.dirname(dataset), 
  #                                     "aclImdb", "train"))
  #test_df = load_dataset(os.path.join(os.path.dirname(dataset), 
  #                                    "aclImdb", "test"))
  train_df = load_dataset(os.path.join("data_combined", "train"))
  dev_df = load_dataset(os.path.join("data_combined", "dev"))
  test_df = load_dataset(os.path.join("data_combined", "test"))
  return train_df, dev_df, test_df

train, dev, test = download_and_load_datasets()

data_combined/train/pos success/fails: 6333/0
data[sentence]: 6333
data[sentiment]: 6333
failed to decode sentence
failed to decode sentence
data_combined/train/neg success/fails: 10107/2
data[sentence]: 10107
data[sentiment]: 10107
data_combined/dev/pos success/fails: 90/0
data[sentence]: 90
data[sentiment]: 90
data_combined/dev/neg success/fails: 167/0
data[sentence]: 167
data[sentiment]: 167
data_combined/test/pos success/fails: 102/0
data[sentence]: 102
data[sentiment]: 102
data_combined/test/neg success/fails: 166/0
data[sentence]: 166
data[sentiment]: 166


To keep training fast, we'll take a sample of 5000 train and test examples, respectively.

In [0]:
train = train.sample(10000)
dev = dev.sample(150)
test = test.sample(150)

In [43]:
train.columns

Index(['sentence', 'sentiment', 'polarity'], dtype='object')

For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respecitvely)

In [0]:
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]

#Data Preprocessing
We'll need to transform our data into a format BERT understands. This involves two steps. First, we create  `InputExample`'s using the constructor provided in the BERT library.

- `text_a` is the text we want to classify, which in this case, is the `Request` field in our Dataframe. 
- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`? Is `text_b` an answer to the question asked by `text_a`?). This doesn't apply to our task, so we can leave `text_b` blank.
- `label` is the label for our example, i.e. True, False

In [0]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

dev_InputExamples = dev.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):


1. Lowercase our text (if we're using a BERT lowercase model)
2. Tokenize it (i.e. "sally says hi" -> ["sally", "says", "hi"])
3. Break words into WordPieces (i.e. "calling" -> ["call", "##ing"])
4. Map our words to indexes using a vocab file that BERT provides
5. Add special "CLS" and "SEP" tokens (see the [readme](https://github.com/google-research/bert))
6. Append "index" and "segment" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))

Happily, we don't have to worry about most of these details.




To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:

In [46]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  with tf.Graph().as_default():
    bert_module = hub.Module(BERT_MODEL_HUB)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    with tf.Session() as sess:
      vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                            tokenization_info["do_lower_case"]])
      
  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Great--we just learned that the BERT model we're using expects lowercase data (that's what stored in tokenization_info["do_lower_case"]) and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:

In [47]:
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands.

In [0]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
dev_features = bert.run_classifier.convert_examples_to_features(dev_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

INFO:tensorflow:Writing example 0 of 10000


INFO:tensorflow:Writing example 0 of 10000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] sunny ##vale , cal ##if . ( reuters ) - oscar vale ##ra likes to use 3d printers to build an assortment of crafts , but he is now turning his hobby toward the fight against the corona ##virus pan ##de ##mic . measurements are seen on the plastic face shield for an earlier prototype of a 3d - printed face shield to be used by medical professionals in the fight against corona ##virus disease ( co ##vid - 19 ) in sunny ##vale , california , u . s . on april 1 , 2020 . reuters / nathan fran ##din ##o in just four days , the new jersey high school teacher has printed and distributed 200 face shields to medical professionals across the [SEP]


INFO:tensorflow:tokens: [CLS] sunny ##vale , cal ##if . ( reuters ) - oscar vale ##ra likes to use 3d printers to build an assortment of crafts , but he is now turning his hobby toward the fight against the corona ##virus pan ##de ##mic . measurements are seen on the plastic face shield for an earlier prototype of a 3d - printed face shield to be used by medical professionals in the fight against corona ##virus disease ( co ##vid - 19 ) in sunny ##vale , california , u . s . on april 1 , 2020 . reuters / nathan fran ##din ##o in just four days , the new jersey high school teacher has printed and distributed 200 face shields to medical professionals across the [SEP]


INFO:tensorflow:input_ids: 101 11559 17479 1010 10250 10128 1012 1006 26665 1007 1011 7436 10380 2527 7777 2000 2224 7605 23557 2000 3857 2019 26285 1997 14030 1010 2021 2002 2003 2085 3810 2010 17792 2646 1996 2954 2114 1996 21887 23350 6090 3207 7712 1012 11702 2024 2464 2006 1996 6081 2227 6099 2005 2019 3041 8773 1997 1037 7605 1011 6267 2227 6099 2000 2022 2109 2011 2966 8390 1999 1996 2954 2114 21887 23350 4295 1006 2522 17258 1011 2539 1007 1999 11559 17479 1010 2662 1010 1057 1012 1055 1012 2006 2258 1015 1010 12609 1012 26665 1013 7150 23151 8718 2080 1999 2074 2176 2420 1010 1996 2047 3933 2152 2082 3836 2038 6267 1998 5500 3263 2227 11824 2000 2966 8390 2408 1996 102


INFO:tensorflow:input_ids: 101 11559 17479 1010 10250 10128 1012 1006 26665 1007 1011 7436 10380 2527 7777 2000 2224 7605 23557 2000 3857 2019 26285 1997 14030 1010 2021 2002 2003 2085 3810 2010 17792 2646 1996 2954 2114 1996 21887 23350 6090 3207 7712 1012 11702 2024 2464 2006 1996 6081 2227 6099 2005 2019 3041 8773 1997 1037 7605 1011 6267 2227 6099 2000 2022 2109 2011 2966 8390 1999 1996 2954 2114 21887 23350 4295 1006 2522 17258 1011 2539 1007 1999 11559 17479 1010 2662 1010 1057 1012 1055 1012 2006 2258 1015 1010 12609 1012 26665 1013 7150 23151 8718 2080 1999 2074 2176 2420 1010 1996 2047 3933 2152 2082 3836 2038 6267 1998 5500 3263 2227 11824 2000 2966 8390 2408 1996 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None






INFO:tensorflow:input_ids: 101 2375 12088 13366 2100 16234 1010 3613 2000 3113 1999 15433 2408 1996 2406 4372 8017 3351 2023 3746 2000 24679 14408 3258 15983 8292 25655 1013 1059 6673 15983 8292 25655 1013 1059 6673 3841 6643 25500 3126 1997 21210 2213 1010 5076 14841 5677 6137 1997 8814 12273 1010 6260 2063 12170 25074 3122 1997 18133 2099 1998 29431 3122 12975 1997 1059 22540 2140 5201 2000 2023 3189 1012 4174 1005 1055 2375 12088 2513 2000 1996 9424 9857 2004 1996 2110 2680 2049 3284 3679 2193 1997 2522 17258 1011 2539 3141 6677 1999 2484 2847 1012 1996 6372 19596 2750 1996 16234 2013 3537 18079 1012 21625 1059 16584 5017 1998 2270 2740 4584 2040 1005 2310 2170 2005 3132 21403 102


INFO:tensorflow:input_ids: 101 2375 12088 13366 2100 16234 1010 3613 2000 3113 1999 15433 2408 1996 2406 4372 8017 3351 2023 3746 2000 24679 14408 3258 15983 8292 25655 1013 1059 6673 15983 8292 25655 1013 1059 6673 3841 6643 25500 3126 1997 21210 2213 1010 5076 14841 5677 6137 1997 8814 12273 1010 6260 2063 12170 25074 3122 1997 18133 2099 1998 29431 3122 12975 1997 1059 22540 2140 5201 2000 2023 3189 1012 4174 1005 1055 2375 12088 2513 2000 1996 9424 9857 2004 1996 2110 2680 2049 3284 3679 2193 1997 2522 17258 1011 2539 3141 6677 1999 2484 2847 1012 1996 6372 19596 2750 1996 16234 2013 3537 18079 1012 21625 1059 16584 5017 1998 2270 2740 4584 2040 1005 2310 2170 2005 3132 21403 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] white house trade adviser peter navarro warned in a late january memo the corona ##virus could cost the united states trillion ##s without a travel ban on china . navarro called for an immediate travel ban on china in the memo , dated january 29 , that predicted a corona ##virus pan ##de ##mic would cost the united states over $ 3 . 8 trillion . the details of the memo , which was first reported by the new york times on monday evening and later published in full by ax ##ios , was addressed to the national security council just days before the president enacted his travel ban on china . “ the clear dominant strategy is an immediate travel ban on china , ” [SEP]


INFO:tensorflow:tokens: [CLS] white house trade adviser peter navarro warned in a late january memo the corona ##virus could cost the united states trillion ##s without a travel ban on china . navarro called for an immediate travel ban on china in the memo , dated january 29 , that predicted a corona ##virus pan ##de ##mic would cost the united states over $ 3 . 8 trillion . the details of the memo , which was first reported by the new york times on monday evening and later published in full by ax ##ios , was addressed to the national security council just days before the president enacted his travel ban on china . “ the clear dominant strategy is an immediate travel ban on china , ” [SEP]


INFO:tensorflow:input_ids: 101 2317 2160 3119 11747 2848 23524 7420 1999 1037 2397 2254 24443 1996 21887 23350 2071 3465 1996 2142 2163 23458 2015 2302 1037 3604 7221 2006 2859 1012 23524 2170 2005 2019 6234 3604 7221 2006 2859 1999 1996 24443 1010 6052 2254 2756 1010 2008 10173 1037 21887 23350 6090 3207 7712 2052 3465 1996 2142 2163 2058 1002 1017 1012 1022 23458 1012 1996 4751 1997 1996 24443 1010 2029 2001 2034 2988 2011 1996 2047 2259 2335 2006 6928 3944 1998 2101 2405 1999 2440 2011 22260 10735 1010 2001 8280 2000 1996 2120 3036 2473 2074 2420 2077 1996 2343 11955 2010 3604 7221 2006 2859 1012 1523 1996 3154 7444 5656 2003 2019 6234 3604 7221 2006 2859 1010 1524 102


INFO:tensorflow:input_ids: 101 2317 2160 3119 11747 2848 23524 7420 1999 1037 2397 2254 24443 1996 21887 23350 2071 3465 1996 2142 2163 23458 2015 2302 1037 3604 7221 2006 2859 1012 23524 2170 2005 2019 6234 3604 7221 2006 2859 1999 1996 24443 1010 6052 2254 2756 1010 2008 10173 1037 21887 23350 6090 3207 7712 2052 3465 1996 2142 2163 2058 1002 1017 1012 1022 23458 1012 1996 4751 1997 1996 24443 1010 2029 2001 2034 2988 2011 1996 2047 2259 2335 2006 6928 3944 1998 2101 2405 1999 2440 2011 22260 10735 1010 2001 8280 2000 1996 2120 3036 2473 2074 2420 2077 1996 2343 11955 2010 3604 7221 2006 2859 1012 1523 1996 3154 7444 5656 2003 2019 6234 3604 7221 2006 2859 1010 1524 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] file photo : a man stands near an ibm logo at the mobile world congress in barcelona , spain , february 25 , 2019 . reuters / sergio perez ( reuters ) - international business machines corp ( ibm . n ) on monday named former bank of america corp ( ba ##c . n ) top technology executive howard bo ##ville head of its cloud business , as ar ##vin ##d krishna takes over from long - time chief gin ##ni rome ##tty . krishna headed the big blue ’ s fast - growing cloud business before his elevation as chief executive officer in late - january . as chief technology officer of bank of america , bo ##ville was responsible for building and running [SEP]


INFO:tensorflow:tokens: [CLS] file photo : a man stands near an ibm logo at the mobile world congress in barcelona , spain , february 25 , 2019 . reuters / sergio perez ( reuters ) - international business machines corp ( ibm . n ) on monday named former bank of america corp ( ba ##c . n ) top technology executive howard bo ##ville head of its cloud business , as ar ##vin ##d krishna takes over from long - time chief gin ##ni rome ##tty . krishna headed the big blue ’ s fast - growing cloud business before his elevation as chief executive officer in late - january . as chief technology officer of bank of america , bo ##ville was responsible for building and running [SEP]


INFO:tensorflow:input_ids: 101 5371 6302 1024 1037 2158 4832 2379 2019 9980 8154 2012 1996 4684 2088 3519 1999 7623 1010 3577 1010 2337 2423 1010 10476 1012 26665 1013 13983 10730 1006 26665 1007 1011 2248 2449 6681 13058 1006 9980 1012 1050 1007 2006 6928 2315 2280 2924 1997 2637 13058 1006 8670 2278 1012 1050 1007 2327 2974 3237 4922 8945 3077 2132 1997 2049 6112 2449 1010 2004 12098 6371 2094 10871 3138 2058 2013 2146 1011 2051 2708 18353 3490 4199 15353 1012 10871 3753 1996 2502 2630 1521 1055 3435 1011 3652 6112 2449 2077 2010 6678 2004 2708 3237 2961 1999 2397 1011 2254 1012 2004 2708 2974 2961 1997 2924 1997 2637 1010 8945 3077 2001 3625 2005 2311 1998 2770 102


INFO:tensorflow:input_ids: 101 5371 6302 1024 1037 2158 4832 2379 2019 9980 8154 2012 1996 4684 2088 3519 1999 7623 1010 3577 1010 2337 2423 1010 10476 1012 26665 1013 13983 10730 1006 26665 1007 1011 2248 2449 6681 13058 1006 9980 1012 1050 1007 2006 6928 2315 2280 2924 1997 2637 13058 1006 8670 2278 1012 1050 1007 2327 2974 3237 4922 8945 3077 2132 1997 2049 6112 2449 1010 2004 12098 6371 2094 10871 3138 2058 2013 2146 1011 2051 2708 18353 3490 4199 15353 1012 10871 3753 1996 2502 2630 1521 1055 3435 1011 3652 6112 2449 2077 2010 6678 2004 2708 3237 2961 1999 2397 1011 2254 1012 2004 2708 2974 2961 1997 2924 1997 2637 1010 8945 3077 2001 3625 2005 2311 1998 2770 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] “ it ’ s something that i ’ ve been doing for years and years , ” youtube ##r karen ka ##vet ##t says in her video of 10 expert - level tips for doing a ji ##gs ##aw puzzle . “ look at any poster or image or view through a window and think to yourself , if this was a ji ##gs ##aw puzzle , how would i go about putting it together ? ” the rest of the video runs through other practical tips — like si ##fting new pieces through a cola ##nder to remove the puzzle dust , or storing edge pieces in a separate zip - lock bag . ka ##vet ##t is part of the growing presence of ji [SEP]


INFO:tensorflow:tokens: [CLS] “ it ’ s something that i ’ ve been doing for years and years , ” youtube ##r karen ka ##vet ##t says in her video of 10 expert - level tips for doing a ji ##gs ##aw puzzle . “ look at any poster or image or view through a window and think to yourself , if this was a ji ##gs ##aw puzzle , how would i go about putting it together ? ” the rest of the video runs through other practical tips — like si ##fting new pieces through a cola ##nder to remove the puzzle dust , or storing edge pieces in a separate zip - lock bag . ka ##vet ##t is part of the growing presence of ji [SEP]


INFO:tensorflow:input_ids: 101 1523 2009 1521 1055 2242 2008 1045 1521 2310 2042 2725 2005 2086 1998 2086 1010 1524 7858 2099 8129 10556 19510 2102 2758 1999 2014 2678 1997 2184 6739 1011 2504 10247 2005 2725 1037 10147 5620 10376 11989 1012 1523 2298 2012 2151 13082 2030 3746 2030 3193 2083 1037 3332 1998 2228 2000 4426 1010 2065 2023 2001 1037 10147 5620 10376 11989 1010 2129 2052 1045 2175 2055 5128 2009 2362 1029 1524 1996 2717 1997 1996 2678 3216 2083 2060 6742 10247 1517 2066 9033 26169 2047 4109 2083 1037 15270 11563 2000 6366 1996 11989 6497 1010 2030 23977 3341 4109 1999 1037 3584 14101 1011 5843 4524 1012 10556 19510 2102 2003 2112 1997 1996 3652 3739 1997 10147 102


INFO:tensorflow:input_ids: 101 1523 2009 1521 1055 2242 2008 1045 1521 2310 2042 2725 2005 2086 1998 2086 1010 1524 7858 2099 8129 10556 19510 2102 2758 1999 2014 2678 1997 2184 6739 1011 2504 10247 2005 2725 1037 10147 5620 10376 11989 1012 1523 2298 2012 2151 13082 2030 3746 2030 3193 2083 1037 3332 1998 2228 2000 4426 1010 2065 2023 2001 1037 10147 5620 10376 11989 1010 2129 2052 1045 2175 2055 5128 2009 2362 1029 1524 1996 2717 1997 1996 2678 3216 2083 2060 6742 10247 1517 2066 9033 26169 2047 4109 2083 1037 15270 11563 2000 6366 1996 11989 6497 1010 2030 23977 3341 4109 1999 1037 3584 14101 1011 5843 4524 1012 10556 19510 2102 2003 2112 1997 1996 3652 3739 1997 10147 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


#Creating a model

Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning).

In [0]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model."""

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own layer to tune for politeness data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting
    output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
    # If we're predicting, we want predicted labels and the probabiltiies.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction.

In [0]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns `model_fn` closure for TPUEstimator."""
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
    
    # TRAIN and EVAL
    if not is_predicting:

      (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

      # Calculate evaluation metrics. 
      def metric_fn(label_ids, predicted_labels):
        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
        f1_score = tf.contrib.metrics.f1_score(
            label_ids,
            predicted_labels)
        auc = tf.metrics.auc(
            label_ids,
            predicted_labels)
        recall = tf.metrics.recall(
            label_ids,
            predicted_labels)
        precision = tf.metrics.precision(
            label_ids,
            predicted_labels) 
        true_pos = tf.metrics.true_positives(
            label_ids,
            predicted_labels)
        true_neg = tf.metrics.true_negatives(
            label_ids,
            predicted_labels)   
        false_pos = tf.metrics.false_positives(
            label_ids,
            predicted_labels)  
        false_neg = tf.metrics.false_negatives(
            label_ids,
            predicted_labels)
        return {
            "eval_accuracy": accuracy,
            "f1_score": f1_score,
            "auc": auc,
            "precision": precision,
            "recall": recall,
            "true_positives": true_pos,
            "true_negatives": true_neg,
            "false_positives": false_pos,
            "false_negatives": false_neg
        }

      eval_metrics = metric_fn(label_ids, predicted_labels)

      if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          train_op=train_op)
      else:
          return tf.estimator.EstimatorSpec(mode=mode,
            loss=loss,
            eval_metric_ops=eval_metrics)
    else:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Return the actual model function in the closure
  return model_fn


In [0]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [0]:
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

print("num_train_steps: "+str(num_train_steps))
print("num_warmup_steps: " +str(num_warmup_steps))

In [0]:
# Specify outpit directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [0]:
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})


Next we create an input builder function that takes our training feature set (`train_features`) and produces a generator. This is a pretty standard design pattern for working with Tensorflow [Estimators](https://www.tensorflow.org/guide/estimators).

In [0]:
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.

In [0]:
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

In [0]:
dev_input_fn = run_classifier.input_fn_builder(
    features=dev_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [0]:
estimator.evaluate(input_fn=dev_input_fn, steps=None)

Now let's use our test data to see how well our model did:

In [0]:
#test_input_fn = run_classifier.input_fn_builder(
#    features=test_features,
#    seq_length=MAX_SEQ_LENGTH,
#    is_training=False,
#    drop_remainder=False)

In [0]:
#estimator.evaluate(input_fn=test_input_fn, steps=None)

Now let's write code to make predictions on new sentences:

In [0]:
def getPrediction(in_sentences):
  labels = ["Negative", "Positive"]
  input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  predictions = estimator.predict(predict_input_fn)
  return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]

In [0]:
# first line is from cnsnews.com
# rest are from huffington post

pred_sentences = [
  "“Coronavirus now, politics later – please,” Media Research Center President Brent Bozell writes in a Washington Times commentary urging the liberal media to turn their efforts from their obsession with attacking President Donald Trump to serving the greater good during a time of national crisis.",
  "President Donald Trump is rejecting calls to put a single military commander in charge of medical supplies for the COVID-19 pandemic.",
  "Boris Johnson is breathing without a ventilator and is in “good spirits” while being treated in intensive care for coronavirus symptoms, Downing Street has said.",
  "President Donald Trump reportedly owns a stake in a company that produces hydroxychloroquine, the anti-malaria drug he has repeatedly touted as a coronavirus treatment even though his experts say there’s no strong evidence it works. "
  ]

In [0]:
predictions = getPrediction(pred_sentences)

In [0]:
predictions