### **Set TPU environment**

In [None]:
import datetime
import json
import os
import pprint
import random
import string
import sys
import tensorflow as tf

# Fail fast when the runtime exposes no TPU address in its environment.
assert 'COLAB_TPU_ADDR' in os.environ, 'ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!'
# gRPC endpoint of the TPU, built from the COLAB_TPU_ADDR environment variable.
TPU_ADDRESS = 'grpc://' + os.environ['COLAB_TPU_ADDR']
print('TPU address is', TPU_ADDRESS)

from google.colab import auth
# Authenticate the Colab user before talking to the TPU / GCS.
auth.authenticate_user()
# Open a session against the TPU to list its devices and hand it GCS credentials.
with tf.Session(TPU_ADDRESS) as session:
  print('TPU devices:')
  pprint.pprint(session.list_devices())

  # Upload credentials to TPU.
  # NOTE(review): assumes authenticate_user() left the credentials file at
  # /content/adc.json -- confirm.
  with open('/content/adc.json', 'r') as f:
    auth_info = json.load(f)
  tf.contrib.cloud.configure_gcs(session, credentials=auth_info)
  # Now credentials are set for all future sessions on this TPU.

### **Prepare and import BERT modules**

In [None]:
import sys

# Clone the BERT repository into ./bert_repo unless that directory already exists.
!test -d bert_repo || git clone https://github.com/google-research/bert bert_repo
# Make the BERT sources importable; the guard keeps re-runs of this cell from
# adding the same entry twice.
if 'bert_repo' not in sys.path:
  sys.path.append('bert_repo')

# import python modules defined by BERT
import modeling
import optimization
import run_classifier
import run_classifier_with_tfhub
import tokenization

# import tfhub 
import tensorflow_hub as hub

**Add a custom data processor**

In [None]:
class InputExample(object):
  """One example for sequence (pair) classification, optionally tagged with a page."""

  def __init__(self, guid, text_a, text_b=None, label=None, page_identifier=None):
    """Stores the given fields on the instance without any processing.

    Args:
      guid: Unique id for the example.
      text_a: string. Untokenized text of the first sequence; the only text
        required for single-sequence tasks.
      text_b: (Optional) string. Untokenized text of the second sequence,
        needed only for sequence-pair tasks.
      label: (Optional) string. Label of the example; expected for train and
        dev examples, not necessarily for test examples.
      page_identifier: (Optional) string. Page identifier of the sentence.
    """
    self.guid, self.text_a = guid, text_a
    self.text_b, self.label = text_b, label
    self.page_identifier = page_identifier
    

class myProcessor():
  """Builds `InputExample`s from the claim/evidence JSON files of this task.

  Each JSON file is read as a mapping from a key to an entry with a 'claim'
  string, a 'label' string and an 'evidence' list whose items are indexed as
  [0] page identifier, [1] sentence number, [2] sentence text.
  The class exposes the same method names as the BERT data processors
  (get_*_examples / get_labels) but does not inherit from them.
  """

  def read_json(self, path, file_name):
    """Returns the parsed content of the JSON file `file_name` located in `path`."""
    # The context manager closes the handle even if parsing fails.
    with open(os.path.join(path, file_name), 'r') as f_input:
      return json.load(f_input)

  def get_train_examples(self, data_dir):
    """Returns the training examples read from `data_dir`."""
    return self._create_examples(self.read_json(data_dir, "train_origin_sentence6.json"))

  def get_dev_examples(self, data_dir):
    """Returns the dev examples read from `data_dir`."""
    return self._create_examples(self.read_json(data_dir, "dev_origin_sentence.json"))

  def get_test_examples(self, data_dir):
    """Returns the test examples (with page identifiers) read from `data_dir`."""
    return self._create_test_examples(self.read_json(data_dir, "test.json"))

  def get_labels(self):
    """Returns the list of class labels; the position of a label is its class index."""
    return ["NOT ENOUGH INFO", "SUPPORTS", "REFUTES"]

  def _create_examples(self, data_set):
    """Creates examples for the training and dev sets (no page identifier)."""
    return self._build_examples(data_set, keep_page_identifier=False)

  def _create_test_examples(self, data_set):
    """Creates examples for the test sets, keeping each sentence's page identifier."""
    return self._build_examples(data_set, keep_page_identifier=True)

  def _build_examples(self, data_set, keep_page_identifier):
    """Turns every (claim, evidence sentence) pair of `data_set` into an example.

    Args:
      data_set: mapping from key to an entry with 'claim', 'label' and
        'evidence' fields.
      keep_page_identifier: when True, evidence[0] is stored on the example as
        `page_identifier`; otherwise it is left as None.

    Returns:
      A list of `InputExample`, one per evidence item, with guid
      '<key>_<sentence number>'.
    """
    examples = []
    for key in data_set:
      entry = data_set[key]
      text_a = tokenization.convert_to_unicode(entry['claim'])
      label = tokenization.convert_to_unicode(entry['label'])

      for evidence in entry['evidence']:
        examples.append(InputExample(
            guid=key + '_' + str(evidence[1]),
            text_a=text_a,
            text_b=tokenization.convert_to_unicode(evidence[2]),
            label=label,
            page_identifier=evidence[0] if keep_page_identifier else None))

    print("Total examples: %d" % len(examples))
    print("Data set size: %d" % len(data_set))
    return examples

## **Prepare for training**

In [None]:
# Task name; used below as the last path component of the model output directory.
TASK = 'web_search' #@param {type:"string"}

from google.colab import drive
# Mount Google Drive at /gdrive so the task data can be read from it.
drive.mount('/gdrive')

# Directory (on the mounted Drive) holding the train/dev/test JSON files.
TASK_DATA_DIR = '/gdrive/My Drive/web_search'

# GCS bucket that receives checkpoints and evaluation output.
BUCKET = 'lcdmx' #@param {type:"string"}
assert BUCKET, 'Must specify an existing GCS bucket name'
OUTPUT_DIR = 'gs://{}/bert-tfhub/models/{}'.format(BUCKET, TASK)
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))

# Available pretrained model checkpoints:
#   uncased_L-12_H-768_A-12: uncased BERT base model
#   uncased_L-24_H-1024_A-16: uncased BERT large model
#   cased_L-12_H-768_A-12: cased BERT base model
BERT_MODEL = 'uncased_L-12_H-768_A-12' #@param {type:"string"}
# TF Hub handle derived from the chosen checkpoint name.
BERT_MODEL_HUB = 'https://tfhub.dev/google/bert_' + BERT_MODEL + '/1'

### **Load tokenizer module from TF Hub**

In [None]:
# Build the tokenizer from the selected TF Hub module handle.
tokenizer = run_classifier_with_tfhub.create_tokenizer_from_hub_module(BERT_MODEL_HUB)

### **Prepare the training data and initialize TPU config**

In [None]:
# Batch sizes handed to the TPUEstimator for training, evaluation and prediction.
TRAIN_BATCH_SIZE = 32
EVAL_BATCH_SIZE = 8
PREDICT_BATCH_SIZE = 8
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Maximum sequence length passed to feature conversion and the input functions.
MAX_SEQ_LENGTH = 128
# Warmup is a period of time where the learning rate
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 1000
# NOTE(review): SAVE_SUMMARY_STEPS is not referenced by any code visible in
# this notebook -- confirm whether it should be passed to the run config.
SAVE_SUMMARY_STEPS = 500

processor = myProcessor()
label_list = processor.get_labels()

# Compute number of train and warmup steps from batch size
# (the training examples are loaded here, so this reads the training JSON file).
train_examples = processor.get_train_examples(TASK_DATA_DIR)
num_train_steps = int(len(train_examples) / TRAIN_BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

# Setup TPU related config
tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)
# Used as `num_shards` in the TPU config.
NUM_TPU_CORES = 8
# Used as `iterations_per_loop` in the TPU config.
ITERATIONS_PER_LOOP = 1000

def get_run_config(output_dir):
  """Builds the TPU run configuration that stores its model files in `output_dir`.

  Args:
    output_dir: directory used as the estimator's `model_dir`.

  Returns:
    A `tf.contrib.tpu.RunConfig` bound to the notebook's TPU cluster resolver,
    checkpointing every SAVE_CHECKPOINTS_STEPS steps.
  """
  tpu_settings = tf.contrib.tpu.TPUConfig(
      iterations_per_loop=ITERATIONS_PER_LOOP,
      num_shards=NUM_TPU_CORES,
      per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2)
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=output_dir,
      save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS,
      tpu_config=tpu_settings)
  return run_config


## **Fine-tune and Run Predictions on a pretrained BERT Model from TF Hub**

In [None]:
# Force TF Hub writes to the GS bucket we provide.
os.environ['TFHUB_CACHE_DIR'] = OUTPUT_DIR

# Model function for a classifier over `label_list`, built on the TF Hub
# module and using the step counts computed from the training set.
model_fn = run_classifier_with_tfhub.model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps,
  use_tpu=True,
  bert_hub_module_handle=BERT_MODEL_HUB
)

# Estimator shared by the train / eval / predict cells below; checkpoints go
# to OUTPUT_DIR via the run config.
estimator_from_tfhub = tf.contrib.tpu.TPUEstimator(
  use_tpu=True,
  model_fn=model_fn,
  config=get_run_config(OUTPUT_DIR),
  train_batch_size=TRAIN_BATCH_SIZE,
  eval_batch_size=EVAL_BATCH_SIZE,
  predict_batch_size=PREDICT_BATCH_SIZE,
)


In [None]:
# Train the model
def model_train(estimator):
  """Fine-tunes `estimator` on the already loaded `train_examples`.

  The examples are converted to features of at most MAX_SEQ_LENGTH tokens and
  the estimator is trained until `num_train_steps` global steps are reached.

  Args:
    estimator: the estimator to train.
  """
  print('Please wait...')
  # Feature conversion happens before the start banner is printed.
  features = run_classifier.convert_examples_to_features(
      train_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

  started_at = datetime.datetime.now()
  print('***** Started training at {} *****'.format(started_at))
  print('  Num examples = {}'.format(len(train_examples)))
  print('  Batch size = {}'.format(TRAIN_BATCH_SIZE))
  tf.logging.info("  Num steps = %d", num_train_steps)

  input_fn = run_classifier.input_fn_builder(
      features=features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=True,
      drop_remainder=True)
  estimator.train(input_fn=input_fn, max_steps=num_train_steps)

  finished_at = datetime.datetime.now()
  print('***** Finished training at {} *****'.format(finished_at))



In [None]:
# Fine-tune the TF Hub based estimator.
model_train(estimator_from_tfhub)

In [None]:
def model_eval(estimator):
  """Evaluates `estimator` on the dev set and records the resulting metrics.

  Every metric returned by `estimator.evaluate` is printed and also written,
  as one `key = value` line, to `eval_results.txt` under OUTPUT_DIR.

  Args:
    estimator: the estimator to evaluate.
  """
  dev_examples = processor.get_dev_examples(TASK_DATA_DIR)
  dev_features = run_classifier.convert_examples_to_features(
      dev_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  print('***** Started evaluation at {} *****'.format(datetime.datetime.now()))
  print('  Num examples = {}'.format(len(dev_examples)))
  print('  Batch size = {}'.format(EVAL_BATCH_SIZE))

  # Only whole batches are evaluated, so the metrics are slightly off on the
  # TPU: the examples of a trailing partial batch are left out.
  num_eval_steps = int(len(dev_examples) / EVAL_BATCH_SIZE)
  metrics = estimator.evaluate(
      input_fn=run_classifier.input_fn_builder(
          features=dev_features,
          seq_length=MAX_SEQ_LENGTH,
          is_training=False,
          drop_remainder=True),
      steps=num_eval_steps)
  print('***** Finished evaluation at {} *****'.format(datetime.datetime.now()))

  results_path = os.path.join(OUTPUT_DIR, "eval_results.txt")
  with tf.gfile.GFile(results_path, "w") as writer:
    print("***** Eval results *****")
    for name in sorted(metrics.keys()):
      value_text = str(metrics[name])
      print('  {} = {}'.format(name, value_text))
      writer.write("%s = %s\n" % (name, value_text))


In [None]:
# Evaluate the fine-tuned estimator on the dev set.
model_eval(estimator_from_tfhub)

In [None]:
def choose_label(evidence_list):
  """Reduces per-sentence predictions for one claim to a single verdict.

  Decision rule:
    * no SUPPORTS and no REFUTES prediction at all (this includes an empty
      list and lists holding only unrecognised labels) -> 'NOT ENOUGH INFO';
    * 'NOT ENOUGH INFO' is the most frequent label (ties included) but some
      sentence is decisive -> 'REFUTES' if any sentence refutes, otherwise
      'SUPPORTS';
    * otherwise the more frequent of SUPPORTS / REFUTES, SUPPORTS on a tie.

  Args:
    evidence_list: list of [page_identifier, sentence_number, label] items,
      where label is one of 'NOT ENOUGH INFO', 'SUPPORTS', 'REFUTES'.
      Items with any other label are ignored.

  Returns:
    A `(label, evidence)` tuple. `evidence` lists
    [page_identifier, int(sentence_number)] for every item whose label equals
    the chosen label; it is empty for 'NOT ENOUGH INFO'.
  """
  nei, supports, refutes = 'NOT ENOUGH INFO', 'SUPPORTS', 'REFUTES'
  counts = {nei: 0, supports: 0, refutes: 0}
  for evidence in evidence_list:
    if evidence[2] in counts:
      counts[evidence[2]] += 1

  # Without a single decisive sentence there is nothing to support a verdict.
  # (Previously an all-zero tally fell into the tie branch and was reported
  # as SUPPORTS with no evidence.)
  if counts[supports] == 0 and counts[refutes] == 0:
    return nei, []

  if counts[nei] >= counts[supports] and counts[nei] >= counts[refutes]:
    # NEI dominates but decisive sentences exist: any REFUTES wins over
    # SUPPORTS regardless of their relative counts.
    # NOTE(review): this precedence mirrors the previous behaviour -- confirm
    # it is intended rather than "pick the more frequent of the two".
    label = refutes if counts[refutes] else supports
  elif counts[supports] >= counts[refutes]:
    label = supports
  else:
    label = refutes

  new_evidence = [[evidence[0], int(evidence[1])]
                  for evidence in evidence_list if evidence[2] == label]
  return label, new_evidence
  
def model_predict(estimator):
  """Classifies every test pair and writes claim-level verdicts to JSON.

  Each test example (a claim plus one candidate sentence) is classified, the
  per-sentence labels are grouped by claim key, `choose_label` reduces each
  group to one label with its evidence, and the result is written to
  `testoutput.json` inside TASK_DATA_DIR. Per-example predictions and the
  final label distribution are printed along the way.

  Args:
    estimator: estimator whose `predict` yields one dict per example holding
      a 'probabilities' array ordered like `label_list`.
  """
  prediction_examples = processor.get_test_examples(TASK_DATA_DIR)

  # The input function is built with drop_remainder=True, which discards a
  # trailing partial batch. Pad the input with dummy examples up to a multiple
  # of the batch size so that every real example receives a prediction.
  padding_needed = -len(prediction_examples) % PREDICT_BATCH_SIZE
  padded_examples = (list(prediction_examples) +
                     [run_classifier.PaddingInputExample()] * padding_needed)

  input_features = run_classifier.convert_examples_to_features(
      padded_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(
      features=input_features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=False,
      drop_remainder=True)
  predictions = estimator.predict(predict_input_fn)

  # key -> {'claim': ..., 'evidence': [[page_identifier, sentence_number, label], ...]}
  pre_list = {}
  # zip stops once the real examples are exhausted, so predictions made for
  # the padding examples are never consumed.
  for example, prediction in zip(prediction_examples, predictions):
    # guid is '<key>_<sentence number>'; splitting on the last underscore
    # keeps keys that themselves contain '_' intact.
    key, sentence_number = example.guid.rsplit('_', 1)

    probabilities = prediction['probabilities']
    scores = probabilities.tolist()
    label = label_list[scores.index(max(scores))]

    claim_entry = pre_list.setdefault(
        key, {'claim': example.text_a, 'evidence': []})
    claim_entry['evidence'].append(
        [example.page_identifier, sentence_number, label])

    print('key: %s \nclaim: %s \nsentence: %s prob: %s \nlabel: %s \n ' %
          (key, example.text_a, example.text_b, probabilities, label))

  print(len(pre_list))

  final_pre_list = {}
  label_counts = {'NOT ENOUGH INFO': 0, 'SUPPORTS': 0, 'REFUTES': 0}
  for key, entry in pre_list.items():
    final_label, final_evidence = choose_label(entry['evidence'])
    final_pre_list[key] = {
        'claim': entry['claim'],
        'label': final_label,
        'evidence': final_evidence,
    }
    if final_label in label_counts:
      label_counts[final_label] += 1

  print(len(final_pre_list))
  print('NOT ENOUGH INFO: %s' % label_counts['NOT ENOUGH INFO'])
  print('SUPPORTS: %s' % label_counts['SUPPORTS'])
  print('REFUTES: %s' % label_counts['REFUTES'])

  # The context manager flushes and closes the file even if dumping fails.
  with open(os.path.join(TASK_DATA_DIR, "testoutput.json"), 'w') as f_output:
    json.dump(final_pre_list, f_output)


In [None]:
# Predict on the test set and write the claim-level results.
model_predict(estimator_from_tfhub) 