In [0]:
import datetime
import json
import os
import pprint
import random
import string
import sys
import pprint
import tensorflow as tf

# Resolve the gRPC address of the Colab TPU. Fail fast when the runtime has no
# TPU attached: TPU_ADDRESS is required by the session below and by later
# cells, so continuing would only resurface as a NameError further down.
if 'COLAB_TPU_ADDR' not in os.environ:
  raise RuntimeError('ERROR: Not connected to a TPU runtime')
TPU_ADDRESS = 'grpc://' + os.environ['COLAB_TPU_ADDR']
print ('TPU address is', TPU_ADDRESS)

# Authenticate the Colab user. The credentials file read below is presumably
# produced by this call -- TODO confirm.
from google.colab import auth
auth.authenticate_user()
with tf.Session(TPU_ADDRESS) as session:
  print('TPU devices:')
  pprint.pprint(session.list_devices())

  # Upload credentials to TPU.
  with open('/content/adc.json', 'r') as f:
    auth_info = json.load(f)
  tf.contrib.cloud.configure_gcs(session, credentials=auth_info)

In [0]:
!pip install bert-tensorflow

## Download the BERT fine-tuning code and authorize access to Google Drive:

In [0]:
!test -d bert_model_repo || git clone https://github.com/google-research/bert bert_model_repo
# Make the cloned BERT sources importable; the membership check keeps the
# entry from being added again when the cell is re-run.
if 'bert_model_repo' not in sys.path:
  sys.path.append('bert_model_repo')

# import python modules defined by BERT
import modeling
import optimization
import run_classifier
import run_classifier_with_tfhub
import tokenization
from run_classifier import InputExample

# import tfhub 
import tensorflow_hub as hub

# Authorize access to Google Drive and mount it at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

## Define our own processors:

In [0]:
# Shared implementation for both WSTA processors. The two tasks read TSV files
# the same way and differ only in file names, label vocabulary and the
# placeholder label given to test rows, so those are class attributes.
class _WstaPairProcessor(run_classifier.DataProcessor):
  """Common logic for the WSTA sentence-pair processors.

  Each TSV row is read as (text_a, text_b[, label]); the first row of every
  file is skipped. Subclasses only set the class attributes below.
  """

  # File names (relative to data_dir) of the training and dev TSV files.
  TRAIN_FILE = None
  DEV_FILE = None
  # Label vocabulary, in the order returned by get_labels().
  LABELS = ()
  # Placeholder label assigned to every example of the "test" set.
  TEST_LABEL = None

  def get_train_examples(self, data_dir):
    """Returns the InputExamples read from the training file in `data_dir`."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, self.TRAIN_FILE)), "train")

  def get_dev_examples(self, data_dir):
    """Returns the InputExamples read from the dev file in `data_dir`."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, self.DEV_FILE)), "dev")

  def get_test_examples(self, data_dir, filename):
    """Returns the InputExamples read from `filename` inside `data_dir`.

    The file name is passed explicitly so that different files can be scored
    as the test set. Every example receives the placeholder TEST_LABEL.
    """
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, filename)), "test")

  def get_labels(self):
    """Returns the label vocabulary as a new list."""
    return list(self.LABELS)

  def _create_examples(self, lines, set_type):
    """Builds InputExamples from parsed TSV rows.

    Args:
      lines: iterable of rows, each an indexable sequence of column strings.
      set_type: "train", "dev" or "test". It is embedded in each guid, and
        for "test" the label column is not read.

    Returns:
      A list of InputExample, one per row after the first.
    """
    examples = []
    for (i, line) in enumerate(lines):
      if i == 0:
        # The first row is never turned into an example (presumably a header).
        continue
      guid = "%s-%s" % (set_type, i)
      text_a = tokenization.convert_to_unicode(line[0])
      text_b = tokenization.convert_to_unicode(line[1])
      if set_type == "test":
        label = self.TEST_LABEL
      else:
        label = tokenization.convert_to_unicode(line[2])
      examples.append(
          InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
    return examples


# Trained for sentence selection
class WstasentenceProcessor(_WstaPairProcessor):
  """Processor for the WSTA data set to select sentences."""
  TRAIN_FILE = "trainset_for_model.tsv"
  DEV_FILE = "devset_for_model.tsv"
  LABELS = ("1", "0")
  TEST_LABEL = "0"


#Trained for classification
class WstalabelProcessor(_WstaPairProcessor):
  """Processor for the WSTA data set to classify label."""
  TRAIN_FILE = "train_classification.tsv"
  DEV_FILE = "dev_classification.tsv"
  LABELS = ("SUPPORTS", "REFUTES", "NOT ENOUGH INFO")
  TEST_LABEL = "NOT ENOUGH INFO"

## Load model and config:

In [0]:
# Batch sizes handed to the TPUEstimator for training, evaluation and prediction.
TRAIN_BATCH_SIZE = 24
EVAL_BATCH_SIZE = 8
PREDICT_BATCH_SIZE = 8
# Learning rate passed to the model function, and the number of epochs used
# when computing the total number of training steps.
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 2.0
# Sequence length passed to feature conversion and to the input functions.
MAX_SEQ_LENGTH = 128
# Fraction of the training steps that is used as warmup steps.
WARMUP_PROPORTION = 0.1
# Model configs
# SAVE_CHECKPOINTS_STEPS is forwarded to the RunConfig.
SAVE_CHECKPOINTS_STEPS = 1000
# NOTE(review): SAVE_SUMMARY_STEPS is not referenced by any cell visible here.
SAVE_SUMMARY_STEPS = 500

# Maps a task name to the processor class that reads that task's TSV files.
processors = {
  "wstasentence": WstasentenceProcessor,
  "wstalabel": WstalabelProcessor,
}

## 1. Sentence selection:

#### Configuration:

In [0]:
# Task handled by this section; it selects the processor and the output folder.
TASK = 'wstasentence'
assert TASK in ('wstasentence', 'wstalabel')

# GCS bucket that receives the model outputs, and the mounted Drive folder
# from which the TSV data files are read.
BUCKET = 'colab-storage'
TASK_DATA_DIR = '/content/gdrive/My Drive'
print('Task data directory: {}'.format(TASK_DATA_DIR))
#!ls $TASK_DATA_DIR

# Per-task output location inside the bucket.
OUTPUT_DIR = 'gs://{}/bert-models/{}'.format(BUCKET, TASK)
tf.gfile.MakeDirs(OUTPUT_DIR) #model output dir
# Force TF Hub writes to the GS bucket we provide.
os.environ['TFHUB_CACHE_DIR'] = OUTPUT_DIR

# TF-Hub handle of the BERT module used for both the tokenizer and the model.
BERT_MODEL = 'uncased_L-12_H-768_A-12'
BERT_MODEL_HUB = 'https://tfhub.dev/google/bert_' + BERT_MODEL + '/1'

tokenizer = run_classifier_with_tfhub.create_tokenizer_from_hub_module(BERT_MODEL_HUB)

# Instantiate the processor registered for TASK and fetch its label vocabulary.
processor = processors[TASK.lower()]()
label_list = processor.get_labels()

# Compute number of train and warmup steps from batch size
train_examples = processor.get_train_examples(TASK_DATA_DIR)

# steps = examples / batch size * epochs (truncated to int); the warmup steps
# are the WARMUP_PROPORTION fraction of that total.
num_train_steps = int(len(train_examples) / TRAIN_BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

# TPU config
# TPU_ADDRESS comes from the first cell; NUM_TPU_CORES and ITERATIONS_PER_LOOP
# are consumed by get_run_config().
tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)
NUM_TPU_CORES = 8
ITERATIONS_PER_LOOP = 1000

In [0]:
#Estimator config
def get_run_config(output_dir):
  """Builds the TPU RunConfig that uses `output_dir` as the model directory.

  Reads the module-level tpu_cluster_resolver, SAVE_CHECKPOINTS_STEPS,
  ITERATIONS_PER_LOOP and NUM_TPU_CORES.
  """
  tpu_settings = tf.contrib.tpu.TPUConfig(
      iterations_per_loop=ITERATIONS_PER_LOOP,
      num_shards=NUM_TPU_CORES,
      per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2)
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=output_dir,
      save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS,
      tpu_config=tpu_settings)
  return run_config

# Build the model function from the TF-Hub BERT module handle, using the label
# count and the step schedule computed in the configuration cell.
model_fn = run_classifier_with_tfhub.model_fn_builder(
  use_tpu=True,
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps,
  bert_hub_module_handle=BERT_MODEL_HUB
)

# Wrap the model function in a TPUEstimator whose model directory is
# OUTPUT_DIR; this is the estimator passed to model_train/model_eval/model_predict.
estimator_from_tfhub = tf.contrib.tpu.TPUEstimator(
  use_tpu=True,
  model_fn=model_fn,
  config=get_run_config(OUTPUT_DIR),
  train_batch_size=TRAIN_BATCH_SIZE,
  eval_batch_size=EVAL_BATCH_SIZE,
  predict_batch_size=PREDICT_BATCH_SIZE,
)

#### Train, evaluate and predict:

In [0]:
# Train the model
def model_train(estimator):
  """Trains `estimator` on the module-level `train_examples`.

  Converts the examples to features, builds the training input function from
  them and trains with max_steps=num_train_steps. Start and finish timestamps
  and the example count are printed.
  """
  features = run_classifier.convert_examples_to_features(
      train_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

  started_at = datetime.datetime.now()
  print('Started training at {}'.format(started_at))
  example_count = len(train_examples)
  print('Num examples = {}'.format(example_count))
  tf.logging.info("Num steps = %d", num_train_steps)

  input_fn = run_classifier.input_fn_builder(
      features=features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=True,
      drop_remainder=True)
  estimator.train(input_fn=input_fn, max_steps=num_train_steps)

  finished_at = datetime.datetime.now()
  print('Finished training at {}'.format(finished_at))
  
  
# Eval the model.
def model_eval(estimator):
  """Evaluates `estimator` on the dev set and records the metrics.

  Each metric is printed and also written, one "key = value" line per metric
  in sorted key order, to eval_results.txt under OUTPUT_DIR.
  """
  dev_examples = processor.get_dev_examples(TASK_DATA_DIR)
  dev_features = run_classifier.convert_examples_to_features(
      dev_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  print('Started evaluation at {}'.format(datetime.datetime.now()))
  print('Num examples = {}'.format(len(dev_examples)))

  # Only whole batches are evaluated: the step count is truncated and the
  # input function is built with drop_remainder=True.
  num_steps = int(len(dev_examples) / EVAL_BATCH_SIZE)
  dev_input_fn = run_classifier.input_fn_builder(
      features=dev_features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=False,
      drop_remainder=True)
  metrics = estimator.evaluate(input_fn=dev_input_fn, steps=num_steps)
  print('Finished evaluation at {}'.format(datetime.datetime.now()))

  results_path = os.path.join(OUTPUT_DIR, "eval_results.txt")
  with tf.gfile.GFile(results_path, "w") as writer:
    print("***** Eval results *****")
    for name in sorted(metrics.keys()):
      value_text = str(metrics[name])
      print('  {} = {}'.format(name, value_text))
      writer.write("%s = %s\n" % (name, value_text))
      
#Predict
def model_predict(estimator):
  """Scores the test set and writes one probability row per test example.

  Reads "testset_for_model.tsv" from TASK_DATA_DIR, runs `estimator.predict`
  and writes the 'probabilities' entry of each prediction, one per line and in
  input order, to "selection_test_results.txt" under OUTPUT_DIR.

  Alternative used while the CodaLab submission was delayed: score the dev set
  by reading "devset_usedfortest_for_model.tsv" and writing
  "selection_devastest_results.txt" instead.
  """
  #use original test file
  prediction_examples = processor.get_test_examples(TASK_DATA_DIR,"testset_for_model.tsv")

  # The input function is built with drop_remainder=True, which discards a
  # final partial batch. Pad with fake examples up to a multiple of the batch
  # size so that no real example is dropped (same approach as BERT's
  # run_classifier.py when predicting on TPU).
  padded_examples = list(prediction_examples)
  while len(padded_examples) % PREDICT_BATCH_SIZE != 0:
    padded_examples.append(run_classifier.PaddingInputExample())

  input_features = run_classifier.convert_examples_to_features(
      padded_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(
      features=input_features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=False,
      drop_remainder=True)
  predictions = estimator.predict(predict_input_fn)

  output_test_file = os.path.join(OUTPUT_DIR, "selection_test_results.txt")
  # Open the file once and write every prediction. Previously an outer zip
  # loop consumed the first prediction before the writing loop started, so the
  # first row was missing from the file.
  with tf.gfile.GFile(output_test_file, "w") as writer:
    # zip stops after the last real example, so padding rows are not written.
    for _, prediction in zip(prediction_examples, predictions):
      writer.write("{}\n".format(prediction['probabilities']))

In [0]:
model_train(estimator_from_tfhub)

In [0]:
model_eval(estimator_from_tfhub)

In [0]:
model_predict(estimator_from_tfhub)


## 2. Classification:

#### Configuration:

In [0]:
# Task handled by this section. NOTE: this cell rebinds the module-level names
# set by the sentence-selection configuration (TASK, OUTPUT_DIR, tokenizer,
# processor, label_list, train_examples, num_train_steps, ...), so everything
# executed after it operates on the classification task.
TASK = 'wstalabel'
assert TASK in ('wstasentence', 'wstalabel')

# GCS bucket that receives the model outputs, and the mounted Drive folder
# from which the TSV data files are read.
BUCKET = 'colab-storage'
TASK_DATA_DIR = '/content/gdrive/My Drive'
print('Task data directory: {}'.format(TASK_DATA_DIR))
#!ls $TASK_DATA_DIR

# Per-task output location inside the bucket.
OUTPUT_DIR = 'gs://{}/bert-models/{}'.format(BUCKET, TASK)
tf.gfile.MakeDirs(OUTPUT_DIR) #model output dir
# Force TF Hub writes to the GS bucket we provide.
os.environ['TFHUB_CACHE_DIR'] = OUTPUT_DIR

# TF-Hub handle of the BERT module used for both the tokenizer and the model.
BERT_MODEL = 'uncased_L-12_H-768_A-12'
BERT_MODEL_HUB = 'https://tfhub.dev/google/bert_' + BERT_MODEL + '/1'

tokenizer = run_classifier_with_tfhub.create_tokenizer_from_hub_module(BERT_MODEL_HUB)

# Instantiate the processor registered for TASK and fetch its label vocabulary.
processor = processors[TASK.lower()]()
label_list = processor.get_labels()

# Compute number of train and warmup steps from batch size
train_examples = processor.get_train_examples(TASK_DATA_DIR)

# steps = examples / batch size * epochs (truncated to int); the warmup steps
# are the WARMUP_PROPORTION fraction of that total.
num_train_steps = int(len(train_examples) / TRAIN_BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

# TPU config
# TPU_ADDRESS comes from the first cell; NUM_TPU_CORES and ITERATIONS_PER_LOOP
# are consumed by get_run_config().
tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)
NUM_TPU_CORES = 8
ITERATIONS_PER_LOOP = 1000

In [0]:
#Estimator config
# NOTE(review): this repeats the get_run_config definition from the
# sentence-selection section; consider defining it once.
def get_run_config(output_dir):
  """Builds the TPU RunConfig that uses `output_dir` as the model directory.

  Reads the module-level tpu_cluster_resolver, SAVE_CHECKPOINTS_STEPS,
  ITERATIONS_PER_LOOP and NUM_TPU_CORES.
  """
  tpu_settings = tf.contrib.tpu.TPUConfig(
      iterations_per_loop=ITERATIONS_PER_LOOP,
      num_shards=NUM_TPU_CORES,
      per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2)
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=output_dir,
      save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS,
      tpu_config=tpu_settings)
  return run_config

# Build the model function from the TF-Hub BERT module handle, using the label
# count and the step schedule computed in this section's configuration cell.
model_fn = run_classifier_with_tfhub.model_fn_builder(
  use_tpu=True,
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps,
  bert_hub_module_handle=BERT_MODEL_HUB
)

# Rebinds estimator_from_tfhub to a new TPUEstimator whose model directory is
# this section's OUTPUT_DIR.
estimator_from_tfhub = tf.contrib.tpu.TPUEstimator(
  use_tpu=True,
  model_fn=model_fn,
  config=get_run_config(OUTPUT_DIR),
  train_batch_size=TRAIN_BATCH_SIZE,
  eval_batch_size=EVAL_BATCH_SIZE,
  predict_batch_size=PREDICT_BATCH_SIZE,
)

#### Train, evaluate and predict:

In [0]:
# Train the model
# NOTE(review): this repeats the model_train definition from the
# sentence-selection section; consider defining it once.
def model_train(estimator):
  """Trains `estimator` on the module-level `train_examples`.

  Converts the examples to features, builds the training input function from
  them and trains with max_steps=num_train_steps. Start and finish timestamps
  and the example count are printed.
  """
  features = run_classifier.convert_examples_to_features(
      train_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

  started_at = datetime.datetime.now()
  print('Started training at {}'.format(started_at))
  example_count = len(train_examples)
  print('Num examples = {}'.format(example_count))
  tf.logging.info("Num steps = %d", num_train_steps)

  input_fn = run_classifier.input_fn_builder(
      features=features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=True,
      drop_remainder=True)
  estimator.train(input_fn=input_fn, max_steps=num_train_steps)

  finished_at = datetime.datetime.now()
  print('Finished training at {}'.format(finished_at))
  
  
# Eval the model.
# NOTE(review): this repeats the model_eval definition from the
# sentence-selection section; consider defining it once.
def model_eval(estimator):
  """Evaluates `estimator` on the dev set and records the metrics.

  Each metric is printed and also written, one "key = value" line per metric
  in sorted key order, to eval_results.txt under OUTPUT_DIR.
  """
  dev_examples = processor.get_dev_examples(TASK_DATA_DIR)
  dev_features = run_classifier.convert_examples_to_features(
      dev_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  print('Started evaluation at {}'.format(datetime.datetime.now()))
  print('Num examples = {}'.format(len(dev_examples)))

  # Eval will be slightly WRONG on the TPU because it will truncate the last
  # batch: the step count is truncated and drop_remainder=True is used.
  num_steps = int(len(dev_examples) / EVAL_BATCH_SIZE)
  dev_input_fn = run_classifier.input_fn_builder(
      features=dev_features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=False,
      drop_remainder=True)
  metrics = estimator.evaluate(input_fn=dev_input_fn, steps=num_steps)
  print('Finished evaluation at {}'.format(datetime.datetime.now()))

  results_path = os.path.join(OUTPUT_DIR, "eval_results.txt")
  with tf.gfile.GFile(results_path, "w") as writer:
    print("***** Eval results *****")
    for name in sorted(metrics.keys()):
      value_text = str(metrics[name])
      print('  {} = {}'.format(name, value_text))
      writer.write("%s = %s\n" % (name, value_text))
      
#Predict
def model_predict(estimator):
  """Scores the test set and writes one probability row per test example.

  Reads "test_classification.tsv" from TASK_DATA_DIR, runs `estimator.predict`
  and writes the 'probabilities' entry of each prediction, one per line and in
  input order, to "classification_test_results.txt" under OUTPUT_DIR.

  Alternative used while the CodaLab submission was delayed: score the dev set
  by reading "devastest_classification.tsv" and writing
  "classification_devastest_results.txt" instead.
  """
  #use original test file
  prediction_examples = processor.get_test_examples(TASK_DATA_DIR,"test_classification.tsv")

  # The input function is built with drop_remainder=True, which discards a
  # final partial batch. Pad with fake examples up to a multiple of the batch
  # size so that no real example is dropped (same approach as BERT's
  # run_classifier.py when predicting on TPU).
  padded_examples = list(prediction_examples)
  while len(padded_examples) % PREDICT_BATCH_SIZE != 0:
    padded_examples.append(run_classifier.PaddingInputExample())

  input_features = run_classifier.convert_examples_to_features(
      padded_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(
      features=input_features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=False,
      drop_remainder=True)
  predictions = estimator.predict(predict_input_fn)

  output_test_file = os.path.join(OUTPUT_DIR, "classification_test_results.txt")
  # Open the file once and write every prediction. Previously an outer zip
  # loop consumed the first prediction before the writing loop started, so the
  # first row was missing from the file.
  with tf.gfile.GFile(output_test_file, "w") as writer:
    # zip stops after the last real example, so padding rows are not written.
    for _, prediction in zip(prediction_examples, predictions):
      writer.write("{}\n".format(prediction['probabilities']))

In [0]:
model_train(estimator_from_tfhub)

In [0]:
model_eval(estimator_from_tfhub)

In [0]:
model_predict(estimator_from_tfhub)