<a href="https://colab.research.google.com/github/justram/colab_ml_study/blob/master/ARC_T5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Colab setting command for tensorflow environment
# Colab line magic that selects the TensorFlow 1.x runtime; it is placed
# ahead of the `import tensorflow` statement below.
%tensorflow_version 1.x

import tensorflow as tf
import tensorflow_datasets as tfds
import json
import os
import pprint

# Root GCS location for this experiment's data and model checkpoints.
BASE_DIR = "gs://home_justram/arc" #@param { type: "string" }
if not BASE_DIR or BASE_DIR == "gs://":
    raise ValueError("You must enter a BASE_DIR.")
DATA_DIR = os.path.join(BASE_DIR, "data_challenge_bm25_rerank_aug_alpha_0.1_num_5")
MODELS_DIR = os.path.join(BASE_DIR, "models_challenge_bm25_rerank_aug_alpha_0.1_num_5")
ON_CLOUD = True

# Always bind the TPU settings so later cells that read TPU_ADDRESS /
# TPU_TOPOLOGY do not raise NameError when ON_CLOUD is False.
# NOTE(review): None is presumed to mean "no TPU" to downstream consumers
# (e.g. t5.models.MtfModel) -- confirm against the t5 version in use.
TPU_ADDRESS = None
TPU_TOPOLOGY = None

if ON_CLOUD:
    # Explicit raise instead of `assert`, so the check is not stripped when
    # Python runs with optimizations enabled.
    if "COLAB_TPU_ADDR" not in os.environ:
        raise RuntimeError(
            "ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!"
        )
    TPU_ADDRESS = "grpc://" + os.environ["COLAB_TPU_ADDR"]
    TPU_TOPOLOGY = "2x2"
    print("TPU address is", TPU_ADDRESS)

    from google.colab import auth
    auth.authenticate_user()
    with tf.Session(TPU_ADDRESS) as session:
        print('TPU devices:')
        pprint.pprint(session.list_devices())
        # Upload credentials to TPU.
        with open('/content/adc.json', 'r') as f:
            auth_info = json.load(f)
        tf.contrib.cloud.configure_gcs(session, credentials=auth_info)
    # Now credentials are set for all future sessions on this TPU.

TensorFlow 1.x selected.
TPU address is grpc://10.36.81.138:8470
TPU devices:
[_DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:CPU:0, CPU, -1, 6898080986511818927),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 2843220243435638414),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 4028120955066392016),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 2389264604928385794),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 11854357591315787191),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:3, TPU, 17179869184, 16681299530299940087),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:4, TPU, 17179869184, 9075188718538189832),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:5, TPU, 17179869184, 341962498644886785),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:6, TPU, 171

In [0]:
#@title Install and import required packages
# Installs the pinned t5 release (0.2.0) quietly; the install is skipped
# unless ON_CLOUD is truthy.
if ON_CLOUD:
  !pip install -q t5==0.2.0

In [0]:
# Install gdown, which a later cell imports to download files from Google Drive.
# NOTE(review): the version is not pinned -- consider pinning for reproducibility.
!pip install gdown



In [0]:
import gdown
# Modified ("hacked") copies of mesh_tensorflow's transformer.py and utils.py,
# fetched from Google Drive by file id and saved under the matching name.
id_list = ["10yVgQd1yQK3KaquOhTw4QWZ_1phRsngs", "1zEtRXop8qrY6X7ih-EnDdZogophWHC8a"]
name_list = ['transformer.py', 'utils.py']
gdrive_url_prefix = 'https://drive.google.com/uc?id='
for gdrive_id, output_name in zip(id_list, name_list):
    download_url = gdrive_url_prefix + gdrive_id
    gdown.download(download_url, output_name, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=10yVgQd1yQK3KaquOhTw4QWZ_1phRsngs
To: /content/transformer.py
100%|██████████| 62.8k/62.8k [00:00<00:00, 14.6MB/s]
Downloading...
From: https://drive.google.com/uc?id=1zEtRXop8qrY6X7ih-EnDdZogophWHC8a
To: /content/utils.py
100%|██████████| 73.4k/73.4k [00:00<00:00, 24.8MB/s]


In [0]:

# Overwrite the installed mesh_tensorflow transformer modules with the
# transformer.py / utils.py files in the current working directory.
# NOTE(review): the destination is hard-coded to a tensorflow-1.15.2 /
# python3.6 install path -- confirm it matches the active runtime.
!cp transformer.py /tensorflow-1.15.2/python3.6/mesh_tensorflow/transformer/transformer.py
!cp utils.py /tensorflow-1.15.2/python3.6/mesh_tensorflow/transformer/utils.py

In [0]:
!cat /tensorflow-1.15.2/python3.6/mesh_tensorflow/transformer/transformer.py | grep scores

    beams, unused_scores = mtf.beam_search.beam_search(
    scores = mtf.gather(
        unused_scores, mtf.constant(unused_scores.mesh, 0, dtype=tf.int32), beam_dim) 
    return beams, scores


In [0]:
!cat /tensorflow-1.15.2/python3.6/mesh_tensorflow/transformer/utils.py | grep scores

        mtf_samples, scores = transformer_model.decode(
      scores = mtf.anonymize(scores)
      scores = lowering.export_to_tf_tensor(scores)
          "scores": scores}
    #score_string = result["scores"][1176].tolist()
    scores = result["scores"][[6136, 1176]].tolist()
    # scores = result["scores"][[27252, 3]].tolist()
    # scores = result["scores"][[150, 4273]].tolist()
    scores = [float(score) for score in scores]
    probs = torch.nn.functional.log_softmax(torch.from_numpy(np.array(scores)))


In [0]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import t5
import tensorflow as tf
import tensorflow_datasets as tfds
import time

# Improve logging.
from contextlib import contextmanager
import logging as py_logging

if ON_CLOUD:
  # Keep TensorFlow's logger from propagating records to the root logger,
  # and set the root logger's level to INFO.
  tf_logger = tf.get_logger()
  tf_logger.propagate = False
  py_logging.getLogger().setLevel('INFO')

@contextmanager
def tf_verbosity_level(level):
  """Temporarily set TensorFlow's logging verbosity inside a `with` block.

  Args:
    level: verbosity value passed to `tf.logging.set_verbosity`.

  Yields:
    None. The previous verbosity is restored when the block exits, including
    when the block raises an exception.
  """
  og_level = tf.logging.get_verbosity()
  tf.logging.set_verbosity(level)
  try:
    yield
  finally:
    # Restore even on error so a failing block cannot leave the verbosity
    # changed for the rest of the session.
    tf.logging.set_verbosity(og_level)


In [0]:
# Location of the cached per-split example counts and of the TSV for each split.
counts_path = os.path.join(DATA_DIR, "counts.json")
tsv_path = {
    "train": os.path.join(DATA_DIR, "train.tsv"),
    "validation": os.path.join(DATA_DIR, "dev.tsv"),
    "test": os.path.join(DATA_DIR, "test.tsv"),
}

if tf.io.gfile.exists(counts_path):
    # Use the cached counts.
    tf.logging.info("Loading num_example from cache.")
    with tf.io.gfile.GFile(counts_path) as counts_file:
        num_examples = json.load(counts_file)
else:
    # Count the lines of each TSV and cache the result next to the data.
    tf.logging.info("Counting num_example in TSVs.")
    num_examples = {}
    for split, fname in tsv_path.items():
        with tf.io.gfile.GFile(fname) as tsv_file:
            num_examples[split] = sum(1 for _ in tsv_file)
    # Close the handle explicitly (via `with`) so the JSON is flushed to
    # storage instead of relying on garbage collection.
    with tf.io.gfile.GFile(counts_path, "w") as counts_file:
        json.dump(num_examples, counts_file)

INFO:tensorflow:Counting num_example in TSVs.


In [0]:
import functools

def nq_dataset_fn(split, shuffle_files=False):
    """Build a dataset of {"question": ..., "answer": ...} dicts for a split.

    Args:
      split: key into `tsv_path` selecting which TSV file to read.
      shuffle_files: accepted but not used by this function.

    Returns:
      A tf.data.Dataset whose elements are dicts with "question" and "answer"
      string entries, one per line of the TSV.
    """
    def _split_fields(line):
        # Two tab-separated string columns; quote handling is turned off
        # (use_quote_delim=False).
        return tf.io.decode_csv(
            line,
            record_defaults=["", ""],
            field_delim="\t",
            use_quote_delim=False)

    def _as_example(*fields):
        return dict(zip(["question", "answer"], fields))

    lines = tf.data.TextLineDataset(tsv_path[split])
    field_pairs = lines.map(
        _split_fields, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return field_pairs.map(_as_example)

# Preview the first five raw examples of the validation split.
print("A few raw validation examples...")
raw_validation_preview = nq_dataset_fn("validation").take(5)
for raw_example in tfds.as_numpy(raw_validation_preview):
    print(raw_example)

A few raw validation examples...
{'question': b'hypothesis: Juan and LaKeisha roll a few objects down a ramp. They want to see which object rolls the farthest. What should they do so they can repeat their investigation? Put the objects in groups. premise: Elevate one end of the ramp about 10 centimeters and see if students can predict how far the toys will roll and which ones will roll the farthest.', 'answer': b'false'}
{'question': b'hypothesis: Juan and LaKeisha roll a few objects down a ramp. They want to see which object rolls the farthest. What should they do so they can repeat their investigation? Put the objects in groups. premise: Allow a few more students to form groups with the objects.', 'answer': b'false'}
{'question': b'hypothesis: Juan and LaKeisha roll a few objects down a ramp. They want to see which object rolls the farthest. What should they do so they can repeat their investigation? Put the objects in groups. premise: Change the objects going down the ramp to observ

In [0]:
def trivia_preprocessor(ds):
    """Rename each example's "question"/"answer" fields to "inputs"/"targets".

    The text itself is passed through unchanged: the lowercasing and
    quote-stripping steps are currently commented out.

    Args:
      ds: dataset whose elements are dicts with "question" and "answer" keys.

    Returns:
      The result of mapping `ds` to {"inputs": ..., "targets": ...} dicts.
    """
    def _passthrough(text):
        # Disabled normalization, kept for reference:
        #   text = tf.strings.lower(text)
        #   text = tf.strings.regex_replace(text,"'(.*)'", r"\1")
        return text

    def _to_model_fields(example):
        model_inputs = _passthrough(example["question"])
        model_targets = _passthrough(example["answer"])
        return {"inputs": model_inputs, "targets": model_targets}

    autotune = tf.data.experimental.AUTOTUNE
    return ds.map(_to_model_fields, num_parallel_calls=autotune)

In [0]:
# Options for the "arc" task, gathered in one place before registration.
arc_task_options = dict(
    # Function returning the raw tf.data.Dataset for a split.
    dataset_fn=nq_dataset_fn,
    splits=["train", "validation", 'test'],
    # Text-level preprocessing applied to the raw dataset.
    text_preprocessor=[trivia_preprocessor],
    # Same vocabulary as used for pre-training.
    sentencepiece_model_path=t5.data.DEFAULT_SPM_PATH,
    # Targets are lowercased before metrics are computed.
    postprocess_fn=t5.data.postprocessors.lower_text,
    # Accuracy is the evaluation metric.
    metric_fns=[t5.evaluation.metrics.accuracy],
    # Optional; helps with mixing and auto-caching.
    num_input_examples=num_examples,
)
t5.data.TaskRegistry.add("arc", **arc_task_options)

In [0]:
# Look up the registered "arc" task and preview five preprocessed
# validation examples.
nq_task = t5.data.TaskRegistry.get("arc")
preview_sequence_length = {"inputs": 450, "targets": 32}
ds = nq_task.get_dataset(
    split="validation", sequence_length=preview_sequence_length)
print("A few preprocessed validation examples...")
for preprocessed_example in tfds.as_numpy(ds.take(5)):
    print(preprocessed_example)

A few preprocessed validation examples...
{'inputs_plaintext': b'hypothesis: Mercury, the planet nearest to the Sun, has extreme surface temperatures, ranging from 465\xc2\xb0C in sunlight to -180\xc2\xb0C in darkness. Why is there such a large range of temperatures on Mercury? The planet reflects heat from its dark side. premise: But on Mercury, the thin atmosphere does nothing to stabilize the incoming solar rays\xe2\x80\x94and because the distance to Mercury from the sun is so small, the day side of the planet feels the heat keenly, while the night side, turned from the sun, only registers the cold.', 'inputs': array([22455,    10, 23461,     6,     8,  4345, 13012,    12,     8,
        3068,     6,    65,  5272,  1774,  7902,     6,     3,  6836,
          45,   314,  4122,  1956,   254,    16, 16285,    12,     3,
          18, 20829,  1956,   254,    16, 14882,     5,  1615,    19,
         132,   224,     3,     9,   508,   620,    13,  7902,    30,
       23461,    58,    37, 

## Define Model

In [0]:
MODEL_SIZE = "3B" #@param["small", "base", "large", "3B", "11B"]
# Pre-trained checkpoints are read from the public T5 bucket; fine-tuned
# checkpoints are written under MODELS_DIR, one sub-directory per model size.
BASE_PRETRAINED_DIR = "gs://t5-data/pretrained_models"
PRETRAINED_DIR = os.path.join(BASE_PRETRAINED_DIR, MODEL_SIZE)
MODEL_DIR = os.path.join(MODELS_DIR, MODEL_SIZE)

# Size-specific guard rails; these only apply when running on cloud.
if ON_CLOUD:
    if MODEL_SIZE == "3B":
        storage_warning = (
            "The `3B` model is too large to use with the 5GB GCS free tier. "
            "Make sure you have at least 25GB on GCS before continuing."
        )
        tf.logging.warn(storage_warning)
    elif MODEL_SIZE == "11B":
        raise ValueError(
            "The `11B` parameter is too large to fine-tune on the `v2-8` TPU "
            "provided by Colab. Please comment out this Error if you're running "
            "on a larger TPU."
        )

# Per-size settings: (model_parallelism, train_batch_size, keep_checkpoint_max).
# Parallelism and batch size are chosen to fit on a v2-8 TPU (if possible);
# the checkpoint cap is chosen to fit within 5GB (if possible).
size_to_settings = {
    "small": (1, 256, 16),
    "base": (2, 128, 8),
    "large": (8, 64, 4),
    "3B": (8, 16, 4),
    "11B": (8, 16, 1),
}
model_parallelism, train_batch_size, keep_checkpoint_max = size_to_settings[MODEL_SIZE]

# Create the checkpoint directory if it is not there yet.
model_dir_exists = tf.io.gfile.exists(MODEL_DIR)
if not model_dir_exists:
    tf.io.gfile.makedirs(MODEL_DIR)



# The checkpoint cap is only applied when running on cloud.
if ON_CLOUD:
    checkpoint_cap = keep_checkpoint_max
else:
    checkpoint_cap = None

# Mesh TensorFlow Transformer model, as used in the T5 paper.
model = t5.models.MtfModel(
    model_dir=MODEL_DIR,
    tpu=TPU_ADDRESS,
    tpu_topology=TPU_TOPOLOGY,
    model_parallelism=model_parallelism,
    batch_size=train_batch_size,
    sequence_length={"inputs": 450, "targets": 32},
    learning_rate_schedule=0.0005,
    save_checkpoints_steps=5000,
    keep_checkpoint_max=checkpoint_cap,
    iterations_per_loop=100,
)



In [0]:
FINETUNE_STEPS = 40000 #@param {type: "integer"}

# Fine-tune on the "arc" task for FINETUNE_STEPS steps, starting from the
# pre-trained checkpoint directory.
model.finetune(
    finetune_steps=FINETUNE_STEPS,
    pretrained_model_dir=PRETRAINED_DIR,
    mixture_or_task_name="arc",
)

INFO:tensorflow:Using config: {'_model_dir': 'gs://home_justram/arc/models_challenge_bm25_rerank_aug_alpha_0.1_num_5/3B', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.36.81.138:8470"
    }
  }
}
isolate_session_state: true
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fefbaa44a58>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://10.36.81.138:8470', '_evaluation_master': 'grpc://10.36.81.138:8

In [0]:
# Evaluation needs less memory than training, so run it with a batch four
# times the training batch size.
eval_batch_multiplier = 4
model.batch_size = eval_batch_multiplier * train_batch_size

# Evaluate every saved checkpoint on the validation split of the "arc" task.
model.eval(
    split='validation',
    checkpoint_steps="all",
    mixture_or_task_name="arc",
)