In [0]:
!pip install transformers
!pip install tensorflow-addons
import tensorflow as tf
import tensorflow_addons as tfa
from transformers import TFDistilBertModel, DistilBertTokenizer, TFDistilBertMainLayer, TFDistilBertPreTrainedModel
from transformers.modeling_tf_utils import get_initializer
from generate_predictions import get_prediction_json
from absl import app
from absl import flags
from tensorflow.keras.optimizers import Adam
from adamw_optimizer import AdamW
# --- Module-level hyperparameters read by the functions below ---
epochs=5  # number of passes over the training dataset made by train()
batch_size = 2  # batch size handed to the dataset readers; also used to derive the number of training steps
init_learning_rate = 5e-5  # initial learning rate given to the learning-rate schedule
init_weight_decay_rate = 0.01  # weight decay rate given to the optimizer
num_warmup_steps = 0  # intended number of learning-rate warmup steps (0 = no warmup)
shuffle_buffer_size = 100000  # buffer size passed to Dataset.shuffle by the dataset readers (0 disables shuffling)
best_indexes = 20  # forwarded to get_prediction_json; presumably the n-best candidate count — confirm in generate_predictions
answer_types = 5  # number of answer-type classes: output width of the answer-type head and one-hot depth of its loss


def decode_record(record, x):
    """
    Decodes one serialized record into a dictionary of tensors.

    Parameters:
        record: serialized example, as produced by a TFRecordDataset
        x: feature specification (feature name -> parsing config) passed to
           tf.io.parse_single_example

    Returns: dictionary mapping feature names to tensors, where every int64
             tensor has been cast to int32
    """
    example = tf.io.parse_single_example(record, x)
    # Type conversion for compatibility: narrow int64 features to int32.
    # Iterate over a snapshot of the items because the dict is updated in place.
    for name, tensor in list(example.items()):
        if tensor.dtype == tf.int64:
            example[name] = tf.cast(tensor, tf.int32)
    return example

def read_train_record(tf_record_file, shuffle_buffer_size, batch_size=1):
    """
    Builds the training dataset from a TFRecord file.

    Parameters:
        tf_record_file: file name(s) handed to tf.data.TFRecordDataset
        shuffle_buffer_size: shuffle buffer size; 0 skips shuffling
        batch_size: batch size; 0 skips batching

    Returns: dataset whose elements are (features, labels) pairs of dictionaries
    """
    feature_keys = ('unique_ids', 'input_ids', 'input_mask', 'segment_ids')
    label_keys = ('start_positions', 'end_positions', 'answer_types')

    # parsing specification: scalars for ids/labels, 512-long vectors for token features
    feature_spec = {
        "unique_ids": tf.io.FixedLenFeature([], tf.int64),
        "input_ids": tf.io.FixedLenFeature([512], tf.int64),
        "input_mask": tf.io.FixedLenFeature([512], tf.int64),
        "segment_ids": tf.io.FixedLenFeature([512], tf.int64),
        "start_positions": tf.io.FixedLenFeature([], tf.int64),
        "end_positions": tf.io.FixedLenFeature([], tf.int64),
        "answer_types": tf.io.FixedLenFeature([], tf.int64),
    }

    def split_features_and_labels(record):
        # separate model inputs from training targets
        features = {key: record[key] for key in feature_keys}
        labels = {key: record[key] for key in label_keys}
        return (features, labels)

    # parse every serialized record into a dictionary of tensors
    dataset = tf.data.TFRecordDataset(tf_record_file)
    dataset = dataset.map(lambda record: decode_record(record, feature_spec))
    # optional shuffling, then optional batching
    if shuffle_buffer_size != 0:
        dataset = dataset.shuffle(shuffle_buffer_size)
    if batch_size != 0:
        dataset = dataset.batch(batch_size)
    # finally expose each element as (features, labels)
    return dataset.map(split_features_and_labels)


def read_val_record(tf_record_file, shuffle_buffer_size, batch_size=1):
    """
    Builds the validation dataset from a TFRecord file.

    Parameters:
        tf_record_file: file name(s) handed to tf.data.TFRecordDataset
        shuffle_buffer_size: shuffle buffer size; 0 skips shuffling
        batch_size: batch size; 0 skips batching

    Returns: dataset whose elements are feature dictionaries (no labels)
    """
    feature_keys = ('unique_ids', 'input_ids', 'input_mask', 'segment_ids', 'token_map')

    # parsing specification: scalar id plus 512-long vectors for the token features
    feature_spec = {
        "unique_ids": tf.io.FixedLenFeature([], tf.int64),
        "input_ids": tf.io.FixedLenFeature([512], tf.int64),
        "input_mask": tf.io.FixedLenFeature([512], tf.int64),
        "segment_ids": tf.io.FixedLenFeature([512], tf.int64),
        "token_map": tf.io.FixedLenFeature([512], tf.int64),
    }

    def select_features(record):
        # keep exactly the features needed for prediction
        return {key: record[key] for key in feature_keys}

    # parse every serialized record into a dictionary of tensors
    dataset = tf.data.TFRecordDataset(tf_record_file)
    dataset = dataset.map(lambda record: decode_record(record, feature_spec))
    # optional shuffling, then optional batching
    if shuffle_buffer_size != 0:
        dataset = dataset.shuffle(shuffle_buffer_size)
    if batch_size != 0:
        dataset = dataset.batch(batch_size)
    # finally expose each element as a feature dictionary
    return dataset.map(select_features)


class TFNQModel(TFDistilBertPreTrainedModel):
    """
    DistilBERT backbone with two classification heads on top.

    A position head produces two logits per sequence position (start / end),
    and an answer-type head produces `answer_types` logits per example from
    the backbone output at sequence position 0.
    """

    def __init__(self, config, *inputs, **kwargs):
        """
        Builds the backbone, the dropout layers and the two classifier heads.

        Parameters:
            config: model configuration; `config.initializer_range` is used to
                    initialize the kernels of both heads
            *inputs, **kwargs: forwarded to TFDistilBertPreTrainedModel.__init__;
                    kwargs may also carry 'seq_output_dropout_prob' and
                    'pooled_output_dropout_prob' (both default to 0.05)
        """
        # initialize pretrained model
        TFDistilBertPreTrainedModel.__init__(self, config, *inputs, **kwargs)

        # set backend as DistilBert
        self.backend = TFDistilBertMainLayer(config, name="distilbert")

        # initialize dropout layers (one for the per-token output, one for the per-example output)
        self.seq_output_dropout = tf.keras.layers.Dropout(kwargs.get('seq_output_dropout_prob', 0.05))
        self.pooled_output_dropout = tf.keras.layers.Dropout(kwargs.get('pooled_output_dropout_prob', 0.05))

        # set up classifiers on BERT outputs to give us start and end pos tags, as well as an answer type tag
        # position head: 2 units per token, split into start / end logits in call()
        self.pos_classifier = tf.keras.layers.Dense(2,
                                                    kernel_initializer=get_initializer(config.initializer_range),
                                                    name='pos_classifier')

        # answer-type head: one unit per answer type (module-level `answer_types`)
        self.answer_type_classifier = tf.keras.layers.Dense(answer_types,
                                                            kernel_initializer=get_initializer(
                                                                config.initializer_range),
                                                            name='answer_type_classifier')

    def call(self, inputs, **kwargs):
        """
        Invoked when the model is called; runs the backbone and both heads.

        Parameters:
            inputs: backbone inputs; when given as a tuple only its first two
                    elements are passed on to the backbone
            **kwargs: forwarded to the backbone; 'training' (default False)
                    also controls the dropout layers

        Returns: tuple (start_pos_logits, end_pos_logits, answer_type_logits)

        """
        # a tuple input is truncated to its first two elements before reaching the backbone
        inputs = inputs[:2] if isinstance(inputs, tuple) else inputs
        outputs = self.backend(inputs, **kwargs)

        sequence_output = outputs[0]
        # per-example representation: the backbone output at sequence position 0
        # (presumably the [CLS] token — confirm against the tokenizer / preprocessing)
        pooled_output = sequence_output[:, 0, :]
        # dropout for both outputs
        sequence_output = self.seq_output_dropout(sequence_output, training=kwargs.get('training', False))
        pooled_output = self.pooled_output_dropout(pooled_output, training=kwargs.get('training', False))
        # splitting into start and end after passing through the classifier built on top of bert
        pos_logits = self.pos_classifier(sequence_output)
        start_pos_logits = pos_logits[:, :, 0]
        end_pos_logits = pos_logits[:, :, 1]

        answer_type_logits = self.answer_type_classifier(pooled_output)

        outputs = (start_pos_logits, end_pos_logits, answer_type_logits)

        return outputs

def initialize_acc():
    '''
    Creates four independent top-1 sparse categorical accuracy metrics.

    Returns: (total_acc, start_acc, end_acc, ans_acc)
    '''
    def new_metric():
        return tf.keras.metrics.SparseTopKCategoricalAccuracy(k=1)

    # instantiated in the order start, end, answer type, total
    start_acc, end_acc, ans_acc, total_acc = [new_metric() for _ in range(4)]
    return total_acc, start_acc, end_acc, ans_acc

class CustomSchedule(tf.keras.optimizers.schedules.PolynomialDecay):
    """
    Polynomial-decay learning-rate schedule with optional linear warmup and
    optional restart-style cycling.

    With `cycle=True` the step index wraps around every `decay_steps` steps, so
    the schedule (warmup included) starts over. With `num_warmup_steps > 0` the
    learning rate ramps linearly up to `initial_learning_rate` during the first
    `num_warmup_steps` steps.
    """

    def __init__(self,
                 initial_learning_rate,
                 decay_steps,
                 end_learning_rate=0.0001,
                 power=1.0,
                 cycle=False,
                 name=None,
                 num_warmup_steps=1000):
        """
        Parameters:
            initial_learning_rate, decay_steps, end_learning_rate, power, name:
                forwarded unchanged to PolynomialDecay
            cycle: when true, the schedule restarts every `decay_steps` steps
            num_warmup_steps: number of linear warmup steps (0 disables warmup)
        """
        # Since we have a custom __call__() method, we pass cycle=False when calling `super().__init__()` and
        # in self.__call__(), we simply do `step = step % self.decay_steps` to have cyclic behavior.
        super(CustomSchedule, self).__init__(initial_learning_rate, decay_steps, end_learning_rate, power, cycle=False, name=name)

        self.num_warmup_steps = num_warmup_steps

        # Kept as a plain Python bool (not a tensor) so that it can be tested with an
        # ordinary `if` both eagerly and while tracing a graph, and so that the
        # inherited get_config() returns a serializable value.
        self.cycle = bool(cycle)

    def __call__(self, step):
        """ `step` is actually the step index, starting at 0.

        Returns: the learning rate for that step.
        """

        # For cyclic behavior: wrap the step index. The modulo leaves any
        # step < decay_steps unchanged, so no tensor-valued condition is needed.
        # Because the wrapped step is always below decay_steps, the parent
        # schedule evaluates a plain decay over a single period for it.
        if self.cycle:
            step = step % self.decay_steps

        learning_rate = super(CustomSchedule, self).__call__(step)

        # Copy (including the comments) from original bert optimizer with minor change.
        # Ref: https://github.com/google-research/bert/blob/master/optimization.py#L25

        # Implements linear warmup: if global_step < num_warmup_steps, the
        # learning rate will be `global_step / num_warmup_steps * init_lr`.
        if self.num_warmup_steps > 0:

            steps_int = tf.cast(step, tf.int32)
            warmup_steps_int = tf.constant(self.num_warmup_steps, dtype=tf.int32)

            steps_float = tf.cast(steps_int, tf.float32)
            warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)

            # The first training step has index (`step`) 0.
            # The original code use `steps_float / warmup_steps_float`, which gives `warmup_percent_done` being 0,
            # and causing `learning_rate` = 0, which is undesired.
            # For this reason, we use `(steps_float + 1) / warmup_steps_float`.
            # At `step = warmup_steps_float - 1`, i.e , at the `warmup_steps_float`-th step,
            # `learning_rate` is `self.initial_learning_rate`.
            warmup_percent_done = (steps_float + 1) / warmup_steps_float

            warmup_learning_rate = self.initial_learning_rate * warmup_percent_done

            # blend: warmup rate while step < num_warmup_steps, decayed rate afterwards
            is_warmup = tf.cast(steps_int < warmup_steps_int, tf.float32)
            learning_rate = ((1.0 - is_warmup) * learning_rate + is_warmup * warmup_learning_rate)

        return learning_rate

    def get_config(self):
        """Returns the parent's configuration extended with `num_warmup_steps`."""
        config = super(CustomSchedule, self).get_config()
        config["num_warmup_steps"] = self.num_warmup_steps
        return config
 
from tensorflow.keras.optimizers.schedules import PolynomialDecay
from transformers import create_optimizer as co
def create_optimizer(distilBert, num_train_examples=50000):
    """
    Initializes the LAMB optimizer together with its learning-rate schedule.

    Parameters:
        distilBert: the model to be trained (currently not used by this function)
        num_train_examples: number of training examples seen per epoch; with the
            module-level `epochs` and `batch_size` it determines the total number
            of training steps, i.e. the period of the schedule

    Returns: tfa.optimizers.LAMB instance driven by a CustomSchedule
    """
    num_train_steps = int(epochs * num_train_examples / batch_size)

    # learning rate scheduler
    # NOTE(review): end_learning_rate equals the initial learning rate, so the
    # decay part of this schedule is constant — confirm that this is intended.
    schedule = CustomSchedule(
        initial_learning_rate=init_learning_rate,
        decay_steps=num_train_steps,
        end_learning_rate=init_learning_rate,
        power=1.0,
        cycle=True,
        # taken from the module-level configuration instead of a second hard-coded value
        num_warmup_steps=num_warmup_steps,
    )

    # Alternatives tried earlier: transformers.create_optimizer (AdamW) and an
    # AdamW optimizer with an explicit decay_var_list.

    # LAMB optimizer, known for training BERT super fast (find it at https://arxiv.org/abs/1904.00962 )
    return tfa.optimizers.LAMB(
        learning_rate=schedule,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-06,
        weight_decay_rate=init_weight_decay_rate,
        # variables whose names match these patterns are not weight-decayed
        exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"],
        name='LAMB'
    )


def compute_loss(positions, logits):
    '''
    Finds the loss between position logits and position labels.

    Parameters:
        positions: integer label positions, one per example
        logits: position logits whose last axis runs over sequence positions

    Returns: scalar mean negative log-likelihood of the labelled positions
    '''

    # the way google defines loss in their bert-joint-baseline paper
    # The one-hot depth is taken from the logits rather than fixed at 512, so
    # the loss also works when the sequence length differs from 512.
    num_positions = tf.shape(logits)[-1]
    one_hot_positions = tf.one_hot(positions, depth=num_positions, dtype=tf.float32)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    # the one-hot mask picks the log-probability of the labelled position
    loss = -tf.reduce_mean(tf.reduce_sum(one_hot_positions * log_probs, axis=-1))

    return loss


def compute_label_loss(labels, logits):
    '''
    Finds the loss for the answer type labels.

    Parameters:
        labels: integer answer-type labels, one per example
        logits: answer-type logits whose last axis runs over the answer types

    Returns: scalar mean negative log-likelihood of the labelled answer types
    '''
    # the way google defines loss in their bert-joint-baseline paper
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    one_hot_labels = tf.one_hot(labels, depth=answer_types, dtype=tf.float32)
    # the one-hot mask picks the log-probability of the labelled answer type
    picked_log_probs = tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    return -tf.reduce_mean(picked_log_probs)

def compute_gradient(distilBert, input_ids, input_masks, segment_ids,
                     start_pos_labels, end_pos_labels, answer_type_labels,
                     train_acc, train_acc_start_pos, train_acc_end_pos, train_acc_ans_type):
    '''
    Runs one forward pass, computes the gradients of the averaged loss and
    updates the accuracy metrics.

    Inputs: model, features (x), labels (y), accuracy metrics

    Returns: (gradients, acc) where gradients are taken with respect to the
             model's trainable variables and acc is the tuple
             (train_acc, train_acc_start_pos, train_acc_end_pos, train_acc_ans_type)
    '''
    model_inputs = (input_ids, input_masks, segment_ids)

    with tf.GradientTape() as tape:
        # forward pass in training mode, recorded on the tape
        start_pos_logits, end_pos_logits, answer_type_logits = distilBert(model_inputs, training=True)
        # one loss per output; the total loss is their plain average
        loss_start_pos = compute_loss(start_pos_labels, start_pos_logits)
        loss_end_pos = compute_loss(end_pos_labels, end_pos_logits)
        loss_ans_type = compute_label_loss(answer_type_labels, answer_type_logits)
        total_loss = (loss_start_pos + loss_end_pos + loss_ans_type) / 3.0

    gradients = tape.gradient(total_loss, distilBert.trainable_variables)

    labelled_outputs = (
        (start_pos_labels, start_pos_logits),
        (end_pos_labels, end_pos_logits),
        (answer_type_labels, answer_type_logits),
    )
    # the overall metric sees all three outputs ...
    for labels, logits in labelled_outputs:
        train_acc.update_state(labels, logits)
    # ... while each dedicated metric sees only its own output
    dedicated_metrics = (train_acc_start_pos, train_acc_end_pos, train_acc_ans_type)
    for metric, (labels, logits) in zip(dedicated_metrics, labelled_outputs):
        metric.update_state(labels, logits)

    return gradients, (train_acc, train_acc_start_pos, train_acc_end_pos, train_acc_ans_type)


def checkpt(distilBert, checkpoint_path):
    """
    Creates a checkpoint manager for the model and restores the latest
    checkpoint found under checkpoint_path, if there is one.

    Parameters: model and checkpoint directory

    Returns: the checkpoint manager (keeps at most 100 checkpoints)
    """
    ckpt = tf.train.Checkpoint(model=distilBert)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=100)

    latest = ckpt_manager.latest_checkpoint
    if not latest:
        print("No checkpoint found")
        return ckpt_manager

    # restore latest checkpoint and report the number encoded after the last "-" of its name
    ckpt.restore(latest)
    last_epoch = int(latest.split("-")[-1])
    print(last_epoch)
    print("Latest checkpoint restored")
    return ckpt_manager
    

def train(distilBert, optimizer, train_dataset, ckpt_manager, train_acc, train_acc_start_pos, train_acc_end_pos, train_acc_ans_type,
          checkpoint_every=25000, log_every=100):
    """
    Trains the model for the module-level number of `epochs`.

    Parameters:
        distilBert: model to train
        optimizer: optimizer whose apply_gradients receives (gradient, variable) pairs
        train_dataset: iterable of (x, y) batches, where x holds 'input_ids',
            'input_mask', 'segment_ids' and y holds 'start_positions',
            'end_positions', 'answer_types'
        ckpt_manager: checkpoint manager used to store checkpoints
        train_acc, train_acc_start_pos, train_acc_end_pos, train_acc_ans_type:
            accuracy metrics, reset at the start of every epoch
        checkpoint_every: store an intermediate checkpoint every this many
            batches within an epoch (0 or None disables intermediate checkpoints)
        log_every: print progress every this many batches (0 or None disables it)

    Returns: nothing but stores checkpoints
    """
    accuracy_report = 'Accuracy: Overall {:.6f}, Start Token {:.4f}, End Token {:.4f}, Answer Type {:.4f} '
    metrics = (train_acc, train_acc_start_pos, train_acc_end_pos, train_acc_ans_type)

    for epoch in range(epochs):
        # reset metrics at every epoch
        for metric in metrics:
            metric.reset_states()

        for (instance, (x, y)) in enumerate(train_dataset):
            # Intermediate checkpoint. Batch 0 is skipped on purpose: nothing has
            # been trained yet in the first epoch, and in later epochs the
            # end-of-epoch checkpoint has just been written.
            if checkpoint_every and instance and instance % checkpoint_every == 0:
                ckpt_manager.save()

            # generate x and y
            input_ids, input_masks, segment_ids = (x['input_ids'], x['input_mask'], x['segment_ids'])
            start_pos_labels, end_pos_labels, answer_type_labels = (
                y['start_positions'], y['end_positions'], y['answer_types'])

            # generate gradients and the (updated) accuracy metrics
            gradients, metrics = compute_gradient(distilBert, input_ids, input_masks, segment_ids,
                                                  start_pos_labels, end_pos_labels, answer_type_labels,
                                                  *metrics)

            # apply gradients, pairing each gradient with its variable
            optimizer.apply_gradients(list(zip(gradients, distilBert.trainable_variables)))

            # print accuracy
            if log_every and (instance + 1) % log_every == 0:
                print('Epoch {}, Instances processed {}'.format(epoch + 1,instance + 1, ))
                print(accuracy_report.format(*[metric.result() for metric in metrics]))
                print("-" * 100)

        # store a checkpoint and report the accuracy at the end of every epoch
        print ('\nSaving checkpoint for epoch {} at {}'.format(epoch + 1, ckpt_manager.save()))
        print(accuracy_report.format(*[metric.result() for metric in metrics]))
            



if __name__ == "__main__":
    # run configuration: mode switches and file locations on the mounted drive
    training_mode = False
    use_chkpt = True
    train_file = "/content/drive/My Drive/Colab Notebooks/nq-train.tfrecords-00000-of-00001"
    val_file = "/content/drive/My Drive/Colab Notebooks/eval_dev.tf_record"
    pred_file = "/content/drive/My Drive/Colab Notebooks/nq-dev-??.jsonl.gz"
    json_output_path = "/content/drive/My Drive/Colab Notebooks/predictions.json"
    checkpoint_path = "/content/drive/My Drive/Colab Notebooks/checkpoints/"

    # retrieve only the dataset needed for the selected mode
    if not training_mode:
        val_dataset = read_val_record(val_file, shuffle_buffer_size, batch_size)
    else:
        train_dataset = read_train_record(train_file, shuffle_buffer_size, batch_size)
    print("data retrieved")

    # create model and tokenizer
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased-distilled-squad')
    distilBert = TFNQModel.from_pretrained('distilbert-base-uncased-distilled-squad')
    print("Model created")

    if not training_mode:
        # prediction: restore the latest checkpoint, then convert logits to predictions.json
        ckpt_manager = checkpt(distilBert, checkpoint_path)
        print("Getting predictions...")
        get_prediction_json(distilBert, val_dataset, pred_file, val_file, json_output_path, best_indexes)
    else:
        # training: resume from the latest checkpoint if requested, otherwise start a fresh manager
        if not use_chkpt:
            ckpt = tf.train.Checkpoint(model=distilBert)
            ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=10)
        else:
            ckpt_manager = checkpt(distilBert, checkpoint_path)
        # define accuracy metrics
        train_acc, train_acc_start_pos, train_acc_end_pos, train_acc_ans_type = initialize_acc()
        # create optimizer
        optimizer = create_optimizer(distilBert)
        # train
        print("Training...")
        train(distilBert, optimizer, train_dataset, ckpt_manager, train_acc, train_acc_start_pos, train_acc_end_pos, train_acc_ans_type)


#     app.run(main)


data retrieved
Model created
8
Latest checkpoint restored
Getting predictions...
/content/drive/My Drive/Colab Notebooks/nq-dev-??.jsonl.gz
/content/drive/My Drive/Colab Notebooks/predictions.json
Batch 100 processed
Batch 200 processed
Batch 300 processed
Batch 400 processed
Batch 500 processed
Batch 600 processed
Batch 700 processed
Batch 800 processed
Batch 900 processed
Batch 1000 processed
Batch 1100 processed
Batch 1200 processed
Batch 1300 processed
Batch 1400 processed
Batch 1500 processed
Batch 1600 processed
Batch 1700 processed
Batch 1800 processed
Batch 1900 processed
Batch 2000 processed
Batch 2100 processed
Batch 2200 processed
Batch 2300 processed
Batch 2400 processed
Batch 2500 processed
Batch 2600 processed
Batch 2700 processed
Batch 2800 processed
Batch 2900 processed
Batch 3000 processed
Batch 3100 processed
Batch 3200 processed
Batch 3300 processed
Batch 3400 processed
Batch 3500 processed
Batch 3600 processed
Batch 3700 processed
Batch 3800 processed
Batch 3900 pro

In [0]:
# Mount Google Drive at /content/drive; the data, checkpoint and prediction
# paths used in this notebook all live below that mount point.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Store the current state of `distilBert` as a checkpoint in the working directory.
# NOTE(review): relies on `distilBert` created by an earlier cell and rebinds the
# global `ckpt_manager` to a manager rooted at "./".
ckpt = tf.train.Checkpoint(model=distilBert)
ckpt_manager = tf.train.CheckpointManager(ckpt, "./", max_to_keep=10)
ckpt_manager.save()

'./ckpt-1'

In [15]:
!python -m nq_eval --gold_path=/content/drive/My\ Drive/Colab\ Notebooks/nq-dev-sample.jsonl.gz --predictions_path=/content/drive/My\ Drive/Colab\ Notebooks/predictions.json --logtostderr

  'command line!' % flag_name)
  'command line!' % flag_name)
I0603 12:45:20.294454 140238757410688 eval_utils.py:260] parsing /content/drive/My Drive/Colab Notebooks/nq-dev-sample.jsonl.gz ..... 
I0603 12:45:23.101317 140238757410688 eval_utils.py:213] Reading predictions from file: /content/drive/My Drive/Colab Notebooks/predictions.json
{"long-best-threshold-f1": 0.5803108808290156, "long-best-threshold-precision": 0.6222222222222222, "long-best-threshold-recall": 0.5436893203883495, "long-best-threshold": 6.31158971786499, "long-recall-at-precision>=0.5": 0.6504854368932039, "long-precision-at-precision>=0.5": 0.5037593984962406, "long-recall-at-precision>=0.75": 0.34951456310679613, "long-precision-at-precision>=0.75": 0.75, "long-recall-at-precision>=0.9": 0.009708737864077669, "long-precision-at-precision>=0.9": 1.0, "short-best-threshold-f1": 0.4729729729729729, "short-best-threshold-precision": 0.4794520547945205, "short-best-threshold-recall": 0.4666666666666667, "short-best-

In [0]:
# Sanity check: report how many GPU devices TensorFlow can see in this runtime.
# !pip install tensorflow-gpu
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


In [4]:
!nvidia-smi

Tue Jun  2 20:43:08 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P0    71W / 149W |   1179MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
+-------

In [0]:
!ps -aux|grep python

root          25  0.1  0.6 413020 84228 ?        Sl   14:17   0:32 /usr/bin/python2 /usr/local/bin/jupyter-notebook --ip="172.28.0.2" --port=9000 --FileContentsManager.root_dir="/" --MappingKernelManager.root_dir="/content"
root        7106 21.6 20.4 44274496 2724444 ?    Ssl  20:17   3:36 /usr/bin/python3 -m ipykernel_launcher -f /root/.local/share/jupyter/runtime/kernel-5a1ccf58-b03f-41c5-a53b-edf78f302b32.json
root        7933  0.0  0.0  39196  6504 ?        S    20:33   0:00 /bin/bash -c ps -aux|grep python
root        7935  0.0  0.0  38568  4916 ?        S    20:33   0:00 grep python


In [0]:
# Builds a list with 10**10 elements, which needs tens of gigabytes of memory.
# NOTE(review): presumably run on purpose to exhaust RAM and force a runtime
# restart — confirm, and remove this cell before using "Run all".
[1]*10**10

In [1]:
# Mount Google Drive at /content/drive (same call as the earlier mount cell;
# needed again after the runtime has been restarted).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
