## Imports

In [1]:
import os
from pathlib import Path

import numpy as np

# The native-log threshold is set before the tensorflow import below so that it
# is already in the environment when the TensorFlow runtime is loaded.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf

# Let INFO-level messages from TensorFlow's Python logger through.
tf.get_logger().setLevel("INFO")


In [2]:
# Instantiate a MirroredStrategy; the devices it picked up are logged in the cell output.
# NOTE(review): no `mirrored_strategy.scope()` block is visible in this notebook —
# confirm whether the models are meant to be built under this strategy.
mirrored_strategy = tf.distribute.MirroredStrategy()


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


## Load in the appropriate datasets and labels

In [3]:
import models.data_load as data_load

# Unpack the seven values returned by the project's training-data loader.
(
    train_input_ids,
    train_token_type_ids,
    train_mask,
    train_impossible,
    train_start_positions,
    train_end_positions,
    qas_id,
) = data_load.load_train()

# Stack the start and end positions and transpose, so that (assuming both are
# 1-D arrays — TODO confirm) each row holds one (start, end) label pair.
train_labels = np.vstack([train_start_positions, train_end_positions]).T
print(train_labels.shape)


(131911, 2)


### Learned and average pooling alone

In [4]:
from layers.learned_pooler import get_learned_pooling_model

# Build the learned-pooling model (factory called with no arguments) and print
# its layer-by-layer summary.
learned_pooling_model = get_learned_pooling_model()
learned_pooling_model.summary()


Model: "learned_pooler"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 386, 1024,   0           []                               
                                25)]                                                              
                                                                                                  
 learned_pooler (LearnedPooler)  (None, 386, 1024, 1  26         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 dense (Dense)                  (None, 386, 1024, 2  4           ['learned_pooler[0][0]']         
                                )                                                    

In [5]:
from layers.average_pooler import get_average_pooler

# Build the average-pooling model (factory called with no arguments) and print
# its layer-by-layer summary.
average_pooler_model = get_average_pooler()
average_pooler_model.summary()


Model: "average_pooler"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 386, 1024,   0           []                               
                                25)]                                                              
                                                                                                  
 tf.math.reduce_mean (TFOpLambd  (None, 386, 1024, 1  0          ['input_2[0][0]']                
 a)                             )                                                                 
                                                                                                  
 dense_1 (Dense)                (None, 386, 1024, 2  4           ['tf.math.reduce_mean[0][0]']    
                                )                                                    

### Example training procedure

In [6]:
# Build the BERT QA model and restore its weights from the `ckpt_0004` training checkpoint.
# The resulting `bert_model` is later handed to the embedding generator.


import models.bert_large_uncased as bert_large_uncased

bert_model = bert_large_uncased.create_bert_qa_model(optimizer="adam")
bert_model.load_weights("./results/bert-large-uncased/training_checkpoints/ckpt_0004.ckpt")


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f2882ca91d0>

### Model compilation

Every model is compiled with two output heads: the first predicts the span start position and the second predicts the span end position. No activation is applied to either head because both come directly from splitting a single tensor, so the loss is computed on raw logits.

In [7]:
def compile_model(model):
    """Compile ``model`` in place for two-headed span prediction.

    Both heads share one sparse categorical cross-entropy loss that is applied
    to raw logits (``from_logits=True``). The optimizer is selected by the
    string ``"adam"`` and accuracy is tracked as a training metric.

    Args:
        model: The Keras model to compile; it is modified in place.

    Returns:
        None.
    """
    span_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    # The same loss object is used for the start head and the end head.
    per_head_losses = [span_loss] * 2

    # Accuracy is reported alongside the loss while training.
    model.compile(
        optimizer="adam",
        loss=per_head_losses,
        metrics=["accuracy"],
    )


# Gather the models to fit.
# Since fitting is typically faster than loading the data, it is beneficial to
# fit many models on each loaded batch.
model_list = [average_pooler_model, learned_pooling_model]

# Compile every model with the shared two-headed configuration.
for model_current in model_list:
    compile_model(model_current)


In [8]:
# NOTE(review): a later cell imports this module as `data.bert_embedding_parser`;
# confirm which import path is the canonical one.
import bert_embedding_parser as bert_embedding_parser

# Generator that yields training batches built with the BERT model.
gen = bert_embedding_parser.load_bert_embeddings(bert_model, batch_size=16)

# Upper bound on the number of batches to train on. Any value works; 8248 was
# pre-calculated to cover the entire training set at a batch size of 16.
max_batches = 8248

# enumerate(start=1) tracks how many batches have been consumed so far, so no
# hand-maintained counter is needed.
for batch_count, batch in enumerate(gen, start=1):
    # Each batch is indexed as (inputs, labels).
    X = batch[0]
    Y = batch[1]

    # One pass over this batch for the learned-pooling model only.
    learned_pooling_model.fit(X, Y, epochs=1)

    # Stop once the requested number of batches has been processed.
    if batch_count >= max_batches:
        break




InvalidArgumentError: Graph execution error:

Detected at node 'Equal' defined at (most recent call last):
    File "<frozen runpy>", line 198, in _run_module_as_main
    File "<frozen runpy>", line 88, in _run_code
    File "/opt/python/py311/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/opt/python/py311/lib/python3.11/site-packages/traitlets/config/application.py", line 1043, in launch_instance
      app.start()
    File "/opt/python/py311/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 725, in start
      self.io_loop.start()
    File "/opt/python/py311/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "/opt/python/py311/lib/python3.11/asyncio/base_events.py", line 607, in run_forever
      self._run_once()
    File "/opt/python/py311/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once
      handle._run()
    File "/opt/python/py311/lib/python3.11/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/opt/python/py311/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "/opt/python/py311/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "/opt/python/py311/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell
      await result
    File "/opt/python/py311/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/opt/python/py311/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "/opt/python/py311/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/opt/python/py311/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3009, in run_cell
      result = self._run_cell(
    File "/opt/python/py311/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3064, in _run_cell
      result = runner(coro)
    File "/opt/python/py311/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/opt/python/py311/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3269, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/opt/python/py311/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3448, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/opt/python/py311/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_21214/3157361031.py", line 14, in <module>
      learned_pooling_model.fit(X, Y, epochs=1)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/engine/training.py", line 1685, in fit
      tmp_logs = self.train_function(iterator)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/engine/training.py", line 1284, in train_function
      return step_function(self, iterator)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/engine/training.py", line 1268, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/python/py311/lib/python3.11/site-packages/keras/engine/training.py", line 1249, in run_step
      outputs = model.train_step(data)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/engine/training.py", line 1055, in train_step
      return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/engine/training.py", line 1149, in compute_metrics
      self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/engine/compile_utils.py", line 605, in update_state
      metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/utils/metrics_utils.py", line 77, in decorated
      update_op = update_state_fn(*args, **kwargs)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/metrics/base_metric.py", line 140, in update_state_fn
      return ag_update_state(*args, **kwargs)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/metrics/base_metric.py", line 691, in update_state
      matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/metrics/accuracy_metrics.py", line 459, in sparse_categorical_accuracy
      matches = metrics_utils.sparse_categorical_matches(y_true, y_pred)
    File "/opt/python/py311/lib/python3.11/site-packages/keras/utils/metrics_utils.py", line 971, in sparse_categorical_matches
      matches = tf.cast(tf.equal(y_true, y_pred), backend.floatx())
Node: 'Equal'
required broadcastable shapes
	 [[{{node Equal}}]] [Op:__inference_train_function_36436]

In [None]:
import data.bert_embedding_parser as bert_embedding_parser

# Generator that yields training batches built with the BERT model.
gen = bert_embedding_parser.load_bert_embeddings(bert_model, batch_size=16)

# Upper bound on the number of batches to train on. Any value works; 8248 was
# pre-calculated to cover the entire training set at a batch size of 16.
max_batches = 8248

# enumerate(start=1) tracks how many batches have been consumed so far, so no
# hand-maintained counter is needed.
for batch_count, batch in enumerate(gen, start=1):
    # Each batch is indexed as (inputs, labels).
    X = batch[0]
    Y = batch[1]

    # Reuse the loaded batch for every model so the data-loading cost is paid
    # once per batch rather than once per model.
    for model_current in model_list:
        # Fit the current model on this batch once
        model_current.fit(X, Y, epochs=1)

    # Stop once the requested number of batches has been processed.
    if batch_count >= max_batches:
        break


In [None]:
# Save weights
weights_dir = "weights"

# Create the output directory up front so the writes below do not fail on a
# fresh checkout where it does not exist yet; a no-op if it is already there.
Path(weights_dir).mkdir(parents=True, exist_ok=True)

for m in model_list:
    # One HDF5 weights file per model, named after the model.
    m.save_weights(str(Path(weights_dir) / f"{m.name}.h5"))


# TODO: print out the 25 weights for each model (before and after fine-tuning)
