In [2]:
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

In [3]:
# Look up the GPUs TensorFlow can see and report how many were found.
tf_experimental_config = tf.config.experimental
gpu_devices = tf_experimental_config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# Turn on memory growth for every detected GPU. This has to run before the
# devices are initialised, which is why it sits at the top of the notebook.
for device in gpu_devices:
    tf_experimental_config.set_memory_growth(device, enable=True)

Num GPUs Available:  1


In [4]:
# IPython shell escape: run `nvidia-smi` and show its report (driver/CUDA
# versions, per-GPU memory usage and utilisation) in the cell output.
!nvidia-smi

Tue Feb  4 06:23:19 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 430.26       Driver Version: 430.26       CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Quadro GV100        Off  | 00000000:37:00.0 Off |                  Off |
| 38%   47C    P0    46W / 250W |    234MiB / 32508MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|    0  

In [5]:
from bert.model import create_albert_model
from bert.optimizers import (ECE, masked_sparse_categorical_crossentropy,
                             BertLinearSchedule)


# Base optimizer: Adam with an explicit learning rate, beta_2 and epsilon.
adam = tf.optimizers.Adam(learning_rate=1E-4, beta_2=0.98, epsilon=1E-6)

# Wrap the optimizer with TensorFlow's mixed-precision graph rewrite; `opt` is
# the wrapped optimizer that is handed to `compile` below.
opt = tf.train.experimental.enable_mixed_precision_graph_rewrite(adam)

# Run everything on a single device, the first GPU.
strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")

# Model hyper-parameters. The feed-forward width and the number of attention
# heads are both derived from the model dimension (4x and dim // 64).
model_dim = 512
albert_kwargs = dict(
    model_dimension=model_dim,
    transformer_dimension=model_dim * 4,
    num_attention_heads=model_dim // 64,
    num_transformer_layers=6,
    vocab_size=24,
    dropout_rate=0.,
    max_relative_position=128,
    weight_share=False,
)

# Build and compile the model inside the distribution strategy's scope.
with strategy.scope():
    model = create_albert_model(**albert_kwargs)
    model.compile(
        optimizer=opt,
        loss=masked_sparse_categorical_crossentropy,
        metrics=[ECE],
        experimental_run_tf_function=True,
    )

# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None)]            0         
_________________________________________________________________
embedding (Embedding)        (None, None, 512)         12288     
_________________________________________________________________
transformer (Transformer)    (None, None, 512)         2906176   
_________________________________________________________________
transformer_1 (Transformer)  (None, None, 512)         2906176   
_________________________________________________________________
transformer_2 (Transformer)  (None, None, 512)         2906176   
_________________________________________________________________
transformer_3 (Transformer)  (None, None, 512)         2906176   
_________________________________________________________________
transformer_4 (Transformer)  (None, None, 512)         290617

In [6]:
# Learning-rate schedule callback from the project's `bert.optimizers` module.
# NOTE(review): the positional arguments are presumably (learning rate,
# warm-up steps, total steps) -- confirm against BertLinearSchedule.
lr_schedule = BertLinearSchedule(1E-5, 1000, int(1E7))

# TensorBoard logging once per epoch; histograms, graph export, profiling and
# embedding logging are all switched off.
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir='../uniparc_checkpoints/tblogs/test',
    update_freq='epoch',
    histogram_freq=0,
    embeddings_freq=0,
    profile_batch=0,
    write_graph=False,
)

# Checkpointing is currently disabled; the original callback was:
#     tf.keras.callbacks.ModelCheckpoint(filepath='jupyter_test_checkpoints/ckpt_{epoch}_{val_ECE:.2f}.h5'),
callbacks = [lr_schedule, tensorboard]

In [7]:
from bert.dataset import create_masked_input_dataset

# Build both input pipelines on the CPU. Each dataset is repeated indefinitely
# (the number of batches per epoch is bounded by the step counts given to
# `fit`) and prefetched with an automatically tuned buffer size.
with tf.device('/CPU:0'):
    autotune = tf.data.experimental.AUTOTUNE

    # Training stream: sequences are requested at a fixed length.
    training_data = (
        create_masked_input_dataset(
            sequence_path='../uniparc_data/train_uniref100.txt.gz',
            max_sequence_length=1024,
            fix_sequence_length=True,
            batch_size=2,
        )
        .repeat()
        .prefetch(autotune)
    )

    # Validation stream: same length cap and batch size, but
    # `fix_sequence_length` is left at the function's default.
    valid_data = (
        create_masked_input_dataset(
            sequence_path='../uniparc_data/dev_uniref50.txt.gz',
            max_sequence_length=1024,
            batch_size=2,
        )
        .repeat()
        .prefetch(autotune)
    )

In [8]:
# Short training run: 5 epochs of 500 training steps, each followed by 10
# validation steps. Left as a bare expression so the returned History object
# is shown as the cell output.
model.fit(
    x=training_data,
    epochs=5,
    steps_per_epoch=500,
    validation_data=valid_data,
    validation_steps=10,
    callbacks=callbacks,
    verbose=1,
)

Train for 500 steps, validate for 10 steps
Epoch 1/5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f5701445278>

In [None]:
# Pull one batch from the training stream; each element is a pair that is
# unpacked into the masked model input and the corresponding targets.
batch_iterator = iter(training_data)
first_batch = next(batch_iterator)
masked, true = first_batch

# Run the trained model on the masked inputs of that batch.
predictions = model.predict(x=masked)

In [None]:
# Display index 0 along the second axis for every row of the masked batch.
# NOTE(review): presumably this is the first token of each sequence (the model
# input is two-dimensional) -- confirm the axis order.
masked[:, 0]

In [None]:
# Persist the trained model to a single file next to the other checkpoints.
model.save(filepath='../uniparc_checkpoints/test.h5')