In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

In [2]:
# Enumerate the GPUs TensorFlow can see and report how many were found.
gpu_devices = tf.config.experimental.list_physical_devices(device_type='GPU')
gpu_count = len(gpu_devices)
print("Num GPUs Available: ", gpu_count)

# Turn on memory growth for every GPU so TensorFlow allocates device memory
# as it is needed rather than reserving it all when the runtime initialises.
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, enable=True)

Num GPUs Available:  1


In [3]:
from bert.dataset import create_masked_input_dataset

In [4]:
# Shell escape: print the driver/CUDA versions and current GPU memory use.
!nvidia-smi

Fri Jan 17 13:58:16 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 430.26       Driver Version: 430.26       CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Quadro GV100        Off  | 00000000:37:00.0 Off |                  Off |
| 33%   46C    P2    46W / 250W |  29250MiB / 32508MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|    0  

## BERT layers

In [5]:
from tensorflow.keras import layers

from bert.layers import (PositionEmbedding, Transformer, TokenEmbedding, Bias,
                         gelu, initializer, Projection)

In [6]:
# ---- Hyperparameters ------------------------------------------------------
vocab_size = 22
embedding_dimension = 32    # size argument of the TokenEmbedding layer
model_dimension = 128       # size argument of the Projection layer
transformer_dimension = 4 * model_dimension
num_attention_heads = model_dimension // 64
num_transformer_layers = 4
dropout_rate = 0.0

# ---- Input: batches of integer token ids, sequence length left open -------
inputs = layers.Input(batch_size=None, shape=(None,), dtype=tf.int32)

# ---- Token embedding followed by a projection to `model_dimension` --------
# mask_zero=True: presumably token id 0 is padding -- TODO confirm against
# bert.layers.TokenEmbedding.
token_embedding_layer = TokenEmbedding(
    vocab_size,
    embedding_dimension,
    embeddings_initializer=initializer(),
    mask_zero=True,
)
token_embeddings = token_embedding_layer(inputs)

projection_layer = Projection(model_dimension, dropout_rate,
                              use_residual=False)
embeddings = projection_layer(token_embeddings)

# ---- Transformer stack ----------------------------------------------------
# A single Transformer instance is created and then applied
# `num_transformer_layers` times, so every pass reuses the same weights.
# NOTE(review): presumably intentional (ALBERT-style sharing) -- confirm.
transformer = Transformer(
    num_attention_heads,
    transformer_dimension,
    dropout=dropout_rate,
    attention_type='relative',
    max_relative_position=64,
)
for i in range(num_transformer_layers):
    embeddings = transformer(embeddings)

# ---- Output head: one value per vocabulary entry at every position --------
# NOTE(review): the head applies `gelu` to its outputs; check that this is
# what masked_sparse_categorical_crossentropy expects.
output_layer = layers.Dense(
    vocab_size, activation=gelu, kernel_initializer=initializer())
out = output_layer(embeddings)

# Remove the Keras mask attached to the output tensor before building the
# model (presumably because the custom loss does its own masking -- confirm).
del out._keras_mask

# Alternative head kept for reference (disabled):
# out = token_embedding_layer(out, transpose=True)
# out = Bias()(out)

model = tf.keras.Model(inputs=inputs, outputs=out, name='model')

In [11]:
from bert.optimizers import ECE, masked_sparse_categorical_crossentropy, BertLinearSchedule
    
# AdamW with weight decay switched off.
opt = tfa.optimizers.AdamW(learning_rate=1E-4, weight_decay=0.0)

# Symbolic placeholder for the integer target ids, handed to compile() below
# through `target_tensors`.
true_labels = tf.keras.layers.Input(
    shape=(None,), dtype=tf.int32, batch_size=None)

model.compile(
    target_tensors=true_labels,
    loss=masked_sparse_categorical_crossentropy,
    metrics=[ECE],
    optimizer=opt,
    experimental_run_tf_function=True)

# HDF5 saving does not create missing parent directories, so make sure the
# checkpoint folder exists before training starts; otherwise the first
# end-of-epoch save can fail on a fresh checkout. makedirs is a no-op when
# the directory is already there.
tf.io.gfile.makedirs('test_ckpts')

# BertLinearSchedule arguments are presumably (learning rate, warm-up steps,
# total steps) -- TODO confirm against bert.optimizers.
# ModelCheckpoint writes one file per epoch; `{epoch}` is filled in by Keras.
callbacks = [BertLinearSchedule(1E-4, 100, int(1E7)),
             tf.keras.callbacks.ModelCheckpoint(filepath='test_ckpts/ckpt_{epoch}.h5', verbose=1)]

In [12]:
from bert.dataset import create_masked_input_dataset

def build_masked_dataset(sequence_path, max_sequence_length=128,
                         batch_size=64, masking_freq=.05):
    """Create a repeating, prefetched masked-input dataset from one file.

    Wraps ``create_masked_input_dataset`` so the training and validation
    pipelines are built the same way and cannot drift apart.

    Args:
        sequence_path: path to the text file of sequences.
        max_sequence_length: forwarded to ``create_masked_input_dataset``.
        batch_size: forwarded to ``create_masked_input_dataset``.
        masking_freq: forwarded to ``create_masked_input_dataset``
            (presumably the fraction of tokens to mask -- TODO confirm).

    Returns:
        The dataset with ``.repeat()`` (no count, so it never runs out) and
        ``.prefetch(AUTOTUNE)`` applied.
    """
    dataset = create_masked_input_dataset(
        sequence_path=sequence_path,
        max_sequence_length=max_sequence_length,
        batch_size=batch_size,
        masking_freq=masking_freq)
    return dataset.repeat().prefetch(tf.data.experimental.AUTOTUNE)


training_data = build_masked_dataset('../uniparc_data/sequences_train.txt')
valid_data = build_masked_dataset('../uniparc_data/sequences_valid.txt')

In [13]:
# Short smoke-test run. Both datasets repeat indefinitely, so the step counts
# below are what bound each training epoch and each validation pass.
fit_options = dict(
    steps_per_epoch=10,
    epochs=2,
    verbose=1,
    validation_data=valid_data,
    validation_steps=10,
    callbacks=callbacks,
)
model.fit(training_data, **fit_options)

Epoch 1/2
Epoch 00001: saving model to test_ckpts/ckpt_1.h5
Epoch 2/2
Epoch 00002: saving model to test_ckpts/ckpt_2.h5


<tensorflow.python.keras.callbacks.History at 0x7f3ff214af98>

In [None]:
# training_data_large = create_masked_input_dataset(
#     sequence_path='../uniparc_data/sequences_train.txt',
#     max_sequence_length=512,
#     batch_size=4,
#     masking_freq=.05)

# training_data_large = training_data_large.repeat().prefetch(tf.data.experimental.AUTOTUNE)

# valid_data_large = create_masked_input_dataset(
#     sequence_path='../uniparc_data/sequences_valid.txt',
#     max_sequence_length=512,
#     batch_size=4,
#     masking_freq=.05)

# valid_data_large = valid_data_large.repeat().prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
# model.fit(training_data_large, steps_per_epoch=1000, epochs=5,
#           verbose=1, validation_data=valid_data_large, validation_steps=100,
#           callbacks=callbacks)

In [None]:
# Write the whole model to a single HDF5 file next to the epoch checkpoints.
model.save(filepath='test_ckpts/test.h5')

In [None]:
# Round-trip check: read back the file written by the previous cell.
# NOTE(review): the model uses custom layers, a custom loss and a custom
# metric from the `bert` package; unless those are registered with Keras this
# call may need a `custom_objects=` mapping -- confirm.
tf.keras.models.load_model(filepath='test_ckpts/test.h5')