In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

In [2]:
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)

Num GPUs Available:  1


In [3]:
from bert.dataset import create_masked_input_dataset

In [4]:
!nvidia-smi

Thu Jan 30 14:05:43 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.00    Driver Version: 418.87.00    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:15:00.0 Off |                    0 |
| N/A   34C    P0    40W / 300W |     11MiB / 16130MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

## BERT layers

In [5]:
from bert.dataset import create_masked_input_dataset

training_data = create_masked_input_dataset(
    sequence_path='../uniparc_data/sequences_train.txt',
    max_sequence_length=128,
    batch_size=64,
    masking_freq=.05)

training_data = training_data.repeat().prefetch(tf.data.experimental.AUTOTUNE)

valid_data = create_masked_input_dataset(
    sequence_path='../uniparc_data/sequences_valid.txt',
    max_sequence_length=128,
    batch_size=64,
    masking_freq=.05)

valid_data = valid_data.repeat().prefetch(tf.data.experimental.AUTOTUNE)

In [6]:
masked_seqs, true_values = next(iter(valid_data))

In [7]:
from tensorflow.keras import layers

from bert.layers import (PositionEmbedding, Transformer, TokenEmbedding, Bias,
                         gelu, initializer, Projection, DenseNoMask)

In [8]:
vocab_size = 22
embedding_dimension = 32
model_dimension = 128
transformer_dimension = 4 * model_dimension
num_attention_heads = model_dimension // 64
num_transformer_layers = 4
dropout_rate = 0.

inputs = layers.Input(shape=(None,), dtype=tf.int32, batch_size=None)

# Amino-acid level embeddings
embeddings = layers.Embedding(
    vocab_size, model_dimension, embeddings_initializer=initializer(),
    mask_zero=True)(inputs)

# Initialize transformer, use ALBERT-style weight sharing
transformer = Transformer(
    num_attention_heads, transformer_dimension,
    attention_type='relative', max_relative_position=10,
    dropout=dropout_rate)

# Stack transformers together
for i in range(num_transformer_layers):
    embeddings = transformer(embeddings)

# Project back to original embedding dimension
out = DenseNoMask(vocab_size, activation=gelu,
                  kernel_initializer=initializer())(embeddings)

model = tf.keras.Model(inputs, out, name='model')

In [9]:
from bert.optimizers import ECE, masked_sparse_categorical_crossentropy, BertLinearSchedule
    
opt = tfa.optimizers.AdamW(learning_rate=1E-4, weight_decay=0.0)

true_labels = tf.keras.layers.Input(
    shape=(None,), dtype=tf.int32, batch_size=None)

model.compile(
    target_tensors=true_labels,    
    loss=masked_sparse_categorical_crossentropy,
    metrics=[ECE],
    optimizer=opt,
    experimental_run_tf_function=True)

callbacks = [BertLinearSchedule(1E-4, 100, int(1E7))]

In [10]:
model.fit(training_data, steps_per_epoch=100, epochs=4,
          verbose=1, validation_data=valid_data, validation_steps=10,
          callbacks=callbacks)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7f3acbc660f0>

In [11]:
# training_data_large = create_masked_input_dataset(
#     sequence_path='../uniparc_data/sequences_train.txt',
#     max_sequence_length=512,
#     batch_size=4,
#     masking_freq=.05)

# training_data_large = training_data_large.repeat().prefetch(tf.data.experimental.AUTOTUNE)

# valid_data_large = create_masked_input_dataset(
#     sequence_path='../uniparc_data/sequences_valid.txt',
#     max_sequence_length=512,
#     batch_size=4,
#     masking_freq=.05)

# valid_data_large = valid_data_large.repeat().prefetch(tf.data.experimental.AUTOTUNE)

In [12]:
# model.fit(training_data_large, steps_per_epoch=1000, epochs=5,
#           verbose=1, validation_data=valid_data_large, validation_steps=100,
#           callbacks=callbacks)

In [13]:
model.save('test_ckpts/test.h5')

OSError: Unable to create file (unable to open file: name = 'test_ckpts/test.h5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 242)

In [None]:
tf.keras.models.load_model('test_ckpts/test.h5')