In [None]:
# Shell escape: run `nvidia-smi` so the GPU status of this machine is recorded
# in the notebook output before any TensorFlow code runs.
!nvidia-smi

In [None]:
import os
import sys
sys.path.append('..')

import numpy as np
import tensorflow as tf

import warnings

# Physical GPUs TensorFlow can see on this host (may be an empty list).
gpu_devices = tf.config.experimental.list_physical_devices('GPU')

# Restrict this process to the first GPU and let its memory allocation grow
# on demand. The guard avoids an opaque IndexError on `gpu_devices[0]` when
# no GPU is visible (e.g. on a login node or a CPU-only machine).
if gpu_devices:
    tf.config.set_visible_devices(gpu_devices[0], 'GPU')
    tf.config.experimental.set_memory_growth(gpu_devices[0], True)
else:
    warnings.warn(
        'No GPU is visible to TensorFlow; continuing on CPU. '
        'The mixed_float16 policy is still enabled and may be slow on CPU.')

# Enable mixed precision globally; every Keras layer created after this point
# picks up the 'mixed_float16' policy unless it sets its own dtype.
from tensorflow.keras.mixed_precision import experimental as mixed_precision
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

In [3]:
# Display the TensorFlow version this kernel is running.
tf.__version__

'2.1.0'

In [4]:
from bert.model import create_albert_model
from bert.losses import ECE, masked_sparse_categorical_crossentropy, masked_sparse_categorical_accuracy
from bert.optimization import WarmUp

In [5]:
import tensorflow_addons.optimizers as tfa_optimizers

In [6]:
# Learning-rate schedule: a polynomial decay from 1e-4 to 0 over 10000 steps,
# wrapped in the project's WarmUp schedule with 1000 warm-up steps.
# NOTE(review): WarmUp comes from bert.optimization; presumably it ramps the
# rate up during `warmup_steps` and then defers to `decay_schedule_fn` --
# confirm against that module.
lr_schedule = WarmUp(
    initial_learning_rate=1E-4,
    decay_schedule_fn=tf.keras.optimizers.schedules.PolynomialDecay(
        initial_learning_rate=1E-4,
        decay_steps=10000,
        end_learning_rate=0.0),
    warmup_steps=1000)

# LAMB optimizer driven by the schedule above. Variables whose names match
# 'layer_norm' or 'bias' are excluded from weight decay.
optimizer = tfa_optimizers.LAMB(
    learning_rate=lr_schedule,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-6,
    weight_decay_rate=0.01,
    exclude_from_weight_decay=['layer_norm', 'bias'])

# strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
# # strategy = tf.distribute.MirroredStrategy()

# with strategy.scope():
#     model = create_albert_model(model_dimension=768,
#                                 transformer_dimension=768 * 4,
#                                 num_attention_heads=768 // 64,
#                                 num_transformer_layers=12,
#                                 vocab_size=24,
#                                 dropout_rate=0.,
#                                 max_relative_position=128)
    
#     model.compile(
#         loss=masked_sparse_categorical_crossentropy,
#         metrics=[ECE, masked_sparse_categorical_accuracy],
#         optimizer=optimizer)

    
# model.summary()

In [9]:
from tensorflow.keras import layers

from bert.layers import (Attention, Transformer, PositionEmbedding,
                         gelu, initializer, Projection, DenseNoMask)

from bert.losses import masked_sparse_categorical_crossentropy, ECE

# Hyperparameters of the encoder assembled in this cell.
model_dimension = 768
transformer_dimension = model_dimension * 4
num_attention_heads = model_dimension // 64
num_transformer_layers = 12
vocab_size = 24
dropout_rate = 0.
max_relative_position = 128
max_sequence_length = 1024
final_layernorm = False
attention_type = 'attention'

# Variable-length batches of integer token ids.
inputs = layers.Input(shape=(None,), dtype=tf.int32, batch_size=None)

# Amino-acid level embeddings
token_embedding = layers.Embedding(
    vocab_size,
    model_dimension,
    embeddings_initializer=initializer(),
    mask_zero=True)(inputs)

# Position embeddings computed from the same token ids; the table has
# max_sequence_length + 1 rows.
position_embedding = PositionEmbedding(
    max_sequence_length + 1,
    model_dimension,
    embeddings_initializer=initializer(),
    mask_zero=True)(inputs)

embeddings = layers.Add()([token_embedding, position_embedding])

# Stack transformers together
# NOTE(review): a new Transformer layer is instantiated on every iteration, so
# the layers do not share weights here -- confirm this is intended, given that
# the commented-out cell above built the model via create_albert_model.
last_layer_index = num_transformer_layers - 1
for layer_index in range(num_transformer_layers):

    # Layernorm is used on every layer except the final one, unless
    # `final_layernorm` asks for it there as well.
    use_layernorm = final_layernorm or layer_index != last_layer_index

    embeddings = Transformer(
        num_attention_heads,
        transformer_dimension,
        attention_type=attention_type,
        max_relative_position=max_relative_position,
        dropout=dropout_rate,
        use_layernorm=use_layernorm)(embeddings)

# Project to the 20 AA labels (and zero 'pad' label)
logits = DenseNoMask(21, kernel_initializer=initializer())(embeddings)

# Linear (identity) activation with an explicit float32 dtype, so the model
# output is float32 even though the global policy is mixed_float16.
outputs = layers.Activation('linear', dtype='float32')(logits)

model = tf.keras.Model(inputs, outputs, name='model')
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 768)    18432       input_3[0][0]                    
__________________________________________________________________________________________________
position_embedding (PositionEmb (None, None, 768)    787200      input_3[0][0]                    
__________________________________________________________________________________________________
add (Add)                       (None, None, 768)    0           embedding_2[0][0]                
                                                                 position_embedding[0][0]     

In [14]:
# Attach the LAMB optimizer, the masked cross-entropy loss and the two
# monitoring metrics (ECE and masked accuracy) to the model.
model.compile(
    optimizer=optimizer,
    loss=masked_sparse_categorical_crossentropy,
    metrics=[ECE, masked_sparse_categorical_accuracy])

In [15]:
import os
from bert.dataset import create_masked_input_dataset
# Root directory of the split UniRef100 files. The default is the original
# cluster location; set UNIREF100_DATA_PATH to run the notebook elsewhere
# without editing this cell.
data_path = os.environ.get(
    'UNIREF100_DATA_PATH',
    '/gpfs/alpine/proj-shared/bie108/split_uniref100')

# with tf.device('/CPU:0'):
# Training batches: 16 sequences per batch, sequence length fixed at 512,
# reading shard 0 of 12.
training_data = create_masked_input_dataset(
    sequence_path=os.path.join(
        data_path, 'train_uniref100_split/train_100_*.txt.gz'),
    max_sequence_length=512,
    fix_sequence_length=True,
    batch_size=16,
    shard_num_workers=12,
    shard_worker_index=0)

# NOTE(review): this glob is the same *training* file pattern used above, so
# the validation metrics are computed on training files rather than held-out
# data -- point this at the validation split once its path is confirmed.
# Also note that fix_sequence_length is not set here, unlike for training.
valid_data = create_masked_input_dataset(
    sequence_path=os.path.join(
        data_path, 'train_uniref100_split/train_100_*.txt.gz'),
    max_sequence_length=512,
    batch_size=16)

In [16]:
# Pull a single (inputs, targets) batch from a fresh iterator over the
# training dataset. Note that this rebinds `inputs`, which until now referred
# to the Keras Input tensor used to build the model.
inputs, targets = next(iter(training_data))

In [17]:
# Masked accuracy of the model's output on the single batch drawn above.
# In notebook order this runs before `model.fit`, so it is presumably a
# sanity check of the untrained model.
masked_sparse_categorical_accuracy(targets, model(inputs))

<tf.Tensor: shape=(), dtype=float32, numpy=0.05511811>

In [18]:
# Short training run: 5 epochs of 50 steps each, with 10 validation steps
# after every epoch. The returned History object is the cell's output.
model.fit(
    training_data,
    epochs=5,
    steps_per_epoch=50,
    validation_data=valid_data,
    validation_steps=10,
    verbose=1)

Train for 50 steps, validate for 10 steps
Epoch 1/5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7ffd784b7b38>

In [19]:
# Draw one batch from a new iterator over the training dataset and run the
# trained model on its first element. `true` is the second element of the
# batch (presumably the unmasked targets -- confirm against bert.dataset).
# NOTE(review): these predictions are made on training data, not held-out data.
masked, true = next(iter(training_data))
predictions = model.predict(masked)

In [None]:
# Inspect index 0 along the second axis for every row of the masked batch
# (presumably the first token of each sequence -- confirm).
masked[:, 0]