In [0]:
import numpy as np
import tensorflow as tf

In [4]:
!pip install tensorflow-gpu==2.0.0-alpha0

Collecting tensorflow-gpu==2.0.0-alpha0
[?25l  Downloading https://files.pythonhosted.org/packages/1a/66/32cffad095253219d53f6b6c2a436637bbe45ac4e7be0244557210dc3918/tensorflow_gpu-2.0.0a0-cp36-cp36m-manylinux1_x86_64.whl (332.1MB)
[K     |████████████████████████████████| 332.1MB 59kB/s 
Collecting tf-estimator-nightly<1.14.0.dev2019030116,>=1.14.0.dev2019030115 (from tensorflow-gpu==2.0.0-alpha0)
[?25l  Downloading https://files.pythonhosted.org/packages/13/82/f16063b4eed210dc2ab057930ac1da4fbe1e91b7b051a6c8370b401e6ae7/tf_estimator_nightly-1.14.0.dev2019030115-py2.py3-none-any.whl (411kB)
[K     |████████████████████████████████| 419kB 40.2MB/s 
Collecting tb-nightly<1.14.0a20190302,>=1.14.0a20190301 (from tensorflow-gpu==2.0.0-alpha0)
[?25l  Downloading https://files.pythonhosted.org/packages/a9/51/aa1d756644bf4624c03844115e4ac4058eff77acd786b26315f051a4b195/tb_nightly-1.14.0a20190301-py3-none-any.whl (3.0MB)
[K     |████████████████████████████████| 3.0MB 39.1MB/s 
Installin

In [2]:
# Sanity check: show which TensorFlow version the kernel actually imported.
tf.__version__

'2.0.0-alpha0'

In [4]:
from google.colab import files

# Open Colab's upload widget; the result maps each file name to its raw bytes.
uploaded = files.upload()

# Echo every uploaded file together with its size as a quick confirmation.
for fn, content in uploaded.items():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(content)))

Saving gb1_single_fitness.csv to gb1_single_fitness.csv
User uploaded file "gb1_single_fitness.csv" with length 99478 bytes


In [0]:
import pandas as pd

In [6]:
# Load the fitness table and record the length of every sequence;
# max_len (the longest sequence) later sizes the positional-encoding matrix.
function_df = pd.read_csv('gb1_single_fitness.csv')
function_df['sequence_len'] = function_df['sequence'].apply(len)
max_len = function_df['sequence_len'].max()

def sequence_to_aa(seq):
  """Split a sequence string into a list of its single-character tokens."""
  return [residue for residue in seq]

def get_sequence_aa(df):
  """Tokenize the 'sequence' column of `df` character by character.

  Side effects: adds two columns to `df` in place --
  'sequence_aa' (list of characters per row) and 'length' (characters per row).

  Returns:
    A NumPy array built from the new 'sequence_aa' column.
  """
  raw_sequences = df['sequence']
  df['sequence_aa'] = raw_sequences.apply(sequence_to_aa)
  df['length'] = raw_sequences.apply(len)
  return np.array(df['sequence_aa'])

# Split every sequence into per-character tokens. As a side effect this also
# adds the 'sequence_aa' and 'length' columns to function_df.
sequence_aa = get_sequence_aa(function_df)

# import necessary tools from Keras
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical

# Build the token vocabulary from the data, then encode each tokenized
# sequence with the fitted tokenizer.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sequence_aa)
encoded = tokenizer.texts_to_sequences(sequence_aa)

Using TensorFlow backend.


In [0]:
# Number of entries in the fitted tokenizer's token -> index mapping.
vocab_size = len(tokenizer.word_index) 

In [0]:
# Features are the encoded sequences; the regression target is the 'fitness' column.
X = np.array(encoded)
y = function_df['fitness']

from sklearn.model_selection import train_test_split
# First hold out 20% of the rows as the test set, then take 10% of the remainder
# for validation. random_state is fixed so both splits are reproducible.
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size = 0.2, random_state = 10)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size = 0.1, random_state = 10)

In [9]:
# Inspect the dimensions of the training feature array.
X_train.shape

(752, 56)

# The implementation below is adapted from Trung Tran's blog post, [Create the Transformer with TensorFlow 2.0](https://machinetalk.org/2019/04/29/create-the-transformer-with-tensorflow-2-0/).

Positional Encoding

In [0]:
# function for positional embedding
def positional_embedding(pos, embed_size):
  """Sinusoidal positional encoding for a single position.

  Even dimensions hold sin(pos / 10000^(d / embed_size)); each odd dimension
  holds the cosine at the frequency of the even dimension just before it.

  Args:
    pos: position index within the sequence.
    embed_size: number of embedding dimensions.

  Returns:
    A float array of shape (1, embed_size).
  """
  def _angle(dim):
    # Dimensions (2k, 2k + 1) share one frequency, set by the even index 2k.
    paired_dim = dim - (dim % 2)
    return pos / 10000 ** (paired_dim / embed_size)

  values = [
      np.sin(_angle(dim)) if dim % 2 == 0 else np.cos(_angle(dim))
      for dim in range(embed_size)
  ]
  return np.array(values, dtype = np.float64).reshape(1, embed_size)

embed_size = 20

# One (1, embed_size) encoding per position index 0 .. max_len - 1.
pes = [positional_embedding(position, embed_size) for position in range(max_len)]

In [0]:
# Stack the per-position rows into a single positional-encoding matrix:
# row i is the encoding for position i of a sequence.
# NOTE: this rebinds `pes` (list -> array), so the cell is not safe to re-run on its own.
pes = np.concatenate(pes, axis = 0)

In [0]:
# Convert the encoding matrix to a float32 TensorFlow constant; Net.call reads
# this module-level `pes` when adding positional information to the embeddings.
pes = tf.constant(pes, dtype = tf.float32)

In [13]:
# Inspect the dimensions of the positional-encoding tensor (one row per position).
pes.shape

TensorShape([Dimension(56), Dimension(20)])

The Multi-head Attention

In [0]:
class MultiHeadAttention(tf.keras.Model):
  """Multi-head scaled dot-product attention.

  Each of the `h` heads projects the query and the value/key source into a
  smaller sub-space (size `embed_size // h`), attends there, and the heads are
  concatenated and projected back to `embed_size` by `wo`.

  If `embed_size` is not divisible by `h`, the concatenated heads are
  `h * (embed_size // h)` wide; `wo` still maps the result to `embed_size`.

  Args:
    embed_size: width of the model's embeddings (output width of this layer).
    h: number of attention heads.
  """

  def __init__(self, embed_size, h):
    super(MultiHeadAttention, self).__init__()
    self.query_size = embed_size // h
    self.key_size = embed_size // h
    self.value_size = embed_size // h

    self.h = h

    # One independent query/key/value projection per head.
    self.wq = [tf.keras.layers.Dense(self.query_size) for _ in range(h)]
    self.wk = [tf.keras.layers.Dense(self.key_size) for _ in range(h)]
    self.wv = [tf.keras.layers.Dense(self.value_size) for _ in range(h)]
    # Output projection applied to the concatenated heads.
    self.wo = tf.keras.layers.Dense(embed_size)

  def call(self, query, value):
    """Attend from `query` positions over `value` positions.

    Args:
      query: tensor of shape (batch, query_len, embed_size).
      value: tensor of shape (batch, value_len, embed_size); it is used both
        as the key source and as the value source.

    Returns:
      Tensor of shape (batch, query_len, embed_size).
    """
    # Scaling factor sqrt(d_k) from the scaled dot-product attention formula.
    scale = tf.math.sqrt(tf.dtypes.cast(self.key_size, tf.float32))

    heads = []
    for i in range(self.h):
      # Project into this head's sub-space. Previously wq / wk were never
      # applied, so every head scored the raw, unprojected inputs.
      q = self.wq[i](query)   # (batch, query_len, query_size)
      k = self.wk[i](value)   # (batch, value_len, key_size)
      v = self.wv[i](value)   # (batch, value_len, value_size)

      # score and weights have shape (batch, query_len, value_len)
      score = tf.matmul(q, k, transpose_b = True) / scale
      weights = tf.nn.softmax(score, axis = 2)

      # head has shape (batch, query_len, value_size)
      heads.append(tf.matmul(weights, v))

    # Combine the heads only AFTER the loop has produced all of them
    # (the original returned from inside the loop, using head 0 alone).
    heads = tf.concat(heads, axis = 2)

    # final output has shape (batch, query_len, embed_size)
    return self.wo(heads)
    

The Encoder (called `Net` in this notebook)

In [0]:
class Net(tf.keras.Model):
  """Transformer-encoder style regressor: token ids -> one scalar per sequence.

  Pipeline: embedding + positional encoding, then `num_layers` blocks of
  (self-attention -> residual -> norm -> feed-forward -> residual -> norm),
  then flatten and a single-unit dense head.

  Relies on the module-level tensor `pes` (positional encodings, one row per
  position) being defined with `embed_size` columns before the model is called.

  Args:
    vocab_size: largest token id; the embedding table gets `vocab_size + 1`
      rows so that id `vocab_size` itself is a valid index.
    embed_size: width of the token embeddings.
    num_layers: number of encoder blocks.
    h: number of attention heads per block.
  """

  def __init__(self, vocab_size, embed_size, num_layers, h):
    super(Net, self).__init__()
    self.embed_size = embed_size
    self.num_layers = num_layers
    self.h = h

    # One Embedding layer
    self.embedding = tf.keras.layers.Embedding(vocab_size + 1, embed_size)

    # num_layers MultiHeadAttention and Normalization layers
    self.attention = [MultiHeadAttention(embed_size, h) for _ in range(num_layers)]
    self.attention_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]

    # num_layers FFN and Normalization layers
    self.dense_1 = [tf.keras.layers.Dense(embed_size * 4, activation = 'relu') for _ in range(num_layers)]
    self.dense_2 = [tf.keras.layers.Dense(embed_size) for _ in range(num_layers)]
    self.ffn_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]

    # Regression head
    self.flatten = tf.keras.layers.Flatten()
    self.dense_3 = tf.keras.layers.Dense(1)

  def call(self, sequence):
    """Run the forward pass.

    Args:
      sequence: integer token ids of shape (batch, len); `len` must not
        exceed the number of rows in `pes`.

    Returns:
      Tensor of shape (batch, 1) with the predicted value per sequence.
    """
    sequence = tf.convert_to_tensor(sequence)
    seq_len = int(sequence.shape[1])

    # Embed the whole sequence at once and add the matching positional rows.
    # Equivalent to embedding position by position, without building one op
    # per position. Resulting shape: (batch, len, embed_size).
    sub_in = self.embedding(sequence) + pes[:seq_len, :]

    for i in range(self.num_layers):
      # Self-attention: every position queries the whole sequence. Each query
      # row is computed independently, so one batched call gives the same
      # result as looping over the positions.
      sub_out = self.attention[i](sub_in, sub_in)

      # residual connection + normalization
      sub_out = self.attention_norm[i](sub_in + sub_out)

      # position-wise feed-forward network
      ffn_out = self.dense_2[i](self.dense_1[i](sub_out))

      # residual connection + normalization; feeds the next block
      sub_in = self.ffn_norm[i](sub_out + ffn_out)

    # Apply the regression head once, after the last block (it previously ran
    # inside the loop on every block, and was undefined for num_layers == 0).
    return self.dense_3(self.flatten(sub_in))

In [0]:
# Net already reserves the extra embedding row itself (it builds the table with
# vocab_size + 1 rows), so pass the plain vocabulary size. The embedding width
# must equal the width of the positional encodings, hence embed_size rather
# than a repeated literal.
net = Net(vocab_size, embed_size, num_layers = 3, h = 3)

In [0]:
# The model expects rank-2 input (batch, len), so the datasets must be batched;
# un-batched slices would yield single rank-1 sequences.
BATCH_SIZE = 32

# Targets are cast to float32 to match the model's output dtype.
train_data = (
    tf.data.Dataset.from_tensor_slices((X_train, np.asarray(y_train, dtype = np.float32)))
    .shuffle(len(X_train))   # full-size buffer: a fresh uniform shuffle every epoch
    .batch(BATCH_SIZE)
)
val_data = (
    tf.data.Dataset.from_tensor_slices((X_val, np.asarray(y_val, dtype = np.float32)))
    .batch(BATCH_SIZE)
)
test_data = (
    tf.data.Dataset.from_tensor_slices((X_test, np.asarray(y_test, dtype = np.float32)))
    .batch(BATCH_SIZE)
)

In [0]:
# Adam with its default hyper-parameters; train_step uses it to apply gradients.
optimizer = tf.keras.optimizers.Adam()

In [0]:
# Run one forward pass over the training features and keep the result.
# NOTE(review): presumably this is here to build the model's variables before
# the tf.function-compiled train_step is traced -- confirm.
output = net(X_train)

In [0]:
@tf.function
def train_step(sequence, fitness):
  """Run one optimization step on a batch and return its mean squared error.

  Args:
    sequence: integer token ids of shape (batch, len).
    fitness: regression targets of shape (batch,).

  Returns:
    Scalar tensor: the mean squared error over the batch.
  """
  with tf.GradientTape() as tape:
    output = net(sequence)                       # (batch, 1)

    # Bring predictions and targets to the same shape and dtype before taking
    # the difference. Subtracting (batch, 1) from (batch,) would broadcast to a
    # (batch, batch) matrix and compare every prediction with every target.
    predictions = tf.squeeze(output, axis = -1)  # (batch,)
    targets = tf.cast(fitness, predictions.dtype)

    loss = tf.reduce_mean(tf.square(targets - predictions))

  variables = net.trainable_variables
  gradients = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(gradients, variables))

  return loss

In [0]:
# training loop
n_epochs = 100

for e in range(n_epochs):
  for batch, (sequence, fitness) in enumerate(train_data.take(-1)):
    loss = train_step(sequence, fitness)
  print(f'epoch - {e} loss - {loss.numpy()}')