# Intelligent Antibodies

> **Goal:** Generate antibody protein sequences that are predicted to interact with a target antigen.

In [5]:
# Enable IPython's autoreload extension so edits to the imported project
# modules are picked up without restarting the kernel.
# Mode 2 reloads all modules before each cell is executed.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
import keras
from keras import layers
import matplotlib.pyplot as plt
import tensorflow as tf


from modules.models.VAE import VAE
from modules.models.VAEFull import VAEFull
from modules.models.SiameseInteractionClassifier import f1, binary_crossentropy, mcc, forward, accuracy
from modules.encoding import ProteinOneHotEncoder

import pandas as pd
import numpy as np

In [7]:
# Cap TensorFlow's use of the first visible GPU by exposing it as a single
# virtual device with memory_limit=5120 (megabytes per the TensorFlow docs).
# A RuntimeError from the configuration call is printed rather than raised so
# the rest of the notebook can still run.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)],
        )
    except RuntimeError as err:
        print(err)

In [8]:
# Shape of a single one-hot encoded sequence: (positions, alphabet symbols).
vector_size = 200
alphabet_size = 18
input_dimensions = (vector_size, alphabet_size)

# Build the VAE from the named constants instead of repeating the literals,
# so the model and the rest of the notebook cannot drift apart, then reload
# the saved model file that matches this vector size.
vae_full = VAEFull(vector_size, alphabet_size)
vae_full.vae.reload(f'run/vae-one-hot-{vector_size}.keras')

Reloaded.


In [9]:
# Symbolic Keras inputs for the antigen ('seq_ag') and antibody ('seq_ab')
# sequences, both shaped like one one-hot encoded sequence.
seq_input1, seq_input2 = (
    layers.Input(shape=input_dimensions, name=input_name)
    for input_name in ('seq_ag', 'seq_ab')
)

In [10]:
# Load the saved siamese interaction classifier. The custom metric/loss/layer
# functions must be supplied by name so Keras can deserialize the model.
siamese = keras.models.load_model(
    f'run/models/siamese/one-hot-{vector_size}-model.h5',
    custom_objects={
        'f1': f1,
        'mcc': mcc,
        'binary_crossentropy': binary_crossentropy,
        'forward': forward,
        'accuracy': accuracy,
    },
)

In [11]:
# Get target antigen
antigen_seq_id = "6xe1"

df_seq = pd.read_csv("../data/SAbDab/sequences.csv", sep=";")
antigen = df_seq[df_seq["seq_id"] == f"{antigen_seq_id}|ag"]

In [12]:
# Keep only the "sequence" column of the selected antigen row(s); the result
# is a pandas Series, not a plain string.
antigen_sequence = antigen.loc[:, "sequence"]

In [13]:
# Shared encoder instance: used by the cells below to decode VAE outputs into
# sequence strings and to one-hot encode sequences for the classifier.
encoder = ProteinOneHotEncoder()

In [14]:
def generate_antibody_sequence(n, vector_size, latent_dim=2):
    """Sample candidate protein sequences from the VAE decoder.

    Draws ``n`` latent vectors from a standard normal distribution, decodes
    them with ``vae_full.decoder`` and turns each decoder output into a
    sequence string via ``encoder.decode``. The string is then re-encoded, so
    the yielded one-hot value comes from ``encoder.encode`` rather than being
    the raw decoder output.

    Args:
        n: Number of latent samples, and therefore sequences, to generate.
        vector_size: Sequence length used to reshape each decoder output and
            passed on to ``encoder.encode``.
        latent_dim: Width of each sampled latent vector. Defaults to 2, the
            value this notebook previously hard-coded; presumably it has to
            match the decoder's input width (TODO confirm against VAEFull).

    Yields:
        ``(protein_sequence, protein_onehot)`` pairs, where
        ``protein_onehot`` is the encoding of a one-element batch containing
        ``protein_sequence``.
    """
    z = tf.random.normal(shape=[n, latent_dim])
    x_reconst = vae_full.decoder.predict(z, verbose=0)
    for x in x_reconst:
        # Reshape with the caller's vector_size and the notebook-level
        # alphabet_size instead of the literals 200 and 18, so a mismatch with
        # the decoder output raises here instead of being silently ignored.
        x_sample = x.reshape((vector_size, alphabet_size))
        protein_sequence = "".join(encoder.decode(x_sample))
        protein_onehot = encoder.encode([protein_sequence], vector_size)
        yield protein_sequence, protein_onehot


In [15]:
def test_interaction(onehot_antibody, onehot_antigen, threshold=0.8):
    """Classify one antibody/antigen pair with the siamese model.

    The model score is compared against ``threshold`` and cast to an int32
    tensor; only the first element of the first row is returned, so any
    additional rows in the prediction are ignored.

    Args:
        onehot_antibody: One-hot encoded antibody batch passed as the first
            model input.
        onehot_antigen: One-hot encoded antigen batch passed as the second
            model input.
        threshold: Score strictly above which the pair is labelled 1.

    Returns:
        A scalar int32 tensor: 1 if the score exceeds ``threshold``, else 0.
    """
    # NOTE(review): the notebook's Input layers are named 'seq_ag' then
    # 'seq_ab', while the antibody is passed first here -- confirm the order
    # the loaded model expects.
    scores = siamese.predict([onehot_antibody, onehot_antigen])
    above_threshold = scores > threshold
    labels = tf.cast(above_threshold, tf.int32)
    return labels[0][0]

In [16]:
def generate_interacting_antibody(antigen, limit=10, batch_size=10):
    """Yield generated antibody sequences predicted to interact with an antigen.

    Runs ``limit`` rounds; each round samples ``batch_size`` candidates with
    ``generate_antibody_sequence`` and keeps those that ``test_interaction``
    labels as interacting. At most ``limit * batch_size`` candidates are
    screened, so the generator may yield fewer sequences, or none. No
    de-duplication is performed.

    Args:
        antigen: Antigen sequence(s) in the form accepted by
            ``encoder.encode``; encoded once and reused for every candidate.
        limit: Number of sampling rounds.
        batch_size: Number of candidates sampled per round.

    Yields:
        Antibody sequence strings that passed the interaction test.
    """
    onehot_antigen = encoder.encode(antigen, vector_size)
    for _ in range(limit):
        # Honour batch_size; the previous code ignored it and always used 10.
        candidates = generate_antibody_sequence(batch_size, vector_size)
        for sequence_antibody, onehot_antibody in candidates:
            if test_interaction(onehot_antibody, onehot_antigen):
                yield sequence_antibody


In [17]:
# Smoke test: draw a single latent sample and print the decoded sequence.
# No interaction screening happens here, and the one-hot encoding is unused.
for protein_sequence, protein_onehot in generate_antibody_sequence(1, vector_size):
    print(protein_sequence)

TKSQKINGQSQKGGDLPTEDIDLEACLDNDDFAKHRCFEFGQEHQIPGSPSFGSREPDDDQFEQQRKEFDFRIQARGQSSSNDFGFFRMGKKQDIDPSMRGQDSKQEDCKDQKGGEFGSGKTQEPPIHFLKSESAGQHKGNKTLTDHDIRFFMRHRSIQRFILKEKRPAMHKQPSPQHAQHIKFDSKKDDHITHGGSPSF


In [18]:
# Screen generated antibodies against the target antigen and save every hit
# as one FASTA record (">header" line followed by the sequence line).
n_generated = 0  # stays 0 when no candidate passes the interaction test
with open(f"run/antibody-sequence-{antigen_seq_id}.fasta", "w") as f:
    for i, sequence in enumerate(generate_interacting_antibody(antigen_sequence)):
        print(sequence)
        # End each record with a newline so the next ">" header starts on its
        # own line instead of being glued to this sequence.
        f.write(f">{antigen_seq_id}_{i}\n{sequence}\n")
        n_generated = i + 1  # i is a 0-based index, so the count is i + 1
print(f"We generated {n_generated} new antibody sequences")

HKLNDGTIRCAGPGPPSISEIGQTSTSLQDNNDNQKCFRRQFEFALRLHIAIRQTRGDPLHSMIFEFDAIGKGQFQKGIGMMIRQDKEKQQKNFQQPSDGEKMSKRLIQSKQIFPDSNAKIPSNGLLDFGFKFHMSARKGRRLEGFKLEHIRMFFISKFQFLIQPSNMIHDKFLLPAKSPPNEGIKKKQKKFGTQKAEFQ
HKGKNDTHFRAGIQTRNDGQIGQHKIQDQIIEDQKKQFRPPGAFAQGRGDKIICFPGKMPLSTCMESNHIICGDFQKSIQEEACQDKKKSQKHFQRQPQGSGEQIIKINRFKDMIDEDRKDDDDGGLLFGFKQIMSEDMSRCRFGFKSHHLCQFFSGKCQQMAQASNSCHIKFLLEKNGNGGMGRICCFKTCGNKQSQKQ
HKEKNDTHFTARPQPRSDSSIGQLEISFQIIEDNQKQFRPPTAFLQGRMDKIIQTPGDMFGSNCMESDHIIDGDFQKSIGEMARQDKKKSQKNFQRCPDGSKEFGIKIQSKIIKIDRDFPIDSNGLLDFGFKQIMSERKGRCLEGFKSEHLRMFFSGKCQQMIQFSNSCHQGFLLKANGNGGHGNICCQKKFGLKQSQFQ
HKEKNDTHFTARPQPRSDSSIGQLEISFQIIEDNQKQFRPPTAFLRGRHDKIQQTPGDMFGSNCQESDHIIDGDFQKFIGEMARQDKKKSQKNFQRCPDGSKEFGIIIQSKIIKPDRDFPIDSNGLLDFGFKQIMSERKGRRLEGFGLEHLRMFFSGKCQQMIQFSNSCHQTFLLKANGNGNHGIIICQKKFGLKQSQFQ
HKEKNDTHFTARPQTRNDSQIGQLIIQDQIIEDEKKQFRPPGAFAQGRMDKIICTPGKMPMSTCMESSHIICGDFQKSIQEGACQDKKKSQKNFQRCPDGSKEQGIKIPRKKIKIDSDRKDDKDGGLLFGFKQIMSERKKRCREGFKSHHLCQFFSGRCQQMAQFSNSCHIGFLLEANGNGGHGNICCFKTCGDKQ