# Intelligent Antibodies

> **Goal** Generate antibodies protein sequence.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import keras
from keras import layers
import matplotlib.pyplot as plt
import tensorflow as tf


from modules.models.VAE import VAE
from modules.models.VAEFull import VAEFull
from modules.models.SiameseInteractionClassifier import f1, binary_crossentropy, mcc, forward, accuracy
from modules.encoding import ProteinOneHotEncoder

import pandas as pd
import numpy as np

2025-09-04 08:23:42.829242: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
  except RuntimeError as e:
    print(e)

2025-09-04 08:23:46.894023: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2025-09-04 08:23:47.020111: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2025-09-04 08:23:47.023479: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [4]:
vector_size = 200
alphabet_size = 18
input_dimensions = (vector_size, alphabet_size)

vae_full = VAEFull(200, 18)
vae_full.vae.reload(f'run/vae-one-hot-{vector_size}.keras')

2025-09-04 08:23:47.079853: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.




2025-09-04 08:23:47.080944: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2025-09-04 08:23:47.083026: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2025-09-04 08:23:47.086985: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2025-09-04 08:23:47.217176: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2025-09-04 08:23:47.218831: I tensorflow/compiler/xla/stream_executo

Reloaded.


In [5]:
seq_input1 = layers.Input(shape=input_dimensions, name='seq_ag')
seq_input2 = layers.Input(shape=input_dimensions, name='seq_ab')

In [6]:
siamese = keras.models.load_model(f'run/models/siamese/one-hot-{vector_size}-model.h5', custom_objects=dict(f1=f1, mcc=mcc, binary_crossentropy=binary_crossentropy, forward=forward, accuracy=accuracy))

In [7]:
# Get target antigen
antigen_seq_id = "6xe1"

df_seq = pd.read_csv("../data/SAbDab/sequences.csv", sep=";")
antigen = df_seq[df_seq["seq_id"] == f"{antigen_seq_id}|ag"]

In [8]:
antigen_sequence = antigen["sequence"]

In [9]:
encoder = ProteinOneHotEncoder()

In [10]:
def generate_antibody_sequence(n, vector_size):
    z = tf.random.normal(shape=[n, 2])
    x_reconst = vae_full.decoder.predict(z, verbose=0)
    latent_dim = z.shape[1]
    for x in x_reconst:
        x_sample = x.reshape((200, 18))
        protein_sequence = "".join(list(encoder.decode(x_sample)))
        protein_onehot = encoder.encode([protein_sequence], vector_size)
        yield protein_sequence, protein_onehot


In [11]:
def test_interaction(onehot_antibody, onehot_antigen, threshold=0.8):
    score = siamese.predict([onehot_antibody, onehot_antigen])
    label = tf.cast(score > threshold, tf.int32)
    return label[0][0]

In [12]:
def generate_interacting_antibody(antigen, limit=10, batch_size = 10):
    onehot_antigen = encoder.encode(antigen, vector_size)
    for _ in range(limit):
        for sequence_antibody, onehot_antibody in generate_antibody_sequence(10, vector_size):
            if test_interaction(onehot_antibody, onehot_antigen):
                yield sequence_antibody


In [13]:
for protein_sequence, protein_onehot in generate_antibody_sequence(1, vector_size):
    print(protein_sequence)

2025-09-04 08:23:48.531946: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2025-09-04 08:23:48.569459: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8907


MSHLHFCWMHPHCMLTMWMKHDCWMICWHFNFPWMQCPMIEDPNLLHQCNLWHMLRCWPTHWLFEWLTMILTTWLHMFNLMNEFTIEWTRHSPFPWPKPWHTLGPFTPCWHWPWHWCGMHTGMWCGNCLWNTTDPFMWNRMWENNWMWCFHWMINGNGNFHDMLCWLWCWHQPGEMNIPIMWMGCQHKMHEFNTHKTGNK


In [15]:
with open(f"run/antibody-sequence-{antigen_seq_id}.fasta", "w") as f:
    for i, sequence in enumerate(generate_interacting_antibody(antigen_sequence)):
        # print(sequence)
        f.write(f">{antigen_seq_id}_{i}\n{sequence}")
print(f"We generated {i} new antibody sequences")

We generated 10 new antibody sequences
