# Score variants of concern (VOCs) using escape maps

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from scipy.stats import spearmanr
import matplotlib.pyplot as plt
import sys
sys.path.append("../")
from utils import *
from global_variables import *
from escape_map import *
pgm_path = "PGM/"
sys.path.append(pgm_path + "source/")
sys.path.append(pgm_path + "utilities/")
import utilities, Proteins_utils, sequence_logo, plots_utils
import rbm, RBM_utils

  dmean_v_dw = np.dot(s1.T, V)
  mean_V = np.dot(weights, V) / sum_weights


Loaded 29 KD vectors




In [2]:
# Load the VOC sequences from FASTA. drop_duplicates=False presumably keeps one row
# per FASTA record, so rows can be paired with the header names — TODO confirm.
vocs_untrimmed = Proteins_utils.load_FASTA("../seq_data/sequences_vocs.fasta", drop_duplicates=False)
# Drop BEGIN leading and END trailing columns. The stop index is computed explicitly
# instead of using `-END`: with END == 0, `BEGIN:-0` is `BEGIN:0` and selects no columns.
VOCS = vocs_untrimmed[:, BEGIN:vocs_untrimmed.shape[1] - END]
def extract_voc_names_from_fasta(fasta_path):
    """
    Collect the header text of every record in a FASTA file.

    A header is any line whose first character is '>'. The returned name is
    the remainder of that line with surrounding whitespace stripped. Names
    are returned in file order, one per header line; duplicates are kept.
    """
    with open(fasta_path, 'r') as handle:
        return [row[1:].strip() for row in handle if row.startswith('>')]

# VOC names taken from the '>' header lines of the same FASTA file the sequences were loaded from.
VOC_NAMES = extract_voc_names_from_fasta("../seq_data/sequences_vocs.fasta")

In [3]:
# --- Build EscapeMap with one shared raw concentration for all antibodies ---
# NOTE(review): this header previously claimed every concentration (ACE2 and antibodies)
# was 10^-12, but the values below are -5.0 for each antibody and -9 for ACE2 — confirm
# which setting is intended.
# One entry per KD vector; presumably log10 units like raw_ace2 below — TODO confirm.
raw_conc = np.full(len(KD_VECTORS), -5.0, dtype=np.float64)  # antibodies
model = EscapeMap(
    rbm=RBM,
    kd_vectors=KD_VECTORS,
    ace2_vector=ACE2_KD_VECTOR,
    raw_concentrations=raw_conc,
    raw_ace2=-9,      # ACE2 concentration in log10 space
    raw_beta=-1,
    total_beta=1.0,
)

# --- Score all VOCs ---
# Only score entries that have both a name and a sequence row.
n = min(len(VOC_NAMES), VOCS.shape[0])
scores = np.array([model.forward(seq) for seq in VOCS[:n]], dtype=float)

# Tabulate name/score pairs and rank them from highest to lowest score.
vocs_df = pd.DataFrame({"voc": VOC_NAMES[:n], "score": scores})
vocs_df = vocs_df.sort_values("score", ascending=False)
vocs_df = vocs_df.reset_index(drop=True)

print(vocs_df)


         voc       score
0      Delta -439.241669
1      Alpha -440.173828
2         WT -440.639526
3    BA.2.75 -447.673340
4       BA.2 -454.475647
5       BA.1 -464.416748
6       BA.5 -465.039429
7      Gamma -467.983368
8       Beta -473.059814
9     BQ.1.1 -473.994019
10  NB.1.8.1 -476.527405
11      JN.1 -481.806885
12       XBB -482.067596
13        EG -487.529419


In [4]:
# Call the model on the whole VOCS array at once and show the result as the cell output.
# Presumably equivalent to the per-row model.forward scores, in VOCS row order (unsorted) — TODO confirm.
model(VOCS)

array([-440.17383426, -473.05981545, -467.98337303, -439.24167542,
       -464.41673386, -454.47564174, -447.67335067, -487.52941563,
       -482.06760622, -473.99400643, -465.03943942, -440.63952457,
       -476.52740911, -481.80687152])