AAR（Amino Acid Recovery，氨基酸恢复率）  
与RMSD等结构指标不同，AAR从序列的角度来评判抗体：将生成的人工抗体序列与天然抗体序列进行比对，计算二者的相似性（比对后相同残基数占天然序列长度的百分比）。也可以采用其他评分方式进行计算，例如下文基于氨基酸风险评分表的方案。

需要提供的原始数据（FASTA 格式，每个文件只包含一条序列）：  
- 人工抗体序列
- 天然抗体序列

In [None]:
from Bio import SeqIO
from Bio.Align import MultipleSeqAlignment

def calculate_aar(natural_seq_file, generated_seq_file) -> float:
    """Return the Amino Acid Recovery (AAR) of a generated sequence, in percent.

    Both inputs are read with ``SeqIO.read(..., "fasta")``. The two sequences
    are globally aligned with ``pairwise2.align.globalxx`` and the AAR is the
    number of aligned positions holding the same character, divided by the
    length of the (unaligned) natural sequence, times 100.

    NOTE: a later cell in this notebook defines another function with the same
    name but a different signature; once that cell runs it replaces this one.

    Args:
        natural_seq_file: FASTA file (path or handle) with the natural
            antibody sequence.
        generated_seq_file: FASTA file (path or handle) with the generated
            antibody sequence.

    Returns:
        The recovery percentage. 0.0 when the generated sequence is empty,
        since nothing can have been recovered.

    Raises:
        ValueError: if the natural sequence is empty, because the AAR is
            undefined (division by a length of zero). ``SeqIO.read`` is also
            documented to raise ``ValueError`` when a file does not contain
            exactly one record.
    """
    # Kept as a function-local import, as in the original cell; the unused
    # ``format_alignment`` import has been dropped.
    from Bio import pairwise2

    # Read the natural sequence first, then the generated one.
    natural_seq_str = str(SeqIO.read(natural_seq_file, "fasta").seq)
    generated_seq_str = str(SeqIO.read(generated_seq_file, "fasta").seq)

    if not natural_seq_str:
        raise ValueError("natural sequence is empty; AAR is undefined")
    if not generated_seq_str:
        # Skip the aligner for an empty sequence — it presumably returns no
        # alignments, which made the original alignments[0] lookup fail.
        return 0.0

    # Simple global alignment (match = 1, no mismatch or gap penalties).
    # Only the first alignment is used, so ask for a single one instead of
    # enumerating every co-optimal alignment. Every optimal globalxx
    # alignment has the same number of matches, so the result is unaffected.
    alignments = pairwise2.align.globalxx(
        natural_seq_str, generated_seq_str, one_alignment_only=True
    )
    aligned_natural, aligned_generated = alignments[0][0], alignments[0][1]

    # Count aligned columns where both sequences carry the same residue.
    same_amino_acids = sum(
        1 for a, b in zip(aligned_natural, aligned_generated) if a == b
    )

    return (same_amino_acids / len(natural_seq_str)) * 100

# Example usage: compute the AAR for two FASTA files given by relative path
# (resolved against the current working directory) and print it as a percentage.
natural_seq_file = "natural_sequence.fasta"
generated_seq_file = "generated_sequence.fasta"
aar = calculate_aar(natural_seq_file, generated_seq_file)
print(f"Amino Acid Recovery (AAR): {aar}%")

另一种示例方案：直接给定每种氨基酸的风险分值（查表），再对序列中所有残基的分值取平均。

In [None]:
# 计算聚集倾向和稳定性风险
import numpy as np

# Per-residue score table keyed by upper-case one-letter amino-acid code,
# covering the 20 standard residues. Values range from 0.8 (C) down to
# -0.9 (K).
# NOTE(review): the provenance of these values is not given in this notebook.
AAR_SCORES = {
    'C': 0.8, 'W': 0.7, 'I': 0.6, 'V': 0.5,
    'F': 0.5, 'Y': 0.4, 'L': 0.3, 'M': 0.2,
    'H': 0.1, 'T': 0.0, 'G': -0.1, 'A': -0.2,
    'P': -0.3, 'S': -0.4, 'R': -0.5, 'N': -0.6,
    'D': -0.7, 'Q': -0.7, 'E': -0.8, 'K': -0.9
}


def calculate_aar(sequence):
    """Return the mean ``AAR_SCORES`` value over the residues of *sequence*.

    NOTE: this reuses the name of the alignment-based ``calculate_aar``
    defined earlier in this notebook and replaces it once this cell runs.

    Args:
        sequence: Iterable of one-letter amino-acid codes (typically a str).
            Codes are matched case-insensitively; any symbol that is not in
            ``AAR_SCORES`` contributes 0 to the mean.

    Returns:
        The arithmetic mean of the per-residue scores, or NaN for an empty
        sequence (the mean of no values is undefined).
    """
    # Upper-case string residues so lower-case input is not silently scored
    # as all-unknown; non-string items are looked up unchanged.
    scores = [
        AAR_SCORES.get(aa.upper() if isinstance(aa, str) else aa, 0)
        for aa in sequence
    ]
    if not scores:
        # Return NaN directly: np.mean([]) gives the same value but also
        # emits a "Mean of empty slice" RuntimeWarning.
        return float("nan")
    return np.mean(scores)

# Score a sequence and print the result rounded to four decimal places.
# NOTE(review): `antibody_seq` is not defined anywhere in this section; it
# must be assigned before this cell runs (presumably a string of one-letter
# residue codes), otherwise this line raises NameError.
aar_score = calculate_aar(antibody_seq)
print(f"AAR Score: {aar_score:.4f}")