# AntiFold example: setup and antibody sequence sampling

In [None]:
%%bash
# One-off setup: download the AntiFold code and model, create a conda
# environment, and install AntiFold into it.
# NOTE(review): the notebook kernel presumably has to run inside the `antifold`
# environment for the later `import antifold.main` to succeed — confirm.

# Download AntiFold code and model
mkdir -p antifold/models && cd antifold
wget https://opig.stats.ox.ac.uk/data/downloads/AntiFold/antifold.zip
wget -P models/ https://opig.stats.ox.ac.uk/data/downloads/AntiFold/models/model.pt
unzip antifold.zip

# Setup environment and install AntiFold (GPU)
# Nb: For CPU use: conda install -c pytorch pytorch
conda create --name antifold python=3.9 -y

# `conda activate` is a shell function that only exists once conda's shell hook
# has been loaded; the non-interactive shell started by %%bash does not load it,
# so load it explicitly before activating.
eval "$(conda shell.bash hook)"
conda activate antifold

# -y on every install: the cell has no interactive stdin to answer the prompt
conda install -c conda-forge pytorch-gpu -y # cudatoolkit=11.3 recommended
conda install -c pyg pyg -y
conda install -c conda-forge pip -y

# Install AntiFold
pip install .

In [1]:
import os
import sys
import numpy as np
import pandas as pd

import antifold.main as antifold

%load_ext autoreload
%autoreload 2

#### Sample antibody sequences in the notebook

In [2]:
# Inputs: the directory holding the PDB files, and the example CSV
# (PDBs and chains) loaded into a DataFrame
pdb_dir = "data/pdbs"
df_pdbs = pd.read_csv(filepath_or_buffer="data/example_pdbs.csv")

# Regions to mutate (IMGT)
regions_to_mutate = [
    "CDR1",
    "CDR2",
    "CDRH3",
]

In [3]:
# Load model
model = antifold.load_IF1_model("models/model.pt")

# Sample from PDBs, 10 sequences each at temperature 0.50 in regions CDR1, CDR2, CDRH3
pdb_output_dict = antifold.sample_pdbs(
    model,
    pdbs_csv_or_dataframe=df_pdbs,  # Path to CSV file, or a DataFrame
    regions_to_mutate=regions_to_mutate,
    pdb_dir=pdb_dir,  # use the configured directory rather than repeating the literal path
    sample_n=10,
    sampling_temp=0.50,
    limit_expected_variation=False,
)

# Output dictionary with sequences, and residue probabilities or log-odds;
# displaying the keys lists the entries that were produced
pdb_output_dict.keys()

dict_keys(['6y1l_imgt', '8ee8_imgt', 'C143_immunebuilder'])

In [4]:
# Display the table stored under the "logprobs" key for the "8ee8_imgt" entry
pdb_output_dict["8ee8_imgt"]["logprobs"]

Unnamed: 0,pdb_pos,pdb_chain,aa_orig,aa_pred,pdb_posins,perplexity,A,C,D,E,...,M,N,P,Q,R,S,T,V,W,Y
0,2,D,V,M,2,1.504494,-5.359256,-6.954609,-6.033103,-6.327341,...,-0.066798,-6.198246,-5.034755,-6.525597,-6.282945,-5.196742,-5.277635,-4.280661,-7.696222,-6.909234
1,3,D,Q,Q,3,2.443738,-6.619194,-9.316168,-5.736094,-3.318742,...,-5.861398,-4.845271,-11.090965,-0.216009,-4.501232,-5.972548,-5.359848,-2.898728,-9.435407,-7.472999
2,4,D,L,L,4,1.004351,-15.568546,-13.212317,-18.266783,-12.142051,...,-7.916049,-16.199093,-16.454500,-12.849660,-14.686855,-17.012991,-18.211679,-10.292188,-14.058020,-14.857738
3,5,D,V,V,5,1.277447,-8.567152,-13.570121,-11.442293,-8.442339,...,-7.800765,-11.191738,-18.462742,-6.881032,-10.799744,-11.644833,-9.491144,-0.062822,-13.515275,-12.762409
4,6,D,E,E,6,1.019323,-9.548150,-13.043267,-7.964682,-0.002447,...,-14.235148,-13.143275,-11.724870,-6.269294,-13.082457,-11.761532,-11.894757,-11.256067,-16.772890,-18.300236
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,124,C,L,L,124,1.001991,-16.287935,-17.827927,-21.667639,-18.481306,...,-9.704562,-20.380480,-21.102755,-16.785530,-17.695158,-20.945368,-18.186380,-9.102672,-15.058927,-19.093246
231,125,C,T,T,125,1.068092,-7.367596,-9.631317,-12.221505,-6.968607,...,-9.737492,-9.251575,-14.554691,-8.010697,-10.509960,-6.852391,-0.008766,-6.075428,-13.354302,-13.254747
232,126,C,V,V,126,1.012233,-13.233449,-14.736781,-20.900080,-15.990385,...,-12.795333,-19.367769,-17.468605,-16.396191,-16.428848,-17.138369,-11.635246,-0.001612,-16.766176,-18.483015
233,127,C,L,L,127,1.012512,-12.184276,-12.987074,-13.784216,-12.762620,...,-8.146120,-13.486700,-14.590498,-11.978979,-11.125016,-11.075565,-13.466260,-7.238191,-13.253984,-13.871912


In [5]:
# Display the records stored under the "sequences" key for the "8ee8_imgt" entry
pdb_output_dict["8ee8_imgt"]["sequences"]

OrderedDict([('8ee8_imgt',
              SeqRecord(seq=Seq('VQLVESGGGLVQPGGSLRLSCAASGFTFSSDGMSWVRQAPGKGLEWVSYISSGG...VLG'), id='8ee8_imgt', name='', description=", score=0.8316, global_score=0.8316, regions=['CDR1', 'CDR2', 'CDRH3'], model_name=AntiFold, seed=42", dbxrefs=[])),
             ('8ee8_imgt__1',
              SeqRecord(seq=Seq('VQLVESGGGLVQPGGSLRLSCAASGFTFSSYGMSWVRQAPGKGLEWVSYISSGG...VLG'), id='', name='', description='T=0.50, sample=1, score=0.8864, global_score=0.6893, seq_recovery=0.9083, mutations=11', dbxrefs=[])),
             ('8ee8_imgt__2',
              SeqRecord(seq=Seq('VQLVESGGGLVQPGGSLRLSCAASGFTFSSYGMSWVRQAPGKGLEWVSYISSGG...VLG'), id='', name='', description='T=0.50, sample=2, score=0.8476, global_score=0.6802, seq_recovery=0.9000, mutations=12', dbxrefs=[])),
             ('8ee8_imgt__3',
              SeqRecord(seq=Seq('VQLVESGGGLVQPGGSLRLSCAASGFTFSSYGMSWVRQAPGKGLEWVSYISSGG...VLG'), id='', name='', description='T=0.50, sample=3, score=0.8555, global_score=0.

In [8]:
# Pull out the record stored under the entry's own name and the first sampled
# record ("8ee8_imgt__1") for the 8ee8_imgt entry
sequences_8ee8 = pdb_output_dict["8ee8_imgt"]["sequences"]
native_record = sequences_8ee8["8ee8_imgt"]
sampled_record = sequences_8ee8["8ee8_imgt__1"]

# Each sequence is split on "/" into the two parts shown below as chains H and L
H_orig, L_orig = native_record.seq.split("/")
H_mut, L_mut = sampled_record.seq.split("/")

# Compare original vs. sampled sequence, one chain at a time
antifold.visualize_mutations(H_orig, H_mut, chain="H")
antifold.visualize_mutations(L_orig, L_mut, chain="L")

Mutations (11):	______________________________X_______________________X_________________________________________X_XX_XXXX_X_X___________
Original H:		VQLVESGGGLVQPGGSLRLSCAASGFTFSSDGMSWVRQAPGKGLEWVSYISSGGATTYYADSVKGRFTISRDNSKNTLSLQMNSLRGEDTAVYYCAKDITAPGRNGLDSWGQGVVVTVSS
Mutated H:		VQLVESGGGLVQPGGSLRLSCAASGFTFSSYGMSWVRQAPGKGLEWVSYISSGGSTTYYADSVKGRFTISRDNSKNTLSLQMNSLRGEDTAVYYCARDYVAARSYGFDYWGQGVVVTVSS

Mutations (9):	_________________________XXX__XXX__________________X_X_X___________________________________________________________
Original L:		SVLTQPPSLSASPGASARLPCTLSSDLNVGTKNMYWYQQKPGSAPRLFLYYYSDSDKQLGPGVPNRVSGSKETSSNTAFLLISGLQPEDEADYYCQVYDNSARVFGGGTRLTVLG
Mutated L:		SVLTQPPSLSASPGASARLPCTLSSSIIVGSYSMYWYQQKPGSAPRLFLYYKSSSEKQLGPGVPNRVSGSKETSSNTAFLLISGLQPEDEADYYCQVYDNSARVFGGGTRLTVLG

