# AntiFold tutorial

In [12]:
import pandas as pd
import antifold
import antifold.main
import pathlib as pl


# Development aid: with autoreload mode 2, imported modules are reloaded
# before each cell is executed, so edits to the package are picked up live.
# NOTE: re-running this cell in the same kernel prints
# "The autoreload extension is already loaded" -- the notice is harmless.
%load_ext autoreload
%autoreload 2

# Load the model once; the same `model` object is reused by the cells below.
model = antifold.main.load_model()



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### Sample antibody in Notebook

In [None]:
# Directory containing the input PDB files (relative to this notebook)
pdb_dir = pl.Path('../data/pdbs')

# Build a table with one row per PDB and its chain IDs (columns pdb, Hchain, Lchain, ...).
# Assumes first chain heavy, second chain light
# NOTE(review): presumably the table is generated by scanning pdb_dir -- confirm.
pdbs_csv = antifold.main.generate_pdbs_csv(pdb_dir.as_posix(), max_chains=2)

# Alternatively, load a prepared CSV file with this information
# pdbs_csv = pd.read_csv("../data/example_pdbs.csv")
pdbs_csv.head()


Unnamed: 0,pdb,Hchain,Lchain,chain3,chain4,chain5,chain6,chain7,chain8,chain9,chain10
0,6y1l_imgt,H,L,,,,,,,,
1,8ee8_imgt,D,C,,,,,,,,
2,C143_immunebuilder,H,L,,,,,,,,


In [15]:
# Compute per-residue logits for the PDBs listed in pdbs_csv (no sampling happens here)
df_logits_list = antifold.main.get_pdbs_logits(
    model=model,
    pdbs_csv_or_dataframe=pdbs_csv,
    pdb_dir=pdb_dir.as_posix(),
)

# Show the logits table of the first entry: one row per residue position, one column
# per amino acid. These are raw logits (values can be positive), not log-probabilities.
df_logits_list[0]


Unnamed: 0,pdb_posins,pdb_chain,pdb_res,top_res,pdb_pos,perplexity,A,C,D,E,...,M,N,P,Q,R,S,T,V,W,Y
0,2,H,V,M,2,1.648812,0.541832,-1.073596,-0.780022,-0.786247,...,5.447723,-1.011164,0.674215,-1.093470,-0.770307,0.348106,0.439277,1.808568,-2.509914,-1.785515
1,3,H,Q,Q,3,1.388862,-2.229606,-4.550146,-0.183882,3.533127,...,-1.142007,2.071587,-5.069835,8.226052,3.300510,-1.733891,1.434406,0.715166,-5.375881,-3.119571
2,4,H,L,L,4,1.002084,-0.605992,0.131497,-4.796322,0.271998,...,3.479359,-3.901156,-1.295543,0.157823,-2.703829,-4.158237,-4.328786,2.185106,-0.781290,-0.680355
3,5,H,Q,Q,5,1.706991,0.134567,-5.041111,-1.744275,2.337098,...,-0.144230,0.861425,-12.202669,7.712160,3.337913,-0.089202,0.326601,2.749550,-3.815945,-4.896249
4,6,H,E,E,6,1.021615,0.921255,-1.131706,2.164598,11.621070,...,-1.469669,-0.282292,-0.285999,5.730549,-1.880150,-1.698004,-0.907386,0.893771,-3.873763,-6.184394
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
221,124,L,L,L,124,1.049188,-2.189133,-3.297068,-6.364221,-2.941747,...,2.541323,-4.563538,-5.961650,-1.397636,-1.396417,-6.891769,-1.858494,9.758929,-2.608857,-3.574952
222,125,L,E,T,125,1.067561,2.679987,-0.795610,-1.628026,1.571844,...,-1.481476,-0.015347,-5.204903,-0.777373,-3.066902,2.035285,8.529205,2.338246,-3.594448,-3.232327
223,126,L,I,I,126,1.647004,-2.867957,-1.441947,-6.823601,-4.076225,...,3.039231,-4.225101,-5.988674,-2.969731,-1.911045,-3.116693,0.749065,9.396074,-1.522411,-2.840287
224,127,L,K,K,127,1.351071,-2.054129,-2.763258,-2.569815,0.996182,...,-0.774707,0.891431,-4.741078,0.643836,3.170096,-0.245859,0.169133,1.192368,-2.539795,-4.610861


In [20]:
# Sample from PDBs, 10 sequences each at temperature 0.50 in regions CDR1, CDR2, CDRH3
# NOTE(review): save_flag=True with out_dir presumably also writes the results
# to ../data/outputs -- confirm against the antifold documentation.
pdb_output_dict = antifold.main.sample_pdbs(
    model,
    pdbs_csv_or_dataframe=pdbs_csv,  # Path to CSV file, or a DataFrame
    regions_to_mutate=["CDR1", "CDR2", "CDRH3"],
    pdb_dir=pdb_dir.as_posix(),
    sample_n=10,
    sampling_temp=0.50,
    limit_expected_variation=False,
    save_flag=True,
    out_dir="../data/outputs",
)

# Output dictionary with sequences, and residue probabilities or log-odds.
# Keys combine the PDB name with its heavy and light chain IDs, e.g. "6y1l_imgt_HL".
pdb_output_dict.keys()


dict_keys(['6y1l_imgt_HL', '8ee8_imgt_DC', 'C143_immunebuilder_HL'])

In [None]:
# Sequences for one PDB: the first record (key without suffix) is the original
# sequence; the sampled sequences follow under keys suffixed "__1", "__2", ...
pdb_output_dict["C143_immunebuilder_HL"]["sequences"]


OrderedDict([('C143_immunebuilder_HL',
              SeqRecord(seq=Seq('EVQLVESGGGLVQPGGSLRLSCAASGFSVSTKYMTWVRQAPGKGLEWVSVLYSG...TVL'), id='C143_immunebuilder_HL', name='', description=", score=0.6854, global_score=0.6854, regions=['CDR1', 'CDR2', 'CDRH3'], model_name=AntiFold, seed=42", dbxrefs=[])),
             ('C143_immunebuilder_HL__1',
              SeqRecord(seq=Seq('EVQLVESGGGLVQPGGSLRLSCAASGFTVSSNYMTWVRQAPGKGLEWVSVLYSG...TVL'), id='', name='', description='T=0.50, sample=1, score=0.5560, global_score=0.3766, seq_recovery=0.9038, mutations=23', dbxrefs=[])),
             ('C143_immunebuilder_HL__2',
              SeqRecord(seq=Seq('EVQLVESGGGLVQPGGSLRLSCAASGFTVSSNYMTWVRQAPGKGLEWVSVIYSG...TVL'), id='', name='', description='T=0.50, sample=2, score=0.6441, global_score=0.3987, seq_recovery=0.8954, mutations=25', dbxrefs=[])),
             ('C143_immunebuilder_HL__3',
              SeqRecord(seq=Seq('EVQLVESGGGLVQPGGSLRLSCAASGFTVSSYYMTWVRQAPGKGLEWVSVLYSG...TVL'), id='', name='', 

In [None]:
# Compare the original sequence with the first sampled sequence, chain by chain.
# Splitting a record's sequence on "/" yields the heavy chain followed by the light chain.
c143_records = pdb_output_dict["C143_immunebuilder_HL"]["sequences"]

H_orig, L_orig = c143_records["C143_immunebuilder_HL"].seq.split("/")
H_mut, L_mut = c143_records["C143_immunebuilder_HL__1"].seq.split("/")

# Print the mutated positions for each chain (heavy first, then light).
antifold.main.visualize_mutations(H_orig, H_mut, chain="H")
antifold.main.visualize_mutations(L_orig, L_mut, chain="L")


Mutations (18):	___________________________X__XX________________________X_________________________________________XXXXXXXX_XX_XXXX_______________
Original H:		EVQLVESGGGLVQPGGSLRLSCAASGFSVSTKYMTWVRQAPGKGLEWVSVLYSGGSDYYADSVKGRFTISRDNSKNALYLQMNSLRVEDTGVYYCARDSSEVRDHPGHPGRSVGAFDIWGQGTMVTVSS
Mutated H:		EVQLVESGGGLVQPGGSLRLSCAASGFTVSSNYMTWVRQAPGKGLEWVSVLYSGGSTYYADSVKGRFTISRDNSKNALYLQMNSLRVEDTGVYYCARDADDDGGYWGYYGDYGDAFDIWGQGTMVTVSS

Mutations (5):	__________________________X___X_XX___________________X________________________________________________________
Original L:		QSALTQPASVSGSPGQSITISCTGTSNDVGSYTLVSWYQQYPGKAPKLLIFEGTKRSSGISNRFSGSKSGNTASLTISGLQGEDEADYYCCSYAGASTFVFGGGTKLTVL
Mutated L:		QSALTQPASVSGSPGQSITISCTGTSSDVGGYNYVSWYQQYPGKAPKLLIFEGSKRSSGISNRFSGSKSGNTASLTISGLQGEDEADYYCCSYAGASTFVFGGGTKLTVL

