# Install

In [45]:
!pip install antifold



In [53]:
import pandas as pd
import antifold
import antifold.main

# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to module source take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### Sample antibody sequences in the notebook

In [47]:
# Load the model via antifold.main.load_model(); no arguments are passed,
# so whatever defaults the library defines are used.
model = antifold.main.load_model()

In [48]:
# Directory holding the input PDB files
pdb_dir = "data/pdbs"

# Build the PDB description table from the files in pdb_dir.
# Assumes first chain heavy, second chain light
# NOTE(review): max_chains=2 presumably restricts each structure to those two chains — confirm
pdbs_csv = antifold.main.generate_pdbs_csv(pdb_dir, max_chains=2)

# Alternatively, load a CSV file with this information
# pdbs_csv = pd.read_csv("data/example_pdbs.csv")

In [49]:
# Compute logits for the PDBs described in pdbs_csv
df_logits_list = antifold.main.get_pdbs_logits(
    model=model,
    pdbs_csv_or_dataframe=pdbs_csv,
    pdb_dir=pdb_dir,
)

# Display the first entry of the result
# NOTE(review): the original comment called these "log probabilities", but the
# function name says logits — confirm which one the per-residue columns hold
df_logits_list[0]

Unnamed: 0,pdb_posins,pdb_chain,pdb_res,top_res,pdb_pos,perplexity,A,C,D,E,...,M,N,P,Q,R,S,T,V,W,Y
0,1,H,E,M,1,1.515394,0.468467,-1.827444,-0.017995,-0.006945,...,5.570857,-0.646468,0.719059,-0.382303,-0.165978,0.623796,0.014428,0.345046,-2.113127,-1.961271
1,2,H,V,V,2,1.036309,-0.428492,-3.039920,-1.863718,2.120904,...,1.196741,-2.200263,-4.007235,-0.687650,-1.978745,-4.295565,-0.395808,9.416345,-1.936036,-2.760887
2,3,H,Q,Q,3,1.166019,0.393386,-3.410288,-0.468408,3.121669,...,0.026447,0.192073,-6.474277,7.829305,1.579831,-1.519337,0.332361,1.489925,-4.093868,-2.614197
3,4,H,L,L,4,1.000889,-1.336375,-0.245010,-4.954091,-1.263302,...,3.066235,-4.327230,-3.728791,-1.022451,-2.571467,-4.271738,-4.935078,2.214575,-0.306664,-0.509913
4,5,H,V,V,5,1.659372,0.443280,-5.154366,-2.866150,1.121221,...,1.322401,-2.251758,-10.268043,1.366281,-1.269063,-1.524681,-0.165025,8.492573,-3.818366,-3.695840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234,123,L,K,K,123,1.504862,-1.121593,-5.285645,-2.153915,3.024746,...,3.588242,1.008648,-3.039781,4.076246,3.468050,-1.966319,2.643071,-1.253351,-3.273266,-2.996099
235,124,L,L,L,124,1.091023,-1.259295,-3.552466,-6.687107,-2.646233,...,4.760981,-4.145008,-5.269346,-0.840992,-1.614605,-6.316737,-1.320422,9.069544,-2.428891,-3.306520
236,125,L,T,T,125,1.021823,2.414375,-0.983888,-2.639080,0.460178,...,-0.931746,-0.210748,-5.244661,-0.659353,-2.469649,3.138202,9.948484,1.941530,-4.536055,-3.705208
237,126,L,V,V,126,1.002100,1.146163,-1.616468,-6.910981,-2.778662,...,0.553159,-4.781265,-2.222537,-3.996854,-1.809276,-2.888187,2.177866,14.307243,-3.020518,-2.699842


In [50]:
# Sample from PDBs: 10 sequences each at temperature 0.50, mutating regions CDR1, CDR2 and CDRH3
pdb_output_dict = antifold.main.sample_pdbs(
    model,
    pdbs_csv_or_dataframe=pdbs_csv,  # Path to CSV file, or a DataFrame
    regions_to_mutate=["CDR1", "CDR2", "CDRH3"],
    pdb_dir=pdb_dir,  # same directory variable used for the logits call, not a repeated literal
    sample_n=10,
    sampling_temp=0.50,
    limit_expected_variation=False,
)

# Output dictionary with sequences, and residue probabilities or log-odds;
# display its keys
pdb_output_dict.keys()

dict_keys(['C143_immunebuilder_HL', '6y1l_imgt_HL', '8ee8_imgt_DC'])

In [51]:
# Show the "sequences" entry for one PDB: the original record followed by the sampled variants
pdb_output_dict["C143_immunebuilder_HL"]["sequences"]

OrderedDict([('C143_immunebuilder_HL',
              SeqRecord(seq=Seq('EVQLVESGGGLVQPGGSLRLSCAASGFSVSTKYMTWVRQAPGKGLEWVSVLYSG...TVL'), id='C143_immunebuilder_HL', name='', description=", score=0.6854, global_score=0.6854, regions=['CDR1', 'CDR2', 'CDRH3'], model_name=AntiFold, seed=42", dbxrefs=[])),
             ('C143_immunebuilder_HL__1',
              SeqRecord(seq=Seq('EVQLVESGGGLVQPGGSLRLSCAASGFTVSSNYMTWVRQAPGKGLEWVSVLYSG...TVL'), id='', name='', description='T=0.50, sample=1, score=0.6097, global_score=0.3901, seq_recovery=0.9205, mutations=19', dbxrefs=[])),
             ('C143_immunebuilder_HL__2',
              SeqRecord(seq=Seq('EVQLVESGGGLVQPGGSLRLSCAASGFTVSSNYMTWVRQAPGKGLEWVSVIYSG...TVL'), id='', name='', description='T=0.50, sample=2, score=0.6614, global_score=0.4030, seq_recovery=0.9038, mutations=23', dbxrefs=[])),
             ('C143_immunebuilder_HL__3',
              SeqRecord(seq=Seq('EVQLVESGGGLVQPGGSLRLSCAASGFTVSSNYMTWVRQAPGKGLEWVSVLYSG...TVL'), id='', name='', 

In [52]:
# Pull out the original record and the first sampled record for one PDB
sequences = pdb_output_dict["C143_immunebuilder_HL"]["sequences"]
original_record = sequences["C143_immunebuilder_HL"]
sampled_record = sequences["C143_immunebuilder_HL__1"]

# Each sequence joins the two chains with "/"; split into heavy (H) and light (L)
H_orig, L_orig = original_record.seq.split("/")
H_mut, L_mut = sampled_record.seq.split("/")

# Compare original vs. sampled sequence, one chain at a time
antifold.main.visualize_mutations(H_orig, H_mut, chain="H")
antifold.main.visualize_mutations(L_orig, L_mut, chain="L")

Mutations (15):	___________________________X__XX________________________X_________________________________________X__XXXXX_X__XXXX_______________
Original H:		EVQLVESGGGLVQPGGSLRLSCAASGFSVSTKYMTWVRQAPGKGLEWVSVLYSGGSDYYADSVKGRFTISRDNSKNALYLQMNSLRVEDTGVYYCARDSSEVRDHPGHPGRSVGAFDIWGQGTMVTVSS
Mutated H:		EVQLVESGGGLVQPGGSLRLSCAASGFTVSSNYMTWVRQAPGKGLEWVSVLYSGGSTYYADSVKGRFTISRDNSKNALYLQMNSLRVEDTGVYYCARDASEDGGYWGYPGYYGNAFDIWGQGTMVTVSS

Mutations (4):	__________________________X_____XX___________________X________________________________________________________
Original L:		QSALTQPASVSGSPGQSITISCTGTSNDVGSYTLVSWYQQYPGKAPKLLIFEGTKRSSGISNRFSGSKSGNTASLTISGLQGEDEADYYCCSYAGASTFVFGGGTKLTVL
Mutated L:		QSALTQPASVSGSPGQSITISCTGTSSDVGSYNYVSWYQQYPGKAPKLLIFEGSKRSSGISNRFSGSKSGNTASLTISGLQGEDEADYYCCSYAGASTFVFGGGTKLTVL

