# Setup
- Install alphafold3, colabfold
- Copy model weights from Google Drive (adjust path & give permission when asked)

In [None]:
%%shell
# Helper package; the notebook later imports alphafold3_polymer_bonds.alphafold3_io
pip install alphafold3-polymer-bonds
# AlphaFold 3 revision to install -- presumably read by the install script sourced below (verify against that script).
# The value must be the bare commit hash: any invisible character inside the quotes becomes part of the value.
ALPHAFOLD3_VERSION="2e2ffc1"
source <(curl -fsSL https://raw.githubusercontent.com/jurgjn/alphafold3-polymer-bonds/refs/heads/main/colab/install)

In [None]:
# Copy weights from Google Drive to the default location at /root/models/af3.bin.zst
from google.colab import drive
# Mount Google Drive under /mnt/gdrive (Colab asks for permission at this point)
drive.mount('/mnt/gdrive')
!mkdir -p /root/models/
# Adjust the source path below if the weights are stored elsewhere in your Drive
!cp /mnt/gdrive/MyDrive/alphafold3/models/af3.bin.zst /root/models/af3.bin.zst
# Print the checksum of the local copy so it can be compared against the original file
!md5sum /root/models/af3.bin.zst
# Flush pending writes and unmount Drive again
drive.flush_and_unmount()

In [None]:
# Standard imports
import argparse, collections, copy, gzip, json, os, os.path, re, string, sys
import numpy as np
# Helper functions for manipulating AlphaFold 3 input jsons: https://github.com/google-deepmind/alphafold3/blob/main/docs/input.md
from alphafold3_polymer_bonds.alphafold3_io import colab_data_pipeline, init_input_json, read_input_json, print_input_json, write_input_json

# Running inference on a single monomer

In [None]:
%%shell
# Download sequences for a selection of species
# (%%shell has to be the first line of the cell, otherwise IPython does not treat it as a cell magic)
# Each download replaces the UniProt header line with a short species tag (sed "1s/.*/>tag/")
mkdir -p fasta
curl -fsSL https://rest.uniprot.org/uniprotkb/Q8U2C1.fasta | sed -e "1s/.*/>pyrfu/" > fasta/pyrfu.fasta
curl -fsSL https://rest.uniprot.org/uniprotkb/P65845.fasta | sed -e "1s/.*/>ecoli/" > fasta/ecoli.fasta
curl -fsSL https://rest.uniprot.org/uniprotkb/P31115.fasta | sed -e "1s/.*/>yeast/" > fasta/yeast.fasta
curl -fsSL https://rest.uniprot.org/uniprotkb/Q09524.fasta | sed -e "1s/.*/>caeel/" > fasta/caeel.fasta
curl -fsSL https://rest.uniprot.org/uniprotkb/Q9W282.fasta | sed -e "1s/.*/>drome/" > fasta/drome.fasta
curl -fsSL https://rest.uniprot.org/uniprotkb/Q6P815.fasta | sed -e "1s/.*/>xentr/" > fasta/xentr.fasta
curl -fsSL https://rest.uniprot.org/uniprotkb/Q7ZW97.fasta | sed -e "1s/.*/>danre/" > fasta/danre.fasta
curl -fsSL https://rest.uniprot.org/uniprotkb/Q9JI38.fasta | sed -e "1s/.*/>mouse/" > fasta/mouse.fasta
curl -fsSL https://rest.uniprot.org/uniprotkb/Q9BZE2.fasta | sed -e "1s/.*/>human/" > fasta/human.fasta
ls -l fasta

In [None]:
# Show the downloaded sequence for one species, e.g. drosophila (fasta/drome.fasta)
!cat fasta/drome.fasta

In [None]:
# Build an AlphaFold 3 input for a single peptide sequence with init_input_json().
# The sequence is split over adjacent string literals purely for readability;
# Python joins them into one string without any line breaks.
monomer_example = init_input_json(
    "MSATNDKKVVINKRLKGLSREALEKLTQTELIDKVIQLEAYNFQLRNLLQKKLSEKDKHD"
    "KEYSGLIGNEAEGKVSQVAKTSSKVQKIRKFDWSSAHKRHVLLKITYFGWDYQGFACQED"
    "SNDTIESNLFRALARTCLIESRATSNYHRCGRTDKEVSAFCQVISIDLRSKHPPESQLDP"
    "TALSSEIDYCGLLNRVLPKNIQCVAWMPLRSPVYSARFDCVSRTYRYYFPKGDLDIAAMR"
    "KACDLLVRHADFRNFCKMDVHNGVTNYMRNLQSARVEACDQTNHTNSGYDMYYLEIQANA"
    "FLWHQIRCIMAVLLLVGQKKENPGVISDLLDVESNPCKPQYTPAIGLPLNLFRCDFRDHT"
    "TRSVNHPSSGDADEEAMDTAADESNDLNAPEHLERDLTAWIYNEENLQKLIENTQCEWTQ"
    "FSVKSTMIRNVLQQLENLFEENFKPKEKVLAQVILLQDSVNPRQYQPLLERKRCESLENR"
    "IEHFVKKQRLIVKNETETE"
)
monomer_example

In [None]:
# Google Colab does not have enough disk space to run the default AlphaFold 3 data pipeline (MSA + template search).
# We work around this by using the ColabFold MSA server instead.
# NOTE(review): presumably this call queries the server over the network and may take a while -- confirm.
monomer_example_data = colab_data_pipeline(monomer_example)

In [None]:
# The AlphaFold 3 data pipeline encodes MSAs in a single JSON line, which makes the raw output below unreadable.
# Try using print_input_json() on the data pipeline output instead.
monomer_example_data

In [None]:
# AlphaFold 3 uses the name field in the input .json to generate the output file name
monomer_example_data['name'] = 'drome'
# NOTE(review): the '{}' in the path is presumably filled in with the name set above -- confirm in write_input_json()
write_input_json(monomer_example_data, 'monomer_input/{}.json')
# List the input directory to check that the file was written
!ls -l monomer_input/

In [None]:
%%shell
# (%%shell has to be the first line of the cell, otherwise IPython does not treat it as a cell magic)
# run_alphafold3_autogpu.sh runs the structure prediction step with settings adjusted for the available GPU
# --norun_data_pipeline turns off the default MSA/template search (as we're using the ColabFold MSA server)
source /alphafold3_venv/bin/activate
/app/alphafold/run_alphafold3_autogpu.sh \
  --input_dir=/content/monomer_input \
  --output_dir=/content/monomer_output \
  --norun_data_pipeline

In [None]:
# Results from the prediction step should now appear under:
!ls -l /content/monomer_output/monomer_example

# Mouse-human dimer

In [None]:
# As an example pair, let's look up the downloaded sequences for mouse and human
!cat fasta/mouse.fasta
!cat fasta/human.fasta

In [None]:
# init_input_json() can handle multiple sequences as input
# Exercise: replace ❓ with the mouse and human sequences printed by the previous cell
mouse_human = init_input_json(
    ❓
)
mouse_human

In [None]:
# Use colab_data_pipeline() as previously to perform the MSA search
# Exercise: replace ❓ with the call, following the monomer example (monomer_example_data)
mouse_human_data = ❓

In [None]:
# Set the name attribute & write the mouse-human pair to pairs_input/:
# Exercise: later cells read pairs_output/mouse_human/mouse_human_summary_confidences.json,
# so choose the name to match
mouse_human_data['name'] = ❓
write_input_json(❓)
!ls -l pairs_input/

In [None]:
%%shell
# (%%shell has to be the first line of the cell, otherwise IPython does not treat it as a cell magic)
# Run AlphaFold 3 on pairs_input/ as above
# Exercise: replace each ❓ with the input/output directory for the pair
source /alphafold3_venv/bin/activate
/app/alphafold/run_alphafold3_autogpu.sh \
  --input_dir=❓ \
  --output_dir=❓ \
  --norun_data_pipeline

In [None]:
# Show the output file with the summary confidence metrics for the mouse-human prediction
!cat pairs_output/mouse_human/mouse_human_summary_confidences.json

In [None]:
# Show matrix of interaction-level ipTM confidence metrics:
# load the summary confidences written by AlphaFold 3 and print the 'chain_pair_iptm' entry.
with open('pairs_output/mouse_human/mouse_human_summary_confidences.json') as fh:
  summary_confidences = json.load(fh)
# np.array instead of np.matrix: NumPy no longer recommends the matrix class, and the
# printed 2-D layout is the same.
print(np.array(summary_confidences['chain_pair_iptm']))

# 1x3 pool

In [None]:
# Due to time constraints, we'll run one "1x3" pool, e.g. pyrfu-pyrfu-ecoli-yeast
# (pyrfu is shown twice because it appears twice in the pool):
!cat fasta/pyrfu.fasta
!cat fasta/pyrfu.fasta
!cat fasta/ecoli.fasta
!cat fasta/yeast.fasta

In [None]:
# init_input_json() can handle multiple sequences as input
# Exercise: replace ❓ with the four sequences of the pool printed by the previous cell
pyrfu_pyrfu_ecoli_yeast = init_input_json(
    ❓
)
pyrfu_pyrfu_ecoli_yeast

In [None]:
# Use colab_data_pipeline() as previously to attach the MSAs
# Exercise: replace ❓ with the call, following the monomer example (monomer_example_data)
pyrfu_pyrfu_ecoli_yeast_data = ❓

In [None]:
# Set the name attribute & write the pool to pools_input/:
# Exercise: follow the monomer example (set ['name'], then call write_input_json())
❓

In [None]:
# Run AlphaFold 3 on pools_input/
# Hint: a cell magic such as %%shell has to be the very first line of its cell
❓

In [None]:
# Show matrix of interaction-level ipTM confidence metrics
# Exercise: adapt the mouse-human cell to read the summary_confidences.json written for this pool
❓