# Setup
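- Install `alphafold3-polymer-bonds`, AlphaFold 3 (pinned to commit `a8ecdb2`), and ColabFold
- Copy the AlphaFold 3 model weights from Google Drive (adjust the source path to your own copy, and grant Drive access when prompted)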

In [1]:
# Install the alphafold3-polymer-bonds helper package into the kernel's environment.
# NOTE(review): no version is pinned here; the run recorded in this notebook reported v0.3.
%pip install --quiet alphafold3-polymer-bonds
# Fetch the project's Colab install script and source it with ALPHAFOLD3_VERSION set to a8ecdb2.
# According to the recorded output it installs alphafold3 and colabfold and downloads a run
# script to /app/alphafold.
# NOTE(review): the script is fetched from the `main` branch, so its contents may change
# between runs — consider pinning it to a commit for reproducibility.
!ALPHAFOLD3_VERSION="a8ecdb2"; source <(curl -fsSL https://raw.githubusercontent.com/jurgjn/alphafold3-polymer-bonds/refs/heads/main/colab/install)

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m116.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/86.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling alphafold3 a8ecdb2
Installing colabfold
Downloading run script to /app/alphafold


In [2]:
# Copy weights from Google Drive to the default location at /root/models/af3.bin.zst
# Adjust the MyDrive source path in the `cp` line to wherever your copy of the weights is stored.
from google.colab import drive
# Mount Drive at /mnt/gdrive (Colab asks for permission at this point).
drive.mount('/mnt/gdrive')
!mkdir -p /root/models/
!cp /mnt/gdrive/MyDrive/alphafold3/models/af3.bin.zst /root/models/af3.bin.zst
# List the copied file so a missing or truncated copy is visible in the cell output.
!ls -l /root/models/af3.bin.zst
# Drive is only needed for the copy above, so unmount it again.
drive.flush_and_unmount()

Mounted at /mnt/gdrive
-rw------- 1 root root 1020558622 Oct  3 12:14 /root/models/af3.bin.zst


In [3]:
import argparse, collections, copy, gzip, json, os, os.path, re, string, sys
from pprint import pprint
import numpy as np
import Bio.PDB, Bio.PDB.mmcifio, Bio.PDB.Polypeptide
# Helper functions for manipulating AlphaFold 3 input jsons: https://github.com/google-deepmind/alphafold3/blob/main/docs/input.md
from alphafold3_polymer_bonds.alphafold3_io import colab_data_pipeline, init_input_json, read_input_json, print_input_json, write_input_json

# Set up baseline

In [4]:
!wget --content-disposition https://files.rcsb.org/download/1DF6-assembly1.cif.gz
!wget --content-disposition https://files.rcsb.org/download/6OQ1-assembly1.cif.gz
!gunzip *.gz

--2025-10-03 12:14:07--  https://files.rcsb.org/download/1DF6-assembly1.cif.gz
Resolving files.rcsb.org (files.rcsb.org)... 13.33.45.113, 13.33.45.71, 13.33.45.56, ...
Connecting to files.rcsb.org (files.rcsb.org)|13.33.45.113|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/gzip]
Saving to: ‘1DF6-assembly1.cif.gz’

1DF6-assembly1.cif.     [<=>                 ]       0  --.-KB/s               1DF6-assembly1.cif.     [ <=>                ]  11.05K  --.-KB/s    in 0s      

2025-10-03 12:14:07 (389 MB/s) - ‘1DF6-assembly1.cif.gz’ saved [11317]

--2025-10-03 12:14:07--  https://files.rcsb.org/download/6OQ1-assembly1.cif.gz
Resolving files.rcsb.org (files.rcsb.org)... 13.33.45.113, 13.33.45.71, 13.33.45.56, ...
Connecting to files.rcsb.org (files.rcsb.org)|13.33.45.113|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/gzip]
Saving to: ‘6OQ1-assembly1.cif.gz’

6OQ1-assembly1.cif.     [ <=> 

In [5]:
def input_json_from_rcsb(pdb_id='1DF6'):
  """Build an AlphaFold 3 input json with one protein entry per chain of a local mmCIF file.

  Reads `{pdb_id}-assembly1.cif` from the current working directory and uses the
  first model of the parsed structure.

  Args:
    pdb_id: Identifier used both for the file name and as the initial `name` of the json.

  Returns:
    The object returned by `init_input_json()`, with `name` set to `pdb_id` and one
    `{'protein': {'id': ..., 'sequence': ...}}` entry appended to `sequences` for every
    chain that contains at least one standard amino acid.

  Notes:
    Residues whose name is not in `Bio.PDB.Polypeptide.protein_letters_3to1` (waters,
    ligands, modified residues, ...) are dropped, so positions in the resulting sequence
    count retained residues only. Chains left with an empty sequence are skipped rather
    than emitted as protein entries with no residues.
  """
  parser = Bio.PDB.MMCIFParser()
  struct = parser.get_structure(pdb_id, f'{pdb_id}-assembly1.cif')

  def one_letter_sequence(chain):
    # Map three-letter residue names to one-letter codes; unknown names contribute nothing.
    three_to_one = Bio.PDB.Polypeptide.protein_letters_3to1
    return ''.join(three_to_one.get(residue.resname, '') for residue in chain.get_residues())

  js = init_input_json()
  js['name'] = pdb_id
  for chain in Bio.PDB.Selection.unfold_entities(entity_list=struct[0], target_level='C'):
    sequence = one_letter_sequence(chain)
    if not sequence:
      # Chain holds no standard amino acids: an empty protein sequence is not a usable input.
      continue
    js['sequences'].append(collections.OrderedDict([('protein', collections.OrderedDict([('id', chain.id),('sequence', sequence)]))]))

  return js

# Parse both downloaded structures into AlphaFold 3 input jsons (1DF6 first, then 6OQ1).
baseline_1DF6, baseline_6OQ1 = (input_json_from_rcsb(entry) for entry in ('1DF6', '6OQ1'))

# Rename the jobs so the baseline runs are distinguishable from the bonded variants.
baseline_1DF6['name'] = '1DF6_baseline'
baseline_6OQ1['name'] = '6OQ1_baseline'

# Write both baselines as data pipeline inputs.
# NOTE(review): the `{}` placeholder is presumably filled with the json's `name` by
# write_input_json — confirm against alphafold3_polymer_bonds.alphafold3_io.
for baseline in (baseline_1DF6, baseline_6OQ1):
  write_input_json(baseline, 'data_pipeline_input/{}.json')



# Set up bondedAtomPairs

In [6]:
# 1DF6: independent copy of the baseline with one bond between atom N of residue 1
# and atom C of residue 30, both in chain A (presumably a head-to-tail cyclisation — confirm).
polybonds_1DF6 = copy.deepcopy(baseline_1DF6)
polybonds_1DF6['name'] = '1DF6_polybonds'
polybonds_1DF6['bondedAtomPairs'] = [
  [['A', 1, 'N'], ['A', 30, 'C']],
]

# 6OQ1: independent copy of the baseline with two bonds from NZ atoms of chain F
# (residues 11 and 48) to atom C of residue 76 in chains A and C respectively.
polybonds_6OQ1 = copy.deepcopy(baseline_6OQ1)
polybonds_6OQ1['name'] = '6OQ1_polybonds'
polybonds_6OQ1['bondedAtomPairs'] = [
  [['F', 11, 'NZ'], ['A', 76, 'C']],
  [['F', 48, 'NZ'], ['C', 76, 'C']],
]

# Write the bonded variants to bonds_input/ for the alphafold3_polymer_bonds CLI.
for bonded in (polybonds_1DF6, polybonds_6OQ1):
  write_input_json(bonded, 'bonds_input/{}.json')

In [7]:
!alphafold3_polymer_bonds --source_path bonds_input/1DF6_polybonds.json --output_path data_pipeline_input/1DF6_polybonds.json
!alphafold3_polymer_bonds --source_path bonds_input/6OQ1_polybonds.json --output_path data_pipeline_input/6OQ1_polybonds.json

alphafold3-polymer-bonds v0.3
Source file: bonds_input/1DF6_polybonds.json
Output file: data_pipeline_input/1DF6_polybonds.json
Loaded: bonds_input/1DF6_polybonds.json
A 1 A 30 protein protein
Saved modified file: data_pipeline_input/1DF6_polybonds.json
alphafold3-polymer-bonds v0.3
Source file: bonds_input/6OQ1_polybonds.json
Output file: data_pipeline_input/6OQ1_polybonds.json
Loaded: bonds_input/6OQ1_polybonds.json
F 11 A 76 protein protein
F 48 C 76 protein protein
Saved modified file: data_pipeline_input/6OQ1_polybonds.json


# Run colabfold data pipeline

In [8]:
# Inputs to push through the ColabFold data pipeline, in processing order.
pipeline_inputs = (
  '1DF6_baseline.json',
  '1DF6_polybonds.json',
  '6OQ1_baseline.json',
  '6OQ1_polybonds.json',
)

for input_name in pipeline_inputs:
  print(input_name)  # progress marker in front of the pipeline's own output
  pipeline_result = colab_data_pipeline(read_input_json(f'data_pipeline_input/{input_name}'))
  # The processed json becomes the input of the structure prediction step.
  write_input_json(pipeline_result, 'predictions_input/{}.json')

1DF6_baseline.json
MPLBACKEND=AGG; source /colabfold_venv/bin/activate; colabfold_batch --msa-only --af3-json /tmp/_get_colabfold_msa/input/a850230a28468ac35a56354f44852f1cccf8b939.fasta /tmp/_get_colabfold_msa/output
1DF6_polybonds.json
MPLBACKEND=AGG; source /colabfold_venv/bin/activate; colabfold_batch --msa-only --af3-json /tmp/_get_colabfold_msa/input/3ed3f18e812cf5aaed3c2bdbb8099f7edf4ce0dc.fasta /tmp/_get_colabfold_msa/output
6OQ1_baseline.json
MPLBACKEND=AGG; source /colabfold_venv/bin/activate; colabfold_batch --msa-only --af3-json /tmp/_get_colabfold_msa/input/5e79e6c523d2a0d00e78c26fc593166b041dd4dc.fasta /tmp/_get_colabfold_msa/output
MPLBACKEND=AGG; source /colabfold_venv/bin/activate; colabfold_batch --msa-only --af3-json /tmp/_get_colabfold_msa/input/59a77f5ea85130fdff1efe9577a0c28b2f3d245d.fasta /tmp/_get_colabfold_msa/output
MPLBACKEND=AGG; source /colabfold_venv/bin/activate; colabfold_batch --msa-only --af3-json /tmp/_get_colabfold_msa/input/24cb1901bdfd1064e9d28d238

# Run structure prediction

In [9]:
%%shell
# Activate the virtualenv at /alphafold3_venv (presumably created by the install script — confirm).
source /alphafold3_venv/bin/activate
# Run the downloaded AlphaFold 3 wrapper script on every input in predictions_input/ and
# write results to predictions_output/. --norun_data_pipeline is passed because the inputs
# were already processed by the ColabFold data pipeline cell.
/app/alphafold/run_alphafold3_autogpu.sh \
  --input_dir=/content/predictions_input/ \
  --output_dir=/content/predictions_output/ \
  --norun_data_pipeline

Updated 0 paths from the index
Using compilation time workaround with XLA flags
Using low GPU memory setup (40 GB)
Adjusting pair_transition_shard_spec in model_config.py
Enabling unified memory
Executing run_alphafold.py
I1003 12:14:39.739111 140054985740928 xla_bridge.py:895] Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
I1003 12:14:39.740203 140054985740928 xla_bridge.py:895] Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory

Running AlphaFold 3. Please note that standard AlphaFold 3 model parameters are
only available under terms of use provided at
https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md.
If you do not agree to these terms and are using AlphaFold 3 derived model
parameters, cancel execution of AlphaFold 3 inference with CTRL-C, and do not
use the model parameters.

Found local devices: [CudaDevi

