In [1]:
# OpenFold installation, adapted from the upstream guide: https://openfold.readthedocs.io/en/latest/Installation.html
# The whole notebook should finish in <30min on an interactive GPU node, e.g.: srun -J pty-$(hostname) --ntasks=10 --mem-per-cpu=10G --time=1-0 --gpus=rtx_4090:1 --gres=gpumem:16g --tmp=16384 --pty bash -l
# Load the required modules one at a time, in this order (proxy, software stack, CUDA).
for cluster_module in eth_proxy stack/2024-05 cuda/12.2.1; do
    module load "$cluster_module"
done
unset cluster_module

Many modules are hidden in this stack. Use "module --show_hidden spider SOFTWARE" if you are not able to find the required software


In [2]:
# 1. Clone & checkout pl_upgrades branch for CUDA12 support
# Skip the clone when a checkout already exists, so that re-running this cell
# from the same directory does not fail on the existing 'openfold' folder.
[ -d openfold/.git ] || git clone https://github.com/aqlaboratory/openfold.git
# Only switch branches once we are really inside the checkout; if 'cd' fails,
# 'git checkout' would otherwise act on whichever repository contains the
# current directory.
cd openfold && git checkout pl_upgrades

Cloning into 'openfold'...
remote: Enumerating objects: 7810, done.[K
remote: Counting objects: 100% (3541/3541), done.[K
remote: Compressing objects: 100% (925/925), done.[K
remote: Total 7810 (delta 2834), reused 3059 (delta 2613), pack-reused 4269[K
Receiving objects: 100% (7810/7810), 19.46 MiB | 27.64 MiB/s, done.
Resolving deltas: 100% (5508/5508), done.
Updating files: 100% (232/232), done.
Branch 'pl_upgrades' set up to track remote branch 'pl_upgrades' from 'origin'.
Switched to a new branch 'pl_upgrades'


In [3]:
# Check out CUTLASS code from the third-party dependencies script (scripts/install_third_party_dependencies.sh); needed for setting openfold_env (step 2)
# The clone lands in ./cutlass relative to the current directory (the openfold checkout when run after step 1).
# Guarded so that re-running the cell does not abort on the already existing directory.
if [ -d cutlass/.git ]; then
    echo "CUTLASS already present in $PWD/cutlass, skipping download"
else
    echo "Download CUTLASS, required for Deepspeed Evoformer attention kernel"
    # --depth 1: shallow clone, only the latest commit is fetched
    git clone --depth 1 https://github.com/NVIDIA/cutlass
fi

Download CUTLASS, required for Deepspeed Evoformer attention kernel
Cloning into 'cutlass'...
remote: Enumerating objects: 5992, done.[K
remote: Counting objects: 100% (5992/5992), done.[K
remote: Compressing objects: 100% (1639/1639), done.[K
remote: Total 5992 (delta 3485), reused 4940 (delta 3065), pack-reused 0[K
Receiving objects: 100% (5992/5992), 27.23 MiB | 26.01 MiB/s, done.
Resolving deltas: 100% (3485/3485), done.
Updating files: 100% (5774/5774), done.


In [4]:
# 2. Build the openfold_env conda environment from our adapted copy of the upstream environment file:
#    https://github.com/aqlaboratory/openfold/blob/pl_upgrades/environment.yml
# First show how the adapted file differs from the one shipped in the checkout, then create the environment from it.
printf '%s\n' 'Changes from baseline:'
diff ../../workflow/envs/openfold-eu.yaml environment.yml
printf '%s\n' 'Attempting to install:'
mamba env create -q -y -f ../../workflow/envs/openfold-eu.yaml -n openfold_env

Changes from baseline:
9,11d8
<   - gcc_linux-64<13
<   - gxx_linux-64<13
<   - cudatoolkit-dev
19c16
<   - numpy<2 # Fix: A module that was compiled using NumPy 1.x cannot be run in NumPy 2.0.0 as it may crash. /../ If you are a user of the module, the easiest solution will be to downgrade to 'numpy<2' or try to upgrade the affected module.
---
>   - numpy
Attempting to install:
Channels:
 - conda-forge
 - bioconda
 - pytorch
 - nvidia
 - defaults
Platform: linux-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... By downloading and using the CUDA Toolkit conda packages, you accept the terms and conditions of the CUDA End User License Agreement (EULA): https://docs.nvidia.com/cuda/eula/index.html

done
Installing pip dependencies: ...working... done


In [5]:
# 2b. Activate the environment & set up variables: https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#macos-and-linux
# Writes two hook scripts into the environment; conda sources them whenever
# openfold_env is activated / deactivated.
conda activate openfold_env
mkdir -p "$CONDA_PREFIX/etc/conda/activate.d" "$CONDA_PREFIX/etc/conda/deactivate.d"

# The heredoc delimiter is quoted ('EOF') so nothing is expanded while the file
# is written: every variable is evaluated when the environment is activated,
# instead of freezing the values of this installation session into the hook.
cat << 'EOF' > "$CONDA_PREFIX/etc/conda/activate.d/env_vars.sh"
#!/bin/sh
# Remember the caller's search paths so that deactivation can restore them.
export OPENFOLD_OLD_LIBRARY_PATH="${LIBRARY_PATH:-}"
export OPENFOLD_OLD_LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}"
# Prepend the environment's lib directory; the ${VAR:+...} form avoids a
# trailing ':' (an empty path entry) when the variable was previously empty.
export LIBRARY_PATH="$CONDA_PREFIX/lib${LIBRARY_PATH:+:$LIBRARY_PATH}"
export LD_LIBRARY_PATH="$CONDA_PREFIX/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
# Follow the task count of the current SLURM allocation; outside of SLURM
# MAX_JOBS is left untouched rather than exported as an empty string.
if [ -n "${SLURM_NTASKS:-}" ]; then
    export MAX_JOBS="$SLURM_NTASKS"
fi
EOF

cat << 'EOF' > "$CONDA_PREFIX/etc/conda/deactivate.d/env_vars.sh"
#!/bin/sh
# Restore the search paths saved on activation (e.g. those set by 'module load')
# instead of unsetting them unconditionally.
if [ -n "${OPENFOLD_OLD_LIBRARY_PATH:-}" ]; then
    export LIBRARY_PATH="$OPENFOLD_OLD_LIBRARY_PATH"
else
    unset LIBRARY_PATH
fi
if [ -n "${OPENFOLD_OLD_LD_LIBRARY_PATH:-}" ]; then
    export LD_LIBRARY_PATH="$OPENFOLD_OLD_LD_LIBRARY_PATH"
else
    unset LD_LIBRARY_PATH
fi
unset OPENFOLD_OLD_LIBRARY_PATH OPENFOLD_OLD_LD_LIBRARY_PATH MAX_JOBS
EOF

conda deactivate

(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
/cluster/work/beltrao/jjaenes/24.06.10_af2genomics/software/openfold
(openfold_env)
(miniconda3)


In [6]:
# 3. Run (the remainder of) the third-party dependencies script: https://github.com/aqlaboratory/openfold/blob/pl_upgrades/scripts/install_third_party_dependencies.sh
# All paths below are relative, so this cell must run from the root of the openfold checkout.
conda activate openfold_env

# Download folding resources
# NOTE(review): --no-check-certificate disables TLS verification; presumably inherited
# from the upstream script -- drop the flag if the download also works without it.
wget -N --no-check-certificate -P openfold/resources \
    https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt

# Certain tests need access to this file
mkdir -p tests/test_data/alphafold/common
# -f replaces a link left behind by a previous run, so the cell can be re-executed
ln -rsf openfold/resources/stereo_chemical_props.txt tests/test_data/alphafold/common

# Decompress test data
gunzip -c tests/test_data/sample_feats.pickle.gz > tests/test_data/sample_feats.pickle

# Build and install the openfold package into the active environment
python setup.py install

# CUTLASS itself was already cloned into ./cutlass in an earlier cell; here only
# its location is stored in the environment (quoted in case $PWD contains spaces)
conda env config vars set CUTLASS_PATH="$PWD/cutlass"

# This setting is used to fix a worker assignment issue during data loading
conda env config vars set KMP_AFFINITY=none
# Variables set via 'conda env config vars' only take effect on the next activation
conda deactivate

(miniconda3)
(openfold_env)
(openfold_env)
--2024-06-21 15:27:42--  https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
Resolving proxy.ethz.ch (proxy.ethz.ch)... 129.132.202.155
Connecting to proxy.ethz.ch (proxy.ethz.ch)|129.132.202.155|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: 9119 (8.9K) [text/plain]
Saving to: ‘openfold/resources/stereo_chemical_props.txt’


Last-modified header missing -- time-stamps turned off.
2024-06-21 15:27:42 (869 KB/s) - ‘openfold/resources/stereo_chemical_props.txt’ saved [9119/9119]

(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
(openfold_env)
running install
running bdist_egg
running egg_info
creating openfold.egg-info
writing openfold.egg-info/PKG-INFO
writing dependency_links to openfold.egg-info/dependency_links.txt
writing top-level names to openfold.e

In [7]:
# 5. Fetch the AlphaFold parameters into openfold/resources with the download script from the checkout
#    (the captured log below shows a single ~5.2GiB archive being downloaded)
conda activate openfold_env
./scripts/download_alphafold_params.sh openfold/resources
conda deactivate

(miniconda3)
overwriting variable ['CUTLASS_PATH']
(openfold_env)

06/21 15:28:45 [[1;32mNOTICE[0m] Downloading 1 item(s)
 *** Download Progress Summary as of Fri Jun 21 15:29:46 2024 ***              m48s[0m[35m][0m[0m
[#63ac61 1.8GiB/5.2GiB(35%) CN:1 DL:28MiB ETA:2m1s]
FILE: openfold/resources/params/alphafold_params_2022-12-06.tar
-------------------------------------------------------------------------------

 *** Download Progress Summary as of Fri Jun 21 15:30:47 2024 ***              3s[0m[35m][0m[0mmm
[#63ac61 3.6GiB/5.2GiB(69%) CN:1 DL:38MiB ETA:42s]
FILE: openfold/resources/params/alphafold_params_2022-12-06.tar
-------------------------------------------------------------------------------

 *** Download Progress Summary as of Fri Jun 21 15:31:48 2024 ***              s[0m[35m][0m[0mm
[#63ac61 5.0GiB/5.2GiB(97%) CN:1 DL:23MiB ETA:6s]
FILE: openfold/resources/params/alphafold_params_2022-12-06.tar
-----------------------------------------------------------------

In [11]:
# Print MD5 checksums of the freshly downloaded parameters and of the copy under
# /cluster/project/alphafold/params, so the two listings can be compared by eye.
printf '%s\n' 'Model params as downloaded by https://github.com/aqlaboratory/openfold/blob/main/scripts/download_alphafold_params.sh'
md5sum openfold/resources/params/*
printf '%s\n' 'Local params:'
md5sum /cluster/project/alphafold/params/*

Model params as downloaded by https://github.com/aqlaboratory/openfold/blob/main/scripts/download_alphafold_params.sh
(miniconda3)
2ab724713fdaf49e4523c4503bfd068d  openfold/resources/params/LICENSE
fddffd89cff499afcd171f31df5b48ca  openfold/resources/params/params_model_1_multimer_v3.npz
e2c73bf20ad82630d1cc4589b9e537ea  openfold/resources/params/params_model_1.npz
dfca7cd972028fc6adcc0c8995857a2a  openfold/resources/params/params_model_1_ptm.npz
2b364df609bac5a9461e44d118426b73  openfold/resources/params/params_model_2_multimer_v3.npz
ec0ff5b4f6942c42fc35f50342043565  openfold/resources/params/params_model_2.npz
edb0729d11e83734b820c348c5ffe42a  openfold/resources/params/params_model_2_ptm.npz
4f9bc7fd9b66f0835c99ae1295f7100b  openfold/resources/params/params_model_3_multimer_v3.npz
78ba15ee79b038aefe37d9570c277dd7  openfold/resources/params/params_model_3.npz
d5f286c0349708c045a0ee8090ad5a8d  openfold/resources/params/params_model_3_ptm.npz
f7d33665caf72d3fd3a94baae3cb16e0  openfold

In [8]:
# 6. Execute the OpenFold unit test script inside openfold_env
#    ('conda run' is used here, so the environment is not activated in this shell)
conda run --name openfold_env scripts/run_unit_tests.sh

(miniconda3)


[2024-06-21 15:32:08,495] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[1/4] /cluster/project/beltrao/jjaenes/software/miniconda3/envs/openfold_env/bin/x86_64-conda-linux-gnu-c++ -MMD -MF attention.o.d -DTORCH_EXTENSION_NAME=evoformer_attn -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/cluster/work/beltrao/jjaenes/24.06.10_af2genomics/software/openfold/cutlass/include -I/cluster/work/beltrao/jjaenes/24.06.10_af2genomics/software/openfold/cutlass/tools/util/include -isystem /cluster/project/beltrao/jjaenes/software/miniconda3/envs/openfold_env/lib/python3.10/site-packages/torch/include -isystem /cluster/project/beltrao/jjaenes/software/miniconda3/envs/openfold_env/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /cluster/project/beltrao/jjaenes/software/miniconda3/envs/openfold_env/lib/python3.10/site-packages/torch/includ

In [12]:
# Uninstall:
# Every command below is intentionally commented out, so running this cell
# changes nothing. To remove the installation, uncomment and run the lines in
# order from inside the openfold checkout: they step out of the checkout,
# delete it, and then remove the openfold_env environment.
# NOTE(review): the other cells use 'conda deactivate'; confirm that
# 'mamba deactivate' works in this shell before relying on it.
#cd ..
#pwd
#rm -rf openfold
#mamba deactivate
#mamba env remove -n openfold_env -y

(miniconda3)
(miniconda3)
(miniconda3)
(miniconda3)
(miniconda3)
(miniconda3)
