# Covalent bond small inhibitor

## Reference

AlphaFold 3 (AF3) GitHub issue: https://github.com/google-deepmind/alphafold3/issues/159


## Setup

In [None]:
from af_kit.core import *
from af_kit.covalent import *
import pandas as pd

## Prepare json file

In [None]:
# Protein sequence used for chain A of the AlphaFold 3 input built below.
# It begins with six histidines (HHHHHH), so any position counted in this
# string includes those six leading residues.
seq='HHHHHHAPNQALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDNPHVCRLLGICLTSTVQLIMQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAARNVLVKTPQHVKITDFGRAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLVIQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQG'

In [None]:
# Build the AlphaFold 3 input dict for the protein alone (job name 'proteinA',
# a single model seed); the third argument is the path of the JSON file.
protein_json = get_protein_json(
    'proteinA',
    seq,
    'data/proteinA.json',
    seeds=[1],
)

In [None]:
# Inspect the generated input dict (name, seeds, sequences, bonded atom pairs).
protein_json

{'name': 'proteinA',
 'modelSeeds': [1],
 'sequences': [{'protein': {'id': 'A',
    'sequence': 'HHHHHHAPNQALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDNPHVCRLLGICLTSTVQLIMQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAARNVLVKTPQHVKITDFGRAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLVIQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQG'}}],
 'bondedAtomPairs': [],
 'dialect': 'alphafold3',
 'version': 2}

## Prepare ligand

Load the PDB structure in Maestro, split the complex, and save the ligand (without the covalent bond) to its own file. The conversion below reads an SDF file (`lig-HKI.sdf`).

Convert the ligand file to CCD text

In [None]:
# Convert the ligand SDF file into CCD text; this text is later embedded in
# the AlphaFold 3 input under the 'userCCD' key.
ccd_text = sdf2ccd('lig-HKI.sdf')

In [None]:
# Inspect the CCD text (component id 'lig-any', atom names and coordinates);
# the atom names listed here are the ones usable in a bonded atom pair.
ccd_text

"data_lig-any\n#\n_chem_comp.id lig-any\n_chem_comp.name 'lig-any'\n_chem_comp.type non-polymer\n_chem_comp.formula '?'\n_chem_comp.mon_nstd_parent_comp_id ?\n_chem_comp.pdbx_synonyms ?\n_chem_comp.formula_weight '?'\n#\nloop_\n_chem_comp_atom.comp_id\n_chem_comp_atom.atom_id\n_chem_comp_atom.type_symbol\n_chem_comp_atom.charge\n_chem_comp_atom.pdbx_leaving_atom_flag\n_chem_comp_atom.pdbx_model_Cartn_x_ideal\n_chem_comp_atom.pdbx_model_Cartn_y_ideal\n_chem_comp_atom.pdbx_model_Cartn_z_ideal\nlig-any C1 C 0 N 1.654 24.013 52.956\nlig-any C2 C 0 N 1.438 32.804 50.984\nlig-any C3 C 0 N 0.712 33.151 49.868\nlig-any C4 C 0 N -0.692 25.377 50.586\nlig-any C5 C 0 N -0.322 26.674 50.279\nlig-any C6 C 0 N 1.518 31.470 51.317\nlig-any C7 C 0 N -0.243 20.239 50.608\nlig-any C8 C 0 N 1.486 18.601 51.967\nlig-any C9 C 0 N 1.586 24.734 50.460\nlig-any C10 C 0 N 0.101 32.158 49.137\nlig-any C11 C 0 N 2.414 21.749 53.291\nlig-any C12 C 0 N 1.591 22.631 52.634\nlig-any C13 C 0 N 0.661 20.790 51.497\nli

In [None]:
# Add the ligand (described by the user-provided CCD text) to the protein
# input and declare the covalent bond between one protein atom and one
# ligand atom.
#
# Residue numbers in a bonded atom pair are 1-based positions in the chain's
# sequence, so the six leading histidines count. 'SG' is the side-chain sulfur
# of a cysteine: the cysteine of the ...LMPFGC... motif is residue 106,
# whereas residue 101 is a leucine and has no SG atom.
covalent_res = 106
protein_seq = protein_json['sequences'][0]['protein']['sequence']
assert protein_seq[covalent_res - 1] == 'C', (
    f"residue {covalent_res} is {protein_seq[covalent_res - 1]!r}, not a cysteine; "
    "it has no SG atom to bond"
)

data = get_protein_ccd_json(protein_json,
                            ccd_text,
                            ['A', covalent_res, 'SG'],  # protein atom: chain id, residue number, atom name
                            ['L', 1, 'C28'],            # ligand atom: chain id, residue number, atom name
                            '3W2Q_test',
                            '3W2Q.json')                # file name matches the job name

In [None]:
# Inspect the combined input: ligand and protein entries under 'sequences',
# the covalent bond under 'bondedAtomPairs', and the ligand definition under 'userCCD'.
data

{'name': '3W2Q_test',
 'modelSeeds': [1],
 'sequences': [{'ligand': {'id': 'L', 'ccdCodes': ['lig-any']}},
  {'protein': {'id': 'A',
    'sequence': 'HHHHHHAPNQALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDNPHVCRLLGICLTSTVQLIMQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAARNVLVKTPQHVKITDFGRAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLVIQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQG'}}],
 'bondedAtomPairs': [[['A', 101, 'SG'], ['L', 1, 'C28']]],
 'userCCD': "data_lig-any\n#\n_chem_comp.id lig-any\n_chem_comp.name 'lig-any'\n_chem_comp.type non-polymer\n_chem_comp.formula '?'\n_chem_comp.mon_nstd_parent_comp_id ?\n_chem_comp.pdbx_synonyms ?\n_chem_comp.formula_weight '?'\n#\nloop_\n_chem_comp_atom.comp_id\n_chem_comp_atom.atom_id\n_chem_comp_atom.type_symbol\n_chem_comp_atom.charge\n_chem_comp_atom.pdbx_leaving_atom_flag\n_chem_comp_atom.pdbx_model_Cartn_x_ideal\n_chem_comp_atom.pdbx_model_Ca

### Docker command

Move the generated `proteinA.json` into the `af_input/{project_name}` folder (`af_input/sdf` in this example)

In [None]:
# Sub-folder name used under af_input/ and af_output/ for this project.
project_name = 'sdf'

In [None]:
# Produce the docker command that runs AlphaFold 3 on the single protein JSON
# of this project and writes the results to the project's output folder.
single_json_path = f"af_input/{project_name}/proteinA.json"
single_output_dir = f"af_output/{project_name}"
docker_single_protein_default(json_path=single_json_path, output_dir=single_output_dir)

docker run --rm \
    --volume "$HOME/af_input:/root/af_input" \
    --volume "$HOME/af_output/sdf:/root/af_output" \
    --volume "$HOME/af_model:/root/models" \
    --volume "$HOME/af_db:/root/public_databases" \
    --gpus "device=0" \
    sky1ove/alphafold3 \
    python run_alphafold.py \
    --json_path=/root/af_input/sdf/proteinA.json \
    --output_dir=/root/af_output \
    --model_dir=/root/models


## Prepare protein-smiles files

### Read output json

In [None]:
# Load a previously generated AlphaFold 3 JSON (the preview below shows it
# already embeds MSA data). Note: this rebinds `protein_json`, which earlier
# in the notebook held the 'proteinA' input dict.
protein_json = read_json('data/seq_only_data.json')

In [None]:
# Preview only the first 1000 characters: the dict embeds the MSA text, so
# displaying it whole would flood the output.
str(protein_json)[:1000]

'{\'dialect\': \'alphafold3\', \'version\': 2, \'name\': \'PDCD1_seq_only\', \'sequences\': [{\'protein\': {\'id\': \'A\', \'sequence\': \'LDSPDRPWNPPTFSPALLVVTEGDNATFTCSFSNTSESFVLNWYRMSPSNQTDKLAAFPEDRSQPGQDCRFRVTQLPNGRDFHMSVVRARRNDSGTYLCGAISLAPKAQIKESLRAELRVTERRAEVPTAHPSPSPRPAGQFQTLV\', \'modifications\': [], \'unpairedMsa\': ">query\\nLDSPDRPWNPPTFSPALLVVTEGDNATFTCSFSNTSESFVLNWYRMSPSNQTDKLAAFPEDRSQPGQDCRFRVTQLPNGRDFHMSVVRARRNDSGTYLCGAISLAPKAQIKESLRAELRVTERRAEVPTAHPSPSPRPAGQFQTLV\\n>UniRef90_UPI0009801507/25-167 [subseq from] Programmed cell death protein 1 n=10 Tax=Homo sapiens TaxID=9606 RepID=UPI0009801507\\nLDSPDRPWNPPTFSPALLVVTEGDNATFTCSFSNTSESFVLNWYRMSPSNQTDKLAAFPEDRSQPGQDCRFRVTQLPNGRDFHMSVVRARRNDSGTYLCGAISLAPKAQIKESLRAELRVTERRAEVPTAHPSPSPRPAGQFQ---\\n>UniRef90_A0A5F7ZCX7/24-168 [subseq from] Programmed cell death 1 n=1 Tax=Macaca mulatta TaxID=9544 RepID=A0A5F7ZCX7_MACMU\\n-ESPDRPWNPPTFSPALLLVTEGDNATFTCSFSNASESFVLNWYRMSPSNQTDKLAAFPEDRSQPGRDCRFRVTQLPNGRDFHMSVVRARRNDSGTYLCGAISLAP

### Single protein-SMILES pair

In [None]:
# IPython `??` introspection: displays the signature and source code of the
# helper so its parameters (smi_id, SMILES, protein_json, save_path, seeds) are visible.
get_protein_smiles_json??

[0;31mSignature:[0m
[0mget_protein_smiles_json[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0msmi_id[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mSMILES[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprotein_json[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msave_path[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mseeds[0m[0;34m=[0m[0;34m[[0m[0;36m1[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
[0;32mdef[0m [0mget_protein_smiles_json[0m[0;34m([0m[0msmi_id[0m[0;34m:[0m[0mstr[0m[0;34m,[0m [0;34m[0m
[0;34m[0m                            [0mSMILES[0m[0;34m:[0m[0mstr[0m[0;34m,[0m [0;34m[0m
[0;34m[0m                            [0mprotein_json[0m[0;34m,[0m [0;31m# json type[0m[0;34m[0m
[0;34m[0m                            [0msave_path[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0;31m# .json[0m[0;34m[0m
[

In [None]:
# Pair one ligand, given as a SMILES string, with the loaded protein JSON.
# `seeds` is left at its default.
out = get_protein_smiles_json(
    smi_id='smi_name',
    SMILES='CCC',
    protein_json=protein_json,
    save_path='data/protein_smi.json',
)

### Multiple protein-SMILES pairs in a DataFrame

In [None]:
# Small example table: one row per ligand, holding an identifier ('idx')
# and its SMILES string ('smi').
ligand_rows = [('a', 'CCC'), ('b', 'OCO')]
df = pd.DataFrame(ligand_rows, columns=['idx', 'smi'])
df

Unnamed: 0,idx,smi
0,a,CCC
1,b,OCO


In [None]:
# Generate one AlphaFold 3 input JSON per ligand row; each file is named
# after the row's identifier and requests three model seeds.
for idx, smi in zip(df['idx'], df['smi']):
    _ = get_protein_smiles_json(
        smi_id=idx,
        SMILES=smi,
        protein_json=protein_json,
        save_path=f'af_input/{project_name}/{idx}.json',
        seeds=[1, 2, 3],
    )

This writes one JSON file per row of the DataFrame into the `af_input/{project_name}` directory

## Split files into multiple subfolders for multi-GPU runs

In [None]:
# Distribute the JSON files of the project folder across sub-folders
# (folder_0, folder_1, ...) so that each GPU can be given its own sub-folder.
# The recorded run reports 4 folders.
split_nfolder(f'af_input/{project_name}')

Distributed 2 files into 4 folders.


## Docker

docker pull sky1ove/alphafold3

In [None]:
# Emit one docker command per sub-folder, pinning sub-folder N to GPU N.
# The generated commands pass --norun_data_pipeline, which skips the data
# pipeline (genetic/template search) because it was already run in the first step.
for gpu_id in range(4):
    docker_virtual_screening(
        input_dir=f"af_input/{project_name}/folder_{gpu_id}",
        output_dir=f"af_output/{project_name}",
        gpus=gpu_id,
    )

docker run --rm \
    --volume "$HOME/af_input:/root/af_input" \
    --volume "$HOME/af_output/sdf:/root/af_output" \
    --volume "$HOME/af_model:/root/models" \
    --volume "$HOME/af_db:/root/public_databases" \
    --volume "$HOME/af_cache:/root/cache" \
    --gpus "device=0" \
    sky1ove/alphafold3 \
    python run_alphafold.py \
    --input_dir=/root/af_input/sdf/folder_0 \
    --output_dir=/root/af_output \
    --model_dir=/root/models \
    --jax_compilation_cache_dir=/root/cache \
    --norun_data_pipeline
docker run --rm \
    --volume "$HOME/af_input:/root/af_input" \
    --volume "$HOME/af_output/sdf:/root/af_output" \
    --volume "$HOME/af_model:/root/models" \
    --volume "$HOME/af_db:/root/public_databases" \
    --volume "$HOME/af_cache:/root/cache" \
    --gpus "device=1" \
    sky1ove/alphafold3 \
    python run_alphafold.py \
    --input_dir=/root/af_input/sdf/folder_1 \
    --output_dir=/root/af_output \
    --model_dir=/root/models \
    --jax_compilation_cache_