# Lysostaphin vs Staphylococcus aureus

AlphaFold 3 (AF3) modelling of the lysostaphin catalytic (peptidase M23) and cell-wall-binding (SH3b) domains, each in complex with peptidoglycan fragments from *S. aureus*.

The peptidoglycan (PG) fragment library was generated with [PGN_MS2](https://github.com/jerickwan/PGN_MS2).

In [14]:
import json
import os
from pathlib import Path
import random
import re
import subprocess
import tempfile

from Bio import SeqIO
import pandas as pd
from IPython.display import display

# When the kernel is started from inside a directory whose path ends with
# 'notebook', move one level up before the project-local import that follows
# (presumably so the `src` package resolves from the repository root).
if os.getcwd().endswith('notebook'):
    os.chdir('..')
# Record the working directory that is in effect after the optional move.
cwd = os.getcwd()

from src.pg_modelling.ligand_utils import generate_ccd_from_smiles, gen_model_seeds

In [2]:
# Root folder holding every input and output used by this notebook; it is
# resolved relative to the current user's home directory.
home_dir = os.path.expanduser('~')
data_folder = Path(home_dir, 'Documents', 'PG_modelling')
# Fail fast if the data folder has not been set up on this machine.
assert data_folder.is_dir()

## Load Lysostaphin

In [7]:
# FASTA file with the lysostaphin domain sequences.
lysostaphin_path = data_folder.joinpath('S_aureus', 'S_aureus_lysostaphin.fasta')
# Parse the FASTA records, then index them by record id for lookup by name.
lysostaphin_fasta_records = SeqIO.parse(lysostaphin_path, 'fasta')
lysostaphin_records = SeqIO.to_dict(lysostaphin_fasta_records)

In [8]:
def _protein_chain_entry(record_id):
    """Build a protein sequence entry (chain 'A') from a loaded FASTA record.

    The sequence is taken from ``lysostaphin_records[record_id]`` and
    upper-cased.
    """
    sequence = str(lysostaphin_records[record_id].seq).upper()
    return {
        'protein': {
            'id': 'A',
            'sequence': sequence,
        },
    }


# NOTE(review): 'Peptiase' looks like a typo of 'Peptidase', but the key has to
# match the record id in the FASTA file - confirm against the file before changing.
lysostaphin_peptidase_m23 = _protein_chain_entry('Lysostaphin_P10547_Peptiase_M23_279_364')
lysostaphin_sh3b = _protein_chain_entry('Lysostaphin_P10547_SH3b_413_481')

## Load PG fragments

In [13]:
# Table of S. aureus peptide-bridge fragments; the `Name` and `SMILES` columns
# are the ones consumed when the AF3 inputs are built.
bridge_csv_path = data_folder.joinpath('S_aureus', 'S_aureus_peptide_bridges.csv')
bridge_df = pd.read_csv(bridge_csv_path)
n_bridges = len(bridge_df)
print(f'Number of bridges: {n_bridges:,}')
# Last expression of the cell: preview of the first rows.
bridge_df.head()

Number of bridges: 11


Unnamed: 0,Name,Synonym,Formula,Monoisotopic Mass,Modifications,Degree Amidation,Degree Acetylation,Ontology,PGN Units,Glycan Units,...,Peptide,SMILES,INCHIKEY,clogP,RT,[M+H]+,[M+Na]+,[M+K]+,[M+2H]2+,[M+3H]3+
0,None-AqKAA[3-NH2-GGGGA],None--Ala.γ-isoGln.Lys.Ala.Ala[3--NH2--Gly.Gly...,C31H54N12O12,786.39842,Amidase,1,0,P5--S5,1,0,...,AqKAA[3-NH2-GGGGA],CC(N)C(=O)NCC(=O)NCC(=O)NCC(=O)NCC(=O)NCCCCC(N...,ZYKKRJKQWIIIKZ-UHFFFAOYSA-N,-7.2388,0,787.4057,809.38764,825.36158,394.20649,263.14009
1,None-AqKAA[3-NH2-AGGGG],None--Ala.γ-isoGln.Lys.Ala.Ala[3--NH2--Ala.Gly...,C31H54N12O12,786.39842,Amidase,1,0,P5--S5,1,0,...,AqKAA[3-NH2-AGGGG],CC(N)C(=O)NC(CCC(=O)NC(CCCCNC(=O)C(C)NC(=O)CNC...,JASCPJLSBGTOJZ-UHFFFAOYSA-N,-7.2388,0,787.4057,809.38764,825.36158,394.20649,263.14009
2,None-AqKAA[3-NH2-GAGGG],None--Ala.γ-isoGln.Lys.Ala.Ala[3--NH2--Gly.Ala...,C31H54N12O12,786.39842,Amidase,1,0,P5--S5,1,0,...,AqKAA[3-NH2-GAGGG],CC(N)C(=O)NC(CCC(=O)NC(CCCCNC(=O)CNC(=O)C(C)NC...,ZSEQKNJCTUIKKV-UHFFFAOYSA-N,-7.2388,0,787.4057,809.38764,825.36158,394.20649,263.14009
3,None-AqKAA[3-NH2-GSGGG],None--Ala.γ-isoGln.Lys.Ala.Ala[3--NH2--Gly.Ser...,C31H54N12O13,802.39333,Amidase,1,0,P5--S5,1,0,...,AqKAA[3-NH2-GSGGG],CC(N)C(=O)NC(CCC(=O)NC(CCCCNC(=O)CNC(=O)C(CO)N...,DITSXPZGDXFJBL-UHFFFAOYSA-N,-8.2664,0,803.40061,825.38255,841.35649,402.20395,268.47172
4,None-AqKAA[3-NH2-GGAGG],None--Ala.γ-isoGln.Lys.Ala.Ala[3--NH2--Gly.Gly...,C31H54N12O12,786.39842,Amidase,1,0,P5--S5,1,0,...,AqKAA[3-NH2-GGAGG],CC(N)C(=O)NC(CCC(=O)NC(CCCCNC(=O)CNC(=O)CNC(=O...,ORULLQTUYKBHKH-UHFFFAOYSA-N,-7.2388,0,787.4057,809.38764,825.36158,394.20649,263.14009


## Prepare AF3 input JSON files

In [16]:
# Zinc ion, described as a ligand entry (chain 'C') via its SMILES string.
# It is only added to the M23 domain jobs.
zn_ion_seq = dict(
    ligand=dict(
        id='C',
        smiles='[Zn+2]',
    ),
)

In [18]:
# Domains to model, as (label used in the job name, protein chain entry,
# extra ion entries appended after the PG ligand).
lysostaphin_domains = [
    ('M23', lysostaphin_peptidase_m23, [zn_ion_seq]),
    ('SH3b', lysostaphin_sh3b, []),
]

# Create the destination up front so that a fresh checkout (or a
# Restart & Run All on a new machine) does not fail on the first write.
af3_input_folder = data_folder / 'S_aureus' / 'AF3_inputs'
af3_input_folder.mkdir(parents=True, exist_ok=True)

# One AF3 job per (PG fragment, lysostaphin domain) pair.
for _, row in bridge_df.iterrows():
    # The ligand label is the bracketed suffix of the fragment name,
    # e.g. 'None-AqKAA[3-NH2-GGGGA]' -> 'GGGGA'.
    name_match = re.match(r'^.+-AqKAA\[3-NH2-([A-Z]+)\]$', row['Name'])
    if name_match is None:
        # Fail with a readable message instead of
        # "'NoneType' object is not subscriptable".
        raise ValueError(f'Unexpected PG fragment name: {row["Name"]!r}')
    ligand_name = name_match[1]
    smiles = row['SMILES']

    # The ligand is referenced through a custom component code whose
    # definition is shipped in the job's `userCCD` field.
    ccd_code = f'LIG-PG-{ligand_name}'
    ligand_seq = {
        'ligand': {
            'id': 'B',
            'ccdCodes': [ccd_code],
        }
    }
    # Built once per fragment and reused for every domain.
    # NOTE(review): presumably returns the component definition as text
    # derived from the SMILES - confirm in src.pg_modelling.ligand_utils.
    ccd_data = generate_ccd_from_smiles(smiles, ccd_code)

    for domain_name, domain_seq, ions_seq in lysostaphin_domains:
        name = f'Lysostaphin_{domain_name}_vs_{ligand_name}'

        # Fresh list per job: protein chain, PG ligand, then any ions.
        sequences = [
            domain_seq,
            ligand_seq,
        ]
        sequences += ions_seq

        data = {
            'name': name,
            'sequences': sequences,
            'userCCD': ccd_data,
            # NOTE(review): presumably 3 model seeds per job - confirm what
            # gen_model_seeds returns and whether it is reproducible.
            'modelSeeds': gen_model_seeds(3),
            'dialect': 'alphafold3',
            'version': 1,
        }
        with (af3_input_folder / f'{name}.json').open('w') as f_out:
            # indent=1 yields the same file as the former `indent=True`
            # (a bool is treated as the integer 1), but states it explicitly.
            json.dump(
                data,
                f_out,
                indent=1,
            )

[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling AddHs()
[12:59:01] Molecule does not have explicit Hs. Consider calling 