# Set Up A3FE Runs from FEGrow Output

In [1]:
import os
from rdkit import Chem
from rdkit.Chem import rdMolAlign

# Minimum RMSD a conformer must have to every already-kept conformer of the
# same ligand in order to be kept itself; conformers at or below this value
# are treated as duplicates and dropped.
# NOTE(review): units are presumably Angstrom (RDKit coordinate units) - confirm.
RMSD_FILTER_THRESHOLD = 0.5

In [2]:
# Collect the multi-conformer SDF files (one file per ligand)
structures_dir = 'input/structures-SARS-comp48'
sdfs = [f for f in os.listdir(structures_dir) if f.endswith('.sdf')]
# Sort by the number at the end of the filename stem (e.g. best_conformers_57.sdf -> 57)
sdfs.sort(key=lambda x: int(x.split('.')[0].split('_')[-1]))

# Read in all conformers with RDKit, keyed as lig_<ligand number>_<conformer index>
conformers = {}
for sdf in sdfs:
    mol_num = sdf.split('.')[0].split('_')[-1]
    print(f'Processing {sdf}')

    # Get from sdf, keeping hydrogens
    mols = Chem.SDMolSupplier(f'{structures_dir}/{sdf}', removeHs=False)

    # Greedily keep a conformer only if it differs by more than the threshold
    # from every conformer kept so far. Keys are the original indices in the SDF.
    filtered_mols = {}
    for i, conf in enumerate(mols):
        # SDMolSupplier yields None for records it cannot parse; passing None
        # on to CalcRMS (or storing it) would crash later, so drop it here.
        if conf is None:
            print(f'{sdf} conformer {i} could not be parsed by RDKit, ignoring it')
            continue

        # CalcRMS compares the poses in place (no alignment is performed)
        rmsds = [rdMolAlign.CalcRMS(conf, filtered_conf) for filtered_conf in filtered_mols.values()]
        if all(rmsd > RMSD_FILTER_THRESHOLD for rmsd in rmsds):
            filtered_mols[i] = conf
        else:
            # Report the already-kept conformer that this one duplicates
            min_rmsd = min(rmsds)
            min_rmsd_conf_name = list(filtered_mols.keys())[rmsds.index(min_rmsd)]
            print(f'{sdf} conformer {i} has a minimum RMSD to conformer {min_rmsd_conf_name} of {min_rmsd}')

    # Nothing usable in this file at all
    if not filtered_mols:
        print(f'Skipping {sdf} because it has no readable conformers')
        continue

    # If there's only 1 conformer, skip it
    if len(filtered_mols) == 1:
        print(f'Skipping {sdf} because it only has 1 conformer')
        continue

    for i, conf in filtered_mols.items():
        conf_name = f'lig_{mol_num}_{i}'
        conformers[conf_name] = conf

Processing best_conformers_12.sdf
Skipping best_conformers_12.sdf because it only has 1 conformer
Processing best_conformers_57.sdf
best_conformers_57.sdf conformer 1 has a minimum RMSD to conformer 0 of 0.014365684158992496
best_conformers_57.sdf conformer 3 has a minimum RMSD to conformer 2 of 0.006638357975446073
best_conformers_57.sdf conformer 6 has a minimum RMSD to conformer 5 of 0.015104158143383561
best_conformers_57.sdf conformer 8 has a minimum RMSD to conformer 7 of 0.011696732848194584
Processing best_conformers_58.sdf


In [3]:
# Total number of conformers kept across all ligands after the RMSD filter
# and after skipping ligands left with a single conformer
len(conformers)

7

In [4]:
# Names of the conformers whose molecule has a net formal charge of exactly +1
positive_1_ligs = [
    name
    for name, rdmol in conformers.items()
    if Chem.GetFormalCharge(rdmol) == 1
]
positive_1_ligs

[]

In [5]:
# Names of the conformers whose molecule has any non-zero net formal charge;
# create_dir consults this list to pick the charged config template
charged_ligands = [
    name
    for name, rdmol in conformers.items()
    if Chem.GetFormalCharge(rdmol) != 0
]

In [6]:
# Display the names of the charged conformers
charged_ligands

[]

In [12]:
# Create directories for each ligand

def create_dir(ligand_name, ligand_rdmol):
    """Create and populate ``<ligand_name>/input`` for a single conformer.

    The new directory receives symlinks to the shared run script, a config
    template and the protein structure, and the conformer itself is written
    out as ``ligand.sdf``. The link targets are relative paths, so they are
    resolved from inside ``<ligand_name>/input``.

    Args:
        ligand_name: Name of the directory to create; also looked up in the
            notebook-level ``charged_ligands`` list to choose the template.
        ligand_rdmol: RDKit molecule to write to ``ligand.sdf``.

    Raises:
        FileExistsError: If ``<ligand_name>/input`` already exists
            (``exist_ok=False``), so existing set-ups are never overwritten.
    """
    target_dir = f'{ligand_name}/input'
    os.makedirs(target_dir, exist_ok=False)

    # Link the shared run script
    os.symlink('../../input/run_somd.sh', f'{target_dir}/run_somd.sh')

    # Link the config template, using the charged variant for charged ligands
    template_source = (
        '../../input/template_config_charged.cfg'
        if ligand_name in charged_ligands
        else '../../input/template_config.cfg'
    )
    os.symlink(template_source, f'{target_dir}/template_config.cfg')

    # Link (not copy) the sanitised SARS protein structure
    os.symlink('../../input/structures-SARS-comp48/protein_sanitised.pdb', f'{target_dir}/protein.pdb')

    # Write the conformer as given (hydrogens are not stripped here)
    with Chem.SDWriter(f'{target_dir}/ligand.sdf') as sd_writer:
        sd_writer.write(ligand_rdmol)

# Build one input directory per retained conformer
for lig_name in conformers:
    create_dir(lig_name, conformers[lig_name])


In [10]:
# Collect only directories named exactly lig_<ligand number>_<conformer number>.
# A plain substring test ("lig" in name) would also pick up unrelated entries
# (e.g. "ligands.csv", "alignment") and make the integer sort key below fail.
lig_dirs = []
for entry in os.listdir():
    parts = entry.split('_')
    if (
        len(parts) == 3
        and parts[0] == 'lig'
        and parts[1].isdecimal()
        and parts[2].isdecimal()
        and os.path.isdir(entry)
    ):
        lig_dirs.append(entry)

# Sort first by the ligand number, then by the conformer number
lig_dirs.sort(key=lambda x: (int(x.split('_')[1]), int(x.split('_')[2])))

In [11]:
# List each ligand directory next to its position in the sorted order
for position in range(len(lig_dirs)):
    print(f'{position}: {lig_dirs[position]}')

0: lig_57_0
1: lig_57_2
2: lig_57_4
3: lig_57_5
4: lig_57_7
5: lig_58_0
6: lig_58_1
