# Set Up A3FE Runs from FEGrow Output

In [1]:
import os
from rdkit import Chem
from rdkit.Chem import rdMolAlign

# A conformer is kept only if its RMSD (as returned by rdMolAlign.CalcRMS) to
# every conformer already kept from the same SDF file exceeds this value.
# NOTE(review): units are presumably Angstrom - confirm against the inputs.
RMSD_FILTER_THRESHOLD = 0.5

In [2]:
# Get all the SDF files (each file is treated as the set of conformers for one ligand)
structures_dir = 'input/full_run-MERS/structures'
sdfs = [f for f in os.listdir(structures_dir) if f.endswith('.sdf')]
# Sort by the number in the filename
sdfs.sort(key=lambda x: int(x.split('.')[0].split('_')[-1]))

# Read in all conformers with RDKit.
# `conformers` maps 'lig_<ligand number>_<index of the record in its SDF>' -> RDKit Mol.
conformers = {}
for sdf in sdfs:
    mol_num = sdf.split('.')[0].split('_')[-1]
    print(f'Processing {sdf}')

    # Get from sdf, keeping hydrogens
    mols = Chem.SDMolSupplier(f'{structures_dir}/{sdf}', removeHs=False)

    # Greedy filter: walk the records in file order and keep a conformer only
    # if its RMSD to every conformer kept so far exceeds the threshold.
    filtered_mols = {}
    for i, conf in enumerate(mols):
        # SDMolSupplier yields None for records it cannot parse; passing None
        # on to CalcRMS would raise, so report and skip such records. The
        # enumerate index is left untouched so conformer names still refer to
        # the record's position in the file.
        if conf is None:
            print(f'{sdf} conformer {i} could not be parsed and was skipped')
            continue

        # RMSD from this conformer to each conformer already kept, keyed by
        # the kept conformer's index.
        rmsds = {
            kept_idx: rdMolAlign.CalcRMS(conf, kept_conf)
            for kept_idx, kept_conf in filtered_mols.items()
        }
        if all(rmsd > RMSD_FILTER_THRESHOLD for rmsd in rmsds.values()):
            filtered_mols[i] = conf
        else:
            # Report the closest kept conformer (first one wins on ties)
            nearest_idx = min(rmsds, key=rmsds.get)
            print(f'{sdf} conformer {i} has a minimum RMSD to conformer {nearest_idx} of {rmsds[nearest_idx]}')

    # If there's only 1 conformer, skip it
    if len(filtered_mols) == 1:
        print(f'Skipping {sdf} because it only has 1 conformer')
        continue

    for i, conf in filtered_mols.items():
        conf_name = f'lig_{mol_num}_{i}'
        conformers[conf_name] = conf

Processing best_conformers_0.sdf
best_conformers_0.sdf conformer 1 has a minimum RMSD to conformer 0 of 0.02741914520566837
best_conformers_0.sdf conformer 3 has a minimum RMSD to conformer 2 of 0.0357790840735598
best_conformers_0.sdf conformer 5 has a minimum RMSD to conformer 4 of 0.017446062001640315
Processing best_conformers_1.sdf
best_conformers_1.sdf conformer 8 has a minimum RMSD to conformer 7 of 0.1376199654917876
Processing best_conformers_2.sdf
Processing best_conformers_3.sdf
Processing best_conformers_4.sdf
Processing best_conformers_5.sdf
best_conformers_5.sdf conformer 6 has a minimum RMSD to conformer 5 of 0.49179796664890757
Processing best_conformers_6.sdf
Processing best_conformers_7.sdf
best_conformers_7.sdf conformer 1 has a minimum RMSD to conformer 0 of 0.003571660224147461
best_conformers_7.sdf conformer 3 has a minimum RMSD to conformer 2 of 0.000434958836200937
best_conformers_7.sdf conformer 5 has a minimum RMSD to conformer 4 of 0.0003676073110465695
Proce

In [3]:
# Total number of conformers retained across all ligands after RMSD filtering
len(conformers)

412

In [4]:
# Collect the names (keys of `conformers`) of every conformer whose molecule
# has a non-zero total formal charge
charged_ligands = [
    conf_name
    for conf_name, rdmol in conformers.items()
    if Chem.GetFormalCharge(rdmol) != 0
]

In [9]:
# Number of retained conformers with a non-zero formal charge
len(charged_ligands)

97

In [8]:
# Display the names of the charged conformers
charged_ligands

['lig_2_0',
 'lig_2_1',
 'lig_6_0',
 'lig_6_1',
 'lig_6_2',
 'lig_6_3',
 'lig_6_4',
 'lig_6_5',
 'lig_10_0',
 'lig_10_1',
 'lig_10_2',
 'lig_10_3',
 'lig_10_5',
 'lig_10_6',
 'lig_10_7',
 'lig_10_9',
 'lig_10_10',
 'lig_10_11',
 'lig_10_12',
 'lig_10_13',
 'lig_14_0',
 'lig_14_1',
 'lig_14_2',
 'lig_14_3',
 'lig_14_4',
 'lig_14_5',
 'lig_14_6',
 'lig_14_7',
 'lig_14_8',
 'lig_19_0',
 'lig_19_1',
 'lig_19_2',
 'lig_23_0',
 'lig_23_1',
 'lig_23_2',
 'lig_23_3',
 'lig_24_0',
 'lig_24_1',
 'lig_24_2',
 'lig_24_3',
 'lig_24_4',
 'lig_24_5',
 'lig_29_0',
 'lig_29_1',
 'lig_32_0',
 'lig_32_1',
 'lig_32_2',
 'lig_32_3',
 'lig_32_4',
 'lig_32_5',
 'lig_32_7',
 'lig_32_9',
 'lig_38_0',
 'lig_38_1',
 'lig_38_2',
 'lig_41_0',
 'lig_41_1',
 'lig_53_0',
 'lig_53_1',
 'lig_62_0',
 'lig_62_1',
 'lig_62_4',
 'lig_62_5',
 'lig_62_6',
 'lig_62_7',
 'lig_62_8',
 'lig_64_0',
 'lig_64_1',
 'lig_64_2',
 'lig_64_3',
 'lig_64_4',
 'lig_64_5',
 'lig_64_6',
 'lig_64_7',
 'lig_73_0',
 'lig_73_2',
 'lig_73_4',
 'l

In [10]:
# Create directories for each ligand

def create_dir(ligand_name, ligand_rdmol):
    """Create ``<ligand_name>/input`` and populate it with the run inputs.

    The directory receives symlinks to the shared run script, a config
    template and the protein structure, plus the ligand itself written out
    as ``ligand.sdf``. Symlink targets are relative, so they resolve from
    inside ``<ligand_name>/input`` back to the shared ``input`` directory
    next to the ligand directories.

    Args:
        ligand_name: Name of the ligand directory to create; also looked up
            in the module-level ``charged_ligands`` list to pick the config
            template.
        ligand_rdmol: RDKit molecule written to ``ligand.sdf``.

    Raises:
        FileExistsError: If ``<ligand_name>/input`` already exists
            (``exist_ok=False``).
    """
    input_dir = f'{ligand_name}/input'
    os.makedirs(input_dir, exist_ok=False)

    # Shared run script
    os.symlink('../../input/run_somd.sh', f'{input_dir}/run_somd.sh')

    # Charged ligands get the charged template; either way the link is
    # named template_config.cfg inside the ligand's input directory
    config_target = (
        '../../input/template_config_charged.cfg'
        if ligand_name in charged_ligands
        else '../../input/template_config.cfg'
    )
    os.symlink(config_target, f'{input_dir}/template_config.cfg')

    # Link (not copy) the protein structure from the full_run-MERS inputs
    os.symlink('../../input/full_run-MERS/protein.pdb', f'{input_dir}/protein.pdb')

    # Write the ligand molecule as-is to ligand.sdf
    with Chem.SDWriter(f'{input_dir}/ligand.sdf') as sd_out:
        sd_out.write(ligand_rdmol)

# Build one input directory per retained conformer, in insertion order
for conf_name in conformers:
    create_dir(conf_name, conformers[conf_name])


In [None]:
def lig_dir_sort_key(dir_name):
    """Return ``(ligand number, conformer number)`` for a ``lig_<n>_<m>`` name.

    Returns ``None`` for any name that does not have exactly that form, so
    the function can be used both to filter directory listings and as a
    sort key for the names that pass the filter.
    """
    prefix, *numbers = dir_name.split('_')
    if prefix != 'lig' or len(numbers) != 2:
        return None
    try:
        return int(numbers[0]), int(numbers[1])
    except ValueError:
        return None


# Collect the ligand directories in the current working directory. Only real
# directories named lig_<n>_<m> are kept: a plain substring test on "lig"
# would also pick up unrelated entries and then fail when parsing the numbers.
# Sort first by the ligand number, then by the conformer number.
lig_dirs = sorted(
    (
        d for d in os.listdir()
        if os.path.isdir(d) and lig_dir_sort_key(d) is not None
    ),
    key=lig_dir_sort_key,
)

In [40]:
# Display the ligand directory names in sorted order
lig_dirs

['lig_0_0',
 'lig_0_1',
 'lig_1_0',
 'lig_1_1',
 'lig_2_0',
 'lig_2_1',
 'lig_3_0',
 'lig_3_1',
 'lig_4_0',
 'lig_4_1',
 'lig_4_2',
 'lig_6_0',
 'lig_6_2',
 'lig_6_4',
 'lig_6_5',
 'lig_6_6',
 'lig_6_8',
 'lig_6_10',
 'lig_6_11',
 'lig_7_0',
 'lig_7_2',
 'lig_7_4',
 'lig_7_5',
 'lig_7_7',
 'lig_7_9',
 'lig_7_10',
 'lig_7_14',
 'lig_7_17',
 'lig_8_0',
 'lig_8_1',
 'lig_8_2',
 'lig_8_3',
 'lig_9_0',
 'lig_9_1',
 'lig_9_2',
 'lig_9_3',
 'lig_9_4',
 'lig_9_5',
 'lig_9_6',
 'lig_9_7',
 'lig_10_0',
 'lig_10_1',
 'lig_10_2',
 'lig_10_3',
 'lig_11_0',
 'lig_11_4',
 'lig_11_5',
 'lig_14_0',
 'lig_14_1',
 'lig_15_0',
 'lig_15_1',
 'lig_15_2',
 'lig_15_3',
 'lig_15_4',
 'lig_16_0',
 'lig_16_1',
 'lig_17_0',
 'lig_17_1',
 'lig_17_2',
 'lig_17_3',
 'lig_17_4',
 'lig_17_5',
 'lig_18_0',
 'lig_18_1',
 'lig_18_2',
 'lig_18_3',
 'lig_25_0',
 'lig_25_1',
 'lig_25_2',
 'lig_25_3',
 'lig_25_4',
 'lig_26_0',
 'lig_26_1',
 'lig_26_2',
 'lig_26_3',
 'lig_27_0',
 'lig_27_1',
 'lig_29_0',
 'lig_29_1',
 'lig_29