# Setup A3FE Runs from FEGrow Output

In [1]:
import os
from rdkit import Chem
from rdkit.Chem import rdMolAlign

# A conformer is only kept if its RMSD (as returned by rdMolAlign.CalcRMS) to
# every already-kept conformer of the same ligand is strictly greater than this
# value. Presumably in Angstrom (the usual SDF coordinate unit) - TODO confirm.
RMSD_FILTER_THRESHOLD = 0.5

In [2]:
# Get all the sdf files (one multi-conformer file per ligand)
structures_dir = 'input/full_run-SARS/structures'
sdfs = [f for f in os.listdir(structures_dir) if f.endswith('.sdf')]
# Sort by the number in the filename
sdfs.sort(key=lambda x: int(x.split('.')[0].split('_')[-1]))

# Read in all conformers with RDKit.
# conformers maps 'lig_<ligand number>_<conformer index in the sdf>' -> RDKit mol
conformers = {}
for sdf in sdfs:
    mol_num = sdf.split('.')[0].split('_')[-1]
    print(f'Processing {sdf}')

    # Get from sdf, keeping hydrogens
    mols = Chem.SDMolSupplier(f'{structures_dir}/{sdf}', removeHs=False)

    # Greedily keep a conformer only if it differs by more than the RMSD
    # threshold from every conformer kept so far. Keys are the original
    # positions in the sdf, so gaps in the numbering mark discarded conformers.
    filtered_mols = {}
    for i, conf in enumerate(mols):
        # SDMolSupplier yields None for records it cannot parse; passing None
        # to CalcRMS would abort the whole cell, so skip such records instead.
        if conf is None:
            print(f'{sdf} conformer {i} could not be parsed and was skipped')
            continue

        rmsds = [rdMolAlign.CalcRMS(conf, filtered_conf) for filtered_conf in filtered_mols.values()]
        # all() of an empty list is True, so the first valid conformer is always kept
        if all(rmsd > RMSD_FILTER_THRESHOLD for rmsd in rmsds):
            filtered_mols[i] = conf
        else:
            # Report the already-kept conformer that this one is closest to
            min_rmsd = min(rmsds)
            min_rmsd_idx = rmsds.index(min_rmsd)
            min_rmsd_conf_name = list(filtered_mols.keys())[min_rmsd_idx]
            print(f'{sdf} conformer {i} has a minimum RMSD to conformer {min_rmsd_conf_name} of {min_rmsd}')

    # If there's only 1 conformer, skip it
    if len(filtered_mols) == 1:
        print(f'Skipping {sdf} because it only has 1 conformer')
        continue

    for i, conf in filtered_mols.items():
        conf_name = f'lig_{mol_num}_{i}'
        conformers[conf_name] = conf

Processing best_conformers_0.sdf
Processing best_conformers_1.sdf
Processing best_conformers_2.sdf
Processing best_conformers_3.sdf
Processing best_conformers_4.sdf
Processing best_conformers_5.sdf
Skipping best_conformers_5.sdf because it only has 1 conformer
Processing best_conformers_6.sdf
best_conformers_6.sdf conformer 1 has a minimum RMSD to conformer 0 of 0.0003409971697354112
best_conformers_6.sdf conformer 3 has a minimum RMSD to conformer 2 of 0.0203401310279102
Processing best_conformers_7.sdf
best_conformers_7.sdf conformer 1 has a minimum RMSD to conformer 0 of 0.016987294717469945
best_conformers_7.sdf conformer 3 has a minimum RMSD to conformer 2 of 0.01015672638742873
best_conformers_7.sdf conformer 6 has a minimum RMSD to conformer 0 of 0.2922543646147368
best_conformers_7.sdf conformer 8 has a minimum RMSD to conformer 4 of 0.0339133253510127
best_conformers_7.sdf conformer 13 has a minimum RMSD to conformer 9 of 0.23723109178413113
best_conformers_7.sdf conformer 16 

In [5]:
# Number of conformers retained in `conformers` after the RMSD filtering above
len(conformers)

286

In [5]:
# Names of every conformer whose total formal charge is exactly +1
positive_1_ligs = [
    name
    for name, rdmol in conformers.items()
    if Chem.GetFormalCharge(rdmol) == 1
]
positive_1_ligs

['lig_10_0',
 'lig_10_1',
 'lig_10_2',
 'lig_10_3',
 'lig_14_0',
 'lig_14_1',
 'lig_25_0',
 'lig_25_1',
 'lig_25_2',
 'lig_25_3',
 'lig_25_4',
 'lig_27_0',
 'lig_27_1',
 'lig_27_2',
 'lig_47_0',
 'lig_47_1',
 'lig_47_2',
 'lig_47_3',
 'lig_47_4',
 'lig_55_0',
 'lig_55_1',
 'lig_55_2',
 'lig_76_0',
 'lig_76_1']

In [6]:
# Names of every conformer carrying a non-zero total formal charge (either sign)
charged_ligands = [
    name
    for name, rdmol in conformers.items()
    if Chem.GetFormalCharge(rdmol) != 0
]

In [7]:
# Display the names of all conformers with a non-zero formal charge
charged_ligands

['lig_10_0',
 'lig_10_1',
 'lig_10_2',
 'lig_10_3',
 'lig_14_0',
 'lig_14_1',
 'lig_17_0',
 'lig_17_1',
 'lig_17_2',
 'lig_17_3',
 'lig_17_4',
 'lig_17_5',
 'lig_25_0',
 'lig_25_1',
 'lig_25_2',
 'lig_25_3',
 'lig_25_4',
 'lig_27_0',
 'lig_27_1',
 'lig_27_2',
 'lig_39_0',
 'lig_39_1',
 'lig_39_2',
 'lig_39_3',
 'lig_39_4',
 'lig_47_0',
 'lig_47_1',
 'lig_47_2',
 'lig_47_3',
 'lig_47_4',
 'lig_55_0',
 'lig_55_1',
 'lig_55_2',
 'lig_59_0',
 'lig_59_2',
 'lig_59_3',
 'lig_76_0',
 'lig_76_1']

In [None]:
# Create directories for each ligand

def create_dir(ligand_name, ligand_rdmol):
    """Create ``<ligand_name>/input`` and populate it with the run inputs.

    The directory receives three relative symlinks (``run_somd.sh``,
    ``template_config.cfg`` and ``protein.pdb``) pointing into the shared
    ``input`` directory, plus ``ligand.sdf`` written from ``ligand_rdmol``.

    Args:
        ligand_name: Name of the ligand; used as the top-level directory name
            and looked up in the notebook-level ``charged_ligands`` list to
            choose the config template.
        ligand_rdmol: RDKit molecule written out as ``ligand.sdf``.

    Raises:
        FileExistsError: If ``<ligand_name>/input`` already exists
            (``exist_ok=False``), so existing set-ups are never overwritten.
    """
    input_dir = f'{ligand_name}/input'
    os.makedirs(input_dir, exist_ok=False)

    def link_into_input(target, link_name):
        # Targets are relative to the link's own location, i.e. <ligand_name>/input
        os.symlink(target, f'{input_dir}/{link_name}')

    link_into_input('../../input/run_somd.sh', 'run_somd.sh')

    # Charged ligands get their own template, but the link name is the same
    # in both cases.
    is_charged = ligand_name in charged_ligands
    config_template = 'template_config_charged.cfg' if is_charged else 'template_config.cfg'
    link_into_input(f'../../input/{config_template}', 'template_config.cfg')

    # The SARS protein structure is linked rather than copied
    link_into_input('../../input/full_run-SARS/sars_sanitised_for_amber.pdb', 'protein.pdb')

    # Write out the ligand as ligand.sdf; the molecule is written as-is
    with Chem.SDWriter(f'{input_dir}/ligand.sdf') as writer:
        writer.write(ligand_rdmol)

# conformers maps ligand name -> RDKit molecule, so items() yields
# (name, molecule) pairs; unpacking into three names raises ValueError.
for lig_name, lig_rdmol in conformers.items():
    create_dir(lig_name, lig_rdmol)


In [2]:
def parse_lig_dir_name(name):
    """Return ``(ligand_number, conformer_number)`` for a ``lig_<n>_<m>`` name.

    Returns ``None`` for any name that does not have exactly that form, so
    unrelated entries that merely contain "lig" (e.g. ``ligands.csv``) can be
    filtered out instead of crashing the integer conversion.
    """
    parts = name.split('_')
    if len(parts) != 3 or parts[0] != 'lig':
        return None
    if not (parts[1].isdecimal() and parts[2].isdecimal()):
        return None
    return int(parts[1]), int(parts[2])


# Keep only entries named lig_<ligand>_<conformer> in the current directory and
# sort first by the ligand number, then by the conformer number
lig_dirs = [d for d in os.listdir() if parse_lig_dir_name(d) is not None]
lig_dirs.sort(key=parse_lig_dir_name)

In [3]:
# List every ligand directory next to its position in the sorted order
for i in range(len(lig_dirs)):
    lig_dir = lig_dirs[i]
    print(f'{i}: {lig_dir}')

0: lig_0_0
1: lig_0_1
2: lig_1_0
3: lig_1_1
4: lig_2_0
5: lig_2_1
6: lig_3_0
7: lig_3_1
8: lig_3_2
9: lig_4_0
10: lig_4_1
11: lig_4_2
12: lig_6_0
13: lig_6_2
14: lig_6_4
15: lig_6_5
16: lig_6_6
17: lig_6_7
18: lig_6_8
19: lig_6_9
20: lig_6_10
21: lig_6_11
22: lig_7_0
23: lig_7_2
24: lig_7_4
25: lig_7_5
26: lig_7_7
27: lig_7_9
28: lig_7_10
29: lig_7_11
30: lig_7_12
31: lig_7_14
32: lig_7_15
33: lig_8_0
34: lig_8_1
35: lig_8_2
36: lig_8_3
37: lig_9_0
38: lig_9_1
39: lig_9_2
40: lig_9_3
41: lig_9_4
42: lig_9_5
43: lig_9_6
44: lig_9_7
45: lig_10_0
46: lig_10_1
47: lig_10_2
48: lig_10_3
49: lig_11_0
50: lig_11_4
51: lig_11_5
52: lig_14_0
53: lig_14_1
54: lig_15_0
55: lig_15_1
56: lig_15_2
57: lig_15_3
58: lig_15_4
59: lig_16_0
60: lig_16_1
61: lig_17_0
62: lig_17_1
63: lig_17_2
64: lig_17_3
65: lig_17_4
66: lig_17_5
67: lig_18_0
68: lig_18_1
69: lig_18_2
70: lig_18_3
71: lig_25_0
72: lig_25_1
73: lig_25_2
74: lig_25_3
75: lig_25_4
76: lig_26_0
77: lig_26_1
78: lig_26_2
79: lig_26_3
80: lig_