# Dipeptide 2-D TorsionDrive

Prepare 2-D TorsionDrives on phi (C-N-CA-C) and psi (N-CA-C-N) for alanine dipeptide (Ace-Ala-Nme) and for two rotamers each of proline dipeptide (Ace-Pro-Nme) and tryptophan dipeptide (Ace-Trp-Nme).

Starting conformations come from the OpenFF port of Amber ff14SB at https://github.com/openforcefield/amber-ff-porting/blob/master/AllDipeptides.tar.gz

In [1]:
import json
import numpy as np
from openeye import oechem
from openff.qcsubmit.datasets import TorsiondriveDataset
from openff.qcsubmit.factories import TorsiondriveDatasetFactory
from openff.qcsubmit.workflow_components import TorsionIndexer
from openff.toolkit.topology import Molecule
from openff.toolkit.typing.engines.smirnoff import ForceField
from openff.toolkit.utils import GLOBAL_TOOLKIT_REGISTRY
import os
import qcportal as ptl
from simtk.openmm import openmm
from simtk import unit

# Multiply an angle in rad by this factor to get deg; divide an angle in deg by it to get rad
rad_to_deg = 180 / np.pi

# SMIRKS patterns whose atoms tagged :1 to :4 define each dihedral.
# The fourth atom in chi2 is not hydrogen and not bonded to 3 ring atoms to select CA-CB-CG-CD1 for TRP
dihedral_smirks = dict(
    phi = '[#6X4]-[#6X3:1](=O)-[#7X3:2]-[#6X4:3]-[#6X3:4](=O)-[#7X3]-[#6X4]',
    psi = '[#6X4]-[#6X3](=O)-[#7X3:1]-[#6X4:2]-[#6X3:3](=O)-[#7X3:4]-[#6X4]',
    chi1 = '[#6X4]-[#6X3](=O)-[#7X3:1]-[#6X4:2](-[#6X3](=O)-[#7X3]-[#6X4])-[#6X4:3]-[!#1:4]',
    chi2 = '[#6X4]-[#6X3](=O)-[#7X3]-[#6X4:1](-[#6X3](=O)-[#7X3]-[#6X4])-[#6X4:2]-[#6:3]~[!#1!R3:4]',
)

# Initial dataset is only Ala, Pro, and Trp. The longer residue list is kept here, disabled, for reference.
#residue_names = [
#    'ALA', 'ARG', 'ASH', 'ASN', 'ASP', 'CYS', 'CYX', 'GLH', 'GLN', 'GLU', 'GLY', 'HID', 'HIE',
#    'HIP', 'ILE', 'LEU', 'LYN', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'
#]
residue_names = ['ALA', 'PRO', 'TRP']

# Number of rotamers per residue that the later minimization cell reads back
max_rotamers = {'ALA': 1, 'PRO': 2, 'TRP': 2}

# Grid spacing in deg for each backbone dihedral, one independent dict per residue
dihedral_spacing = {res_name: {'phi': 15, 'psi': 15} for res_name in residue_names}

# Grid values in rad for each backbone dihedral
# Restrict proline phi to [-180, 60] to avoid ring strain; every other scan covers [-180, 180)
dihedral_range = {}
for res_name, spacing in dihedral_spacing.items():

    phi_stop = 61 if res_name == 'PRO' else 180
    dihedral_range[res_name] = {
        'phi': np.arange(-180, phi_stop, spacing['phi']) / rad_to_deg,
        'psi': np.arange(-180, 180, spacing['psi']) / rad_to_deg,
    }


# Generate dipeptide rotamers

Use the OpenEye Toolkit to generate all sidechain rotamers and scan backbone dihedrals for each dipeptide. Write an SDF file for each rotamer with backbone dihedral scans as conformers.

In [2]:
# Return the atom of molecule that satisfies both a residue match predicate and an atom name
# residue_predicate is wrapped in oechem.OEAtomMatchResidue and atom_name in oechem.OEHasAtomName;
# the result is whatever molecule.GetAtom returns for the combined predicate
def get_residue_atom(molecule, residue_predicate, atom_name):

    in_residue = oechem.OEAtomMatchResidue(residue_predicate)
    has_name = oechem.OEHasAtomName(atom_name)
    return molecule.GetAtom(oechem.OEAndAtom(in_residue, has_name))

# Return a numpy array of the rotamers for a residue, ordered from highest to lowest probability,
# and print a table of their probabilities and sidechain dihedrals
# res_name is a str corresponding to the 3-letter code in all caps
def get_sorted_rotamers(res_name):

    rotamers = list(oechem.OEGetRotamers(oechem.OEGetResidueIndex(res_name)))
    probabilities = [rot.GetProbability() for rot in rotamers]

    # argsort is ascending, so reverse it to rank the most probable rotamer first
    descending = np.argsort(probabilities)[::-1]
    sorted_rotamers = np.array(rotamers)[descending]

    # One table row per rotamer: rank, probability, then chi 1 through chi 4
    row_format = '{:2d}  {:6.2f}  {:6.1f}  {:6.1f}  {:6.1f}  {:6.1f}'
    print('#   Prob    Chi 1   Chi 2   Chi 3   Chi 4')
    for rank, rot in enumerate(sorted_rotamers):
        print(row_format.format(
            rank, rot.GetProbability(), rot.GetChi1(), rot.GetChi2(), rot.GetChi3(), rot.GetChi4()))

    return sorted_rotamers

# Flatten a hierarchy view into a list of its residues, walking chains, then fragments, then residues
# polymer is an OEHierView
def get_residues(polymer):

    return [
        res
        for chain in polymer.GetChains()
        for fragment in chain.GetFragments()
        for res in fragment.GetResidues()
    ]

# Collect the backbone and sidechain dihedrals of a residue into a dict of angles in deg
# Keys are 'phi', 'psi', 'omega' (only when available), and 'chi<N>' for each index from oechem.OEGetChis
# residue is an OEHierResidue or an OEAtom
def get_dihedrals(residue):

    phi_rad = oechem.OEGetPhi(residue)
    psi_rad = oechem.OEGetPsi(residue)
    dihedrals = {'phi': phi_rad * rad_to_deg, 'psi': psi_rad * rad_to_deg}

    # A value of exactly -100.0 is treated as "no omega" and the key is left out
    # (presumably the OpenEye sentinel for an undefined torsion; verify against the OEChem docs)
    omega_rad = oechem.OEGetTorsion(residue, 3)
    if omega_rad != -100.0:
        dihedrals['omega'] = omega_rad * rad_to_deg

    # Torsion indices from OEGetChis are offset by 3 relative to the chi number used in the key
    dihedrals.update(
        ('chi{:d}'.format(chi_idx - 3), oechem.OEGetTorsion(residue, chi_idx) * rad_to_deg)
        for chi_idx in oechem.OEGetChis(residue)
    )

    return dihedrals

# Print one line with phi, psi, omega (when present), and each chi dihedral of a residue in deg
# residue is an OEHierResidue or an OEAtom
def print_dihedrals(residue):

    dihedrals = get_dihedrals(residue)

    # Column order: backbone dihedrals first, then sidechain dihedrals in OEGetChis order
    columns = ['phi', 'psi']
    if 'omega' in dihedrals:
        columns.append('omega')
    columns.extend('chi{:d}'.format(chi_idx - 3) for chi_idx in oechem.OEGetChis(residue))

    print(' '.join('{:8.3f}'.format(dihedrals[name]) for name in columns))

# Place atom td in the target molecule so that its internal coordinates (distance to tc, angle with tc and tb,
# dihedral with tc, tb, and ta) equal those of atom rd with respect to rc, rb, and ra in the reference molecule.
# Reference and target are OE molecules. ra, rb, rc, rd, ta, tb, tc, and td are OE atoms.
# overwrite_angle, if given, replaces the rb-rc-rd angle with a value in rad.
# overwrite_dihedral, if given, replaces the ra-rb-rc-rd dihedral with a value in rad.
# Only the coordinates of td are modified; nothing is returned.
def copy_internal_coords(
    reference, target, ra, rb, rc, rd, ta, tb, tc, td, overwrite_angle = None, overwrite_dihedral = None):

    # Internal coordinates of d, read from the reference unless overridden
    bond_length = oechem.OEGetDistance(reference, rd, rc)
    angle = oechem.OEGetAngle(reference, rd, rc, rb) if overwrite_angle is None else overwrite_angle
    dihedral = (
        oechem.OEGetTorsion(reference, rd, rc, rb, ra) if overwrite_dihedral is None else overwrite_dihedral)

    # Displacement from c to d expressed in the local frame built below
    radial = bond_length * np.sin(angle)
    local_offset = np.array([radial * np.sin(dihedral), radial * np.cos(dihedral), bond_length * np.cos(angle)])

    # Cartesian positions of a, b, and c in the target
    coords = target.GetCoords()
    pos_a = np.array(coords[ta.GetIdx()])
    pos_b = np.array(coords[tb.GetIdx()])
    pos_c = np.array(coords[tc.GetIdx()])

    # z axis is the unit vector from c to b
    c_to_b = pos_b - pos_c
    z_axis = c_to_b / np.linalg.norm(c_to_b)

    # Unit normal to the plane containing a, b, and c
    b_to_a = pos_a - pos_b
    b_to_a = b_to_a / np.linalg.norm(b_to_a)
    plane_normal = np.cross(b_to_a, z_axis)
    plane_normal = plane_normal / np.linalg.norm(plane_normal)

    # y axis lies in the a-b-c plane and is orthogonal to z; x axis completes the frame
    y_axis = np.cross(z_axis, plane_normal)
    y_axis = y_axis / np.linalg.norm(y_axis)
    x_axis = np.cross(y_axis, z_axis)
    x_axis = x_axis / np.linalg.norm(x_axis)

    # Columns of the frame matrix are the local axes, so the product maps the local offset to Cartesian space
    frame = np.array([x_axis, y_axis, z_axis]).T
    target.SetCoords(td, pos_c + np.dot(frame, local_offset))

In [3]:
# Residue ID predicates for the two caps, plus one whose name is set per residue inside the main loop
ace_pred = oechem.OEAtomMatchResidueID()
nme_pred = oechem.OEAtomMatchResidueID()
res_name_pred = oechem.OEAtomMatchResidueID()
ace_pred.SetName('ACE')
nme_pred.SetName('NME')

# Dictionary of dihedrals in deg indexed by residue name, then rotamer index, then conformer index,
# then dihedral name
dihedrals_by_conf_idx = {}

# Create residues for Ace and Nme used to build Ace-Gly-Nme and Ace-Pro-Nme manually
# Both caps share chain 'A' and fragment 0; Ace is residue number 0 and Nme is residue number 2
if any(name in residue_names for name in ('GLY', 'PRO')):

    ace = oechem.OEResidue()
    nme = oechem.OEResidue()
    for cap, cap_name, cap_number in ((ace, 'ACE', 0), (nme, 'NME', 2)):
        cap.SetChainID('A')
        cap.SetFragmentNumber(0)
        cap.SetName(cap_name)
        cap.SetResidueNumber(cap_number)

# Build backbone-scan conformers for every residue in residue_names and write them to SDF.
# For each residue, the dipeptide is read from AllDipeptides/MainChain/<RES>/<RES>.mol2, residues are perceived
# (or assigned by atom name for GLY and PRO), and the phi/psi grid from dihedral_range is applied for each
# sidechain rotamer. Each rotamer is written to dipeptide_rotamers/<RES>/<RES>_rotamer_<N>.sdf with one conformer
# per (phi, psi) grid point, and the dihedrals of every conformer are stored in dihedrals_by_conf_idx.
# Three paths exist: proline (ring rebuilt by hand from internal coordinates), residues with rotamers
# (OESetRotamer + OESetTorsion), and residues without rotamers (OESetTorsion only, single output file).
with oechem.oemolistream() as ifs, oechem.oemolostream() as ofs:

    for res_name in residue_names:

        print('{:3s}'.format(res_name))
        os.makedirs(os.path.join('dipeptide_rotamers', res_name), exist_ok = True)

        if ifs.open(os.path.join('AllDipeptides', 'MainChain', res_name, '{}.mol2'.format(res_name))):

            for mol in ifs.GetOEGraphMols():

                dihedrals_by_conf_idx[res_name] = {}

                # Move center of mass to origin and perceive stereochemistry
                oechem.OECenter(mol)
                oechem.OE3DToInternalStereo(mol)

                # Perceive residues, preserving atom order
                atoms = [atom for atom in mol.GetAtoms()]
                oechem.OEPerceiveResidues(mol)
                mol.OrderAtoms(atoms)

                # OEPerceiveResidues() fails for Ace-capped glycine and proline
                if res_name == 'GLY':

                    # Create residue for Gly
                    gly = oechem.OEResidue()
                    gly.SetChainID('A')
                    gly.SetFragmentNumber(0)
                    gly.SetName('GLY')
                    gly.SetResidueNumber(1)

                    # Add atoms to residues manually
                    for atom in mol.GetAtoms():

                        atom_name = atom.GetName().strip()
                        if atom_name in ['H1', 'C2', 'H3', 'H4', 'C5', 'O6']:
                            oechem.OEAtomSetResidue(atom, ace)
                        elif atom_name in ['N7', 'H8', 'C9', 'H10', 'H11', 'C12', 'O13']:
                            oechem.OEAtomSetResidue(atom, gly)
                        elif atom_name in ['N14', 'H15', 'C16', 'H17', 'H18', 'H19']:
                            oechem.OEAtomSetResidue(atom, nme)

                elif res_name == 'PRO':

                    # Create residue for Pro
                    pro = oechem.OEResidue()
                    pro.SetChainID('A')
                    pro.SetFragmentNumber(0)
                    pro.SetName('PRO')
                    pro.SetResidueNumber(1)

                    # Add atoms to residues manually
                    for atom in mol.GetAtoms():

                        atom_name = atom.GetName().strip()
                        if atom_name in ['H1', 'C2', 'H3', 'H4', 'C5', 'O6']:
                            oechem.OEAtomSetResidue(atom, ace)

                        elif atom_name in ['N7', 'C8', 'H9', 'H10', 'C11', 'H12', 'H13', 'C14', 'H15', 'H16',
                            'C17', 'H18', 'C19', 'O20']:

                            oechem.OEAtomSetResidue(atom, pro)

                        elif atom_name in ['N21', 'H22', 'C23', 'H24', 'H25', 'H26']:
                            oechem.OEAtomSetResidue(atom, nme)

                # Get hierarchy view of molecule and list of sidechain rotamers sorted by probability
                hv_mol = oechem.OEHierView(mol)
                mol_residues = get_residues(hv_mol)
                mol_rotamers = get_sorted_rotamers(res_name)

                # Get atoms that make up phi and psi
                # For proline, get side chain atoms and ACE cap atoms as well
                res_name_pred.SetName(res_name)
                if res_name == 'GLY':

                    c_ace = get_residue_atom(mol, ace_pred, ' C5 ')
                    n = get_residue_atom(mol, res_name_pred, ' N7 ')
                    ca = get_residue_atom(mol, res_name_pred, ' C9 ')
                    c = get_residue_atom(mol, res_name_pred, ' C12')
                    n_nme = get_residue_atom(mol, nme_pred, ' N14')

                elif res_name == 'PRO':

                    c_ace = get_residue_atom(mol, ace_pred, ' C5 ')
                    n = get_residue_atom(mol, res_name_pred, ' N7 ')
                    ca = get_residue_atom(mol, res_name_pred, ' C17')
                    ha = get_residue_atom(mol, res_name_pred, ' H18')
                    cb = get_residue_atom(mol, res_name_pred, ' C14')
                    hb1 = get_residue_atom(mol, res_name_pred, ' H15')
                    hb2 = get_residue_atom(mol, res_name_pred, ' H16')
                    cg = get_residue_atom(mol, res_name_pred, ' C11')
                    hg1 = get_residue_atom(mol, res_name_pred, ' H12')
                    hg2 = get_residue_atom(mol, res_name_pred, ' H13')
                    cd = get_residue_atom(mol, res_name_pred, ' C8 ')
                    hd1 = get_residue_atom(mol, res_name_pred, ' H9 ')
                    hd2 = get_residue_atom(mol, res_name_pred, ' H10')
                    c = get_residue_atom(mol, res_name_pred, ' C19')
                    o = get_residue_atom(mol, res_name_pred, ' O20')
                    n_nme = get_residue_atom(mol, nme_pred, ' N21')
                    h_nme = get_residue_atom(mol, nme_pred, ' H22')
                    ch3_nme = get_residue_atom(mol, nme_pred, ' C23')
                    h1_nme = get_residue_atom(mol, nme_pred, ' H24')
                    h2_nme = get_residue_atom(mol, nme_pred, ' H25')
                    h3_nme = get_residue_atom(mol, nme_pred, ' H26')

                    # Reference molecule for reference internal coordinates
                    # It is a copy taken before any coordinates of mol are modified below
                    ref_mol = oechem.OEGraphMol(mol)

                    ref_c_ace = get_residue_atom(ref_mol, ace_pred, ' C5 ')
                    ref_n = get_residue_atom(ref_mol, res_name_pred, ' N7 ')
                    ref_ca = get_residue_atom(ref_mol, res_name_pred, ' C17')
                    ref_ha = get_residue_atom(ref_mol, res_name_pred, ' H18')
                    ref_cb = get_residue_atom(ref_mol, res_name_pred, ' C14')
                    ref_hb1 = get_residue_atom(ref_mol, res_name_pred, ' H15')
                    ref_hb2 = get_residue_atom(ref_mol, res_name_pred, ' H16')
                    ref_cg = get_residue_atom(ref_mol, res_name_pred, ' C11')
                    ref_hg1 = get_residue_atom(ref_mol, res_name_pred, ' H12')
                    ref_hg2 = get_residue_atom(ref_mol, res_name_pred, ' H13')
                    ref_cd = get_residue_atom(ref_mol, res_name_pred, ' C8 ')
                    ref_hd1 = get_residue_atom(ref_mol, res_name_pred, ' H9 ')
                    ref_hd2 = get_residue_atom(ref_mol, res_name_pred, ' H10')
                    ref_c = get_residue_atom(ref_mol, res_name_pred, ' C19')
                    ref_o = get_residue_atom(ref_mol, res_name_pred, ' O20')
                    ref_n_nme = get_residue_atom(ref_mol, nme_pred, ' N21')
                    ref_h_nme = get_residue_atom(ref_mol, nme_pred, ' H22')
                    ref_ch3_nme = get_residue_atom(ref_mol, nme_pred, ' C23')
                    ref_h1_nme = get_residue_atom(ref_mol, nme_pred, ' H24')
                    ref_h2_nme = get_residue_atom(ref_mol, nme_pred, ' H25')
                    ref_h3_nme = get_residue_atom(ref_mol, nme_pred, ' H26')

                    # Reference CB-CG and CG-CD bond lengths (and their squares) used to close the ring
                    ref_cb_cg_distance = oechem.OEGetDistance(ref_mol, ref_cb, ref_cg)
                    ref_cg_cd_distance = oechem.OEGetDistance(ref_mol, ref_cg, ref_cd)
                    ref_cb_cg_dist_sq = ref_cb_cg_distance * ref_cb_cg_distance
                    ref_cg_cd_dist_sq = ref_cg_cd_distance * ref_cg_cd_distance

                else:

                    c_ace = get_residue_atom(mol, ace_pred, ' C  ')
                    n = get_residue_atom(mol, res_name_pred, ' N  ')
                    ca = get_residue_atom(mol, res_name_pred, ' CA ')
                    c = get_residue_atom(mol, res_name_pred, ' C  ')
                    n_nme = get_residue_atom(mol, nme_pred, ' N  ')

                # Write conformers for each rotamer to SDF. OESetRotamer fails for Ace-Pro-Nme, and OESetTorsion
                # fails for rings, so set side chain torsions and phi for proline manually
                if res_name == 'PRO':

                    for i in range(mol_rotamers.size):

                        dihedrals_by_conf_idx[res_name][i] = {}
                        conf_idx = 0

                        # Create copy of OE molecule with no conformers
                        new_mol = oechem.OEMol(mol)
                        new_mol.DeleteConfs()

                        # Get side chain dihedrals for proline rotamers
                        rot_chi1 = mol_rotamers[i].GetChi1() / rad_to_deg
                        rot_chi2 = mol_rotamers[i].GetChi2() / rad_to_deg
                        print('%3s rotamer %d' % (res_name, i))
                        print('{:8.3f} {:8.3f} {:8.3f} {:8.3f}'.format(
                            oechem.OEGetTorsion(mol, c_ace, n, ca, c) * rad_to_deg,
                            oechem.OEGetTorsion(mol, n, ca, c, n_nme) * rad_to_deg,
                            rot_chi1 * rad_to_deg, rot_chi2 * rad_to_deg
                        ))

                        # Scan backbone dihedrals
                        for phi in dihedral_range[res_name]['phi']:

                            # Try to find good ring atom positions for given value of phi
                            # Otherwise increment phi towards -60 deg
                            sqrt_arg = -1
                            tmp_phi = phi
                            while sqrt_arg < 0:

                                # Given coordinates of acetyl cap and proline N and CA, CD is also fixed by its
                                # internal coordinates with respect to N, CA, and the acetyl C.
                                # Set coordinates of proline C using phi and internal coordinates of CA and N.
                                copy_internal_coords(
                                    ref_mol, mol, ref_c_ace, ref_n, ref_ca, ref_c, c_ace, n, ca, c,
                                    overwrite_dihedral = tmp_phi
                                )

                                # Set coordinates of CB using internal coordinates of CA, N, and C
                                copy_internal_coords(
                                    ref_mol, mol, ref_c, ref_n, ref_ca, ref_cb, c, n, ca, cb)

                                # Set coordinates of CG using bond distances to CB and CD and the rotamer chi1. To
                                # do this, use the internal coordinates of CD with respect to CB, CA, and N to find
                                # the angle CA-CB-CG that satisfies the CG-CD distance in spherical coordinates
                                # D^2 = RG^2 + RD^2 - 2 RG RD (sin AG sin AD cos(PhiG - PhiD) + cos AG cos AD)
                                # A sin AG + B cos AG + C = 0
                                # A = 2 RG RD sin AD cos(PhiG - PhiD)
                                # B = 2 RG RD cos AD
                                # C = D^2 - RG^2 - RD^2
                                # AG = 2 arctan((A +/- sqrt(A^2 + B^2 - C^2)) / (B - C))
                                cb_cd_distance = oechem.OEGetDistance(mol, cb, cd)
                                ca_cb_cd_angle = oechem.OEGetAngle(mol, ca, cb, cd)
                                n_ca_cb_cd_dihedral = oechem.OEGetTorsion(mol, n, ca, cb, cd)
                                A = 2 * ref_cb_cg_distance * cb_cd_distance * np.sin(ca_cb_cd_angle) * np.cos(
                                    rot_chi1 - n_ca_cb_cd_dihedral)
                                B = 2 * ref_cb_cg_distance * cb_cd_distance * np.cos(ca_cb_cd_angle)
                                C = ref_cg_cd_dist_sq - ref_cb_cg_dist_sq - cb_cd_distance * cb_cd_distance
                                sqrt_arg = A * A + B * B - C * C

                                # Step tmp_phi for the next attempt. When sqrt_arg is non-negative the loop
                                # exits, so the accepted geometry was built with the value before this step.
                                if tmp_phi > -60 / rad_to_deg:
                                    tmp_phi -= dihedral_spacing[res_name]['phi'] / rad_to_deg
                                else:
                                    tmp_phi += dihedral_spacing[res_name]['phi'] / rad_to_deg

                            # Take the "+" root of the ring-closure equation for the CA-CB-CG angle
                            ca_cb_cg_angle = 2 * np.arctan((A + np.sqrt(sqrt_arg)) / (B - C))

                            copy_internal_coords(
                                ref_mol, mol, ref_n, ref_ca, ref_cb, ref_cg, n, ca, cb, cg,
                                overwrite_angle = ca_cb_cg_angle, overwrite_dihedral = rot_chi1
                            )

                            # Set coordinates of proline ring hydrogens using internal coordinates
                            copy_internal_coords(
                                ref_mol, mol, ref_c, ref_n, ref_ca, ref_ha, c, n, ca, ha)
                            copy_internal_coords(
                                ref_mol, mol, ref_cg, ref_ca, ref_cb, ref_hb1, cg, ca, cb, hb1)
                            copy_internal_coords(
                                ref_mol, mol, ref_cg, ref_ca, ref_cb, ref_hb2, cg, ca, cb, hb2)
                            copy_internal_coords(
                                ref_mol, mol, ref_cd, ref_cb, ref_cg, ref_hg1, cd, cb, cg, hg1)
                            copy_internal_coords(
                                ref_mol, mol, ref_cd, ref_cb, ref_cg, ref_hg2, cd, cb, cg, hg2)
                            copy_internal_coords(
                                ref_mol, mol, ref_n, ref_cg, ref_cd, ref_hd1, n, cg, cd, hd1)
                            copy_internal_coords(
                                ref_mol, mol, ref_n, ref_cg, ref_cd, ref_hd2, n, cg, cd, hd2)

                            for psi in dihedral_range[res_name]['psi']:

                                # Set coordinates of N-methyl nitrogen using psi
                                copy_internal_coords(
                                    ref_mol, mol, ref_n, ref_ca, ref_c, ref_n_nme, n, ca, c, n_nme,
                                    overwrite_dihedral = psi
                                )

                                # Set coordinates of proline carbonyl O and N-methyl cap from internal coordinates
                                copy_internal_coords(
                                    ref_mol, mol, ref_n_nme, ref_ca, ref_c, ref_o, n_nme, ca, c, o)
                                copy_internal_coords(
                                    ref_mol, mol, ref_ca, ref_c, ref_n_nme, ref_ch3_nme, ca, c, n_nme, ch3_nme)
                                copy_internal_coords(
                                    ref_mol, mol, ref_ch3_nme, ref_c, ref_n_nme, ref_h_nme, ch3_nme, c, n_nme,
                                    h_nme
                                )
                                copy_internal_coords(
                                    ref_mol, mol, ref_c, ref_n_nme, ref_ch3_nme, ref_h1_nme, c, n_nme, ch3_nme,
                                    h1_nme
                                )
                                copy_internal_coords(
                                    ref_mol, mol, ref_h1_nme, ref_n_nme, ref_ch3_nme, ref_h2_nme, h1_nme, n_nme,
                                    ch3_nme, h2_nme
                                )
                                copy_internal_coords(
                                    ref_mol, mol, ref_h1_nme, ref_n_nme, ref_ch3_nme, ref_h3_nme, h1_nme, n_nme,
                                    ch3_nme, h3_nme
                                )

                                # Store the conformer. The recorded phi is the requested grid value (not
                                # tmp_phi); psi and the chi angles are measured from the built geometry.
                                new_mol.NewConf(mol)
                                dihedrals_by_conf_idx[res_name][i][conf_idx] = {
                                    'phi': phi * rad_to_deg,
                                    'psi': oechem.OEGetTorsion(mol, n, ca, c, n_nme) * rad_to_deg,
                                    'chi1': oechem.OEGetTorsion(mol, n, ca, cb, cg) * rad_to_deg,
                                    'chi2': oechem.OEGetTorsion(mol, ca, cb, cg, cd) * rad_to_deg,
                                    'chi3': oechem.OEGetTorsion(mol, cb, cg, cd, n) * rad_to_deg
                                }
                                conf_idx += 1

                        # Write molecule to SDF
                        if ofs.open(os.path.join('dipeptide_rotamers', res_name, '{}_rotamer_{:d}.sdf'.format(
                            res_name, i + 1))):

                            oechem.OEWriteMolecule(ofs, new_mol)

                        else:
                            print('Error writing {:3s} rotamer {:2d}'.format(res_name, i + 1))

                elif mol_rotamers.size > 0:

                    for i in range(mol_rotamers.size):

                        dihedrals_by_conf_idx[res_name][i] = {}
                        conf_idx = 0

                        # Create copy of OE molecule with no conformers
                        new_mol = oechem.OEMol(mol)
                        new_mol.DeleteConfs()

                        # Set sidechain dihedrals to rotamer values
                        # mol_residues[1] is used as the amino acid residue (presumably index 0 is the Ace cap)
                        oechem.OESetRotamer(mol_residues[1], mol_rotamers[i])
                        print('{:3s} rotamer {:2d}'.format(res_name, i))
                        print_dihedrals(mol_residues[1])

                        # Scan backbone dihedrals
                        for phi in dihedral_range[res_name]['phi']:

                            oechem.OESetTorsion(mol, c_ace, n, ca, c, phi)

                            for psi in dihedral_range[res_name]['psi']:

                                oechem.OESetTorsion(mol, n, ca, c, n_nme, psi)
                                new_mol.NewConf(mol)
                                dihedrals_by_conf_idx[res_name][i][conf_idx] = get_dihedrals(mol_residues[1])
                                conf_idx += 1

                        # Write molecule to SDF
                        if ofs.open(os.path.join('dipeptide_rotamers', res_name, '{}_rotamer_{:d}.sdf'.format(
                            res_name, i + 1))):

                            oechem.OEWriteMolecule(ofs, new_mol)

                        else:
                            print('Error writing {:3s} rotamer {:2d}'.format(res_name, i + 1))

                else:

                    # No sidechain rotamers: write a single backbone scan stored as rotamer index 0
                    dihedrals_by_conf_idx[res_name][0] = {}
                    conf_idx = 0

                    # Create copy of OE molecule with no conformers
                    new_mol = oechem.OEMol(mol)
                    new_mol.DeleteConfs()

                    # Scan backbone dihedrals
                    for phi in dihedral_range[res_name]['phi']:

                        oechem.OESetTorsion(mol, c_ace, n, ca, c, phi)

                        for psi in dihedral_range[res_name]['psi']:

                            oechem.OESetTorsion(mol, n, ca, c, n_nme, psi)
                            new_mol.NewConf(mol)
                            dihedrals_by_conf_idx[res_name][0][conf_idx] = get_dihedrals(mol_residues[1])
                            conf_idx += 1

                    # Write molecule to SDF
                    if ofs.open(os.path.join('dipeptide_rotamers', res_name, '{}_rotamer_1.sdf'.format(res_name))):
                        oechem.OEWriteMolecule(ofs, new_mol)

                    else:
                        # There is no rotamer loop index in this branch and the file is always rotamer 1,
                        # so report that number directly instead of an undefined or stale loop variable
                        print('Error writing {:3s} rotamer {:2d}'.format(res_name, 1))

        else:
            print('Error reading {:3s}'.format(res_name))

# Save the per-conformer dihedrals as JSON so a later cell can reload them
dihedrals_path = os.path.join('dipeptide_rotamers', 'dihedrals_by_conf_idx')
with open(dihedrals_path, 'w') as out_file:
    json.dump(dihedrals_by_conf_idx, out_file)

ALA
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
PRO
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   50.62   -25.0    36.0     0.0     0.0
 1   49.05    27.0   -34.0     0.0     0.0
PRO rotamer 0
 -63.481  123.684  -25.000   36.000
PRO rotamer 1
  15.000  165.000   27.000  -34.000
TRP
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   33.76   -67.0    97.0     0.0     0.0
 1   18.09  -179.0    65.0     0.0     0.0
 2   15.46  -177.0  -103.0     0.0     0.0
 3   11.73   -68.0    -7.0     0.0     0.0
 4   10.35    62.0   -89.0     0.0     0.0
 5    5.19    60.0    88.0     0.0     0.0
 6    5.13   -68.0   -89.0     0.0     0.0
TRP rotamer  0
-151.126  154.984  -67.000   97.000
TRP rotamer  1
 165.000  165.000 -179.000   65.000




TRP rotamer  2
 165.000  165.000 -177.000 -103.000
TRP rotamer  3
 165.000  165.000  -68.000   -7.000




TRP rotamer  4
 165.000  165.000   62.000  -89.000
TRP rotamer  5
 165.000  165.000   60.000   88.000




TRP rotamer  6
 165.000  165.000  -68.000  -89.000




# Minimize with OpenFF 2.0.0

To clean up valence terms, minimize dipeptide conformations with harmonic restraints on backbone and sidechain dihedrals.

In [5]:
# Force field used to build the OpenMM systems for the restrained minimization
force_field = ForceField('openff_unconstrained-2.0.0.offxml')

# Run the minimization on the OpenMM Reference platform
openmm_platform = openmm.Platform.getPlatformByName('Reference')

# Reload the per-conformer dihedrals from JSON; after the round trip all dict keys are strings
dihedrals_path = os.path.join('dipeptide_rotamers', 'dihedrals_by_conf_idx')
with open(dihedrals_path, 'r') as in_file:
    dihedrals_by_conf_idx = json.loads(in_file.read())

for res_name in residue_names:

    if res_name == 'PRO':
        pro_rotamers = get_sorted_rotamers('PRO')

    for i in range(max_rotamers[res_name]):

        rot_idx = i + 1

        # Read OFF molecule
        offmol = Molecule.from_file(
            os.path.join('dipeptide_rotamers', res_name, '{}_rotamer_{:d}.sdf'.format(res_name, rot_idx)))
        for j in range(1, len(offmol)):
            offmol[0].add_conformer(offmol[j].conformers[0])
        offmol = offmol[0]

        # Set up OpenMM system
        mapped_smiles = offmol.to_smiles(isomeric = True, mapped = True)
        new_mol = Molecule.from_mapped_smiles(mapped_smiles)
        openmm_system = force_field.create_openmm_system(new_mol.to_topology())

        # Harmonic restraint for periodic torsion
        # Restraint energy is k_over_2 * diff^2 with diff in rad, so k_over_2 = 4184 kJ mol^-1 rad^-2 (1000 kcal mol^-1 rad^-2)
        harmonic_restraint = openmm.CustomTorsionForce(
            'k_over_2 * diff * diff; diff = min(dphi, two_pi - dphi); dphi = abs(theta - phi_min)')
        harmonic_restraint.addGlobalParameter('two_pi', 2 * np.pi)
        harmonic_restraint.addGlobalParameter('k_over_2', 4184)
        harmonic_restraint.addPerTorsionParameter('phi_min')

        # Set up harmonic restraints for backbone and sidechain torsions
        dihedral_indices = {}
        # Register one restrained torsion per SMIRKS match for every named dihedral and remember
        # the (force torsion index, atom indices) pairs so the targets can be updated per conformer.
        # NOTE(review): harmonic_restraint is created above this excerpt; presumably it is a custom
        # torsion force whose single per-torsion parameter is the restraint minimum -- confirm.
        for dihedral in dihedral_smirks:

            dihedral_indices[dihedral] = []

            for j, atoms in enumerate(offmol.chemical_environment_matches(dihedral_smirks[dihedral])):

                # Log residue, rotamer, dihedral name, match number, and the four atom indices
                print('{:3s} {:2d} {:5s} {:2d} {:2d} {:2d} {:2d} {:2d}'.format(
                    res_name, rot_idx, dihedral, j, atoms[0], atoms[1], atoms[2], atoms[3]))

                # The per-torsion parameter starts at 0.0 and is overwritten for each conformer below
                torsion_idx = harmonic_restraint.addTorsion(atoms[0], atoms[1], atoms[2], atoms[3], [0.0])
                dihedral_indices[dihedral].append((torsion_idx, atoms))

        # The force must be added to the system before the context is created. A context needs an
        # integrator even though only energy minimization is run in this excerpt.
        openmm_system.addForce(harmonic_restraint)
        openmm_integrator = openmm.VerletIntegrator(0.001 * unit.femtoseconds)
        openmm_context = openmm.Context(openmm_system, openmm_integrator, openmm_platform)

        # Minimize conformers
        for conf_idx in range(offmol.n_conformers):

            # Set the per-torsion restraint parameter for every registered torsion (backbone and sidechain)
            for dihedral in dihedral_indices:
                for torsion_idx, atoms in dihedral_indices[dihedral]:

                    if res_name == 'PRO' and dihedral == 'chi2':

                        # Proline chi2 takes its target from the rotamer entry rather than from the
                        # per-conformer table; dividing by rad_to_deg converts degrees to radians
                        harmonic_restraint.setTorsionParameters(
                            torsion_idx, atoms[0], atoms[1], atoms[2], atoms[3],
                            [pro_rotamers[i].GetChi2() / rad_to_deg]
                        )

                    else:

                        # All other torsions use the value tabulated for this residue, rotamer, and
                        # conformer (string keys), converted from degrees to radians
                        harmonic_restraint.setTorsionParameters(
                            torsion_idx, atoms[0], atoms[1], atoms[2], atoms[3],
                            [dihedrals_by_conf_idx[res_name][str(i)][str(conf_idx)][dihedral] / rad_to_deg]
                        )

            # Push the modified torsion parameters into the existing context, then load this
            # conformer's coordinates
            harmonic_restraint.updateParametersInContext(openmm_context)
            openmm_context.setPositions(offmol.conformers[conf_idx].value_in_unit(unit.nanometers))

            # Minimize with the minimizer's default settings and store the result, in nanometers,
            # as a new conformer of new_mol
            openmm.LocalEnergyMinimizer.minimize(openmm_context)
            minimized_state = openmm_context.getState(getPositions = True)
            minimized_coordinates = unit.quantity.Quantity(
                np.array(minimized_state.getPositions().value_in_unit(unit.nanometers)), unit.nanometers)
            new_mol.add_conformer(minimized_coordinates)

        # Align conformers to first conformer using heavy atoms of acetyl group and N and CA of amino acid
        # The reference is a single-conformer copy of conformer 0 of the minimized molecule
        oemol = new_mol.to_openeye()
        ref_oemol = oechem.OEGraphMol(oemol.GetConf(oechem.OEHasConfIdx(0)))

        # Center reference molecule
        oechem.OECenter(ref_oemol)

        # Substructure search for methyl carbon and carbonyl atoms of acetyl group and N and CA of amino acid
        ace_n_ca_subsearch = oechem.OESubSearch()
        ace_n_ca_subsearch.Init('[#6X4H3]-[#6X3](=O)-[#7X3]-[#6X4H1]')

        # Create atom match pairs between target and reference molecules
        # NOTE(review): pairs from every combination of reference and target matches are added to
        # the same OEMatch; presumably the pattern is expected to match exactly once in each
        # molecule when unique matching is requested -- confirm for new residues.
        alignment_match = oechem.OEMatch()
        oesubsearch_match_unique = True
        for ref_match in ace_n_ca_subsearch.Match(ref_oemol, oesubsearch_match_unique):
            for target_match in ace_n_ca_subsearch.Match(oemol, oesubsearch_match_unique):
                for ref_atoms, target_atoms in zip(ref_match.GetAtoms(), target_match.GetAtoms()):
                    alignment_match.AddPair(ref_atoms.target, target_atoms.target)

        # Set up OpenEye RMSD alignment
        # Output arrays hold one RMSD, nine rotation matrix elements, and three translation
        # components per conformer slot
        oermsd_overlay = True
        N_confs = oemol.GetMaxConfIdx()
        rmsd_vector = oechem.OEDoubleArray(N_confs)
        rotation_matrix = oechem.OEDoubleArray(9 * N_confs)
        translation_vector = oechem.OEDoubleArray(3 * N_confs)

        # Compute rotation matrix and translation vector of alignment
        oechem.OERMSD(
            ref_oemol, oemol, rmsd_vector, alignment_match, oermsd_overlay, rotation_matrix, translation_vector)

        # Apply rotation then translation to align target molecule to reference molecule
        oechem.OERotate(oemol, rotation_matrix)
        oechem.OETranslate(oemol, translation_vector)

        # Write molecule with minimized and aligned conformers to SDF
        with oechem.oemolostream() as ofs:

            # open() returns a falsy value when the output file cannot be opened
            if ofs.open(os.path.join(
                'dipeptide_rotamers', res_name, '{}_min_rotamer_{:d}.sdf'.format(res_name, rot_idx))):

                oechem.OEWriteMolecule(ofs, oemol)

            else:
                # Report the failure and continue with the next rotamer instead of raising
                print('Error writing {:3s} rotamer {:2d}'.format(res_name, rot_idx))


ALA  1 phi    0  4  6  8 14
ALA  1 psi    0  6  8 14 16
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   50.62   -25.0    36.0     0.0     0.0
 1   49.05    27.0   -34.0     0.0     0.0
PRO  1 phi    0  4  6 16 18
PRO  1 psi    0  6 16 18 20
PRO  1 chi1   0  6 16 13 10
PRO  1 chi2   0 16 13 10  7
PRO  2 phi    0  4  6 16 18
PRO  2 psi    0  6 16 18 20
PRO  2 chi1   0  6 16 13 10
PRO  2 chi2   0 16 13 10  7
TRP  1 phi    0  4  6  8 28
TRP  1 psi    0  6  8 28 30
TRP  1 chi1   0  6  8 10 13
TRP  1 chi2   0  8 10 13 14
TRP  2 phi    0  4  6  8 28
TRP  2 psi    0  6  8 28 30
TRP  2 chi1   0  6  8 10 13
TRP  2 chi2   0  8 10 13 14


Visualize conformers for proline before and after minimization

In [6]:
# Load the unminimized conformers of proline rotamer 1. The file is read as a sequence of
# molecules; fold the coordinates of every later entry into the first one so that the viewer
# can step through all of them as frames.
offmol = Molecule.from_file('dipeptide_rotamers/PRO/PRO_rotamer_1.sdf')
for sdf_record in offmol[1:]:
    offmol[0].add_conformer(sdf_record.conformers[0])
offmol[0].visualize(backend = 'nglview')



NGLWidget(max_frame=407)

In [7]:
# Load the minimized conformers of proline rotamer 1, tolerating undefined stereochemistry in
# the file, and fold the coordinates of every later entry into the first molecule so that the
# viewer can step through all of them as frames.
offmol = Molecule.from_file('dipeptide_rotamers/PRO/PRO_min_rotamer_1.sdf', allow_undefined_stereo = True)
for sdf_record in offmol[1:]:
    offmol[0].add_conformer(sdf_record.conformers[0])
offmol[0].visualize(backend = 'nglview')

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

NGLWidget(max_frame=407)

In [8]:
# Load the unminimized conformers of proline rotamer 2. The file is read as a sequence of
# molecules; fold the coordinates of every later entry into the first one so that the viewer
# can step through all of them as frames.
offmol = Molecule.from_file('dipeptide_rotamers/PRO/PRO_rotamer_2.sdf')
for sdf_record in offmol[1:]:
    offmol[0].add_conformer(sdf_record.conformers[0])
offmol[0].visualize(backend = 'nglview')

NGLWidget(max_frame=407)

In [9]:
# Load the minimized conformers of proline rotamer 2, tolerating undefined stereochemistry in
# the file, and fold the coordinates of every later entry into the first molecule so that the
# viewer can step through all of them as frames.
offmol = Molecule.from_file('dipeptide_rotamers/PRO/PRO_min_rotamer_2.sdf', allow_undefined_stereo = True)
for sdf_record in offmol[1:]:
    offmol[0].add_conformer(sdf_record.conformers[0])
offmol[0].visualize(backend = 'nglview')

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

NGLWidget(max_frame=407)

Calculate dihedrals for minimized proline conformers

In [10]:
# Predicates that select atoms by residue name: the two capping groups and the central proline
ace_pred = oechem.OEAtomMatchResidueID()
nme_pred = oechem.OEAtomMatchResidueID()
res_name_pred = oechem.OEAtomMatchResidueID()
ace_pred.SetName('ACE')
nme_pred.SetName('NME')
res_name_pred.SetName('PRO')

# Residue records for ACE (0), PRO (1), and NME (2), all on chain A in fragment 0
ace = oechem.OEResidue()
nme = oechem.OEResidue()
pro = oechem.OEResidue()
for oe_residue, oe_residue_name, oe_residue_number in ((ace, 'ACE', 0), (nme, 'NME', 2), (pro, 'PRO', 1)):
    oe_residue.SetChainID('A')
    oe_residue.SetFragmentNumber(0)
    oe_residue.SetName(oe_residue_name)
    oe_residue.SetResidueNumber(oe_residue_number)

# Lookup from stripped atom name to the residue record the atom is assigned to
residue_by_atom_name = {}
residue_by_atom_name.update(dict.fromkeys(['H1', 'C2', 'H3', 'H4', 'C5', 'O6'], ace))
residue_by_atom_name.update(dict.fromkeys(
    ['N7', 'C8', 'H9', 'H10', 'C11', 'H12', 'H13', 'C14', 'H15', 'H16', 'C17', 'H18', 'C19', 'O20'], pro))
residue_by_atom_name.update(dict.fromkeys(['N21', 'H22', 'C23', 'H24', 'H25', 'H26'], nme))

for i in range(max_rotamers['PRO']):

    rot_idx = i + 1

    # Read the minimized conformers and gather them on the first molecule of the file
    min_sdf_path = os.path.join('dipeptide_rotamers', 'PRO', 'PRO_min_rotamer_{:d}.sdf'.format(rot_idx))
    offmol = Molecule.from_file(min_sdf_path, allow_undefined_stereo = True)
    for sdf_record in offmol[1:]:
        offmol[0].add_conformer(sdf_record.conformers[0])
    oemol = offmol[0].to_openeye()

    # Center the molecule, then perceive stereochemistry from the 3-D coordinates
    oechem.OECenter(oemol)
    oechem.OE3DToInternalStereo(oemol)

    # Residue perception is followed by restoring the atom order captured beforehand
    atoms = [atom for atom in oemol.GetAtoms()]
    oechem.OEPerceiveResidues(oemol)
    oemol.OrderAtoms(atoms)

    # Overwrite the residue of every atom whose name is in the lookup; other atoms are left alone
    for atom in oemol.GetAtoms():
        assigned_residue = residue_by_atom_name.get(atom.GetName().strip())
        if assigned_residue is not None:
            oechem.OEAtomSetResidue(atom, assigned_residue)

    # Hierarchy view of the molecule and the rotamer list for proline
    # (get_sorted_rotamers also prints the rotamer table shown in the cell output)
    hv_mol = oechem.OEHierView(oemol)
    mol_residues = get_residues(hv_mol)
    mol_rotamers = get_sorted_rotamers('PRO')

    # Atoms that define phi and psi, plus the proline ring atoms and the ACE carbonyl carbon,
    # looked up by residue predicate and padded atom name
    c_ace = get_residue_atom(oemol, ace_pred, ' C5 ')
    n = get_residue_atom(oemol, res_name_pred, ' N7 ')
    ca = get_residue_atom(oemol, res_name_pred, ' C17')
    cb = get_residue_atom(oemol, res_name_pred, ' C14')
    cg = get_residue_atom(oemol, res_name_pred, ' C11')
    cd = get_residue_atom(oemol, res_name_pred, ' C8 ')
    c = get_residue_atom(oemol, res_name_pred, ' C19')
    n_nme = get_residue_atom(oemol, nme_pred, ' N21')

    # Atom quadruples in the column order of the printed table
    torsion_atoms = (
        (c_ace, n, ca, c),    # phi
        (n, ca, c, n_nme),    # psi
        (n, ca, cb, cg),      # chi 1
        (ca, cb, cg, cd),     # chi 2
        (cb, cg, cd, n),      # chi 3
    )

    print('# Rot Conf Phi      Psi      Chi 1    Chi 2    Chi 3')
    for conf in oemol.GetConfs():

        # One row per conformer, with torsions converted to degrees
        torsions_deg = [oechem.OEGetTorsion(conf, *quad) * rad_to_deg for quad in torsion_atoms]
        print('  {:2d}  {:2d}   {:8.3f} {:8.3f} {:8.3f} {:8.3f} {:8.3f}'.format(
            rot_idx, conf.GetIdx(), *torsions_deg))


Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   50.62   -25.0    36.0     0.0     0.0
 1   49.05    27.0   -34.0     0.0     0.0
# Rot Conf Phi      Psi      Chi 1    Chi 2    Chi 3
   1   0   -177.338  179.811  -21.775   37.958  -45.665
   1   1   -177.275 -165.402  -21.712   38.063  -45.173
   1   2   -177.130 -150.573  -21.673   38.152  -44.785
   1   3   -176.945 -135.589  -21.618   38.214  -45.218
   1   4   -176.779 -120.330  -21.483   38.175  -47.736
   1   5   -176.862 -104.866  -21.442   37.916  -49.248
   1   6   -176.929  -89.664  -21.535   37.720  -48.891
   1   7   -176.945  -74.598  -21.654   37.619  -48.076
   1   8   -176.965  -59.566  -21.794   37.587  -47.117
   1   9   -177.009  -44.575  -21.918   37.601  -46.302
   1  10   -177.114  -29.676  -21.982   37.664  -45.731
   1  11   -177.215  -14.840  -21.972   37.749  -45.594
   1  12   -177.279    0.010  -21.906   37.839  -45.752
   1  13   -177.294   14.861  -21.829   37.918  -45.940
   1  14   -177.254   29.730  -21.7

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   50.62   -25.0    36.0     0.0     0.0
 1   49.05    27.0   -34.0     0.0     0.0
# Rot Conf Phi      Psi      Chi 1    Chi 2    Chi 3
   2   0   -177.957  179.875   30.300  -32.042   18.560
   2   1   -177.925 -165.324   30.476  -32.047   17.993
   2   2   -177.814 -150.352   30.655  -31.988   17.703
   2   3   -177.705 -135.251   30.766  -31.906   17.690
   2   4   -177.620 -120.129   30.749  -31.833   17.814
   2   5   -177.563 -105.070   30.649  -31.762   17.744
   2   6   -177.543  -90.024   30.504  -31.716   17.629
   2   7   -177.553  -74.982   30.343  -31.677   17.528
   2   8   -177.506  -59.729   30.249  -31.530   17.734
   2   9   -177.558  -44.624   30.197  -31.572   18.143
   2  10   -177.673  -29.619   30.091  -31.940   19.020
   2  11   -177.754  -14.721   30.123  -31.897   19.061
   2  12   -177.859    0.083   30.148  -32.057   19.148
   2  13   -177.905   14.910   30.236  -32.094   18.940
   2  14   -177.875   29.789   30.3

# Setup 2-D TorsionDrive

Set up 2-D TorsionDrives on phi and psi.

In [18]:
# Get software provenance
factory = TorsiondriveDatasetFactory()
provenance = factory.provenance(GLOBAL_TOOLKIT_REGISTRY)

# Initialize TorsionDrive dataset
dataset = TorsiondriveDataset(
    dataset_name = 'OpenFF Protein Dipeptide 2-D TorsionDrive v1.0',
    dataset_tagline = 'Dipeptide 2-D TorsionDrives on phi and psi',
    description = 'Two-dimensional TorsionDrives on phi and psi for dipeptides of alanine and two rotamers of '
        'proline and tryptophan',
    provenance = provenance
)
dataset.metadata.submitter = 'chapincavender'
dataset.metadata.long_description_url = (
    'https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/'
    '2021-11-18-OpenFF-Protein-Dipeptide-2D-TorsionDrive'
)


def _scan_limits_in_degrees(grid_in_radians):
    """Return the first and last points of a scan grid as whole degrees.

    The grids in dihedral_range are stored in radians, so converting back to degrees is a
    floating-point round trip that is not guaranteed to reproduce the original integer exactly.
    Rounding keeps the range endpoints on the integer grid defined by the grid spacing instead
    of relying on how a value such as 164.99999999999997 is coerced downstream.
    """

    return (
        int(round(grid_in_radians[0] * rad_to_deg)),
        int(round(grid_in_radians[-1] * rad_to_deg)),
    )


# Add molecules with constraints on non-driven sidechain torsions to dataset
for res_name in residue_names:

    res_rotamers = get_sorted_rotamers(res_name)

    for i in range(max_rotamers[res_name]):

        rot_idx = i + 1

        # Read molecule with minimized conformers and gather all conformers on the first entry
        sdf_records = Molecule.from_file(
            os.path.join('dipeptide_rotamers', res_name, '{}_min_rotamer_{:d}.sdf'.format(res_name, rot_idx)),
            allow_undefined_stereo = True
        )
        offmol = sdf_records[0]
        for sdf_record in sdf_records[1:]:
            offmol.add_conformer(sdf_record.conformers[0])

        print('{:3s} rotamer {:d} of {:d} ({:d} conformers)'.format(
            res_name, rot_idx, max_rotamers[res_name], offmol.n_conformers))

        # Molecule metadata
        mol_index = '{:3s}-rotamer-{:d}'.format(res_name, rot_idx)
        mol_attributes = factory.create_cmiles_metadata(offmol)

        # Indices and scan range for driven torsions (phi and psi); the first SMIRKS match is used
        phi_indices = offmol.chemical_environment_matches(dihedral_smirks['phi'])[0]
        psi_indices = offmol.chemical_environment_matches(dihedral_smirks['psi'])[0]
        phi_range = _scan_limits_in_degrees(dihedral_range[res_name]['phi'])
        psi_range = _scan_limits_in_degrees(dihedral_range[res_name]['psi'])
        phi_spacing = dihedral_spacing[res_name]['phi']
        psi_spacing = dihedral_spacing[res_name]['psi']

        # Add molecule to dataset
        dataset.add_molecule(
            index = mol_index, molecule = offmol, attributes = mol_attributes,
            dihedrals = [phi_indices, psi_indices],
            keywords = {'dihedral_ranges': [phi_range, psi_range], 'grid_spacing': [phi_spacing, psi_spacing]}
        )

        # Add constraints for non-driven torsions (sidechain dihedrals)
        for dihedral in dihedral_smirks:

            # phi and psi are the driven torsions and are never constrained
            if dihedral not in ('chi1', 'chi2'):
                continue

            for j, atoms in enumerate(offmol.chemical_environment_matches(dihedral_smirks[dihedral])):

                # Index the rotamer list only when a sidechain torsion was matched; a residue
                # without sidechain torsions may have no rotamer entries at all
                rotamer = res_rotamers[i]
                value = rotamer.GetChi1() if dihedral == 'chi1' else rotamer.GetChi2()

                print('{:3s} {:2d} {:5s} {:2d} {:2d} {:2d} {:2d} {:2d} {:8.3f}'.format(
                    res_name, rot_idx, dihedral, j, atoms[0], atoms[1], atoms[2], atoms[3], value))

                dataset.dataset[mol_index].add_constraint(
                    constraint = 'set', constraint_type = 'dihedral', indices = atoms, value = value)


#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
ALA rotamer 1 of 1 (576 conformers)
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   50.62   -25.0    36.0     0.0     0.0
 1   49.05    27.0   -34.0     0.0     0.0


Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

PRO rotamer 1 of 2 (408 conformers)
PRO  1 chi1   0  6 16 13 10  -25.000
PRO  1 chi2   0 16 13 10  7   36.000


Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 17, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 18, aromatic: False, chiral: Fal

PRO rotamer 2 of 2 (408 conformers)
PRO  2 chi1   0  6 16 13 10   27.000
PRO  2 chi2   0 16 13 10  7  -34.000
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   33.76   -67.0    97.0     0.0     0.0
 1   18.09  -179.0    65.0     0.0     0.0
 2   15.46  -177.0  -103.0     0.0     0.0
 3   11.73   -68.0    -7.0     0.0     0.0
 4   10.35    62.0   -89.0     0.0     0.0
 5    5.19    60.0    88.0     0.0     0.0
 6    5.13   -68.0   -89.0     0.0     0.0
TRP rotamer 1 of 2 (576 conformers)
TRP  1 chi1   0  6  8 10 13  -67.000
TRP  1 chi2   0  8 10 13 14   97.000
TRP rotamer 2 of 2 (576 conformers)
TRP  2 chi1   0  6  8 10 13 -179.000
TRP  2 chi2   0  8 10 13 14   65.000


Describe and export the dataset

In [19]:
# Collect the per-molecule statistics reported below
conformer_counts = []
weights = []
formal_charges = []
for mol in dataset.molecules:
    conformer_counts.append(mol.n_conformers)
    weights.append(oechem.OECalculateMolecularWeight(mol.to_openeye()))
    # Dividing by its own unit strips the unit from the total charge
    formal_charges.append(mol.total_charge / mol.total_charge.unit)

confs = np.array(conformer_counts)
molecular_weights = np.array(weights)
unique_formal_charges = np.unique(formal_charges)

# Dataset-level counts
print('Number of unique molecules        {:d}'.format(dataset.n_molecules))
print('Number of filtered molecules      {:d}'.format(dataset.n_filtered))
print('Number of torsion drives          {:d}'.format(dataset.n_records))

# Minimum, mean, and maximum over the molecules in the dataset
conf_summary = (confs.min(), confs.mean(), confs.max())
print('Number of conformers min mean max {:3d} {:6.2f} {:3d}'.format(*conf_summary))
weight_summary = (molecular_weights.min(), molecular_weights.mean(), molecular_weights.max())
print('Molecular weight min mean max     {:6.2f} {:6.2f} {:6.2f}'.format(*weight_summary))
print('Charges                          ', sorted(unique_formal_charges))

# Dataset metadata followed by every QC specification attached to the dataset
print(dataset.metadata.dict())

for spec_name, spec_settings in dataset.qc_specifications.items():
    print("Spec:", spec_name)
    print(spec_settings.dict())


Number of unique molecules        3
Number of filtered molecules      0
Number of torsion drives          5
Number of conformers min mean max 408 508.80 576
Molecular weight min mean max     144.17 200.64 259.30
Charges                           [0.0]
{'submitter': 'chapincavender', 'creation_date': datetime.date(2021, 11, 18), 'collection_type': 'TorsionDriveDataset', 'dataset_name': 'OpenFF Protein Dipeptide 2-D TorsionDrive v1.0', 'short_description': 'Dipeptide 2-D TorsionDrives on phi and psi', 'long_description_url': HttpUrl('https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/2021-11-18-OpenFF-Protein-Dipeptide-2D-TorsionDrive', scheme='https', host='github.com', tld='com', host_type='domain', path='/openforcefield/qca-dataset-submission/tree/master/submissions/2021-11-18-OpenFF-Protein-Dipeptide-2D-TorsionDrive'), 'long_description': 'Two-dimensional TorsionDrives on phi and psi for dipeptides of alanine and two rotamers of proline and tryptophan', 

In [20]:
# Write the dataset itself to 'dataset.json.bz2' (presumably bz2-compressed JSON, going by the extension)
dataset.export_dataset('dataset.json.bz2')
# Write the dataset molecules to 'dataset.smi', passing 'smi' as the second argument
# (presumably the file format)
dataset.molecules_to_file('dataset.smi', 'smi')
# Write a visual summary of the dataset to 'dataset.pdf'
dataset.visualize('dataset.pdf')