# Capped 1-mer sidechain 2-D TorsionDrives

Prepare 2-D TorsionDrives on up to the first two sidechain dihedrals chi1 (N-CA-CB-XG) and chi2 (CA-CB-XG-XD) for capped 1-mers (Ace-X-Nme) of amino acids with a rotatable bond in the sidechain (not Ala, Gly, or Pro). Each molecule will be prepared with two backbone conformations in which the backbone dihedrals are set to values corresponding to an alpha helix (-60, -45) and a beta strand (-135, 135).

Starting conformations come from the OpenFF port of Amber ff14SB at https://github.com/openforcefield/amber-ff-porting/blob/master/AllDipeptides.tar.gz

In [1]:
import json
import logging
import numpy as np
from openeye import oechem
from openff.qcsubmit.datasets import TorsiondriveDataset
from openff.qcsubmit.factories import TorsiondriveDatasetFactory
from openff.qcsubmit.workflow_components import TorsionIndexer
from openff.toolkit.topology import Molecule
from openff.toolkit.typing.engines.smirnoff import ForceField
from openff.toolkit.utils import GLOBAL_TOOLKIT_REGISTRY
import os
import qcportal as ptl
from simtk.openmm import openmm
from simtk import unit

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


In [2]:
# Run this block to suppress warnings from OpenFF toolkit for undefined stereochemistry
# The imports in this notebook use the `openff.toolkit` package namespace, so its loggers are
# presumably named under "openff.toolkit"; older releases logged under "openforcefield".
# Raise the threshold on both names so the suppression works for either namespace.
for toolkit_logger_name in ("openforcefield", "openff.toolkit"):
    logging.getLogger(toolkit_logger_name).setLevel(logging.ERROR)

In [12]:
# Conversion factor from radians to degrees
rad_to_deg = 180 / np.pi

# Residues with a rotatable sidechain bond, including alternate protonation states
residue_names = [
    'ARG', 'ASH', 'ASN', 'ASP', 'CYS', 'CYX', 'GLH', 'GLN', 'GLU', 'HID', 'HIE', 'HIP',
    'ILE', 'LEU', 'LYN', 'LYS', 'MET', 'PHE', 'SER', 'THR', 'TRP', 'TYR', 'VAL'
]

# Backbone SMIRKS are shared by every residue
_phi_smirks = '[#6X4]-[#6X3:1](=O)-[#7X3:2]-[#6X4:3]-[#6X3:4](=O)-[#7X3]-[#6X4]'
_psi_smirks = '[#6X4]-[#6X3](=O)-[#7X3:1]-[#6X4:2]-[#6X3:3](=O)-[#7X3:4]-[#6X4]'

# Sidechain SMIRKS share a stem and differ only in how the last tagged atom is specified
_chi1_stem = '[#6X4]-[#6X3](=O)-[#7X3:1]-[#6X4:2](-[#6X3](=O)-[#7X3]-[#6X4])-[#6X4:3]-'
_chi2_stem = '[#6X4]-[#6X3](=O)-[#7X3]-[#6X4:1](-[#6X3](=O)-[#7X3]-[#6X4])-[#6X4:2]-[#6:3]'

# Residue-specific terminal atoms; residues not listed use any non-hydrogen atom
_chi1_terminal = {
    'ILE': '[#6H2:4]',
    'THR': '[#8:4]',
}
_chi2_terminal = {
    'ASH': '=[#8:4]',
    'ASN': '=[#8:4]',
    'HID': '~[#7:4]',
    'HIE': '~[#7:4]',
    'HIP': '~[#7:4]',
    'TRP': '~[#6R2:4]',
}

# Tagged SMIRKS for each dihedral, indexed by residue name then dihedral name
dihedral_smirks = {
    res_name: {
        'phi': _phi_smirks,
        'psi': _psi_smirks,
        'chi1': _chi1_stem + _chi1_terminal.get(res_name, '[!#1:4]'),
        'chi2': _chi2_stem + _chi2_terminal.get(res_name, '~[!#1:4]'),
    } for res_name in residue_names
}

# Map residue name to a valid name for OpenEye's rotamer library
_rotamer_library_alias = {
    'ASH': 'ASP',
    'CYX': 'CYS',
    'GLH': 'GLU',
    'HID': 'HIS',
    'HIE': 'HIS',
    'HIP': 'HIS',
    'LYN': 'LYS',
}
oe_res_name_rotamer_map = {
    res_name: _rotamer_library_alias.get(res_name, res_name) for res_name in residue_names
}

# Target backbone dihedrals in degrees for each backbone conformation
backbone_constraints = dict(
    alpha = dict(phi = -60, psi = -45),
    beta = dict(phi = -135, psi = 135),
)

# Scan grids in degrees for chi1 and chi2, covering [-180, 180) at the chosen spacing
dihedral_spacing = {}
dihedral_range = {}
for res_name in residue_names:

    spacing = {'chi1': 15, 'chi2': 15}
    dihedral_spacing[res_name] = spacing
    dihedral_range[res_name] = {
        chi_name: np.arange(-180, 180, spacing[chi_name]) for chi_name in ('chi1', 'chi2')
    }


# Generate backbone and sidechain conformations

Use OpenEye Toolkit to generate backbone conformations and scan sidechain dihedrals for each amino acid. Write an SDF file for each backbone conformation with sidechain dihedral scans as conformers.

In [5]:
# Look up an OEAtom by residue match predicate and atom name
# If residue_id is given, the atom must also belong to the residue with that residue number
def get_residue_atom(molecule, residue_predicate, atom_name, residue_id = None):

    in_residue = oechem.OEAtomMatchResidue(residue_predicate)

    # Narrow the residue match to one residue number when requested
    if residue_id is not None:
        in_residue = oechem.OEAndAtom(in_residue, oechem.OEHasResidueNumber(residue_id))

    return molecule.GetAtom(oechem.OEAndAtom(in_residue, oechem.OEHasAtomName(atom_name)))

# Get array of rotamers ordered from highest to lowest probability, printing a summary table
# res_name is a str corresponding to the 3-letter code in all caps
# rotamer_map translates res_name into the residue name passed to OEGetResidueIndex
def get_sorted_rotamers(res_name, rotamer_map = oe_res_name_rotamer_map):

    library_index = oechem.OEGetResidueIndex(rotamer_map[res_name])
    library_rotamers = list(oechem.OEGetRotamers(library_index))
    probabilities = [rotamer.GetProbability() for rotamer in library_rotamers]

    # argsort is ascending, so reverse it to order the rotamers high-to-low
    descending = np.argsort(probabilities)[::-1]
    sorted_rotamers = np.array(library_rotamers)[descending]

    # Print rotamer probabilities and sidechain dihedrals
    row_format = '{:2d}  {:6.2f}  {:6.1f}  {:6.1f}  {:6.1f}  {:6.1f}'
    print('#   Prob    Chi 1   Chi 2   Chi 3   Chi 4')
    for rank, rotamer in enumerate(sorted_rotamers):
        print(row_format.format(
            rank, rotamer.GetProbability(),
            rotamer.GetChi1(), rotamer.GetChi2(), rotamer.GetChi3(), rotamer.GetChi4()))

    return sorted_rotamers

# Flatten a hierarchy view into a list of residues, visiting chains, then fragments, in order
# polymer is an OEHierView
def get_residues(polymer):

    return [
        residue
        for chain in polymer.GetChains()
        for fragment in chain.GetFragments()
        for residue in fragment.GetResidues()
    ]

# Collect backbone and sidechain dihedrals of a residue into a dict, converted to deg
# residue is an OEHierResidue or an OEAtom
def get_dihedrals(residue):

    # phi and psi are always present
    dihedrals = {
        'phi': oechem.OEGetPhi(residue) * rad_to_deg,
        'psi': oechem.OEGetPsi(residue) * rad_to_deg,
    }

    # Torsion index 3 is stored as omega; a raw value of exactly -100.0 is skipped
    omega = oechem.OEGetTorsion(residue, 3)
    if omega != -100.0:
        dihedrals['omega'] = omega * rad_to_deg

    # Sidechain torsions are named chi<N>, where N is the torsion index minus 3
    dihedrals.update(
        ('chi{:d}'.format(chi_idx - 3), oechem.OEGetTorsion(residue, chi_idx) * rad_to_deg)
        for chi_idx in oechem.OEGetChis(residue)
    )

    return dihedrals

# Print one line of backbone and sidechain dihedrals in deg: phi, psi, omega (if present), chis
# residue is an OEHierResidue or an OEAtom
def print_dihedrals(residue):

    dihedrals = get_dihedrals(residue)

    # Assemble the names of the columns to print, in output order
    columns = ['phi', 'psi']
    if 'omega' in dihedrals:
        columns.append('omega')
    columns.extend('chi{:d}'.format(chi_idx - 3) for chi_idx in oechem.OEGetChis(residue))

    print(' '.join('{:8.3f}'.format(dihedrals[column]) for column in columns))


In [6]:
# Residue match predicates for the acetyl cap, the N-methyl cap, and the amino acid itself
# Only the two cap predicates are given a residue name here
ace_pred = oechem.OEAtomMatchResidueID()
nme_pred = oechem.OEAtomMatchResidueID()
res_name_pred = oechem.OEAtomMatchResidueID()
for cap_pred, cap_name in ((ace_pred, 'ACE'), (nme_pred, 'NME')):
    cap_pred.SetName(cap_name)

# Dictionary of dihedrals in deg indexed by residue name, then backbone conformation index,
# then sidechain conformer index, then dihedral name
dihedrals_by_conf_idx = {}

# Create residues for Ace and Nme used to build Ace-Cyx-Nme manually
if 'CYX' in residue_names:

    ace = oechem.OEResidue()
    nme = oechem.OEResidue()

    # Both caps sit in chain A, fragment 0; Ace is residue 0 and Nme is residue 2
    for cap_residue, cap_name, cap_number in ((ace, 'ACE', 0), (nme, 'NME', 2)):
        cap_residue.SetChainID('A')
        cap_residue.SetFragmentNumber(0)
        cap_residue.SetName(cap_name)
        cap_residue.SetResidueNumber(cap_number)

# For each residue, read the capped 1-mer from mol2, set each backbone conformation, scan chi1
# (and chi2 for sidechains with a second rotatable bond), and write every scan point as a
# conformer of one SDF file per residue and backbone conformation. The dihedrals of every
# conformer are recorded in dihedrals_by_conf_idx and dumped to JSON at the end.
with oechem.oemolistream() as ifs, oechem.oemolostream() as ofs:

    for res_name in residue_names:

        print('{:3s}'.format(res_name))
        os.makedirs(os.path.join('backbone_conformations', res_name), exist_ok = True)

        if ifs.open(os.path.join('AllDipeptides', 'MainChain', res_name, '{}.mol2'.format(res_name))):

            for mol in ifs.GetOEGraphMols():

                dihedrals_by_conf_idx[res_name] = {}

                # Fix incorrect bond orders and formal charges in carboxylates from mol2 produced by tleap
                # Adapted from https://github.com/openforcefield/amber-ff-porting/blob/master/utils.py
                if res_name in ('ASP', 'GLU'):

                    for atom in mol.GetAtoms():

                        # Find carbon with negative formal charge
                        if atom.GetAtomicNum() == 6 and atom.GetFormalCharge() == -1:

                            # Loop over bonded atoms and check for oxygens with negative formal charge
                            oxyanion_count = 0
                            for neighbor in atom.GetAtoms():

                                # The charge test must be on the neighbor, not on the carbon itself,
                                # otherwise every bonded oxygen is counted as an oxyanion
                                if neighbor.GetAtomicNum() == 8 and neighbor.GetFormalCharge() == -1:
                                    oxyanion_count += 1

                                    # Set correct formal charges and bond order for the second negative oxygen
                                    if oxyanion_count == 2:

                                        atom.SetFormalCharge(0)
                                        neighbor.SetFormalCharge(0)
                                        atom.GetBond(neighbor).SetOrder(2)

                # Move center of mass to origin and perceive stereochemistry
                oechem.OECenter(mol)
                oechem.OE3DToInternalStereo(mol)

                # Perceive residues, preserving atom order
                atoms = list(mol.GetAtoms())
                oechem.OEPerceiveResidues(mol)
                mol.OrderAtoms(atoms)

                # Build Ace-Cyx-Nme and bridged Ace-Cyx-Nme residues manually
                if res_name == 'CYX':

                    # Create residues for Cyx and bridged Ace-Cyx-Nme
                    # The first Cyx is named with the rotamer library name so that it matches
                    # res_name_pred below
                    cyx = oechem.OEResidue()
                    cyx.SetChainID('A')
                    cyx.SetFragmentNumber(0)
                    cyx.SetName(oe_res_name_rotamer_map[res_name])
                    cyx.SetResidueNumber(1)

                    bridged_ace = oechem.OEResidue()
                    bridged_ace.SetChainID('A')
                    bridged_ace.SetFragmentNumber(0)
                    bridged_ace.SetName('ACE')
                    bridged_ace.SetResidueNumber(3)

                    bridged_cyx = oechem.OEResidue()
                    bridged_cyx.SetChainID('A')
                    bridged_cyx.SetFragmentNumber(0)
                    bridged_cyx.SetName('CYX')
                    bridged_cyx.SetResidueNumber(4)

                    bridged_nme = oechem.OEResidue()
                    bridged_nme.SetChainID('A')
                    bridged_nme.SetFragmentNumber(0)
                    bridged_nme.SetName('NME')
                    bridged_nme.SetResidueNumber(5)

                    # Add atoms to residues manually, based on fixed atom index ranges
                    for atom in mol.GetAtoms():

                        idx = atom.GetIdx()
                        if idx <= 5:
                            oechem.OEAtomSetResidue(atom, ace)
                        elif 6 <= idx <= 15:
                            oechem.OEAtomSetResidue(atom, cyx)
                        elif 16 <= idx <= 21:
                            oechem.OEAtomSetResidue(atom, nme)
                        elif 22 <= idx <= 27:
                            oechem.OEAtomSetResidue(atom, bridged_ace)
                        elif 28 <= idx <= 37:
                            oechem.OEAtomSetResidue(atom, bridged_cyx)
                        elif idx >= 38:
                            oechem.OEAtomSetResidue(atom, bridged_nme)

                # Get hierarchy view of molecule and list of sidechain rotamers sorted by probability
                hv_mol = oechem.OEHierView(mol)
                mol_residues = get_residues(hv_mol)
                mol_rotamers = get_sorted_rotamers(res_name)

                # Get atoms that make up phi and psi
                res_name_pred.SetName(oe_res_name_rotamer_map[res_name])
                if res_name == 'CYX':

                    # Residue numbers select the first Ace-Cyx-Nme rather than the bridged copy
                    c_ace = get_residue_atom(mol, ace_pred, ' C  ', residue_id = 0)
                    n = get_residue_atom(mol, res_name_pred, ' N  ', residue_id = 1)
                    ca = get_residue_atom(mol, res_name_pred, ' CA ', residue_id = 1)
                    c = get_residue_atom(mol, res_name_pred, ' C  ', residue_id = 1)
                    n_nme = get_residue_atom(mol, nme_pred, ' N  ', residue_id = 2)

                else:

                    c_ace = get_residue_atom(mol, ace_pred, ' C  ')
                    n = get_residue_atom(mol, res_name_pred, ' N  ')
                    ca = get_residue_atom(mol, res_name_pred, ' CA ')
                    c = get_residue_atom(mol, res_name_pred, ' C  ')
                    n_nme = get_residue_atom(mol, nme_pred, ' N  ')

                # Write conformers for each rotamer to SDF
                for backbone_conf in backbone_constraints:

                    dihedrals_by_conf_idx[res_name][backbone_conf] = {}
                    conf_idx = 0

                    # Create copy of OE molecule with no conformers
                    new_mol = oechem.OEMol(mol)
                    new_mol.DeleteConfs()

                    # Set backbone dihedrals to backbone conformation
                    phi = backbone_constraints[backbone_conf]['phi']
                    psi = backbone_constraints[backbone_conf]['psi']
                    oechem.OESetTorsion(mol, c_ace, n, ca, c, phi / rad_to_deg)
                    oechem.OESetTorsion(mol, n, ca, c, n_nme, psi / rad_to_deg)

                    # Set sidechain dihedrals to most populated rotamer
                    oechem.OESetRotamer(mol_residues[1], mol_rotamers[0])
                    print('{:3s} {:5s}'.format(res_name, backbone_conf))
                    print_dihedrals(mol_residues[1])

                    # Scan up to first two sidechain dihedrals
                    for chi1 in dihedral_range[res_name]['chi1']:

                        oechem.OESetTorsion(mol_residues[1], 4, chi1 / rad_to_deg)

                        # Sidechains with only one rotatable bond
                        if res_name in ['CYS', 'CYX', 'SER', 'THR', 'VAL']:

                            new_mol.NewConf(mol)
                            dihedrals_by_conf_idx[res_name][backbone_conf][conf_idx] = get_dihedrals(mol_residues[1])
                            conf_idx += 1
                            continue

                        for chi2 in dihedral_range[res_name]['chi2']:

                            oechem.OESetTorsion(mol_residues[1], 5, chi2 / rad_to_deg)
                            new_mol.NewConf(mol)
                            dihedrals_by_conf_idx[res_name][backbone_conf][conf_idx] = get_dihedrals(mol_residues[1])
                            conf_idx += 1

                    # Write molecule to SDF
                    if ofs.open(os.path.join(
                        'backbone_conformations', res_name, '{}_{}.sdf'.format(res_name, backbone_conf)
                    )):

                        oechem.OEWriteMolecule(ofs, new_mol)

                    else:
                        print('Error writing {:3s} {:5s}'.format(res_name, backbone_conf))

        else:
            print('Error reading {:3s}'.format(res_name))

# Save the dihedrals of every conformer; the integer conformer indices become str keys in JSON
with open(os.path.join('backbone_conformations', 'dihedrals_by_conf_idx'), 'w') as out_file:
    json.dump(dihedrals_by_conf_idx, out_file)

ARG
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0    9.90   -67.0   180.0  -179.0   177.0
 1    6.14   -68.0  -172.0   -64.0   -88.0
 2    6.13   -67.0  -179.0  -176.0   -89.0
 3    5.40   -66.0   179.0    66.0  -172.0
 4    5.30   -68.0   180.0   179.0    91.0
 5    5.19   -66.0   179.0   -67.0   173.0
 6    5.04  -176.0   176.0   179.0   179.0
 7    4.09  -177.0   180.0    63.0    83.0
 8    4.00   -66.0   178.0    65.0    88.0
 9    3.31  -175.0   179.0    67.0  -171.0
10    3.24  -174.0   179.0   -65.0   -85.0
11    3.08   -64.0   -69.0  -175.0   -91.0
12    2.98  -177.0   178.0  -178.0   -90.0
13    2.84  -177.0   177.0   -68.0   171.0
14    2.59   -62.0   -68.0  -177.0  -176.0
15    2.28  -176.0   176.0   176.0    87.0
16    2.20   -63.0   -68.0   -61.0   -86.0
17    2.05   -62.0   -66.0   -64.0   163.0
18    1.78  -179.0    66.0   178.0   171.0
19    1.77    65.0  -177.0  -180.0   179.0
20    1.76    65.0   179.0   177.0    88.0
21    1.68   -68.0  -177.0   -69.0   113.0
22    1.



ASN
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   49.01   -70.0   -42.0     0.0     0.0
 1   29.10  -171.0    -1.0     0.0     0.0
 2   14.00    64.0     7.0     0.0     0.0
 3    7.46   -64.0   115.0     0.0     0.0
 4    0.11  -161.0   164.0     0.0     0.0
ASN alpha
 -60.000  -45.000  -70.000  -42.000
ASN beta 
-135.000  135.000  -70.000  -42.000




ASP
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   51.48   -69.0   -29.0     0.0     0.0
 1   23.65  -172.0    -2.0     0.0     0.0
 2   16.24    63.0    -2.0     0.0     0.0
 3    8.33  -174.0    74.0     0.0     0.0
ASP alpha
 -60.000  -45.000  -69.000  -29.000
ASP beta 
-135.000  135.000  -69.000  -29.000




CYS
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   55.67   -65.0     0.0     0.0     0.0
 1   26.33  -178.0     0.0     0.0     0.0
 2   17.73    65.0     0.0     0.0     0.0
CYS alpha
 -60.000  -45.000  -65.000
CYS beta 
-135.000  135.000  -65.000
CYX
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   55.67   -65.0     0.0     0.0     0.0
 1   26.33  -178.0     0.0     0.0     0.0
 2   17.73    65.0     0.0     0.0     0.0
CYX alpha
 -60.000  -45.000  -65.000
CYX beta 
-135.000  135.000  -65.000
GLH
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   36.58   -67.0   179.0    -7.0     0.0
 1   23.69  -177.0   178.0     1.0     0.0
 2   15.80   -66.0   -67.0   -32.0     0.0
 3    8.03  -178.0    65.0    26.0     0.0
 4    6.39   -67.0    83.0     3.0     0.0
 5    4.87    65.0  -177.0     1.0     0.0
 6    2.58    69.0   -85.0    16.0     0.0
 7    1.50  -170.0   -83.0   -29.0     0.0
 8    0.28    61.0    86.0    20.0     0.0
GLH alpha
 -60.000  -45.000  -67.000  179.000   -7.000
GLH beta 
-135



GLN
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   38.71   -67.0   179.0    -4.0     0.0
 1   18.69  -176.0   178.0     2.0     0.0
 2   16.05   -64.0   -66.0   -39.0     0.0
 3    9.75  -177.0    66.0    41.0     0.0
 4    5.08    65.0  -177.0    -3.0     0.0
 5    3.25   -67.0    82.0    17.0     0.0
 6    3.09   -65.0   -63.0   109.0     0.0
 7    1.47  -172.0   -86.0   -29.0     0.0
 8    1.44  -177.0    62.0  -105.0     0.0
 9    1.31    70.0   -84.0    17.0     0.0
10    0.48    63.0    84.0    30.0     0.0
11    0.23   -73.0    69.0  -120.0     0.0
12    0.15  -172.0   -76.0   127.0     0.0
GLN alpha
 -60.000  -45.000  -67.000  179.000   -4.000
GLN beta 
-135.000  135.000  -67.000  179.000   -4.000




GLU
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   36.58   -67.0   179.0    -7.0     0.0
 1   23.69  -177.0   178.0     1.0     0.0
 2   15.80   -66.0   -67.0   -32.0     0.0
 3    8.03  -178.0    65.0    26.0     0.0
 4    6.39   -67.0    83.0     3.0     0.0
 5    4.87    65.0  -177.0     1.0     0.0
 6    2.58    69.0   -85.0    16.0     0.0
 7    1.50  -170.0   -83.0   -29.0     0.0
 8    0.28    61.0    86.0    20.0     0.0
GLU alpha
 -60.000  -45.000  -67.000  179.000   -7.000
GLU beta 
-135.000  135.000  -67.000  179.000   -7.000
HID




#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   31.73   -64.0   -75.0     0.0     0.0
 1   17.01  -178.0    74.0     0.0     0.0
 2   13.14   -66.0    88.0     0.0     0.0
 3   11.93  -173.0   -87.0     0.0     0.0
 4    9.05   -68.0   171.0     0.0     0.0
 5    7.39    65.0   -81.0     0.0     0.0
 6    5.01    62.0    87.0     0.0     0.0
 7    4.47  -173.0  -167.0     0.0     0.0
HID alpha
 -60.000  -45.000  -64.000  -75.000
HID beta 
-135.000  135.000  -64.000  -75.000
HIE




#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   31.73   -64.0   -75.0     0.0     0.0
 1   17.01  -178.0    74.0     0.0     0.0
 2   13.14   -66.0    88.0     0.0     0.0
 3   11.93  -173.0   -87.0     0.0     0.0
 4    9.05   -68.0   171.0     0.0     0.0
 5    7.39    65.0   -81.0     0.0     0.0
 6    5.01    62.0    87.0     0.0     0.0
 7    4.47  -173.0  -167.0     0.0     0.0
HIE alpha
 -60.000  -45.000  -64.000  -75.000
HIE beta 
-135.000  135.000  -64.000  -75.000
HIP




#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   31.73   -64.0   -75.0     0.0     0.0
 1   17.01  -178.0    74.0     0.0     0.0
 2   13.14   -66.0    88.0     0.0     0.0
 3   11.93  -173.0   -87.0     0.0     0.0
 4    9.05   -68.0   171.0     0.0     0.0
 5    7.39    65.0   -81.0     0.0     0.0
 6    5.01    62.0    87.0     0.0     0.0
 7    4.47  -173.0  -167.0     0.0     0.0
HIP alpha
 -60.000  -45.000  -64.000  -75.000
HIP beta 
-135.000  135.000  -64.000  -75.000




ILE
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   62.03   -63.0   169.0     0.0     0.0
 1   15.70   -59.0   -61.0     0.0     0.0
 2   12.33    62.0   170.0     0.0     0.0
 3    5.81  -170.0   167.0     0.0     0.0
 4    2.61  -167.0    66.0     0.0     0.0
 5    0.87   -63.0    89.0     0.0     0.0
 6    0.35    58.0    84.0     0.0     0.0
ILE alpha
 -60.000  -45.000  -63.000  169.000
ILE beta 
-135.000  135.000  -63.000  169.000




LEU
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   64.54   -66.0   174.0     0.0     0.0
 1   30.12  -177.0    63.0     0.0     0.0
 2    2.36   -77.0    72.0     0.0     0.0
 3    1.37  -172.0   153.0     0.0     0.0
 4    0.45    61.0    83.0     0.0     0.0
 5    0.42   -83.0   -64.0     0.0     0.0
 6    0.33    73.0   165.0     0.0     0.0
 7    0.12  -172.0   -75.0     0.0     0.0
LEU alpha
 -60.000  -45.000  -66.000  174.000
LEU beta 
-135.000  135.000  -66.000  174.000




LYN
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   24.68   -68.0  -179.0  -179.0   179.0
 1   14.48  -175.0   177.0   180.0  -180.0
 2    9.01   -62.0   -67.0  -177.0  -178.0
 3    5.25   -67.0  -178.0  -177.0   -67.0
 4    4.06   -66.0  -179.0   176.0    67.0
 5    3.98    65.0  -178.0  -179.0  -180.0
 6    3.90   -69.0   174.0    71.0   175.0
 7    3.77   -67.0  -173.0   -74.0  -175.0
 8    3.54  -178.0   175.0   174.0    66.0
 9    3.53  -179.0    69.0   176.0   177.0
10    3.38  -176.0   178.0  -176.0   -67.0
11    2.54  -178.0   175.0    73.0   175.0
12    2.09   -60.0   -66.0  -173.0   -69.0
13    1.94  -175.0  -177.0   -73.0  -175.0
14    1.56   -63.0   -64.0   -71.0  -177.0
15    1.33   -61.0   -69.0   180.0    68.0
16    1.22   -64.0  -177.0   -70.0   -66.0
17    1.17  -180.0    67.0   174.0    66.0
18    1.13   -69.0   177.0    70.0    68.0
19    0.78  -178.0    63.0    70.0   177.0
20    0.77    64.0  -178.0  -179.0   -67.0
21    0.69    68.0  -179.0   178.0    67.0
22    0.



PHE
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   47.38   -67.0   -81.0     0.0     0.0
 1   34.27  -178.0    76.0     0.0     0.0
 2   11.17    64.0   -90.0     0.0     0.0
 3    6.89   -68.0   -15.0     0.0     0.0
PHE alpha
 -60.000  -45.000  -67.000  -81.000
PHE beta 
-135.000  135.000  -67.000  -81.000




SER
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   48.44    66.0     0.0     0.0     0.0
 1   28.30   -64.0     0.0     0.0     0.0
 2   22.97   179.0     0.0     0.0     0.0
SER alpha
 -60.000  -45.000   66.000
SER beta 
-135.000  135.000   66.000
THR
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   48.14    61.0     0.0     0.0     0.0
 1   44.64   -60.0     0.0     0.0     0.0
 2    6.91  -173.0     0.0     0.0     0.0
THR alpha
 -60.000  -45.000   61.000
THR beta 
-135.000  135.000   61.000
TRP
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   33.76   -67.0    97.0     0.0     0.0
 1   18.09  -179.0    65.0     0.0     0.0
 2   15.46  -177.0  -103.0     0.0     0.0
 3   11.73   -68.0    -7.0     0.0     0.0
 4   10.35    62.0   -89.0     0.0     0.0
 5    5.19    60.0    88.0     0.0     0.0
 6    5.13   -68.0   -89.0     0.0     0.0
TRP alpha
 -60.000  -45.000  -67.000   97.000
TRP beta 
-135.000  135.000  -67.000   97.000




TYR
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   48.01   -67.0   -81.0     0.0     0.0
 1   34.53  -178.0    76.0     0.0     0.0
 2   11.57    64.0   -90.0     0.0     0.0
 3    5.55   -68.0   -15.0     0.0     0.0
TYR alpha
 -60.000  -45.000  -67.000  -81.000
TYR beta 
-135.000  135.000  -67.000  -81.000




VAL
#   Prob    Chi 1   Chi 2   Chi 3   Chi 4
 0   75.56   176.0     0.0     0.0     0.0
 1   17.94   -62.0     0.0     0.0     0.0
 2    6.20    65.0     0.0     0.0     0.0
VAL alpha
 -60.000  -45.000  176.000
VAL beta 
-135.000  135.000  176.000




# Minimize with OpenFF 2.0.0

To clean up valence terms, minimize the capped 1-mer conformations with harmonic restraints on the backbone and sidechain dihedrals.

In [13]:
# Minimize every scanned conformer with the OpenFF force field while restraining the backbone and
# scanned sidechain dihedrals to their recorded values, then align the minimized conformers to
# the first one and write them to a '{res_name}_min_{backbone_conf}.sdf' file.
force_field = ForceField('openff_unconstrained-2.0.0.offxml')

# Reload the recorded dihedrals; after the JSON round trip the conformer indices are str keys
with open(os.path.join('backbone_conformations', 'dihedrals_by_conf_idx'), 'r') as in_file:
    dihedrals_by_conf_idx = json.load(in_file)

openmm_platform = openmm.Platform.getPlatformByName('Reference')

for res_name in residue_names:

    for backbone_conf in backbone_constraints:

        # Read OFF molecule
        # Each entry read from the SDF contributes its first conformer to the first molecule
        offmol = Molecule.from_file(
            os.path.join('backbone_conformations', res_name, '{}_{}.sdf'.format(res_name, backbone_conf)))
        for j in range(1, len(offmol)):
            offmol[0].add_conformer(offmol[j].conformers[0])
        offmol = offmol[0]

        # Set up OpenMM system
        # new_mol is rebuilt from mapped SMILES and starts without conformers; the minimized
        # coordinates are added to it below
        mapped_smiles = offmol.to_smiles(isomeric = True, mapped = True)
        new_mol = Molecule.from_mapped_smiles(mapped_smiles)
        openmm_system = force_field.create_openmm_system(new_mol.to_topology())

        # Harmonic restraint for periodic torsion
        # The deviation is wrapped with min(dphi, two_pi - dphi) so it is the shorter way around
        # NOTE(review): the restrained variable is a torsion angle (radians in OpenMM), so
        # k_over_2 = 4184 is presumably in kJ mol^-1 rad^-2 rather than a per-length unit -- confirm
        harmonic_restraint = openmm.CustomTorsionForce(
            'k_over_2 * diff * diff; diff = min(dphi, two_pi - dphi); dphi = abs(theta - phi_min)')
        harmonic_restraint.addGlobalParameter('two_pi', 2 * np.pi)
        harmonic_restraint.addGlobalParameter('k_over_2', 4184)
        harmonic_restraint.addPerTorsionParameter('phi_min')

        # Set up harmonic restraints for backbone and up to first two sidechain torsions
        dihedral_indices = {}
        for dihedral in dihedral_smirks[res_name]:

            dihedral_indices[dihedral] = []

            for j, atoms in enumerate(offmol.chemical_environment_matches(dihedral_smirks[res_name][dihedral])):

                # For symmetric chemistries, take the lowest index
                # (only the first match returned is used; later matches are skipped)
                if j > 0:
                    continue

                # Measure the torsion in the molecule so it can be printed next to the recorded value
                oemol = offmol.to_openeye()
                oe_torsion = oechem.OEGetTorsion(
                    oemol,
                    oemol.GetAtom(oechem.OEHasAtomIdx(atoms[0])),
                    oemol.GetAtom(oechem.OEHasAtomIdx(atoms[1])),
                    oemol.GetAtom(oechem.OEHasAtomIdx(atoms[2])),
                    oemol.GetAtom(oechem.OEHasAtomIdx(atoms[3])),
                ) * rad_to_deg

                # Columns: residue, backbone conformation, dihedral, match index, four atom indices,
                # recorded value for conformer 0 in deg, measured value in deg
                print('{:3s} {:5s} {:5s} {:2d} {:2d} {:2d} {:2d} {:2d} {:8.3f} {:8.3f}'.format(
                    res_name, backbone_conf, dihedral, j, atoms[0], atoms[1], atoms[2], atoms[3],
                    dihedrals_by_conf_idx[res_name][backbone_conf]['0'][dihedral], oe_torsion,
                ))

                # Register the torsion with a placeholder minimum; the real minimum is set per conformer
                torsion_idx = harmonic_restraint.addTorsion(atoms[0], atoms[1], atoms[2], atoms[3], [0.0])
                dihedral_indices[dihedral].append((torsion_idx, atoms))

        # The restraint is added to the system before the context is created from it
        openmm_system.addForce(harmonic_restraint)
        openmm_integrator = openmm.VerletIntegrator(0.001 * unit.femtoseconds)
        openmm_context = openmm.Context(openmm_system, openmm_integrator, openmm_platform)

        # Minimize conformers
        for conf_idx in range(offmol.n_conformers):

            # Set minimum of harmonic restraint for every restrained torsion (backbone and
            # sidechain) to the value recorded for this conformer, converted from deg to rad
            for dihedral in dihedral_indices:
                for torsion_idx, atoms in dihedral_indices[dihedral]:

                    harmonic_restraint.setTorsionParameters(
                        torsion_idx, atoms[0], atoms[1], atoms[2], atoms[3],
                        [dihedrals_by_conf_idx[res_name][backbone_conf][str(conf_idx)][dihedral] / rad_to_deg]
                    )

            # Push the new restraint minima into the existing context, then load this conformer
            harmonic_restraint.updateParametersInContext(openmm_context)
            openmm_context.setPositions(offmol.conformers[conf_idx].value_in_unit(unit.nanometers))

            openmm.LocalEnergyMinimizer.minimize(openmm_context)
            minimized_state = openmm_context.getState(getPositions = True)
            minimized_coordinates = unit.quantity.Quantity(
                np.array(minimized_state.getPositions().value_in_unit(unit.nanometers)), unit.nanometers)
            new_mol.add_conformer(minimized_coordinates)

        # Align conformers to first conformer using heavy atoms of acetyl group and N and CA of amino acid
        oemol = new_mol.to_openeye()
        ref_oemol = oechem.OEGraphMol(oemol.GetConf(oechem.OEHasConfIdx(0)))

        # Center reference molecule
        oechem.OECenter(ref_oemol)

        # Substructure search for methyl carbon and carbonyl atoms of acetyl group and N and CA of amino acid
        ace_n_ca_subsearch = oechem.OESubSearch()
        ace_n_ca_subsearch.Init('[#6X4H3]-[#6X3](=O)-[#7X3]-[#6X4]')

        # Create atom match pairs between target and reference molecules
        # NOTE(review): pairs are added for every combination of reference and target matches;
        # this presumably relies on the pattern matching only once per molecule -- confirm for CYX
        alignment_match = oechem.OEMatch()
        oesubsearch_match_unique = True
        for ref_match in ace_n_ca_subsearch.Match(ref_oemol, oesubsearch_match_unique):
            for target_match in ace_n_ca_subsearch.Match(oemol, oesubsearch_match_unique):
                for ref_atoms, target_atoms in zip(ref_match.GetAtoms(), target_match.GetAtoms()):
                    alignment_match.AddPair(ref_atoms.target, target_atoms.target)

        # Set up OpenEye RMSD alignment
        # Output arrays are sized per conformer: 1 RMSD, 9 rotation and 3 translation entries each
        oermsd_overlay = True
        N_confs = oemol.GetMaxConfIdx()
        rmsd_vector = oechem.OEDoubleArray(N_confs)
        rotation_matrix = oechem.OEDoubleArray(9 * N_confs)
        translation_vector = oechem.OEDoubleArray(3 * N_confs)

        # Compute rotation matrix and translation vector of alignment
        oechem.OERMSD(
            ref_oemol, oemol, rmsd_vector, alignment_match, oermsd_overlay, rotation_matrix, translation_vector)

        # Apply rotation then translation to align target molecule to reference molecule
        oechem.OERotate(oemol, rotation_matrix)
        oechem.OETranslate(oemol, translation_vector)

        # Write molecule with minimized and aligned conformers to SDF
        with oechem.oemolostream() as ofs:

            if ofs.open(os.path.join(
                'backbone_conformations', res_name, '{}_min_{}.sdf'.format(res_name, backbone_conf)
            )):

                oechem.OEWriteMolecule(ofs, oemol)

            else:
                print('Error writing {:3s} {:5s}'.format(res_name, backbone_conf))


ARG alpha phi    0  4  6  8 28  -60.000  -59.997
ARG alpha psi    0  6  8 28 30  -45.000  -45.000
ARG alpha chi1   0  6  8 10 13  180.000 -179.998
ARG alpha chi2   0  8 10 13 16 -180.000 -179.996
ARG beta  phi    0  4  6  8 28 -135.000 -134.998
ARG beta  psi    0  6  8 28 30  135.000  134.998
ARG beta  chi1   0  6  8 10 13  180.000  179.996
ARG beta  chi2   0  8 10 13 16  180.000 -179.997
ASH alpha phi    0  4  6  8 17  -60.000  -60.001
ASH alpha psi    0  6  8 17 19  -45.000  -44.998
ASH alpha chi1   0  6  8 10 13  180.000 -179.997
ASH alpha chi2   0  8 10 13 14 -180.000 -179.998
ASH beta  phi    0  4  6  8 17 -135.000 -135.000
ASH beta  psi    0  6  8 17 19  135.000  135.001
ASH beta  chi1   0  6  8 10 13  180.000  179.999
ASH beta  chi2   0  8 10 13 14 -180.000  179.996
ASN alpha phi    0  4  6  8 18  -60.000  -60.001
ASN alpha psi    0  6  8 18 20  -45.000  -44.997
ASN alpha chi1   0  6  8 10 13  180.000  179.999
ASN alpha chi2   0  8 10 13 14 -180.000 -179.997
ASN beta  phi    0  

VAL alpha phi    0  4  6  8 20  -60.000  -60.003
VAL alpha psi    0  6  8 20 22  -45.000  -44.996
VAL alpha chi1   0  6  8 10 12  180.000 -179.999
VAL beta  phi    0  4  6  8 20 -135.000 -135.003
VAL beta  psi    0  6  8 20 22  135.000  134.997
VAL beta  chi1   0  6  8 10 12  180.000 -180.000


# Setup 2-D TorsionDrive

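Set up 2-D TorsionDrives on chi1 and chi2. Residues handled with a single driven sidechain torsion (CYS, CYX, SER, THR, and VAL) get a 1-D TorsionDrive on chi1 only.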

In [20]:
# Software provenance reported by the factory for the global toolkit registry
factory = TorsiondriveDatasetFactory()
provenance = factory.provenance(GLOBAL_TOOLKIT_REGISTRY)

# Keyword arguments describing the (initially empty) TorsionDrive dataset
dataset_kwargs = dict(
    dataset_name='OpenFF Protein Capped 1-mer Sidechains v1.0',
    dataset_tagline='Capped 1-mer TorsionDrives on up to two sidechain dihedrals',
    description=(
        'TorsionDrives on chi1 and chi2 for capped 1-mers of amino acids with a rotatable bond in the '
        'sidechain (not ALA, GLY, or PRO).'
    ),
    provenance=provenance,
)
dataset = TorsiondriveDataset(**dataset_kwargs)

# Submission metadata: who submitted it and where the long description lives
submission_url = (
    'https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/'
    + '2022-02-10-OpenFF-Protein-Capped-1-mer-Sidechains'
)
dataset.metadata.submitter = 'chapincavender'
dataset.metadata.long_description_url = submission_url

# Residues that are driven on chi1 only (1-D TorsionDrive); all others are driven on chi1 and chi2
single_chi_residues = {'CYS', 'CYX', 'SER', 'THR', 'VAL'}

# Add molecules with constraints on non-driven backbone torsions to dataset
for res_name in residue_names:

    for backbone_conf in backbone_constraints:

        # Read molecule with minimized conformers. Per the OpenFF toolkit docs, Molecule.from_file
        # returns a bare Molecule (not a list) when the file holds a single record, so normalize
        # the result to a list before indexing into it.
        loaded = Molecule.from_file(
            os.path.join('backbone_conformations', res_name, '{}_min_{}.sdf'.format(res_name, backbone_conf)),
            allow_undefined_stereo=True,
        )
        conformer_mols = [loaded] if isinstance(loaded, Molecule) else loaded

        # Collect the first conformer of every subsequent record onto the first molecule
        offmol = conformer_mols[0]
        for conformer_mol in conformer_mols[1:]:
            offmol.add_conformer(conformer_mol.conformers[0])

        print('{:3s} {:5s} ({:d} conformers)'.format(res_name, backbone_conf, offmol.n_conformers))

        # Molecule metadata
        mol_index = '{}-{}'.format(res_name, backbone_conf)

        # Indices, scan range (first and last grid value), and grid spacing for each driven torsion
        driven_torsions = ['chi1'] if res_name in single_chi_residues else ['chi1', 'chi2']
        dihedral_indices = []
        keyword_ranges = []
        keyword_spacing = []
        for chi in driven_torsions:
            # Only the first SMIRKS match is driven
            dihedral_indices.append(offmol.chemical_environment_matches(dihedral_smirks[res_name][chi])[0])
            chi_grid = dihedral_range[res_name][chi]
            keyword_ranges.append((chi_grid[0], chi_grid[-1]))
            keyword_spacing.append(dihedral_spacing[res_name][chi])

        # Add molecule to dataset
        dataset.add_molecule(
            index = mol_index,
            molecule = offmol,
            dihedrals = dihedral_indices,
            keywords = {'dihedral_ranges': keyword_ranges, 'grid_spacing': keyword_spacing}
        )

        # Add constraints for non-driven backbone torsions (phi and psi)
        constrained_values = backbone_constraints[backbone_conf]
        for dihedral, smirks in dihedral_smirks[res_name].items():

            # Skip torsions without a constraint before running the SMIRKS match at all
            if dihedral not in constrained_values:
                continue

            matches = offmol.chemical_environment_matches(smirks)
            if not matches:
                continue

            # For symmetric chemistries, take the lowest index (i.e. the first match only)
            atoms = matches[0]
            value = constrained_values[dihedral]

            print('{:3s} {:5s} {:5s} {:2d} {:2d} {:2d} {:2d} {:2d} {:8.3f}'.format(
                res_name, backbone_conf, dihedral, 0, atoms[0], atoms[1], atoms[2], atoms[3], value))

            dataset.dataset[mol_index].add_constraint(
                constraint = 'set', constraint_type = 'dihedral', indices = atoms, value = value)


ARG alpha (576 conformers)
ARG alpha phi    0  4  6  8 28  -60.000
ARG alpha psi    0  6  8 28 30  -45.000
ARG beta  (576 conformers)
ARG beta  phi    0  4  6  8 28 -135.000
ARG beta  psi    0  6  8 28 30  135.000
ASH alpha (576 conformers)
ASH alpha phi    0  4  6  8 17  -60.000
ASH alpha psi    0  6  8 17 19  -45.000
ASH beta  (576 conformers)
ASH beta  phi    0  4  6  8 17 -135.000
ASH beta  psi    0  6  8 17 19  135.000
ASN alpha (576 conformers)
ASN alpha phi    0  4  6  8 18  -60.000
ASN alpha psi    0  6  8 18 20  -45.000
ASN beta  (576 conformers)
ASN beta  phi    0  4  6  8 18 -135.000
ASN beta  psi    0  6  8 18 20  135.000
ASP alpha (576 conformers)
ASP alpha phi    0  4  6  8 16  -60.000
ASP alpha psi    0  6  8 16 18  -45.000
ASP beta  (576 conformers)
ASP beta  phi    0  4  6  8 16 -135.000
ASP beta  psi    0  6  8 16 18  135.000
CYS alpha (24 conformers)
CYS alpha phi    0  4  6  8 15  -60.000
CYS alpha psi    0  6  8 15 17  -45.000
CYS beta  (24 conformers)
CYS beta  ph

Problematic atoms are:
Atom atomic num: 6, name: , idx: 8, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 9, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 10, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 19, aromatic: False, chiral: False



GLU alpha (576 conformers)
GLU alpha phi    0  4  6  8 19  -60.000
GLU alpha psi    0  6  8 19 21  -45.000
GLU beta  (576 conformers)
GLU beta  phi    0  4  6  8 19 -135.000
GLU beta  psi    0  6  8 19 21  135.000
HID alpha (576 conformers)
HID alpha phi    0  4  6  8 21  -60.000
HID alpha psi    0  6  8 21 23  -45.000
HID beta  (576 conformers)
HID beta  phi    0  4  6  8 21 -135.000
HID beta  psi    0  6  8 21 23  135.000
HIE alpha (576 conformers)
HIE alpha phi    0  4  6  8 21  -60.000
HIE alpha psi    0  6  8 21 23  -45.000
HIE beta  (576 conformers)
HIE beta  phi    0  4  6  8 21 -135.000
HIE beta  psi    0  6  8 21 23  135.000
HIP alpha (576 conformers)
HIP alpha phi    0  4  6  8 22  -60.000
HIP alpha psi    0  6  8 22 24  -45.000
HIP beta  (576 conformers)
HIP beta  phi    0  4  6  8 22 -135.000
HIP beta  psi    0  6  8 22 24  135.000
ILE alpha (576 conformers)
ILE alpha phi    0  4  6  8 23  -60.000
ILE alpha psi    0  6  8 23 25  -45.000
ILE beta  (576 conformers)
ILE beta  

Problematic atoms are:
Atom atomic num: 6, name: , idx: 8, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 9, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 10, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 23, aromatic: False, chiral: False



LEU alpha (576 conformers)
LEU alpha phi    0  4  6  8 23  -60.000
LEU alpha psi    0  6  8 23 25  -45.000
LEU beta  (576 conformers)
LEU beta  phi    0  4  6  8 23 -135.000
LEU beta  psi    0  6  8 23 25  135.000
LYN alpha (576 conformers)
LYN alpha phi    0  4  6  8 25  -60.000
LYN alpha psi    0  6  8 25 27  -45.000
LYN beta  (576 conformers)
LYN beta  phi    0  4  6  8 25 -135.000
LYN beta  psi    0  6  8 25 27  135.000
LYS alpha (576 conformers)
LYS alpha phi    0  4  6  8 26  -60.000
LYS alpha psi    0  6  8 26 28  -45.000
LYS beta  (576 conformers)
LYS beta  phi    0  4  6  8 26 -135.000
LYS beta  psi    0  6  8 26 28  135.000
MET alpha (576 conformers)
MET alpha phi    0  4  6  8 21  -60.000
MET alpha psi    0  6  8 21 23  -45.000


Problematic atoms are:
Atom atomic num: 6, name: , idx: 8, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 9, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 10, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 21, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 6, name: , idx: 8, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 7, name: , idx: 6, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx: 9, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 10, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 21, aromatic: False, chiral: False



MET beta  (576 conformers)
MET beta  phi    0  4  6  8 21 -135.000
MET beta  psi    0  6  8 21 23  135.000
PHE alpha (576 conformers)
PHE alpha phi    0  4  6  8 24  -60.000
PHE alpha psi    0  6  8 24 26  -45.000
PHE beta  (576 conformers)
PHE beta  phi    0  4  6  8 24 -135.000
PHE beta  psi    0  6  8 24 26  135.000
SER alpha (24 conformers)
SER alpha phi    0  4  6  8 15  -60.000
SER alpha psi    0  6  8 15 17  -45.000
SER beta  (24 conformers)
SER beta  phi    0  4  6  8 15 -135.000
SER beta  psi    0  6  8 15 17  135.000
THR alpha (24 conformers)
THR alpha phi    0  4  6  8 18  -60.000
THR alpha psi    0  6  8 18 20  -45.000
THR beta  (24 conformers)
THR beta  phi    0  4  6  8 18 -135.000
THR beta  psi    0  6  8 18 20  135.000
TRP alpha (576 conformers)
TRP alpha phi    0  4  6  8 28  -60.000
TRP alpha psi    0  6  8 28 30  -45.000
TRP beta  (576 conformers)
TRP beta  phi    0  4  6  8 28 -135.000
TRP beta  psi    0  6  8 28 30  135.000
TYR alpha (576 conformers)
TYR alpha phi 

Describe and export the dataset

In [21]:
# Materialize the dataset molecules once and reuse the list for every statistic below,
# instead of reading dataset.molecules separately for each one
molecules = list(dataset.molecules)

# Per-molecule conformer counts, molecular weights (via OpenEye), and the distinct total charges
confs = np.array([mol.n_conformers for mol in molecules])
molecular_weights = np.array([oechem.OECalculateMolecularWeight(mol.to_openeye()) for mol in molecules])
# Dividing by the quantity's own unit leaves a plain number
unique_formal_charges = np.unique([mol.total_charge / mol.total_charge.unit for mol in molecules])

# Summary statistics
print('Number of unique molecules        {:d}'.format(dataset.n_molecules))
print('Number of filtered molecules      {:d}'.format(dataset.n_filtered))
print('Number of torsion drives          {:d}'.format(dataset.n_records))
print('Number of conformers min mean max {:3d} {:6.2f} {:3d}'.format(confs.min(), confs.mean(), confs.max()))
print('Molecular weight min mean max     {:6.2f} {:6.2f} {:6.2f}'.format(
    molecular_weights.min(), molecular_weights.mean(), molecular_weights.max()))
print('Charges                          ', sorted(unique_formal_charges))

# Dataset metadata
print(dataset.metadata.dict())

# One entry per QC specification attached to the dataset
for spec, obj in dataset.qc_specifications.items():
    print("Spec:", spec)
    print(obj.dict())


Number of unique molecules        23
Number of filtered molecules      0
Number of torsion drives          46
Number of conformers min mean max  24 456.00 576
Molecular weight min mean max     160.17 206.90 350.46
Charges                           [-1.0, 0.0, 1.0]
{'submitter': 'chapincavender', 'creation_date': datetime.date(2022, 2, 10), 'collection_type': 'TorsionDriveDataset', 'dataset_name': 'OpenFF Protein Capped 1-mer Sidechains v1.0', 'short_description': 'Capped 1-mer TorsionDrives on up to two sidechain dihedrals', 'long_description_url': HttpUrl('https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/2022-02-10-OpenFF-Protein-Capped-1-mer-Sidechains', scheme='https', host='github.com', tld='com', host_type='domain', path='/openforcefield/qca-dataset-submission/tree/master/submissions/2022-02-10-OpenFF-Protein-Capped-1-mer-Sidechains'), 'long_description': 'TorsionDrives on chi1 and chi2 for capped 1-mers of amino acids with a rotatable bond in the s

In [22]:
# Export the dataset to 'dataset.json.bz2'
export_path = 'dataset.json.bz2'
dataset.export_dataset(export_path)

In [23]:
# Write the dataset molecules to a 'smi' file and a visualization to a PDF
smiles_path, pdf_path = 'dataset.smi', 'dataset.pdf'
dataset.molecules_to_file(smiles_path, 'smi')
dataset.visualize(pdf_path)