Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 18 additions & 14 deletions Code/GraphMol/FileParsers/test1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <clocale>
#include <cstdlib>

#include <regex>
#include <string>
#include <fstream>
#include <boost/lexical_cast.hpp>
Expand Down Expand Up @@ -4663,7 +4664,7 @@ void testParseCHG() {
22, -1, 26, -1, 27, -1, 28, 4, 29, -1, 31, 1, 32,
-1, 34, -1, 35, 1, 36, -1, 38, -1, 0, 0};
// Shouldn't seg fault, throw exception or have a null mol
RWMol *m = MolBlockToMol(molblock_chg);
std::unique_ptr<RWMol> m(MolBlockToMol(molblock_chg));
size_t i = 0;
while (1) {
if (charges[i] == 0) {
Expand All @@ -4678,20 +4679,23 @@ void testParseCHG() {

TEST_ASSERT(m);
std::string out = MolToMolBlock(*m);
const std::string sub = "M CHG";
std::vector<size_t> positions;
// There are now dative bonds in the molecule, so the mol block will
// be forced to V3000.
std::regex chg_all("CHG="), chg_m1("CHG=-1"), chg_p1("CHG=1"),
chg_p4("CHG=4");
TEST_ASSERT(
std::distance(std::sregex_iterator(out.begin(), out.end(), chg_all),
std::sregex_iterator()) == 24);
TEST_ASSERT(
std::distance(std::sregex_iterator(out.begin(), out.end(), chg_m1),
std::sregex_iterator()) == 18);
TEST_ASSERT(
std::distance(std::sregex_iterator(out.begin(), out.end(), chg_p1),
std::sregex_iterator()) == 4);
TEST_ASSERT(
std::distance(std::sregex_iterator(out.begin(), out.end(), chg_p4),
std::sregex_iterator()) == 2);

size_t pos = out.find(sub, 0);
while (pos != std::string::npos) {
positions.push_back(pos);
size_t num_entries =
strtol(out.substr(pos + sub.size(), 3).c_str(), nullptr, 10);
TEST_ASSERT(num_entries == 8);
pos = out.find(sub, pos + 1);
}
TEST_ASSERT(positions.size() == 3); // 24/3 == 8

delete m;
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
}

Expand Down
4 changes: 3 additions & 1 deletion Code/GraphMol/MolInterchange/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,9 @@ std::vector<boost::shared_ptr<ROMol>> DocToMols(
throw FileParseException("Bad Format: missing version in JSON");
}
// FIX: we want to be backwards compatible
if (doc["rdkitjson"]["version"].GetInt() != currentRDKitJSONVersion) {
// Version 10 files can be read by 11, but not vice versa.
if (int jsonVersion = doc["rdkitjson"]["version"].GetInt();
jsonVersion > currentRDKitJSONVersion || jsonVersion < 10) {
throw FileParseException("Bad Format: bad version in JSON");
}
} else {
Expand Down
25 changes: 16 additions & 9 deletions Code/GraphMol/MolInterchange/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,12 +219,19 @@ void addQuery(const Q &query, rj::Value &rjQuery, rj::Document &doc,

void addBond(const Bond &bond, rj::Value &rjBond, rj::Document &doc,
const rj::Value &rjDefaults, const JSONWriteParameters &params) {
int bo = 0;
if (inv_bolookup.find(bond.getBondType()) != inv_bolookup.end()) {
bo = inv_bolookup.find(bond.getBondType())->second;
} else {
BOOST_LOG(rdWarningLog)
<< " unrecognized bond type set to zero while writing" << std::endl;
int bo = -1;
if (auto boIter = inv_bolookup.find(bond.getBondType());
boIter != inv_bolookup.end()) {
bo = boIter->second;
}
// commonchem only supports a few bond orders:
if (!params.useRDKitExtensions && bo > 3) {
bo = -1;
}
if (bo < 0) {
bo = 0;
BOOST_LOG(rdWarningLog) << " unrecognized bond type " << bond.getBondType()
<< " set to zero while writing" << std::endl;
}
addIntVal(rjBond, rjDefaults, "bo", bo, doc);
rj::Value rjAtoms(rj::kArrayType);
Expand All @@ -235,9 +242,9 @@ void addBond(const Bond &bond, rj::Value &rjBond, rj::Document &doc,
rjBond.AddMember("atoms", rjAtoms, doc.GetAllocator());

std::string chi = "";
if (inv_stereoBondlookup.find(bond.getStereo()) !=
inv_stereoBondlookup.end()) {
chi = inv_stereoBondlookup.find(bond.getStereo())->second;
if (auto sbIter = inv_stereoBondlookup.find(bond.getStereo());
sbIter != inv_stereoBondlookup.end()) {
chi = sbIter->second;
} else {
BOOST_LOG(rdWarningLog) << " unrecognized bond stereo " << bond.getStereo()
<< " set to default while writing" << std::endl;
Expand Down
8 changes: 5 additions & 3 deletions Code/GraphMol/MolInterchange/details.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
namespace RDKit {
namespace MolInterchange {
constexpr int currentMolJSONVersion = 10;
constexpr int currentRDKitJSONVersion = 10;
constexpr int currentRDKitJSONVersion = 11;
constexpr int currentRDKitRepresentationVersion = 2;
constexpr int currentChargeRepresentationVersion = 10;
constexpr int currentQueryRepresentationVersion = 10;
Expand All @@ -33,9 +33,11 @@ static const std::map<Atom::ChiralType, std::string> inv_chilookup = {
{Atom::CHI_OTHER, "other"}};

static const std::map<unsigned int, Bond::BondType> bolookup = {
{0, Bond::ZERO}, {1, Bond::SINGLE}, {2, Bond::DOUBLE}, {3, Bond::TRIPLE}};
{0, Bond::ZERO}, {1, Bond::SINGLE}, {2, Bond::DOUBLE},
{3, Bond::TRIPLE}, {4, Bond::QUADRUPLE}, {17, Bond::DATIVE}};
static const std::map<Bond::BondType, unsigned int> inv_bolookup = {
{Bond::ZERO, 0}, {Bond::SINGLE, 1}, {Bond::DOUBLE, 2}, {Bond::TRIPLE, 3}};
{Bond::ZERO, 0}, {Bond::SINGLE, 1}, {Bond::DOUBLE, 2},
{Bond::TRIPLE, 3}, {Bond::QUADRUPLE, 4}, {Bond::DATIVE, 17}};

static const std::map<std::string, Bond::BondStereo> stereoBondlookup = {
{"unspecified", Bond::STEREONONE},
Expand Down
68 changes: 68 additions & 0 deletions Code/GraphMol/MolOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,62 @@ void halogenCleanup(RWMol &mol, Atom *atom) {
}
}
}

void metalBondCleanup(RWMol &mol, Atom *atom) {
PRECONDITION(atom, "bad atom in metalBondCleanup");
// The IUPAC recommendation for ligand->metal coordination bonds is that they
// be single. This upsets the RDKit valence model, as seen in CHEBI:26355,
// heme b. If the valence of a non-metal atom is above the maximum in the
// RDKit model, and there are single bonds from it to metal
// change those bonds to atom->metal dative.

auto isMetal = [](const Atom *a) -> bool {
// This is the list of not metal atoms from QueryOps.cpp
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a request for a change, but I suspect that we're going to want to adjust this list to make it less permissive.

static const std::set<int> notMetals{1, 2, 5, 6, 7, 8, 9, 10,
14, 15, 16, 17, 18, 33, 34, 35,
36, 52, 53, 54, 85, 86};
return (notMetals.find(a->getAtomicNum()) == notMetals.end());
};
auto noDative = [](const Atom *a) -> bool {
static const std::set<int> noD{1, 2, 9, 10};
return (noD.find(a->getAtomicNum()) != noD.end());
};
if (!isMetal(atom)) {
return;
}
const auto &valens =
PeriodicTable::getTable()->getValenceList(atom->getAtomicNum());
if (valens.back() != -1) {
// the atom can only have specific valences, so leave it.
return;
}
for (auto bond : mol.atomBonds(atom)) {
auto otherAtom = bond->getOtherAtom(atom);
if (!isMetal(otherAtom) && !noDative(otherAtom)) {
auto ev = otherAtom->calcExplicitValence(false);
// Check the explicit valence of the non-metal against the allowed
// valences of the atom, adjusted by its formal charge. This means that
// N+ is treated the same as C, O+ the same as N. This allows for,
// for example, c1cccc[n+]1-[Fe] to be acceptable and not turned into
// c1cccc[n+]1->[Fe]. After all, c1cccc[n+]1-C is ok. Although this is
// a poor example because c1ccccn1->[Fe] appears to be the normal
// way that pyridine complexes with transition metals. Heme b in
// CHEBI:26355 is an example of when this is required.
int effAtomicNum =
otherAtom->getAtomicNum() - otherAtom->getFormalCharge();
if (effAtomicNum <= 0) {
continue;
}
const auto &otherValens =
Comment thread
greglandrum marked this conversation as resolved.
PeriodicTable::getTable()->getValenceList(effAtomicNum);
if (otherValens.back() > 0 && ev > otherValens.back()) {
bond->setBondType(RDKit::Bond::BondType::DATIVE);
bond->setBeginAtom(otherAtom);
bond->setEndAtom(atom);
}
}
}
}
} // namespace

void cleanUp(RWMol &mol) {
Expand All @@ -198,6 +254,12 @@ void cleanUp(RWMol &mol) {
}
}

void cleanUpOrganometallics(RWMol &mol) {
for (auto atom : mol.atoms()) {
metalBondCleanup(mol, atom);
}
}

void adjustHs(RWMol &mol) {
//
// Go through and adjust the number of implicit and explicit Hs
Expand Down Expand Up @@ -320,6 +382,12 @@ void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed,
cleanUp(mol);
}

operationThatFailed = SANITIZE_CLEANUP_ORGANOMETALLICS;
if (sanitizeOps & operationThatFailed) {
// clean up things like nitro groups
cleanUpOrganometallics(mol);
}

// update computed properties on atoms and bonds:
operationThatFailed = SANITIZE_PROPERTIES;
if (sanitizeOps & operationThatFailed) {
Expand Down
45 changes: 31 additions & 14 deletions Code/GraphMol/MolOps.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,7 @@ typedef enum {
SANITIZE_SETHYBRIDIZATION = 0x80,
SANITIZE_CLEANUPCHIRALITY = 0x100,
SANITIZE_ADJUSTHS = 0x200,
SANITIZE_CLEANUP_ORGANOMETALLICS = 0x400,
SANITIZE_ALL = 0xFFFFFFF
} SanitizeFlags;

Expand All @@ -449,6 +450,7 @@ typedef enum {
/*!
This functions calls the following in sequence
-# MolOps::cleanUp()
-# MolOps::cleanUpOrganometallics()
-# mol.updatePropertyCache()
-# MolOps::symmetrizeSSSR()
-# MolOps::Kekulize()
Expand Down Expand Up @@ -583,6 +585,20 @@ RDKIT_GRAPHMOL_EXPORT int setAromaticity(
*/
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol);

//! Designed to be called by the sanitizer to handle special cases for
//! organometallic species before valence is perceived
/*!

Currently this:
- replaces single bonds between "hypervalent" organic atoms and metals with
dative bonds (this is following an IUPAC recommendation:
https://iupac.qmul.ac.uk/tetrapyrrole/TP8.html)

\param mol the molecule of interest

*/
RDKIT_GRAPHMOL_EXPORT void cleanUpOrganometallics(RWMol &mol);

//! Called by the sanitizer to assign radical counts to atoms
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol);

Expand All @@ -608,17 +624,17 @@ RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol);

\param mol the molecule of interest

\param markAtomsBonds if this is set to true, \c isAromatic boolean settings
on both the Bonds and Atoms are turned to false following the Kekulization,
otherwise they are left alone in their original state.
\param markAtomsBonds if this is set to true, \c isAromatic boolean
settings on both the Bonds and Atoms are turned to false following the
Kekulization, otherwise they are left alone in their original state.

\param maxBackTracks the maximum number of attempts at back-tracking. The
algorithm uses a back-tracking procedure to revisit a previous setting of
double bond if we hit a wall in the kekulization process

<b>Notes:</b>
- this does not modify query bonds which have bond type queries (like those
which come from SMARTS) or rings containing them.
- this does not modify query bonds which have bond type queries (like
those which come from SMARTS) or rings containing them.
- even if \c markAtomsBonds is \c false the \c BondType for all modified
aromatic bonds will be changed from \c RDKit::Bond::AROMATIC to \c
RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
Expand All @@ -632,9 +648,9 @@ RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds = true,

\param mol the molecule of interest

\param markAtomsBonds if this is set to true, \c isAromatic boolean settings
on both the Bonds and Atoms are turned to false following the Kekulization,
otherwise they are left alone in their original state.
\param markAtomsBonds if this is set to true, \c isAromatic boolean
settings on both the Bonds and Atoms are turned to false following the
Kekulization, otherwise they are left alone in their original state.

\param maxBackTracks the maximum number of attempts at back-tracking. The
algorithm uses a back-tracking procedure to revisit a previous setting of
Expand Down Expand Up @@ -895,9 +911,9 @@ RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol);


NOTE that this does not check to see if atoms are chiral centers (i.e. all
substituents are different), it merely sets the chiral type flags based on the
coordinates and atom ordering. Use \c assignStereochemistryFrom3D() if you
want chiral flags only on actual stereocenters.
substituents are different), it merely sets the chiral type flags based on
the coordinates and atom ordering. Use \c assignStereochemistryFrom3D() if
you want chiral flags only on actual stereocenters.
*/
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(
ROMol &mol, int confId = -1, bool replaceExistingTags = true);
Expand Down Expand Up @@ -992,7 +1008,8 @@ RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol);

This function is useful in the following situations:
- when parsing a mol file; for the bonds marked here, coordinate
information on the neighbors can be used to indentify cis or trans states
information on the neighbors can be used to indentify cis or trans
states
- when writing a mol file; bonds that can be cis/trans but not marked as
either need to be specially marked in the mol file
- finding double bonds with unspecified stereochemistry so they
Expand All @@ -1003,8 +1020,8 @@ RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol);
*/
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol,
bool cleanIt = false);
//! \brief Uses the molParity atom property to assign ChiralType to a molecule's
//! atoms
//! \brief Uses the molParity atom property to assign ChiralType to a
//! molecule's atoms
/*!
\param mol the molecule of interest
\param replaceExistingTags if this flag is true, any existing atomic chiral
Expand Down
2 changes: 1 addition & 1 deletion Code/GraphMol/MolStandardize/test1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ void testCleanup() {
RWMOL_SPTR m = "C[Hg]C"_smiles;
RWMOL_SPTR res(MolStandardize::cleanup(*m, params));
TEST_ASSERT(MolToSmiles(*res) == "C[Hg]C")
BOOST_LOG(rdDebugLog) << "Finished" << std::endl;
}
BOOST_LOG(rdDebugLog) << "Finished" << std::endl;
}

void testStandardizeSm() {
Expand Down
27 changes: 25 additions & 2 deletions Code/GraphMol/Wrap/MolOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,11 @@ void cleanupMol(ROMol &mol) {
MolOps::cleanUp(rwmol);
}

void cleanUpOrganometallicsMol(ROMol &mol) {
auto &rwmol = static_cast<RWMol &>(mol);
MolOps::cleanUpOrganometallics(rwmol);
}

void setAromaticityMol(ROMol &mol, MolOps::AromaticityModel model) {
auto &wmol = static_cast<RWMol &>(mol);
MolOps::setAromaticity(wmol, model);
Expand Down Expand Up @@ -946,6 +951,8 @@ struct molops_wrapper {
python::enum_<MolOps::SanitizeFlags>("SanitizeFlags")
.value("SANITIZE_NONE", MolOps::SANITIZE_NONE)
.value("SANITIZE_CLEANUP", MolOps::SANITIZE_CLEANUP)
.value("SANITIZE_CLEANUP_ORGANOMETALLICS",
MolOps::SANITIZE_CLEANUP_ORGANOMETALLICS)
.value("SANITIZE_PROPERTIES", MolOps::SANITIZE_PROPERTIES)
.value("SANITIZE_SYMMRINGS", MolOps::SANITIZE_SYMMRINGS)
.value("SANITIZE_KEKULIZE", MolOps::SANITIZE_KEKULIZE)
Expand Down Expand Up @@ -1563,6 +1570,21 @@ to the terminal dummy atoms.\n\
\n";
python::def("Cleanup", cleanupMol, (python::arg("mol")), docString.c_str());

// ------------------------------------------------------------------------
docString =
"cleans up certain common bad functionalities in the organometallic molecule\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
\n\
NOTES:\n\
\n\
- The molecule is modified in place.\n\
\n";
python::def("CleanupOrganometallics", cleanUpOrganometallicsMol,
(python::arg("mol")), docString.c_str());

python::enum_<MolOps::AromaticityModel>("AromaticityModel")
.value("AROMATICITY_DEFAULT", MolOps::AROMATICITY_DEFAULT)
.value("AROMATICITY_RDKIT", MolOps::AROMATICITY_RDKIT)
Expand Down Expand Up @@ -2549,8 +2571,9 @@ EXAMPLES:\n\n\
&MolzipParams::enforceValenceRules,
"If true (default) enforce valences after zipping\n\
Setting this to false allows assembling chemically incorrect fragments.")
.def_readwrite("generateCoordinates", &MolzipParams::generateCoordinates,
"If true will add depiction coordinates to input molecules and\n\
.def_readwrite(
"generateCoordinates", &MolzipParams::generateCoordinates,
"If true will add depiction coordinates to input molecules and\n\
zipped molecule (for molzipFragments only)")
.def("setAtomSymbols", &RDKit::setAtomSymbols,
"Set the atom symbols used to zip mols together when using "
Expand Down
Loading