Skip to content

Commit

Permalink
Merge branch 'master' into fix/github7306
Browse files Browse the repository at this point in the history
  • Loading branch information
rvianello committed Jun 16, 2024
2 parents 7e1f881 + d6171aa commit 295eb87
Show file tree
Hide file tree
Showing 20 changed files with 735 additions and 229 deletions.
8 changes: 7 additions & 1 deletion Code/GraphMol/CIPLabeler/catch_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
#include <GraphMol/test_fixtures.h>
#include <GraphMol/Substruct/SubstructMatch.h>

#include <RDGeneral/BoostStartInclude.h>
#include <boost/algorithm/string.hpp>
#include <RDGeneral/BoostEndInclude.h>

#include "CIPLabeler.h"
#include "Digraph.h"
#include "rules/Pairlist.h"
Expand Down Expand Up @@ -805,6 +809,8 @@ TEST_CASE("AssignMandP", "[accurateCIP]") {
"0-31=P:");
}
SECTION("two atropisomers") {
UseLegacyStereoPerceptionFixture(false);

// auto m = MolFileToMol(
testOneAtropIomerMandP(
"C1(N2C(C)=CC=C2Br)=C(C)C(C)=C(N2C(C)=CC=C2Br)C(C)=C1C |(-0.0002,1.5403,;-0.0002,3.0805,;-1.334,3.8508,;-2.6678,3.0807,;-1.334,5.391,;1.3338,5.391,;1.3338,3.8508,;2.6676,3.0807,;-1.3338,0.7702,;-2.6678,1.5403,;-1.3338,-0.7702,;-2.6678,-1.5401,;-0.0002,-1.5403,;-0.0002,-3.0805,;1.3338,-3.8508,;2.6676,-3.0805,;1.3338,-5.391,;-1.334,-5.391,;-1.334,-3.8508,;-2.6678,-3.0805,;1.3338,-0.7702,;2.6678,-1.5403,;1.3338,0.7702,;2.6678,1.5404,),wU:1.6,13.14|",
Expand Down Expand Up @@ -936,4 +942,4 @@ TEST_CASE("atropisomers", "[basic]") {
}
}
}
}
}
13 changes: 13 additions & 0 deletions Code/GraphMol/Chirality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,8 +477,16 @@ std::optional<Atom::ChiralType> atomChiralTypeFromBondDirPseudo3D(
auto centerLoc = conf->getAtomPos(atom->getIdx());
centerLoc.z = 0.0;
auto refPt = conf->getAtomPos(bondAtom->getIdx());

// Github #7305: in some odd cases, we get conformers with
// weird scalings. In these, we need to scale the 3d offset
// or it might be irrelevant or dominate over the coordinates.
auto refLength = (centerLoc - refPt).length();
refPt.z =
bondDir == Bond::BondDir::BEGINWEDGE ? pseudo3DOffset : -pseudo3DOffset;
if (refLength) {
refPt.z *= refLength;
}

//----------------------------------------------------------
//
Expand Down Expand Up @@ -512,9 +520,14 @@ std::optional<Atom::ChiralType> atomChiralTypeFromBondDirPseudo3D(
if (nbrBond->getBeginAtomIdx() == atom->getIdx() &&
(nbrBond->getBondDir() == Bond::BondDir::BEGINWEDGE ||
nbrBond->getBondDir() == Bond::BondDir::BEGINDASH)) {
// scale the 3d offset based on the reference bond here too
tmpPt.z = nbrBond->getBondDir() == Bond::BondDir::BEGINWEDGE
? pseudo3DOffset
: -pseudo3DOffset;
if (refLength) {
tmpPt.z *= refLength;
}

} else {
tmpPt.z = 0;
}
Expand Down
37 changes: 23 additions & 14 deletions Code/GraphMol/FileParsers/FileWriters.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,8 @@ struct RDKIT_FILEPARSERS_EXPORT MolWriterParams {
* \param confId - selects the conformer to be used
* (default=-1 - find first in mol)
*/
RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol,
const MolWriterParams &params,
int confId = -1);
RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(
const ROMol &mol, const MolWriterParams &params, int confId = -1);

// \brief generates an MDL mol block for a molecule
/*!
Expand All @@ -54,10 +53,8 @@ RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol,
* automatically with more than 999 atoms or
* bonds)(default=false)
*/
inline std::string MolToMolBlock(const ROMol &mol,
bool includeStereo = true,
int confId = -1,
bool kekulize = true,
inline std::string MolToMolBlock(const ROMol &mol, bool includeStereo = true,
int confId = -1, bool kekulize = true,
bool forceV3000 = false) {
MolWriterParams params{includeStereo, kekulize, forceV3000};
return MolToMolBlock(mol, params, confId);
Expand Down Expand Up @@ -94,6 +91,20 @@ inline std::string MolToV3KMolBlock(const ROMol &mol, bool includeStereo = true,
return MolToMolBlock(mol, params, confId);
}

// \brief generates an MDL v2000 mol block for a molecule
/*!
* \param mol - the molecule in question
* \param MolWriterParams - parameter struct with write options
* \param confId - selects the conformer to be used
* (default=-1 - find first in mol)
*
* \note This function will throw a ValueError exception if the molecule has
* more than 999 atoms, bonds, or SGroups.
*/
RDKIT_FILEPARSERS_EXPORT std::string MolToV2KMolBlock(
const ROMol &mol, const MolWriterParams &params = MolWriterParams(),
int confId = -1);

// \brief Writes a molecule to an MDL mol file
/*!
* \param mol - the molecule in question
Expand All @@ -118,9 +129,9 @@ RDKIT_FILEPARSERS_EXPORT void MolToMolFile(const ROMol &mol,
* automatically with
* more than 999 atoms or bonds)
*/
inline void MolToMolFile(
const ROMol &mol, const std::string &fName, bool includeStereo = true,
int confId = -1, bool kekulize = true, bool forceV3000 = false) {
inline void MolToMolFile(const ROMol &mol, const std::string &fName,
bool includeStereo = true, int confId = -1,
bool kekulize = true, bool forceV3000 = false) {
MolWriterParams params{includeStereo, kekulize, forceV3000};
MolToMolFile(mol, fName, params, confId);
}
Expand All @@ -132,10 +143,8 @@ inline void MolToMolFile(
* \param MolWriterParams - parameter struct with write options
* \param confId - selects the conformer to be used
*/
inline void MolToV3KMolFile(const ROMol &mol,
const std::string &fName,
const MolWriterParams &params,
int confId = -1) {
inline void MolToV3KMolFile(const ROMol &mol, const std::string &fName,
const MolWriterParams &params, int confId = -1) {
// have to set forceV300, prefer copy over mutable params argument
MolWriterParams v3KParams{params};
v3KParams.forceV3000 = true;
Expand Down
118 changes: 62 additions & 56 deletions Code/GraphMol/FileParsers/MolFileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ int getQueryBondSymbol(const Bond *bond) {
}
return res;
}

bool isAtomRGroup(const Atom &atom) {
return atom.getAtomicNum() == 0 &&
atom.hasProp(common_properties::_MolFileRLabel);
}
} // namespace

const std::string GetMolFileChargeInfo(const RWMol &mol) {
Expand Down Expand Up @@ -177,7 +182,7 @@ const std::string GetMolFileChargeInfo(const RWMol &mol) {
nRads = 0;
}
}
if (!atom->hasQuery()) {
if (!isAtomRGroup(*atom)) {
int isotope = atom->getIsotope();
if (isotope != 0) {
++nMassDiffs;
Expand Down Expand Up @@ -652,28 +657,6 @@ const std::string GetMolFileAtomLine(const Atom *atom, const Conformer *conf,
return res;
};

namespace {
/*
If a molecule contains dative bonds the V2000 format should not
be used as it doesn't support dative bonds. If a dative bond is
detected while writing a V2000 molfile the RequiresV3000Exception
is thrown and the V2000 writer will redo the export in V3000 format.
This is arguably a rather brute-force way of detecting the proper output
format, but the only alternatives I (Jan Holst Jensen) had in mind were:
1) Check all bond types before output. Slow and would affect all
V2000 exports.
2) Maintain a reference count of dative bonds in molecule. Complex
and error-prone.
*/
class RequiresV3000Exception : public std::runtime_error {
public:
explicit RequiresV3000Exception()
: std::runtime_error("RequiresV3000Exception"){};
};
} // namespace

int BondGetMolFileSymbol(const Bond *bond) {
PRECONDITION(bond, "");
// FIX: should eventually recognize queries
Expand Down Expand Up @@ -707,9 +690,9 @@ int BondGetMolFileSymbol(const Bond *bond) {
res = 1;
break;
case Bond::DATIVE:
// Dative bonds requires V3000 format. Throw special exception to
// force output to be re-done in V3000.
throw RequiresV3000Exception();
// extension
res = 9;
break;
default:
break;
}
Expand Down Expand Up @@ -809,7 +792,7 @@ const std::string GetV3000MolFileAtomLine(
if (chg != 0) {
ss << " CHG=" << chg;
}
if (isotope != 0) {
if (isotope != 0 && !isAtomRGroup(*atom)) {
// the documentation for V3000 CTABs says that this should contain the
// "absolute atomic weight" (whatever that means).
// Online examples seem to have integer (isotope) values and Marvin won't
Expand Down Expand Up @@ -1243,11 +1226,14 @@ std::string getV3000CTAB(const ROMol &tmol,
// gets a mol block as a string
//
//------------------------------------------------
std::string outputMolToMolBlock(const RWMol &tmol, int confId, bool forceV3000,

enum class MolFileFormat { V2000, V3000, unspecified };

std::string outputMolToMolBlock(const RWMol &tmol, int confId,
MolFileFormat whichFormat,
unsigned int precision,
const boost::dynamic_bitset<> &aromaticBonds) {
std::string res;
bool isV3000;
unsigned int nAtoms, nBonds, nLists, chiralFlag, nsText, nRxnComponents;
unsigned int nReactants, nProducts, nIntermediates;
nAtoms = tmol.getNumAtoms();
Expand All @@ -1257,6 +1243,12 @@ std::string outputMolToMolBlock(const RWMol &tmol, int confId, bool forceV3000,
const auto &sgroups = getSubstanceGroups(tmol);
unsigned int nSGroups = sgroups.size();

if (whichFormat == MolFileFormat::V2000 &&
(nAtoms > 999 || nBonds > 999 || nSGroups > 999)) {
throw ValueErrorException(
"V2000 format does not support more than 999 atoms, bonds or SGroups.");
}

chiralFlag = 0;
nsText = 0;
nRxnComponents = 0;
Expand Down Expand Up @@ -1302,8 +1294,22 @@ std::string outputMolToMolBlock(const RWMol &tmol, int confId, bool forceV3000,
}
res += "\n";

isV3000 = forceV3000 || nAtoms > 999 || nBonds > 999 || nSGroups > 999 ||
!tmol.getStereoGroups().empty();
bool hasDative = false;
for (const auto bond : tmol.bonds()) {
if (bond->getBondType() == Bond::DATIVE) {
hasDative = true;
break;
}
}

bool isV3000 = false;
if (whichFormat == MolFileFormat::V3000) {
isV3000 = true;
} else if (whichFormat == MolFileFormat::unspecified &&
(hasDative || nAtoms > 999 || nBonds > 999 || nSGroups > 999 ||
!tmol.getStereoGroups().empty())) {
isV3000 = true;
}

// the counts line:
std::stringstream ss;
Expand Down Expand Up @@ -1371,18 +1377,15 @@ std::string outputMolToMolBlock(const RWMol &tmol, int confId, bool forceV3000,
return res;
}

std::string MolToMolBlock(const ROMol &mol, const MolWriterParams &params,
int confId) {
RDKit::Utils::LocaleSwitcher switcher;
RWMol trwmol(mol);
void prepareMol(RWMol &trwmol, const MolWriterParams &params,
boost::dynamic_bitset<> &aromaticBonds) {
// NOTE: kekulize the molecule before writing it out
// because of the way mol files handle aromaticity
if (trwmol.needsUpdatePropertyCache()) {
trwmol.updatePropertyCache(false);
}
boost::dynamic_bitset<> aromaticBonds(mol.getNumBonds());
if (params.kekulize && mol.getNumBonds()) {
for (const auto bond : mol.bonds()) {
if (params.kekulize && trwmol.getNumBonds()) {
for (const auto bond : trwmol.bonds()) {
if (bond->getIsAromatic()) {
aromaticBonds.set(bond->getIdx());
}
Expand All @@ -1394,26 +1397,29 @@ std::string MolToMolBlock(const ROMol &mol, const MolWriterParams &params,
// generate coordinates so that the stereo we generate makes sense
RDDepict::compute2DCoords(trwmol);
}
#if 0
if(includeStereo){
// assign "any" status to any stereo bonds that are not
// marked with "E" or "Z" code - these bonds need to be explicitly written
// out to the mol file
MolOps::findPotentialStereoBonds(trwmol);
// now assign stereo code if any have been specified by the directions on
// single bonds
MolOps::assignStereochemistry(trwmol);
}
#endif
FileParserUtils::moveAdditionalPropertiesToSGroups(trwmol);
}

try {
return outputMolToMolBlock(trwmol, confId, params.forceV3000,
params.precision, aromaticBonds);
} catch (RequiresV3000Exception &) {
return outputMolToMolBlock(trwmol, confId, true, params.precision,
aromaticBonds);
}
std::string MolToMolBlock(const ROMol &mol, const MolWriterParams &params,
int confId) {
RDKit::Utils::LocaleSwitcher switcher;
RWMol trwmol(mol);
boost::dynamic_bitset<> aromaticBonds(trwmol.getNumBonds());
prepareMol(trwmol, params, aromaticBonds);
MolFileFormat whichFormat =
params.forceV3000 ? MolFileFormat::V3000 : MolFileFormat::unspecified;
return outputMolToMolBlock(trwmol, confId, whichFormat, params.precision,
aromaticBonds);
}

std::string MolToV2KMolBlock(const ROMol &mol, const MolWriterParams &params,
int confId) {
RDKit::Utils::LocaleSwitcher switcher;
RWMol trwmol(mol);
boost::dynamic_bitset<> aromaticBonds(trwmol.getNumBonds());
prepareMol(trwmol, params, aromaticBonds);
return outputMolToMolBlock(trwmol, confId, MolFileFormat::V2000,
params.precision, aromaticBonds);
}

//------------------------------------------------
Expand Down
28 changes: 28 additions & 0 deletions Code/GraphMol/FileParsers/file_parsers_catch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7278,6 +7278,34 @@ M END
}
}

TEST_CASE("MolToV2KMolBlock") {
SECTION("basics") {
auto m = "[NH3]->[Pt]"_smiles;
REQUIRE(m);
// by default we get a V3K block since there is a dative bond present
auto mb = MolToMolBlock(*m);
CHECK(mb.find("V3000") != std::string::npos);
CHECK(mb.find("V30 1 9 1 2") != std::string::npos);
// but we can ask for a V2K block
mb = MolToV2KMolBlock(*m);
CHECK(mb.find("V2000") != std::string::npos);
CHECK(mb.find(" 1 2 9 0") != std::string::npos);
}
SECTION("limits") {
// we won't test SGroups since creating 1000 of them is a bit much
std::vector<std::string> smileses = {
std::string(1000, 'C'),
"C1CC2" + std::string(996, 'C') + "12",
};
for (const auto &smi : smileses) {
INFO(smi);
auto m = SmilesToMol(smi);
REQUIRE(m);
CHECK_THROWS_AS(MolToV2KMolBlock(*m), ValueErrorException);
}
}
}

TEST_CASE("Github #7306: bad crossed bonds in large aromatic rings") {
std::string rdbase = getenv("RDBASE");
rdbase += "/Code/GraphMol/FileParsers/test_data/";
Expand Down
Loading

0 comments on commit 295eb87

Please sign in to comment.