## Workflow plan

 - Create a list of SMARTS strings for the bond types reported in reference 10.1021/ed085p532 (Table 2).
 - Develop an RDKit-based query tool that checks whether the molecules in the current SDF file contain a specified bond type.
 - If a given bond type is found in a molecule, the constitutive correction for that bond type is included.
 - Constitutive corrections are added along with the atomic Pascal constants (the procedure for an unknown molecule).

### Materials:
 - [Substructure Filtering in RDKit](https://www.youtube.com/watch?v=Z1PrErlmTGI)
 - [SMARTS notation](https://en.wikipedia.org/wiki/SMILES_arbitrary_target_specification)

## Compounds loader

In [26]:
from pathlib import Path
from typing import Any

import pytest
from rdkit import Chem
from rdkit.Chem import Mol, MolToSmarts, RemoveHs

from src import DIAMAG_COMPOUND_CONSTITUTIVE_CORR_SUBDIR
from src.constants.common_molecules import COMMON_MOLECULES
from src.core.compound import MBCompound
from src.core.molecule import MBMolecule
from src.loader import SDFLoader


In [27]:
# Load the example compound from the constitutive-correction SDF sub-directory.
compound: MBCompound = SDFLoader.Load(
    "COOR.sdf", subdir=DIAMAG_COMPOUND_CONSTITUTIVE_CORR_SUBDIR
)

# Hydrogen-stripped RDKit molecules, kept for the substructure queries below.
mols_list = []
for raw_mol in compound.GetMols(to_rdkit=True):
    # Remove explicit hydrogens so the printed SMARTS shows only the
    # heavy-atom skeleton (e.g. "[#6](=[#8])-[#8]-[#6]").
    mol = RemoveHs(raw_mol)
    mols_list.append(mol)
    # Print the SMARTS notation that RDKit generates for this molecule.
    print(MolToSmarts(mol, isomericSmiles=True))

[#6](=[#8])-[#8]-[#6]


### Constitutive corrections for relevant bond types

In [None]:
# Lookup table of bond types that carry a constitutive correction, keyed by a
# human-readable bond label.  Each entry holds:
#   "SMARTS"            - substructure query string for the bond type
#   "constitutive_corr" - constitutive correction value for that bond type
#                         (taken from Table 2 of 10.1021/ed085p532 per the
#                         workflow plan; units are not stated here - TODO confirm)
#   "sdf_file"          - name of an SDF file associated with the bond type
#                         (presumably an example structure - confirm)
#   "Conflicting bonds" - set of labels of other bond types that overlap with
#                         this one
# NOTE(review): `{None}` appears to mean "no known conflicts"; an empty set
# would be more conventional, but confirm no consumer relies on `None` first.
# NOTE(review): the name keeps the original spelling ("RELEVENT") so existing
# references keep working.
RELEVENT_BOND_TYPES = {
    "C=C": {
        "SMARTS": "[C]=[C]",
        "constitutive_corr": 5.5,
        "sdf_file": "C2H4.sdf",
        "Conflicting bonds": {"C=C-C=C"},
    },
    "C#C": {  # a triple bond is written as "#"
        # C#C where neither carbon is single-bonded to an aromatic carbon and
        # the second carbon is not single-bonded to a C(=O)-C carbonyl carbon.
        "SMARTS": "[C;!$([C]-c)]#[C;!$([C]-c);!$([C]-[C](=O)-[C])]",
        "constitutive_corr": 0.8,
        "sdf_file": "C2H2.sdf",
        # The second label previously contained a mis-encoded en dash
        # ("Câ€“C"); it now uses an ASCII hyphen like every other label.
        "Conflicting bonds": {"Ar-C#C-Ar", "RC#C-C(=O)R", "Ar-C#C"},
    },
    "C=C-C=C": {
        "SMARTS": "[C]=[C]-[C]=[C]",
        "constitutive_corr": 10.6,
        "sdf_file": "C=C-C=C.sdf",
        "Conflicting bonds": {
            "conjugated double bond systems with 3 or more C=C bonds"
        },
    },
    "Ar-C#C-Ar": {
        "SMARTS": "[c]-[C]#[C]-[c]",
        "constitutive_corr": 3.85,
        # NOTE(review): same file as the "C=C" entry - looks like a
        # copy-paste placeholder; confirm the intended SDF file.
        "sdf_file": "C2H4.sdf",
        "Conflicting bonds": {None},
    },
    "CH2=CH-CH2-": {
        "SMARTS": "[CH2]=[CH]-[CH2]-*",
        "constitutive_corr": 4.5,
        "sdf_file": "allyl chloride.sdf",
        "Conflicting bonds": {None},
    },
    "C=O": {
        # Three-connected aliphatic carbon double-bonded to O that is not
        # single-bonded to an aromatic carbon nor to N or O.
        "SMARTS": "[C;X3;!$([C]-c);!$([C]-[N,O])]=O",  # TODO: Finish
        "constitutive_corr": 6.3,
        # NOTE(review): same file as the "CH2=CH-CH2-" entry - looks like a
        # copy-paste placeholder; confirm the intended SDF file.
        "sdf_file": "allyl chloride.sdf",
        "Conflicting bonds": {None},
    },
    "COOH": {
        "SMARTS": "[C;X3;!$(C-c)](=O)[O;H]",
        "constitutive_corr": -5.0,
        "sdf_file": "COOH.sdf",
        "Conflicting bonds": {None},
    },
    "COOR": {
        "SMARTS": "[C;X3;!$(C-c)](=O)O[C]",
        "constitutive_corr": -5.0,
        "sdf_file": "COOR.sdf",
        "Conflicting bonds": {None},
    },
    "C(=O)NH2": {
        "SMARTS": "[C;X3;!$(C-c)](=O)[N;H2]",
        "constitutive_corr": -3.5,
        "sdf_file": "COONH2.sdf",
        "Conflicting bonds": {None},
    },
    "N=N": {
        "SMARTS": "N=N",
        "constitutive_corr": 1.85,
        "sdf_file": "N=N.sdf",
        "Conflicting bonds": {None},
    },
}

In [29]:
# TODO: replace this single hard-coded query with a loop over every bond type
# in the dictionary.
# The SMARTS below is the ester ("COOR") query: a three-connected carbonyl
# carbon, not single-bonded to an aromatic carbon, bearing an O-C substituent.
pattern = Chem.MolFromSmarts("[C;X3;!$(C-c)](=O)O[C]")

# Caveat: other bond types (e.g. C=C) can match the same molecule as well;
# resolving such non-unique matches is left for future work.
for mol in mols_list:
    # Report, one line per molecule, whether the query substructure is present.
    print("True" if mol.HasSubstructMatch(pattern) else "False")

True
