## Workflow plan

 - Create a list of SMARTS strings for the bond types reported in reference 10.1021/ed085p532 (Table 2).
 - Develop an RDKit-based query tool that checks whether the current SDF file contains molecules with a specified bond type.
 - If a given bond type is found in the molecule, the constitutive correction for that species will be included.
 - Constitutive corrections are added alongside the atomic Pascal constants (the procedure for an unknown molecule).

### Materials:
 - [Substructure Filtering in RDKit](https://www.youtube.com/watch?v=Z1PrErlmTGI)
 - [SMARTS notation](https://en.wikipedia.org/wiki/SMILES_arbitrary_target_specification)

## Compounds loader

In [1]:
from pathlib import Path
from typing import Any

import pytest
from rdkit import Chem
from rdkit.Chem import Mol, MolToSmarts, RemoveHs

from src import DIAMAG_COMPOUND_CONSTITUTIVE_CORR_SUBDIR
from src.constants.common_molecules import COMMON_MOLECULES
from src.core.compound import MBCompound
from src.core.molecule import MBMolecule
from src.constants.bonds import DIAMAG_RELEVANT_BONDS
from src.loader import SDFLoader, SDFFileNotFoundError


### Constitutive corrections for relevant bond types

In [2]:
# Load the example SDF file and work with the first molecule it contains.
compound: MBCompound = SDFLoader.Load(
    "1,3,5-triazine+.sdf",
    subdir=DIAMAG_COMPOUND_CONSTITUTIVE_CORR_SUBDIR,
)
mol: MBMolecule = compound.GetMols(to_rdkit=False)[0]
print(f'SDF loaded first molecule SMARTS: {mol.smarts}')

# Scan every known bond type and report each one whose SMARTS pattern is
# found in the molecule; `match` records whether at least one was found.
match = False
for idx, bond in enumerate(DIAMAG_RELEVANT_BONDS):
    if not mol.HasSubstructMatch(smarts=bond.SMARTS):
        continue
    print(f'{idx}: Match: "{bond.formula}": {bond}')
    match = True

if not match:
    print("No match found.")

SDF loaded first molecule SMARTS: [#6]1:[#7]:[#6]:[#7]:[#6]:[#7H+]:1
61: Match: "triazine": DiamagRelevantBond(sdf_file='1,2,3-triazine.sdf', constitutive_corr=-1.4, formula='triazine', SMARTS='[$(n1nnccc1),$(n1nccnc1),$(n1cncnc1),$([nH+]1nnccc1),$(n1[nH+]nccc1),$(n1n[nH+]ccc1),$([nH+]1nccnc1),$(n1[nH+]ccnc1),$(n1ncc[nH+]c1),$([nH+]1cncnc1),$(n1c[nH+]cnc1),$(n1cnc[nH+]c1)]', description="Assumes the same constant for three triazine isomers and thier monoprotonated states. This must be noted in Software's MANUAL.")


In [3]:
from numpy import ma
from src import DIAMAG_COMPOUND_CONSTITUTIVE_CORR_SUBDIR
from src.constants.bonds import DIAMAG_RELEVANT_BONDS, DiamagRelevantBond
from src.core.compound import MBCompound
from src.loader import SDFLoader


def run_bond_match_tests_info_only() -> None:
    """
    Informational SMARTS self-check over every entry of DIAMAG_RELEVANT_BONDS.

    For each entry, the SDF file named by ``sdf_file`` is loaded from
    ``DIAMAG_COMPOUND_CONSTITUTIVE_CORR_SUBDIR`` and every molecule in it is
    tested against the entry's ``SMARTS`` pattern. An entry passes only when
    exactly one molecule matches; zero matches or more than one match is
    recorded as a failure. Entries whose SDF file is missing are skipped.

    - Failed checks are collected, not raised
    - A missing SDF file is reported and the entry is skipped
    - Prints a final summary (counted per entry) and a detailed failure report
    """

    # One record per offending molecule (or one "no match" record) -- used for
    # the detailed report only.
    failures: list[dict] = []
    # One index per entry -- used for the summary so that
    # passed + skipped + failed equals the number of entries checked.
    failed: list[int] = []
    skipped: list[int] = []
    passed: list[int] = []

    for idx, drb in enumerate(DIAMAG_RELEVANT_BONDS):

        # --- Load compound ---------------------------------------------
        try:
            compound: MBCompound = SDFLoader.Load(
                drb.sdf_file, subdir=DIAMAG_COMPOUND_CONSTITUTIVE_CORR_SUBDIR
            )
        except SDFFileNotFoundError as e:
            skipped.append(idx)
            print(f"[SKIPPED {idx}] {drb.formula} -- SDF file not found: {e}")
            continue

        expected_smarts = drb.SMARTS

        # --- Check all molecules ---------------------------------------
        matched_mols = [
            mol
            for mol in compound.GetMols(to_rdkit=False)
            if mol.HasSubstructMatch(smarts=expected_smarts)
        ]

        if len(matched_mols) == 1:
            passed.append(idx)
            continue

        # --- Record the failure ----------------------------------------
        failed.append(idx)
        print(f"[ERROR {idx}] {drb.formula}")

        common = {
            "idx": idx,
            "formula": drb.formula,
            "sdf_file": drb.sdf_file,
            "expected_smarts": expected_smarts,
        }
        if matched_mols:
            reason = f"{len(matched_mols)} molecules matched (expected exactly 1)"
            for mol in matched_mols:
                failures.append(
                    {
                        **common,
                        "reason": reason,
                        "molecule_smarts": mol.smarts,
                        "mol": mol,
                    }
                )
        else:
            failures.append(
                {
                    **common,
                    "reason": "no molecule matched",
                    "molecule_smarts": "No Match Found",
                    "mol": "No Match Found",
                }
            )

    # --- Final summary --------------------------------------------------
    print("\n" + "=" * 80)
    print("SMARTS MATCH SUMMARY")
    print("=" * 80)

    print(f"✅ Passed : {len(passed)}")
    print(f"⚠️ Skipped: {len(skipped)}")
    # Count failed entries, not failure records: a multi-match entry
    # contributes several records but is still a single failed check.
    print(f"❌ Failed : {len(failed)}")

    # --- Detailed failure report ---------------------------------------
    if failures:
        print("\n" + "-" * 80)
        print("SUBSTRUCTURE MATCH FAILURES DETAIL")
        print("-" * 80)

        for f in failures:
            print(
                f"\n[Test {f['idx']} FAILED]\n"
                f"Reason:         {f['reason']}\n"
                f"Bond formula:   {f['formula']}\n"
                f"SDF file:       {f['sdf_file']}\n\n"
                f"Expected SMARTS:\n"
                f"  {f['expected_smarts']}\n\n"
                f"Molecule SMARTS:\n"
                f"  {f['molecule_smarts']}\n\n"
                f"Molecule object:\n"
                f"  {f['mol']}\n"
                + "-" * 80
            )

    print("\n✅ Informational run completed.")


# Run the informational SMARTS check over DIAMAG_RELEVANT_BONDS and print its report.
run_bond_match_tests_info_only()

[ERROR 4] CH2=CH-CH2-
[ERROR 5] C=O
[SKIPPED 6] COOH -- SDF file not found: File not found: /Users/mir/PycharmProjects/mag-bridge/data/sdf/diamag_compound/constitutive_corr/COOH.sdf
[SKIPPED 7] COOR -- SDF file not found: File not found: /Users/mir/PycharmProjects/mag-bridge/data/sdf/diamag_compound/constitutive_corr/COOR.sdf
[SKIPPED 8] C(=O)NH2 -- SDF file not found: File not found: /Users/mir/PycharmProjects/mag-bridge/data/sdf/diamag_compound/constitutive_corr/COONH2.sdf
[SKIPPED 11] -N#C -- SDF file not found: File not found: /Users/mir/PycharmProjects/mag-bridge/data/sdf/diamag_compound/constitutive_corr/HCN-Any.sdf
[SKIPPED 12] -C#N -- SDF file not found: File not found: /Users/mir/PycharmProjects/mag-bridge/data/sdf/diamag_compound/constitutive_corr/HCN.sdf
[ERROR 15] C-Cl
[SKIPPED 16] Cl-CR2CR2-Cl -- SDF file not found: File not found: /Users/mir/PycharmProjects/mag-bridge/data/sdf/diamag_compound/constitutive_corr/R4C2Cl2.sdf
[ERROR 30] Ar-Ar
[ERROR 52] pyrazine

SMARTS MATCH