## Workflow plan

 - Create a list of SMILES strings for the molecules reported in reference 10.1021/ed085p532 (Tables 3–5).
 - Develop an RDKit-based query tool that checks whether the current SDF file contains the molecules from the SMILES list.
 - If a given molecule is found in the SDF file, the diamagnetic contribution for that species will be included.

### Materials:
 - [Crack the Code: Mastering SMILES Notation](https://www.youtube.com/watch?v=QRLaIARxP30)
 - [OpenSMILES community - Bible of the SMILES notation](http://opensmiles.org/)
 - [Structure-to-SMILES converter](https://www.rcsb.org/chemical-sketch)
 - [PubChem - SMILES library](https://pubchem.ncbi.nlm.nih.gov/)

## Compounds loader

In [1]:
from pathlib import Path
from typing import Any

import pytest
from rdkit import Chem
from rdkit.Chem import MolKey

from src import MOLECULE_MATCH_SUBDIR, SDF_DIR
from src.constants.common_molecules import COMMON_MOLECULES
from src.core.compound import MBCompound
from src.loader import SDFLoader

In [2]:
def print_common_molecules_refactor_format() -> None:
    """Print every ``COMMON_MOLECULES`` entry as ``CommonMolecule(...)`` source text.

    ``COMMON_MOLECULES`` is walked as ``{group_name: {formula: mol_data}}``,
    where each ``mol_data`` mapping is read through the keys ``"name"``,
    ``"SMILES"``, ``"diamag_sus"`` and ``"sdf_file"``.  A ``# --- group ---``
    header is printed per group, followed by one ``CommonMolecule(...)`` call
    per molecule.

    The emitted text is meant to be pasted back into Python source, so:

    * string values are escaped (a SMILES such as ``C/C=C\\N`` or a name
      containing ``"`` would otherwise yield a broken literal);
    * SMILES strings are sorted, making the output independent of set
      iteration order;
    * an empty SMILES collection is written as ``set()`` because ``{}`` would
      be read back as a dict.

    Raises:
        KeyError: If a molecule mapping lacks one of the four keys above.
    """
    import json  # local import: only this helper needs it

    def quoted(value: object) -> str:
        # json.dumps produces a double-quoted literal with backslashes, quotes
        # and control characters escaped; every escape it emits is also a
        # valid Python string escape.  str() keeps the original behaviour of
        # stringifying whatever value is stored.
        return json.dumps(str(value), ensure_ascii=False)

    for group_name, group_dict in COMMON_MOLECULES.items():
        print(f"# --- {group_name} ---")
        for formula, mol_data in group_dict.items():
            name = mol_data["name"]
            smiles = sorted(str(s) for s in mol_data["SMILES"])
            diamag = mol_data["diamag_sus"]
            sdf = mol_data["sdf_file"]

            # Format the SMILES collection as a Python set literal.
            if smiles:
                smiles_literal = "{" + ", ".join(quoted(s) for s in smiles) + "}"
            else:
                smiles_literal = "set()"

            print(
                "CommonMolecule(\n"
                f"    formula={quoted(formula)},\n"
                f"    name={quoted(name)},\n"
                f"    SMILES={smiles_literal},\n"
                f"    diamag_sus={diamag},\n"
                f"    sdf_file={quoted(sdf)},\n"
                "),\n"
            )

# Dump the current COMMON_MOLECULES table as CommonMolecule(...) source text
# into the cell output below.
print_common_molecules_refactor_format()


# --- anions ---
CommonMolecule(
    formula="AsO3(3-)",
    name="arsenate(III)",
    SMILES={"[O-][As]([O-])[O-]"},
    diamag_sus=-51,
    sdf_file="AsO33-.sdf",
),

CommonMolecule(
    formula="AsO4(3-)",
    name="arsenate(V)",
    SMILES={"O=[As]([O-])([O-])[O-]"},
    diamag_sus=-60,
    sdf_file="AsO43-.sdf",
),

CommonMolecule(
    formula="BF4(-)",
    name="tetrafluoroborate",
    SMILES={"F[B-](F)(F)F"},
    diamag_sus=-37,
    sdf_file="BF4-.sdf",
),

CommonMolecule(
    formula="BO3(3-)",
    name="borate",
    SMILES={"[O-]B([O-])[O-]"},
    diamag_sus=-35,
    sdf_file="BO33-.sdf",
),

CommonMolecule(
    formula="BrO3(-)",
    name="bromate(V)",
    SMILES={"[O-][Br+2]([O-])[O-]"},
    diamag_sus=-40,
    sdf_file="BrO3-.sdf",
),

CommonMolecule(
    formula="ClO3(-)",
    name="chlorate(V)",
    SMILES={"[O-][Cl+2]([O-])[O-]"},
    diamag_sus=-30.2,
    sdf_file="ClO3-.sdf",
),

CommonMolecule(
    formula="ClO4(-)",
    name="chlorate(VII)",
    SMILES={"[O-][Cl+3]([