## References

- [RDKit Documentation](https://www.rdkit.org/docs/GettingStartedInPython.html)
- [PubChem](https://pubchem.ncbi.nlm.nih.gov/) - find SDF files for molecules
- YouTube Tutorial
  - [Part1](https://www.youtube.com/watch?v=NozaWUkJ3YM)


## Init Parameters


In [25]:
from rdkit import Chem
from rdkit.Chem import Draw

from src import SDF_DIR

file = "CH3COONa.sdf"

## Sandbox


In [26]:
# Loads Molecule from file to python object
suppl: Chem.SDMolSupplier = Chem.SDMolSupplier(f"{SDF_DIR.joinpath(file)}")

# Test if each molecule was loadded correctly
for molecule in suppl:
    assert molecule is not None

mols = [x for x in suppl]  # one line for loop syntax, must know, very handy
print(f"The file '{file}' contains '{len(mols)}' molecule(s).")

assert len(mols) > 0  # make sure the file was loaded correctly
mol = mols[0]  # SDF files can contain multiple molecules - we work with just one now


The file 'CH3COONa.sdf' contains '1' molecule(s).


In [27]:
# Calling some basic functions on molecule object
print(f"Heavy Atom Number: {mol.GetNumAtoms()}")
print(f"All Atom Number: {mol.GetNumAtoms(onlyExplicit=False)}")
print(f"SMILES format: '{Chem.MolToSmiles(mol)}'")


Heavy Atom Number: 5
All Atom Number: 8
SMILES format: 'CC(=O)[O-].[Na+]'


In [28]:
# Calling some basic functions on atoms objects
atoms: list[Chem.Atom] = [a for a in mol.GetAtoms()]
for atom in atoms:
    if atom.GetHybridization() == Chem.HybridizationType.SP2:
        print(
            f"""Atom: {atom.GetSymbol()}, AtomicNumber: {atom.GetAtomicNum()}, Mass: {atom.GetMass()}, GetIsAromatic: {atom.GetIsAromatic()}, GetHybridization: {atom.GetHybridization()}"""
        )


Atom: C, AtomicNumber: 6, Mass: 12.011, GetIsAromatic: False, GetHybridization: SP2
Atom: O, AtomicNumber: 8, Mass: 15.999, GetIsAromatic: False, GetHybridization: SP2
Atom: O, AtomicNumber: 8, Mass: 15.999, GetIsAromatic: False, GetHybridization: SP2


In [29]:
# # Simple Drawing
# img = Draw.MolToImage(mol, legend=file)
# img

#### 1. Compare RDkit with Element class. Preliminary coding.


In [30]:
# Calling some basic functions on atoms objects
from rdkit.Chem import rdMolDescriptors
from rdkit import Chem

# Chem.SanitizeMol(mol)
oxidation_states = rdMolDescriptors.CalcOxidationNumbers(mol)
print(oxidation_states)

mol = Chem.AddHs(mol)
atoms: list[Chem.Atom] = [a for a in mol.GetAtoms()]

for atom in atoms:
    if atom.HasProp("OxidationNumber"):
        ox_state = atom.GetProp("OxidationNumber")
    else:
        ox_state = 1

    neighbors = [nbr.GetSymbol() for nbr in atom.GetNeighbors()]
    neighbor_symbols = ", ".join(neighbors) if neighbors else "none"

    print(
        f"""Atom: {atom.GetSymbol()}, GetIsAromatic: {atom.GetIsAromatic()},  IsInRing: {atom.IsInRing()}, ox_state: {ox_state}, id: {atom.GetIdx()}, neighbors: {neighbor_symbols}"""
    )

None
Atom: C, GetIsAromatic: False,  IsInRing: False, ox_state: -3, id: 0, neighbors: C, H, H, H
Atom: C, GetIsAromatic: False,  IsInRing: False, ox_state: 3, id: 1, neighbors: C, O, O
Atom: O, GetIsAromatic: False,  IsInRing: False, ox_state: -2, id: 2, neighbors: C
Atom: O, GetIsAromatic: False,  IsInRing: False, ox_state: -2, id: 3, neighbors: C
Atom: Na, GetIsAromatic: False,  IsInRing: False, ox_state: 1, id: 4, neighbors: none
Atom: H, GetIsAromatic: False,  IsInRing: False, ox_state: 1, id: 5, neighbors: C
Atom: H, GetIsAromatic: False,  IsInRing: False, ox_state: 1, id: 6, neighbors: C
Atom: H, GetIsAromatic: False,  IsInRing: False, ox_state: 1, id: 7, neighbors: C
