## References

- [RDKit Documentation](https://www.rdkit.org/docs/GettingStartedInPython.html)
- [PubChem](https://pubchem.ncbi.nlm.nih.gov/) - find SDF files for molecules
- YouTube Tutorial
  - [Part1](https://www.youtube.com/watch?v=NozaWUkJ3YM)


## Init Parameters


In [1]:
from rdkit import Chem
from rdkit.Chem import Draw

from src import SDF_DIR

file = "As-benzene-arsenic acid.sdf"

## Sandbox


In [2]:
# Loads Molecule from file to python object (Properties in the SDF file are used to set properties on each molecule)
suppl: Chem.SDMolSupplier = Chem.SDMolSupplier(f"{SDF_DIR.joinpath(file)}")

# Test if each molecule was loadded correctly
for molecule in suppl:
    assert molecule is not None

mols = [x for x in suppl]  # one line for loop syntax, must know, very handy
print(f"The file '{file}' contains '{len(mols)}' molecule(s).")

assert len(mols) > 0  # make sure the file was loaded correctly


The file 'As-benzene-arsenic acid.sdf' contains '1' molecule(s).


#### 1. Compare RDkit with Element class. Preliminary coding.


In [3]:
# Calling some basic functions on atoms objects
from rdkit.Chem import rdMolDescriptors
from rdkit import Chem

for i in range(len(mols)):
    mol = mols[i]
    print(f"I am molecule {i + 1}\n")

    rdMolDescriptors.CalcOxidationNumbers(mol)

    mol = Chem.AddHs(mol)
    atoms: list[Chem.Atom] = [a for a in mol.GetAtoms()]

    for atom in atoms:
        if atom.HasProp("OxidationNumber"):
            ox_state = atom.GetProp("OxidationNumber")
        else:
            ox_state = 1

        neighbors = [nbr.GetSymbol() for nbr in atom.GetNeighbors()]
        neighbor_symbols = ", ".join(neighbors) if neighbors else "none"

        print(
            f"""Atom: {atom.GetSymbol()}, GetIsAromatic: {atom.GetIsAromatic()},  IsInRing: {atom.IsInRing()}, ox_state: {ox_state}, id: {atom.GetIdx()}, neighbors: {neighbor_symbols}, FormalCharge: {atom.GetFormalCharge()}, CovalentNumber: {atom.GetDegree()}"""
        )

I am molecule 1

Atom: C, GetIsAromatic: False,  IsInRing: True, ox_state: -3, id: 0, neighbors: C, As, As, FormalCharge: 0, CovalentNumber: 3
Atom: C, GetIsAromatic: False,  IsInRing: True, ox_state: -1, id: 1, neighbors: C, C, H, FormalCharge: 0, CovalentNumber: 3
Atom: As, GetIsAromatic: False,  IsInRing: True, ox_state: 3, id: 2, neighbors: C, C, FormalCharge: 0, CovalentNumber: 2
Atom: C, GetIsAromatic: False,  IsInRing: True, ox_state: -1, id: 3, neighbors: C, C, H, FormalCharge: 0, CovalentNumber: 3
Atom: C, GetIsAromatic: False,  IsInRing: True, ox_state: -2, id: 4, neighbors: As, C, H, FormalCharge: 0, CovalentNumber: 3
Atom: C, GetIsAromatic: False,  IsInRing: True, ox_state: -1, id: 5, neighbors: C, C, H, FormalCharge: 0, CovalentNumber: 3
Atom: As, GetIsAromatic: False,  IsInRing: False, ox_state: 5, id: 6, neighbors: C, O, O, O, FormalCharge: 0, CovalentNumber: 4
Atom: O, GetIsAromatic: False,  IsInRing: False, ox_state: -2, id: 7, neighbors: As, H, FormalCharge: 0, Covale

In [4]:
from constants import PASCAL_CONST

sum_dia_contr = 0

for i in range(len(mols)):
    mol = mols[i]

    print(f"I am molecule {i + 1}")

    rdMolDescriptors.CalcOxidationNumbers(mol)

    mol = Chem.AddHs(mol)
    atoms: list[Chem.Atom] = [
        a for a in mol.GetAtoms()
    ]  # Creates a list of atoms for each molecule

    for atom in atoms:
        symbol = atom.GetSymbol()
        if symbol in PASCAL_CONST:
            # retrieve const data
            covalent_data = PASCAL_CONST[symbol]["covalent"]
            ionic_data = PASCAL_CONST[symbol]["ionic"]["charge"]
            ox_state_data = PASCAL_CONST[symbol]["covalent"]["ox_state"]

            if in_ring:
                sum_dia_contr += covalent_data["ring"]

            """TODO: Resolve the error"""
            if not in_ring:
                sum_dia_contr += covalent_data["open_chain"]

            """TODO: Fix incompatible format of oxidation state. Ox_state from RDKit: int, Ox_state in PASCAL_CONST: string."""
            # if ox_state is not None:
            # ....

            """TODO: Fix incompatible format of charge. Charge from RDKit: int; Charge in PASCAL_CONST: string."""
            # if atom_charge is not None:
            # ....
    print(sum_dia_contr)

I am molecule 1


NameError: name 'in_ring' is not defined