# Check Building Blocks
We go through the building blocks to identify the features that a GNN featurization should assign to atoms and bonds.

In [None]:
import pathlib
import sys

sys.path.append(str(pathlib.Path().resolve().parents[1]))

import pandas as pd
from rdkit import Chem

from src.util.db_utils import SynFermDatabaseConnection

In [None]:
# Instantiate the project's database connection helper; the query cell
# reaches the underlying connection through `con.con`.
# NOTE(review): how the database location is resolved is not visible here - confirm in src.util.db_utils
con = SynFermDatabaseConnection()

In [None]:
# Load every row of the building_blocks table into a DataFrame
columns = ['long', 'category', 'SMILES']
query = 'SELECT long, category, SMILES FROM building_blocks'
rows = con.con.execute(query).fetchall()
df = pd.DataFrame(rows, columns=columns)
df

In [None]:
# Collect the distinct atom symbols that occur across all building blocks
atom_types = set()
for smi in df.SMILES:
    # keep the molecule bound to a name while its atoms are iterated
    mol = Chem.MolFromSmiles(smi)
    atom_types.update(atom.GetSymbol() for atom in mol.GetAtoms())
atom_types

In [None]:
# Collect the distinct atom degrees that occur across all building blocks
atom_degrees = set()
for smi in df.SMILES:
    # keep the molecule bound to a name while its atoms are iterated
    mol = Chem.MolFromSmiles(smi)
    atom_degrees.update(atom.GetDegree() for atom in mol.GetAtoms())
atom_degrees

In [None]:
# Collect the distinct formal charges that occur across all building blocks
formal_charges = set()
for smi in df.SMILES:
    # keep the molecule bound to a name while its atoms are iterated
    mol = Chem.MolFromSmiles(smi)
    formal_charges.update(atom.GetFormalCharge() for atom in mol.GetAtoms())
formal_charges

In [None]:
# Collect the distinct total hydrogen counts per atom across all building blocks
total_hs = set()
for smi in df.SMILES:
    # keep the molecule bound to a name while its atoms are iterated
    mol = Chem.MolFromSmiles(smi)
    total_hs.update(atom.GetTotalNumHs() for atom in mol.GetAtoms())
total_hs

In [None]:
# Collect the distinct atom hybridizations that occur across all building blocks
hybridizations = set()
for smi in df.SMILES:
    # keep the molecule bound to a name while its atoms are iterated
    mol = Chem.MolFromSmiles(smi)
    hybridizations.update(atom.GetHybridization() for atom in mol.GetAtoms())
hybridizations

In [None]:
# Collect the distinct bond types that occur across all building blocks
bond_types = set()
for smi in df.SMILES:
    # keep the molecule bound to a name while its bonds are iterated
    mol = Chem.MolFromSmiles(smi)
    bond_types.update(bond.GetBondType() for bond in mol.GetBonds())
bond_types

In [None]:
# Collect the distinct bond stereo labels that occur across all building blocks
bond_stereochemistries = set()
for smi in df.SMILES:
    # keep the molecule bound to a name while its bonds are iterated
    mol = Chem.MolFromSmiles(smi)
    bond_stereochemistries.update(bond.GetStereo() for bond in mol.GetBonds())
bond_stereochemistries

In [None]:
# which building blocks have E stereochemistry?
# Import Draw explicitly: `from rdkit import Chem` does not by itself load
# the `rdkit.Chem.Draw` submodule, so `Chem.Draw` only resolves when some
# other module happens to have imported it already.
from rdkit.Chem import Draw

mols_e = []
for smiles in df.SMILES:
    mol = Chem.MolFromSmiles(smiles)
    # any() stops at the first E bond, so a molecule with several E double
    # bonds is collected (and drawn) once rather than once per matching bond.
    if any(bond.GetStereo() == Chem.BondStereo.STEREOE for bond in mol.GetBonds()):
        mols_e.append(mol)
# Keep the drawing call as the final expression so the notebook renders the grid.
Draw.MolsToGridImage(mols_e, molsPerRow=3)


## Conclusions

For most properties, the data covers the usual values.
For bond stereochemistry, however, we seem to have only E or no stereochemistry, and only a single example of E.
This limitation is worth bearing in mind.

For the featurization, we should probably leave bond stereochemistry out entirely.