# **Generate SDF files**

**Author:** Raissa Lohanna

**Date:** June 16th, 2023

**Objective:** Generate SDF files so that the `chemreps` library can be used to generate BoB descriptors.

The files must be saved to, and later read from, the directory `../data/sdf/`.

In [1]:
import numpy as np
import pandas as pd
from rdkit import Chem

In [17]:
# Load the atom-coordinate table from Parquet. The conversion cell below reads
# the 'atom count', 'atomic numbers' and 'atomic positions' columns and the
# `cid` field of each row.
df = pd.read_parquet("atom_coord_processed.parquet")

In [None]:
# Convert atomic numbers and positions to molecules
# Each row of `df` becomes one bond-free RDKit molecule carrying a single
# conformer, written to its own file `../data/sdf/molecule_<cid>.sdf`.
for _, row in df.iterrows():
    n_atoms = int(row['atom count'])
    mol = Chem.RWMol()

    # Add every atom first so the conformer can be sized exactly once.
    for i in range(n_atoms):
        atom = Chem.Atom(int(row['atomic numbers'][i]))
        atom.SetMonomerInfo(Chem.AtomPDBResidueInfo())
        mol.AddAtom(atom)

    # Build ONE conformer per molecule. Creating and adding a conformer inside
    # the atom loop would leave one extra (unused) conformer per atom on the
    # molecule, with only the first one ever receiving coordinates.
    conformer = Chem.Conformer(n_atoms)
    for i in range(n_atoms):
        position = row['atomic positions'][i]
        conformer.SetAtomPosition(i, Chem.rdGeometry.Point3D(*position))
    # Positions are set before AddConformer so the molecule stores the
    # filled-in coordinates.
    mol.AddConformer(conformer)

    writer = Chem.SDWriter(f'../data/sdf/molecule_{row.cid}.sdf')
    try:
        writer.write(mol)
    finally:
        # Always release the file handle, even if writing this molecule fails.
        writer.close()