In [1]:
import numpy as np
import pandas as pd
from rdkit import Chem

In [None]:
# Load the per-molecule table from a path relative to the working directory.
# The cell below reads its 'cid', 'atom_count', 'atomic_numbers' and
# 'atomic_positions' columns.
df = pd.read_parquet("atom_coord_processed.parquet")

In [None]:
# Build one RDKit molecule per row of `df`. Only atoms and their coordinates
# are set here; no bonds are added.
mols = []

for _, row in df.iterrows():
    n_atoms = int(row['atom_count'])
    mol = Chem.RWMol()

    # A freshly created RWMol has no conformer, so calling mol.GetConformer()
    # on it raises. Coordinates are collected in a standalone conformer that
    # is attached to the molecule once all atoms have been added.
    conformer = Chem.Conformer(n_atoms)

    for i in range(n_atoms):
        # Cast to built-in int/float: values read from parquet list columns
        # are presumably NumPy scalars, which RDKit's wrappers may reject.
        atom = Chem.Atom(int(row['atomic_numbers'][i]))
        # NOTE(review): empty residue info kept from the original code; its
        # purpose is not evident from this notebook - confirm it is needed.
        atom.SetMonomerInfo(Chem.AtomPDBResidueInfo())
        atom_idx = mol.AddAtom(atom)  # AddAtom returns the new atom's index

        position = row['atomic_positions'][i]
        conformer.SetAtomPosition(
            atom_idx,
            Chem.rdGeometry.Point3D(*(float(coord) for coord in position)),
        )

    mol.AddConformer(conformer, assignId=True)
    mols.append(mol)

# Write each molecule to its own SDF file, named after the row's 'cid'.
# Molecules are paired with cids by position (zip) rather than df.loc[i, ...]:
# .loc is label-based and would pick the wrong row (or raise KeyError) when
# the DataFrame index is not the default 0..n-1 range.
for mol, cid in zip(mols, df['cid']):
    writer = Chem.SDWriter(f'molecule_{cid}.sdf')
    try:
        writer.write(mol)
    finally:
        # Close even if write() fails so the file handle is not leaked.
        writer.close()
