In [1]:
from rdkit import Chem
from rdkit.Chem import PandasTools, SaltRemover
import pandas as pd

In [3]:
# Shared salt stripper, constructed with SaltRemover's default arguments.
remover = SaltRemover.SaltRemover()

# Path of the SDF file to analyse.
sdf_file_path = 'PubChem_substance_phenol.sdf'

# Salt detection helper.
def has_salt(mol):
    """Report whether salt stripping changes the atom count of ``mol``.

    The module-level ``remover`` is applied to ``mol`` and the atom counts of
    the input and of the stripped result are compared.

    Args:
        mol: Molecule to check, or None.

    Returns:
        True if the stripped molecule has a different number of atoms than the
        input, False if the counts are equal, and None if ``mol`` is None or
        any exception is raised while stripping or counting.
    """
    if mol is None:
        return None
    try:
        desalted = remover.StripMol(mol)
        atoms_before, atoms_after = mol.GetNumAtoms(), desalted.GetNumAtoms()
        changed = atoms_before != atoms_after
    except Exception:
        # Best-effort: any failure is reported as "unknown" (None).
        return None
    else:
        return changed

# Custom SDF loading: keep the molecules that can be read and converted to
# SMILES, and tally every record that cannot, so the summary below reports them
# instead of silently dropping them.
data = []
invalid_count = 0  # records RDKit could not parse or could not write as SMILES
supplier = Chem.SDMolSupplier(sdf_file_path)

for mol in supplier:
    if mol is None:
        # The supplier hands back None for a record it cannot parse; count it
        # as invalid rather than skipping it without a trace.
        invalid_count += 1
        continue
    try:
        smiles = Chem.MolToSmiles(mol)
    except Exception:
        # Do not append a row with SMILES=None: the SMILES re-parse performed
        # by AddMoleculeColumnToFrame below would be handed None.
        # NOTE(review): MolFromSmiles is expected to raise on None - confirm
        # for the installed RDKit version.
        invalid_count += 1
        continue
    # has_salt() returns None (undetermined) when the salt check itself fails.
    data.append({"SMILES": smiles, "Molecule": mol, "HasSalt": has_salt(mol)})

# Build the DataFrame; explicit columns keep the frame well-formed (and the
# column lookups below valid) even when no record was usable.
df = pd.DataFrame(data, columns=["SMILES", "Molecule", "HasSalt"])

# Let PandasTools rebuild the Molecule column from the SMILES column.
PandasTools.AddMoleculeColumnToFrame(df, smilesCol='SMILES', molCol='Molecule')

# Summary. Salt counts are taken from explicit True/False flags on valid rows,
# so a molecule whose salt status is undetermined (HasSalt is None) is counted
# in neither "Molecules with Salts" nor "Without Salts".
is_valid = df["Molecule"].notnull()
valid_count = int(is_valid.sum())
with_salt_count = int((is_valid & df["HasSalt"].eq(True)).sum())
without_salt_count = int((is_valid & df["HasSalt"].eq(False)).sum())

summary = {
    # Every record seen in the file, including the ones that were rejected.
    "Total Molecules": len(df) + invalid_count,
    "Valid Molecules": valid_count,
    "Molecules with Salts": with_salt_count,
    "Without Salts": without_salt_count,
    # Rejected while loading, plus rows whose SMILES re-parse produced no molecule.
    "Invalid Molecules": invalid_count + (len(df) - valid_count),
}


In [5]:
# Display the summary dict as this cell's output.
summary

{'Total Molecules': 133,
 'Valid Molecules': 133,
 'Molecules with Salts': 7,
 'Without Salts': 126,
 'Invalid Molecules': 0}