# In [None]:
# Import necessary libraries
from rdkit import Chem
from rdkit.Chem import AllChem
import pandas as pd
import sqlite3

# Function to perform substructure search in the database
def substructure_search(db_name: str, substructure_smarts: str) -> list:
    """Return the database molecules that contain a given substructure.

    Each row of the ``Chemicals`` table is parsed from its stored MolBlock
    and tested against the SMARTS query.

    Args:
        db_name: Path of the SQLite database file to open.
        substructure_smarts: SMARTS pattern describing the substructure.

    Returns:
        A list of ``(id, SMILES)`` tuples, one per matching row, in the
        order the rows are returned by the query.

    Raises:
        ValueError: If ``substructure_smarts`` cannot be parsed.
    """
    # Validate the query before touching the database so that a bad pattern
    # neither opens a connection nor leaves one unclosed.
    substructure = Chem.MolFromSmarts(substructure_smarts)
    if substructure is None:
        raise ValueError("Invalid substructure SMARTS.")

    matched_molecules = []
    conn = sqlite3.connect(db_name)
    try:
        # Only the columns that are actually used are selected, and rows are
        # streamed from the cursor instead of being loaded all at once.
        cursor = conn.execute("SELECT id, SMILES, MolBlock FROM Chemicals")
        for mol_id, smiles, mol_block in cursor:
            # Rows without a stored MolBlock cannot be parsed; skip them.
            if not mol_block:
                continue
            mol = Chem.MolFromMolBlock(mol_block)
            if mol is not None and mol.HasSubstructMatch(substructure):
                matched_molecules.append((mol_id, smiles))
    finally:
        # Release the connection even if the query or the matching fails.
        conn.close()
    return matched_molecules

# Example: search the database for molecules containing a benzene ring.
db_name = 'chemical_substructure_database.db'  # Use the name of your existing database
substructure_smarts = 'c1ccccc1'  # Benzene ring
matched_molecules = substructure_search(
    db_name=db_name,
    substructure_smarts=substructure_smarts,
)
print(f"Matched Molecules: {matched_molecules}")

# Function to save results to CSV
def save_results_to_csv(results, output_file):
    """Write search hits to a CSV file with ``ID`` and ``SMILES`` columns.

    ``results`` is handed to ``pandas.DataFrame`` with two column labels,
    the table is written to ``output_file`` without the index column, and
    a confirmation line is printed.
    """
    column_names = ["ID", "SMILES"]
    table = pd.DataFrame(results, columns=column_names)
    table.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

# Write the matches found above to a CSV file.
output_csv_file = 'substructure_search_results.csv'
save_results_to_csv(results=matched_molecules, output_file=output_csv_file)
