In [1]:
import pandas as pd
from rdkit import Chem
import sqlite3

def convert_smiles_to_smarts(smiles):
    """Convert a SMILES string into the SMARTS pattern RDKit derives from it.

    Args:
        smiles: SMILES text. Non-string values (``None``, or the float NaN
            that pandas yields for a blank CSV cell) are treated as
            unparseable.

    Returns:
        The SMARTS string produced by ``Chem.MolToSmarts``, or ``None`` when
        the input is not a string or RDKit cannot parse it.
    """
    # This function is applied to a DataFrame column, so missing values can
    # show up as non-strings; report them as "no SMARTS" instead of handing
    # them to RDKit's parser, which expects text.
    if not isinstance(smiles, str):
        return None

    molecule = Chem.MolFromSmiles(smiles)
    # Explicit None check: parse failure is signalled by None, not falsiness.
    if molecule is None:
        return None
    return Chem.MolToSmarts(molecule)

def create_database(input_csv, db_path):
    """Load SMILES from a CSV file and store their SMARTS patterns in SQLite.

    Args:
        input_csv: Path to a CSV file that has a ``SMILES`` column.
        db_path: Path of the SQLite database file; it is created if missing.

    Returns:
        None. The patterns are appended to ``smarts_table``; because the table
        is only created when absent and never cleared, running this twice
        against the same ``db_path`` inserts the patterns a second time.

    Raises:
        KeyError: If the CSV has no ``SMILES`` column.
    """
    df = pd.read_csv(input_csv)

    # Blank cells are read as NaN; drop them before conversion so only real
    # SMILES text reaches the converter.
    smarts_values = df['SMILES'].dropna().apply(convert_smiles_to_smarts)

    # Keep only successful conversions. A failed conversion yields None, which
    # would be stored as NULL and later break code that parses every row.
    rows = [(smarts,) for smarts in smarts_values if isinstance(smarts, str)]

    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS smarts_table (
                id INTEGER PRIMARY KEY,
                smarts TEXT
            )
        ''')
        # One batched statement instead of a Python-level loop over rows.
        conn.executemany("INSERT INTO smarts_table (smarts) VALUES (?)", rows)
        conn.commit()
    finally:
        # Always release the database handle, even if an insert fails.
        conn.close()

# Example usage: build the SMARTS database from the input CSV.
db_path = 'asx1_smarts_database.db'  # SQLite file that receives smarts_table
input_csv = 'asinex.csv'  # Source CSV; create_database reads its 'SMILES' column

create_database(input_csv=input_csv, db_path=db_path)
print("Database creation complete.")


Database creation complete.


In [4]:
import pandas as pd
from rdkit import Chem
import sqlite3

def convert_smiles_to_smarts(smiles):
    """Return the SMARTS pattern RDKit generates for a SMILES string.

    Args:
        smiles: SMILES text to convert. Anything that is not a string
            (for example ``None`` or a float NaN) is treated as unparseable.

    Returns:
        The SMARTS string from ``Chem.MolToSmarts``, or ``None`` if the input
        is not a string or RDKit fails to parse it.
    """
    # Guard first so non-text input is reported as "no SMARTS" rather than
    # being passed to RDKit's parser, which expects text.
    if not isinstance(smiles, str):
        return None

    molecule = Chem.MolFromSmiles(smiles)
    # Parse failure is signalled by None; test for it explicitly.
    if molecule is None:
        return None
    return Chem.MolToSmarts(molecule)

def substructure_search(db_path, query_smiles, output_csv):
    """Find stored SMARTS patterns that contain the query as a substructure.

    The query SMILES is converted to SMARTS and parsed into an RDKit query
    molecule. Every ``smarts`` value in ``smarts_table`` is parsed with
    ``Chem.MolFromSmarts`` and kept when it has a substructure match for the
    query. The matches are written to ``output_csv`` under a single
    ``SMARTS`` column and also returned.

    Args:
        db_path: Path to the SQLite database holding ``smarts_table``.
        query_smiles: SMILES string describing the substructure to look for.
        output_csv: Path of the CSV file to write the matching patterns to.

    Returns:
        List of matching SMARTS strings, in the order the table yields them.

    Raises:
        ValueError: If ``query_smiles`` is not a non-blank string or cannot
            be parsed into a query molecule.
    """
    # Validate the query before touching the database so a bad query fails
    # with a clear message and no connection is opened.
    if not isinstance(query_smiles, str) or not query_smiles.strip():
        raise ValueError("query_smiles must be a non-empty SMILES string")

    query_smarts = convert_smiles_to_smarts(query_smiles)
    query_mol = Chem.MolFromSmarts(query_smarts) if query_smarts else None
    if query_mol is None:
        raise ValueError(f"Could not parse query SMILES: {query_smiles!r}")

    results = []
    conn = sqlite3.connect(db_path)
    try:
        # Iterate the cursor directly instead of loading every row up front.
        for (smarts,) in conn.execute("SELECT smarts FROM smarts_table"):
            # NULL or empty entries cannot be parsed into a pattern; skip them.
            if not smarts:
                continue
            mol = Chem.MolFromSmarts(smarts)
            if mol is not None and mol.HasSubstructMatch(query_mol):
                results.append(smarts)
    finally:
        # Release the database handle even if parsing or matching raises.
        conn.close()

    # Write matching SMARTS patterns to CSV
    pd.DataFrame(results, columns=['SMARTS']).to_csv(output_csv, index=False)

    return results

# Example usage: prompt for a query structure and search the SMARTS database.
output_csv = 'matching_smarts.csv'  # CSV file that receives the matches
db_path = 'asx1_smarts_database.db'  # SQLite database queried by the search

# The query SMILES is read interactively from the user.
input_smiles = input("Enter the SMILES string for the query: ")
matching_smarts = substructure_search(
    db_path=db_path,
    query_smiles=input_smiles,
    output_csv=output_csv,
)
print(f"Matching SMARTS patterns written to {output_csv}")


Enter the SMILES string for the query:  NC(=O)c1sc2nc(N)ccc2c1N


Matching SMARTS patterns written to matching_smarts.csv
