In [2]:
import pandas as pd
from rdkit import Chem

def convert_smiles_to_smarts(smiles):
    """Convert a SMILES string into the SMARTS string RDKit derives from it.

    Args:
        smiles: SMILES text. Any non-string value (for example the float NaN
            that pandas uses for an empty CSV cell) is treated as unparseable.

    Returns:
        The output of ``Chem.MolToSmarts`` for the parsed molecule, or ``None``
        when the input is not a string or RDKit cannot parse it.
    """
    # Guard before calling RDKit: this function is applied to a whole pandas
    # column, and one missing value would otherwise abort the conversion
    # instead of producing None like any other unparseable entry.
    if not isinstance(smiles, str):
        return None

    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return None
    return Chem.MolToSmarts(molecule)

def create_smarts_database(input_csv, output_csv):
    """Write a copy of a SMILES CSV with an extra ``SMARTS`` column.

    Args:
        input_csv: Path of the CSV to read; it must have a ``SMILES`` column.
        output_csv: Path of the CSV to write (without the DataFrame index).
    """
    # Load, derive the SMARTS column from SMILES, and save in one pipeline.
    (
        pd.read_csv(input_csv)
        .assign(SMARTS=lambda table: table['SMILES'].apply(convert_smiles_to_smarts))
        .to_csv(output_csv, index=False)
    )

# Example usage: convert the SMILES CSV into a CSV with a SMARTS column.
output_csv = 'output_smarts.csv'  # Where the augmented CSV is written
input_csv = 'asinex.csv'  # CSV that provides the SMILES column

create_smarts_database(input_csv=input_csv, output_csv=output_csv)


In [9]:
import pandas as pd
from rdkit import Chem
import sqlite3

def convert_smiles_to_smarts(smiles):
    """Convert a SMILES string into the SMARTS string RDKit derives from it.

    Args:
        smiles: SMILES text. Any non-string value (for example the float NaN
            that pandas uses for an empty CSV cell) is treated as unparseable.

    Returns:
        The output of ``Chem.MolToSmarts`` for the parsed molecule, or ``None``
        when the input is not a string or RDKit cannot parse it.
    """
    # Guard before calling RDKit: this function is applied to a whole pandas
    # column, and one missing value would otherwise abort the conversion
    # instead of producing None like any other unparseable entry.
    if not isinstance(smiles, str):
        return None

    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return None
    return Chem.MolToSmarts(molecule)

def create_database(input_csv, db_path):
    """(Re)build the ``smarts_table`` SQLite table from a CSV of SMILES strings.

    Every value of the CSV's ``SMILES`` column is converted with
    ``convert_smiles_to_smarts`` and stored, one row per CSV row, in
    ``smarts_table(id INTEGER PRIMARY KEY, smarts TEXT)``. Entries that could
    not be converted are stored as NULL.

    Any rows already in ``smarts_table`` are removed first, so running the
    cell again does not append a second copy of every pattern.

    Args:
        input_csv: Path of the CSV to read; it must have a ``SMILES`` column.
        db_path: Path of the SQLite database file (created if missing).
    """
    df = pd.read_csv(input_csv)

    # Convert before opening the database so a bad CSV leaves the file alone.
    smarts_values = df['SMILES'].apply(convert_smiles_to_smarts)

    conn = sqlite3.connect(db_path)
    try:
        # "with conn" commits on success and rolls back on any exception, so
        # the old contents are only discarded once the new rows are in place.
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS smarts_table (
                    id INTEGER PRIMARY KEY,
                    smarts TEXT
                )
            ''')
            conn.execute("DELETE FROM smarts_table")
            conn.executemany(
                "INSERT INTO smarts_table (smarts) VALUES (?)",
                ((value,) for value in smarts_values),
            )
    finally:
        # Always release the file handle, even when an insert fails.
        conn.close()

def substructure_search(db_path, query_smarts):
    """Return the stored SMARTS strings whose pattern contains the query.

    Each non-NULL ``smarts`` value in ``smarts_table`` is parsed with
    ``Chem.MolFromSmarts`` and tested with ``HasSubstructMatch`` against the
    parsed query.

    NOTE(review): the stored entries are parsed as SMARTS, so the search
    targets are query molecules rather than molecules built from the original
    SMILES; matching semantics may differ from a search against real
    molecules. Confirm this is the intended behaviour.

    Args:
        db_path: Path of the SQLite database holding ``smarts_table``.
        query_smarts: SMARTS pattern to search for.

    Returns:
        A list of the matching SMARTS strings, in the order SQLite returns
        the rows.

    Raises:
        ValueError: If ``query_smarts`` cannot be parsed by RDKit.
    """
    query_mol = Chem.MolFromSmarts(query_smarts)
    if query_mol is None:
        raise ValueError(f"Could not parse query SMARTS: {query_smarts!r}")

    # No 2D coordinates are computed: substructure matching works on the
    # molecular graph only, and the previous AllChem calls relied on a name
    # this cell never imports (NameError on a fresh kernel).
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(
            "SELECT smarts FROM smarts_table WHERE smarts IS NOT NULL"
        ).fetchall()
    finally:
        conn.close()

    results = []
    for (smarts,) in rows:
        mol = Chem.MolFromSmarts(smarts)
        if mol is not None and mol.HasSubstructMatch(query_mol):
            results.append(smarts)
    return results

# Example usage: build the database, then search it with a fixed SMARTS query.
db_path = 'smarts_database.db'  # SQLite file that stores the SMARTS table
input_csv = 'asinex.csv'  # CSV that provides the SMILES column

create_database(input_csv=input_csv, db_path=db_path)

query_smarts = "C[OH]"  # SMARTS pattern used as the substructure query
matching_smarts = substructure_search(db_path=db_path, query_smarts=query_smarts)
print(f"SMARTS patterns that match the query: {matching_smarts}")


SMARTS patterns that match the query: ['[#6]-[#6]1:[#6]:[#6]:[#6]2:[#6](:[#6]:1)-[#6](=[#6]-[#6]1:[#6](-[#8]-2):[#6]:[#6]:[#6]:[#6]:1)-[#6](-[#8])=[#8]', '[#6]-[#6](-[#6])-[#6]-[#6]-[#7]1:[#6](-[#7]-[#6]-[#6]-[#8]):[#7]:[#6]2:[#7](-[#6]):[#6](:[#7H]:[#6](:[#6]:1:2)=[#8])=[#8]', '[#6]-[#6]-[#7](-[#6]-[#6])-[#6]-[#6]#[#6]-[#6]-[#6](-[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1)(-[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1)-[#8]', '[#6]-[#8]-[#6]1:[#6]:[#6]:[#6](-[#16](-[#7]2-[#6]-[#6]-[#6]-[#6]-2)(=[#8])=[#8]):[#6]:[#6]:1-[#6](-[#8])=[#8]', '[#8]-[#6](-[#6]-[#6]-[#6]-[#7]-[#6](-[#6](-[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1)-[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1)=[#8])=[#8]', '[#6]-[#6]-[#6]1:[#6](-[#8]):[#6]:[#6](:[#6](:[#6]:1)-[#6](-[#6]-[#6]1:[#7]:[#6]2:[#6]:[#6]:[#6]:[#6]:[#6]:2:[#16]:1)=[#8])-[#8]', '[#6]-[#16](-[#7](-[#6]-[#6](-[#8])=[#8])-[#6]1:[#6]:[#6](-[#17]):[#6]:[#6](-[#17]):[#6]:1)(=[#8])=[#8]', '[#8]-[#6](-[#6](-[#6]-[#6](-[#7]-[#6]1:[#6]:[#6]:[#6](:[#6]:[#6]:1)-[#9])=[#8])-[#6]-[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]

In [10]:
import pandas as pd
from rdkit import Chem
import sqlite3

def convert_smiles_to_smarts(smiles):
    """Convert a SMILES string into the SMARTS string RDKit derives from it.

    Args:
        smiles: SMILES text. Any non-string value (for example the float NaN
            that pandas uses for an empty CSV cell) is treated as unparseable.

    Returns:
        The output of ``Chem.MolToSmarts`` for the parsed molecule, or ``None``
        when the input is not a string or RDKit cannot parse it.
    """
    # Guard before calling RDKit: this function is applied to a whole pandas
    # column, and one missing value would otherwise abort the conversion
    # instead of producing None like any other unparseable entry.
    if not isinstance(smiles, str):
        return None

    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return None
    return Chem.MolToSmarts(molecule)

def create_database(input_csv, db_path):
    """(Re)build the ``smarts_table`` SQLite table from a CSV of SMILES strings.

    Every value of the CSV's ``SMILES`` column is converted with
    ``convert_smiles_to_smarts`` and stored, one row per CSV row, in
    ``smarts_table(id INTEGER PRIMARY KEY, smarts TEXT)``. Entries that could
    not be converted are stored as NULL.

    Any rows already in ``smarts_table`` are removed first, so running the
    cell again does not append a second copy of every pattern.

    Args:
        input_csv: Path of the CSV to read; it must have a ``SMILES`` column.
        db_path: Path of the SQLite database file (created if missing).
    """
    df = pd.read_csv(input_csv)

    # Convert before opening the database so a bad CSV leaves the file alone.
    smarts_values = df['SMILES'].apply(convert_smiles_to_smarts)

    conn = sqlite3.connect(db_path)
    try:
        # "with conn" commits on success and rolls back on any exception, so
        # the old contents are only discarded once the new rows are in place.
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS smarts_table (
                    id INTEGER PRIMARY KEY,
                    smarts TEXT
                )
            ''')
            conn.execute("DELETE FROM smarts_table")
            conn.executemany(
                "INSERT INTO smarts_table (smarts) VALUES (?)",
                ((value,) for value in smarts_values),
            )
    finally:
        # Always release the file handle, even when an insert fails.
        conn.close()

def substructure_search(db_path, query_smiles):
    """Return the stored SMARTS strings whose pattern contains the query.

    The query SMILES is converted to SMARTS with ``convert_smiles_to_smarts``
    and parsed with ``Chem.MolFromSmarts``. Each non-NULL ``smarts`` value in
    ``smarts_table`` is parsed the same way and tested with
    ``HasSubstructMatch``.

    NOTE(review): the stored entries are parsed as SMARTS, so the search
    targets are query molecules rather than molecules built from the original
    SMILES; matching semantics may differ from a search against real
    molecules. Confirm this is the intended behaviour.

    Args:
        db_path: Path of the SQLite database holding ``smarts_table``.
        query_smiles: SMILES string of the substructure to search for.

    Returns:
        A list of the matching SMARTS strings, in the order SQLite returns
        the rows.

    Raises:
        ValueError: If ``query_smiles`` cannot be turned into a query
            molecule.
    """
    # Validate the query first: an unparseable SMILES yields None, which
    # would otherwise be handed to RDKit and fail with an obscure error.
    query_smarts = convert_smiles_to_smarts(query_smiles)
    query_mol = None if query_smarts is None else Chem.MolFromSmarts(query_smarts)
    if query_mol is None:
        raise ValueError(f"Could not parse query SMILES: {query_smiles!r}")

    conn = sqlite3.connect(db_path)
    try:
        # NULL rows come from database entries whose SMILES failed to convert.
        rows = conn.execute(
            "SELECT smarts FROM smarts_table WHERE smarts IS NOT NULL"
        ).fetchall()
    finally:
        conn.close()

    results = []
    for (smarts,) in rows:
        mol = Chem.MolFromSmarts(smarts)
        if mol is not None and mol.HasSubstructMatch(query_mol):
            results.append(smarts)
    return results

# Example usage: build the database, then search it with a SMILES typed by the user.
db_path = 'smarts_database.db'  # SQLite file that stores the SMARTS table
input_csv = 'asinex.csv'  # CSV that provides the SMILES column

create_database(input_csv=input_csv, db_path=db_path)

# The query is read interactively from standard input.
input_smiles = input("Enter the SMILES string for the query: ")
matching_smarts = substructure_search(db_path=db_path, query_smiles=input_smiles)
print(f"SMARTS patterns that match the query: {matching_smarts}")


Enter the SMILES string for the query:  c1cc2cccnc2s1


SMARTS patterns that match the query: ['[#6]-[#6]1:[#6]2:[#6](-[#7]):[#6](-[#6](-[#7]-[#6]3:[#6]:[#6]:[#6](-[#8]-[#6]):[#6]:[#6]:3-[#8]-[#6])=[#8]):[#16]:[#6]:2:[#7]:[#6](-[#6]):[#6]:1', '[#6]-[#6]1(-[#6])-[#6]-[#6]2:[#6](-[#6]-[#8]-1):[#6](-[#7]1-[#6]-[#6]-[#8]-[#6]-[#6]-1):[#7]:[#6]1:[#6]:2:[#6](:[#6](:[#16]:1)-[#6](-[#6]1:[#6]:[#6]:[#6](:[#6]:[#6]:1)-[#35])=[#8])-[#7]', '[#7]-[#6](-[#6]1:[#16]:[#6]2:[#7]:[#6]3-[#6]-[#6]-[#6]-[#6]:3:[#6](-[#6]3:[#6]:[#7]:[#6]:[#6]:[#6]:3):[#6]:2:[#6]:1-[#7])=[#8]', '[#6]-[#6]1:[#6]:[#6](-[#6]-[#8]-[#6]):[#6]2:[#6](-[#7]):[#6](-[#6](-[#7]-[#6]3:[#6]:[#6]:[#6](:[#6]:[#6]:3)-[#8]-[#6])=[#8]):[#16]:[#6]:2:[#7]:1', '[#6]-[#6]-[#6]1:[#6]:[#6]:[#6]:[#6](-[#6]-[#6]):[#6]:1-[#7]-[#6](-[#6]1:[#16]:[#6]2:[#7]:[#6](-[#6]):[#6]:[#6](-[#6]):[#6]:2:[#6]:1-[#7])=[#8]', '[#6]-[#6]1:[#6]:[#6](-[#6]):[#7]:[#6]2:[#6]:1:[#6](-[#7]):[#6](-[#6](-[#7]-[#6]1:[#6]:[#6](-[#17]):[#6](-[#6]):[#6]:[#6]:1)=[#8]):[#16]:2', '[#6]-[#6]1:[#6]2:[#6](-[#7]):[#6](-[#6](-[#6]3:[#6]:[#6]:[

In [11]:
import pandas as pd
from rdkit import Chem
import sqlite3

def convert_smiles_to_smarts(smiles):
    """Convert a SMILES string into the SMARTS string RDKit derives from it.

    Args:
        smiles: SMILES text. Any non-string value (for example the float NaN
            that pandas uses for an empty CSV cell) is treated as unparseable.

    Returns:
        The output of ``Chem.MolToSmarts`` for the parsed molecule, or ``None``
        when the input is not a string or RDKit cannot parse it.
    """
    # Guard before calling RDKit: this function is applied to a whole pandas
    # column, and one missing value would otherwise abort the conversion
    # instead of producing None like any other unparseable entry.
    if not isinstance(smiles, str):
        return None

    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return None
    return Chem.MolToSmarts(molecule)

def create_database(input_csv, db_path):
    """(Re)build the ``smarts_table`` SQLite table from a CSV of SMILES strings.

    Every value of the CSV's ``SMILES`` column is converted with
    ``convert_smiles_to_smarts`` and stored, one row per CSV row, in
    ``smarts_table(id INTEGER PRIMARY KEY, smarts TEXT)``. Entries that could
    not be converted are stored as NULL.

    Any rows already in ``smarts_table`` are removed first, so running the
    cell again does not append a second copy of every pattern.

    Args:
        input_csv: Path of the CSV to read; it must have a ``SMILES`` column.
        db_path: Path of the SQLite database file (created if missing).
    """
    df = pd.read_csv(input_csv)

    # Convert before opening the database so a bad CSV leaves the file alone.
    smarts_values = df['SMILES'].apply(convert_smiles_to_smarts)

    conn = sqlite3.connect(db_path)
    try:
        # "with conn" commits on success and rolls back on any exception, so
        # the old contents are only discarded once the new rows are in place.
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS smarts_table (
                    id INTEGER PRIMARY KEY,
                    smarts TEXT
                )
            ''')
            conn.execute("DELETE FROM smarts_table")
            conn.executemany(
                "INSERT INTO smarts_table (smarts) VALUES (?)",
                ((value,) for value in smarts_values),
            )
    finally:
        # Always release the file handle, even when an insert fails.
        conn.close()

def substructure_search(db_path, query_smiles):
    """Return the stored SMARTS strings whose pattern contains the query.

    The query SMILES is converted to SMARTS with ``convert_smiles_to_smarts``
    and parsed with ``Chem.MolFromSmarts``. Each non-NULL ``smarts`` value in
    ``smarts_table`` is parsed the same way and tested with
    ``HasSubstructMatch``.

    NOTE(review): the stored entries are parsed as SMARTS, so the search
    targets are query molecules rather than molecules built from the original
    SMILES; matching semantics may differ from a search against real
    molecules. Confirm this is the intended behaviour.

    Args:
        db_path: Path of the SQLite database holding ``smarts_table``.
        query_smiles: SMILES string of the substructure to search for.

    Returns:
        A list of the matching SMARTS strings, in the order SQLite returns
        the rows.

    Raises:
        ValueError: If ``query_smiles`` cannot be turned into a query
            molecule.
    """
    # Validate the query first: an unparseable SMILES yields None, which
    # would otherwise be handed to RDKit and fail with an obscure error.
    query_smarts = convert_smiles_to_smarts(query_smiles)
    query_mol = None if query_smarts is None else Chem.MolFromSmarts(query_smarts)
    if query_mol is None:
        raise ValueError(f"Could not parse query SMILES: {query_smiles!r}")

    conn = sqlite3.connect(db_path)
    try:
        # NULL rows come from database entries whose SMILES failed to convert.
        rows = conn.execute(
            "SELECT smarts FROM smarts_table WHERE smarts IS NOT NULL"
        ).fetchall()
    finally:
        conn.close()

    results = []
    for (smarts,) in rows:
        mol = Chem.MolFromSmarts(smarts)
        if mol is not None and mol.HasSubstructMatch(query_mol):
            results.append(smarts)
    return results

def write_results_to_csv(results, output_csv):
    """Save the given results as a one-column CSV headed ``SMARTS``.

    Args:
        results: Sequence of values to write, one per CSV row.
        output_csv: Path of the CSV file to write (without an index column).
    """
    pd.DataFrame(results, columns=['SMARTS']).to_csv(output_csv, index=False)

# Example usage: build the database, search it, and export the hits to CSV.
output_csv = 'matching_smarts.csv'  # CSV that receives the matching patterns
db_path = 'as_smarts_database.db'  # SQLite file that stores the SMARTS table
input_csv = 'asinex.csv'  # CSV that provides the SMILES column

create_database(input_csv=input_csv, db_path=db_path)

# The query is read interactively from standard input.
input_smiles = input("Enter the SMILES string for the query: ")
matching_smarts = substructure_search(db_path=db_path, query_smiles=input_smiles)

# Persist the hits instead of printing the whole list.
write_results_to_csv(results=matching_smarts, output_csv=output_csv)
print(f"Matching SMARTS patterns written to {output_csv}")


Enter the SMILES string for the query:  c1cc2cccnc2s1


Matching SMARTS patterns written to matching_smarts.csv
