In [2]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import sqlite3

# Function to convert SMILES to Morgan (circular) fingerprints
def smiles_to_fingerprint(smiles):
    """Return the Morgan fingerprint of a SMILES string as a bit string.

    Parameters
    ----------
    smiles : str
        SMILES representation of a molecule.

    Returns
    -------
    str or None
        The radius-2, 2048-bit Morgan fingerprint rendered with
        ``ToBitString()``, or ``None`` when ``smiles`` is not a non-blank
        string or RDKit cannot parse it into a molecule.
    """
    # pandas represents empty CSV cells as float NaN, and callers may pass
    # None; neither is a SMILES string, and a blank string carries no
    # structure to fingerprint. Reject them before touching RDKit.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit signals an unparsable SMILES by returning None.
        return None

    fingerprint = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)
    return fingerprint.ToBitString()

# Read SMILES from CSV file
df = pd.read_csv('asinex.csv')
# Empty cells come back from pandas as float NaN, which is not a SMILES
# string; drop them here so they never reach the fingerprint function.
smiles_list = df['SMILES'].dropna().astype(str).tolist()


def _fingerprint_rows(smiles_iterable):
    """Yield (smiles, fingerprint) pairs, skipping SMILES that yield no fingerprint."""
    for smiles in smiles_iterable:
        fingerprint = smiles_to_fingerprint(smiles)
        if fingerprint:
            yield smiles, fingerprint


# Connect to SQLite database (create if not exists)
# NOTE(review): the similarity-search cell opens 'fingerprint_database.db',
# not 'fingerprint_database_1.db' -- confirm which file is intended.
conn = sqlite3.connect('fingerprint_database_1.db')
try:
    cursor = conn.cursor()

    # Create table for storing fingerprints
    cursor.execute('''CREATE TABLE IF NOT EXISTS fingerprints (
                    id INTEGER PRIMARY KEY,
                    smiles TEXT,
                    fingerprint TEXT)''')

    # Convert SMILES to fingerprints and store in database.
    # executemany consumes the generator lazily, so fingerprints are computed
    # and inserted one at a time without building an intermediate list.
    # NOTE(review): re-running this cell appends the same rows again because
    # the table is only created, never cleared -- confirm that is acceptable.
    cursor.executemany(
        'INSERT INTO fingerprints (smiles, fingerprint) VALUES (?, ?)',
        _fingerprint_rows(smiles_list),
    )

    # Commit only after every row has been inserted successfully
    conn.commit()
finally:
    # Always release the database file, even if fingerprinting or insertion failed
    conn.close()


In [11]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, DataStructs
import sqlite3

# Function to convert SMILES to Morgan (circular) fingerprints
def smiles_to_fingerprint(smiles):
    """Return the Morgan fingerprint of a SMILES string as a bit string.

    Parameters
    ----------
    smiles : str
        SMILES representation of a molecule.

    Returns
    -------
    str or None
        The radius-2, 2048-bit Morgan fingerprint rendered with
        ``ToBitString()``, or ``None`` when ``smiles`` is not a non-blank
        string or RDKit cannot parse it into a molecule.
    """
    # Callers may pass None or a pandas NaN, and interactive input may be
    # blank; none of these describe a molecule, so reject them before
    # touching RDKit.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit signals an unparsable SMILES by returning None.
        return None

    fingerprint = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)
    return fingerprint.ToBitString()

# Function to calculate Tanimoto similarity
def calculate_similarity(fp1, fp2):
    """Compare two fingerprints given as bit strings.

    Both arguments are decoded with ``DataStructs.CreateFromBitString`` and
    the decoded vectors are passed to ``DataStructs.FingerprintSimilarity``
    using its default metric; that call's result is returned unchanged.
    """
    first_bits, second_bits = (
        DataStructs.CreateFromBitString(bit_string) for bit_string in (fp1, fp2)
    )
    return DataStructs.FingerprintSimilarity(first_bits, second_bits)

# Ask for user input to search for similar SMILES and similarity threshold
search_smiles = input("Enter SMILES to search for similar molecules: ").strip()
threshold_text = input("Enter similarity threshold (0 to 1): ")
search_fp = smiles_to_fingerprint(search_smiles)

# Parse the threshold defensively: a non-numeric answer should produce a
# readable message rather than a ValueError traceback.
try:
    similarity_threshold = float(threshold_text)
except ValueError:
    similarity_threshold = None

if not search_fp:
    print("Invalid SMILES input.")
elif similarity_threshold is None or not 0.0 <= similarity_threshold <= 1.0:
    # The chained comparison is also False for NaN, so 'nan' is rejected here.
    print("Invalid similarity threshold: enter a number between 0 and 1.")
else:
    # Decode the query fingerprint once instead of once per database row.
    search_bits = DataStructs.CreateFromBitString(search_fp)
    similar_molecules = []

    # Connect to the database to perform the similarity search
    # NOTE(review): the cell that builds the database writes to
    # 'fingerprint_database_1.db', not 'fingerprint_database.db' -- confirm
    # which file is intended.
    conn = sqlite3.connect('fingerprint_database.db')
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT smiles, fingerprint FROM fingerprints')
        # Iterate the cursor directly so rows are streamed rather than all
        # 2048-character fingerprints being held in memory at once.
        for smiles, fingerprint in cursor:
            similarity = DataStructs.FingerprintSimilarity(
                search_bits, DataStructs.CreateFromBitString(fingerprint)
            )
            if similarity >= similarity_threshold:
                similar_molecules.append((smiles, similarity))
    finally:
        # Release the database file even if a stored fingerprint fails to decode
        conn.close()

    # Sort by similarity and print results
    similar_molecules.sort(key=lambda x: x[1], reverse=True)
    print(f"Similar molecules to {search_smiles} with a similarity threshold of {similarity_threshold}:")
    for smiles, similarity in similar_molecules:
        print(f"SMILES: {smiles}, Similarity: {similarity:.2f}")

    # Output the results to a CSV file
    df_results = pd.DataFrame(similar_molecules, columns=['SMILES', 'Similarity'])
    df_results.to_csv('similar_molecules.csv', index=False)
    print("Results saved to similar_molecules.csv")


Enter SMILES to search for similar molecules:  Nc1c(sc2nc(N)c(cc12)C#N)C(=O)Nc1ccccc1Br
Enter similarity threshold (0 to 1):  0.1


Similar molecules to Nc1c(sc2nc(N)c(cc12)C#N)C(=O)Nc1ccccc1Br with a similarity threshold of 0.1:
SMILES: Nc1c(C(Nc(cccc2)c2Br)=O)sc(nc2N)c1cc2C#N, Similarity: 1.00
SMILES: Nc(c1c2)c(C(Nc(cccc3)c3Oc3ccccc3)=O)sc1nc(N)c2C#N, Similarity: 0.71
SMILES: Nc1c(C(NCc2cccc(Cl)c2)=O)sc(nc2N)c1cc2C#N, Similarity: 0.57
SMILES: Cc1c(c(N)c(C(Nc(cccc2)c2OC)=O)s2)c2nc(C)c1, Similarity: 0.40
SMILES: CCOc(cccc1)c1NC(c(sc1nc(C)cc(C)c11)c1N)=O, Similarity: 0.38
SMILES: Cc1cc(COC)c(c(N)c(C(Nc(cccc2)c2O)=O)s2)c2n1, Similarity: 0.38
SMILES: Cc1cc(COC)c(c(N)c(C(Nc(cccc2)c2C(OC)=O)=O)s2)c2n1, Similarity: 0.37
SMILES: COc(cc1)cc(NC(c(sc2nc(-c(cc3)ccc3F)ccc22)c2N)=O)c1OC, Similarity: 0.36
SMILES: COc(ccc(Cl)c1)c1NC(c(sc1c2ccc(-c(cc3)cc(OC)c3OC)n1)c2N)=O, Similarity: 0.36
SMILES: Nc1c(C(Nc(c(Cl)ccc2)c2Cl)=O)sc2c1ccc(-c1cnccc1)n2, Similarity: 0.35
SMILES: Nc1c(C(Nc2nccs2)=O)sc2nc(CCCCC3)c3cc12, Similarity: 0.35
SMILES: Cc1cc(COC)c(c(N)c(C(Nc(cc2)ccc2C#N)=O)s2)c2n1, Similarity: 0.35
SMILES: COc(ccc(-c(cc1)nc2c1c(N)