In [2]:
import warnings
import itertools
from tqdm import tqdm
from rdkit import Chem, rdBase

# Silence Python warnings and RDKit's error-level log channel for this run.
warnings.filterwarnings('ignore')
rdBase.DisableLog('rdApp.error')

# Input molecules as SMILES strings, one per line.
# NOTE: the last two entries are identical.
SMILES = [
    'CCc1nc(CCCC(=O)Nc2ccc(F)cc2F)cs1',
    'CN(C)c1ccc(C(=O)Nc2ccc(Cl)cc2Cl)cc1',
    'CN(C)c1ccc(C(=O)Nc2ccc(Cl)cc2Cl)cc1',
]

# Query pattern: any two atoms joined by a single bond that is not in a ring.
SMARTS = Chem.MolFromSmarts('[*]!@-[*]')

class MoleculeMatcher:
    """Collect the substructure matches of one fixed query in molecules.

    The query is stored at construction time and passed unchanged to
    ``mol.GetSubstructMatches`` for every molecule that is examined.
    """

    def __init__(self, smarts):
        """Remember the query.

        smarts: the query object handed to ``mol.GetSubstructMatches``
        """
        self.smarts = smarts

    def get_matches_wrong(self, mol):
        """
        Collect matches by merging them into an initially empty set.

        mol: a mol object providing ``GetSubstructMatches``
        return: a set of tuples, each holding the atom indices of one match
                in ascending order, so the same atoms matched in a different
                order appear only once. For a two-atom query each tuple is
                an (atom1, atom2) pair.
        """
        matches = set()
        # In-place union with an empty set: yields the same result as
        # building the set directly, as get_matches_corrected does.
        matches |= {
            tuple(sorted(match))
            for match in mol.GetSubstructMatches(self.smarts)
        }
        return matches

    def get_matches_corrected(self, mol):
        """
        Collect matches by building the result set directly.

        mol: a mol object providing ``GetSubstructMatches``
        return: a set of tuples, each holding the atom indices of one match
                in ascending order, so the same atoms matched in a different
                order appear only once. For a two-atom query each tuple is
                an (atom1, atom2) pair.
        """
        return {
            tuple(sorted(match))
            for match in mol.GetSubstructMatches(self.smarts)
        }




# Build one matcher around the module-level SMARTS query and reuse it below.
matcher = MoleculeMatcher(SMARTS)

# Run both match methods on every SMILES string and report them side by side.
for smiles in tqdm(SMILES, desc="Processing SMILES"):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles returns None when the string cannot be parsed.
        print(f"Could not parse SMILES: {smiles}")
        continue

    wrong_matches = matcher.get_matches_wrong(mol)
    corrected_matches = matcher.get_matches_corrected(mol)

    # Report unconditionally: printing only when the two results are equal
    # would silently skip the one case this comparison exists to surface.
    print(f"SMILES: {smiles}")
    print("Matches by wrong function: ", wrong_matches)
    print("Matches by corrected function: ", corrected_matches)
    print("Results are the same:", wrong_matches == corrected_matches)

Processing SMILES: 100%|██████████| 3/3 [00:00<00:00, 2978.20it/s]

SMILES: CCc1nc(CCCC(=O)Nc2ccc(F)cc2F)cs1
Matches by wrong function:  {(0, 1), (1, 2), (10, 11), (8, 10), (6, 7), (4, 5), (5, 6), (17, 18), (14, 15), (7, 8)}
Matches by corrected function:  {(0, 1), (1, 2), (10, 11), (8, 10), (6, 7), (4, 5), (5, 6), (17, 18), (14, 15), (7, 8)}
Results are the same: True
SMILES: CN(C)c1ccc(C(=O)Nc2ccc(Cl)cc2Cl)cc1
Matches by wrong function:  {(0, 1), (9, 10), (13, 14), (1, 2), (7, 9), (6, 7), (1, 3), (16, 17)}
Matches by corrected function:  {(0, 1), (9, 10), (13, 14), (1, 2), (7, 9), (6, 7), (1, 3), (16, 17)}
Results are the same: True
SMILES: CN(C)c1ccc(C(=O)Nc2ccc(Cl)cc2Cl)cc1
Matches by wrong function:  {(0, 1), (9, 10), (13, 14), (1, 2), (7, 9), (6, 7), (1, 3), (16, 17)}
Matches by corrected function:  {(0, 1), (9, 10), (13, 14), (1, 2), (7, 9), (6, 7), (1, 3), (16, 17)}
Results are the same: True



