## Anionic surfactants test

In [66]:
def anionic_test():
    """Build a predicate that looks for anionic-surfactant head groups.

    The SMARTS queries are compiled once when this factory is called and are
    captured by the returned closure, so they are not re-parsed per molecule.

    Returns:
        A function ``test(x)``. ``x`` must support ``x['mol']`` and hold an
        RDKit molecule or ``None``; no other key is read. ``test`` returns
        ``False`` when the molecule is ``None``, when
        ``has_branching_with_carbon`` reports branching, or when
        ``is_straight_alkyl_chain`` rejects it. Otherwise it returns whether
        any of the head-group patterns matches.

    Raises:
        ValueError: if any SMARTS pattern fails to compile.
    """
    # NOTE(review): sulfate and sulfonate accept both the acid and the anion
    # ([OH,O-]), whereas the phosphate, carboxylic and silicic patterns only
    # match O-H forms as written. Confirm whether salts of those groups are
    # meant to be excluded.
    head_groups = (
        Chem.MolFromSmarts('COS(=O)(=O)[OH,O-]'),       # sulfate
        Chem.MolFromSmarts('CS(=O)(=O)[OH,O-]'),        # sulfonate
        Chem.MolFromSmarts('COP([OH1])([OH1])=O'),      # phosphate
        Chem.MolFromSmarts('[CX3;!$(Cc)](=O)[OX2H1]'),  # carboxylic
        Chem.MolFromSmarts('[Si][OX2H]'),               # silicic
    )

    # MolFromSmarts signals a parse failure by returning None, so test for
    # that explicitly instead of relying on object truthiness.
    if any(pattern is None for pattern in head_groups):
        raise ValueError("One or more SMARTS patterns failed to initialize.")

    def test(x):
        mol = x['mol']

        # A molecule that could not be built can never match.
        if mol is None:
            return False

        # Exclude branching with carbon atoms.
        if has_branching_with_carbon(mol):
            return False

        # Apply the alkyl-chain neighbour check.
        if not is_straight_alkyl_chain(mol):
            return False

        # Substructure matching; stops at the first pattern that hits.
        return any(mol.HasSubstructMatch(pattern) for pattern in head_groups)

    return test


In [67]:
from rdkit.Chem import rdmolops

def is_straight_alkyl_chain(mol):
    """Check the heteroatom neighbours of highly connected carbons.

    Every carbon whose ``GetDegree()`` exceeds 2 may only be bonded to
    carbon, oxygen or sulfur. The first carbon that violates this makes the
    function return ``False``; otherwise it returns ``True``.

    Note that carbon neighbours are always accepted here, so a carbon bonded
    only to other carbons passes this check regardless of its degree.
    """
    allowed_hetero = {"O", "S"}
    for atom in mol.GetAtoms():
        # Only carbons with more than two neighbours are inspected.
        if atom.GetSymbol() != "C" or atom.GetDegree() <= 2:
            continue
        for nbr in atom.GetNeighbors():
            symbol = nbr.GetSymbol()
            if symbol != "C" and symbol not in allowed_hetero:
                return False
    return True

In [68]:
def has_branching_with_carbon(mol):
    """Report whether any carbon is bonded to more than two other carbons.

    Carbons whose ``GetDegree()`` is at most 2 are skipped. For every other
    carbon a diagnostic line is printed; if more than two of its neighbours
    are carbon, a second line is printed and ``True`` is returned straight
    away. Returns ``False`` when no such carbon exists.
    """
    for atom in mol.GetAtoms():
        if atom.GetSymbol() != "C":
            continue
        if atom.GetDegree() <= 2:
            continue

        neighbors = [nbr.GetSymbol() for nbr in atom.GetNeighbors()]
        print(f"Carbon index {atom.GetIdx()} degree: {atom.GetDegree()}, neighbors: {neighbors}")

        # Heteroatom neighbours do not count towards branching.
        if neighbors.count("C") > 2:
            print(f"Branching detected at carbon index {atom.GetIdx()} with neighbors {neighbors}")
            return True
    return False

In [69]:
# Sample input: a sodium salt of a straight-chain alkyl sulfate.
# The SMILES is defined once and the molecule is parsed from it.
smiles = '[Na+].CCCCCCCCCCCCOS([O-])(=O)=O'
mol2 = Chem.MolFromSmiles(smiles)


In [70]:
# Build the predicate, confirm it is callable, then classify the sample record.
test_fn = anionic_test()
print("Test function callable:", callable(test_fn))
result = test_fn(dict(mol=mol2, smiles=smiles))
print("Final result:", result)

Test function callable: True
Final result: True
