# Example application of medicinal chemistry filters (MCFs)
Load a few example molecules, given as SMILES strings, and parse them into RDKit molecule objects.

In [1]:
from rdkit import Chem

# SMILES strings for the example molecules scored in this notebook.
example_molecules = [
    'C1=CC=C(C=C1)CNC2=CC=CC3=C2N=CC=C3',
    'C1=CC=C(C=C1)C2=CC(=NC(=C2)C3=CC=CC=N3)C4=CC=CC=C4',
    'CCCCN(CCCC)C(=O)CO',
    'CCC(C1=CC(=C(C(=C1)OC)OC)OC)C(=O)O',
    'C#CC1=NC(=CN=C1N)Cl',
    'COC1=CC(=C(C=C1)Br)OC(F)F',
]

# Map each SMILES string to its parsed RDKit molecule, keeping list order.
# NOTE(review): RDKit documents that MolFromSmiles returns None when a string
# cannot be parsed; nothing here checks for that -- confirm if the list changes.
smiles_to_mol = dict(zip(example_molecules, map(Chem.MolFromSmiles, example_molecules)))

Compute a binary score vector for each molecule: one entry per filter, 1 if the molecule passes that filter and 0 if it does not.

In [2]:
import numpy as np
import mcf

# Filter set adapted from "Deep learning enables rapid identification of potent DDR1 kinase inhibitors".
# The order matters: score_molecule emits one entry per filter, in this order.
_filter_classes = (
    mcf.LogPFilter,
    mcf.MolecularWeightFilter,
    mcf.HBABHBDFilter,
    mcf.TPSAFilter,
    mcf.NRBFilter,
    mcf.ToxicityFilter,
    mcf.TrivialRulesFilter,
)

# Instantiate each filter with its default arguments.
all_filters = [filter_cls() for filter_cls in _filter_classes]

def score_molecule(mol, filters=None):
    """Score one molecule against a sequence of medicinal chemistry filters.

    Parameters
    ----------
    mol
        Molecule object handed unchanged to each filter's ``apply`` method
        (in this notebook, the values of ``smiles_to_mol``).
    filters : sequence, optional
        Objects exposing ``apply(mol)``. When omitted, the module-level
        ``all_filters`` list is used, so existing one-argument calls
        behave exactly as before.

    Returns
    -------
    numpy.ndarray
        Integer array with one entry per filter, in filter order: the
        value returned by ``apply`` cast to ``int``.
    """
    if filters is None:
        # Resolve the default at call time so edits to all_filters are picked up.
        filters = all_filters
    return np.array([f.apply(mol) for f in filters]).astype(int)

# Displayed as the cell output: SMILES string -> per-filter score vector.
{smiles: score_molecule(molecule) for smiles, molecule in smiles_to_mol.items()}

{'C1=CC=C(C=C1)CNC2=CC=CC3=C2N=CC=C3': array([1, 0, 1, 1, 1, 1, 1]),
 'C1=CC=C(C=C1)C2=CC(=NC(=C2)C3=CC=CC=N3)C4=CC=CC=C4': array([1, 1, 1, 1, 1, 1, 1]),
 'CCCCN(CCCC)C(=O)CO': array([1, 0, 1, 1, 1, 1, 1]),
 'CCC(C1=CC(=C(C(=C1)OC)OC)OC)C(=O)O': array([1, 1, 1, 1, 1, 0, 0]),
 'C#CC1=NC(=CN=C1N)Cl': array([1, 0, 1, 1, 1, 1, 0]),
 'COC1=CC(=C(C=C1)Br)OC(F)F': array([1, 1, 1, 1, 1, 1, 0])}

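You can combine the per-filter scores into a single score per molecule using either:
* mean - the fraction of filters passed; gives partial credit to molecules that pass only some filters
* product - 1 only if every filter is passed, otherwise 0; rewards only fully valid molecules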