In [6]:
from rdkit import Chem
from copy import deepcopy
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from IPython.display import SVG
import json
import ipywidgets as widgets
from ipywidgets import interact
from src.chem_draw import draw_molecule
from hydra import compose, initialize
import polars as pl
from functools import partial

# Compose the "filepaths" Hydra config from ../conf/filepaths (a relative
# config_path). The resulting `cfg` supplies data locations to later cells
# (e.g. cfg.known).
with initialize(version_base=None, config_path="../conf/filepaths"):
    cfg = compose(config_name="filepaths")

In [5]:
# The known-compounds table is stored under the directory named by cfg.known.
known_dir = Path(cfg.known)
kcs = pl.read_parquet(known_dir / "known_compounds.parquet")
print(kcs.height)  # number of rows loaded
kcs.head()

8603


id,smiles,names,n_atoms
i64,str,list[str],i32
0,"""*""","[""A""]",1
1,"""**""","[""RX""]",2
2,"""*C""","[""an alkane""]",2
3,"""*C#N""","[""a nitrile""]",3
4,"""*C(*)(N)C(=O)O""","[""2,2-dialkylglycine""]",7


In [14]:
def patt_filter(smiles: str, patt: Chem.Mol) -> bool:
    """
    Report whether the molecule parsed from `smiles` contains the substructure `patt`.

    Args:
        smiles: SMILES string to parse with Chem.MolFromSmiles.
        patt: Query molecule passed to HasSubstructMatch.

    Returns:
        False when the SMILES cannot be parsed (MolFromSmiles returns None);
        otherwise the result of the substructure match.
    """
    parsed = Chem.MolFromSmiles(smiles)
    # Short-circuit keeps unparseable SMILES from reaching HasSubstructMatch.
    return parsed is not None and parsed.HasSubstructMatch(patt)

# SMARTS for a carbon atom single-bonded to four other carbon atoms
# (i.e. a quaternary carbon centre).
qty_C = '[#6]-[#6](-[#6])(-[#6])(-[#6])'
qty_c_patt = Chem.MolFromSmarts(qty_C)
qty_c_filter = partial(patt_filter, patt=qty_c_patt)

# Row-wise boolean mask: True where the compound's SMILES matches the pattern.
has_qty_c = pl.col("smiles").map_elements(qty_c_filter, return_dtype=pl.Boolean)

# Keep the matching compounds, smallest (fewest atoms) first.
qty_cs = kcs.filter(has_qty_c).sort("n_atoms")
print(qty_cs.height)
qty_cs.head()



1828


[15:41:03] Unusual charge on atom 0 number of radical electrons set to zero
[15:41:03] Unusual charge on atom 0 number of radical electrons set to zero


id,smiles,names,n_atoms
i64,str,list[str],i32
2324,"""CC(C)(C)C=O""","[""2,2-dimethylpropanal""]",6
1975,"""CC(=O)C(C)(C)C""","[""3,3-dimethylbutan-2-one""]",7
2323,"""CC(C)(C)C(O)C#N""","[""(2S)-2-hydroxy-3,3-dimethylbutanenitrile""]",8
2322,"""CC(C)(C)C(C)(O)C#N""","[""(2S)-2-hydroxy-2-methyl-3,3-dimethylbutanenitrile""]",9
3360,"""CC1(C)COC(=O)C1=O""","[""2-dehydropantolactone""]",9


In [20]:
# One dropdown entry per row of qty_cs: the label is the row's first listed
# name and the value is the full row dict (id, smiles, names, n_atoms).
compound_options = [
    (record["names"][0], record) for record in qty_cs.iter_rows(named=True)
]
dropdown = widgets.Dropdown(
    options=compound_options,
    description='SMILES:',
    style={'description_width': 'initial'},
)

# Bind the dropdown to the viewer so it re-runs when the selection changes.
@interact
def display_molecule(row = dropdown):
    """Print every name of the selected row and show draw_molecule's output as an SVG."""
    names_line = ", ".join(row["names"])
    print(names_line)
    drawing = draw_molecule(row["smiles"], size=(300, 300))
    display(SVG(drawing))

interactive(children=(Dropdown(description='SMILES:', options=(('2,2-dimethylpropanal', {'id': 2324, 'smiles':…

In [21]:
# Persist the filtered compounds for downstream use.
# NOTE(review): this is an absolute, machine-specific path; consider deriving
# it from the Hydra `cfg` like the input path — confirm which config key
# points at the raw-data directory before changing it.
qty_c_out = "/home/stef/krxns/data/raw/qty_c_cpds.parquet"
qty_cs.write_parquet(qty_c_out)