In [1]:
# add path (for local)
import sys
# Prepend the repository root (relative to the working directory) to the
# import path, unless that exact entry is already present.
repo_root = "../../"
sys.path[:0] = [] if repo_root in sys.path else [repo_root]

In [None]:
from openai import OpenAI

import os

# Take the key from the environment rather than hardcoding it in the notebook;
# when the variable is unset this is the same empty placeholder as before.
api_key = os.environ.get("OPENAI_API_KEY", "")

# Starting molecule; it is substituted for the ###SMILES### placeholder below.
smiles = "c1cc(CC(C)C)ccc1C(C)C(=O)O"
prompt = "I will give you a molecule as SMILES. Please transform a given molecule to improve permeability, and return it as SMILES string. Don't include anything else other than SMILES in the output. Starting molecule: ###SMILES###"
prompt = prompt.replace("###SMILES###", smiles)

def ask(prompt: str, api_key: str, model: str = "gpt-4o-mini") -> str:
    """Send ``prompt`` to an OpenAI model and return the reply text.

    Args:
        prompt: Text passed as the ``input`` of the request.
        api_key: OpenAI API key. An empty value is forwarded as ``None`` so
            the client can apply its own default key lookup instead of
            sending a blank credential.
        model: Name of the model to query.

    Returns:
        The ``output_text`` attribute of the response.
    """
    # A new client is built on every call; acceptable for one-off notebook use.
    client = OpenAI(api_key=api_key or None)
    resp = client.responses.create(model=model, input=prompt)
    return resp.output_text

# Query the model with the prepared prompt; as the last expression of the
# cell, the returned string is what the notebook displays.
ask(prompt=prompt, api_key=api_key)

'CC(C)C(C(=O)O)C1=CC=C(C(C)C)C=C1'

In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from utils import draw_mol

# Starting molecule that every single-reactant SMIRKS below is applied to.
mol = Chem.MolFromSmiles("NCCCC(=O)O")
draw_mol(mol, 100, 100)
# mol = Chem.MolFromSmiles("CCc1ccccc1")
# mol = Chem.MolFromSmiles("O[C@H](c1cncnc1C(CC(F)F)F)C(=O)[O-]")
# mol = Chem.AddHs(mol)

# smirks_list = [
#     "[cH:1]>>[c:1]C", "[cH:1]>>[c:1]CC", "[cH:1]>>[c:1]F", "[cH:1]>>[c:1]Cl", "[cH:1]>>[c:1]O", "[cH:1]>>[c:1][N+](=O)[O-]", # benzene-derivative
#     "[O:1][H]>>[O:1]C(C)=O", "[O:1][H]>>[O:1]C", "[O:1][H]>>[O:1]S(=O)(=O)c1ccc(C)cc1", # alcohol
#     "[N:1]([H])[H]>>[N:1]C(C)=O", "[N:1]([H])[H]>>[N:1]C(=O)OC(C)(C)C", "[N:1]([H])[H]>>[N:1]S(=O)(=O)c1ccccc1", # amine
#     "[C:1](=O)[O:2][H]>>[C:1](=O)[O:2]C", "[C:1](=O)[OH]>>[C:1](=O)[NH2]", "[C:1](=O)[OH]>>[C:1](=O)Cl", # carboxylic acid
#     "[c:1][Br]>>[c:1]c1ccccc1", "[c:1][Cl]>>[c:1]N", # cross-coupling # "[c:1][I]>>[c:1]C#CH"?
#     "[C:1]=[C:2]>>[C:1](O)[C:2](O)", "[C:1]=[C:2]>>[C:1](Br)[C:2](Br)", "[C:1](=O)[C:2]>>[C:1](=N[C:2])" # misc
# ]
smirks_list = ["[N:1]>>[N:1]C(=O)OC(C)(C)C"]

# Run each SMIRKS on `mol` and pool every product molecule into one flat list.
generated_mols = []
for smarts in smirks_list:
    try:
        rxn = AllChem.ReactionFromSmarts(smarts)
        products = rxn.RunReactants((mol,))
    except Exception as err:
        # Best-effort: a pattern that cannot be parsed or applied is reported
        # and skipped. `Exception` (not a bare except) keeps Ctrl-C working.
        print(f"Skipping SMIRKS {smarts!r}: {err}")
        continue
    # RunReactants yields one tuple of product molecules per match.
    generated_mols.extend(p for ps in products for p in ps)
        
# De-duplicate the generated products by canonical SMILES, keeping the first
# molecule seen for each distinct string.
unique_smiles = set()
unique_mols = []

# Dedicated loop names are used so the notebook-level `mol` and `smiles`
# variables are not overwritten by this cell.
for product in generated_mols:
    try:
        product = Chem.RemoveHs(product)
        canonical = Chem.MolToSmiles(product, canonical=True)
    except Exception as err:
        # Best-effort: products RDKit cannot process are reported and skipped.
        print(f"Skipping product: {err}")
        continue
    if canonical not in unique_smiles:
        unique_smiles.add(canonical)
        unique_mols.append(product)

for unique_mol in unique_mols:
    draw_mol(unique_mol, 100, 100)

In [None]:
from rdkit import Chem
from ClickReaction import AlkalineEsterHydrolysis, AmideCoupling, AmideCouplingWithAnilines, BocRemoval, CuAAC, FmocRemoval, SulfonAmideFormation, SuzukiMiyaura

# Functional-group name -> compiled SMARTS query, in the order listed here.
patterns = {
    group: Chem.MolFromSmarts(smarts)
    for group, smarts in (
        ("carboxylic_acid", "C(=O)[OH]"),
        ("amine", "[NX3;H2,H1;!$(NC=O)]"),
        ("aniline", "c[NX3;H2,H1]"),
        ("alkyne", "C#C"),
        ("azide", "N=[N+]=[N-]"),
        ("sulfonyl_halide", "S(=O)(=O)[Cl,Br,F,I]"),
        ("aryl_halide", "c[Cl,Br,I,F]"),
        ("boronic_acid", "B(O)O"),
    )
}

def detect_functional_groups(mol):
    """Report which entries of the module-level ``patterns`` match ``mol``.

    Args:
        mol: Object exposing ``HasSubstructMatch``.

    Returns:
        dict mapping each pattern name to the result of
        ``mol.HasSubstructMatch(pattern)``.
    """
    return {name: mol.HasSubstructMatch(patt) for name, patt in patterns.items()}

initial_mol = Chem.MolFromSmiles("O=C(/[O-])c1cncnc1Cc1noc(-c2ccncc2)n1")
results = []

# Try each single-reactant reaction on the starting molecule and pool the
# products. A reaction that fails is reported and skipped, not silently ignored.
for reaction in [AlkalineEsterHydrolysis, BocRemoval, FmocRemoval]:
    try:
        result = reaction(initial_mol).get_product()
        results += result
    except Exception as err:
        # `Exception` instead of a bare except so Ctrl-C still interrupts.
        print(f"{getattr(reaction, '__name__', reaction)} skipped: {err}")

# Functional-group flags for the starting molecule.
mol_is = detect_functional_groups(initial_mol)
print(mol_is)

# One SMILES per line for every collected product.
for product_smiles in map(Chem.MolToSmiles, results):
    print(product_smiles)

In [None]:
from transition import BioT5Transition
from node import MolStringNode
from language import SELFIES
from utils import draw_mol

# Transition configured with the objective text "improve LogP value".
# NOTE(review): BioT5Transition is project code; its setup cost is not visible here.
transition = BioT5Transition(target_objective="improve LogP value")
# Root node built from a SELFIES key string.
root = MolStringNode.node_from_key("[C][=C][C][=C][C][=C][Ring1][=Branch1]", lang=SELFIES())
# Sample one child of the root via the transition, then get its molecule for drawing.
child = transition.sample_transition(root)
mol = child.mol()

draw_mol(mol)

In [None]:
import pandas as pd

df = pd.read_csv("generation_result/temp.csv")
# Keep only the last 1000 rows for a faster test. The explicit copy makes `df`
# an independent frame, so adding columns to it in later cells does not
# trigger pandas' SettingWithCopyWarning.
df = df.iloc[-1000:, :].copy()
df

In [None]:
from rdkit import Chem
# Parse each SMILES in the "key" column into a molecule object.
df["mol"] = df["key"].map(Chem.MolFromSmiles)

In [None]:
# Show the molecule stored under row label 199043 (a label specific to the
# CSV loaded above).
df.loc[199043, "mol"]

In [None]:
from rdkit.Chem import rdMMPA

def get_fragments(mol, max_cuts=3, max_cut_bonds=20):
    """Fragment ``mol`` with ``rdMMPA.FragmentMol``.

    Args:
        mol: Molecule to fragment.
        max_cuts: Forwarded as ``maxCuts``; defaults to 3, the value that was
            previously hard-coded.
        max_cut_bonds: Forwarded as ``maxCutBonds``; defaults to 20, the value
            that was previously hard-coded.

    Returns:
        The result of ``rdMMPA.FragmentMol`` called with ``resultsAsMols=True``.
        The caller in this notebook unpacks each item as a
        ``(core, sidechain)`` pair.
    """
    return rdMMPA.FragmentMol(
        mol,
        maxCuts=max_cuts,
        maxCutBonds=max_cut_bonds,
        resultsAsMols=True,
    )

# Group every (core, side chain) cut by the core's SMILES so that molecules
# sharing a core can be compared pairwise afterwards.
fragment_map = {}
# Iterating the three needed columns directly avoids building a Series per row.
for row_mol, reward, row_key in zip(df["mol"], df["d_score_reward"], df["key"]):
    if row_mol is None:
        # MolFromSmiles gives None for a SMILES it cannot parse; there is
        # nothing to fragment, so skip the row instead of crashing.
        continue
    for core, sidechain in get_fragments(row_mol):
        if core is None or sidechain is None:
            continue
        # The key is a 1-tuple holding the core SMILES.
        key = (Chem.MolToSmiles(core),)
        val = (Chem.MolToSmiles(sidechain), reward, row_key)
        fragment_map.setdefault(key, []).append(val)

# Enumerate every pair (i < j) of entries that share a core and record the
# property difference "second minus first" for the pair.
results = [
    {
        "core": core,
        "sc1": sc1,
        "sc2": sc2,
        "delta_property": p2 - p1,
        "smiles1": s1,
        "smiles2": s2,
    }
    for core, entries in fragment_map.items()
    for i, (sc1, p1, s1) in enumerate(entries)
    for sc2, p2, s2 in entries[i + 1:]
]

# Explicit columns keep the schema intact even when no pair was found, so
# later column lookups do not fail on an empty result.
mmpa_df = pd.DataFrame(
    results,
    columns=["core", "sc1", "sc2", "delta_property", "smiles1", "smiles2"],
)

In [None]:
# Inspect the matched-pair table.
mmpa_df

In [None]:
# Label each pair with its side-chain transformation "sc1>>sc2". Column-wise
# string concatenation replaces the row-wise apply: it gives the same strings,
# avoids a Python call per row, and also works when the table has no rows.
mmpa_df["smirks"] = mmpa_df["sc1"] + ">>" + mmpa_df["sc2"]

# Mean / std / count of the property delta for each distinct transformation.
smirks = (
    mmpa_df.groupby("smirks")["delta_property"]
    .agg(["mean", "std", "count"])
    .reset_index()
    # .sort_values("mean", ascending=False)
)

In [None]:
# Inspect the per-transformation statistics.
smirks

In [None]:
from rdkit.Chem import AllChem

def apply_smirks(mol: Chem.Mol, smirks: str):
    """Run a single-reactant reaction, given as a string, on ``mol``.

    Args:
        mol: Reactant molecule.
        smirks: Reaction string, parsed with ``useSmiles=True``.

    Returns:
        Whatever ``RunReactants`` returns for ``(mol,)``, or an empty list if
        parsing or running the reaction raises.
    """
    try:
        reaction = AllChem.ReactionFromSmarts(smirks, useSmiles=True)
        return reaction.RunReactants((mol,))
    except Exception:
        # Best-effort: any failure is treated as "no products".
        return []

In [None]:
# Keep transformations whose mean delta is positive and that were observed
# more than once.
culled_smirks = smirks[(smirks["mean"] > 0) & (smirks["count"] > 1)]

In [None]:
# Query molecule that the retained transformations are applied to.
mol = Chem.MolFromSmiles("O[C@H](c1cncnc1C(CC(F)F)F)C(=O)[O-]")

In [None]:
# Try every retained transformation on `mol`; print only non-empty results.
for candidate in culled_smirks["smirks"]:
    outcome = apply_smirks(mol, candidate)
    if not outcome:
        continue
    print(outcome)

In [None]:
from utils import draw_mol
# Rebuild the query molecule from its SMILES and draw it.
mol = Chem.MolFromSmiles("O[C@H](c1cncnc1C(CC(F)F)F)C(=O)[O-]")
draw_mol(mol)