In [9]:
from rdkit import Chem
from random import choice
from typing import List

from chemutator.evo_utils import (
    get_list_link_idxs,
    get_link_idx
)

from rdkit.Chem.Draw import IPythonConsole, DrawingOptions
# RDKit notebook drawing options applied through IPythonConsole.
# Annotate drawn atoms with their indices (presumably so the link indices used
# in this notebook can be read off the pictures — confirm).
IPythonConsole.drawOptions.addAtomIndices = True
# Scale factor for the annotation font.
IPythonConsole.drawOptions.annotationFontScale = 0.9

### utils

In [None]:
def link_fragments(
    smi_A: str,
    smi_B: str,
    link_idx_A: int,
    link_idx_B: int,
) -> str:
    """Join two fragments with a single bond and return the product SMILES.

    One hydrogen on each link atom is replaced by a new single bond between
    the two link atoms.

    NOTE: the next notebook cell defines a seven-argument ``link_fragments``
    that replaces this name when the cells are run in order.

    Args:
        smi_A: SMILES of the first fragment.
        smi_B: SMILES of the second fragment.
        link_idx_A: index of the link atom in the molecule parsed from ``smi_A``.
        link_idx_B: index of the link atom in the molecule parsed from ``smi_B``.

    Returns:
        The SMILES string produced by ``Chem.MolToSmiles`` for the joined molecule.

    Raises:
        ValueError: if both link atoms are N or O (the same pairing the
            seven-argument version refuses), or if a link atom has no
            hydrogen that could be replaced by the new bond.
    """
    mol_A = Chem.AddHs(Chem.MolFromSmiles(smi_A))
    mol_B = Chem.AddHs(Chem.MolFromSmiles(smi_B))
    num_atoms_A = mol_A.GetNumAtoms()

    heteroatoms = ('N', 'O')
    if (mol_A.GetAtomWithIdx(link_idx_A).GetSymbol() in heteroatoms
            and mol_B.GetAtomWithIdx(link_idx_B).GetSymbol() in heteroatoms):
        raise ValueError("refusing to bond two N/O link atoms directly")

    # In the combined molecule the atoms of B come after all atoms of A.
    mol_A_B = Chem.RWMol(Chem.CombineMols(mol_A, mol_B))
    atom_idx_A = link_idx_A
    atom_idx_B = link_idx_B + num_atoms_A

    hyd_idxs = []
    for atom_idx in (atom_idx_A, atom_idx_B):
        hydrogens = [
            nbr.GetIdx()
            for nbr in mol_A_B.GetAtomWithIdx(atom_idx).GetNeighbors()
            if nbr.GetSymbol() == 'H'
        ]
        if not hydrogens:
            raise ValueError(
                f"link atom {atom_idx} of the combined molecule has no hydrogen to replace"
            )
        hyd_idxs.append(hydrogens[-1])

    mol_A_B.AddBond(atom_idx_A, atom_idx_B, Chem.BondType.SINGLE)
    # Remove the highest index first so the remaining index is still valid.
    for hyd_idx in sorted(hyd_idxs, reverse=True):
        mol_A_B.RemoveAtom(hyd_idx)

    return Chem.MolToSmiles(Chem.RemoveHs(mol_A_B.GetMol()))

In [None]:
def _last_hydrogen_neighbor(mol, atom_idx, exclude_idx=-1):
    """Return the index of the last H bonded to ``atom_idx`` (skipping ``exclude_idx``), or -1."""
    found = -1
    for nbr in mol.GetAtomWithIdx(atom_idx).GetNeighbors():
        if nbr.GetSymbol() == 'H' and nbr.GetIdx() != exclude_idx:
            found = nbr.GetIdx()
    return found


def link_fragments(smi_A, smi_B, smi_L, link_idx_A, link_idx_B, link_idx_L1=None, link_idx_L2=None):
    """Join fragments A and B, directly or through a linker, and return the product SMILES.

    Every new bond is a single bond, and each one replaces one hydrogen on
    each of the two atoms it connects.

    Args:
        smi_A: SMILES of fragment A.
        smi_B: SMILES of fragment B.
        smi_L: SMILES of the linker, or None to bond A and B directly.
        link_idx_A: index of the link atom in the molecule parsed from ``smi_A``.
        link_idx_B: index of the link atom in the molecule parsed from ``smi_B``.
        link_idx_L1: index of the linker atom bonded to A (needed only with a linker).
        link_idx_L2: index of the linker atom bonded to B (needed only with a linker);
            may equal ``link_idx_L1``.

    Returns:
        The SMILES string produced by ``Chem.MolToSmiles`` for the joined
        molecule, or None when the link is not made: a direct link between
        two N/O atoms, or an atom without a hydrogen left to replace.

    Raises:
        TypeError: if ``smi_L`` is given but a linker index is missing.
    """
    use_linker = smi_L is not None
    if use_linker and (link_idx_L1 is None or link_idx_L2 is None):
        raise TypeError("link_idx_L1 and link_idx_L2 are required when smi_L is given")

    mol_A = Chem.AddHs(Chem.MolFromSmiles(smi_A))
    mol_B = Chem.AddHs(Chem.MolFromSmiles(smi_B))
    num_atoms_A = mol_A.GetNumAtoms()
    num_atoms_B = mol_B.GetNumAtoms()

    if not use_linker:
        heteroatoms = ('N', 'O')
        # A direct bond between two N/O atoms is refused.
        if (mol_A.GetAtomWithIdx(link_idx_A).GetSymbol() in heteroatoms
                and mol_B.GetAtomWithIdx(link_idx_B).GetSymbol() in heteroatoms):
            return None

    mol_A_B = Chem.CombineMols(mol_A, mol_B)
    if use_linker:
        mol_L = Chem.AddHs(Chem.MolFromSmiles(smi_L))
        mol_A_B = Chem.CombineMols(mol_A_B, mol_L)
    mol_A_B = Chem.RWMol(mol_A_B)

    # Atom order in the combined molecule: all of A, then all of B, then the linker.
    atom_idx_A = link_idx_A
    atom_idx_B = link_idx_B + num_atoms_A
    hyd_idxs = [
        _last_hydrogen_neighbor(mol_A_B, atom_idx_A),
        _last_hydrogen_neighbor(mol_A_B, atom_idx_B),
    ]
    if use_linker:
        linker_offset = num_atoms_A + num_atoms_B
        atom_idx_L1 = link_idx_L1 + linker_offset
        atom_idx_L2 = link_idx_L2 + linker_offset
        hyd_idx_L1 = _last_hydrogen_neighbor(mol_A_B, atom_idx_L1)
        # When both ends use the same linker atom, a second, different H is needed.
        hyd_idx_L2 = _last_hydrogen_neighbor(mol_A_B, atom_idx_L2, exclude_idx=hyd_idx_L1)
        hyd_idxs += [hyd_idx_L1, hyd_idx_L2]

    # -1 means an atom had no hydrogen to replace; passing it on to
    # RemoveAtom would fail, so report "cannot link" like the N/O case does.
    if any(hyd_idx < 0 for hyd_idx in hyd_idxs):
        return None

    if use_linker:
        mol_A_B.AddBond(atom_idx_A, atom_idx_L1, Chem.BondType.SINGLE)
        mol_A_B.AddBond(atom_idx_B, atom_idx_L2, Chem.BondType.SINGLE)
    else:
        mol_A_B.AddBond(atom_idx_A, atom_idx_B, Chem.BondType.SINGLE)

    # Remove from the highest index down so the indices still to be removed stay valid.
    for hyd_idx in sorted(hyd_idxs, reverse=True):
        mol_A_B.RemoveAtom(hyd_idx)

    return Chem.MolToSmiles(Chem.RemoveHs(mol_A_B.GetMol()))

### tests

In [5]:
def test_get_list_link_idxs():
    """Compare get_list_link_idxs against recorded index lists for three SMILES.

    Each case holds the positional arguments of the call and the list it must
    return; the second case passes True as the second argument.

    NOTE(review): a later cell in this notebook shows the second call
    returning [0, 2, 3, 7, 8, 11, 14] instead of [7, 8, 14] — confirm which
    value is the intended one.
    """
    cases = [
        (('CC(C(=O)O)c1ccc2oc(-c3ccc(Cl)cc3)nc2c1',), [4, 6, 7, 12, 13, 16, 17, 20]),
        (('C=C(CC)C(=O)c1ccc(OCC(=O)O)c(Cl)c1Cl', True), [7, 8, 14]),
        (('CN1CC2CC3=CC(C(=O)CC3)C3C=C(CCC3=O)CC1CN2',), [22]),
    ]
    for call_args, expected_idxs in cases:
        assert get_list_link_idxs(*call_args) == expected_idxs


# Run the checks as soon as the cell is executed.
test_get_list_link_idxs()

In [21]:
# Ad-hoc check: print the link indices for the SMILES that is also used in the
# second case of test_get_list_link_idxs (same second argument, True).
# NOTE(review): the output recorded below, [0, 2, 3, 7, 8, 11, 14], differs from
# the [7, 8, 14] asserted by that test for the same call — confirm which is current.
smi = 'C=C(CC)C(=O)c1ccc(OCC(=O)O)c(Cl)c1Cl'
print(get_list_link_idxs(smi, True))

[0, 2, 3, 7, 8, 11, 14]
