# Fragmentation

> Fragmentation module

In [None]:
#| default_exp fragmentation

In [None]:
#| hide
from nbdev.showdoc import *
from fastcore.test import test_eq

In [None]:
#| export
import sys
if '..' not in sys.path:
    sys.path.append('..')
import numpy as np
from rdkit import Chem
from copy import deepcopy
from rdkit.Chem import MolToSmiles, MolFromSmiles, BRICS
from breadth_first_fragmentation.utilities import mols_from_smiles

In [None]:
#| export
def count_dummies(mol:Chem.rdchem.Mol, # input molecule
                  )->int: # count of dummy atoms
    "Count the dummy atoms (atomic number 0) in a molecule."
    # Dummy atoms are the ones whose atomic number is reported as 0.
    return sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0)

In [None]:
show_doc(count_dummies)

---

[source](https://github.com/panukorn17/breadth-first-fragmentation/blob/main/breadth_first_fragmentation/fragmentation.py#L22){target="_blank" style="float:right; font-size:smaller"}

### count_dummies

>      count_dummies (mol:rdkit.Chem.rdchem.Mol)

*Function to count dummy atoms.*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| mol | Mol | input molecule |
| **Returns** | **int** | **count of dummy atoms** |

In [None]:
#| export
def get_size(frag:Chem.rdchem.Mol, # input fragment
             )->int: # count of real atoms
    "Count the real (non-dummy) atoms of a fragment."
    # Real atoms are whatever remains once the dummy atoms are discounted.
    n_dummy = count_dummies(frag)
    return frag.GetNumAtoms() - n_dummy

In [None]:
show_doc(get_size)

---

[source](https://github.com/panukorn17/breadth-first-fragmentation/blob/main/breadth_first_fragmentation/fragmentation.py#L32){target="_blank" style="float:right; font-size:smaller"}

### get_size

>      get_size (frag:rdkit.Chem.rdchem.Mol)

*Function to count real atoms.*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| frag | Mol | input fragment |
| **Returns** | **int** | **count of real atoms** |

In [None]:
#| export
def replace_last(s:str, # the string (fragment) to which the dummy label * is to be replaced with another fragment
                 old:str, # the string from the fragment s to be replaced
                 new:str, # the string to replace the "old" string in the fragment s
                 )->str: # the original string s with the replacement
    "Replace the last occurrence of `old` in `s` with `new`."
    # Locate the right-most match; -1 means `old` does not occur in `s`.
    start = s.rfind(old)
    if start < 0:
        return s

    # Stitch the text before the match, the replacement and the text after it.
    end = start + len(old)
    return s[:start] + new + s[end:]

In [None]:
show_doc(replace_last)

---

[source](https://github.com/panukorn17/breadth-first-fragmentation/blob/main/breadth_first_fragmentation/fragmentation.py#L41){target="_blank" style="float:right; font-size:smaller"}

### replace_last

>      replace_last (s:str, old:str, new:str)

*Function to replace the last occurring dummy label with a fragment.*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| s | str | the string (fragment) to which the dummy label * is to be replaced with another fragment |
| old | str | the string from the fragment s to be replaced |
| new | str | the string to replace the "old" string in the fragment s |
| **Returns** | **str** | **the original string s with the replacement** |

In [None]:
#| export
def check_reconstruction(frags:list[str], # list of fragments in SMILES format
                         frag_1:str, # head/tail fragment in SMILES format
                         frag_2:str, # head/tail fragment in SMILES format
                         orig_smi:str, # original molecule in SMILES format
                         )->bool: # whether the original molecule was reconstructed
    "Function to test whether the original molecule has been reconstructed."
    try:
        # Build a new list so the caller's fragment list is never mutated.
        frags_test = [*frags, frag_1, frag_2]

        # Start from the last fragment and attach the remaining ones from the
        # end of the list towards the front: each step drops one dummy label
        # from the fragment being attached and substitutes it for the last
        # dummy label of the partial reconstruction.
        recomb_canon = frags_test[-1]
        for frag in reversed(frags_test[:-1]):
            recomb = replace_last(recomb_canon, "*", frag.replace("*", "", 1))
            recomb_canon = MolToSmiles(MolFromSmiles(Chem.CanonSmiles(recomb)), rootedAtAtom=1)

        # Compare both molecules in the same canonical, rooted representation.
        orig_smi_canon = MolToSmiles(MolFromSmiles(Chem.CanonSmiles(orig_smi)), rootedAtAtom=1)
        return recomb_canon == orig_smi_canon
    except Exception:
        # Any failure while stitching or parsing SMILES counts as a failed
        # reconstruction. `Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt and SystemExit propagate.
        return False

In [None]:
show_doc(check_reconstruction)

---

[source](https://github.com/panukorn17/breadth-first-fragmentation/blob/main/breadth_first_fragmentation/fragmentation.py#L64){target="_blank" style="float:right; font-size:smaller"}

### check_reconstruction

>      check_reconstruction (frags:list[str], frag_1:str, frag_2:str, orig_smi)

*Function to test whether the original molecule has been reconstructed.*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| frags | list | list of fragments in SMILES format |
| frag_1 | str | head/tail fragment in SMILES format |
| frag_2 | str | head/tail fragment in SMILES format |
| orig_smi |  | original molecule in SMILES format |
| **Returns** | **bool** | **whether the original molecule was reconstructed** |

In [None]:
# Worked example: no fragments collected so far, a '*CCC' fragment and a second
# fragment that carries a single dummy label '*'.
frags=[]
frag_1='*CCC'
frag_2='N(*)(CCc1cccc(-c2ccccc2)c1)C(=O)C1OC(C(=O)O)=CC(N)C1NC(C)=O'
# The molecule the two fragments are compared against after recombination.
orig_smi='CCCN(CCc1cccc(-c2ccccc2)c1)C(=O)C1OC(C(=O)O)=CC(N)C1NC(C)=O'
print(check_reconstruction(frags, frag_1, frag_2, orig_smi))

True


In [None]:
# Regression check: the fragments defined in the example cell must recombine into `orig_smi`.
test_eq(check_reconstruction(frags, frag_1, frag_2, orig_smi), True)

In [None]:
#| export
def check_bond_no(bonds:list, # the list of BRICS bond locations
                  frags:list, # the list of fragments
                  frag_list_len:int, # the bond-count limit the molecule is compared against
                  smi:str # the smiles string of the molecule
                  )->tuple: # a tuple containing the fragment list and a boolean value to indicate whether fragmentation is complete
    "Check whether the molecule has no more BRICS bonds than the given limit."
    # Over the limit: nothing is recorded and fragmentation is not finished.
    if len(bonds) > frag_list_len:
        return frags, False

    # At or under the limit: the molecule itself becomes the final fragment,
    # stored as a canonical SMILES rooted at atom 1.
    print("Final Fragment: ", smi)
    rooted_smi = MolToSmiles(MolFromSmiles(Chem.CanonSmiles(smi)), rootedAtAtom=1)
    frags.append(rooted_smi)
    return frags, True

In [None]:
show_doc(check_bond_no)

---

[source](https://github.com/panukorn17/breadth-first-fragmentation/blob/main/breadth_first_fragmentation/fragmentation.py#L90){target="_blank" style="float:right; font-size:smaller"}

### check_bond_no

>      check_bond_no (bonds:list, frags:list, frag_list_len:int, smi:str)

*This function checks if the molecule has fewer bonds than the limit of BRICS bonds.

Parameters:
bonds (list): the list of BRICS bonds
smi (str): the smiles string of the molecule
frags (list): the list of fragments
frag_list_len (int): the length of the fragment list

Returns:
tuple: a tuple containing the fragment list and a boolean value to indicate whether fragmentation is complete
    - frags (list): the list of fragments
    - fragComplete (bool): a boolean value to indicate whether fragmentation is complete*

In [None]:
#| export
def fragment_recursive(mol_smi_orig:str, # the original smiles string of the molecule
                       mol_smi:str, # the smiles string of the molecule
                       frags:list, # the list of fragments (extended in place)
                       counter:int, # the counter for the recursion
                       frag_list_len:int, # the maximum number of BRICS bonds a split-off fragment may contain
                       min_length:int=0, # the minimum number of atoms in a fragment
                       )->list: # the list of fragments once fragmentation is complete, otherwise None
    "This recursive function fragments a molecule using the DEFRAGMO fragmentation method."
    # The return value doubles as the completion flag for the recursion: a
    # (truthy) fragment list means "done", while None means this call could not
    # finish and the caller should keep trying its remaining bonds.
    try:
        counter += 1
        mol = MolFromSmiles(mol_smi)
        bonds = list(BRICS.FindBRICSBonds(mol))

        # Check if the mol has less bonds than the limit of BRIC bonds
        frags, fragComplete = check_bond_no(bonds, frags, frag_list_len, mol_smi)
        if fragComplete:
            return frags

        # Each BRICS match is ((atom_1, atom_2), labels); visit the bonds in
        # ascending bond-index order.
        bond_idxs = sorted(mol.GetBondBetweenAtoms(a1, a2).GetIdx() for (a1, a2), _ in bonds)

        for bond in bond_idxs:
            broken = Chem.FragmentOnBonds(mol,
                                          bondIndices=[bond],
                                          dummyLabels=[(0, 0)])
            head, tail = Chem.GetMolFrags(broken, asMols=True)
            head_bric_bond_no = len(list(BRICS.FindBRICSBonds(head)))
            tail_bric_bond_no = len(list(BRICS.FindBRICSBonds(tail)))
            if head_bric_bond_no <= frag_list_len:
                head_smi = Chem.CanonSmiles(MolToSmiles(head))
                tail_smi = MolToSmiles(MolFromSmiles(Chem.CanonSmiles(MolToSmiles(tail))), rootedAtAtom=1)
                if check_reconstruction(frags, head_smi, tail_smi, mol_smi_orig) and get_size(head) >= min_length:
                    print("Head fragment: ", head_smi)
                    print("Recurse tail: ", tail_smi)
                    frags.append(head_smi)
                    # min_length is forwarded so the size filter also applies
                    # to fragments split off deeper in the recursion.
                    fragComplete = fragment_recursive(mol_smi_orig, tail_smi, frags, counter,
                                                      frag_list_len=0, min_length=min_length)
                    if fragComplete:
                        return frags
                # if reconstruction fails, and there is only one bond, then add the fragment to the fragment list
                elif len(bond_idxs) == 1 and get_size(mol) >= min_length:
                    print("Final Fragment: ", mol_smi)
                    frags.append(MolToSmiles(MolFromSmiles(Chem.CanonSmiles(mol_smi)), rootedAtAtom=1))
                    return frags
                # last bond and nothing worked: retry the same molecule with a relaxed bond limit
                elif bond == bond_idxs[-1]:
                    fragComplete = fragment_recursive(mol_smi_orig,
                                                      MolToSmiles(MolFromSmiles(Chem.CanonSmiles(mol_smi)), rootedAtAtom=1),
                                                      frags, counter, frag_list_len + 1,
                                                      min_length=min_length)
                    if fragComplete:
                        return frags
            elif tail_bric_bond_no <= frag_list_len:
                tail_smi = Chem.CanonSmiles(MolToSmiles(tail))
                head_smi = MolToSmiles(MolFromSmiles(Chem.CanonSmiles(MolToSmiles(head))), rootedAtAtom=1)
                if check_reconstruction(frags, tail_smi, head_smi, mol_smi_orig) and get_size(tail) >= min_length:
                    print("Tail: ", tail_smi)
                    print("Recurse Head: ", head_smi)
                    frags.append(tail_smi)
                    fragComplete = fragment_recursive(mol_smi_orig, head_smi, frags, counter,
                                                      frag_list_len=0, min_length=min_length)
                    if fragComplete:
                        return frags
                elif len(bond_idxs) == 1 and get_size(mol) >= min_length:
                    print("Final fragment: ", mol_smi)
                    frags.append(MolToSmiles(MolFromSmiles(Chem.CanonSmiles(mol_smi)), rootedAtAtom=1))
                    return frags
                elif bond == bond_idxs[-1]:
                    fragComplete = fragment_recursive(mol_smi_orig,
                                                      MolToSmiles(MolFromSmiles(Chem.CanonSmiles(mol_smi)), rootedAtAtom=1),
                                                      frags, counter, frag_list_len + 1,
                                                      min_length=min_length)
                    if fragComplete:
                        return frags
    except Exception:
        # Deliberate best effort: a molecule that cannot be parsed or split is
        # reported as "not completed" instead of aborting the caller's run.
        return None
    # Every bond was tried without completing the fragmentation.
    return None

In [None]:
show_doc(fragment_recursive)

---

[source](https://github.com/panukorn17/breadth-first-fragmentation/blob/main/breadth_first_fragmentation/fragmentation.py#L115){target="_blank" style="float:right; font-size:smaller"}

### fragment_recursive

>      fragment_recursive (mol_smi_orig:str, mol_smi:str, frags:list,
>                          counter:int, frag_list_len:int, min_length:int=0)

*This recursive function fragments a molecule using the DEFRAGMO fragmentation method.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| mol_smi_orig | str |  | the original smiles string of the molecule |
| mol_smi | str |  | the smiles string of the molecule |
| frags | list |  | the list of fragments |
| counter | int |  | the counter for the recursion |
| frag_list_len | int |  | the length of the fragment list |
| min_length | int | 0 | the minimum number of atoms in a fragment |
| **Returns** | **list** |  | **the list of fragments** |

In [None]:
#| export
def break_into_fragments_defragmo(mol:Chem.rdchem.Mol, # the molecule object
                                  smi:str, # the smiles string of the molecule
                                  )->tuple: # a tuple containing the original smiles, the fragmented smiles, and the number of fragments
    "Break a molecule into fragments using the DEFRAGMO fragmentation method."
    # fragment_recursive fills this list in place; its return value is not needed here.
    collected = []
    fragment_recursive(smi, smi, collected, 0, 0)
    n_frags = len(collected)

    # Nothing could be extracted: flag the molecule with NaN.
    if n_frags == 0:
        return smi, np.nan, 0

    # A single fragment is reported as the input SMILES itself.
    if n_frags == 1:
        return smi, smi, 1

    # Several fragments are reported as one space-separated string.
    return smi, " ".join(collected), n_frags

In [None]:
show_doc(break_into_fragments_defragmo)

---

[source](https://github.com/panukorn17/breadth-first-fragmentation/blob/main/breadth_first_fragmentation/fragmentation.py#L199){target="_blank" style="float:right; font-size:smaller"}

### break_into_fragments_defragmo

>      break_into_fragments_defragmo (mol:rdkit.Chem.rdchem.Mol, smi:str)

*This function breaks a molecule into fragments using the DEFRAGMO fragmentation method.*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| mol | Mol | the molecule object |
| smi | str | the smiles string of the molecule |
| **Returns** | **tuple** | **a tuple containing the original smiles, the fragmented smiles, and the number of fragments** |

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()