## Terpenoids

example sesquiterpenoids
https://github.com/ebi-chebi/ChEBI/issues/4651

https://www.ebi.ac.uk/chebi/searchId.do?chebiId=144139

![img](https://www.ebi.ac.uk/chebi/displayImage.do?defaultImage=true&imageIndex=0&chebiId=144139)



In [4]:
from rdkit import Chem
from rdkit.Chem.Scaffolds import MurckoScaffold
from rdkit.Chem import Draw
from rdkit.Chem.rdMolDescriptors import CalcNumRings

# Your molecule's SMILES
smiles = "O=C1C(C2C(C3[C@](C4(C(C5(C(C(=O)C(C)(C)CC5)CC4)C)CC3)C)(C)CC2)(C)CC1OC(=O)C)(C)C"

def get_core_scaffold(smiles: str, draw=False):
    mol = Chem.MolFromSmiles(smiles)

    if mol:
        
        # Generate the Murcko framework (core backbone)
        core = MurckoScaffold.GetScaffoldForMol(mol)

        # Convert core to SMILES
        core_smiles = Chem.MolToSmiles(core)
        print("Core Scaffold SMILES:", core_smiles)

        # Count Carbon Atoms in Core
        c_count = sum(1 for atom in core.GetAtoms() if atom.GetAtomicNum() == 6)
        print("Core Carbon Count:", c_count)

        # Count Number of Rings in Core
        ring_count = CalcNumRings(core)
        print("Number of Rings in Core:", ring_count)

        # Show Core Structure
        if draw:
            core_image = Draw.MolToImage(core)
            core_image.show()

    else:
        print("Invalid SMILES input.")

get_core_scaffold(smiles)



Core Scaffold SMILES: O=C1CCC2C(CCC3C2CCC2C4CCCC(=O)C4CCC23)C1
Core Carbon Count: 22
Number of Rings in Core: 5


In [3]:
talatrachyoxazine_B = 'C=C(C)[C@@H]1CC[C@@H](C)[C@@]2(O)[C@@H]1C=C(C)[C@@H](OC(C)=O)[C@@H]2OC(=O)[C@@H]1C[C@@]2(O)C3=C(C(Cl)=CC=C3)N(C)O[C@H]2N1/C=C/C(=O)OCCCCCCCCCCCCCCCCCCCCC'

In [11]:
def draw(smiles: str):
    mol = Chem.MolFromSmiles(smiles)
    if mol:
        img = Draw.MolToImage(mol)
        img.show()
    else:
        print("Invalid SMILES input.")
        
draw(talatrachyoxazine_B)

In [5]:
get_core_scaffold(talatrachyoxazine_B)

Core Scaffold SMILES: O=C(O[C@H]1CC=C[C@@H]2CCCCC21)[C@@H]1CC2c3ccccc3NO[C@H]2N1
Core Carbon Count: 21
Number of Rings in Core: 5


In [6]:
from rdkit.Chem.rdmolops import GetSSSR

def extract_largest_ring(smiles: str, draw=False):
    mol = Chem.MolFromSmiles(smiles)

    if mol:
        rings = [ring for ring in Chem.GetSymmSSSR(mol)]
        largest_ring = max(rings, key=len) if rings else None

        if largest_ring:
            ring_atoms = set(largest_ring)
            core = Chem.PathToSubmol(mol, list(ring_atoms))
            core_smiles = Chem.MolToSmiles(core)
            print("Largest Ring System SMILES:", core_smiles)

            if draw:
                core_image = Draw.MolToImage(core)
                core_image.show()

            return core_smiles

    print("No rings found or invalid SMILES.")
    return None

extract_largest_ring(talatrachyoxazine_B)

Largest Ring System SMILES: CC.CCCCC.CO


'CC.CCCCC.CO'

In [7]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.rdmolops import GetSSSR

def extract_largest_fused_ring_system(smiles: str, draw=False):
    mol = Chem.MolFromSmiles(smiles)
    
    if not mol:
        print("Invalid SMILES input.")
        return None

    # Find all rings in the molecule
    sssr = Chem.GetSymmSSSR(mol)

    if not sssr:
        print("No rings found in the molecule.")
        return None

    # Identify the largest fused ring system
    ring_sets = [set(ring) for ring in sssr]
    largest_ring_system = max(ring_sets, key=len, default=None)

    if not largest_ring_system:
        print("No valid fused ring system found.")
        return None

    # Convert the ring atom indices into a new molecule
    core = Chem.PathToSubmol(mol, list(largest_ring_system))
    core_smiles = Chem.MolToSmiles(core)
    
    print("Largest Fused Ring System SMILES:", core_smiles)

    if draw:
        core_image = Draw.MolToImage(core)
        core_image.show()

    return core_smiles

extract_largest_fused_ring_system(talatrachyoxazine_B)


Largest Fused Ring System SMILES: CC.CCCCC.CO


'CC.CCCCC.CO'

In [9]:
from rdkit import Chem
from rdkit.Chem import Draw

def extract_fused_ring_system(smiles: str, draw=False):
    mol = Chem.MolFromSmiles(smiles)

    if not mol:
        print("Invalid SMILES input.")
        return None

    # Get all rings in the molecule
    sssr = Chem.GetSymmSSSR(mol)
    if not sssr:
        print("No rings detected in the molecule.")
        return None

    # Create a list of sets, each containing atom indices of a ring
    ring_sets = [set(ring) for ring in sssr]

    # Merge overlapping ring sets to find the largest fused system
    fused_rings = []
    while ring_sets:
        first = ring_sets.pop(0)
        merged = True
        while merged:
            merged = False
            for other in ring_sets:
                if first.intersection(other):  # If rings share atoms, merge them
                    first.update(other)
                    ring_sets.remove(other)
                    merged = True
        fused_rings.append(first)

    # Find the largest fused ring system
    largest_fused_system = max(fused_rings, key=len, default=set())

    if not largest_fused_system:
        print("No valid fused ring system found.")
        return None

    # Extract and create the substructure
    core = Chem.PathToSubmol(mol, list(largest_fused_system))
    core_smiles = Chem.MolToSmiles(core)

    print("Largest Fused Ring System SMILES:", core_smiles)

    # Optional: Draw the structure
    if draw:
        core_image = Draw.MolToImage(core)
        core_image.show()

    return core_smiles

extract_fused_ring_system(talatrachyoxazine_B)


Largest Fused Ring System SMILES: CCCO.CNCO.cc(cCl)NC.ccc


'CCCO.CNCO.cc(cCl)NC.ccc'

In [10]:
from rdkit import Chem
from rdkit.Chem import Draw

def extract_fused_ring_system(smiles: str, draw=False):
    mol = Chem.MolFromSmiles(smiles)

    if not mol:
        print("Invalid SMILES input.")
        return None

    # Get all rings in the molecule
    sssr = Chem.GetSymmSSSR(mol)
    if not sssr:
        print("No rings detected in the molecule.")
        return None

    # Create a list of sets, each containing atom indices of a ring
    ring_sets = [set(ring) for ring in sssr]

    # Merge overlapping ring sets to find the largest fused system
    fused_rings = []
    while ring_sets:
        first = ring_sets.pop(0)
        merged = True
        while merged:
            merged = False
            for other in ring_sets:
                if first.intersection(other):  # If rings share atoms, merge them
                    first.update(other)
                    ring_sets.remove(other)
                    merged = True
        fused_rings.append(first)

    # Find the largest fused ring system
    largest_fused_system = max(fused_rings, key=len, default=set())

    if not largest_fused_system:
        print("No valid fused ring system found.")
        return None

    # Extract and create the substructure
    core = Chem.PathToSubmol(mol, list(largest_fused_system))
    core_smiles = Chem.MolToSmiles(core)

    print("Largest Fused Ring System SMILES:", core_smiles)

    # Optional: Draw the structure
    if draw:
        core_image = Draw.MolToImage(core)
        core_image.show()

    return core_smiles

# Run with Talatrachyoxazine B
talatrachyoxazine_B = "C=C(C)[C@@H]1CC[C@@H](C)[C@@]2(O)[C@@H]1C=C(C)[C@@H](OC(C)=O)[C@@H]2OC(=O)[C@@H]1C[C@@]2(O)C3=C(C(Cl)=CC=C3)N(C)O[C@H]2N1/C=C/C(=O)OCCCCCCCCCCCCCCCCCCCCC"

extract_fused_ring_system(talatrachyoxazine_B, draw=True)


Largest Fused Ring System SMILES: CCCO.CNCO.cc(cCl)NC.ccc


'CCCO.CNCO.cc(cCl)NC.ccc'

In [13]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Scaffolds import MurckoScaffold

# Define SMARTS pattern for a sesquiterpene core (adjustable)
sesquiterpene_smarts = "[C;R]1[C;R][C;R][C;R][C;R][C;R]1"  # Generic six-membered ring
sesquiterpene_core = Chem.MolFromSmarts(sesquiterpene_smarts)

def extract_sesquiterpene_core(smiles: str, draw=False):
    mol = Chem.MolFromSmiles(smiles)

    if not mol:
        print("Invalid SMILES input.")
        return None

    # 1. Remove large non-core elements like esters, long chains
    #mol = Chem.DeleteSubstructs(mol, Chem.MolFromSmarts("C(=O)OCCCCCCCCCCCCCCCCCCCCC"))

    # 2. Extract the largest fused ring system
    murcko_core = MurckoScaffold.GetScaffoldForMol(mol)

    # 3. Check if the extracted core contains a sesquiterpene moiety
    if murcko_core.HasSubstructMatch(sesquiterpene_core):
        print("Sesquiterpene core detected in extracted structure.")
    else:
        print("No clear sesquiterpene detected, possibly a hybrid.")

    # Convert core to SMILES
    core_smiles = Chem.MolToSmiles(murcko_core)
    print("Extracted Core SMILES:", core_smiles)

    # 4. Draw the core
    if draw:
        core_image = Draw.MolToImage(murcko_core)
        core_image.show()

    return core_smiles

# Example: Process Talatrachyoxazine B
talatrachyoxazine_B = "C=C(C)[C@@H]1CC[C@@H](C)[C@@]2(O)[C@@H]1C=C(C)[C@@H](OC(C)=O)[C@@H]2OC(=O)[C@@H]1C[C@@]2(O)C3=C(C(Cl)=CC=C3)N(C)O[C@H]2N1/C=C/C(=O)OCCCCCCCCCCCCCCCCCCCCC"

extract_sesquiterpene_core(talatrachyoxazine_B, draw=True)


Sesquiterpene core detected in extracted structure.
Extracted Core SMILES: O=C(O[C@H]1CC=C[C@@H]2CCCCC21)[C@@H]1CC2c3ccccc3NO[C@H]2N1


'O=C(O[C@H]1CC=C[C@@H]2CCCCC21)[C@@H]1CC2c3ccccc3NO[C@H]2N1'