# Import Libs

In [1]:
# import libs
import re
from rich import print

# Combine Functional Groups

In [2]:
# Example bond lists for two fragments. The "{1}" token appears in both and
# presumably marks the point where they attach to each other (TODO confirm).
Chain1 = [
    "C1=C2",
    "C2-{1}",
    "C3=C4",
    "C4-C5",
    "C5=C6",
    "C6-C1",
]
Chain2 = [
    "XX1-C2",
    "{1}-C2",
    "C2=C3",
    "C3-XX4",
]

# Show both fragments
print(f"Chain1: {Chain1}")
print(f"Chain2: {Chain2}")

## Extract Highest Index

In [3]:
def extract_highest_index(chain):
    """
    Returns the largest number that directly follows a run of letters in any bond.

    Only digits preceded by at least one ASCII letter are counted, so a bare
    number such as the ``1`` in ``{1}`` is ignored.

    Parameters
    ----------
    chain : list
        Bond strings, e.g. ``["C1=C2", "C2-C3"]``.

    Returns
    -------
    int
        The largest index found, or 0 when no letter-prefixed number exists.
    """
    # letters followed by digits; only the digits are captured
    label_number = re.compile(r'[A-Za-z]+(\d+)')
    return max(
        (int(number) for bond in chain for number in label_number.findall(bond)),
        default=0,
    )

In [4]:
# Check: report the highest atom index in each example chain
# (tracing the regex by hand gives 6 for Chain1 and 4 for Chain2).
print(f"Extracted highest index from Chain1: {extract_highest_index(Chain1)}")
print(f"Extracted highest index from Chain2: {extract_highest_index(Chain2)}")

## Combine Groups

In [26]:
# Example input: the main chain carries "atom*{Name}" placeholders that refer
# to the other chains by name; each of those chains has one entry containing
# "*" that marks its open (attachment) end.
molecule_src = {
    'MainChain': [
        "C1-C2",
        "C2-C3",
        "C3*{Chain1}",
        "C3-C4",
        "C4*{Chain2}",
        "C4-C5",
        "C5-C6",
    ],
    'Chain1': [
        "C1=C2",
        "C2-C3",
        "C3=*",
    ],
    'Chain2': [
        "*-C1",
        "C1=C2",
        "C2-XX3",
    ],
}

### Search For Main Chain

In [27]:
def search_for_main_chain(molecule_src):
    """
    Finds the first chain that contains a ``{Name}`` attachment placeholder.

    A bond counts as a placeholder when it starts with ``{...}*`` or when it
    contains ``*{...}``. Chains are examined in dictionary order.

    Parameters
    ----------
    molecule_src : dict
        Maps chain names to lists of bond strings.

    Returns
    -------
    str or None
        The key of the first chain holding such a bond, or None if no chain does.
    """
    # "{Name}*atom" form, anchored at the start of the bond
    brace_first = re.compile(r'\{.*?\}\*.*')
    # "atom*{Name}" form
    brace_last = re.compile(r'.*\*\{.*?\}')

    def references_other_chain(bond):
        return bool(brace_first.match(bond) or brace_last.match(bond))

    return next(
        (
            name
            for name, bonds in molecule_src.items()
            if any(map(references_other_chain, bonds))
        ),
        None,
    )

In [28]:
# Check: with the sample molecule_src above, 'MainChain' is the first chain
# containing a "*{...}" placeholder, so that is the expected result.
print(f"Main chain found: {search_for_main_chain(molecule_src)}")

In [None]:
def construct_molecule(molecule_src):
    """
    Constructs the molecule from the given molecule source.

    The main chain is the one returned by ``search_for_main_chain``. Every
    other chain is renumbered so that its atom indices continue after the
    indices already in use, and each ``atom*{Name}`` / ``{Name}*atom``
    placeholder in the main chain is replaced by a bond between that atom and
    the first gate atom of the chain called ``Name``.

    Parameters
    ----------
    molecule_src : dict
        A dictionary mapping chain names to lists of bond strings. It is not
        modified; the function works on a copy of each list.

    Returns
    -------
    tuple
        ``(chain_info, molecule, constructed_molecule)`` where

        - ``chain_info`` maps each side-chain name to a dict holding its
          renumbered ``'bonds'`` and its renumbered ``'gate'`` entries (with
          the ``*`` removed),
        - ``molecule`` is the per-chain copy of ``molecule_src`` after
          renumbering and placeholder replacement,
        - ``constructed_molecule`` is the list of main-chain bonds followed by
          the bonds of every side chain.

        If no main chain is found, ``chain_info`` and ``constructed_molecule``
        are empty and ``molecule`` is an unmodified copy.

    Raises
    ------
    KeyError
        If a placeholder names something that is not a side chain of
        ``molecule_src``.
    IndexError
        If a placeholder names a side chain that has no ``*`` gate entry.
    """
    # "C1=C2": ordinary bond between two indexed atoms
    bond_re = re.compile(r"([A-Za-z]+)(\d+)([-=#])([A-Za-z]+)(\d+)")
    # "C3=*": gate whose open end is on the right
    gate_right_re = re.compile(r"([A-Za-z]+)(\d+)([-=#])\*")
    # "*-C1": gate whose open end is on the left
    gate_left_re = re.compile(r"\*([-=#])([A-Za-z]+)(\d+)")
    # "C3*{Chain1}" and "{Chain1}*C3": placeholders in the main chain
    slot_after_re = re.compile(r"([A-Za-z]+)(\d+)\*\{([A-Za-z0-9]+)\}")
    slot_before_re = re.compile(r"\{([A-Za-z0-9]+)\}\*([A-Za-z]+)(\d+)")

    # work on copies so the caller's lists stay untouched
    molecule = {key: chain.copy() for key, chain in molecule_src.items()}
    chain_info = {}
    constructed_molecule = []

    main_chain = search_for_main_chain(molecule)
    if main_chain is None:
        # nothing to attach to; previously this fell through to a KeyError
        return chain_info, molecule, constructed_molecule

    # SECTION: renumber the side chains
    # offset = highest atom index handed out so far
    offset = extract_highest_index(molecule[main_chain])
    for key, chain in molecule.items():
        if key == main_chain:
            continue

        info = chain_info[key] = {
            'bonds': [],
            'gate': []
        }
        # Track the largest index assigned in this chain. The next chain must
        # start after it, not after whichever index happened to be parsed last.
        chain_max = offset

        for i, bond in enumerate(chain):
            # the three patterns are mutually exclusive, so stop at the first hit
            match = bond_re.match(bond)
            if match:
                atom1, index1, bond_order, atom2, index2 = match.groups()
                new1 = int(index1) + offset
                new2 = int(index2) + offset
                chain[i] = f"{atom1}{new1}{bond_order}{atom2}{new2}"
                info['bonds'].append(chain[i])
                chain_max = max(chain_max, new1, new2)
                continue

            match = gate_right_re.match(bond)
            if match:
                atom1, index1, bond_order = match.groups()
                new1 = int(index1) + offset
                # stored as "<order><atom>" so it can be appended to a main atom
                chain[i] = f"{bond_order}{atom1}{new1}"
                info['gate'].append(chain[i])
                chain_max = max(chain_max, new1)
                continue

            match = gate_left_re.match(bond)
            if match:
                bond_order, atom1, index1 = match.groups()
                new1 = int(index1) + offset
                # stored as "<atom><order>" so it can be prepended to a main atom
                chain[i] = f"{atom1}{new1}{bond_order}"
                info['gate'].append(chain[i])
                chain_max = max(chain_max, new1)

        offset = chain_max

    # SECTION: replace the placeholders in the main chain
    main_bonds = molecule[main_chain]
    for position, item in enumerate(main_bonds):
        match = slot_after_re.match(item)
        if match:
            atom1, index1, gate_name = match.groups()
        else:
            match = slot_before_re.match(item)
            if not match:
                continue
            gate_name, atom1, index1 = match.groups()

        gates = chain_info[gate_name]['gate']
        if not gates:
            raise IndexError(
                f"chain {gate_name!r} has no '*' gate to attach at {item!r}"
            )
        # only the first gate of a side chain is used
        gate = gates[0]
        if gate.startswith(('-', '=', '#')):
            main_bonds[position] = f"{atom1}{index1}{gate}"
        else:
            main_bonds[position] = f"{gate}{atom1}{index1}"

    # SECTION: main chain first, then the bonds of every side chain
    constructed_molecule.extend(main_bonds)
    for info in chain_info.values():
        constructed_molecule.extend(info['bonds'])

    return chain_info, molecule, constructed_molecule


In [89]:
# Show the input chains as given
print(molecule_src)
# Build the combined molecule and show the returned
# (chain_info, molecule, constructed_molecule) tuple
print(construct_molecule(molecule_src))