# Import Libs

In [1]:
# import libs
import re
from rich import print

# Combine Functional Groups

In [2]:
# Sample bond lists; a "{...}" token is a placeholder/reference, not an atom.
Chain1 = "C1=C2 C2-{1} C3=C4 C4-C5 C5=C6 C6-C1".split()
Chain2 = "XX1-C2 {1}-C2 C2=C3 C3-XX4".split()
Chain3 = "C1-C2 C2-C3 C3*{Chain11} C3-C4 C4*{Chain21} C4-C5 C5-C6".split()

# log res
for _label, _bonds in (("Chain1", Chain1), ("Chain2", Chain2), ("Chain3", Chain3)):
    print(f"{_label}: {_bonds}")

## Extract Highest Index

In [3]:
def extract_highest_index(chain):
    """
    Extracts the highest numerical index from atom labels in the given chain.

    ``{...}`` chain references are removed before the labels are read, so a
    name such as ``{Chain11}`` never contributes an index, while the atom
    written next to the reference (the ``C3`` in ``C3*{Chain11}``) is still
    counted.

    Parameters
    ----------
    chain : list
        A list of strings representing bonds between atoms in a chain.

    Returns
    -------
    int
        The highest numerical index found in the chain, or 0 when the chain
        contains no atom label.
    """
    indices = []
    for bond in chain:
        # Remove every balanced "{...}" reference but keep the rest of the bond.
        stripped = re.sub(r'\{[^{}]*\}', '', bond)
        if '{' in stripped or '}' in stripped:
            # Unbalanced braces: labels cannot be told apart from a reference.
            continue
        indices.extend(int(number) for number in re.findall(r'[A-Za-z]+(\d+)', stripped))
    return max(indices) if indices else 0

In [18]:
# check
# Chain1 is reported together with the type of the returned value.
_highest_chain1 = extract_highest_index(Chain1)
print(f"Extracted highest index from Chain1: {_highest_chain1}, {type(_highest_chain1)}")
for _label, _bonds in (("Chain2", Chain2), ("Chain3", Chain3)):
    print(f"Extracted highest index from {_label}: {extract_highest_index(_bonds)}")

## Combine Groups

In [25]:
# source
# Main chain referencing Chain1 at C3 and Chain2 at C4.
molecule_src = dict(
    MainChain=["C1-C2", "C2-C3", "C3*{Chain1}", "C3-C4", "C4*{Chain2}", "C4-C5", "C5-C6"],
    Chain1=["C1=C2", "C2-C3", "C3=*"],
    Chain2=["*-C1", "C1=C2", "C2-XX3"],
)

In [34]:
# Two side chains with the same bond list, each with a gate at both ends.
molecule_src = dict(
    MainChain=["C1*{Chain1}", "C1-C2", "C2*{Chain2}"],
    Chain1=["*-C1", "C1=C2", "C2-C3", "C3=C4", "C4-C5", "C5=*"],
    Chain2=["*-C1", "C1=C2", "C2-C3", "C3=C4", "C4-C5", "C5=*"],
)

In [46]:
# Chain1 has its gate on the first bond, Chain2 on the last bond.
molecule_src = dict(
    MainChain=["C1*{Chain1}", "C1-C2", "C2*{Chain2}"],
    Chain1=["*-C1", "C1=C2", "C2-C3", "C3=XX4"],
    Chain2=["XX1-C2", "C2=C3", "C3-C4", "C4=*"],
)

In [None]:
# Three side chains are defined; the main chain only references Chain1 and Chain2.
molecule_src = dict(
    MainChain=["C1*{Chain1}", "C1-C2", "C2*{Chain2}", "C2-C3", "C3=*"],
    Chain1=["*-C1", "C1=C2", "C2-C3", "C3=*"],
    Chain2=["*-C1", "C1=C2", "C2-C3", "C3=*"],
    Chain3=["*-C1", "C1=C2", "C2-C3", "C3=XX4"],
)

In [6]:
# naphthalene
# Six-membered main ring; Chain1 is referenced from both C1 and C6.
molecule_src = dict(
    MainChain=[
        "C1-C2", "C2=C3", "C3-C4", "C4=C5", "C5-C6", "C6=C1",
        "C1*{Chain1}", "C6*{Chain1}",
    ],
    Chain1=["*-C1", "C1=C2", "C2-C3", "C3=C4", "C4-**"],
)

### Search For Main Chain

In [5]:
def search_for_main_chain(molecule_src):
    """
    Finds the chain that references other chains.

    A chain qualifies as soon as one of its bonds starts with ``{Name}*`` or
    contains ``*{Name}``. Chains are inspected in dictionary order and the
    first qualifying one wins.

    Parameters
    ----------
    molecule_src : dict
        A dictionary containing lists of strings representing bonds between atoms in a molecule.

    Returns
    -------
    str or None
        The name of the first chain holding such a reference, or None when
        no chain holds one.
    """
    reference_patterns = (
        re.compile(r'\{.*?\}\*.*'),   # "{Name}*Atom"
        re.compile(r'.*\*\{.*?\}'),   # "Atom*{Name}"
    )

    def holds_reference(bond):
        return any(pattern.match(bond) for pattern in reference_patterns)

    for name, bonds in molecule_src.items():
        for bond in bonds:
            if holds_reference(bond):
                return name
    return None

In [8]:
# test
_main_chain_name = search_for_main_chain(molecule_src)
print(f"Main chain found: {_main_chain_name}")

### Check Molecule

In [6]:
def check_molecule(molecule_src):
    """
    Builds a checked copy of the molecule source with numbered chain names.

    Every ``Atom*{Chain}`` reference found in the main chain is rewritten to
    ``Atom*{Chain<k>}`` and the referenced chain is stored under that new
    name. For 'branch' and 'ring' chains ``k`` counts the references to the
    chain (1, 2, ...); for 'bridge' chains ``k`` is always 1. Chains that the
    main chain never references are left out of the result.

    Parameters
    ----------
    molecule_src : dict
        A dictionary containing lists of strings representing bonds between atoms in a molecule.

    Returns
    -------
    dict
        The main chain (with rewritten references) followed by the renamed
        side chains.

    Raises
    ------
    ValueError
        If no main chain is found.
    KeyError
        If a side chain does not contain exactly 1, 2 or 3 ``*`` characters.
    """
    # number of '*' characters in a chain -> chain type
    type_by_gate_count = {
        "1": 'branch',
        "2": 'ring',
        "3": 'bridge',
    }

    # work on a copy so the caller's lists are never modified
    working = {name: bonds.copy() for name, bonds in molecule_src.items()}

    main_chain = search_for_main_chain(working)
    if not main_chain:
        raise ValueError("Main chain not found in the molecule source.")

    # the same list object is stored in the result and rewritten in place below
    backbone = working[main_chain]
    checked = {main_chain: backbone}

    side_chains = [(name, bonds) for name, bonds in working.items() if name != main_chain]

    # classify every side chain before any reference is rewritten
    kinds = {}
    for name, bonds in side_chains:
        star_total = sum(bond.count('*') for bond in bonds)
        kinds[name] = type_by_gate_count[str(star_total)]

    for name, bonds in side_chains:
        reference = re.compile(rf"([A-Za-z]+)(\d+)\*\{{({re.escape(name)})\}}")
        kind = kinds[name]
        occurrence = 0

        for position, bond in enumerate(backbone):
            hit = reference.match(bond)
            if hit is None:
                continue

            if kind in ('branch', 'ring'):
                occurrence += 1
            elif kind == 'bridge':
                # every reference to a bridge shares the same renamed chain
                occurrence = 1

            atom, index, referenced = hit.groups()
            renamed = f"{referenced}{occurrence}"

            checked[renamed] = bonds
            backbone[position] = f"{atom}{index}*{{{renamed}}}"

    return checked

In [10]:
# naphthalene
molecule_src = dict(
    MainChain=[
        "C1-C2", "C2=C3", "C3-C4", "C4=C5", "C5-C6", "C6=C1",
        "C1*{Chain1}", "C6*{Chain1}",
    ],
    Chain1=["*-C1", "C1=C2", "C2-C3", "C3=C4", "C4-**"],
)

# Alternative inputs kept for manual experiments:

# molecule_src = {
#     'MainChain': ["C1*{Chain1}", "C1-C2", "C2*{Chain2}", "C2-C3", "C3=*"],
#     'Chain1': ["*-C1", "C1=C2", "C2-C3", "C3=*"],
#     'Chain2': ["*-C1", "C1=C2", "C2-C3", "C3=*"],
#     'Chain3': ["*-C1", "C1=C2", "C2-C3", "C3=XX4"],
# }

# molecule_src = {
#     'MainChain': ["C1*{Chain1}", "C1-C2", "C2*{Chain2}","C3*{Chain2}"],
#     'Chain1': ["*-C1", "C1=C2", "C2-C3", "C3=XX4"],
#     'Chain2': ["*-C2", "C2=C3", "C3-C4", "C4=**"],
# }

# run the check on the selected input and show the renamed chains
molecule_src_checked = check_molecule(molecule_src)
print(molecule_src_checked)

### Construct Molecule

#### `Code 1`

In [13]:
def construct_molecule(molecule_src):
    """
    Constructs the molecule from the given molecule source.

    The main chain keeps its atom indices. Every other chain is renumbered by
    adding the highest index already in use, and each ``Atom*{Chain}`` or
    ``{Chain}*Atom`` entry of the main chain is replaced by a bond between
    that atom and the first gate atom of the referenced chain.

    Parameters
    ----------
    molecule_src : dict
        A dictionary containing lists of strings representing bonds between atoms in a molecule.

    Returns
    -------
    tuple
        ``(chain_info, molecule, constructed_molecule)``: ``chain_info`` maps
        each side chain to its renumbered ``'bonds'`` and ``'gate'`` entries,
        ``molecule`` is the rewritten copy of ``molecule_src`` and
        ``constructed_molecule`` is the main chain followed by the bonds of
        every side chain.

    Raises
    ------
    KeyError
        If no main chain is found, or a reference names an unknown chain.
    IndexError
        If a referenced chain has no gate bond.
    """
    bond_pattern = re.compile(r"([A-Za-z]+)(\d+)([-=#])([A-Za-z]+)(\d+)")
    tail_gate_pattern = re.compile(r"([A-Za-z]+)(\d+)([-=#])\*")   # "C3=*"
    head_gate_pattern = re.compile(r"\*([-=#])([A-Za-z]+)(\d+)")   # "*-C1"
    ref_after_atom = re.compile(r"([A-Za-z]+)(\d+)\*\{([A-Za-z0-9]+)\}")
    ref_before_atom = re.compile(r"\{([A-Za-z0-9]+)\}\*([A-Za-z]+)(\d+)")

    # create a copy of the molecule source
    molecule = {key: chain.copy() for key, chain in molecule_src.items()}

    # search for the main chain
    main_chain = search_for_main_chain(molecule)

    chain_info = {}

    if main_chain:
        # indices of the side chains start after the main chain's indices
        highest_index = extract_highest_index(molecule[main_chain])

        for key, chain in molecule.items():
            if key == main_chain:
                continue

            chain_info[key] = {
                'bonds': [],
                'gate': []
            }

            # every bond of this chain is shifted by the same offset
            offset = highest_index

            for i, bond in enumerate(chain):
                # the three patterns are mutually exclusive for one bond
                match_bond = bond_pattern.match(bond)
                if match_bond:
                    atom1, index1, bond_order, atom2, index2 = match_bond.groups()
                    index1 = int(index1) + offset
                    index2 = int(index2) + offset
                    chain[i] = f"{atom1}{index1}{bond_order}{atom2}{index2}"
                    chain_info[key]['bonds'].append(chain[i])
                    highest_index = max(highest_index, index1, index2)
                    continue

                match_gate = tail_gate_pattern.match(bond)
                if match_gate:
                    atom1, index1, bond_order = match_gate.groups()
                    index1 = int(index1) + offset
                    chain[i] = f"{bond_order}{atom1}{index1}"
                    chain_info[key]['gate'].append(chain[i])
                    highest_index = max(highest_index, index1)
                    continue

                match_gate = head_gate_pattern.match(bond)
                if match_gate:
                    bond_order, atom1, index1 = match_gate.groups()
                    index1 = int(index1) + offset
                    chain[i] = f"{atom1}{index1}{bond_order}"
                    chain_info[key]['gate'].append(chain[i])
                    highest_index = max(highest_index, index1)

            # highest_index now holds the largest index used so far, so the
            # next chain cannot reuse an index of this one

    # gate index: this version always connects through the first gate
    gate_index = 0

    # replace every chain reference in the main chain by a real bond
    main_bonds = molecule[main_chain]
    for element_index, items in enumerate(main_bonds):
        match_gate = ref_after_atom.match(items)
        if match_gate:
            atom1, index1, gate = match_gate.groups()
        else:
            match_gate = ref_before_atom.match(items)
            if not match_gate:
                continue
            gate, atom1, index1 = match_gate.groups()

        gate_bond = chain_info[gate]['gate'][gate_index]
        # a gate starting with the bond symbol goes after the receiving atom
        if gate_bond.startswith(('-', '=', '#')):
            main_bonds[element_index] = f"{atom1}{index1}{gate_bond}"
        else:
            main_bonds[element_index] = f"{gate_bond}{atom1}{index1}"

    # combine the main chain and other chains
    constructed_molecule = list(main_bonds)
    for chain in chain_info.values():
        constructed_molecule.extend(chain['bonds'])

    return chain_info, molecule, constructed_molecule


#### `Code 2`

In [None]:
def construct_molecule_v2(molecule_src):
    """
    Constructs the molecule from the given molecule source.

    The main chain keeps its atom indices. Every other chain is renumbered by
    adding the highest index already in use. Each ``Atom*{Chain}`` or
    ``{Chain}*Atom`` entry of the main chain is connected to every gate of the
    referenced chain: the first connection replaces the entry, any further
    connection is appended to the end of the main chain.

    Parameters
    ----------
    molecule_src : dict
        A dictionary containing lists of strings representing bonds between atoms in a molecule.

    Returns
    -------
    tuple
        ``(chain_info, molecule, constructed_molecule)``: ``chain_info`` maps
        each side chain to its renumbered ``'bonds'`` and ``'gate'`` entries,
        ``molecule`` is the rewritten copy of ``molecule_src`` and
        ``constructed_molecule`` is the main chain followed by the bonds of
        every side chain.

    Raises
    ------
    KeyError
        If no main chain is found, or a reference names an unknown chain.
    """
    bond_pattern = re.compile(r"([A-Za-z]+)(\d+)([-=#])([A-Za-z]+)(\d+)")
    tail_gate_pattern = re.compile(r"([A-Za-z]+)(\d+)([-=#])\*")   # "C3=*"
    head_gate_pattern = re.compile(r"\*([-=#])([A-Za-z]+)(\d+)")   # "*-C1"
    ref_after_atom = re.compile(r"([A-Za-z]+)(\d+)\*\{([A-Za-z0-9]+)\}")
    ref_before_atom = re.compile(r"\{([A-Za-z0-9]+)\}\*([A-Za-z]+)(\d+)")

    # create a copy of the molecule source
    molecule = {key: chain.copy() for key, chain in molecule_src.items()}

    # search for the main chain
    main_chain = search_for_main_chain(molecule)

    chain_info = {}

    if main_chain:
        # indices of the side chains start after the main chain's indices
        highest_index = extract_highest_index(molecule[main_chain])
        print(f'highest_index-0: {highest_index}')

        for key, chain in molecule.items():
            if key == main_chain:
                continue

            chain_info[key] = {
                'bonds': [],
                'gate': []
            }

            # every bond of this chain is shifted by the same offset
            offset = highest_index

            for i, bond in enumerate(chain):
                # the three patterns are mutually exclusive for one bond
                match_bond = bond_pattern.match(bond)
                if match_bond:
                    atom1, index1, bond_order, atom2, index2 = match_bond.groups()
                    index1 = int(index1) + offset
                    index2 = int(index2) + offset
                    chain[i] = f"{atom1}{index1}{bond_order}{atom2}{index2}"
                    chain_info[key]['bonds'].append(chain[i])
                    highest_index = max(highest_index, index1, index2)
                    continue

                match_gate = tail_gate_pattern.match(bond)
                if match_gate:
                    atom1, index1, bond_order = match_gate.groups()
                    index1 = int(index1) + offset
                    chain[i] = f"{bond_order}{atom1}{index1}"
                    chain_info[key]['gate'].append(chain[i])
                    highest_index = max(highest_index, index1)
                    continue

                match_gate = head_gate_pattern.match(bond)
                if match_gate:
                    bond_order, atom1, index1 = match_gate.groups()
                    index1 = int(index1) + offset
                    chain[i] = f"{atom1}{index1}{bond_order}"
                    chain_info[key]['gate'].append(chain[i])
                    highest_index = max(highest_index, index1)

            # highest_index is the running maximum, so it stays valid even
            # when the chain's last bond does not carry its largest index
            print(f"highest_index: {highest_index}")

    # replace every chain reference in the main chain by real bonds
    main_bonds = molecule[main_chain]
    for element_index, items in enumerate(main_bonds):
        match_gate = ref_after_atom.match(items)
        if match_gate:
            atom1, index1, gate = match_gate.groups()
        else:
            match_gate = ref_before_atom.match(items)
            if not match_gate:
                continue
            gate, atom1, index1 = match_gate.groups()

        for m, gate_bond in enumerate(chain_info[gate]['gate']):
            # a gate starting with the bond symbol goes after the receiving atom
            if gate_bond.startswith(('-', '=', '#')):
                _connection = f"{atom1}{index1}{gate_bond}"
            else:
                _connection = f"{gate_bond}{atom1}{index1}"

            if m == 0:
                main_bonds[element_index] = _connection
            else:
                # appended bonds are visited later by this loop but match
                # neither reference pattern, so they are left untouched
                main_bonds.append(_connection)

    # combine the main chain and other chains
    constructed_molecule = list(main_bonds)
    for chain in chain_info.values():
        constructed_molecule.extend(chain['bonds'])

    return chain_info, molecule, constructed_molecule


#### `Code 3`

In [14]:
def construct_molecule_v3(molecule_src):
    """
    Constructs the molecule from the given molecule source.

    Side chains are classified by the number of ``*`` characters they contain
    (1: 'branch', 2: 'ring', 3: 'bridge') and renumbered by adding the highest
    index already in use. Each ``Atom*{Chain}`` or ``{Chain}*Atom`` entry of
    the main chain is then replaced by real bonds:

    - a chain with a single gate bond is connected through that gate;
    - a 'ring' with several gates is connected to the same atom through every
      gate (the first connection replaces the entry, the others are appended
      to the end of the main chain);
    - a 'bridge' with several gates uses the gate whose position equals the
      position of the receiving atom among the chain's receivers.

    Parameters
    ----------
    molecule_src : dict
        A dictionary containing lists of strings representing bonds between atoms in a molecule.

    Returns
    -------
    tuple
        ``(chain_info, molecule, constructed_molecule)``: ``chain_info`` maps
        each side chain to its ``'receiver'``, ``'bonds'``, ``'gate'``,
        ``'type'`` and ``'connection-port'`` data, ``molecule`` is the
        rewritten copy of ``molecule_src`` and ``constructed_molecule`` is
        the main chain followed by the bonds of every side chain.

    Raises
    ------
    KeyError
        If no main chain is found, a reference names an unknown chain, or a
        side chain does not contain exactly 1, 2 or 3 ``*`` characters.
    IndexError
        If a 'bridge' chain has fewer gate bonds than the receiver position
        requires.
    """
    bond_pattern = re.compile(r"([A-Za-z]+)(\d+)([-=#])([A-Za-z]+)(\d+)")
    tail_gate_pattern = re.compile(r"([A-Za-z]+)(\d+)([-=#])(\*+)")   # "C4-**"
    head_gate_pattern = re.compile(r"(\*+)([-=#])([A-Za-z]+)(\d+)")   # "*-C1"
    ref_after_atom = re.compile(r"([A-Za-z]+)(\d+)\*\{([A-Za-z0-9]+)\}")
    ref_before_atom = re.compile(r"\{([A-Za-z0-9]+)\}\*([A-Za-z]+)(\d+)")

    # number of '*' characters in a chain -> chain type
    chain_types = {
        "1": 'branch',
        "2": 'ring',
        "3": 'bridge',
    }

    # create a copy of the molecule source
    molecule = {key: chain.copy() for key, chain in molecule_src.items()}

    # search for the main chain
    main_chain = search_for_main_chain(molecule)

    chain_info = {}
    chain_analysis = {}

    def _parse_reference(item):
        """Return (atom, index, chain name, atom_first) for a reference, else None."""
        found = ref_after_atom.match(item)
        if found:
            atom, index, gate = found.groups()
            return atom, index, gate, True
        found = ref_before_atom.match(item)
        if found:
            gate, atom, index = found.groups()
            return atom, index, gate, False
        return None

    def _join(atom_label, gate_bond):
        """Put the receiving atom on the open side of the gate bond."""
        if gate_bond.startswith(('-', '=', '#')):
            return f"{atom_label}{gate_bond}"
        return f"{gate_bond}{atom_label}"

    def _attach(position, atom_label, gate):
        """Replace the reference at `position` by bond(s) to chain `gate`."""
        main_bonds = molecule[main_chain]
        gates = chain_info[gate]['gate']
        # the type is looked up per reference, for both reference forms
        chain_type_ = chain_analysis[gate]

        if len(gates) == 1:
            main_bonds[position] = _join(atom_label, gates[0])
        elif chain_type_ == 'ring':
            for m, gate_bond in enumerate(gates):
                _connection = _join(atom_label, gate_bond)
                if m == 0:
                    main_bonds[position] = _connection
                else:
                    main_bonds.append(_connection)
        elif chain_type_ == 'bridge':
            # the n-th receiver of the chain is bonded to its n-th gate
            receiver_index = chain_info[gate]['receiver'].index(atom_label)
            main_bonds[position] = _join(atom_label, gates[receiver_index])

    if main_chain:
        # indices of the side chains start after the main chain's indices
        highest_index = extract_highest_index(molecule[main_chain])

        # classify the side chains and prepare their info records
        for key, chain in molecule.items():
            if key == main_chain:
                continue

            chain_gate_num = sum(item.count('*') for item in chain)
            chain_analysis[key] = chain_types[str(chain_gate_num)]

            chain_info[key] = {
                'receiver': [],
                'bonds': [],
                'gate': [],
                'type': chain_types[str(chain_gate_num)],
                'connection-port': {}
            }

        # NOTE: record which main-chain atoms receive each side chain
        for item in molecule[main_chain]:
            reference = _parse_reference(item)
            if reference:
                atom1, index1, gate, _ = reference
                chain_info[gate]['receiver'].append(f"{atom1}{index1}")

        # log
        print(f"chain info: {chain_info}")

        # NOTE: update index of other chains
        for key, chain in molecule.items():
            if key == main_chain:
                continue

            info = chain_info[key]
            # every bond of this chain is shifted by the same offset
            offset = highest_index

            for i, bond in enumerate(chain):
                # the three patterns are mutually exclusive for one bond
                match_bond = bond_pattern.match(bond)
                if match_bond:
                    atom1, index1, bond_order, atom2, index2 = match_bond.groups()
                    index1 = int(index1) + offset
                    index2 = int(index2) + offset
                    chain[i] = f"{atom1}{index1}{bond_order}{atom2}{index2}"
                    info['bonds'].append(chain[i])
                    highest_index = max(highest_index, index1, index2)
                    continue

                match_gate = tail_gate_pattern.match(bond)
                if match_gate:
                    atom1, index1, bond_order, gate_port = match_gate.groups()
                    index1 = int(index1) + offset
                    gate_bond = f"{atom1}{index1}{bond_order}"
                else:
                    match_gate = head_gate_pattern.match(bond)
                    if not match_gate:
                        continue
                    gate_port, bond_order, atom1, index1 = match_gate.groups()
                    index1 = int(index1) + offset
                    gate_bond = f"{bond_order}{atom1}{index1}"

                chain[i] = gate_bond
                info['gate'].append(gate_bond)
                info['connection-port'][gate_bond] = {
                    'port': gate_port,
                    'bond': f"{atom1}{index1}",
                    'bond-type': bond_order,
                    'bond-gate': gate_bond,
                    'port-status': 'open'
                }
                highest_index = max(highest_index, index1)

            # highest_index is the running maximum, so it stays valid even
            # when the chain's last bond does not carry its largest index

    # log
    print(f"chain info: {chain_info}")

    # replace every chain reference in the main chain by real bonds
    for element_index, items in enumerate(molecule[main_chain]):
        reference = _parse_reference(items)
        if not reference:
            continue

        atom1, index1, gate, atom_first = reference
        gate_num = len(chain_info[gate]['gate'])

        if atom_first:
            print(f"atom: {atom1}, index1: {index1}, gate: {gate}")
            print(f"element_index: {element_index}")
            print(f"Gate num: {gate_num}")
            print(f"Chain type: {chain_analysis[gate]}")
        else:
            print(f"Gate num: {gate_num}")

        _attach(element_index, f"{atom1}{index1}", gate)

    # combine the main chain and other chains
    constructed_molecule = list(molecule[main_chain])
    for chain in chain_info.values():
        constructed_molecule.extend(chain['bonds'])

    return chain_info, molecule, constructed_molecule

#### `Code 4`

In [None]:
def analyze_chain_types(molecule, main_chain):
    """
    Classify every side chain by the number of '*' gate markers it contains.

    Parameters
    ----------
    molecule : dict
        Maps a chain name to the list of bond strings of that chain.
    main_chain : str
        Name of the chain that is skipped (the main chain).

    Returns
    -------
    tuple
        ``(chain_info, chain_analysis)``: ``chain_analysis`` maps each side
        chain name to 'branch', 'ring' or 'bridge' (1, 2 or 3 markers), and
        ``chain_info`` maps it to a freshly initialised bookkeeping dict.

    Raises
    ------
    KeyError
        If a side chain contains a number of '*' characters other than 1, 2 or 3.
    """
    # lookup keyed by the stringified marker count
    type_by_gate_count = {"1": 'branch', "2": 'ring', "3": 'bridge'}

    chain_info = {}
    chain_analysis = {}

    for name in molecule:
        if name == main_chain:
            continue

        # every '*' character counts, so a '**' port contributes two
        star_total = 0
        for bond in molecule[name]:
            star_total += bond.count('*')

        kind = type_by_gate_count[str(star_total)]
        chain_analysis[name] = kind
        chain_info[name] = {
            'receiver': [],
            'bonds': [],
            'gate': [],
            'type': kind,
            'connection-port': {},
        }

    return chain_info, chain_analysis

def process_main_chain_connections(molecule, main_chain, chain_info):
    """
    Record which main-chain atoms receive each side chain.

    Every main-chain entry written as ``atom*{chain}`` or ``{chain}*atom``
    appends the atom label (symbol followed by index) to
    ``chain_info[chain]['receiver']``. Other entries are ignored.

    Parameters
    ----------
    molecule : dict
        Maps a chain name to the list of bond strings of that chain.
    main_chain : str
        Name of the main chain inside ``molecule``.
    chain_info : dict
        Per-chain bookkeeping dicts; updated in place.
    """
    atom_then_chain = re.compile(r"([A-Za-z]+)(\d+)\*\{([A-Za-z0-9]+)\}")
    chain_then_atom = re.compile(r"\{([A-Za-z0-9]+)\}\*([A-Za-z]+)(\d+)")

    for entry in molecule[main_chain]:
        found = atom_then_chain.match(entry)
        if found is not None:
            symbol, number, target = found.groups()
        else:
            found = chain_then_atom.match(entry)
            if found is None:
                # ordinary bond, nothing to register
                continue
            target, symbol, number = found.groups()

        chain_info[target]['receiver'].append(f"{symbol}{number}")

def update_chain_indices(molecule, main_chain, chain_info, highest_index):
    """
    Shift the atom indices of every side chain and collect its bonds and gates.

    Each side chain is offset by the highest index used so far, so that its
    atom labels do not collide with the main chain or with side chains that
    were processed earlier. The chain lists inside ``molecule`` and the
    entries of ``chain_info`` are updated in place.

    Parameters
    ----------
    molecule : dict
        Maps a chain name to the list of bond strings of that chain.
    main_chain : str
        Name of the main chain (left untouched).
    chain_info : dict
        Per-chain bookkeeping dicts; 'bonds', 'gate' and 'connection-port'
        are filled in.
    highest_index : int
        The highest index in the main chain.

    Returns
    -------
    int
        The highest atom index in use after all side chains were shifted.
    """
    bond_re = re.compile(r"([A-Za-z]+)(\d+)([-=#])([A-Za-z]+)(\d+)")
    gate_after_atom_re = re.compile(r"([A-Za-z]+)(\d+)([-=#])(\*+)")
    gate_before_atom_re = re.compile(r"(\*+)([-=#])([A-Za-z]+)(\d+)")

    for key, chain in molecule.items():
        if key == main_chain:
            continue

        # The offset stays fixed for the whole chain; only the running
        # maximum moves while the chain is being walked.
        offset = highest_index
        last_index = highest_index

        for i, bond in enumerate(chain):
            # Process normal bonds (atom-atom)
            match = bond_re.match(bond)
            if match:
                atom1, index1, bond_order, atom2, index2 = match.groups()
                new_index1 = int(index1) + offset
                new_index2 = int(index2) + offset
                chain[i] = f"{atom1}{new_index1}{bond_order}{atom2}{new_index2}"
                chain_info[key]['bonds'].append(chain[i])
                # Keep a running maximum: entries are not guaranteed to be
                # listed in ascending index order.
                last_index = max(last_index, new_index1, new_index2)
                continue

            # Process atom-* gate, then *-atom gate
            match = gate_after_atom_re.match(bond)
            if match:
                atom1, index1, bond_order, gate_port = match.groups()
                new_index1 = int(index1) + offset
                atom_label = f"{atom1}{new_index1}"
                gate_label = f"{atom_label}{bond_order}"
            else:
                match = gate_before_atom_re.match(bond)
                if not match:
                    # Unrecognised entries are left as they are.
                    continue
                gate_port, bond_order, atom1, index1 = match.groups()
                new_index1 = int(index1) + offset
                atom_label = f"{atom1}{new_index1}"
                gate_label = f"{bond_order}{atom_label}"

            chain[i] = gate_label
            chain_info[key]['gate'].append(gate_label)
            chain_info[key]['connection-port'][gate_label] = {
                'port': gate_port,
                'bond': atom_label,
                'bond-type': bond_order,
                'bond-gate': gate_label,
                'port-status': 'open'
            }
            # A gate listed after higher-numbered bonds must not lower the maximum.
            last_index = max(last_index, new_index1)

        highest_index = last_index

    return highest_index

def process_gate_connections(molecule, main_chain, chain_info, chain_analysis):
    """
    Replace the connection placeholders of the main chain with real bonds.

    A main-chain entry of the form ``atom*{chain}`` or ``{chain}*atom`` is
    rewritten in place. When the referenced chain has exactly one recorded
    gate the atom is joined to that gate directly; otherwise the work is
    handed to ``process_multi_gate_connection``.

    Parameters
    ----------
    molecule : dict
        Maps a chain name to the list of bond strings of that chain.
    main_chain : str
        Name of the main chain inside ``molecule``.
    chain_info : dict
        Per-chain bookkeeping dicts (the 'gate' lists are read here).
    chain_analysis : dict
        Maps a side chain name to its type.
    """
    atom_then_chain = r"([A-Za-z]+)(\d+)\*\{([A-Za-z0-9]+)\}"
    chain_then_atom = r"\{([A-Za-z0-9]+)\}\*([A-Za-z]+)(\d+)"

    for position, entry in enumerate(molecule[main_chain]):
        # both spellings carry the same three pieces, only in a different order
        hit = re.match(atom_then_chain, entry)
        if hit:
            atom1, index1, gate = hit.groups()
        else:
            hit = re.match(chain_then_atom, entry)
            if not hit:
                continue
            gate, atom1, index1 = hit.groups()

        recorded_gates = chain_info[gate]['gate']
        chain_type_ = chain_analysis[gate]

        if len(recorded_gates) != 1:
            process_multi_gate_connection(
                molecule, main_chain, position, atom1, index1, gate, chain_info, chain_type_
            )
            continue

        only_gate = recorded_gates[0]
        # a gate that starts with the bond symbol goes after the atom, otherwise before it
        if only_gate.startswith(('-', '=', '#')):
            joined = f"{atom1}{index1}{only_gate}"
        else:
            joined = f"{only_gate}{atom1}{index1}"
        molecule[main_chain][position] = joined

def process_multi_gate_connection(molecule, main_chain, element_index, atom1, index1, gate, chain_info, chain_type_):
    """
    Rewrite one main-chain placeholder for a chain that has several gates.

    For a 'ring' chain the atom is joined to every recorded gate: the first
    join replaces the placeholder, the remaining ones are appended to the
    main chain. For a 'bridge' chain the atom is joined to the gate that sits
    at the same position as the atom in the chain's 'receiver' list. Any
    other chain type leaves the main chain untouched.

    Parameters
    ----------
    molecule : dict
        Maps a chain name to the list of bond strings of that chain.
    main_chain : str
        Name of the main chain inside ``molecule``.
    element_index : int
        Position of the placeholder inside the main chain.
    atom1 : str
        The atom symbol.
    index1 : str
        The atom index.
    gate : str
        Name of the side chain being attached.
    chain_info : dict
        Per-chain bookkeeping dicts ('gate' and 'receiver' are read).
    chain_type_ : str
        Type of the chain.
    """
    anchor = f"{atom1}{index1}"

    def attach(port):
        # gates that begin with a bond symbol follow the atom, the others precede it
        if port.startswith(('-', '=', '#')):
            return f"{anchor}{port}"
        return f"{port}{anchor}"

    if chain_type_ == 'bridge':
        slot = chain_info[gate]['receiver'].index(anchor)
        molecule[main_chain][element_index] = attach(chain_info[gate]['gate'][slot])
        return

    if chain_type_ != 'ring':
        return

    for position, port in enumerate(chain_info[gate]['gate']):
        linked = attach(port)
        if position:
            molecule[main_chain].append(linked)
        else:
            molecule[main_chain][element_index] = linked

def build_constructed_molecule(molecule, main_chain, chain_info):
    """
    Assemble the flat bond list of the finished molecule.

    Parameters
    ----------
    molecule : dict
        Maps a chain name to the list of bond strings of that chain.
    main_chain : str
        Name of the main chain inside ``molecule``.
    chain_info : dict
        Per-chain bookkeeping dicts; each one contributes its 'bonds' list.

    Returns
    -------
    list
        A new list: the main-chain entries first, followed by the bonds of
        every chain in ``chain_info`` in dictionary order.
    """
    # start from a copy so the main chain itself is not modified
    assembled = list(molecule[main_chain])

    for info in chain_info.values():
        assembled += info['bonds']

    return assembled

def construct_molecule_v4(molecule_src):
    """
    Build a molecule from its source description by running the helper steps in order.

    Parameters
    ----------
    molecule_src : dict
        Maps a chain name to the list of bond strings of that chain. The
        chain lists are copied before any step modifies them.

    Returns
    -------
    tuple
        ``(chain_info, molecule, constructed_molecule)``.

    Raises
    ------
    ValueError
        If no main chain is found in the source.
    """
    # work on per-chain copies so the caller's lists are left alone
    molecule = {}
    for name, bonds in molecule_src.items():
        molecule[name] = bonds.copy()

    main_chain = search_for_main_chain(molecule)
    if not main_chain:
        raise ValueError("Main chain not found in the molecule source.")

    # side-chain indices are shifted past the highest index of the main chain
    start_index = extract_highest_index(molecule[main_chain])

    chain_info, chain_analysis = analyze_chain_types(molecule, main_chain)
    process_main_chain_connections(molecule, main_chain, chain_info)
    update_chain_indices(molecule, main_chain, chain_info, start_index)
    process_gate_connections(molecule, main_chain, chain_info, chain_analysis)

    constructed_molecule = build_constructed_molecule(molecule, main_chain, chain_info)
    return chain_info, molecule, constructed_molecule

#### `Test`

In [16]:
# naphthalene
# six-membered main chain; Chain1 carries one '*' port and one '**' port
molecule_src_1 = {
    'MainChain': [
        "C1-C2", "C2=C3", "C3-C4", "C4=C5", "C5-C6", "C6=C1",
        "C1*{Chain1}", "C6*{Chain1}",
    ],
    'Chain1': ["*-C1", "C1=C2", "C2-C3", "C3=C4", "C4-**"],
}

# same main chain; Chain1 ends in a single '*' port instead of '**'
molecule_src_2 = {
    'MainChain': [
        "C1-C2", "C2=C3", "C3-C4", "C4=C5", "C5-C6", "C6=C1",
        "C1*{Chain1}", "C6*{Chain1}",
    ],
    'Chain1': ["*-C1", "C1=C2", "C2-C3", "C3=C4", "C4-*"],
}

# two side chains, each with a '*' port at both ends
molecule_src_3 = {
    'MainChain': [
        "C1*{Chain1}",
        "C1-C2",
        "C2*{Chain2}",
        "C2-C3", "C3=XX4",
    ],
    'Chain1': ["*-C1", "C1=C2", "C2-C3", "C3=*"],
    'Chain2': ["*-C1", "C1=C2", "C2-C3", "C3=*"],
}

# two side chains with a single '*' port each
molecule_src_4 = {
    'MainChain': [
        "C1-C2",
        "C2*{Chain1}",
        "C2-C3",
        "C3*{Chain2}",
    ],
    'Chain1': ["*-C1", "C1=C2", "C2-C3", "C3=XX4"],
    'Chain2': ["*-C1", "C1=C2", "C2-C3"],
}

# Chain1 has one '*' port; Chain2 has '*' and '**' and is referenced from C3 and C4
molecule_src_5 = {
    'MainChain': [
        "C1-C2", "C2=C3", "C3=C4",
        "C1*{Chain1}",
        "C3*{Chain2}", "C4*{Chain2}",
    ],
    'Chain1': ["*-C1", "C1=C2", "C2-XX3"],
    'Chain2': ["*-C1", "C1=C2", "C2-**"],
}

# molecules
molecules_src = [
    molecule_src_1,
    molecule_src_2,
    molecule_src_3,
    molecule_src_4,
    molecule_src_5,
]

In [None]:
# Single-molecule smoke test: print the source, the checked source and the
# three values returned by the builder, separated by dashed rules.
# NOTE(review): `molecule_src` is whatever the most recently executed cell
# bound to that name — confirm which fixture is intended here.

# source
print(molecule_src)
print("-"*50)

# test
# check_molecule is defined elsewhere in the notebook; presumably it validates
# and/or normalises the source — verify against its definition.
molecule_src_checked = check_molecule(molecule_src)
print(molecule_src_checked)
print("-"*50)

# build
# NOTE(review): this cell sits in the `Code 4` test section but calls
# construct_molecule_v3, while the loop cell below calls construct_molecule_v4 —
# confirm which version is meant to be exercised.
chain_info, molecule, constructed_molecule = construct_molecule_v3(molecule_src_checked)
print(chain_info)
print("-"*50)
print(molecule)
print("-"*50)
# number of entries in the flat bond list, then the list itself
print(len(constructed_molecule))
print(constructed_molecule)

In [None]:
# looping through molecules
# Runs every fixture in `molecules_src` through check_molecule and
# construct_molecule_v4 and prints each intermediate result.
# Note: the loop variable rebinds the notebook-level name `molecule_src`.
for molecule_src in molecules_src:
    # test
    # check_molecule is defined elsewhere in the notebook
    molecule_src_checked = check_molecule(molecule_src)
    print(molecule_src_checked)
    print("-"*40)

    # build
    chain_info, molecule, constructed_molecule = construct_molecule_v4(molecule_src_checked)
    print(chain_info)
    print("-"*40)
    print(molecule)
    print("-"*40)
    # number of entries in the flat bond list
    print(len(constructed_molecule))
    # source
    # printed next to the result so input and output can be compared by eye
    print(molecule_src)
    print("-"*40)
    print(constructed_molecule)
    # heavy rule between molecules
    print("*"*80)


#### `Code 5`

In [None]:
class MoleculeConstructor:
    """
    A class for constructing complete molecular structures from component chains.

    This class provides methods to analyze molecular chains, process connections
    between chains, and construct a complete molecule from its component parts.

    A source is a dict that maps a chain name to a list of bond strings. The
    main chain is the first chain that contains a connection placeholder
    (``atom*{chain}`` or ``{chain}*atom``); every other chain is a side chain
    whose '*' characters mark its gates.
    """

    # Regular expressions are compiled once here instead of on every loop pass.
    _RECEIVER_BEFORE_CHAIN = re.compile(r"([A-Za-z]+)(\d+)\*\{([A-Za-z0-9]+)\}")
    _CHAIN_BEFORE_RECEIVER = re.compile(r"\{([A-Za-z0-9]+)\}\*([A-Za-z]+)(\d+)")
    _PLAIN_BOND = re.compile(r"([A-Za-z]+)(\d+)([-=#])([A-Za-z]+)(\d+)")
    _GATE_AFTER_ATOM = re.compile(r"([A-Za-z]+)(\d+)([-=#])(\*+)")
    _GATE_BEFORE_ATOM = re.compile(r"(\*+)([-=#])([A-Za-z]+)(\d+)")
    _MAIN_CHAIN_MARKERS = (re.compile(r'\{.*?\}\*.*'), re.compile(r'.*\*\{.*?\}'))
    _ATOM_INDEX = re.compile(r'[A-Za-z]+(\d+)')

    # Characters that denote a bond order at the start of a gate string.
    _BOND_SYMBOLS = ('-', '=', '#')

    # Chain type keyed by the (stringified) number of '*' characters in the chain.
    _CHAIN_TYPES = {
        "1": 'branch',
        "2": 'ring',
        "3": 'bridge',
    }

    def __init__(self, molecule_src):
        """
        Initialize with a molecule source dictionary.

        Parameters
        ----------
        molecule_src : dict
            A dictionary containing lists of strings representing bonds between atoms in a molecule.

        Raises
        ------
        ValueError
            If no chain of the source contains a connection placeholder.
        """
        self.molecule_src = molecule_src
        # Work on per-chain copies so the caller's lists are never modified.
        self.molecule = {key: chain.copy() for key, chain in molecule_src.items()}
        self.main_chain = self._search_for_main_chain()
        if not self.main_chain:
            raise ValueError("Main chain not found in the molecule source.")
        self.highest_index = self._extract_highest_index(self.molecule[self.main_chain])
        self.chain_info = {}
        self.chain_analysis = {}
        self.constructed_molecule = []
        # Set once construct() has finished; see construct().
        self._constructed = False

    def _search_for_main_chain(self):
        """
        Searches for the main chain in the molecule source.

        Returns
        -------
        str or None
            The name of the first chain that contains a connection
            placeholder, or None when no chain does.
        """
        for key, chain in self.molecule.items():
            if any(marker.match(bond) for bond in chain for marker in self._MAIN_CHAIN_MARKERS):
                return key
        return None

    def _extract_highest_index(self, chain):
        """
        Extracts the highest numerical index from atom labels in the given chain.

        Parameters
        ----------
        chain : list
            A list of strings representing bonds between atoms in a chain.

        Returns
        -------
        int
            The highest numerical index found in the chain, or 0 when the
            chain holds no plain bonds.
        """
        indices = []
        for bond in chain:
            if '{' in bond or '}' in bond:
                continue  # Ignore bonds containing {Chain..}
            indices.extend(int(number) for number in self._ATOM_INDEX.findall(bond))
        return max(indices, default=0)

    def _analyze_chain_types(self):
        """
        Analyze the types of chains in the molecule.

        Fills ``chain_analysis`` with the type of every side chain and
        ``chain_info`` with a fresh bookkeeping dict per side chain.

        Raises
        ------
        KeyError
            If a side chain contains a number of '*' characters other than 1, 2 or 3.
        """
        for key, chain in self.molecule.items():
            if key == self.main_chain:
                continue

            # Count gate num (every '*' counts, so '**' contributes two)
            chain_gate_num = sum(item.count('*') for item in chain)
            chain_type = self._CHAIN_TYPES[str(chain_gate_num)]

            # Save chain type
            self.chain_analysis[key] = chain_type

            # Initialize chain info
            self.chain_info[key] = {
                'receiver': [],
                'bonds': [],
                'gate': [],
                'type': chain_type,
                'connection-port': {}
            }

    def _parse_connection(self, entry):
        """
        Split a connection placeholder into its parts.

        Parameters
        ----------
        entry : str
            One entry of the main chain.

        Returns
        -------
        tuple or None
            ``(atom, index, chain_name)`` for ``atom*{chain}`` or
            ``{chain}*atom`` entries, None for anything else.
        """
        match = self._RECEIVER_BEFORE_CHAIN.match(entry)
        if match:
            return match.groups()
        match = self._CHAIN_BEFORE_RECEIVER.match(entry)
        if match:
            gate, atom1, index1 = match.groups()
            return atom1, index1, gate
        return None

    def _attach(self, atom_label, gate_connection):
        """
        Join a main-chain atom with a gate string.

        A gate that starts with a bond symbol is placed after the atom;
        otherwise the gate already ends with the bond symbol and is placed
        before the atom.
        """
        if gate_connection.startswith(self._BOND_SYMBOLS):
            return f"{atom_label}{gate_connection}"
        return f"{gate_connection}{atom_label}"

    def _process_main_chain_connections(self):
        """
        Process connections between the main chain and other chains.

        Appends the receiving atom label of every placeholder to the
        'receiver' list of the referenced chain.
        """
        for entry in self.molecule[self.main_chain]:
            parsed = self._parse_connection(entry)
            if parsed is None:
                continue
            atom1, index1, gate = parsed
            self.chain_info[gate]['receiver'].append(f"{atom1}{index1}")

    def _register_gate(self, key, position, gate_label, atom_label, bond_order, gate_port):
        """
        Store a re-indexed gate of chain ``key`` in the chain and in ``chain_info``.
        """
        self.molecule[key][position] = gate_label
        info = self.chain_info[key]
        info['gate'].append(gate_label)
        info['connection-port'][gate_label] = {
            'port': gate_port,
            'bond': atom_label,
            'bond-type': bond_order,
            'bond-gate': gate_label,
            'port-status': 'open'
        }

    def _update_chain_indices(self):
        """
        Update indices of atoms in chains and process bonds.

        Every side chain is shifted by the highest index in use when the
        chain is reached; ``highest_index`` is then raised to the largest
        index that the chain produced.
        """
        for key, chain in self.molecule.items():
            if key == self.main_chain:
                continue

            # The offset is fixed for the whole chain.
            offset = self.highest_index
            last_index = self.highest_index

            for i, bond in enumerate(chain):
                # Process normal bonds
                match = self._PLAIN_BOND.match(bond)
                if match:
                    atom1, index1, bond_order, atom2, index2 = match.groups()
                    new_index1 = int(index1) + offset
                    new_index2 = int(index2) + offset
                    chain[i] = f"{atom1}{new_index1}{bond_order}{atom2}{new_index2}"
                    self.chain_info[key]['bonds'].append(chain[i])
                    # Running maximum: entries need not be in ascending order.
                    last_index = max(last_index, new_index1, new_index2)
                    continue

                # Process atom-* gate
                match = self._GATE_AFTER_ATOM.match(bond)
                if match:
                    atom1, index1, bond_order, gate_port = match.groups()
                    new_index1 = int(index1) + offset
                    atom_label = f"{atom1}{new_index1}"
                    self._register_gate(key, i, f"{atom_label}{bond_order}", atom_label, bond_order, gate_port)
                    last_index = max(last_index, new_index1)
                    continue

                # Process *-atom gate
                match = self._GATE_BEFORE_ATOM.match(bond)
                if match:
                    gate_port, bond_order, atom1, index1 = match.groups()
                    new_index1 = int(index1) + offset
                    atom_label = f"{atom1}{new_index1}"
                    self._register_gate(key, i, f"{bond_order}{atom_label}", atom_label, bond_order, gate_port)
                    # A gate listed after higher-numbered bonds must not lower the maximum.
                    last_index = max(last_index, new_index1)

            self.highest_index = last_index

    def _process_multi_gate_connection(self, element_index, atom1, index1, gate, chain_type_):
        """
        Process connections with multiple gates.

        For a 'ring' chain the atom is joined to every gate of the chain (the
        first join replaces the placeholder, the others are appended to the
        main chain). For a 'bridge' chain the atom is joined to the gate at
        the atom's position in the chain's 'receiver' list. Other chain types
        leave the main chain untouched.

        Parameters
        ----------
        element_index : int
            Index of the element in the main chain.
        atom1 : str
            The atom symbol.
        index1 : str
            The atom index.
        gate : str
            The gate identifier.
        chain_type_ : str
            Type of the chain.
        """
        atom_label = f"{atom1}{index1}"

        if chain_type_ == 'ring':
            for m, gate_connection in enumerate(self.chain_info[gate]['gate']):
                _connection = self._attach(atom_label, gate_connection)
                if m == 0:
                    self.molecule[self.main_chain][element_index] = _connection
                else:
                    self.molecule[self.main_chain].append(_connection)
        elif chain_type_ == 'bridge':
            receiver_index = self.chain_info[gate]['receiver'].index(atom_label)
            _gate_in = self.chain_info[gate]['gate'][receiver_index]
            self.molecule[self.main_chain][element_index] = self._attach(atom_label, _gate_in)

    def _process_gate_connections(self):
        """
        Process gate connections between chains.

        Replaces every connection placeholder of the main chain with the
        bond that joins the receiving atom to the referenced chain.
        """
        main_bonds = self.molecule[self.main_chain]
        # Ring handling may append to main_bonds while it is being walked; the
        # appended entries are finished bonds, so they parse to None below.
        for i, entry in enumerate(main_bonds):
            parsed = self._parse_connection(entry)
            if parsed is None:
                continue
            atom1, index1, gate = parsed
            gates = self.chain_info[gate]['gate']
            chain_type_ = self.chain_analysis[gate]

            if len(gates) == 1:
                main_bonds[i] = self._attach(f"{atom1}{index1}", gates[0])
            else:
                self._process_multi_gate_connection(i, atom1, index1, gate, chain_type_)

    def _build_constructed_molecule(self):
        """
        Build the final constructed molecule.

        The result is the main chain followed by the bonds of every side chain.
        """
        self.constructed_molecule = []

        # Add main chain bonds
        self.constructed_molecule.extend(self.molecule[self.main_chain])

        # Add bonds from other chains
        for chain in self.chain_info.values():
            self.constructed_molecule.extend(chain['bonds'])

    def construct(self):
        """
        Construct the complete molecule from its components.

        The construction steps consume the '*' markers of the working copy,
        so they can only run once per instance. Further calls return the
        result of the first successful call instead of re-running the steps.

        Returns
        -------
        tuple
            A tuple containing chain_info, molecule, and constructed_molecule.
        """
        if self._constructed:
            return self.chain_info, self.molecule, self.constructed_molecule

        # Step 1: Analyze chain types
        self._analyze_chain_types()

        # Step 2: Process main chain connections
        self._process_main_chain_connections()

        # Step 3: Update chain indices
        self._update_chain_indices()

        # Step 4: Process gate connections
        self._process_gate_connections()

        # Step 5: Build the constructed molecule
        self._build_constructed_molecule()

        self._constructed = True
        return self.chain_info, self.molecule, self.constructed_molecule

    @classmethod
    def from_source(cls, molecule_src):
        """
        Construct a molecule directly from a source dictionary.

        This is a convenience method that creates an instance of the class
        it is called on and immediately constructs the molecule.

        Parameters
        ----------
        molecule_src : dict
            A dictionary containing lists of strings representing bonds between atoms in a molecule.

        Returns
        -------
        tuple
            A tuple containing chain_info, molecule, and constructed_molecule.
        """
        constructor = cls(molecule_src)
        return constructor.construct()

#### Test 2

In [19]:
# six-membered main chain with Chain1 referenced from C1 and C6;
# Chain1 carries one '*' port and one '**' port
molecule_src = {
    'MainChain': [
        "C1-C2", "C2=C3", "C3-C4", "C4=C5", "C5-C6", "C6=C1",
        "C1*{Chain1}", "C6*{Chain1}",
    ],
    'Chain1': [
        "*-C1",
        "C1=C2", "C2-C3", "C3=C4",
        "C4-**",
    ],
}

In [21]:
# Runs one source through the class-based builder and prints the result
# tuple followed by the original source.

# checker
# check_molecule is defined elsewhere in the notebook; presumably it validates
# and/or normalises the source — verify against its definition.
molecule_src_checked = check_molecule(molecule_src)
# molecule constructor
# MoleculeConstructor copies each chain list, so molecule_src_checked's lists
# are not modified by construct()
constructor = MoleculeConstructor(molecule_src_checked)
# prints the (chain_info, molecule, constructed_molecule) tuple
print(constructor.construct())
print(molecule_src)

In [17]:
# looping through molecules
# Side-by-side comparison: for every fixture, print the flat bond list
# produced by construct_molecule_v3 and by MoleculeConstructor.
# Note: the loop variable rebinds the notebook-level name `molecule_src`.
for molecule_src in molecules_src:
    # test
    # check_molecule is defined elsewhere in the notebook
    molecule_src_checked = check_molecule(molecule_src)
    # print(molecule_src_checked)

    # build
    # both builders receive the same checked source; only the third return
    # value (the constructed molecule) is kept
    # NOTE(review): assumes construct_molecule_v3 does not modify its argument
    # in place — confirm, otherwise the second builder sees altered input
    _, _, res = construct_molecule_v3(molecule_src_checked)
    constructor = MoleculeConstructor(molecule_src_checked)
    _, _, res2 = constructor.construct()
    # source
    print(molecule_src)
    # res (function version) and res2 (class version) are printed one after
    # the other for visual comparison
    print(res)
    print(res2)
    print("*"*80)