In [1]:
from bs4 import BeautifulSoup, NavigableString

# Sample HTML fixture: an outer <div> wrapping a table with two rows of three
# cells each. The row-1 cells mix plain text with inline markup (a styled
# <span>, an <a> link, and nested <b><u>); the row-2 cells hold plain text only.
html = '''
<div>
    <div><br /></div>
    <table>
        <tbody>
            <tr>
                <td>
                    <div>Row 1: <span style="color: rgb(0, 166, 125);">You are a helpful assistant.</span> Remember it.</div>
                </td>
                <td>
                    <div>Row 1: <a href="https://platform.openai.com/docs/guides/chat/introduction">https://platform.openai.com/docs/guides/chat/introduction</a></div>
                </td>
                <td>
                    <div>Row 1: <b><u>Import Content</u></b> Read more.</div>
                </td>
            </tr>
            <tr>
                <td><div>Row 2: </div></td>
                <td><div>Row 2: </div></td>
                <td><div>Row 2: </div></td>
            </tr>
        </tbody>
    </table>
    <div><br /></div>
</div>
'''

def extract_text_and_parents(tag, parents=None):
    """Collect each non-blank text node under *tag* with its chain of enclosing tags.

    The tree below *tag* is walked depth-first in document order. Every
    ``NavigableString`` child whose stripped content is non-empty yields one
    result entry.

    Args:
        tag: The element whose descendants are scanned. It must expose
            ``.children``, ``.name`` and ``.attrs``.
        parents: Ancestors of *tag* already visited, outermost first. Callers
            normally omit it; it is filled in by the recursive calls.

    Returns:
        A list of dicts, one per text fragment, shaped as
        ``{"text": <stripped text>, "parent_tags": [{"name": ..., "attrs": ...}, ...]}``.
        ``parent_tags`` runs from the outermost tag (the first entry of
        *parents*, or *tag* itself when *parents* is omitted) down to the tag
        that directly contains the text.
    """
    # Use a None sentinel rather than a mutable ``[]`` default, which would be
    # a single list object shared by every call that omits the argument.
    # The chain is built once per call instead of once per child.
    lineage = ([] if parents is None else list(parents)) + [tag]

    results = []
    for child in tag.children:
        if isinstance(child, NavigableString):
            text = child.strip()
            if text:  # skip whitespace-only nodes (indentation, newlines)
                results.append({
                    "text": text,
                    "parent_tags": [
                        {"name": p.name, "attrs": p.attrs} for p in lineage
                    ],
                })
        else:
            # Element node: descend, passing the chain that now ends at `tag`.
            results.extend(extract_text_and_parents(child, lineage))
    return results

# Parse the sample markup and gather every table cell.
soup = BeautifulSoup(html, 'html.parser')
td_tags = soup.find_all('td')

# For each cell (numbered from 1), print its text fragments, each followed by
# the tags that enclose it, then a blank line to separate cells.
for cell_number, cell in enumerate(td_tags, start=1):
    fragments = extract_text_and_parents(cell)
    print(f"Text and parent tags in TD {cell_number}:")
    for fragment in fragments:
        print(f"Text: {fragment['text']}")
        print("Parent tags:")
        for ancestor in fragment["parent_tags"]:
            print(f"  Tag: {ancestor['name']}, Attributes: {ancestor['attrs']}")
    print()


Text and parent tags in TD 1:
Text: Row 1:
Parent tags:
  Tag: td, Attributes: {}
  Tag: div, Attributes: {}
Text: You are a helpful assistant.
Parent tags:
  Tag: td, Attributes: {}
  Tag: div, Attributes: {}
  Tag: span, Attributes: {'style': 'color: rgb(0, 166, 125);'}
Text: Remember it.
Parent tags:
  Tag: td, Attributes: {}
  Tag: div, Attributes: {}

Text and parent tags in TD 2:
Text: Row 1:
Parent tags:
  Tag: td, Attributes: {}
  Tag: div, Attributes: {}
Text: https://platform.openai.com/docs/guides/chat/introduction
Parent tags:
  Tag: td, Attributes: {}
  Tag: div, Attributes: {}
  Tag: a, Attributes: {'href': 'https://platform.openai.com/docs/guides/chat/introduction'}

Text and parent tags in TD 3:
Text: Row 1:
Parent tags:
  Tag: td, Attributes: {}
  Tag: div, Attributes: {}
Text: Import Content
Parent tags:
  Tag: td, Attributes: {}
  Tag: div, Attributes: {}
  Tag: b, Attributes: {}
  Tag: u, Attributes: {}
Text: Read more.
Parent tags:
  Tag: td, Attributes: {}
  Tag: 