In [1]:
from bs4 import BeautifulSoup

# Sample HTML content
html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
</head>
<body>
    <h1>Selecting Elements</h1>

    <svg id="circles" width="300" height="200" x="400" y="400">
        <circle r="30" cx="50" cy="50"></circle>
        <circle r="30" cx="120" cy="120"></circle>
    </svg>
    <svg id="rects" width="300" height="200" x="800" y="800">
        <rect width="100" height="30" x="5" y="5"></rect>
        <rect width="70" height="30" x="50" y="50"></rect>
    </svg>
    <script src="https://cdn.jsdelivr.net/npm/d3@7"></script>
    <script src="select.js"></script>
</body>
</html>
'''

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Recursive function to generate DOM tree
def generate_dom_tree(node, level=0):
    indent = "  " * level
    print(f"{indent}{node.name if node.name else 'Text'}")

    # Recursively print children
    for child in node.children:
        if hasattr(child, 'children'):
            generate_dom_tree(child, level + 1)

# Generate the DOM tree
generate_dom_tree(soup)

[document]
  html
    head
      meta
      meta
      title
    body
      h1
      svg
        circle
        circle
      svg
        rect
        rect
      script
      script


In [2]:
from bs4 import BeautifulSoup

# Node class representing each element in the DOM tree
class Node:
    def __init__(self, tag_type, children=None):
        self.tag_type = tag_type  # Type of the HTML element (e.g., 'html', 'head', 'body', 'h1')
        self.children = children if children is not None else []  # Children nodes

    def add_child(self, child_node):
        self.children.append(child_node)

    def traverse(self, level=0):
        indent = "  " * level
        print(f"{indent}{self.tag_type}")
        for child in self.children:
            child.traverse(level + 1)

# Function to generate the DOM tree as a Node structure
def create_dom_tree(soup_element):
    if soup_element.name is not None:  # If it's a valid tag (ignoring text nodes for now)
        node = Node(tag_type=soup_element.name)
        # Recursively create children nodes
        for child in soup_element.children:
            if hasattr(child, 'name'):  # Only handle elements with a tag name
                child_node = create_dom_tree(child)
                if child_node is not None:
                    node.add_child(child_node)
        return node
    return None

# Sample HTML content
html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
</head>
<body>
    <h1>Selecting Elements</h1>

    <svg id="circles" width="300" height="200" x="400" y="400">
        <circle r="30" cx="50" cy="50"></circle>
        <circle r="30" cx="120" cy="120"></circle>
    </svg>
    <svg id="rects" width="300" height="200" x="800" y="800">
        <rect width="100" height="30" x="5" y="5"></rect>
        <rect width="70" height="30" x="50" y="50"></rect>
    </svg>
    <script src="https://cdn.jsdelivr.net/npm/d3@7"></script>
    <script src="select.js"></script>
</body>
</html>
'''

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Create the DOM tree starting from the root node
dom_tree_root = create_dom_tree(soup)

# Traverse and print the DOM tree
if dom_tree_root:
    dom_tree_root.traverse()


[document]
  html
    head
      meta
      meta
      title
    body
      h1
      svg
        circle
        circle
      svg
        rect
        rect
      script
      script


In [4]:
class d3Clone:
    def __init__(self, dom_tree_root):
        self.dom_tree_root = dom_tree_root
    
    def select(self, selector):
        def traverse(level=0):
            for child in self.dom_tree_root.children:
                child.traverse(level + 1)
                if child.tag_type == selector:
                    return child
        child=traverse()
        return child
    
d3=d3Clone(dom_tree_root)
selected_element=d3.select('svg')
print(selected_element)

        
        

  html
    head
      meta
      meta
      title
    body
      h1
      svg
        circle
        circle
      svg
        rect
        rect
      script
      script
None


In [5]:
class d3Clone:
    def __init__(self, dom_tree_root):
        self.dom_tree_root = dom_tree_root

    def select(self, selector):
        # A recursive function to traverse the entire tree
        def traverse(node):
            if node.tag_type == selector:  # If we find the matching tag
                return node
            # Otherwise, keep traversing children
            for child in node.children:
                result = traverse(child)
                if result is not None:  # If we found the node in one of the children
                    return result
            return None

        # Start traversing from the root
        return traverse(self.dom_tree_root)

# Usage example:
d3 = d3Clone(dom_tree_root)
selected_element = d3.select('svg')  # Selects the first <svg> element
print(selected_element)


<__main__.Node object at 0x7427d89d9490>
