## Complete tree printing

In [None]:
import re

def parse_log_file(file_path):
    """Read a tracing log and return its call/exit events in file order.

    A line of the form ``--> call function NAME in PATH:LINE`` (or the ``>``
    short form, optionally indented) produces a ``"push"`` event; a line of
    the form ``<-- exit function NAME in PATH:LINE`` (or ``<``) produces a
    ``"pop"`` event. Every other line is ignored.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A list of ``(function_name, source_path, line_number, operation)``
        tuples, where ``line_number`` is an ``int`` and ``operation`` is
        ``"push"`` or ``"pop"``.
    """
    # The entry pattern is tried before the exit pattern; the first hit wins.
    event_patterns = (
        ("push", re.compile(r'^(--+>|\s*>)\s*call function (.+?) in (.+?):(\d+)')),
        ("pop", re.compile(r'^(<--+|\s*<)\s*exit function (.+?) in (.+?):(\d+)')),
    )

    events = []
    with open(file_path, 'r') as log_handle:
        for raw_line in log_handle:
            for operation, pattern in event_patterns:
                hit = pattern.match(raw_line)
                if hit is None:
                    continue
                # Group 1 is the arrow marker itself and carries no data.
                _, name, source_path, line_text = hit.groups()
                events.append((name, source_path, int(line_text), operation))
                break
    return events

class CallStackNode:
    """One recorded function call in the call tree.

    Holds the callee's name, the source file and line taken from the log
    entry, the calls made while this call was open (``children``) and a
    back-reference to the calling node (``parent``).
    """

    def __init__(self, function_name, file_path, line_number):
        self.function_name = function_name
        self.file_path = file_path
        self.line_number = line_number
        self.children = []  # nested calls, in the order they were added
        self.parent = None  # filled in by the parent's add_child()

    def add_child(self, child_node):
        """Append ``child_node`` to this node's children and set its parent."""
        child_node.parent = self
        self.children.append(child_node)

    def __repr__(self, level=0, is_last_child=True, parent_last_childs=None):
        """Render this node and its whole subtree as a box-drawing tree.

        Args:
            level: Unused; kept so existing positional callers keep working.
            is_last_child: Whether this node is the last child of its parent
                (selects ``└──`` instead of ``├──``).
            parent_last_childs: One flag per ancestor, outermost first. A true
                flag means that ancestor was a last child, so its column is
                left blank instead of drawing a ``│`` rail. ``None`` (the
                default) means there are no ancestors.

        Returns:
            One line per node in pre-order, each terminated by a newline.
        """
        ancestor_flags = parent_last_childs or ()
        base_indent = ''.join('    ' if flag else '│   ' for flag in ancestor_flags)

        rendered = []
        # An explicit stack replaces recursion so that very deep call chains
        # cannot hit the interpreter's recursion limit, and the text is joined
        # once at the end instead of being re-copied at every tree level.
        pending = [(self, is_last_child, base_indent)]
        while pending:
            node, is_last, indent = pending.pop()
            branch = '└── ' if is_last else '├── '
            rendered.append(f"{indent}{branch}{node.function_name}, {node.file_path}:{node.line_number}\n")  # complete
            # Simple variant without file path and line number:
            # rendered.append(f"{indent}{branch}{node.function_name}\n")

            child_indent = indent + ('    ' if is_last else '│   ')
            last_index = len(node.children) - 1
            # Push in reverse so children are popped, and printed, left to right.
            for index in range(last_index, -1, -1):
                pending.append((node.children[index], index == last_index, child_indent))
        return ''.join(rendered)

class CallStackTree:
    """Call tree built incrementally from push/pop events.

    A synthetic node named ``"root"`` anchors the tree, and ``current_node``
    tracks the call that is currently open.
    """

    def __init__(self):
        self.root = CallStackNode("root", "", 0)
        self.current_node = self.root

    def push(self, function_name, file_path, line_number):
        """Record a function entry as a child of the open call and descend into it."""
        new_node = CallStackNode(function_name, file_path, line_number)
        self.current_node.add_child(new_node)
        self.current_node = new_node

    def pop(self):
        """Record a function exit by moving back up to the caller.

        An exit seen while already at the root (an unmatched exit in the
        log) is ignored rather than treated as an error.
        """
        # Identity, not equality: the question is "is this the root object?",
        # which must not depend on how nodes happen to compare.
        if self.current_node is not self.root:
            self.current_node = self.current_node.parent

    def __repr__(self):
        """Return the rendering of the whole tree, starting at the root node."""
        return repr(self.root)

# Build the call tree for one tracing log and print it in full.
log_file_path = "/root/vescale_prj/veScale/test/parallel/pipeline/backend/logs/2024_0914_205932/test_pipe-host_d3cb3edeb6a9-pid_1639519-py/tracing-test_pipe-2024_0914_205932.log"
logs = parse_log_file(log_file_path)

call_stack_tree = CallStackTree()

# Replay the events in log order: an exit climbs back to the caller, an entry
# opens a new child under the call that is currently active.
for function_name, file_path, line_number, operation in logs:
    if operation == "pop":
        call_stack_tree.pop()
    elif operation == "push":
        call_stack_tree.push(function_name, file_path, line_number)

print(call_stack_tree)

### Study plan

In [None]:
import re
import os
from collections import deque

def parse_log_file(file_path):
    """Extract function entry and exit events from a tracing log.

    Entry lines (``--> call function NAME in PATH:LINE`` or the ``>`` short
    form, optionally indented) become ``"push"`` events and exit lines
    (``<-- exit function NAME in PATH:LINE`` or ``<``) become ``"pop"``
    events. Lines matching neither form are skipped.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A list of ``(function_name, source_path, line_number, operation)``
        tuples in file order; ``line_number`` is an ``int``.
    """
    # Checked in this order for every line; a line is recorded at most once.
    matchers = (
        (re.compile(r'^(--+>|\s*>)\s*call function (.+?) in (.+?):(\d+)'), "push"),
        (re.compile(r'^(<--+|\s*<)\s*exit function (.+?) in (.+?):(\d+)'), "pop"),
    )

    parsed = []
    with open(file_path, 'r') as handle:
        for text in handle:
            for matcher, kind in matchers:
                found = matcher.match(text)
                if found:
                    # Group 1 is the arrow marker and is not part of the event.
                    parsed.append((found.group(2), found.group(3), int(found.group(4)), kind))
                    break
    return parsed

class CallStackNode:
    """One recorded function call in the call tree.

    Holds the callee's name, the source file and line taken from the log
    entry, the calls made while this call was open (``children``) and a
    back-reference to the calling node (``parent``).
    """

    def __init__(self, function_name, file_path, line_number):
        self.function_name = function_name
        self.file_path = file_path
        self.line_number = line_number
        self.children = []  # nested calls, in the order they were added
        self.parent = None  # filled in by the parent's add_child()

    def add_child(self, child_node):
        """Append ``child_node`` to this node's children and set its parent."""
        child_node.parent = self
        self.children.append(child_node)

    def __repr__(self, level=0, is_last_child=True, parent_last_childs=None):
        """Render this node and its whole subtree as a box-drawing tree.

        Args:
            level: Unused; kept so existing positional callers keep working.
            is_last_child: Whether this node is the last child of its parent
                (selects ``└──`` instead of ``├──``).
            parent_last_childs: One flag per ancestor, outermost first. A true
                flag means that ancestor was a last child, so its column is
                left blank instead of drawing a ``│`` rail. ``None`` (the
                default) means there are no ancestors.

        Returns:
            One line per node in pre-order, each terminated by a newline.
        """
        ancestor_flags = parent_last_childs or ()
        base_indent = ''.join('    ' if flag else '│   ' for flag in ancestor_flags)

        rendered = []
        # An explicit stack replaces recursion so that very deep call chains
        # cannot hit the interpreter's recursion limit, and the text is joined
        # once at the end instead of being re-copied at every tree level.
        pending = [(self, is_last_child, base_indent)]
        while pending:
            node, is_last, indent = pending.pop()
            branch = '└── ' if is_last else '├── '
            rendered.append(f"{indent}{branch}{node.function_name}, {node.file_path}:{node.line_number}\n")  # complete

            child_indent = indent + ('    ' if is_last else '│   ')
            last_index = len(node.children) - 1
            # Push in reverse so children are popped, and printed, left to right.
            for index in range(last_index, -1, -1):
                pending.append((node.children[index], index == last_index, child_indent))
        return ''.join(rendered)

class CallStackTree:
    """Call tree built from push/pop events, with de-duplicating traversals.

    A synthetic node named ``"root"`` anchors the tree, and ``current_node``
    tracks the call that is currently open.
    """

    def __init__(self):
        self.root = CallStackNode("root", "", 0)
        self.current_node = self.root

    @staticmethod
    def _node_key(node):
        """Return the (function name, file path, line number) key used to spot repeats."""
        return (node.function_name, node.file_path, node.line_number)

    def push(self, function_name, file_path, line_number):
        """Record a function entry as a child of the open call and descend into it."""
        new_node = CallStackNode(function_name, file_path, line_number)
        self.current_node.add_child(new_node)
        self.current_node = new_node

    def pop(self):
        """Record a function exit by moving back up to the caller.

        An exit seen while already at the root (an unmatched exit in the
        log) is ignored rather than treated as an error.
        """
        # Identity, not equality: the question is "is this the root object?",
        # which must not depend on how nodes happen to compare.
        if self.current_node is not self.root:
            self.current_node = self.current_node.parent

    def breadth_first_traversal(self):
        """Return nodes level by level, keeping the first node seen per key.

        The root is always the first element. A node whose key was already
        seen is dropped, and its children are not visited through it.
        """
        queue = deque([self.root])
        visited = set()
        traversal_order = []
        while queue:
            current_node = queue.popleft()
            node_id = self._node_key(current_node)
            if node_id in visited:
                continue
            visited.add(node_id)
            traversal_order.append(current_node)
            queue.extend(current_node.children)
        return traversal_order

    def depth_first_traversal(self):
        """Return nodes in pre-order, keeping the first node seen per key.

        The root is always the first element. A node whose key was already
        seen is dropped, and its children are not visited through it.
        """
        stack = [self.root]
        visited = set()
        traversal_order = []

        while stack:
            current_node = stack.pop()
            node_id = self._node_key(current_node)
            if node_id in visited:
                continue
            visited.add(node_id)
            traversal_order.append(current_node)
            # Reversed so the leftmost child ends up on top of the stack and
            # is therefore visited first.
            stack.extend(reversed(current_node.children))

        return traversal_order

    def build_bfs_tree(self):
        """Return a copy of the tree restricted to the breadth-first result.

        Each node returned by ``breadth_first_traversal`` is copied and hung
        under the copy of its original parent; the copy of the root is a new
        node named ``"BFS Root"``.

        Returns:
            The root ``CallStackNode`` of the copied tree.
        """
        bfs_order = self.breadth_first_traversal()
        bfs_tree_root = CallStackNode("BFS Root", "", 0)
        node_mapping = {bfs_order[0]: bfs_tree_root}  # original node -> its copy

        for node in bfs_order[1:]:
            parent = node.parent
            # A parent is always visited before its children in BFS order, so
            # its copy already exists whenever the parent was kept.
            if parent in node_mapping:
                new_node = CallStackNode(node.function_name, node.file_path, node.line_number)
                node_mapping[parent].add_child(new_node)
                node_mapping[node] = new_node

        return bfs_tree_root

    def __repr__(self):
        """Return the rendering of the whole tree, starting at the root node."""
        return repr(self.root)

# Parse log file and construct call stack tree
log_file_path = "/root/vescale_prj/veScale/test/parallel/pipeline/backend/logs/2024_0914_205932/test_pipe-host_d3cb3edeb6a9-pid_1639519-py/tracing-test_pipe-2024_0914_205932.log"
logs = parse_log_file(log_file_path)

call_stack_tree = CallStackTree()

for log in logs:
    function_name, file_path, line_number, operation = log
    if operation == "push":
        call_stack_tree.push(function_name, file_path, line_number)
    elif operation == "pop":
        call_stack_tree.pop()


def _save_learning_plan(title, nodes, log_file):
    """Print one learning-plan listing and write the same text to ``log_file``.

    The synthetic "root" node is skipped.

    Returns:
        The number of entries listed (the root is not counted).
    """
    entry_count = 0
    print(f"\n{title}")
    with open(log_file, 'w') as f:
        f.write(f"{title}\n")
        for node in nodes:
            if node.function_name == "root":
                continue
            output = f"'{node.function_name}' in file '{node.file_path}', line {node.line_number}."
            print(output)
            f.write(output + "\n")
            entry_count += 1
    return entry_count


# Ensure the logs directory exists
logs_dir = "logs"
os.makedirs(logs_dir, exist_ok=True)

# Save and print Breadth-First Traversal
bfs_log_file = os.path.join(logs_dir, "bfs_traversal.log")
traversal_order = call_stack_tree.breadth_first_traversal()
# Captured now because traversal_order is rebound to the DFS result below.
bfs_entry_count = _save_learning_plan("Learning Plan (Breadth-First, No Duplicates):", traversal_order, bfs_log_file)

# Save and print Depth-First Traversal
dfs_log_file = os.path.join(logs_dir, "dfs_traversal.log")
traversal_order = call_stack_tree.depth_first_traversal()
_save_learning_plan("Learning Plan (Depth-First, No Duplicates):", traversal_order, dfs_log_file)

################################################################################
# ### Save and print Tree Representation (all log lines, long time to process)
# tree_log_file = os.path.join(logs_dir, "call_stack_tree.log")
# with open(tree_log_file, 'w') as f:
#     tree_repr = repr(call_stack_tree)
#     f.write(tree_repr)
#     print(tree_repr)
################################################################################

print(f"Breadth-First traversal saved to {bfs_log_file}")
print(f"Depth-First traversal saved to {dfs_log_file}")
# print(f"Call stack tree representation saved to {tree_log_file}")

################################################################################

# BFS Summary: the number of entries in the breadth-first listing (the
# synthetic root is not a function call and is not counted).
bfs_summary = f"\n{bfs_entry_count} function calls processed."
with open(bfs_log_file, 'a') as f:
    f.write(bfs_summary)
print(bfs_summary)

# Build BFS Tree from traversal
bfs_tree_root = call_stack_tree.build_bfs_tree()

# Save and print BFS Tree Representation
bfs_tree_log_file = os.path.join(logs_dir, "bfs_tree.log")
# The rendering contains box-drawing characters, so the encoding is fixed to
# UTF-8 instead of depending on the platform default.
with open(bfs_tree_log_file, 'w', encoding='utf-8') as f:
    bfs_tree_repr = repr(bfs_tree_root)
    f.write(bfs_tree_repr)
    print(bfs_tree_repr)

print(f"Breadth-First tree representation saved to {bfs_tree_log_file}")

To copy the source code of each function referenced in the trace log into a new log file, look the function up by the file path and line number recorded in the log, and call `extract_function_from_file_with_line_numbers` from the main traversal process. The extracted code is then appended to the output log file for that traversal.

Here’s how to integrate function extraction into the `breadth_first_traversal` and `depth_first_traversal` parts of your script:

Steps:

1. For each node in the BFS/DFS traversal, use `extract_function_from_file_with_line_numbers` to extract the function’s code.
2. Save the extracted code into the corresponding log file (`bfs_log_file` or `dfs_log_file`).
3. Make sure each log entry contains the file name, the line number, and the extracted function code.

Here’s an updated version of your script to include this functionality:

In [1]:
import re
import os
from collections import deque


def _leading_indent(text):
    """Return the number of leading whitespace characters of ``text``."""
    return len(text) - len(text.lstrip())


def _is_def_line(text):
    """Return True when ``text`` starts a (possibly ``async``) function definition."""
    return text.lstrip().startswith(('def ', 'async def '))


def _bracket_delta(text):
    """Return the net bracket depth change of one source line.

    Brackets inside single-line string literals and after a ``#`` comment
    marker are ignored. Triple-quoted strings are not tracked across lines.
    """
    depth = 0
    quote = None
    escaped = False
    for char in text:
        if quote is not None:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == quote:
                quote = None
        elif char in '"\'':
            quote = char
        elif char == '#':
            break
        elif char in '([{':
            depth += 1
        elif char in ')]}':
            depth -= 1
    return depth


def _def_below_decorator(source_lines, decorator_index):
    """Return the index of the ``def`` decorated by the line at ``decorator_index``.

    Returns None when the decorator is followed by something other than a
    function definition at the same indentation.
    """
    decorator_indent = _leading_indent(source_lines[decorator_index])
    for index in range(decorator_index + 1, len(source_lines)):
        candidate = source_lines[index]
        stripped = candidate.strip()
        if not stripped or stripped.startswith('#'):
            continue
        indent = _leading_indent(candidate)
        if indent > decorator_indent:
            continue  # argument lines of a multi-line decorator call
        if indent == decorator_indent:
            if _is_def_line(candidate):
                return index
            if stripped.startswith(('@', ')', ']', '}')):
                continue  # another decorator, or the close of a multi-line one
        return None  # some other statement: this is not a decorated function
    return None


def extract_function_from_file_with_line_numbers(file_path, line_number):
    """Return the source text of the function located at ``line_number``.

    The lookup is a line-based heuristic, not a parse of the file:

    * If ``line_number`` points at a decorator, the function is the ``def``
      that follows the decorator block.
    * Otherwise the nearest ``def`` / ``async def`` at or above
      ``line_number`` is used.
    * Single-line decorators directly above the ``def`` are included.
    * The signature ends on the first line where all brackets opened since
      the ``def`` are closed again.
    * The body runs until the next non-blank, non-comment line indented no
      deeper than the ``def``; blank and comment lines before that point
      are kept.

    Args:
        file_path: Path of the Python source file.
        line_number: 1-based line number inside, or at the start of, the
            function.

    Returns:
        A ``(text, line)`` tuple. On success ``text`` is the extracted code
        and ``line`` is the 1-based line number of the ``def`` line. On
        failure ``text`` is a ``# Error: ...`` comment and ``line`` is the
        ``line_number`` that was passed in.

    Raises:
        OSError: For read failures other than a missing file.
    """
    try:
        with open(file_path, 'r') as file:
            source_lines = file.readlines()
    except FileNotFoundError:
        return f"# Error: File '{file_path}' not found.\n", line_number

    # Check if line_number is within the range of the file
    if line_number > len(source_lines):
        return f"# Error: Line number {line_number} exceeds the total lines in file '{file_path}'.\n", line_number

    target = line_number - 1
    func_start = None
    code_start = None

    # A line number that lands on a decorator: the function starts below it.
    # Searching upwards from here would find the previous function instead.
    if target >= 0 and source_lines[target].lstrip().startswith('@'):
        func_start = _def_below_decorator(source_lines, target)
        code_start = target

    # Otherwise, or if no def followed the '@' line, walk upwards to the
    # nearest function definition.
    if func_start is None:
        for i in range(target, -1, -1):
            if _is_def_line(source_lines[i]):
                func_start = i
                break
        code_start = func_start

    if func_start is None:
        return f"# Error: Function not found at line {line_number} in '{file_path}'.\n", line_number

    indent_level = _leading_indent(source_lines[func_start])

    # Pull in decorators that sit directly above the first line found so far.
    while code_start > 0:
        above = source_lines[code_start - 1]
        if above.lstrip().startswith('@') and _leading_indent(above) == indent_level:
            code_start -= 1
        else:
            break

    # Handle multi-line function signatures: keep going until every bracket
    # opened since the def is closed and the line is not backslash-continued.
    func_end = None
    depth = 0
    for i in range(func_start, len(source_lines)):
        depth += _bracket_delta(source_lines[i])
        if depth <= 0 and not source_lines[i].rstrip().endswith('\\'):
            func_end = i
            break

    if func_end is None:
        return f"# Error: Could not find complete function signature at line {line_number} in '{file_path}'.\n", line_number

    # The body ends at the first real statement indented no deeper than the
    # def. If none is found, the function runs to the end of the file.
    body_end = len(source_lines)
    for i in range(func_end + 1, len(source_lines)):
        line = source_lines[i]
        stripped = line.strip()
        if stripped and not stripped.startswith('#') and _leading_indent(line) <= indent_level:
            body_end = i
            break

    function_code = ''.join(source_lines[code_start:body_end])

    return function_code, func_start + 1


def parse_log_file(file_path):
    """Collect the function entry/exit events recorded in a tracing log.

    Entry lines (``--> call function NAME in PATH:LINE`` or the ``>`` short
    form, optionally indented) are reported as ``"push"``; exit lines
    (``<-- exit function NAME in PATH:LINE`` or ``<``) are reported as
    ``"pop"``. Any other line is skipped.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A list of ``(function_name, source_path, line_number, operation)``
        tuples in file order; ``line_number`` is an ``int``.
    """
    entry_pattern = re.compile(r'^(--+>|\s*>)\s*call function (.+?) in (.+?):(\d+)')
    exit_pattern = re.compile(r'^(<--+|\s*<)\s*exit function (.+?) in (.+?):(\d+)')

    def as_event(found, operation):
        # Group 1 is the arrow marker; groups 2-4 hold name, path and line.
        return (found.group(2), found.group(3), int(found.group(4)), operation)

    collected = []
    with open(file_path, 'r') as stream:
        for entry in stream:
            # Entry lines are tested first; exit lines only if that fails.
            found = entry_pattern.match(entry)
            if found:
                collected.append(as_event(found, "push"))
                continue
            found = exit_pattern.match(entry)
            if found:
                collected.append(as_event(found, "pop"))
    return collected

class CallStackNode:
    """One recorded function call in the call tree.

    Holds the callee's name, the source file and line taken from the log
    entry, the calls made while this call was open (``children``) and a
    back-reference to the calling node (``parent``).
    """

    def __init__(self, function_name, file_path, line_number):
        self.function_name = function_name
        self.file_path = file_path
        self.line_number = line_number
        self.children = []  # nested calls, in the order they were added
        self.parent = None  # filled in by the parent's add_child()

    def add_child(self, child_node):
        """Append ``child_node`` to this node's children and set its parent."""
        child_node.parent = self
        self.children.append(child_node)

    def __repr__(self, level=0, is_last_child=True, parent_last_childs=None):
        """Render this node and its whole subtree as a box-drawing tree.

        Args:
            level: Unused; kept so existing positional callers keep working.
            is_last_child: Whether this node is the last child of its parent
                (selects ``└──`` instead of ``├──``).
            parent_last_childs: One flag per ancestor, outermost first. A true
                flag means that ancestor was a last child, so its column is
                left blank instead of drawing a ``│`` rail. ``None`` (the
                default) means there are no ancestors.

        Returns:
            One line per node in pre-order, each terminated by a newline.
        """
        ancestor_flags = parent_last_childs or ()
        base_indent = ''.join('    ' if flag else '│   ' for flag in ancestor_flags)

        rendered = []
        # An explicit stack replaces recursion so that very deep call chains
        # cannot hit the interpreter's recursion limit, and the text is joined
        # once at the end instead of being re-copied at every tree level.
        pending = [(self, is_last_child, base_indent)]
        while pending:
            node, is_last, indent = pending.pop()
            branch = '└── ' if is_last else '├── '
            rendered.append(f"{indent}{branch}{node.function_name}, {node.file_path}:{node.line_number}\n")

            child_indent = indent + ('    ' if is_last else '│   ')
            last_index = len(node.children) - 1
            # Push in reverse so children are popped, and printed, left to right.
            for index in range(last_index, -1, -1):
                pending.append((node.children[index], index == last_index, child_indent))
        return ''.join(rendered)

class CallStackTree:
    """Call tree built from push/pop events, with de-duplicating traversals.

    A synthetic node named ``"root"`` anchors the tree, and ``current_node``
    tracks the call that is currently open.
    """

    def __init__(self):
        self.root = CallStackNode("root", "", 0)
        self.current_node = self.root

    @staticmethod
    def _node_key(node):
        """Return the (function name, file path, line number) key used to spot repeats."""
        return (node.function_name, node.file_path, node.line_number)

    def push(self, function_name, file_path, line_number):
        """Record a function entry as a child of the open call and descend into it."""
        new_node = CallStackNode(function_name, file_path, line_number)
        self.current_node.add_child(new_node)
        self.current_node = new_node

    def pop(self):
        """Record a function exit by moving back up to the caller.

        An exit seen while already at the root (an unmatched exit in the
        log) is ignored rather than treated as an error.
        """
        # Identity, not equality: the question is "is this the root object?",
        # which must not depend on how nodes happen to compare.
        if self.current_node is not self.root:
            self.current_node = self.current_node.parent

    def breadth_first_traversal(self):
        """Return nodes level by level, keeping the first node seen per key.

        The root is always the first element. A node whose key was already
        seen is dropped, and its children are not visited through it.
        """
        queue = deque([self.root])
        visited = set()
        traversal_order = []
        while queue:
            current_node = queue.popleft()
            node_id = self._node_key(current_node)
            if node_id in visited:
                continue
            visited.add(node_id)
            traversal_order.append(current_node)
            queue.extend(current_node.children)
        return traversal_order

    def depth_first_traversal(self):
        """Return nodes in pre-order, keeping the first node seen per key.

        The root is always the first element. A node whose key was already
        seen is dropped, and its children are not visited through it.
        """
        stack = [self.root]
        visited = set()
        traversal_order = []

        while stack:
            current_node = stack.pop()
            node_id = self._node_key(current_node)
            if node_id in visited:
                continue
            visited.add(node_id)
            traversal_order.append(current_node)
            # Reversed so the leftmost child ends up on top of the stack and
            # is therefore visited first.
            stack.extend(reversed(current_node.children))

        return traversal_order


# Extract and write function code to log with error handling
def write_extracted_functions(traversal_order, log_file):
    """Write the extracted source of every traversed function to ``log_file``.

    For each node other than the one named "root", a header line naming the
    function and its location is written, followed by the text returned by
    ``extract_function_from_file_with_line_numbers`` and a separator line of
    80 ``#`` characters. If extracting or writing the code raises, a
    ``# Error extracting function ...`` line is written in its place and
    processing continues with the next node.

    Args:
        traversal_order: Iterable of nodes exposing ``function_name``,
            ``file_path`` and ``line_number``.
        log_file: Path of the output file; it is overwritten.
    """
    separator = "#" * 80 + "\n"  # Separator between functions
    with open(log_file, 'w') as out:
        for node in traversal_order:
            if node.function_name == "root":
                continue
            out.write(f"Function {node.function_name} in file {node.file_path}:{node.line_number}:\n")
            try:
                function_code, _def_line = extract_function_from_file_with_line_numbers(
                    node.file_path, node.line_number
                )
                out.write(function_code + "\n")
            except Exception as e:
                # Best effort: one failing function must not abort the dump.
                out.write(f"# Error extracting function {node.function_name} in {node.file_path} at line {node.line_number}: {e}\n")
            out.write(separator)

# Main script to parse log, traverse, and extract functions
# log_file_path = "/root/vescale_prj/veScale/examples/nanogpt_4D_finetune/logs/2024_0911_1938_40/logs-finetune_4D-host_d3cb3edeb6a9-pid_432289-py/tracing-finetune_4D-2024_0911_1938_40.log"
log_file_path = "/root/vescale_prj/veScale/test/parallel/pipeline/instruction/logs/2024_0914_202631/test_schedule-host_d3cb3edeb6a9-pid_1575073-py/tracing-test_schedule-2024_0914_202631.log"
logs = parse_log_file(log_file_path)
call_stack_tree = CallStackTree()

# Replay the events in log order to rebuild the call tree.
for log in logs:
    function_name, file_path, line_number, operation = log
    if operation == "push":
        call_stack_tree.push(function_name, file_path, line_number)
    elif operation == "pop":
        call_stack_tree.pop()

# # Breadth-first traversal and save extracted functions
# bfs_log_file = "logs/bfs_extracted_functions.log"
# bfs_traversal_order = call_stack_tree.breadth_first_traversal()
# write_extracted_functions(bfs_traversal_order, bfs_log_file)

# Depth-first traversal and save extracted functions
dfs_log_file = "logs/dfs_extracted_functions.log"
# Nothing in this cell creates the output directory, so make sure it exists
# before the file is opened for writing.
os.makedirs(os.path.dirname(dfs_log_file), exist_ok=True)
dfs_traversal_order = call_stack_tree.depth_first_traversal()
write_extracted_functions(dfs_traversal_order, dfs_log_file)

# print(f"Breadth-First function extraction saved to {bfs_log_file}")
print(f"Depth-First function extraction saved to {dfs_log_file}")

Depth-First function extraction saved to logs/dfs_extracted_functions.log
