In [10]:
import re

class Node:
    def __init__(self, name):
        self.name = name
        self.children = []

    def add_child(self, child):
        # 检查是否存在重复子节点
        for existing_child in self.children:
            if existing_child.name == child.name:
                return
        self.children.append(child)

def parse_log_file(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    root = Node("root")
    stack = [root]

    for line in lines:
        if not line.strip() or 'call' not in line:
            continue

        indent_level = (line.count('-') // 4)

        # 提取函数名和路径：行号
        match = re.search(r'function (\w+) in (.+:\d+)', line)
        if match:
            function_name = match.group(1)
            path_line = match.group(2)
            line_content = f"{function_name} in {path_line}"
        else:
            continue

        node = Node(line_content)

        while len(stack) > indent_level + 1:
            stack.pop()

        # 检查重复调用
        if not any(child.name == node.name for child in stack[-1].children):
            stack[-1].add_child(node)
            stack.append(node)

    return root

def print_tree(node, prefix=""):
    children = node.children
    for index, child in enumerate(children):
        connector = "└── " if index == len(children) - 1 else "├── "
        print(prefix + connector + child.name)
        if child.children:
            extension = "    " if index == len(children) - 1 else "│   "
            print_tree(child, prefix + extension)

# 读取日志文件并生成树结构
file_path = './logs-run_itpn_pretraining-host_Fairfax4way04RTX4090-pid_2352294-py/tracing-run_itpn_pretraining-20240722_221840.log'
# file_path = 'path_to_log_file.log'  # 替换为日志文件的实际路径
root = parse_log_file(file_path)

# 打印树状结构
print_tree(root)

├── get_args in /home/xiaofeng.wu/prjs/iTPN/itpn_clip/run_itpn_pretraining.py:109
└── main in /home/xiaofeng.wu/prjs/iTPN/itpn_clip/run_itpn_pretraining.py:268
    ├── init_distributed_mode in /home/xiaofeng.wu/prjs/iTPN/itpn_clip/utils/utils.py:293
    ├── setup_for_distributed in /home/xiaofeng.wu/prjs/iTPN/itpn_clip/utils/utils.py:239
    ├── print in /home/xiaofeng.wu/prjs/iTPN/itpn_clip/utils/utils.py:246
    ├── get_rank in /home/xiaofeng.wu/prjs/iTPN/itpn_clip/utils/utils.py:268
    ├── is_dist_avail_and_initialized in /home/xiaofeng.wu/prjs/iTPN/itpn_clip/utils/utils.py:254
    ├── get_model in /home/xiaofeng.wu/prjs/iTPN/itpn_clip/run_itpn_pretraining.py:242
    ├── clip_tpn_base_3324_patch16_224 in /home/xiaofeng.wu/prjs/iTPN/itpn_clip/modeling_pretrain.py:371
    │   ├── __init__ in /home/xiaofeng.wu/prjs/iTPN/itpn_clip/modeling_pretrain.py:56
    │   ├── __init__ in /home/xiaofeng.wu/prjs/iTPN/itpn_clip/modeling_finetune.py:123
    │   │   └── __init__ in /home/xiaofeng.wu/

### only call

In [2]:
import re

class Node:
    def __init__(self, name):
        self.name = name
        self.children = []
        self.visited = False

    def add_child(self, child):
        # 检查是否存在重复子节点
        for existing_child in self.children:
            if existing_child.name == child.name:
                return
        self.children.append(child)

def parse_log_file(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    root = Node("root")
    stack = [root]

    for line in lines:
        if not line.strip():
            continue

        indent_level = (line.count('-') // 4)

        # 提取函数名和路径：行号
        call_match = re.search(r'call function (\w+) in (.+:\d+)', line)
        # exit_match = re.search(r'exit function (\w+) in (.+:\d+)', line)

        if call_match:
            function_name = call_match.group(1)
            path_line = call_match.group(2)
            line_content = f"{function_name}, {path_line}"
        # elif exit_match:
        #     function_name = exit_match.group(1)
        #     path_line = exit_match.group(2)
        #     line_content = f"exit {function_name} in {path_line}"
        else:
            continue

        node = Node(line_content)

        while len(stack) > indent_level + 1:
            stack.pop()

        # 检查重复调用
        if not any(child.name == node.name for child in stack[-1].children):
            stack[-1].add_child(node)
            stack.append(node)

    return root

def print_tree(node, prefix=""):
    children = node.children
    for index, child in enumerate(children):
        connector = "└── " if index == len(children) - 1 else "├── "
        print(prefix + connector + child.name)
        if child.children:
            extension = "    " if index == len(children) - 1 else "│   "
            print_tree(child, prefix + extension)

# 读取日志文件并生成树结构
# file_path = './logs-run_itpn_pretraining-host_Fairfax4way04RTX4090-pid_2352294-py/tracing-run_itpn_pretraining-20240722_221840.log'
file_path = '/home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/logs-run_itpn_pretraining-host_Fairfax4way04RTX4090-pid_2774112-py/tracing-run_itpn_pretraining-20240724_064619.log'
root = parse_log_file(file_path)

# 打印树状结构
print_tree(root)

├── get_args, /home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/run_itpn_pretraining.py:131
└── main, /home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/run_itpn_pretraining.py:316
    ├── init_distributed_mode, /home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/utils/utils.py:291
    ├── setup_for_distributed, /home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/utils/utils.py:237
    ├── create_ds_config, /home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/utils/utils.py:627
    ├── get_world_size, /home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/utils/utils.py:260
    │   └── is_dist_avail_and_initialized, /home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/utils/utils.py:252
    ├── print, /home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/utils/utils.py:244
    ├── get_rank, /home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/utils/utils.py:266
    ├── is_dist_avail_and_initialized, /home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/utils/utils.py:252
    ├── get_clip, /home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervi

### Study

node, graph, tree.

todo ...

In [None]:
import re

class Node:
    def __init__(self, name):
        self.name = name
        self.children = []
        self.visited = False

    def add_child(self, child):
        # 检查是否存在重复子节点
        for existing_child in self.children:
            if existing_child.name == child.name:
                return
        self.children.append(child)

def parse_log_file(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    root = Node("root")
    stack = [root]

    for line in lines:
        if not line.strip():
            continue

        indent_level = (line.count('-') // 4)

        # 提取函数名和路径：行号
        call_match = re.search(r'call function (\w+) in (.+:\d+)', line)
        # exit_match = re.search(r'exit function (\w+) in (.+:\d+)', line)

        if call_match:
            function_name = call_match.group(1)
            path_line = call_match.group(2)
            line_content = f"{function_name}, {path_line}"
        # elif exit_match:
        #     function_name = exit_match.group(1)
        #     path_line = exit_match.group(2)
        #     line_content = f"exit {function_name} in {path_line}"
        else:
            continue

        node = Node(line_content)

        while len(stack) > indent_level + 1:
            stack.pop()

        # 检查重复调用
        if not any(child.name == node.name for child in stack[-1].children):
            stack[-1].add_child(node)
            stack.append(node)

    return root

def print_tree(node, prefix=""):
    children = node.children
    for index, child in enumerate(children):
        connector = "└── " if index == len(children) - 1 else "├── "
        print(prefix + connector + child.name)
        if child.children:
            extension = "    " if index == len(children) - 1 else "│   "
            print_tree(child, prefix + extension)

# 读取日志文件并生成树结构
# file_path = './logs-run_itpn_pretraining-host_Fairfax4way04RTX4090-pid_2352294-py/tracing-run_itpn_pretraining-20240722_221840.log'
file_path = '/home/xiaofeng.wu/prjs/iTPN/CLIP_as_supervision/logs-run_itpn_pretraining-host_Fairfax4way04RTX4090-pid_2774112-py/tracing-run_itpn_pretraining-20240724_064619.log'
root = parse_log_file(file_path)

# 打印树状结构
print_tree(root)