In [None]:
import clang.cindex
import os

def get_caller_function_name(call_expr_cursor):
    """Return the spelling of the nearest FUNCTION_DECL ancestor, or None.

    Ancestors are reached by following ``semantic_parent`` links starting
    from ``call_expr_cursor``; the search stops at the first cursor whose
    kind is FUNCTION_DECL or when the chain runs out.
    """
    ancestor = call_expr_cursor.semantic_parent
    while ancestor:
        if ancestor.kind == clang.cindex.CursorKind.FUNCTION_DECL:
            return ancestor.spelling
        ancestor = ancestor.semantic_parent
    # Chain exhausted without meeting a function declaration.
    return None



def analyze_function_relationships(root_dir):
    """Map every function found under ``root_dir`` to the functions it calls.

    Args:
        root_dir: Path to a single source file, or to a directory that is
            searched recursively for C/C++ sources. Each source file is
            parsed as its own translation unit (libclang cannot parse a
            directory).

    Returns:
        dict mapping a function name to the list of callee names, in the
        order the calls appear. Functions that are only declared map to an
        empty list.
    """
    if os.path.isdir(root_dir):
        source_files = [
            os.path.join(folder, name)
            for folder, _subdirs, names in os.walk(root_dir)
            for name in names
            if name.endswith(('.c', '.cc', '.cpp', '.cxx'))
        ]
    else:
        source_files = [root_dir]

    index = clang.cindex.Index.create()
    function_calls = {}
    for source_file in source_files:
        # '-std=c++11' is rejected for plain C inputs, so only pass it for C++.
        args = [] if source_file.endswith('.c') else ['-std=c++11']
        translation_unit = index.parse(source_file, args=args)

        for cursor in translation_unit.cursor.walk_preorder():
            if cursor.kind != clang.cindex.CursorKind.FUNCTION_DECL:
                continue
            # setdefault: a later re-declaration must not wipe recorded calls.
            callees = function_calls.setdefault(cursor.spelling, [])
            # Calls live below the body's COMPOUND_STMT, not directly under
            # the declaration, so the whole subtree has to be walked.
            for node in cursor.walk_preorder():
                if node.kind == clang.cindex.CursorKind.CALL_EXPR and node.spelling:
                    callees.append(node.spelling)

    return function_calls


# Entry point for this cell: run the analysis on a hard-coded test directory.
if __name__ == '__main__':
    analyze_function_relationships("/home/tfpeng/codeparser/test/")



In [None]:
import os
from clang import cindex


folder_path = "/home/tfpeng/codeparser/"
index = cindex.Index.create()

# Collect the text of every C++ source/header under folder_path.
cpp_chunks = []
for root, dirs, files in os.walk(folder_path):
    for file in files:
        if file.endswith((".cpp", ".hpp")):
            file_path = os.path.join(root, file)
            with open(file_path, 'r', errors='ignore') as f:
                cpp_chunks.append(f.read())

# Join with newlines so a file lacking a trailing newline cannot fuse its last
# line with the first line (often a preprocessor directive) of the next file.
all_cpp_code = "\n".join(cpp_chunks)

# Build one translation unit from the accumulated in-memory source.
tu = index.parse('in-memory.cpp', args=['-std=c++11', '-O0'], unsaved_files=[('in-memory.cpp', all_cpp_code)])


In [None]:
def find_function_callers(cursor, target_function_name, _enclosing=None):
    """Return names of functions whose bodies call ``target_function_name``.

    A call is matched through the *definition* of the called function
    (``CALL_EXPR.get_definition()``), so calls to functions that have no
    definition in the translation unit are not reported.

    Args:
        cursor: Root of the subtree to search.
        target_function_name: Spelling of the called function to look for.
        _enclosing: Internal. Cursor of the function declaration currently
            being walked; callers should leave it at the default.

    Returns:
        List of caller names, one entry per matching call site.
    """
    function_kinds = (
        cindex.CursorKind.FUNCTION_DECL,
        cindex.CursorKind.CXX_METHOD,
        cindex.CursorKind.CONSTRUCTOR,
        cindex.CursorKind.DESTRUCTOR,
        cindex.CursorKind.FUNCTION_TEMPLATE,
    )
    callers = []

    for child in cursor.get_children():
        # Track the function we are inside: the direct parent of a call is a
        # statement, which has no definition and therefore no usable name.
        enclosing = child if child.kind in function_kinds else _enclosing

        if child.kind == cindex.CursorKind.CALL_EXPR and enclosing is not None:
            called_function = child.get_definition()
            if called_function and called_function.spelling == target_function_name:
                callers.append(enclosing.spelling)

        # Always descend, including into call expressions, so calls nested in
        # the arguments of another call are seen as well.
        callers.extend(find_function_callers(child, target_function_name, enclosing))

    return callers
def find_function_callers2(cursor, target_function_name, _enclosing=None):
    """Return names of functions whose bodies call ``target_function_name``.

    Unlike ``find_function_callers`` this matches through
    ``CALL_EXPR.referenced``, i.e. the referenced declaration, which may be a
    plain declaration rather than a definition.

    Args:
        cursor: Root of the subtree to search.
        target_function_name: Spelling of the called function to look for.
        _enclosing: Internal. Cursor of the function declaration currently
            being walked; callers should leave it at the default.

    Returns:
        List of caller names, one entry per matching call site.
    """
    function_kinds = (
        cindex.CursorKind.FUNCTION_DECL,
        cindex.CursorKind.CXX_METHOD,
        cindex.CursorKind.CONSTRUCTOR,
        cindex.CursorKind.DESTRUCTOR,
        cindex.CursorKind.FUNCTION_TEMPLATE,
    )
    callers = []
    for child in cursor.get_children():
        # Track the function we are inside: the direct parent of a call is a
        # statement, which has no definition and therefore no usable name.
        enclosing = child if child.kind in function_kinds else _enclosing

        if child.kind == cindex.CursorKind.CALL_EXPR and enclosing is not None:
            # The referenced entity may be a declaration rather than a definition.
            called_function = child.referenced
            if called_function and called_function.spelling == target_function_name:
                callers.append(enclosing.spelling)

        # Always descend, including into call expressions, so calls nested in
        # the arguments of another call are seen as well.
        callers.extend(find_function_callers2(child, target_function_name, enclosing))

    return callers

def find_function_cursor(root_cursor, function_name):
    """Return the first FUNCTION_DECL named ``function_name`` below ``root_cursor``.

    The descendants are visited depth-first in pre-order; ``None`` is
    returned when no such declaration exists.
    """
    # Explicit stack; children are pushed reversed so the first child is
    # popped (and therefore visited) first.
    pending = list(root_cursor.get_children())
    pending.reverse()
    while pending:
        node = pending.pop()
        if node.kind == clang.cindex.CursorKind.FUNCTION_DECL and node.spelling == function_name:
            return node
        pending.extend(reversed(list(node.get_children())))
    return None

def get_all_functions(cursor):
    """Collect the spellings of all FUNCTION_DECL cursors below ``cursor``.

    Names are returned in pre-order. The subtree of a FUNCTION_DECL itself is
    not searched further.
    """
    names = []
    # Explicit stack; children are pushed reversed to keep document order.
    stack = list(cursor.get_children())
    stack.reverse()
    while stack:
        node = stack.pop()
        if node.kind == cindex.CursorKind.FUNCTION_DECL:
            names.append(node.spelling)
            continue
        # Not a function declaration: keep looking among its children.
        stack.extend(reversed(list(node.get_children())))
    return names
# caller=find_function_callers(tu.cursor,"SetMotorSpeed")

In [None]:
# Spellings of the FUNCTION_DECL cursors collected from the translation unit `tu`.
all_function=get_all_functions(tu.cursor)

In [None]:
# Quick check: is this function name among the collected declarations?
if "Create433ServerManager" in all_function:
    print("yes")

In [None]:
def get_caller_function_name(call_expr_cursor):
    """Return the spelling of the nearest FUNCTION_DECL ancestor, or None.

    Ancestors are reached through ``semantic_parent`` links, starting at the
    parent of ``call_expr_cursor``.
    """
    ancestor = call_expr_cursor.semantic_parent
    while ancestor:
        if ancestor.kind == cindex.CursorKind.FUNCTION_DECL:
            return ancestor.spelling
        ancestor = ancestor.semantic_parent
    # No enclosing function declaration on the parent chain.
    return None

def get_callers_for_function(function_name):
    """Return the names of functions in ``tu`` that call ``function_name``.

    Args:
        function_name: Spelling of the function whose callers are wanted.

    Returns:
        ``None`` when no declaration of ``function_name`` exists in the
        translation unit; otherwise a list of caller names (each caller
        listed once, in first-call order; possibly empty).
    """
    if find_function_cursor(tu.cursor, function_name) is None:
        return None  # Function not found

    function_kinds = (cindex.CursorKind.FUNCTION_DECL, cindex.CursorKind.CXX_METHOD)
    callers = []

    def visit(node, enclosing_name):
        # Walk the subtree, remembering which function body we are inside;
        # call expressions sit below the body statement, never directly
        # under the declaration.
        for child in node.get_children():
            if child.kind in function_kinds:
                visit(child, child.spelling)
                continue
            if (child.kind == cindex.CursorKind.CALL_EXPR
                    and child.spelling == function_name
                    and enclosing_name
                    and enclosing_name not in callers):
                callers.append(enclosing_name)
            visit(child, enclosing_name)

    visit(tu.cursor, None)
    return callers

def find_function_cursor(root_cursor, function_name):
    """Return the first FUNCTION_DECL named ``function_name`` below ``root_cursor``.

    Descendants are visited depth-first in pre-order and the spelling of
    every visited cursor is printed along the way. Returns ``None`` when
    nothing matches.
    """
    # Stack of child iterators: the top iterator yields the next sibling at
    # the current depth.
    trail = [iter(root_cursor.get_children())]
    while trail:
        node = next(trail[-1], None)
        if node is None:
            # This level is exhausted; resume with the parent's siblings.
            trail.pop()
            continue
        if hasattr(node, "spelling"):
            print(node.spelling)
        if node.kind == cindex.CursorKind.FUNCTION_DECL and node.spelling == function_name:
            return node
        trail.append(iter(node.get_children()))
    return None
# Run the lookup for one specific function name and keep the result.
caller=get_callers_for_function("Create433ServerManager")

In [None]:
import json

def cursor_to_dict(cursor):
    """Convert ``cursor`` and its whole subtree into nested plain dicts.

    Each dict has the keys ``kind`` (the kind's name), ``spelling`` and
    ``children`` (list of dicts built the same way for the child cursors).
    """
    node = {}
    node['kind'] = cursor.kind.name
    node['spelling'] = cursor.spelling
    child_dicts = []
    for sub_cursor in cursor.get_children():
        child_dicts.append(cursor_to_dict(sub_cursor))
    node['children'] = child_dicts
    return node

def save_ast_to_json(file_path, translation_unit):
    """Write the AST of ``translation_unit`` to ``file_path`` as indented JSON.

    The tree is produced by ``cursor_to_dict`` starting at the translation
    unit's root cursor.
    """
    serializable_tree = cursor_to_dict(translation_unit.cursor)
    with open(file_path, 'w') as handle:
        json.dump(serializable_tree, handle, indent=2)

# Dump the AST of the translation unit `tu` to ast.json in the working directory.
save_ast_to_json("ast.json", tu)

In [None]:
import json
from clang.cindex import CursorKind
nodes = []

def traverse(node):
    """Build a dict describing ``node`` and its subtree, recording it in ``nodes``.

    The dict holds ``kind`` (the CursorKind), ``location`` as
    ``(file name or None, line, column)``, ``name`` for namespaces, and
    ``children`` when the node has any. Every visited node's dict is also
    appended to the module-level ``nodes`` list (children before parents).

    Returns:
        The dict built for ``node``.
    """
    source_file = node.location.file
    node_dict = {}
    node_dict['kind'] = node.kind
    # Store the file *name*: the File object itself is not JSON serializable
    # and is not handled by CursorKindEncoder.
    node_dict['location'] = (
        source_file.name if source_file is not None else None,
        node.location.line,
        node.location.column,
    )

    if node.kind == cindex.CursorKind.NAMESPACE:
        node_dict['name'] = node.displayname

    children = [traverse(c) for c in node.get_children()]
    if children:
        node_dict['children'] = children

    nodes.append(node_dict)

    return node_dict



class CursorKindEncoder(json.JSONEncoder):
    """JSON encoder that serializes a CursorKind as its ``value`` attribute."""

    def default(self, obj):
        """Encode CursorKind instances; defer everything else to the base class."""
        if not isinstance(obj, CursorKind):
            return super().default(obj)
        return obj.value

# Walk the whole translation unit; traverse() fills the module-level `nodes` list.
root = tu.cursor
traverse(root)

# Serialize the collected node dicts; CursorKindEncoder handles the CursorKind values.
with open('ast1.json', 'w') as f:
    json.dump(nodes, f, indent=4,cls=CursorKindEncoder)

In [None]:
import os
import json
from clang import cindex

def analyze_cpp_files(folder_path):
    """Parse every ``.cpp`` file under ``folder_path`` and dump the ASTs to JSON.

    The per-file AST lists produced by ``analyze_cpp_file`` are collected and
    written to ``all_asts.json`` in the current working directory.

    Args:
        folder_path: Directory searched recursively for ``.cpp`` files.
    """
    # Initialize libclang.
    index = cindex.Index.create()

    # One entry per parsed file.
    all_asts = []

    # os.walk already descends into every subfolder, so no explicit recursion
    # is needed (recursing again would re-parse subfolders and rewrite the
    # output file once per nested call).
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            if file.endswith(".cpp"):
                file_path = os.path.join(root, file)
                all_asts.append(analyze_cpp_file(file_path, index))

    # Serialize all ASTs to a single JSON file.
    with open("all_asts.json", "w") as json_file:
        json.dump(all_asts, json_file, indent=2)

def analyze_cpp_file(file_path, index):
    """Parse one source file with ``index`` and return a flat AST listing.

    The file's text is read (undecodable bytes ignored) and handed to the
    parser as an unsaved file; ``-std=c++11`` is passed only for ``.cpp``
    paths.

    Returns:
        A list with one dict per cursor in pre-order, holding ``kind``,
        ``spelling``, ``location`` (``[line, column]``) and ``children``
        (spellings of the direct children).
    """
    with open(file_path, 'r', errors='ignore') as source:
        cpp_code = source.read()

    # Build the parse call once; only the C++ flag differs between the cases.
    parse_kwargs = {}
    if file_path.endswith(".cpp"):
        parse_kwargs['args'] = ['-std=c++11']
    parse_kwargs['unsaved_files'] = [(file_path, cpp_code)]
    tu = index.parse(file_path, **parse_kwargs)

    return [
        {
            'kind': cursor.kind.name,
            'spelling': cursor.spelling,
            'location': [cursor.location.line, cursor.location.column],
            'children': [child.spelling for child in cursor.get_children()],
        }
        for cursor in tu.cursor.walk_preorder()
    ]

# Entry point for this cell: analyze the hard-coded test folder.
if __name__ == "__main__":
    folder_path = "/home/tfpeng/codeparser/test/"
    analyze_cpp_files(folder_path)


In [None]:
import os
import json
from clang import cindex

def merge_files(directory_path):
    """Concatenate every ``.c``/``.cpp`` file under ``directory_path`` into one file.

    The result is written to ``merged_file.cpp`` in the current working
    directory.

    Args:
        directory_path: Directory searched recursively for source files.

    Returns:
        The path of the merged file (``"merged_file.cpp"``).
    """
    merged_file_path = "merged_file.cpp"
    merged_abs_path = os.path.abspath(merged_file_path)

    with open(merged_file_path, "w") as merged_file:
        # Walk the directory that was passed in (not a module-level global).
        for root, _dirs, files in os.walk(directory_path):
            for file in files:
                if not file.endswith((".c", ".cpp")):
                    continue
                file_path = os.path.join(root, file)
                # The output may live inside the walked tree; never read the
                # file that is currently being written.
                if os.path.abspath(file_path) == merged_abs_path:
                    continue
                with open(file_path, 'r', errors='ignore') as f:
                    cpp_code = f.read()
                merged_file.write(cpp_code)
                # Keep the next file's first line from fusing with this
                # file's last line when the trailing newline is missing.
                if cpp_code and not cpp_code.endswith("\n"):
                    merged_file.write("\n")

    return merged_file_path

def analyze_merged_file(file_path, index):
    """Parse the merged source at ``file_path`` and return a flat AST listing.

    The file's text is passed to ``index.parse`` as an unsaved file together
    with ``-std=c++11``.

    Returns:
        A list with one dict per cursor in pre-order, holding ``kind``,
        ``spelling`` and ``children`` (spellings of the direct children).
    """
    with open(file_path, 'r') as merged:
        merged_code = merged.read()

    unsaved = [(file_path, merged_code)]
    tu = index.parse(file_path, args=['-std=c++11'], unsaved_files=unsaved)

    def describe(cursor):
        # One flat record per cursor; children are referenced by spelling only.
        return {
            'kind': cursor.kind.name,
            'spelling': cursor.spelling,
            'children': [child.spelling for child in cursor.get_children()],
        }

    return [describe(cursor) for cursor in tu.cursor.walk_preorder()]

def process_directory(directory_path):
    """Merge the sources under ``directory_path``, parse them, and dump the AST.

    Steps: ``merge_files`` builds the merged source, ``analyze_merged_file``
    turns it into a flat AST listing, and the listing is written to
    ``merged_ast.json`` in the current working directory.
    """
    merged_source = merge_files(directory_path)

    # Initialize libclang.
    clang_index = cindex.Index.create()

    print(f"Generating AST for merged file: {merged_source}")
    ast_listing = analyze_merged_file(merged_source, clang_index)

    # Serialize the AST listing to JSON.
    with open("merged_ast.json", "w") as out:
        json.dump(ast_listing, out, indent=2)

# Entry point for this cell: merge and analyze the hard-coded project folder.
if __name__ == "__main__":
    folder_path = "/home/tfpeng/codeparser/"
    process_directory(folder_path)


In [None]:
# Show the node dicts accumulated in the module-level `nodes` list.
print(nodes)

In [None]:
## Works, but member functions defined in .cpp files cannot be parsed: the header files are not found, so the class definitions are missing
import os
import clang.cindex
from clang.cindex import Index, CursorKind
import re
import json
import glob
# NOTE: despite the "exclude" names, each pattern is a negative lookahead and
# therefore MATCHES the names that are kept; a name that fails to match is the
# one that gets skipped by the code using these lists.
file_exclude_list=[r'^(?!arm_).*']# [names not starting with "arm_"]
func_exclude_list=[r'^(?!__).*'] #[names not starting with "__"]

# Tokens that are dropped from extracted function bodies: C++ keywords, common
# fixed-width type names and preprocessor words, C keywords, and operators.
# Some entries appear more than once; that is harmless for membership tests.
cpp_and_c_keywords_and_operators = [
    'alignas', 'alignof', 'and', 'and_eq', 'asm', 'auto', 'bitand', 'bitor', 'bool', 'break', 'case', 'catch', 'char',
    'char8_t', 'char16_t', 'char32_t', 'class', 'compl', 'concept', 'const', 'consteval', 'constexpr', 'const_cast',
    'continue', 'co_await', 'co_return', 'co_yield', 'decltype', 'default', 'delete', 'do', 'double', 'dynamic_cast',
    'else', 'enum', 'explicit', 'export', 'extern', 'false', 'float', 'for', 'friend', 'goto', 'if', 'inline', 'int',
    'long', 'mutable', 'namespace', 'new', 'noexcept', 'not', 'not_eq', 'nullptr', 'operator', 'or', 'or_eq', 'private',
    'protected', 'public', 'register', 'reinterpret_cast', 'requires', 'return', 'short', 'signed', 'sizeof', 'static',
    'static_assert', 'static_cast', 'struct', 'switch', 'template', 'this', 'thread_local', 'throw', 'true', 'try',
    'typedef', 'typeid', 'typename', 'union', 'unsigned', 'using', 'virtual', 'void', 'volatile', 'wchar_t', 'while',
    # The trailing comma after 'endif' matters: without it Python concatenates
    # the adjacent literals into the single bogus entry 'endifauto'.
    'xor', 'xor_eq','NULL','null','int8_t','uint8_t','int16_t','uint16_t','int32_t','uint32_t','ifdef','ifndef','endif',
    'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', 'extern', 'float',
    'for', 'goto', 'if', 'inline', 'int', 'long', 'register', 'return', 'short', 'signed', 'sizeof', 'static', 'struct',
    'switch', 'typedef', 'union', 'unsigned', 'void', 'volatile', 'while',
    '+', '-', '*', '/', '%', '+=', '-=', '*=', '/=', '%=', '++', '--', '<<', '>>', '<<=', '>>=', '&', '|', '^',
    '&=', '|=', '^=', '~', '!', '&&', '||', '==', '!=', '<', '>', '<=', '>=', '?', ':', '=', '+=', '-=', '*=', '/=',
]

def find_header_files(root_folder):
    """Return the paths of all ``*.h`` and ``*.hpp`` files under ``root_folder``.

    Folders are visited with ``os.walk``; within each folder the ``*.h``
    matches are listed before the ``*.hpp`` matches.
    """
    found = []
    for foldername, _subfolders, _filenames in os.walk(root_folder):
        # glob does the per-folder matching for each header suffix in turn.
        for pattern in ('*.h', '*.hpp'):
            found += glob.glob(os.path.join(foldername, pattern))
    return found
# Project root to scan, and the header paths found under it (read by run_clang).
folder_path = "/home/tfpeng/codeparser/"  # replace with the actual folder path
header_files=find_header_files(folder_path)

def get_included_headers(file_path):
    """Return the header names referenced by ``#include`` lines in ``file_path``.

    Both ``#include "x"`` and ``#include <x>`` forms are recognized; the
    returned strings are the text between the delimiters, in file order.

    Args:
        file_path: Path of the source file to scan.

    Returns:
        List of included header names (possibly empty).
    """
    # errors='ignore': sources that are not valid UTF-8 must not abort the
    # scan; the rest of this file reads sources the same tolerant way.
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
        content = file.read()

    # Match #include statements with either delimiter style.
    include_pattern = re.compile(r'#include\s*["<](.*?)[">]')
    included_headers = include_pattern.findall(content)

    return included_headers

def run_clang(root,file):
    """Parse ``file`` (located in ``root``) with libclang and return the translation unit.

    ``.c`` paths are parsed with default arguments. Anything else is parsed
    as C++11 from its in-memory text, with one ``-I`` option for the folder
    of every known header (``header_files``) whose path ends with a name the
    source ``#include``s.
    """
    source_path = os.path.join(root, file)
    index = Index.create()

    if source_path.endswith(".c"):
        return index.parse(source_path)

    with open(source_path, 'r', errors='ignore') as src:
        source_text = src.read()

    clang_args = ['-x', 'c++', '--std=c++11']
    # Add the directory of each project header that matches an include.
    clang_args += [
        f'-I{os.path.dirname(candidate)}'
        for header in get_included_headers(source_path)
        for candidate in header_files
        if candidate.endswith(header)
    ]
    return index.parse(source_path, args=clang_args, unsaved_files=[(source_path, source_text)])

def locate_function_body(cursor):
    """Return the token spellings of ``cursor``, without comments and strings.

    Args:
        cursor: Object whose ``get_tokens()`` yields tokens that expose a
            ``spelling`` string.

    Returns:
        List of token spellings in source order. Comment tokens and any
        token containing a double quote (string literals) are left out.
    """
    func_body = []
    for token in cursor.get_tokens():
        spelling = token.spelling
        # A comment arrives as ONE token holding the whole comment, so it is
        # skipped on its own; no state may leak to the tokens that follow.
        if spelling.startswith(("//", "/*")):
            continue
        # Skip string literals.
        if '"' in spelling:
            continue
        func_body.append(spelling)
    return func_body
def has_no_letters(input_string):
    """Return True when ``input_string`` contains no alphabetic character."""
    for ch in input_string:
        if ch.isalpha():
            return False
    return True
def unique_elements_in_order(seq):
    """Return the distinct elements of ``seq`` as a list, in first-seen order."""
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(seq))
def not_start_with(str,match_str):
    """Return 1 when ``str`` does not start with the regex ``match_str``, else 0.

    ``match_str`` is inserted into a negative lookahead as-is, so it is
    interpreted as a regular expression, not as a literal prefix.
    """
    matched = re.match(f'^(?!{match_str}).*', str)
    return 1 if matched else 0
def extract_functions(cursor):
    """Map each function defined directly under ``cursor`` to its identifier tokens.

    Only FUNCTION_DECL / CXX_METHOD children that are definitions are
    considered. A function is kept when its name matches EVERY pattern in
    ``func_exclude_list`` (the patterns are negative lookaheads, so a
    non-match means "excluded"); with an empty list every function is kept.

    Returns:
        dict mapping function name to the ordered, de-duplicated token list
        with empty tokens, letter-less tokens, ``__``-prefixed tokens and
        keywords/operators removed. Functions with no body tokens are left
        out.
    """
    functions_data = {}
    for node in cursor.get_children():
        if node.kind != CursorKind.FUNCTION_DECL and node.kind != CursorKind.CXX_METHOD:
            continue
        if not node.is_definition():
            continue
        function_name = node.spelling

        # Apply all patterns; previously only the last pattern decided, and
        # an empty list left the body variable undefined or stale.
        if not all(re.match(pattern, function_name) for pattern in func_exclude_list):
            continue

        function_body = locate_function_body(node)
        if not function_body:
            continue

        # Drop empty strings, strings without letters and "__"-prefixed names.
        content = [
            item for item in function_body
            if item and not has_no_letters(item) and not_start_with(item, '__')
        ]

        # De-duplicate (keeping order) and remove C/C++ keywords and operators.
        unique_tokens = [
            token for token in unique_elements_in_order(content)
            if token not in cpp_and_c_keywords_and_operators
        ]

        functions_data[function_name] = unique_tokens

    return functions_data

def process_cpp_files(folder_path):
    """Extract per-function token lists for every C/C++ source under ``folder_path``.

    A ``.c``/``.cpp`` file is processed when its name matches EVERY pattern
    in ``file_exclude_list`` (the patterns are negative lookaheads, so a
    non-match means "excluded"); with an empty list every file is processed.

    Returns:
        dict mapping a file's base name to the result of
        ``extract_functions`` for it. Files yielding no functions are
        omitted.
    """
    cpp_files_data = {}

    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            if not file.endswith(('.cpp', '.c')):
                continue
            # Filter before parsing: excluded files should not cost a parse,
            # and every pattern (not just the last one) has to accept the name.
            if not all(re.match(pattern, file) for pattern in file_exclude_list):
                continue

            translation_unit = run_clang(root, file)
            functions_data = extract_functions(translation_unit.cursor)
            if functions_data:
                cpp_files_data[file] = functions_data

    return cpp_files_data

# Entry point for this cell; `result` stays at module level after it runs.
if __name__ == "__main__":
    result = process_cpp_files(folder_path)
    # Convert the dict to a JSON string
    json_string = json.dumps(result, indent=2)

    # Write the JSON string to a file
    file_path = 'output.json'
    with open(file_path, 'w') as json_file:
        json_file.write(json_string)




In [None]:
# Drop the first (per-file) level and store the remaining entries in one new dict
import copy
result_dict = {}
for functions_dict in result.values():
    # Later files overwrite earlier entries that share a function name.
    result_dict.update(functions_dict)

# Every known function starts without parents and without children.
tree = {name: {"parents": [], "children": []} for name in result_dict}

for callee in result_dict:
    # A function is a parent of `callee` when callee's name is among its tokens.
    parents = [
        caller
        for caller, tokens in result_dict.items()
        if caller != callee and callee in tokens
    ]
    for caller in parents:
        tree[caller]["children"].append(callee)
    if parents:
        tree[callee]["parents"] = parents


# Convert the dict to a JSON string
json_string = json.dumps(tree, indent=2)

# Write the JSON string to a file
file_path = 'tree.json'
with open(file_path, 'w') as json_file:
    json_file.write(json_string)



In [None]:
# Test: class member functions can be retrieved
import clang.cindex
from clang.cindex import *

def method_definitions(cursor):
    """Yield the direct children of ``cursor`` that are function/method definitions.

    A child qualifies when its kind is CXX_METHOD or FUNCTION_DECL and
    ``is_definition()`` is true.
    """
    for node in cursor.get_children():
        is_callable_decl = (
            node.kind == CursorKind.CXX_METHOD or node.kind == CursorKind.FUNCTION_DECL
        )
        if is_callable_decl and node.is_definition():
            yield node

def extract_definition(cursor):
    """Return the source text covered by ``cursor``'s extent.

    Args:
        cursor: Cursor whose ``location.file.name`` names the source file and
            whose ``extent.start.offset`` / ``extent.end.offset`` delimit the
            definition.

    Returns:
        The text of the definition as a ``str`` (decoded as UTF-8; bytes
        that cannot be decoded are replaced).
    """
    filename = cursor.location.file.name
    # Extent offsets count BYTES in the file, so the slice must be taken on
    # the raw bytes; slicing decoded text goes wrong as soon as the file
    # contains multi-byte characters or translated line endings.
    with open(filename, 'rb') as fh:
        contents = fh.read()
    definition = contents[cursor.extent.start.offset: cursor.extent.end.offset]
    return definition.decode('utf-8', errors='replace')

# Parse temp2.cpp as C++ with one extra include directory (this rebinds `tu`),
# then print the source text of every function/method definition at its top level.
idx = Index.create()
tu = idx.parse('temp2.cpp', ['-x', 'c++','-I/home/tfpeng/codeparser/modules/monitor/include/'])
defns = method_definitions(tu.cursor)
for defn in defns:
    print(extract_definition(defn))

In [None]:
import os
import glob

def find_header_files(root_folder):
    """List every ``*.h`` and ``*.hpp`` path found under ``root_folder``.

    The folder tree is walked with ``os.walk``; for each folder the ``*.h``
    matches come first, followed by the ``*.hpp`` matches.
    """
    header_files = []
    for entry in os.walk(root_folder):
        folder = entry[0]
        # Per-folder glob for each of the two header suffixes.
        c_style = glob.glob(os.path.join(folder, '*.h'))
        cpp_style = glob.glob(os.path.join(folder, '*.hpp'))
        header_files += c_style + cpp_style
    return header_files
def read_header_files(root_folder):
    """Return the contents of all ``*.h``/``*.hpp`` files under ``root_folder``.

    Files are read as UTF-8 with undecodable bytes ignored, and their
    contents are joined with a newline into a single string.
    """
    contents = []
    for foldername, _subfolders, _filenames in os.walk(root_folder):
        # Per folder: all *.h files first, then all *.hpp files.
        for suffix in ('*.h', '*.hpp'):
            for header_file in glob.glob(os.path.join(foldername, suffix)):
                with open(header_file, 'r', encoding='utf-8',errors='ignore') as file:
                    contents.append(file.read())
    return '\n'.join(contents)
# Replace with the actual folder path
folder_path = "/home/tfpeng/codeparser/"

# Read the contents of all .h and .hpp files
all_header_content = read_header_files(folder_path)

print(all_header_content)
