In [44]:
import builtins
import os
import re
import glob
from pydantic import BaseModel

# Names of every callable attribute exposed by the `builtins` module
# (functions as well as classes and exception types).
builtin_functions = list(
    filter(lambda attr: callable(getattr(builtins, attr)), dir(builtins))
)
print(builtin_functions)



In [6]:
import re

def get_used_functions(script_path):
    """Return the distinct call-like names found in a script.

    A name is any run of non-whitespace characters that starts a line or
    follows whitespace and is followed (optionally after whitespace) by an
    opening parenthesis.  The scan is purely textual: keywords written as
    ``if (`` and text inside comments or strings are reported as well.

    Args:
        script_path: Path of the Python source file to scan.

    Returns:
        set[str]: The names that precede an opening parenthesis.
    """
    # Python source is UTF-8 by default; the locale encoding (e.g. cp1252 on
    # Windows) would fail or mis-decode non-ASCII source files.
    with open(script_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Group 1 is the line start / whitespace anchor, group 2 the name itself.
    pattern = r'(^|[\s])([^\s]+?)\s*\('
    matches = re.findall(pattern, content, re.MULTILINE)

    return {name for _, name in matches}

In [None]:
import re

def get_used_functions_with_indent(script_path):
    """List every call-like name in a script with its line and indentation.

    Lines whose first non-blank character is ``#`` are skipped.  On every
    other line each run of letters, digits, underscores and dots that is
    followed (optionally after whitespace) by ``(`` is reported, unless the
    run ends with a dot.  The scan is textual, so keywords such as ``if (``
    and text in trailing comments or strings are reported too.

    Args:
        script_path: Path of the Python source file to scan.

    Returns:
        list[dict]: One ``{'name', 'line', 'indent'}`` entry per match, in
        file order.  ``line`` is 1-based; ``indent`` is the number of leading
        whitespace characters on the line.
    """
    # Compiled once instead of once per line.
    call_pattern = re.compile(r'\b([a-zA-Z0-9_\.]+)\s*\(')
    function_info = []

    # Python source is UTF-8 by default; do not depend on the locale encoding.
    with open(script_path, 'r', encoding='utf-8') as file:
        for line_number, line in enumerate(file, 1):
            stripped = line.lstrip()
            if stripped.startswith('#'):
                continue

            indent_level = len(line) - len(stripped)

            for match in call_pattern.finditer(line):
                function_name = match.group(1)
                # "foo. (" is not a usable dotted name.
                if function_name.endswith('.'):
                    continue
                function_info.append({
                    'name': function_name,
                    'line': line_number,
                    'indent': indent_level
                })

    return function_info


# Example usage: scan one script and print each call-like name, indented by
# the same number of spaces as the line it was found on.
# NOTE(review): the path is a hard-coded absolute local path.
script_path = 'C:/Users/justl/Documents/folder/main.py'
functions = get_used_functions_with_indent(script_path)

print("Functions used in the script with their indent levels:")
for func in functions:
    print(f"  Line {func['line']}: {' ' * func['indent']}{func['name']} (indent: {func['indent']})")


In [None]:
# Folder holding the scripts and the file name used as the entry point.
# NOTE(review): hard-coded absolute local path.
folder = "C:/Users/justl/Documents/folder/"
entry_point = "main.py"

# Full path of the entry-point script inside `folder`.
start_file_path = os.path.join(folder, entry_point)


In [None]:
import re
import json

class Node:
    """A single element of a parsed script tree.

    Attributes:
        type: Label describing what kind of source line the node stands for.
        content: Text of the source line.
        children: Nested nodes; a fresh empty list when none (or an empty
            list) is supplied.
    """

    def __init__(self, type, content, children=None):
        self.type, self.content = type, content
        # Any falsy value (None, empty list) is swapped for a new list.
        self.children = children if children else []

def parse_python_to_nodes(script_path):
    """Build a Node tree that mirrors the block structure of a script.

    Blank lines and lines starting with ``#`` are ignored.  Every other line
    becomes a node whose ``type`` is chosen from its leading keyword
    (``function``, ``condition``, ``loop``, ``import``), ``function_call``
    when it contains ``(``, or ``statement`` otherwise.  A line ending in
    ``:`` opens a block: following lines that are indented deeper than it
    become its children.

    Args:
        script_path: Path of the Python source file to parse.

    Returns:
        Node: A node of type ``'root'`` holding the top-level nodes.
    """
    # Checked in order; the first matching prefix decides the node type.
    prefix_types = (
        (('def ',), 'function'),
        (('if ', 'elif ', 'else:'), 'condition'),
        (('for ', 'while '), 'loop'),
        (('import ', 'from '), 'import'),
    )

    # Python source is UTF-8 by default; do not depend on the locale encoding.
    with open(script_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    root = Node(type='root', content='')
    # Each entry pairs an open block's node with the indent of its header
    # line.  The root uses -1 so it can never be popped, and comparing against
    # real header indents works for any indentation width.
    stack = [(root, -1)]

    for line in lines:
        content = line.strip()
        if content == '' or content.startswith('#'):
            continue

        indent = len(line) - len(line.lstrip())

        # Close every block whose header is not shallower than this line.
        while len(stack) > 1 and indent <= stack[-1][1]:
            stack.pop()

        node_type = 'function_call' if '(' in content else 'statement'
        for prefixes, label in prefix_types:
            if content.startswith(prefixes):
                node_type = label
                break

        node = Node(type=node_type, content=content)
        stack[-1][0].children.append(node)

        if content.endswith(':'):
            stack.append((node, indent))

    return root

def node_to_dict(node):
    """Convert a node and all of its descendants into nested dictionaries.

    Args:
        node: Object exposing ``type``, ``content`` and ``children``.

    Returns:
        dict: ``{'type', 'content', 'children'}`` where ``children`` is the
        list of converted child nodes.
    """
    converted = {'type': node.type, 'content': node.content}
    converted['children'] = list(map(node_to_dict, node.children))
    return converted

# Example usage: parse one script into a node tree.
# NOTE(review): the path is a hard-coded absolute local path.
script_path = 'C:/Users/justl/Documents/folder/main.py'
root = parse_python_to_nodes(script_path)

# Convert to JSON for easy visualization or further processing
# (nested dicts from node_to_dict, pretty-printed with a 2-space indent).
json_representation = json.dumps(node_to_dict(root), indent=2)
print(json_representation)


In [99]:
import json
import re
from typing import *

class Alias(BaseModel):
    """One ``<name> as <alias>`` rename taken from an import statement."""

    # Token found immediately before the `as` keyword.
    actual_name: str
    # Token found immediately after the `as` keyword.
    alias_name: str
    # Whether a `.py` file matching the statement's module path was found in
    # the folder being analysed (set by get_alias).
    local_file: bool

class Node(BaseModel):
    """Pydantic model for one node of the parsed script tree.

    NOTE: this re-uses the name of the plain ``Node`` class defined in an
    earlier cell; once this cell runs, ``Node`` refers to this model.
    """

    # Kind of source line, e.g. 'root', 'class', 'custom_type', 'function',
    # 'condition', 'loop', 'import', 'function_call', 'statement', 'docstring'.
    type: str
    # Stripped text of the source line (or the joined docstring lines).
    content: str
    # Nodes nested under this one.
    children: List['Node'] = []
    # Names captured in front of an opening parenthesis on the line.
    functions: List[str] = []
    # Import aliases whose alias name appears among the dotted parts of one
    # of the names in `functions`.
    aliases: List[Alias] = []

# class Node:
#     def __init__(self, type, content, children=None, function=None):
#         self.type = type
#         self.content = content
#         self.children = children or []
#         self.function = function

def get_alias(node: Node, folder: str):
    """Extract every ``<name> as <alias>`` pair from an import node.

    Args:
        node: Import node; only its ``content`` (the statement text) is read.
        folder: Directory in which imported modules are looked up to decide
            whether they are local files.

    Returns:
        list[Alias]: One entry per ``as`` clause, in statement order; empty
        when the statement has no ``as`` clause.  For ``from m import x as y``
        ``local_file`` says whether ``m`` resolves to a ``.py`` file inside
        ``folder``; for ``import a as x, b as y`` each alias is checked
        against its own module.
    """
    # Drop a trailing comment and turn separators into spaces, so that
    # "x as y, a as b" no longer yields the alias name "y,".
    statement = node.content.split("#", 1)[0]
    for separator in ",()":
        statement = statement.replace(separator, " ")
    parts = statement.split()
    if len(parts) < 2:
        return []

    def is_local_module(module_name):
        # Leading dots of a relative import are dropped; otherwise ".mod"
        # becomes "/mod", an absolute path that escapes `folder`.
        relative_path = module_name.lstrip(".").replace(".", "/")
        return os.path.exists(os.path.join(folder, relative_path + ".py"))

    from_import = parts[0] == "from"
    source_is_local = is_local_module(parts[1]) if from_import else False

    aliases = []
    for index, part in enumerate(parts):
        # An "as" needs a token on both sides to form a pair.
        if part != "as" or index == 0 or index + 1 >= len(parts):
            continue

        actual_name = parts[index - 1]  # The name before "as"
        alias_name = parts[index + 1]   # The name after "as"

        if from_import:
            local_file = source_is_local
        else:
            local_file = is_local_module(actual_name)

        aliases.append(Alias(
            alias_name=alias_name,
            actual_name=actual_name,
            local_file=local_file
        ))

    return aliases

def alias_to_dict(alias: Alias) -> dict:
    """Copy the three fields of an alias into a plain dictionary.

    Args:
        alias: Object exposing ``actual_name``, ``alias_name`` and
            ``local_file``.

    Returns:
        dict: Those three attributes keyed by their own names.
    """
    field_names = ('actual_name', 'alias_name', 'local_file')
    return {field: getattr(alias, field) for field in field_names}

def node_to_dict(node):
    """Convert a node, its descendants and its aliases into nested dicts.

    Args:
        node: Object exposing ``type``, ``content``, ``functions``,
            ``children`` and ``aliases``.

    Returns:
        dict: Keys ``type``, ``content``, ``functions``, ``children`` (the
        converted child nodes) and ``alias`` (the truthy aliases, each
        converted with ``alias_to_dict``).
    """
    payload = {
        'type': node.type,
        'content': node.content,
        'functions': node.functions,
    }
    payload['children'] = list(map(node_to_dict, node.children))
    payload['alias'] = [alias_to_dict(entry) for entry in node.aliases if entry]
    return payload

# def dict_to_node(data: Dict[str, Any]) -> Node:
#     # Create the Node object from the dictionary
#     node = Node(
#         type=data['type'],
#         content=data['content'],
#         function=data.get('function'),
#         alias=Alias(**data['alias']) if data.get('alias') else None
#     )
    
#     # Recursively convert children
#     for child_data in data.get('children', []):
#         child_node = dict_to_node(child_data)
#         node.children.append(child_node)
    
#     return node

In [108]:
def _local_module_files(statement, folder):
    """Return the relative ``.py`` paths of imported modules found in ``folder``."""
    tokens = statement.split("#", 1)[0]
    for separator in ",()":
        tokens = tokens.replace(separator, " ")
    tokens = tokens.split()
    if len(tokens) < 2:
        return []

    if tokens[0] == "from":
        module_names = [tokens[1]]
    else:
        # Plain import: every token except "as" and the alias that follows it.
        module_names = [
            token for position, token in enumerate(tokens[1:], 1)
            if token != "as" and tokens[position - 1] != "as"
        ]

    found = []
    for module_name in module_names:
        # Leading dots (relative import) are dropped so the path stays
        # relative to `folder` instead of turning into "/module".
        stem = module_name.lstrip(".").replace(".", "/")
        if stem and os.path.exists(os.path.join(folder, stem + ".py")):
            found.append(stem + ".py")
    return found


def parse_python_to_nodes(script_folder, script_name, output_folder="nodes", _visited=None):
    """Parse a script, and the local modules it imports, into JSON node trees.

    Each non-blank, non-comment line becomes a ``Node`` typed by its leading
    keyword (``class``/``custom_type``, ``function``, ``condition``, ``loop``,
    ``import``), ``function_call`` when a call-like name is found on it,
    ``docstring`` for triple-quoted text, or ``statement`` otherwise.  The
    tree is written to ``<output_folder>/<dotted script name>.json``.  Every
    import that resolves to a ``.py`` file inside ``script_folder`` is parsed
    the same way.

    Args:
        script_folder: Folder containing the scripts.
        script_name: Script path relative to ``script_folder``.
        output_folder: Folder receiving the JSON files (created if missing).
        _visited: Internal set of already-parsed script paths shared across
            the recursion; callers should leave it unset.

    Returns:
        Node: Root of the tree built for ``script_name``, or ``None`` when
        that script was already parsed earlier in the same run.
    """
    script_path = os.path.join(script_folder, script_name)

    # Circular or repeated imports would otherwise recurse without end.
    if _visited is None:
        _visited = set()
    visit_key = os.path.normcase(os.path.abspath(script_path))
    if visit_key in _visited:
        return None
    _visited.add(visit_key)

    module_name, _ = os.path.splitext(script_name)
    module_name = module_name.replace("/", ".").replace("\\", ".")

    # Python source is UTF-8 by default; do not depend on the locale encoding.
    with open(script_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # makedirs also copes with nested output folders.
    os.makedirs(output_folder, exist_ok=True)
    node_file = os.path.join(output_folder, module_name + ".json")

    call_pattern = re.compile(r'\b([a-zA-Z0-9_\.]+)\s*\(')

    def extract_function_name(line):
        # Every dotted name that is followed by an opening parenthesis.
        return call_pattern.findall(line)

    root = Node(type='root', content='')
    # Each entry pairs an open block's node with the indent of its header
    # line; the root's -1 means it is never popped.  Comparing against real
    # header indents works for any indentation width and for blocks (try,
    # with, ...) that are not pushed themselves.
    stack = [(root, -1)]
    aliases = []
    docstring_lines = []
    docstring_delimiter = None  # set while inside a multi-line docstring

    for line in lines:
        stripped_line = line.strip()
        indent = len(line) - len(line.lstrip())

        # Inside a multi-line docstring: collect until the delimiter returns.
        if docstring_delimiter is not None:
            if docstring_delimiter in stripped_line:
                docstring_lines.append(stripped_line)
                stack[-1][0].children.append(
                    Node(type='docstring', content='\n'.join(docstring_lines))
                )
                docstring_delimiter = None
                docstring_lines = []
            else:
                docstring_lines.append(line.rstrip())
            continue

        if stripped_line == '' or stripped_line.startswith('#'):
            continue

        # Close every block whose header is not shallower than this line.
        while len(stack) > 1 and indent <= stack[-1][1]:
            stack.pop()
        parent = stack[-1][0]

        # Handle docstrings, whichever triple quote comes first on the line.
        quote_positions = [
            (stripped_line.find(quote), quote)
            for quote in ('"""', "'''")
            if quote in stripped_line
        ]
        if quote_positions:
            delimiter = min(quote_positions)[1]
            if stripped_line.count(delimiter) % 2 == 0:
                # Opened and closed on the same line.
                parent.children.append(Node(type='docstring', content=stripped_line))
            else:
                docstring_delimiter = delimiter
                docstring_lines = [stripped_line]
            continue

        if stripped_line.startswith('class '):
            node_type = 'custom_type' if 'BaseModel' in stripped_line else 'class'
            node = Node(type=node_type, content=stripped_line)
            parent.children.append(node)
            stack.append((node, indent))
        elif stripped_line.startswith('def '):
            node = Node(
                type='function',
                content=stripped_line,
                functions=extract_function_name(stripped_line),
            )
            parent.children.append(node)
            stack.append((node, indent))
        elif stripped_line.startswith(('if ', 'elif ', 'else:')):
            node = Node(type='condition', content=stripped_line)
            parent.children.append(node)
            if stripped_line.endswith(':'):
                stack.append((node, indent))
        elif stripped_line.startswith(('for ', 'while ')):
            node = Node(type='loop', content=stripped_line)
            parent.children.append(node)
            if stripped_line.endswith(':'):
                stack.append((node, indent))
        elif stripped_line.startswith(('import ', 'from ')):
            node = Node(type='import', content=stripped_line)
            parent.children.append(node)

            # Extract aliases from the import statement
            tmp_aliases = get_alias(node, script_folder)
            if tmp_aliases:
                aliases.extend(tmp_aliases)

            # Parse every imported module that is a local file.
            for module_file in _local_module_files(stripped_line, script_folder):
                parse_python_to_nodes(script_folder, module_file, output_folder, _visited)
        elif '(' in stripped_line:
            function_names = extract_function_name(stripped_line)
            if not function_names:
                parent.children.append(Node(type='statement', content=stripped_line))
                continue

            node = Node(type='function_call', content=stripped_line, functions=function_names)
            for alias in aliases:
                # Attach each alias once, even if several calls on the line use it.
                if any(alias.alias_name in name.split(".") for name in function_names):
                    node.aliases.append(alias)
            parent.children.append(node)
        else:
            parent.children.append(Node(type='statement', content=stripped_line))

    with open(node_file, 'w', encoding='utf-8') as json_file:
        json.dump(node_to_dict(root), json_file, indent=4)

    # Returned so callers can keep working with the tree in memory.
    return root


# Parse the entry-point script; the JSON output goes to the default "nodes"
# output folder.
# NOTE: despite its name, `script_path` holds the folder and is passed as the
# `script_folder` argument.  NOTE(review): hard-coded absolute local path.
script_path = 'C:/Users/justl/Documents/folder'
script_name = "main.py"
root = parse_python_to_nodes(script_path, script_name)

# Convert to JSON for easy visualization or further processing
# json_representation = json.dumps(node_to_dict(root), indent=2)
# print(json_representation)


In [26]:
def find_import_nodes(node):
    """Collect every node of type ``'import'`` in a tree.

    Args:
        node: Tree root; each node exposes ``type`` and ``children``.

    Returns:
        list: The import nodes in depth-first, parent-before-children order
        (``node`` itself included when it is an import).
    """
    found = []
    pending = [node]
    while pending:
        current = pending.pop()
        if current.type == 'import':
            found.append(current)
        # Pushed in reverse so the leftmost child is visited next.
        pending.extend(list(current.children)[::-1])
    return found

# Assuming you have already created the 'root' variable: `root` must be a
# node tree (an object with `type` and `children`) for this call to work.
import_nodes = find_import_nodes(root)

In [None]:
# class Alias()
# NOTE: unfinished stub, kept commented out.  As written it is a SyntaxError
# (no colon, no body), and a completed plain class would shadow the pydantic
# `Alias` model defined in an earlier cell.

In [55]:
# Scratch cell: for every import node, work out whether the imported module is
# a local file and collect its `as` aliases.  The body repeats the logic of
# get_alias.  NOTE(review): hard-coded absolute local path.
original_dir = "C:/Users/justl/Documents/folder"

for import_node in import_nodes:
    statement = import_node.content

    # Second space-separated token is taken as the module; dots become path
    # separators to form "<original_dir>/<module path>.py".
    filename = statement.split(" ")[1]
    filename = filename.replace(".", "/")
    file_loc = os.path.join(original_dir, filename+".py")

    local_file = os.path.exists(file_loc)

    # NOTE(review): re-created on every iteration, so after the loop only the
    # aliases of the last import node remain.
    aliases = []

    if " as " in statement:
        # NOTE(review): split() keeps punctuation, so "x as y, a as b" gives
        # the alias name "y," (with the comma).
        parts = statement.split()
        indexes = [i for i, part in enumerate(parts) if part == "as"]
        for index in indexes:
            actual_name = parts[index - 1]  # The name before "as"
            alias_name = parts[index + 1]    # The name after "as"

            alias = Alias(
                alias_name = alias_name,
                actual_name = actual_name,
                local_file = local_file
            )

            aliases.append(alias)

    # if "as" in parts:
    #     splits = statement.split(" as ")
    #     print(splits)
        
    # if statement.startswith("import "):
    #     for part in parts[1:]:
    #         if "as" in parts:
    #             alias = part.split(" as ")
    #             print(alias)
                
    

np numpy


In [58]:
# Scratch check of the alias-splitting logic on an import with several names.
statement = "from numpy import x as y, a as b, w"

if " as " in statement:
    # Whitespace split: commas stay attached to the token before them.
    parts = statement.split()
    indexes = [i for i, part in enumerate(parts) if part == "as"]
    for index in indexes:
        actual_name = parts[index - 1]  # The name before "as"
        alias_name = parts[index + 1]    # The name after "as" ("y," and "b," here)

        # The comma in the printed "y, x" / "b, a" is the one left on alias_name.
        print(alias_name, actual_name)


y, x
b, a
