In [132]:
from bashlint import data_tools, lint

# Parse a sample `find ... -exec` command with bashlint and show it as a tree
# and as a template.
cmd = 'find /path/to/dir -type d -exec chmod 755 {} \\;'
cmd_ast = data_tools.bash_parser(cmd)

# pretty_print writes the tree to stdout itself; wrapping it in print() only
# added a stray "None" line after the tree, so it is called directly.
data_tools.pretty_print(cmd_ast)


# Template rendering of the parsed tree.
print(data_tools.ast2template(cmd_ast))


# normalize_ast is given the raw command string (not the parsed tree) and its
# result is rendered the same way for comparison.
cmd_ast_normalized = lint.normalize_ast(cmd)
print(data_tools.ast2template(cmd_ast_normalized))

ROOT()
    UTILITY(find)
        ARGUMENT(/path/to/dir)<Path>
        FLAG(-type)
            ARGUMENT(d)<Type>
        FLAG(-exec::;)
            UTILITY(chmod)
                ARGUMENT(755)<Permission>
                ARGUMENT({})<File>
None
find Path -type d -exec chmod Permission {} \;
find Path -type d -exec chmod Permission {} \;
ROOT()
    UTILITY(find)
        ARGUMENT(/path/to/dir)<Path>
        FLAG(-type)
            ARGUMENT(d)<Type>
        FLAG(-exec::;)
            UTILITY(chmod)
                ARGUMENT(755)<Permission>
                ARGUMENT({})<File>


In [1]:
import re
from graphviz import Digraph


# Utility node class
class UtilityNode:
    """A single node of the command tree.

    Attributes:
        utility: token stored as the node's name (a utility name, or an
            operator token such as ``|``, ``>`` or ``<``).
        flags: list of flag tokens attached to this node.
        parameters: list of parameter values attached to this node.
        redirect: True when the node stands for an operator token.
        left: optional left child node.
        right: optional right child node.
    """

    def __init__(self, utility, flags, parameters, redirect=False, left=None, right=None):
        # Payload carried by the node (the given list objects are stored as-is).
        self.utility, self.flags, self.parameters = utility, flags, parameters
        # Marks operator nodes.
        self.redirect = redirect
        # Links to child nodes.
        self.left, self.right = left, right

# Parameter categories
# Each entry is (regex, category). A token gets the category of the first
# entry whose regex matches the WHOLE token, so a generic pattern such as
# NUMBER can no longer swallow "10s", "10MB" or a timestamp just because the
# token starts with digits. Order only matters where two patterns accept the
# same token: DIRECTORY ("/name/") is listed before the broader PATH.
PARAM_CATEGORIES = [
    (r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}', 'DATETIME'),
    (r'\d+[smh]', 'TIMESPAN'),
    (r'\d+[kMG]?B', 'SIZE'),
    (r'\d+(?:\.\d+)?', 'NUMBER'),
    (r'\/\w+\/', 'DIRECTORY'),
    # Trailing ".*" keeps the "starts with /something/" behaviour of PATH
    # now that matching is anchored at both ends.
    (r'\/.+\/.*', 'PATH'),
    # Same for FILE: "name.ext" followed by anything (e.g. "a.tar.gz").
    (r'\w+\.\w+.*', 'FILE'),
    (r'[rwx-]+', 'PERMISSION'),
]

def categorize_parameter(param):
    """Map a parameter token to its category name.

    Args:
        param: the token text (a string).

    Returns:
        The category of the first ``PARAM_CATEGORIES`` entry whose regex
        matches the entire token, or ``param`` itself when none does.
    """
    for regex, category in PARAM_CATEGORIES:
        # fullmatch, not match: a prefix hit would label e.g. "report" as
        # PERMISSION and "10s" as NUMBER.
        if re.fullmatch(regex, param):
            return category
    return param


def parse_bash_command(cmd):
    """Build a right-linked chain of ``UtilityNode`` objects from a command line.

    The command is split on whitespace only: quoting is not interpreted and
    operators must be separated by spaces to be recognised. Tokens are
    handled in order:

    * a token starting with ``-`` is appended to the flags of the most recent
      node (when there is one);
    * ``|``, ``>`` and ``<`` start an operator node (``redirect=True``);
    * the first token, and any token right after an operator, starts a new
      utility node;
    * everything else is categorised with ``categorize_parameter`` and
      appended to the most recent node's parameters.

    Args:
        cmd: the command line as a string.

    Returns:
        The first node of the chain, each node's ``right`` pointing at the
        node that follows it, or ``None`` when ``cmd`` contains no tokens.
    """
    # str.split() drops leading/trailing whitespace, so no empty tokens are
    # produced (an empty token used to become a bogus utility or parameter).
    tokens = cmd.split()

    stack = []

    for token in tokens:
        # `and stack`: a flag with nothing to attach to (command starts with
        # "-x") falls through and becomes the first node instead of raising
        # IndexError.
        if token.startswith('-') and stack:
            stack[-1].flags.append(token)
        elif token in {"|", ">", "<"}:
            stack.append(UtilityNode(token, [], [], True))
        elif not stack or stack[-1].redirect:
            stack.append(UtilityNode(token, [], []))
        else:
            stack[-1].parameters.append(categorize_parameter(token))

    if not stack:
        # Blank command: there is no tree to return.
        return None

    # Chain the nodes left to right through their `right` links.
    for node, successor in zip(stack, stack[1:]):
        node.right = successor

    return stack[0]

# Helpers for displaying a parsed tree (text dump and Graphviz rendering)


def print_ast(node, level=0):
    """Print the tree rooted at ``node``, one node per line, pre-order.

    Each line shows the node's utility, flags, parameters and redirect
    marker, indented by two spaces per depth ``level``. A ``None`` node
    prints nothing.
    """
    if node is not None:
        indent = "  " * level
        print(indent + f"Utility: {node.utility}, Flags: {node.flags}, Parameters: {node.parameters}, Redirect: {node.redirect}")
        # Left subtree first, then right, one level deeper.
        for child in (node.left, node.right):
            print_ast(child, level + 1)

def visualize_ast(root):
    """Build a ``Digraph`` that mirrors the tree rooted at ``root``.

    Every tree node becomes one graph node, keyed by ``str(id(node))`` and
    labelled with its utility, comma-joined flags and comma-joined
    parameters; an edge is added from each parent to each of its children.
    A ``None`` root yields an empty graph.

    Returns:
        The populated ``Digraph``.
    """
    dot = Digraph()

    def add_subtree(node, parent_key=None):
        # Pre-order walk: emit this node, link it to its parent, then descend.
        if node is not None:
            key = str(id(node))
            dot.node(key, f"{node.utility}\nFlags: {','.join(node.flags)}\nParameters: {','.join(node.parameters)}")
            if parent_key is not None:
                dot.edge(parent_key, key)
            for child in (node.left, node.right):
                add_subtree(child, key)

    add_subtree(root)
    return dot

# Example usage
# Parse a sample pipeline, render it with Graphviz, then dump it as text.
# NOTE(review): the command looks like `cat /boot/config-$(uname -r) | ...`
# with the command substitution lost - confirm. As written, `-r` is a separate
# token, and the recorded output shows it among cat's flags.
cmd = "cat /boot/config-uname -r | grep IP_MROUTE"
root = parse_bash_command(cmd)
graph = visualize_ast(root)
# NOTE(review): Digraph.view() presumably renders to a file and opens an
# external viewer - confirm this is wanted when the notebook runs headless.
graph.view()

# Text dump of the same tree, one node per line.
print_ast(root)


Utility: cat, Flags: ['-r'], Parameters: ['PATH'], Redirect: False
  Utility: |, Flags: [], Parameters: [], Redirect: True
    Utility: grep, Flags: [], Parameters: ['IP_MROUTE'], Redirect: False
