# Solidity Ast

In [3]:
import json

In [4]:
def read_file(filename):
    with open(filename, "r") as f:
        data = json.load(f)
    return data

def pos_to_bytes(source_bytes, position):
    line, col = position
    text = source_bytes.decode("utf-8")
    lines = text.splitlines(True)
    return sum(len(l.encode("utf-8")) for l in lines[:line]) + len(lines[line][:col].encode("utf-8"))

def bytes_to_pos(source_bytes, byte_offset):
    text = source_bytes.decode("utf-8")
    curr_offset = 0
    for line_num, line_text in enumerate(text.splitlines(True)):  # keep newline
        line_bytes = line_text.encode("utf-8")
        if curr_offset + len(line_bytes) > byte_offset:
            col = byte_offset - curr_offset
            return (line_num, col)
        curr_offset += len(line_bytes)
    return None

In [8]:
class Ast:
    def __init__(self, build_filename):
        self.data = read_file(build_filename)
        self.sources = self.data['sources']
        self.id_to_path = self.data['build_infos'][0]['source_id_to_path']
        self.path_to_abs = {}
        self.nodes = self.cache_ids()

    def cache_ids(self):
        nodes = {}
        for p, contents in self.sources.items():
            ast = self.sources[p][0]['source_file']['ast']
            abs_p = ast['absolutePath']
            self.path_to_abs[p] = abs_p
            if "id" in ast:
                if abs_p not in nodes:
                    nodes[abs_p] = {}
                nodes[abs_p][ast["id"]] = {'src': ast['src'] }
                # print(ast['id'])
            curr = [ast]
            while curr:
                tree = curr.pop()
                # process current node
                nodes[abs_p][tree['id']] = {'src': tree['src'] }

                if 'nameLocation' in tree:
                    nodes[abs_p][tree['id']]['nameLocation'] = tree['nameLocation']
                if 'referencedDeclaration' in tree:
                    nodes[abs_p][tree['id']]['referencedDeclaration'] = tree['referencedDeclaration']
                if 'nodeType' in tree:
                    nodes[abs_p][tree['id']]['nodeType'] = tree['nodeType']
                if 'memberLocation' in tree:
                    nodes[abs_p][tree['id']]['memberLocation'] = tree['memberLocation']
                # use first location. Common in libraryNames
                if 'nameLocations' in tree:
                    if isinstance(tree['nameLocations'], list) and len(tree['nameLocations']) > 0:
                        nodes[abs_p][tree['id']]['nameLocation'] = tree['nameLocations'][0]
                
                # check for nodes
                if "nodes" in tree:
                    for node in tree['nodes']:
                        # print(node['id'])
                        curr.append(node)
                # check value
                if "value" in tree:
                    if isinstance(tree["value"], dict):
                        curr.append(tree["value"])
                if "initialValue" in tree:
                    if isinstance(tree["initialValue"], dict):
                        curr.append(tree["initialValue"])
                if "eventCall" in tree:
                    curr.append(tree["eventCall"])
                if "typeName" in tree:
                    curr.append(tree["typeName"])
                if "keyType" in tree:
                    curr.append(tree["keyType"])
                if "valueType" in tree:
                    curr.append(tree["valueType"])
                if "pathNode" in tree:
                    curr.append(tree["pathNode"])
                if "condition" in tree:
                    curr.append(tree["condition"])
                if "trueBody" in tree:
                    curr.append(tree["trueBody"])
                if "falseBody" in tree:
                    curr.append(tree["falseBody"])
                if "subExpression" in tree:
                    curr.append(tree["subExpression"])
                if "baseExpression" in tree:
                    curr.append(tree['baseExpression'])
                if "indexExpression" in tree:
                    curr.append(tree['indexExpression'])
                # check for nodes
                if "baseContracts" in tree:
                    for node in tree['baseContracts']:
                        if "baseName" in node:
                            # print(node['id'])
                            curr.append(node['baseName'])
                # check for struct members nodes
                if "members" in tree:
                    for node in tree['members']:
                        # print(node['id'])
                        curr.append(node)
                # check for struct members nodes
                if "modifiers" in tree:
                    for node in tree['modifiers']:
                        curr.append(node)
                if "modifierName" in tree:
                    curr.append(tree['modifierName'])
                # check for declaration nodes
                if "declarations" in tree:
                    for node in tree['declarations']:
                        curr.append(node)
                if "symbolAliases" in tree:
                    for node in tree["symbolAliases"]:
                        if "foreign" in node:
                            curr.append(node["foreign"])
                # check library using for directive
                if "libraryName" in tree:
                    curr.append(tree["libraryName"])
                # check for body nodes
                if "body" in tree:
                    curr.append(tree['body'])
                # check for body nodes (incomplete)
                if "expression" in tree:
                    curr.append(tree["expression"])
                if "leftExpression" in tree:
                    curr.append(tree["leftExpression"])
                if "rightExpression" in tree:
                    curr.append(tree["rightExpression"])
                if "arguments" in tree:
                    if isinstance(tree['arguments'], dict):
                        curr.append(tree['arguments'])
                    if isinstance(tree['arguments'], list):
                        for node in tree['arguments']:
                            curr.append(node)
                # check left
                if "leftHandSide" in tree:
                    curr.append(tree["leftHandSide"])
                # check right
                if "rightHandSide" in tree:
                    curr.append(tree["rightHandSide"])
                # check for body statement nodes
                if "statements" in tree:
                    if isinstance(tree['statements'], list):
                        for node in tree['statements']:
                            curr.append(node)
                    if isinstance(tree['statements'], dict):
                        curr.append(tree['statements'])
                # check for parameter nodes
                if "parameters" in tree:
                    if isinstance(tree['parameters'], dict):
                        curr.append(tree['parameters'])
                    if isinstance(tree['parameters'], list):
                        for node in tree['parameters']:
                            curr.append(node)
                # # check for return parameter nodes
                if "returnParameters" in tree:
                    if isinstance(tree['returnParameters'], dict):
                        curr.append(tree['returnParameters'])
                    if isinstance(tree['returnParameters'], list):
                        for node in tree['parameters']:
                            curr.append(node)
                        
        return nodes

    def goto_bytes(self, uri, position):
        # (line, char) = position
        p = uri.split("file://")[1]
        abs_p = self.path_to_abs[p]

        refs = {}
        for i, content in self.nodes[abs_p].items():
            if 'referencedDeclaration' not in content:
                continue
            start_b, l, s = content['src'].split(":")
            end_b = int(start_b) + int(l)
            
            # print(i, content)
            if int(start_b) <= int(position) < int(end_b):
                diff = int(end_b) - int(start_b)
                if i not in refs:
                    refs[diff] = i
                else:
                    if refs[diff] <= i:
                        refs[diff] = i
        location, file = None, None
        # print(refs)
        if refs:
            ref = None
            for key, values in self.nodes.items():
                decla = refs[min(refs)]
                if decla not in values:
                    continue
                choice = values[decla]

                ref = choice['referencedDeclaration']
            node = None
            for key, values in self.nodes.items():
                if ref not in values:
                    continue
                node = values[ref]
            print(node)
            print(choice)
            print(ref)
            if "nameLocation" in node:
                location, _, file = node['nameLocation'].split(":")
            elif "src" in node:
                location,_, file = node['src'].split(":")
            else:
                # default to same location
                location = int(position)
            return self.id_to_path[file], int(location)
        else:
            source = self.sources[p][0]['source_file']['ast']['absolutePath']
            return source, int(position)

    def goto_declaration(self, uri, position):
        file = uri.split('file://')[-1]
        with open(file, "rb") as f:
            source_bytes = f.read()
        b = pos_to_bytes(source_bytes, position)
        f, l = self.goto_bytes(uri, b)
        return f, bytes_to_pos(source_bytes, l)

# ast = Ast("test/c.forge.ast.json")
# ast.goto_declaration("file:///Users/meek/Developer/lsp/C.sol", (21, 8))

In [9]:
# poolmanger
tests = [
    (("file:///Users/meek/Developer/uniswap/v4-core/src/PoolManager.sol", 428), ("src/NoDelegateCall.sol", 292)),
    (("file:///Users/meek/Developer/uniswap/v4-core/src/PoolManager.sol", 4764), ("src/interfaces/IPoolManager.sol", 619)),
    (("file:///Users/meek/Developer/uniswap/v4-core/src/PoolManager.sol", 5230), ("src/types/PoolId.sol", 102)),
    (("file:///Users/meek/Developer/uniswap/v4-core/src/PoolManager.sol", 6969), ("src/PoolManager.sol", 6167)),
]

for test in tests:
    ast = Ast("PoolManager.forge.ast.json")
    res = ast.goto_bytes(test[0][0], test[0][1])

    if res != test[1]:
        print(f"❌ Fail Expected {test[1][1]}, got {res[1]}, Position {test[0][1]} {res[0]}")
    else:
        print(f"✅ Success {test[1][1]} == {res[1]}, Position {test[0][1]} {res[0]}")
    assert res == test[1], "failed"

{'src': '274:973:5', 'nameLocation': '292:14:5', 'nodeType': 'ContractDefinition'}
{'src': '428:14:6', 'referencedDeclaration': 550, 'nodeType': 'Identifier'}
550
✅ Success 292 == 292, Position 428 src/NoDelegateCall.sol
{'src': '609:12225:11', 'nameLocation': '619:12:11', 'nodeType': 'ContractDefinition'}
{'src': '4764:12:6', 'referencedDeclaration': 2529, 'nodeType': 'IdentifierPath', 'nameLocation': '4764:12:6'}
2529
✅ Success 619 == 619, Position 4764 src/interfaces/IPoolManager.sol
{'src': '97:23:41', 'nameLocation': '102:6:41', 'nodeType': 'UserDefinedValueTypeDefinition'}
{'src': '5230:6:6', 'referencedDeclaration': 8837, 'nodeType': 'IdentifierPath', 'nameLocation': '5230:6:6'}
8837
✅ Success 102 == 102, Position 5230 src/types/PoolId.sol
{'src': '6161:10:6', 'nameLocation': '6167:4:6', 'nodeType': 'VariableDeclaration'}
{'src': '6969:4:6', 'referencedDeclaration': 743, 'nodeType': 'Identifier'}
743
✅ Success 6167 == 6167, Position 6969 src/PoolManager.sol


In [7]:
# A.sol
tests = (
    # name.add_one(votes);
    (("file:///Users/meek/Developer/lsp/C.sol", 430),("C.sol", 371)), # name
    (("file:///Users/meek/Developer/lsp/C.sol", 431),("C.sol", 371)), 
    (("file:///Users/meek/Developer/lsp/C.sol", 432),("C.sol", 371)),
    (("file:///Users/meek/Developer/lsp/C.sol", 433),("C.sol", 371)),
    (("file:///Users/meek/Developer/lsp/C.sol", 434),("B.sol", 247)), # .add_one
    (("file:///Users/meek/Developer/lsp/C.sol", 435),("B.sol", 247)),
    (("file:///Users/meek/Developer/lsp/C.sol", 436),("B.sol", 247)),
    (("file:///Users/meek/Developer/lsp/C.sol", 437),("B.sol", 247)),
    (("file:///Users/meek/Developer/lsp/C.sol", 438),("B.sol", 247)),
    (("file:///Users/meek/Developer/lsp/C.sol", 439),("B.sol", 247)),
    (("file:///Users/meek/Developer/lsp/C.sol", 440),("B.sol", 247)),
    (("file:///Users/meek/Developer/lsp/C.sol", 441),("B.sol", 247)),
    (("file:///Users/meek/Developer/lsp/C.sol", 442),("C.sol", 442)), # not a node
    (("file:///Users/meek/Developer/lsp/C.sol", 443),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 444),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 445),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 446),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 447),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 448),("C.sol", 448)), # not a node
    (("file:///Users/meek/Developer/lsp/C.sol", 449),("C.sol", 449)), # not a node
    # name.get_votes(votes);
    (("file:///Users/meek/Developer/lsp/C.sol", 466),("C.sol", 371)), # name
    (("file:///Users/meek/Developer/lsp/C.sol", 467),("C.sol", 371)), # name
    (("file:///Users/meek/Developer/lsp/C.sol", 468),("C.sol", 371)), # name
    (("file:///Users/meek/Developer/lsp/C.sol", 469),("C.sol", 371)), # name
    (("file:///Users/meek/Developer/lsp/C.sol", 470),("B.sol", 359)), # get_votes
    (("file:///Users/meek/Developer/lsp/C.sol", 471),("B.sol", 359)), # get_votes
    (("file:///Users/meek/Developer/lsp/C.sol", 472),("B.sol", 359)), # get_votes
    (("file:///Users/meek/Developer/lsp/C.sol", 473),("B.sol", 359)), # get_votes
    (("file:///Users/meek/Developer/lsp/C.sol", 474),("B.sol", 359)), # get_votes
    (("file:///Users/meek/Developer/lsp/C.sol", 475),("B.sol", 359)), # get_votes
    (("file:///Users/meek/Developer/lsp/C.sol", 476),("B.sol", 359)), # get_votes
    (("file:///Users/meek/Developer/lsp/C.sol", 477),("B.sol", 359)), # get_votes
    (("file:///Users/meek/Developer/lsp/C.sol", 478),("B.sol", 359)), # get_votes
    (("file:///Users/meek/Developer/lsp/C.sol", 479),("B.sol", 359)), # get_votes
    (("file:///Users/meek/Developer/lsp/C.sol", 480),("C.sol", 480)), # not a node
    (("file:///Users/meek/Developer/lsp/C.sol", 481),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 482),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 483),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 484),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 485),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 486),("C.sol", 486)), # not a node
    (("file:///Users/meek/Developer/lsp/C.sol", 487),("C.sol", 487)), # not a node
    # bool fad;
    (("file:///Users/meek/Developer/lsp/C.sol", 417),("C.sol", 417)), # not a node
    (("file:///Users/meek/Developer/lsp/C.sol", 418),("C.sol", 418)), # not a node
    (("file:///Users/meek/Developer/lsp/C.sol", 419),("C.sol", 419)), # not a node
    # name("meek");
    (("file:///Users/meek/Developer/lsp/C.sol", 314),("A.sol", 260)), # name
    (("file:///Users/meek/Developer/lsp/C.sol", 315),("A.sol", 260)), # name
    (("file:///Users/meek/Developer/lsp/C.sol", 316),("A.sol", 260)), # name
    (("file:///Users/meek/Developer/lsp/C.sol", 317),("A.sol", 260)), # name
    # votes.name = "2024 Elections";
    (("file:///Users/meek/Developer/lsp/C.sol", 275),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 276),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 277),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 278),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 279),("C.sol", 212)), # votes
    (("file:///Users/meek/Developer/lsp/C.sol", 280),("B.sol", 163)), # .name
    (("file:///Users/meek/Developer/lsp/C.sol", 281),("B.sol", 163)), # .name
    (("file:///Users/meek/Developer/lsp/C.sol", 282),("B.sol", 163)), # .name
    (("file:///Users/meek/Developer/lsp/C.sol", 283),("B.sol", 163)), # .name
    (("file:///Users/meek/Developer/lsp/C.sol", 284),("B.sol", 163)), # .name
    # using D for *;
    (("file:///Users/meek/Developer/lsp/C.sol", 146),("B.sol", 66)), # D
)

for test in tests:
    ast = Ast("test/c.forge.ast.json")
    res = ast.goto_bytes(test[0][0], test[0][1])

    if res != test[1]:
        print(f"❌ Fail Expected {test[1][1]}, got {res[1]}, Position {test[0][1]} {res[0]}")
    else:
        print(f"✅ Success {test[1][1]} == {res[1]}, Position {test[0][1]} {res[0]}")
    assert res == test[1], "failed"

{'src': '357:18:2', 'nameLocation': '371:4:2', 'nodeType': 'VariableDeclaration'}
{'src': '430:4:2', 'referencedDeclaration': 123, 'nodeType': 'Identifier'}
123
✅ Success 371 == 371, Position 430 C.sol
{'src': '357:18:2', 'nameLocation': '371:4:2', 'nodeType': 'VariableDeclaration'}
{'src': '430:4:2', 'referencedDeclaration': 123, 'nodeType': 'Identifier'}
123
✅ Success 371 == 371, Position 431 C.sol
{'src': '357:18:2', 'nameLocation': '371:4:2', 'nodeType': 'VariableDeclaration'}
{'src': '430:4:2', 'referencedDeclaration': 123, 'nodeType': 'Identifier'}
123
✅ Success 371 == 371, Position 432 C.sol
{'src': '357:18:2', 'nameLocation': '371:4:2', 'nodeType': 'VariableDeclaration'}
{'src': '430:4:2', 'referencedDeclaration': 123, 'nodeType': 'Identifier'}
123
✅ Success 371 == 371, Position 433 C.sol
{'src': '238:106:1', 'nameLocation': '247:7:1', 'nodeType': 'FunctionDefinition'}
{'src': '430:12:2', 'referencedDeclaration': 67, 'nodeType': 'MemberAccess', 'memberLocation': '435:7:2'}
67
✅