In [65]:
import libadalang as lal
from pathlib import Path
from typing import Generator, List, Set

# def get_full_statement_start_line(start: lal.AdaNode) -> int:
#     print(f'{start.kind_name}: {start.sloc_range}')
#     for node in start.parent_chain:
#         if isinstance(node, lal.AdaNodeList) or isinstance(node, lal.CompilationUnit):
#             return node.sloc_range.start.line
#     print(f'Not found: {start}')

# def inrange(sloc: lal.Sloc, sloc_range: lal.SlocRange) -> bool:
#     start = sloc_range.start
#     end = sloc_range.end
#     if start.line == end.line == sloc.line:
#         return start.column <= sloc.column <= end.column
#     elif start.line == sloc.line and start.column > sloc.column:
#         return False
#     elif end.line == sloc.line and end.column < sloc.column:
#         return False
#     return start.line <= sloc.line <= end.line

# def custom_lookup(node: lal.AdaNode, loc: lal.Sloc) -> lal.AdaNode:
#     if len(node.children) == 0: return node
#     for child in node.children:
#         if child is None: continue
#         if inrange(loc, child.sloc_range):
#             return custom_lookup(child, loc)
#     return node

# def is_unpredictable_node(node: lal.AdaNode, loc: lal.Sloc) -> bool:
#     if isinstance(node, lal.SubpBody):
#         print("Location:", loc)
#         print("S:", node.f_subp_spec.sloc_range.end)
#         print("E:", node.f_decls.sloc_range.start)
#         if loc > node.f_subp_spec.sloc_range.end and loc < node.f_decls.sloc_range.start:
#             return True
#     for parent in node.parent_chain:
#         if isinstance(parent, lal.EndName):
#             return True
#         if isinstance(parent, lal.CompilationUnit):
#             return False
#     return False

"""
End logic:
    'end;'                     -> start of line token == end
    'end <name>;'              -> start of line token == end
    'end <name>.<name>;'       -> start of line token == end
    'end \n <name>;'           -> start of line token == end
    'end <comment> \n;         -> start of line token == end
    'end <comment> \n <name>;' -> start of line token == end

Begin logic:
    '\n begin\n'            -> start of line token == begin
    '\n begin <comment> \n' -> start of line token == begin

Generic logic:
    'generic \n package'
    'generic \n function'
    'generic \n procedure'
    'generic \n <definitions> \n <thing>'

If logic:
    '\n else'  -> start of line token == else
    '\n elsif' -> start of line token == elsif
    But only if it's an if statement, not an if expression

Case logic:
    '\n when' -> start of line token == when
    But only if it's a case statement, not a case expression


"""

def is_unpredictable(token: lal.Token, node: lal.AdaNode) -> bool:
    if token.kind in {'End', 'Begin'}:
        return True
    if token.kind in {'Elsif','Else'}:
        return node.kind_name != 'IfExpr'
    if token.kind == 'When':
        return node.kind_name != 'CaseExprAlternative'
    if token.kind == 'Package':
        return node.kind_name.startswith('Generic')
    if token.kind in {'Function', 'Procedure'}:
        return node.parent.parent.kind_name.startswith('Generic')
    return False

def get_unpredictable_line_numbers(root: lal.AdaNode, lines: List[str], unit) -> Set[int]:
    result = []
    for i, line in enumerate(lines):
        line_no = i + 1
        loc = lal.Sloc(line_no, len(line) - len(line.lstrip()) + 1)
        token = unit.lookup_token(loc)
        node = root.lookup(loc)
        if is_unpredictable(token, node):
            result.append(line_no)
    return result

def create_blocks(file_path: Path) -> Generator[List[str], None, None]:
    with open(str(file_path), "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Remove lines which are just whitespace
    lines = list(filter(lambda line: len(line.strip()) > 0, lines))
    print(lines)

    context = lal.AnalysisContext()
    unit = context.get_from_buffer(file_path.name, ''.join(lines))
    root = unit.root
    unpredictable = get_unpredictable_line_numbers(root, lines, unit)
    print(unpredictable)
    line_no = 1
    for unpredictable_line_no in unpredictable:
        block = lines[line_no-1:unpredictable_line_no-1]
        if len(block) > 1:
            yield ''.join(block)
        line_no = unpredictable_line_no
    block = lines[line_no-1:]
    if len(block) > 1:
        yield ''.join(block)


file_path = Path("/workspaces/ai-indent/data/interim/complete_files/ada/21485_c.ada")
# file_path = Path("/workspaces/ai-indent/data/interim/complete_files/ada/119_bodies.ada")



In [66]:
for block in create_blocks(file_path):
    print("---------------")
    print(''.join(block))

['package A is pragma SPARK_Mode (On);\n', '   procedure Foo (X : in out Integer);\n', 'end A;\n', 'with B;\n', 'package body A is pragma SPARK_Mode (On);\n', '   procedure Foo (X : in out Integer) is\n', '   begin\n', '      B.Bar (X);\n', '      X := X + 1;\n', '   end Foo;\n', 'end A;\n', 'package B is pragma SPARK_Mode (On);\n', '   procedure Bar (X : in out Integer);\n', 'end B;\n', 'package body B is pragma SPARK_Mode (On);\n', '   procedure Bar (X : in out Integer) is\n', '   begin\n', '      X := X + 1;\n', '   end Bar;\n', 'end B;\n']
[3, 7, 10, 11, 14, 17, 19, 20]
---------------
package A is pragma SPARK_Mode (On);
   procedure Foo (X : in out Integer);

---------------
end A;
with B;
package body A is pragma SPARK_Mode (On);
   procedure Foo (X : in out Integer) is

---------------
   begin
      B.Bar (X);
      X := X + 1;

---------------
end A;
package B is pragma SPARK_Mode (On);
   procedure Bar (X : in out Integer);

---------------
end B;
package body B is pragma SP