# Extract Full SELECT Query Blocks, DATA TYPEs, and FUNCTION Names
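This notebook scans ABAP-style source code, one line at a time, and extracts:
- The **full SELECT query block** (from the line containing `SELECT` through the first line that ends with a `.` or `;` terminator)
- The **DATA TYPE** (the token that follows `TYPE` on lines containing `DATA`)
- The **FUNCTION NAME** (the name that follows `CALL FUNCTION`, with any surrounding quotes removed)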


In [None]:
import re

# Patterns are compiled once at import time rather than re-specified per line.
_SELECT_RE = re.compile(r'\bSELECT\b', re.IGNORECASE)
_DATA_RE = re.compile(r'\bDATA\b', re.IGNORECASE)
# The leading \b stops identifiers that merely end in "type" (e.g. lv_subtype)
# from being mistaken for the TYPE keyword.
_TYPE_RE = re.compile(r'\bTYPE\s+([\w.]+)', re.IGNORECASE)
# \s+ between the keywords tolerates extra spaces or tabs.
_CALL_FUNCTION_RE = re.compile(
    r'\bCALL\s+FUNCTION\s+["\']?(\w+)["\']?', re.IGNORECASE
)
_STATEMENT_TERMINATORS = ('.', ';')


def scan_keywords(code_lines):
    """Extract SELECT blocks, DATA types and CALL FUNCTION names from code lines.

    Args:
        code_lines: Iterable of source lines (strings). Trailing whitespace on
            each line is ignored.

    Returns:
        A tuple ``(select_queries, data_types, function_names)`` of lists:

        * ``select_queries`` - one string per SELECT block. A block starts on
          a line containing the word SELECT and runs through the first line
          that ends with ``.`` or ``;``; its lines are joined with single
          spaces. A block still open when the input ends is returned as-is
          instead of being discarded.
        * ``data_types`` - for each line containing the word DATA, the first
          token following the TYPE keyword, without the statement-terminating
          period. Dots inside the token (``a.b``) are kept.
        * ``function_names`` - the name following CALL FUNCTION, with optional
          surrounding quotes removed.

    Lines that belong to a SELECT block are not scanned for DATA or
    CALL FUNCTION. All keyword matching is case-insensitive.
    """
    select_queries = []
    data_types = []
    function_names = []
    # Lines of the SELECT block being collected; non-empty means "inside a block".
    current_select = []

    for line in code_lines:
        text = line.rstrip()

        # Start a new SELECT block or keep extending the open one.
        if current_select or _SELECT_RE.search(text):
            current_select.append(text)
            if text.endswith(_STATEMENT_TERMINATORS):
                select_queries.append(' '.join(current_select))
                current_select = []
            continue

        # DATA: type after TYPE keyword
        if _DATA_RE.search(text):
            type_match = _TYPE_RE.search(text)
            if type_match:
                # The character class admits '.', so the statement terminator
                # ends up in the capture; strip it from the right.
                type_name = type_match.group(1).rstrip('.')
                if type_name:
                    data_types.append(type_name)

        # CALL FUNCTION: function name only
        call_match = _CALL_FUNCTION_RE.search(text)
        if call_match:
            function_names.append(call_match.group(1))

    # Input ended in the middle of a SELECT block: keep what was collected.
    if current_select:
        select_queries.append(' '.join(current_select))

    return select_queries, data_types, function_names

# Demo: run the scanner over a short ABAP-like snippet and print each
# category of result under its own heading.
sample_code = [
    # First SELECT block, terminated by ';'
    'SELECT *',
    '  FROM users',
    '  WHERE id = 1;',
    'DATA lv_name TYPE string.',
    'CALL FUNCTION "BAPI_USER_CREATE".',
    'WRITE lv_name.',
    # Second SELECT block, terminated by '.'
    'SELECT id, name',
    '  FROM customers',
    '  WHERE active = 1',
    '  ORDER BY name.',
    # Lower-case keywords exercise the case-insensitive matching
    'data: lt_orders TYPE TABLE_OF_ORDERS.',
    'call function "Z_CUSTOM_MODULE".',
    'DATA number TYPE i.',
    "CALL FUNCTION 'RFC_PING'.",
]

selects, data_types, functions = scan_keywords(sample_code)

# Each query is preceded by a '---' separator line.
print('SELECT Queries:')
for query in selects:
    print('---', query, sep='\n')

# Entries in the remaining sections are indented by two spaces.
print('\nDATA TYPEs:')
for type_name in data_types:
    print(f'  {type_name}')

print('\nFUNCTION Names:')
for function_name in functions:
    print(f'  {function_name}')


## Usage
- Replace `sample_code` with your own code, given as a list of strings (one string per source line).
- Run the cell to extract all full SELECT query blocks, DATA TYPEs, and FUNCTION names.
- A SELECT block that spans multiple lines is joined into a single string, with the original lines separated by single spaces.