# Chained Code Remediation Pipeline
This notebook performs a two-step process:
1. **Scans code to extract SELECT queries, data types, and function module names**
2. **Maps and transforms data types and SELECT statements using the S4 mapping from an external Excel file, updating code lines only where a mapping is found**

Input files required in the working directory:
- `Scan-Program.ipynb` (for scanning ABAP code)
- `Transform-DE-DT.ipynb` (for data element mapping logic)
- `Transform-SELECT.ipynb` (for SELECT query transformation logic)
- `Mapping-DE-DT.xlsx` (for mapping data elements to S4 types)

You can adapt the `sample_code` variable to your own ABAP code for processing.

In [None]:
# Step 1: Import scan logic from Scan-Program.ipynb
# (copy-paste here, or use %run if on local Jupyter)
import re
import pandas as pd
import copy
from cache_loader import PersistentCache

# Import SELECT transformation logic from Transform-SELECT.ipynb (use %run if in Jupyter, or import if in .py)
# %run Transform-SELECT.ipynb
try:
    from transform_select import transform_query_auto  # If you have .py version
except ImportError:
    transform_query_auto = None  # fallback for notebook context
    pass

def scan_keywords(code_lines):
    """Extract SELECT statements, DATA types and called function names.

    The scan is line based. Full-line ABAP comments (``*`` in the first
    column, or a line whose first non-blank character is ``"``) are ignored.

    Args:
        code_lines: Iterable of ABAP source lines (strings).

    Returns:
        A tuple ``(select_queries, data_types, function_names)`` of lists:

        * ``select_queries`` - each SELECT statement, from the line holding
          the ``SELECT`` keyword up to and including the first line that ends
          with ``.`` or ``;``, with the right-stripped lines joined by a
          single space.
        * ``data_types`` - the name following the first ``TYPE`` keyword on
          every line that contains the ``DATA`` keyword. Continuation lines
          of a chained ``DATA:`` statement are not inspected.
        * ``function_names`` - the name following ``CALL FUNCTION``.
    """
    # `\bSELECT\b` would also accept SELECT-OPTIONS because "-" is a word
    # boundary, so hyphenated keywords are excluded explicitly.
    select_start = re.compile(r'(?<![\w-])SELECT(?![\w-])', re.IGNORECASE)
    data_keyword = re.compile(r'\bDATA\b', re.IGNORECASE)
    # `\b` keeps identifiers ending in "type" (e.g. lv_obj_type) from being
    # taken for the keyword; "/" admits namespaced names such as /cem/guid.
    type_name = re.compile(r'\bTYPE\s+([\w./]+)', re.IGNORECASE)
    call_function = re.compile(
        r'\bCALL\s+FUNCTION\s+["\']?(\w+)["\']?', re.IGNORECASE
    )
    terminators = ('.', ';')

    select_queries = []
    data_types = []
    function_names = []
    current_select = []  # non-empty while inside an unterminated SELECT

    for line in code_lines:
        text = line.rstrip()
        stripped = text.lstrip()

        # Full-line comments can mention SELECT/DATA without being code.
        if text.startswith('*') or stripped.startswith('"'):
            continue

        # SELECT block: collect lines until the statement terminator.
        if current_select or select_start.search(text):
            current_select.append(text)
            if stripped.endswith(terminators):
                select_queries.append(' '.join(current_select))
                current_select = []
            continue

        # DATA: name after the TYPE keyword.
        if data_keyword.search(text):
            found = type_name.search(text)
            if found:
                # The character class admits ".", so drop the statement
                # terminator when the type is the last token on the line.
                name = found.group(1).rstrip('.')
                if name:
                    data_types.append(name)

        # CALL FUNCTION: function name, with or without quotes.
        found = call_function.search(text)
        if found:
            function_names.append(found.group(1))

    return select_queries, data_types, function_names

## Sample ABAP Code
Replace `sample_code` with your own codebase as a list of lines for testing.

In [None]:
# Sample ABAP report used as input for the scan and remediation steps.
# Each list element is one source line without a trailing newline.
# Replace this with your own ABAP source code lines.
sample_code = [
    '*&---------------------------------------------------------------------*',
    '*& Report ZGEN_1',
    '*&---------------------------------------------------------------------*',
    '*&',
    '*&---------------------------------------------------------------------*',
    'REPORT zgen_1.',
    '',
    'TABLES:crmc_proc_type,crmd_orderadm_h,crmc_tblobj_map.',
    '',
    'SELECTION-SCREEN BEGIN OF BLOCK b1 WITH FRAME TITLE TEXT-001.',
    "PARAMETERS: p_busobj TYPE crmt_subobject_category AS LISTBOX VISIBLE LENGTH 30 DEFAULT 'BUS2000116'.",
    'SELECTION-SCREEN END OF BLOCK b1.',
    '',
    'SELECTION-SCREEN BEGIN OF BLOCK b2 WITH FRAME TITLE TEXT-002.',
    'SELECT-OPTIONS: s_protyp FOR crmc_proc_type-process_type,',
    '                s_objid  FOR crmd_orderadm_h-object_id,',
    '                s_posdat FOR crmd_orderadm_h-posting_date,',
    '                s_maptab FOR crmc_tblobj_map-table_name NO INTERVALS NO-EXTENSION OBLIGATORY.',
    'SELECTION-SCREEN END OF BLOCK b2.',
    '',
    'SELECTION-SCREEN BEGIN OF BLOCK b3 WITH FRAME TITLE TEXT-003.',
    'PARAMETERS: p_test TYPE char1 AS CHECKBOX DEFAULT abap_true.',
    'SELECTION-SCREEN END OF BLOCK b3.',
    '',
    'TYPES: BEGIN OF ty_crmd_orderadm_h,',
    '         guid          TYPE crmd_orderadm_h-guid,',
    '         object_type   TYPE crmd_orderadm_h-object_type,',
    '         object_id     TYPE crmd_orderadm_h-object_id,',
    '         zzpur_of_loan TYPE char100,',
    '         zzsendtobid   TYPE char100,',
    '       END OF ty_crmd_orderadm_h,',
    '',
    '       BEGIN OF ty_crmd_orderadm_i,',
    '         guid        TYPE crmd_orderadm_i-guid,',
    '         number_int  TYPE crmd_orderadm_i-number_int,',
    '         header      TYPE crmd_orderadm_i-header,',
    '         object_type TYPE crmd_orderadm_i-object_type,',
    '       END OF ty_crmd_orderadm_i,',
    '',
    '       BEGIN OF ty_crmd_orgman,',
    '         guid      TYPE crmd_orderadm_i-guid,',
    '         sales_org TYPE crmt_sales_org,',
    '       END OF ty_crmd_orgman.',
    '',
    '',
    'DATA: lt_filter_guids TYPE crmd_object_guid_tab,',
    '      lt_header       TYPE TABLE OF ty_crmd_orderadm_h,',
    '      lt_item         TYPE TABLE OF ty_crmd_orderadm_i,',
    '      lt_guid_idx_i   TYPE TABLE OF crmd_order_index,',
    '      lt_org          TYPE TABLE OF ty_crmd_orgman,',
    '      lr_obj_type     TYPE RANGE OF crmt_subobject_category.',
    '',
    'DATA: lv_date         TYPE crmt_sms_created_at,',
    '      ls_product      TYPE comt_product,',
    '      ls_product_deta TYPE comt_product_maintain_api,',
    '      lv_guid         TYPE /cem/hierarchy_guid,',
    '      lv_dats         TYPE /bi0/oitcterrdat,',
    '      lv_p            TYPE /bi0/oitctstrrecs,',
    '      lv_partno       TYPE crmt_partner_no.',
    '*----------------------------------------------------------------------*',
    '* START-OF-SELECTION',
    '*----------------------------------------------------------------------*',
    'START-OF-SELECTION.',
    '',
    '',
    '  "Example - Header Table',
    '  SELECT guid',
    '     FROM crmd_orderadm_h INTO TABLE lt_filter_guids',
    '     WHERE object_type  EQ p_busobj',
    '       AND process_type IN s_protyp',
    '       AND object_id    IN s_objid',
    '       AND posting_date IN s_posdat.',
    '',
    '  "Example - Header Table with AET Fields',
    '  IF lt_filter_guids IS NOT INITIAL.',
    '    SELECT a~guid,',
    '           a~object_type,',
    '           a~object_id,',
    '           b~zzpur_of_loan,',
    '           b~zzsendtobid',
    '     FROM crmd_orderadm_h AS a',
    '     INNER JOIN crmd_customer_h AS b ON b~guid = a~guid',
    '     INTO TABLE @lt_header',
    '     FOR ALL ENTRIES IN @lt_filter_guids',
    '     WHERE a~guid = @lt_filter_guids-table_line.',
    '  ENDIF.',
    '',
    '  "Example - Item Table',
    '  IF lt_header IS NOT INITIAL.',
    '    SELECT guid',
    '           number_int',
    '           header',
    '           object_type',
    '     FROM crmd_orderadm_i',
    '     INTO TABLE lt_item',
    '     FOR ALL ENTRIES IN lt_header',
    '     WHERE header = lt_header-guid.',
    '  ENDIF.',
    '',
    '  "Example - Linked Table',
    '  SELECT a~guid',
    '         c~sales_org',
    '  FROM crmd_orderadm_h AS a',
    '  INNER JOIN crmd_link AS b  ON b~guid_hi  = a~guid',
    '  INNER JOIN crmd_orgman AS c ON c~guid  = b~guid_set',
    '  INTO TABLE lt_org',
    '  WHERE a~guid EQ lv_guid.',
    '',
    '  "Example - Index Table',
    '  SELECT item',
    '  INTO CORRESPONDING FIELDS OF TABLE lt_guid_idx_i',
    '  FROM crmd_order_index',
    '  FOR ALL ENTRIES IN lt_header',
    '  WHERE header    EQ lt_header-guid',
    '  AND object_type IN lr_obj_type',
    '  AND partner_no  EQ lv_partno',
    '  AND stat_bill   EQ \'X\'',
    '  AND pft_1       EQ \'X\'.',
    '',
    '  CALL FUNCTION \'COM_PRODUCT_GETDETAIL_API\'',
    '    EXPORTING',
    '      is_product      = ls_product',
    '      iv_current      = \'X\'',
    '    IMPORTING',
    '      es_product_data = ls_product_deta.'
]

In [None]:
# Step 2: Scan the sample program and list what was found, one section per
# category (SELECT statements, DATA types, called function names).
selects, data_types, functions = scan_keywords(sample_code)

for heading, found_items in (
    ('SELECT Queries:', selects),
    ('\nDATA TYPEs:', data_types),
    ('\nFUNCTION Names:', functions),
):
    print(heading)
    for found in found_items:
        print(found)

## Step 3: Import the Data Element Mapping Logic
We use the mapping function from `Transform-DE-DT.ipynb` for S4 mapping.

Make sure `Mapping-DE-DT.xlsx` is present in the working directory.

In [None]:
# Use persistent cache for mapping
def load_mapping():
    """Read the data-element mapping workbook from the working directory.

    Returns:
        Whatever ``pandas.read_excel`` returns for ``Mapping-DE-DT.xlsx``
        when called with default options.
    """
    return pd.read_excel('Mapping-DE-DT.xlsx')

# Wrap load_mapping in the project-local PersistentCache and fetch the mapping
# table that the later steps read as `df_map`.
# NOTE(review): presumably load() returns the cached table when the .pkl file
# exists and calls load_mapping otherwise - confirm against cache_loader.
# NOTE(review): the .pkl name suggests a pickle file; only load cache files
# from a trusted location.
mapping_cache = PersistentCache('mapping_de_dt_cache.pkl', load_mapping)
df_map = mapping_cache.load()

def get_s4_declaration(data_element, mapping_df=None):
    """Return the S4 type declaration mapped to a CRM data element.

    Args:
        data_element: Value to look up in the ``CRM`` column (exact match).
        mapping_df: Optional mapping table with the columns ``CRM`` and
            ``S4`` and, optionally, ``LENGTH`` and ``DECIMAL``. Defaults to
            the module-level ``df_map``.

    Returns:
        The stripped ``S4`` value of the first matching row, followed by
        `` LENGTH <n>`` and/or `` DECIMALS <n>`` when those cells are filled,
        or ``None`` when there is no matching row or its ``S4`` cell is
        blank.
    """
    table = df_map if mapping_df is None else mapping_df
    matches = table.loc[table['CRM'] == data_element]
    if matches.empty:
        return None  # Indicate no mapping

    first = matches.iloc[0]
    s4_value = first['S4']
    # A blank S4 cell is read as NaN; str(NaN) would yield the bogus type
    # name "nan", so treat it the same as a missing mapping.
    if pd.isnull(s4_value):
        return None
    declaration = str(s4_value).strip()
    if not declaration:
        return None

    # Optional size columns: (column name, ABAP keyword to emit).
    for column, keyword in (('LENGTH', 'LENGTH'), ('DECIMAL', 'DECIMALS')):
        if column in matches.columns and pd.notnull(first[column]):
            # Numeric cells usually arrive as floats (e.g. 10.0).
            declaration += f" {keyword} {int(float(first[column]))}"
    return declaration

## Step 4: Create a Remediation Plan
- For each data type found, get the mapped S4 declaration (if available).
- For each line in code, replace original data type with mapped S4 declaration **only if mapping is found**.
- If no mapping, leave code unchanged.

In [None]:
# Look up every distinct scanned type once, then keep only the entries for
# which a declaration came back: {original_type: mapped_declaration}
lookups = {name: get_s4_declaration(name) for name in set(data_types)}
mapping_results = {name: decl for name, decl in lookups.items() if decl}

print('Mapping Results:')
for original, declaration in mapping_results.items():
    print(f'{original} -> {declaration}')

## Step 5: Replace Code with Mapped Data Types and SELECT Statements
- Only change the TYPE clause if mapping is found.
- Remediate SELECT blocks using the SELECT transformation logic if available.

In [None]:
remediated_code = copy.deepcopy(sample_code)

# 1. Remediate data type declarations
# Only the first TYPE clause of each line is considered. `\b` ensures an
# identifier that merely ends in "type" (e.g. object_type) is not taken for
# the keyword, which would make the real keyword look like the type name.
type_clause = re.compile(r'\bTYPE\s+([\w./]+)', re.IGNORECASE)
for i, line in enumerate(remediated_code):
    match = type_clause.search(line)
    if not match:
        continue
    token = match.group(1)
    # The token may end with the statement terminator ("TYPE char1."); look
    # the bare name up and put the terminator back after the replacement.
    original_type = token.rstrip('.')
    if original_type not in mapping_results:
        continue
    terminator = token[len(original_type):]
    # Splice by position instead of re.sub so the mapped declaration is never
    # interpreted as a replacement template (backslashes, leading digits).
    remediated_code[i] = (
        line[:match.start(1)]
        + mapping_results[original_type]
        + terminator
        + line[match.end(1):]
    )

# 2. Remediate SELECT queries (block-wise)
# Skipped entirely when the SELECT transformation logic could not be imported.
if transform_query_auto is not None:
    # SELECT must be a keyword of its own: SELECT-OPTIONS is a selection-screen
    # declaration, not a query.
    select_start = re.compile(r'^\s*SELECT(?![\w-])', re.IGNORECASE)
    i = 0
    while i < len(remediated_code):
        if not select_start.search(remediated_code[i]):
            i += 1
            continue
        # Find the last line of the statement (ends with "." or ";"). The
        # search starts at the SELECT line itself so that a one-line SELECT
        # does not swallow the statement that follows it. Without a
        # terminator the block runs to the end of the code.
        j = i
        while (j < len(remediated_code) - 1
               and not remediated_code[j].strip().endswith(('.', ';'))):
            j += 1
        select_block = remediated_code[i:j + 1]
        select_query_str = '\n'.join(select_block)
        transformed_query = transform_query_auto(select_query_str, df_map)
        # The transformed statement takes the place of the first line; the
        # remaining lines of the original block are blanked out.
        remediated_code[i] = transformed_query
        for k in range(i + 1, j + 1):
            remediated_code[k] = ''
        i = j + 1

# Show the result, leaving out lines that are empty or whitespace-only.
print('--- Remediated Code ---')
for out_line in remediated_code:
    if not out_line.strip():
        continue
    print(out_line)