# CRM to S4 Select Query Transformation (Optimized)
This notebook transforms ABAP SELECT queries in a full ABAP program using CRM→S4 table and field mapping from Excel. It works robustly in Copilot agent mode and standalone Jupyter environments.

In [1]:
import re
import pandas as pd
from functools import lru_cache
from typing import Union

# --- Load Mappings with Error Handling ---
@lru_cache(maxsize=1)
def load_mappings_cached():
    EXCEL_URL = "https://github.com/srinivashj1/Code_Remediation/raw/main/Select_Queries_Mapping.xlsx"
    MAPPING_SHEET = "CRM_S4_TABLE_FIELDS_MAPPING"
    try:
        df = pd.read_excel(EXCEL_URL, sheet_name=MAPPING_SHEET)
    except Exception as e:
        print(f"Error loading mapping file: {e}")
        return {}, {}
    table_map = dict(zip(df["CRM_TABNAME"].astype(str).str.upper(), df["S4_TABNAME"].astype(str)))
    field_map = {}
    for _, row in df.iterrows():
        crm_tabname = str(row["CRM_TABNAME"]).upper()
        crm_fieldname = str(row["CRM_FIELDNAME"]).upper()
        s4_fieldname = str(row["S4_FIELDNAME"])
        field_map[(crm_tabname, crm_fieldname)] = s4_fieldname
    return table_map, field_map

table_mapping, field_mapping = load_mappings_cached()


In [2]:
# Tables whose joins should be removed
REMOVE_JOIN_TABLES = {
    "CRMD_LINK", "CRMD_SALES", "CRMD_BILLING", "CRMD_ORGMAN", "CRMD_PRICING", "CRMD_PRICING_I"
}

def transform_query(source_query:str)->str:
    query = re.sub(r'\s+', ' ', source_query.strip())
    # Remove joins for specified tables
    def join_remover(match):
        table = match.group(1).upper()
        if table in REMOVE_JOIN_TABLES:
            return ''
        return match.group(0)
    query = re.sub(
        r'INNER JOIN\s+(\w+)\s+AS\s+\w+\s+ON\s+[^I]*?(?=(INNER JOIN|\bWHERE\b|$))',
        join_remover,
        query,
        flags=re.IGNORECASE
    )
    from_match = re.search(r'FROM\s+(\w+)', query, re.IGNORECASE)
    if from_match:
        crm_tabname = from_match.group(1).upper()
        s4_tabname = table_mapping.get(crm_tabname, crm_tabname)
        query = re.sub(rf'FROM\s+{crm_tabname}', f'FROM {s4_tabname}', query, flags=re.IGNORECASE)
    def map_field(field):
        if '~' in field:
            alias, fname = field.split('~', 1)
        else:
            fname = field
        fname = fname.strip().upper()
        s4_fieldname = None
        if from_match:
            crm_tabname = from_match.group(1).upper()
            s4_fieldname = field_mapping.get((crm_tabname, fname), fname)
        if s4_fieldname is None:
            s4_fieldname = fname
        return f"{s4_fieldname} AS {fname}"
    select_fields_match = re.search(r'SELECT (.*?) FROM', query, re.IGNORECASE)
    fields = []
    comma_in_select = False  # PATCH: Track if original SELECT used commas
    if select_fields_match:
        fields_str = select_fields_match.group(1).strip()
        if ',' in fields_str:
            comma_in_select = True
            fields = [f.strip() for f in fields_str.split(',')]
        else:
            fields = [f.strip() for f in fields_str.split() if f.strip()]
    else:
        fields_match = re.search(r'FIELDS (.*?)(WHERE|INTO|$)', query, re.IGNORECASE)
        if fields_match:
            fields_str = fields_match.group(1).strip().rstrip(',')
            if ',' in fields_str:
                comma_in_select = True
                fields = [f.strip() for f in fields_str.split(',')]
            else:
                fields = [f.strip() for f in fields_str.split() if f.strip()]
    fields = [f for f in fields if f.strip().upper() != 'SINGLE']
    transformed_fields = [map_field(f) for f in fields]
    into_clause = ''
    into_match = re.search(r'INTO\s+(TABLE\s+)?((@?DATA\([^)]+\))|(@?\w+))', query, re.IGNORECASE)
    if into_match:
        into_var = into_match.group(2)
        if into_match.group(1):
            into_clause = f'INTO TABLE {into_var}'
        else:
            into_clause = f'INTO {into_var}'
    where_clause = ''
    where_match = re.search(r'WHERE (.*?)(FOR ALL ENTRIES|INTO|UP TO|$)', query, re.IGNORECASE)
    if where_match:
        where_cond = where_match.group(1).strip()
        crm_tabname = from_match.group(1).upper() if from_match else ""
        def map_where_field(match):
            left = match.group(1)
            op = match.group(2)
            right = match.group(3)
            if '~' in left:
                _, left_field = left.split('~', 1)
            else:
                left_field = left
            left_field = left_field.strip().upper()
            mapped = field_mapping.get((crm_tabname, left_field), left_field)
            return f"{mapped}{op}{right}"
        where_cond = re.sub(r'([@]?\w+(?:~\w+)?)(\s*(?:=|EQ|NE|<|>|LIKE)\s*)([^\s,\)]+)', map_where_field, where_cond, flags=re.IGNORECASE)
        where_clause = f'WHERE {where_cond}'
    parts = []
    if re.search(r'\bSINGLE\b', query, re.IGNORECASE):
        parts.append('SELECT SINGLE')
    else:
        parts.append('SELECT')
    # PATCH: Add comma or spaces between fields based on original SELECT
    if comma_in_select:
        parts.append(', '.join(transformed_fields))
    else:
        parts.append(' '.join(transformed_fields))
    if from_match:
        parts.append(f'FROM {table_mapping.get(from_match.group(1).upper(), from_match.group(1))}')
    if into_clause:
        parts.append(into_clause)
    fae_match = re.search(r'FOR ALL ENTRIES IN (\w+)', query, re.IGNORECASE)
    if fae_match:
        parts.append(f'FOR ALL ENTRIES IN {fae_match.group(1).upper()}')
    up_to_match = re.search(r'UP TO (\w+) ROWS', query, re.IGNORECASE)
    if up_to_match:
        parts.append(f'UP TO {up_to_match.group(1)} ROWS')
    if where_clause:
        parts.append(where_clause)
    transformed_query = ' '.join(parts)
    return transformed_query


In [3]:
def transform_abap_program_input(input_abap:Union[str,bytes])->str:
    """
    Accepts either a string (ABAP code) or a file path (str/bytes).
    Returns the remediated ABAP program.
    """
    # If file path, read file contents
    if isinstance(input_abap, str) and input_abap.endswith('.abap'):
        try:
            with open(input_abap, 'r', encoding='utf-8') as f:
                program_source = f.read()
        except Exception as e:
            print(f"Error reading ABAP file: {e}")
            return ''
    elif isinstance(input_abap, bytes):
        program_source = input_abap.decode('utf-8')
    else:
        program_source = input_abap
    return transform_abap_program(program_source)

def transform_abap_program(program_source:str)->str:
    # Pattern to match ABAP SELECT blocks (including multiline)
    select_pattern = re.compile(
        r'(SELECT[\s\S]+?FROM[\s\S]+?(?:WHERE[\s\S]+?)?(?:FOR ALL ENTRIES IN[\s\S]+?)?(?:INTO[\s\S]+?)?(?:UP TO[\s\S]+?ROWS)?\s*\.)',
        re.IGNORECASE
    )
    def transform_match(match):
        select_query_block = match.group(0)
        # Remove line breaks and extra spaces for transforming
        select_query_one_line = ' '.join(select_query_block.replace('\n', ' ').split())
        select_query_no_period = select_query_one_line.rstrip('.')
        transformed = transform_query(select_query_no_period)
        return transformed + '.'
    transformed_program = select_pattern.sub(transform_match, program_source)
    return transformed_program


In [6]:
# Transform the original_select query using the transform_query function
transformed_select = transform_query(original_select)
print(transformed_select)

SELECT HEADER_GUID AS GUID SALES_ORG AS SALES_ORG FROM CRMS4D_SERV_H INTO TABLE LT_ORG WHERE HEADER_GUID EQ LV_GUID.


In [4]:
abap_code = """
*&---------------------------------------------------------------------*
*& Report ZSELECT_TEST
*&---------------------------------------------------------------------*
*&
*&---------------------------------------------------------------------*
REPORT zselect_test.

TABLES:crmc_proc_type,crmd_orderadm_h,crmc_tblobj_map.

SELECTION-SCREEN BEGIN OF BLOCK b1 WITH FRAME TITLE TEXT-001.
PARAMETERS: p_busobj TYPE crmt_subobject_category AS LISTBOX VISIBLE LENGTH 30 DEFAULT 'BUS2000116'.
SELECTION-SCREEN END OF BLOCK b1.

SELECTION-SCREEN BEGIN OF BLOCK b2 WITH FRAME TITLE TEXT-002.
SELECT-OPTIONS: s_protyp FOR crmc_proc_type-process_type,
                s_objid  FOR crmd_orderadm_h-object_id,
                s_posdat FOR crmd_orderadm_h-posting_date,
                s_maptab FOR crmc_tblobj_map-table_name NO INTERVALS NO-EXTENSION OBLIGATORY.
SELECTION-SCREEN END OF BLOCK b2.

SELECTION-SCREEN BEGIN OF BLOCK b3 WITH FRAME TITLE TEXT-003.
PARAMETERS: p_test TYPE char1 AS CHECKBOX DEFAULT abap_true.
SELECTION-SCREEN END OF BLOCK b3.

TYPES: BEGIN OF ty_crmd_orderadm_h,
         guid        TYPE crmd_orderadm_h-guid,
         object_type TYPE crmd_orderadm_h-object_type,
         object_id   TYPE crmd_orderadm_h-object_id,
       END OF ty_crmd_orderadm_h,

       BEGIN OF ty_crmd_orderadm_i,
         guid        TYPE crmd_orderadm_i-guid,
         number_int  TYPE crmd_orderadm_i-number_int,
         header      TYPE crmd_orderadm_i-header,
         object_type TYPE crmd_orderadm_i-object_type,
       END OF ty_crmd_orderadm_i.


DATA: gt_filter_guids TYPE crmd_object_guid_tab,
      gt_header       TYPE TABLE OF ty_crmd_orderadm_h,
      gt_item         TYPE TABLE OF ty_crmd_orderadm_i.

DATA: lv_date TYPE CRMT_SMS_CREATED_AT,
      ls_product TYPE COMT_PRODUCT,
      ls_product_deta TYPE COMT_PRODUCT_MAINTAIN_API.
*----------------------------------------------------------------------*
* START-OF-SELECTION
*----------------------------------------------------------------------*
START-OF-SELECTION.

SELECT guid
   FROM crmd_orderadm_h INTO TABLE gt_filter_guids
   WHERE object_type  EQ p_busobj
     AND process_type IN s_protyp
    AND object_id    IN s_objid
     AND posting_date IN s_posdat.

IF gt_filter_guids IS NOT INITIAL.
SELECT guid, object_type, object_id,ZZ_ADD_DES, ZZ_TEST12
 FROM crmd_orderadm_h INTO TABLE gt_header
   FOR ALL ENTRIES IN gt_filter_guids
   WHERE guid = gt_filter_guids-table_line.
ENDIF.

IF gt_header IS NOT INITIAL.
  SELECT guid, number_int, header, object_type
   FROM crmd_orderadm_i
   INTO TABLE gt_item
   FOR ALL ENTRIES IN gt_header
   WHERE header = gt_header-guid.
ENDIF.

SELECT A~GUID
       C~SALES_ORG
FROM CRMD_ORDERADM_H AS A
INNER JOIN CRMD_LINK AS B  ON B~GUID_HI  = A~GUID
INNER JOIN CRMD_ORGMAN AS C ON C~GUID  = B~GUID_SET
INTO TABLE LT_ORG
WHERE  A~GUID EQ  LV_GUID.

call function 'COM_PRODUCT_GETDETAIL_API'
  exporting
    IS_PRODUCT      = ls_product
    IV_CURRENT      = 'X
  importing
    es_product_data = ls_product_deta.
"""

transformed_abap = transform_abap_program(abap_code)
print(transformed_abap)


*&---------------------------------------------------------------------*
*& Report ZSELECT HEADER_GUID AS GUID FROM CRMS4D_SERV_H INTO TABLE gt_filter_guids WHERE OBJTYPE_H EQ p_busobj AND process_type IN s_protyp AND object_id IN s_objid AND posting_date IN s_posdat.

IF gt_filter_guids IS NOT INITIAL.
SELECT HEADER_GUID AS GUID, OBJTYPE_H AS OBJECT_TYPE, OBJECT_ID AS OBJECT_ID, ZZ1_ADDLDESCRIPTION_SRH AS ZZ_ADD_DES, ZZ1_ZZTESTKUTTEXT12_SRH AS ZZ_TEST12 FROM CRMS4D_SERV_H INTO TABLE gt_header FOR ALL ENTRIES IN GT_FILTER_GUIDS WHERE HEADER_GUID = gt_filter_guids-table_line.
ENDIF.

IF gt_header IS NOT INITIAL.
  SELECT ITEM_GUID AS GUID, NUMBER_INT AS NUMBER_INT, HEADER_GUID AS HEADER, OBJTYPE_I AS OBJECT_TYPE FROM CRMS4D_SERV_I INTO TABLE gt_item FOR ALL ENTRIES IN GT_HEADER WHERE HEADER_GUID = gt_header-guid.
ENDIF.

SELECT HEADER_GUID AS GUID SALES_ORG AS SALES_ORG FROM CRMS4D_SERV_H INTO TABLE LT_ORG WHERE HEADER_GUID EQ LV_GUID.

call function 'COM_PRODUCT_GETDETAIL_API'
  expor