# Using Codey to summarize SAP ABAP files

In [10]:
!pwd

/Users/pemelend/local_jupyter_lab/GCP-genAI-kickstart


In [1]:
# Authenticate (This should trigger oAuth to authenticate with end user's credentials using Appleconnect)
!gcloud auth application-default login

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8085%2F&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login&state=tr5V5svvl9hJX987m104NNz4bxXmGa&access_type=offline&code_challenge=a-tOSzyYmLHb8U1bv9_aQDTme2JiKGbSuG_H8uA6rdk&code_challenge_method=S256


Credentials saved to file: [/Users/pemelend/.config/gcloud/application_default_credentials.json]

These credentials will be used by any library that requests Application Default Credentials (ADC).

Quota project "multi-tenancy-dataproc" was added to ADC which can be used by Google client libraries for billing and quota. Note that some services may still bill the project owning the resource.


Updates are available for some Google C

In [2]:
# Import libraries and initialize Vertex AI client
import vertexai
import google.auth
from vertexai.language_models import TextGenerationModel, CodeChatModel
from pathlib import Path
import pandas as pd
import pickle5 as pickle
import multiprocessing
cores_available = multiprocessing.cpu_count()

# Initialize Vertexai client
## To fill out by user
PROJECT_ID = 'multi-tenancy-dataproc' # @param {type:"string"}

if PROJECT_ID == "":
    PROJECT_ID = input("Enter your value: ")
LOCATION = 'us-central1'  # @param {type:"string"}

creds, _ = google.auth.default(quota_project_id=PROJECT_ID)
vertexai.init(project=PROJECT_ID, location=LOCATION, credentials=creds)

print(f'Using project ID: {PROJECT_ID}')

Using project ID: multi-tenancy-dataproc


In [13]:
# Files path Variables
cores_available = multiprocessing.cpu_count()

ROOT_PATH = "../sap_customer_programs"
MODEL = "codechat-bison-32k@002"
PROMPT = """You are a SAP ABAP expert. Provide a summary of the following SAP ABAP program and write output as bullets. \n"""

root_path = Path(ROOT_PATH)
program_list = list(root_path.rglob('*.[tT][xX][tT]'))
program_list = [[file.parent, file.name, str(file)] for file in program_list]
program_list_df = pd.DataFrame(program_list, columns =['File_Path', 'File_Name', 'Full_File_Name'])
program_list_df.head()

Unnamed: 0,File_Path,File_Name,Full_File_Name
0,../sap_customer_programs/Large/Z_IDOC_OUTPUT_C...,Z_IDOC_OUTPUT_CARR_TO_SAP_RU.TXT,../sap_customer_programs/Large/Z_IDOC_OUTPUT_C...
1,../sap_customer_programs/Large/ZRPV0404,ZRPV0404.txt,../sap_customer_programs/Large/ZRPV0404/ZRPV04...
2,../sap_customer_programs/Medium/ZFV45VFZY01 So...,ZFV45VFZY01 Source Code.TXT,../sap_customer_programs/Medium/ZFV45VFZY01 So...
3,../sap_customer_programs/Medium/ZRPF_FA_SUBLEDGER,ZRPF_FA_SUBLEDGER.TXT,../sap_customer_programs/Medium/ZRPF_FA_SUBLED...
4,../sap_customer_programs/Medium/ZRPF_FA_SUBLED...,ZRPF_FA_SUBLEDGER-checkpoint.TXT,../sap_customer_programs/Medium/ZRPF_FA_SUBLED...


In [12]:
def summarize_code_chat(full_file_name, model=MODEL, max_output_tokens= 1024, temperature = 0.3):

    parameters = {
            "temperature": temperature,  # Temperature controls the degree of randomness in token selection.
            "max_output_tokens": max_output_tokens,  # Token limit determines the maximum amount of text output.
        }

    code_chat_model = CodeChatModel.from_pretrained(model)
    chat = code_chat_model.start_chat()
    # Using this line to grab only the first 2000 lines of each file and test summarizing all the files available
    abap_code = ''.join(Path(full_file_name).read_text().splitlines(True)[:2000])
    
    # Below line reads the file without triming
    # abap_code = Path(full_file_name).read_text()
    prompt = f"{PROMPT}{abap_code}"
    response = chat.send_message(
        prompt, **parameters
    )

    return response

In [14]:
import concurrent.futures
import urllib.request
import time

# This line filters only programs in the Small folder
full_file_names = program_list_df.Full_File_Name[program_list_df['Full_File_Name'].str.contains("Small")].to_list()

# full_file_names = program_list_df.Full_File_Name.to_list()

curr_time = time.time()
with concurrent.futures.ThreadPoolExecutor(max_workers=cores_available) as executor:
    # Start the operations 
    future_to_summarize = {executor.submit(summarize_code_chat, full_file_name): full_file_name for full_file_name in full_file_names}

    for future in concurrent.futures.as_completed(future_to_summarize):
        program = future_to_summarize[future]
        try:
            data = future.result()
        except Exception as exc:
            print('\n\n %r generated an exception: %s' % (full_file_names, exc))
        else:
            print(f'\n ************************ New file ******************* \nProgram summarized is: {program} \nSummary is:\n\n{data.text[:800]}')
print(f"completed in {time.time() - curr_time} seconds")


 ************************ New file ******************* 
Program summarized is: ../sap_customer_programs/Small/zutv_upd_gtsblk_status/zutv_upd_gtsblk_status.txt 
Summary is:

 **Summary of the SAP ABAP program:**

This ABAP program, named "LCL_ZUTV_UPD_GTSBLK_STATUS", is designed to update the status of blocked GTS documents in ECC tables. It provides various functionalities to retrieve and process data, including:

1. **Modifying Select Options**:
   - The program allows users to modify select options for specific fields, such as OPT_LIST, to restrict the data retrieval.

2. **Data Retrieval**:
   - It fetches data from the ZTG_GTS_LINESTAT table based on specified selection criteria, such as object type (OBJTP), sales document number (Vbeln), and date (Erdat).
   - If a synchronization checkbox (RB_SYNC) is checked, it additionally retrieves blocked records.

3. **Data Processing**:
   - The program processes the retrieved data to identify blocked documents b

 **********************