# Using Codey to summarize SAP ABAP files

In [10]:
!pwd

/Users/pemelend/local_jupyter_lab/GCP-genAI-kickstart


In [13]:
# Authenticate (This should trigger oAuth to authenticate with end user's credentials using Appleconnect)
!gcloud auth application-default login

In [2]:
# Import libraries and initialize Vertex AI client
import vertexai
import google.auth
from vertexai.language_models import TextGenerationModel, CodeChatModel
from pathlib import Path
import pandas as pd
import pickle5 as pickle
import multiprocessing
cores_available = multiprocessing.cpu_count()

# Initialize Vertexai client
## To fill out by user
PROJECT_ID = '' # @param {type:"string"}

if PROJECT_ID == "":
    PROJECT_ID = input("Enter your value: ")
LOCATION = 'us-central1'  # @param {type:"string"}

creds, _ = google.auth.default(quota_project_id=PROJECT_ID)
vertexai.init(project=PROJECT_ID, location=LOCATION, credentials=creds)

print(f'Using project ID: {PROJECT_ID}')

Enter your value:  multi-tenancy-dataproc


Using project ID: multi-tenancy-dataproc


In [6]:
!ls ../

Apple - SAP summary with Codey.ipynb gen-google-logo-img1.png
[34mGCP-genAI-kickstart[m[m                  gen-google-logo-img2.png
Internal GenAi test.ipynb            gen-google-logo-img3.png
Untitled.ipynb                       gen-imagen002-google-logo-img1.png
batch_code_bison_test.jsonl          gen-imagen002-google-logo-img2.png
batch_embedding_test.jsonl           gen-imagen002-google-logo-img3.png
batch_test.jsonl                     gen-img1.png
gen-GCP-logo-img1.png                gen-img2.png
gen-GCP-logo-img2.png                gen-img3.png
gen-GCP-logo-img3.png                [34msap_customer_programs[m[m
gen-apple-logo-img1.png              text.txt
gen-apple-logo-img2.png              [34mvirtual_env_jupyter_lab[m[m
gen-apple-logo-img3.png


In [7]:
# Files path Variables
cores_available = multiprocessing.cpu_count()

ROOT_PATH = "../sap_customer_programs"
MODEL = "codechat-bison-32k@002"
PROMPT = "Summarize this SAP ABAP code in 10 lines: \n"

root_path = Path(ROOT_PATH)
program_list = list(root_path.rglob('*.[tT][xX][tT]'))
program_list = [[file.parent, file.name, str(file)] for file in program_list]
program_list_df = pd.DataFrame(program_list, columns =['File_Path', 'File_Name', 'Full_File_Name'])
program_list_df.head()

Unnamed: 0,File_Path,File_Name,Full_File_Name
0,../sap_customer_programs/Large/Z_IDOC_OUTPUT_C...,Z_IDOC_OUTPUT_CARR_TO_SAP_RU.TXT,../sap_customer_programs/Large/Z_IDOC_OUTPUT_C...
1,../sap_customer_programs/Large/ZRPV0404,ZRPV0404.txt,../sap_customer_programs/Large/ZRPV0404/ZRPV04...
2,../sap_customer_programs/Medium/ZFV45VFZY01 So...,ZFV45VFZY01 Source Code.TXT,../sap_customer_programs/Medium/ZFV45VFZY01 So...
3,../sap_customer_programs/Medium/ZRPF_FA_SUBLEDGER,ZRPF_FA_SUBLEDGER.TXT,../sap_customer_programs/Medium/ZRPF_FA_SUBLED...
4,../sap_customer_programs/Medium/ZRPF_FA_SUBLED...,ZRPF_FA_SUBLEDGER-checkpoint.TXT,../sap_customer_programs/Medium/ZRPF_FA_SUBLED...


In [8]:
def summarize_code_chat(full_file_name, model=MODEL, max_output_tokens= 8192, temperature = 0.1):

    parameters = {
            "temperature": temperature,  # Temperature controls the degree of randomness in token selection.
            "max_output_tokens": max_output_tokens,  # Token limit determines the maximum amount of text output.
        }

    code_chat_model = CodeChatModel.from_pretrained(model)
    chat = code_chat_model.start_chat()
    # Using this line to grab only the first 2000 lines of each file and test summarizing all the files available
    abap_code = ''.join(Path(full_file_name).read_text().splitlines(True)[:2000])
    
    # Below line reads the file without triming
    # abap_code = Path(full_file_name).read_text()
    prompt = f"{PROMPT}{abap_code}"
    response = chat.send_message(
        prompt, **parameters
    )

    return response

In [9]:
import concurrent.futures
import urllib.request
import time

# This line filters only programs in the Small folder
# full_file_names = program_list_df.Full_File_Name[program_list_df['Full_File_Name'].str.contains("Small")].to_list()

full_file_names = program_list_df.Full_File_Name.to_list()

curr_time = time.time()
with concurrent.futures.ThreadPoolExecutor(max_workers=cores_available) as executor:
    # Start the operations 
    future_to_summarize = {executor.submit(summarize_code_chat, full_file_name): full_file_name for full_file_name in full_file_names}

    for future in concurrent.futures.as_completed(future_to_summarize):
        program = future_to_summarize[future]
        try:
            data = future.result()
        except Exception as exc:
            print('\n\n %r generated an exception: %s' % (full_file_names, exc))
        else:
            print(f'\n ************************ New file ******************* \nProgram summarized is: {program} \nSummary is:\n\n{data.text[:800]}')
print(f"completed in {time.time() - curr_time} seconds")



 ['../sap_customer_programs/Large/Z_IDOC_OUTPUT_CARR_TO_SAP_RU/Z_IDOC_OUTPUT_CARR_TO_SAP_RU.TXT', '../sap_customer_programs/Large/ZRPV0404/ZRPV0404.txt', '../sap_customer_programs/Medium/ZFV45VFZY01 Source Code/ZFV45VFZY01 Source Code.TXT', '../sap_customer_programs/Medium/ZRPF_FA_SUBLEDGER/ZRPF_FA_SUBLEDGER.TXT', '../sap_customer_programs/Medium/ZRPF_FA_SUBLEDGER/.ipynb_checkpoints/ZRPF_FA_SUBLEDGER-checkpoint.TXT', '../sap_customer_programs/Small/ZUTMZCPO/ZUTMZCPO.txt', '../sap_customer_programs/Small/zutv_upd_gtsblk_status/zutv_upd_gtsblk_status.txt', '../sap_customer_programs/X-Large/Z_SERVICE_SALES_ORDR_DISPLAY/LZSERVSODF01_SOURCE_CODE.txt', '../sap_customer_programs/X-Large/Z_SERVICE_SALES_ORDR_DISPLAY/GET_EGUI_POSTTRACK_DATE_TIME_SOURCE_CODE.txt', '../sap_customer_programs/X-Large/Z_SERVICE_SALES_ORDR_DISPLAY/LZSERVSODF01_SOURCE_CODE.txt_concatenated.txt', '../sap_customer_programs/X-Large/Z_SERVICE_SALES_ORDR_DISPLAY/Z_SERVICE_SALES_ORDR_DISPLAY_SOURCE_CODE.txt', '../sap_cust