# FUNCTION

In [68]:
import pandas as pd
from io import StringIO
from google.cloud import storage
import requests
from vertexai.generative_models import (
    GenerativeModel,
    GenerationConfig,
    HarmCategory,
    HarmBlockThreshold
)

def load_transcription_csv(bucket_name, file_path):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(file_path)
    content = blob.download_as_text()

    return pd.read_csv(StringIO(content))

def get_prompt(**kwargs):
    with open('prompt/disposition_code_cleaned_layer_2.txt', 'r') as file:
        prompt_template = file.read()

    return prompt_template.format(**kwargs)

def generate_prompt(bucket_name, file_path, interaction_id: str, list_of_option):
    transcript_df = load_transcription_csv(bucket_name,f"{file_path}/transcription/{interaction_id}/transcription_results.csv")
    transcript_df = transcript_df.where(pd.notnull(transcript_df), '')
    json_dict = transcript_df.to_dict(orient='records')
    prompt = get_prompt(
        transcript=json_dict,
        list_of_product=','.join(list_of_option),
    )
    return prompt

# BATCH PROCESS

## MODEL LLM

In [69]:
model = GenerativeModel('gemini-1.5-flash-002')
generation_config = GenerationConfig(
    temperature=0.1,  # Example temperature value
    top_k=1,         # Example top-k value
    top_p=0.1        # Example top-p value
)

def generate_content(prompt):
    result = model.generate_content(
        contents=prompt,
        safety_settings={
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        },
        generation_config=generation_config
    )
    return result

In [70]:
import json

with open('./ref_data/list_of_option.json') as f:
    list_of_option = json.load(f)['value']

list_of_unknown_csv = [9151919416110000090, 9151919416110000091, 9151919416110000092]
output_data = []

for interaction_id in range(9151919416110000051, 9151919416110000100):
    if interaction_id in list_of_unknown_csv:
        continue
    try:
        prompt = generate_prompt('athena-nonprod-gcs', 'athena-ws4a/athena-qapm', interaction_id, list_of_option)
        response = generate_content(prompt).text
        disposition_result = json.loads(response.replace("```json", "").replace("```", "").strip())
        output_data.append({
            'interaction_id': interaction_id,
            'option_1': disposition_result[0]['value'],
            'explanation_1': disposition_result[0]['field3'],
            'option_2': disposition_result[1]['value'] if len(disposition_result) > 1 else None,
            'explanation_2': disposition_result[1]['field3'] if len(disposition_result) > 1 else None,
            'option_3': disposition_result[2]['value'] if len(disposition_result) > 2 else None,
            'explanation_3': disposition_result[2]['field3'] if len(disposition_result) > 2 else None,
        })

        print(f"Processing interaction_id: {interaction_id}")
    except Exception as e:
        print(f"Error: {e} at interaction_id: {interaction_id}")

Processing interaction_id: 9151919416110000051
Processing interaction_id: 9151919416110000052
Processing interaction_id: 9151919416110000053
Processing interaction_id: 9151919416110000054
Processing interaction_id: 9151919416110000055
Processing interaction_id: 9151919416110000056
Processing interaction_id: 9151919416110000057
Processing interaction_id: 9151919416110000058
Processing interaction_id: 9151919416110000059
Processing interaction_id: 9151919416110000060
Processing interaction_id: 9151919416110000061
Processing interaction_id: 9151919416110000062
Processing interaction_id: 9151919416110000063
Processing interaction_id: 9151919416110000064
Processing interaction_id: 9151919416110000065
Processing interaction_id: 9151919416110000066
Processing interaction_id: 9151919416110000067
Processing interaction_id: 9151919416110000068
Processing interaction_id: 9151919416110000069
Processing interaction_id: 9151919416110000070
Processing interaction_id: 9151919416110000071
Processing in

In [71]:
df = pd.DataFrame(output_data)
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3
0,9151919416110000051,Credit Card,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,
1,9151919416110000052,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,
2,9151919416110000053,GoPay,Penjelasan klasifikasi: Nasabah mengalami kend...,,,,
3,9151919416110000054,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,
4,9151919416110000055,OCTO Mobile,Penjelasan klasifikasi: Nasabah mengalami kend...,,,,
5,9151919416110000056,Debit Card,Penjelasan klasifikasi: Nasabah menanyakan bia...,,,,
6,9151919416110000057,KPR,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,
7,9151919416110000058,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,
8,9151919416110000059,Credit Card,Nasabah menanyakan status pembayaran kartu kre...,,,,
9,9151919416110000060,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan pen...,,,,


# Combine data with ground truth

In [72]:
# read json file
import json

with open('./ground_truth/ground_truth.json') as f:
    ground_truth = json.load(f)

# map with ground truth
df['interaction_id'] = df['interaction_id'].astype(str)
df['ground_truth'] = df['interaction_id'].map(ground_truth)

# check whether option_1 or option_2 or option_3 is equal to ground_truth, the result is True or False in "is_match" column
df['is_match'] = (df['ground_truth'].isin(df['option_1'])) | \
                 (df['ground_truth'].isin(df['option_2'])) | \
                 (df['ground_truth'].isin(df['option_3']))

# save to csv
df.to_csv('output.csv', index=False)

# Read the output.csv file

In [73]:
df = pd.read_csv('output.csv')
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3,ground_truth,is_match
0,9151919416110000051,Credit Card,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,,ATM,False
1,9151919416110000052,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,,Credit Card,True
2,9151919416110000053,GoPay,Penjelasan klasifikasi: Nasabah mengalami kend...,,,,,OCTO Mobile,True
3,9151919416110000054,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,,Credit Card,True
4,9151919416110000055,OCTO Mobile,Penjelasan klasifikasi: Nasabah mengalami kend...,,,,,OCTO Mobile,True
5,9151919416110000056,Debit Card,Penjelasan klasifikasi: Nasabah menanyakan bia...,,,,,Tabungan,False
6,9151919416110000057,KPR,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,,KPR,True
7,9151919416110000058,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,,Credit Card,True
8,9151919416110000059,Credit Card,Nasabah menanyakan status pembayaran kartu kre...,,,,,Credit Card,True
9,9151919416110000060,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan pen...,,,,,,False


# CALCULATE ACCURACY

In [74]:
# calculate the mean of "is_match" column where ground_truth is not NaN
accuracy = df[df['ground_truth'].notnull()]['is_match'].mean()
accuracy

np.float64(0.868421052631579)

In [75]:
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3,ground_truth,is_match
0,9151919416110000051,Credit Card,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,,ATM,False
1,9151919416110000052,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,,Credit Card,True
2,9151919416110000053,GoPay,Penjelasan klasifikasi: Nasabah mengalami kend...,,,,,OCTO Mobile,True
3,9151919416110000054,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,,Credit Card,True
4,9151919416110000055,OCTO Mobile,Penjelasan klasifikasi: Nasabah mengalami kend...,,,,,OCTO Mobile,True
5,9151919416110000056,Debit Card,Penjelasan klasifikasi: Nasabah menanyakan bia...,,,,,Tabungan,False
6,9151919416110000057,KPR,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,,KPR,True
7,9151919416110000058,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,,Credit Card,True
8,9151919416110000059,Credit Card,Nasabah menanyakan status pembayaran kartu kre...,,,,,Credit Card,True
9,9151919416110000060,Credit Card,Penjelasan klasifikasi: Nasabah menanyakan pen...,,,,,,False
