# FUNCTION

In [1]:
import pandas as pd
from io import StringIO
from google.cloud import storage
import requests
from vertexai.generative_models import (
    GenerativeModel,
    GenerationConfig,
    HarmCategory,
    HarmBlockThreshold
)

def load_transcription_csv(bucket_name, file_path):
    """Download a transcription CSV from Google Cloud Storage as a DataFrame.

    Args:
        bucket_name: Name of the GCS bucket that holds the file.
        file_path: Object path of the CSV inside the bucket.

    Returns:
        A DataFrame restricted to the 'speaker' and 'transcript' columns.
    """
    csv_blob = storage.Client().bucket(bucket_name).blob(file_path)
    # The object is fetched as text and parsed from an in-memory buffer.
    csv_buffer = StringIO(csv_blob.download_as_text())
    full_df = pd.read_csv(csv_buffer)
    return full_df[['speaker', 'transcript']]

def get_prompt(**kwargs):
    """Render the disposition-code prompt template with the given values.

    The template is read from './prompt/disposition_code_cleaned_layer_3.txt'
    (resolved against the current working directory) and filled in with
    ``str.format``.

    Args:
        **kwargs: Values for the named ``{placeholder}`` fields in the template.

    Returns:
        The formatted prompt string.

    Raises:
        FileNotFoundError: If the template file does not exist.
        KeyError: If the template names a placeholder missing from ``kwargs``.
    """
    # Pin the encoding: without it open() falls back to the platform default,
    # which can fail on or garble non-ASCII template text.
    with open('./prompt/disposition_code_cleaned_layer_3.txt', 'r', encoding='utf-8') as file:
        prompt_template = file.read()

    return prompt_template.format(**kwargs)

def generate_prompt(bucket_name, file_path, interaction_id: str, list_of_option):
    """Build the LLM prompt for one interaction.

    Args:
        bucket_name: GCS bucket that holds the transcription files.
        file_path: Path prefix inside the bucket; the CSV is read from
            '<file_path>/transcription/<interaction_id>/transcription_results.csv'.
        interaction_id: Identifier of the interaction to load.
        list_of_option: Iterable of option strings, comma-joined into the
            'conversational_context' placeholder.

    Returns:
        The prompt string produced by ``get_prompt``.
    """
    csv_path = f"{file_path}/transcription/{interaction_id}/transcription_results.csv"
    turns = load_transcription_csv(bucket_name, csv_path)
    # Replace missing cells with empty strings before serialising the rows.
    turns = turns.where(turns.notna(), '')

    return get_prompt(
        transcript=turns.to_dict(orient='records'),
        conversational_context=','.join(list_of_option),
    )

In [16]:
# Sanity check: load a single transcription from the bucket. Despite its name,
# `text` is a DataFrame with 'speaker' and 'transcript' columns.
text = load_transcription_csv('athena-nonprod-gcs', 'athena-ws4a/athena-qapm/transcription/9151919416110000100/transcription_results.csv')

# One dict per row; generate_prompt serialises transcripts the same way
# (after blanking missing values).
json_dict = text.to_dict(orient='records')
json_dict

[{'speaker': 'Agent',
  'transcript': 'Selamat sore dengan Echo, bagaimana saya bisa membantu?'},
 {'speaker': 'Customer',
  'transcript': 'Iya, Mas, mau tanya. Kalau untuk eh kartu kredit Platinum Mastercard, itu limitnya berapa ya?'},
 {'speaker': 'Agent',
  'transcript': 'Baik. Untuk minimal limitnya atau bagaimana, Bu?'},
 {'speaker': 'Customer',
  'transcript': 'Eh, kalau saya mau tahu kartu saya limitnya berapa, gimana?'},
 {'speaker': 'Agent',
  'transcript': 'Baik. Dikarenakan untuk limit total, itu tidak dapat saya informasikan. Eh, itu dikarenakan bagian dari verifikasi. Ibu dapat melakukan pengecekannya melalui lembar tagihan, Ibu.'},
 {'speaker': 'Customer', 'transcript': 'Lembar tagihan.'},
 {'speaker': 'Agent',
  'transcript': 'Benar. Di lembar tagihan, e-statement itu di bagian paling atas ada available credit-nya, Bu.'},
 {'speaker': 'Customer',
  'transcript': 'Enggak, enggak. Kan itu di email, enggak? Iya. Soalnya udah udah lama enggak di email tuh. Udah lama enggak d

# BATCH PROCESS

In [6]:
# get data from json file
import json

# Open the JSON files as UTF-8 explicitly so parsing does not depend on the
# platform's default text encoding.
with open('./ground_truth/list_of_input.json', encoding='utf-8') as f:
    list_of_input = json.load(f)

with open('./ref_data/action_product_to_detail.json', encoding='utf-8') as f:
    ref_data = json.load(f)

# list_of_input maps interaction_id -> a key of ref_data; resolve that key to
# the candidate options stored under it. A key missing from ref_data raises
# KeyError here rather than being skipped.
interaction_id_to_list_of_option = {}
for interaction_id, input_value in list_of_input.items():
    interaction_id_to_list_of_option[interaction_id] = ref_data[input_value]

## MODEL LLM

In [7]:
# Gemini model shared by every generate_content() call in this notebook.
model = GenerativeModel('gemini-1.5-flash-002')
# NOTE(review): temperature=1 presumably makes sampling non-deterministic, so
# repeated runs may give different predictions and accuracy; confirm this is
# intended for an evaluation run. top_k / top_p are left unset (commented out).
generation_config = GenerationConfig(
    temperature=1,  # sampling temperature applied to every request
    # top_k=1,         # Example top-k value
    # top_p=0.1        # Example top-p value
)

def generate_content(prompt):
    """Send ``prompt`` to the module-level Gemini model and return its response.

    Every listed harm category is set to ``BLOCK_NONE`` and the module-level
    ``generation_config`` is applied.

    Args:
        prompt: Prompt content passed through as ``contents``.

    Returns:
        The response object returned by ``model.generate_content``.
    """
    harm_categories = (
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
    )
    unblocked = {category: HarmBlockThreshold.BLOCK_NONE for category in harm_categories}

    return model.generate_content(
        contents=prompt,
        safety_settings=unblocked,
        generation_config=generation_config,
    )

In [8]:
output_data = []
list_of_unknown_csv = [9151919416110000090, 9151919416110000091, 9151919416110000092]
# The interaction ids iterated below originate from JSON object keys and are
# therefore strings, while the skip list holds ints. An int never equals a str,
# so compare on the string form or the skip would never trigger.
unknown_interaction_ids = {str(unknown_id) for unknown_id in list_of_unknown_csv}

for interaction_id, list_of_option in interaction_id_to_list_of_option.items():
    if str(interaction_id) in unknown_interaction_ids:
        continue
    try:
        prompt = generate_prompt('athena-nonprod-gcs', 'athena-ws4a/athena-qapm', interaction_id, list_of_option)
        response = generate_content(prompt).text
        # Strip a Markdown code fence around the reply before parsing it as JSON.
        disposition_result = json.loads(response.replace("```json", "").replace("```", "").strip())
        # The parsed reply is indexed as a list of objects with 'value' and
        # 'field3' keys; the first entry is required, the second and third
        # are optional and default to None.
        output_data.append({
            'interaction_id': interaction_id,
            'option_1': disposition_result[0]['value'],
            'explanation_1': disposition_result[0]['field3'],
            'option_2': disposition_result[1]['value'] if len(disposition_result) > 1 else None,
            'explanation_2': disposition_result[1]['field3'] if len(disposition_result) > 1 else None,
            'option_3': disposition_result[2]['value'] if len(disposition_result) > 2 else None,
            'explanation_3': disposition_result[2]['field3'] if len(disposition_result) > 2 else None,
        })

        # Printed only after the row was appended successfully.
        print(f"Processing interaction_id: {interaction_id}")
    except Exception as e:
        # Best effort per interaction: report the failure and continue with the
        # next id; failed interactions get no row in output_data.
        print(f"Error: {e} at interaction_id: {interaction_id}")

Processing interaction_id: 9151919416110000051
Processing interaction_id: 9151919416110000052
Processing interaction_id: 9151919416110000053
Processing interaction_id: 9151919416110000054
Processing interaction_id: 9151919416110000055
Processing interaction_id: 9151919416110000056
Processing interaction_id: 9151919416110000057
Processing interaction_id: 9151919416110000058
Processing interaction_id: 9151919416110000059
Processing interaction_id: 9151919416110000061
Processing interaction_id: 9151919416110000062
Processing interaction_id: 9151919416110000063
Processing interaction_id: 9151919416110000064
Processing interaction_id: 9151919416110000065
Processing interaction_id: 9151919416110000066
Processing interaction_id: 9151919416110000067
Processing interaction_id: 9151919416110000068
Processing interaction_id: 9151919416110000069
Processing interaction_id: 9151919416110000071
Processing interaction_id: 9151919416110000072
Processing interaction_id: 9151919416110000073
Processing in

In [9]:
# One row per successfully processed interaction; the columns come from the
# dicts appended to output_data.
df = pd.DataFrame(output_data)
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3
0,9151919416110000051,Gagal Tarik,Penjelasan klasifikasi: Nasabah mengalami gaga...,Gagal Transaksi,Penjelasan klasifikasi: Transaksi gagal tarik ...,Other,Penjelasan klasifikasi: Nasabah melaporkan tr...
1,9151919416110000052,Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah menanyakan tra...,Blokir/ Buka Blokir,Penjelasan klasifikasi: Nasabah mengalami mas...,Transaksi,Penjelasan klasifikasi: Nasabah melaporkan tra...
2,9151919416110000053,Kendala Transaksi,Penjelasan klasifikasi: Nasabah mengalami kend...,Other,Penjelasan klasifikasi: Masalah top up GoPay ...,Other,Penjelasan klasifikasi: Nasabah menanyakan kem...
3,9151919416110000054,Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah menanyakan kej...,Transaksi,Penjelasan klasifikasi: Percakapan berfokus p...,Kendala Transaksi,Penjelasan klasifikasi: Agen melakukan invest...
4,9151919416110000055,Kendala Login,Penjelasan klasifikasi: Nasabah mengalami kend...,Kendala Registrasi Perangkat Baru,Penjelasan klasifikasi: Masalah utama adalah k...,Kendala Transaksi,Penjelasan klasifikasi: Nasabah mengalami kend...
5,9151919416110000056,Fitur,Penjelasan klasifikasi: Nasabah menanyakan det...,Status,Penjelasan klasifikasi: Nasabah ingin mengetah...,Other,Penjelasan klasifikasi: Nasabah menanyakan per...
6,9151919416110000057,Status,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,
7,9151919416110000058,Limit,Penjelasan klasifikasi: Nasabah menanyakan sis...,Pembayaran Tagihan Credit Card,Penjelasan klasifikasi: Nasabah menanyakan dam...,Transaksi,Penjelasan klasifikasi: Nasabah menanyakan kon...
8,9151919416110000059,Pembayaran Tagihan Credit Card,Penjelasan klasifikasi: Nasabah menanyakan sta...,Status,Penjelasan klasifikasi: Nasabah mengalami kend...,Naik/ Turun Limit Sementara,Penjelasan klasifikasi: Nasabah menanyakan pro...
9,9151919416110000061,Tagihan,Penjelasan klasifikasi: Nasabah menanyakan sta...,Penutupan Kartu,Penjelasan klasifikasi: Nasabah mengeluhkan pr...,Tagihan (Tidak Terima/ Selisih),Penjelasan klasifikasi: Nasabah menanyakan tag...


# Combine data with ground truth

In [10]:
# read json file
import json

# Open as UTF-8 explicitly so parsing does not depend on the platform's
# default text encoding.
with open('./ground_truth/ground_truth.json', encoding='utf-8') as f:
    ground_truth = json.load(f)

# map with ground truth
# Cast ids to str first: JSON object keys are strings, so the lookup only
# matches string ids. Ids missing from ground_truth get NaN.
df['interaction_id'] = df['interaction_id'].astype(str)
df['ground_truth'] = df['interaction_id'].map(ground_truth)

# save to csv
df.to_csv('output.csv', index=False)

# Read the output.csv file

In [11]:
# Reload the results that were written to output.csv.
df = pd.read_csv('output.csv')

# "is_match" is True when option_1 (the first predicted option) equals ground_truth.
# NOTE(review): option_2 and option_3 are ignored here, so this is a top-1
# match; confirm that a match against any of the three options is not intended.
df['is_match'] = df['ground_truth'] == df['option_1']

# Narrow view for comparing the first prediction with the ground truth.
show_df = df[['interaction_id', 'option_1', 'ground_truth', 'is_match']]
show_df

Unnamed: 0,interaction_id,option_1,ground_truth,is_match
0,9151919416110000051,Gagal Tarik,Gagal Tarik,True
1,9151919416110000052,Sanggahan Transaksi,Sanggahan Transaksi,True
2,9151919416110000053,Kendala Transaksi,Kendala Transaksi,True
3,9151919416110000054,Sanggahan Transaksi,Sanggahan Transaksi,True
4,9151919416110000055,Kendala Login,Kendala Login,True
5,9151919416110000056,Fitur,Other,False
6,9151919416110000057,Status,Other,False
7,9151919416110000058,Limit,Limit,True
8,9151919416110000059,Pembayaran Tagihan Credit Card,Pembayaran Tagihan Credit Card,True
9,9151919416110000061,Tagihan,Penutupan Kartu,False


# CALCULATE ACCURACY

In [12]:
# Accuracy = mean of "is_match" over the rows that have a ground_truth value.
labelled_rows = df['ground_truth'].notna()
accuracy = df.loc[labelled_rows, 'is_match'].mean()
accuracy

np.float64(0.5128205128205128)

In [None]:
# Display the full results frame, including the ground_truth and is_match columns.
df