# FUNCTIONS

In [1]:
import pandas as pd
from io import StringIO
from google.cloud import storage
from vertexai.generative_models import (
    GenerativeModel,
    GenerationConfig,
    HarmCategory,
    HarmBlockThreshold
)

def load_transcription_csv(file_path, bucket_name='athena-nonprod-gcs'):
    """Download a transcription CSV from Cloud Storage and keep its dialogue columns.

    Args:
        file_path: Object path of the CSV inside the bucket.
        bucket_name: Name of the Cloud Storage bucket to read from. Defaults to
            the bucket this notebook has always used, so existing callers are
            unaffected.

    Returns:
        A DataFrame containing only the 'speaker' and 'transcript' columns,
        in that order.

    Raises:
        KeyError: If the CSV has no 'speaker' or 'transcript' column.
    """
    storage_client = storage.Client()
    blob = storage_client.bucket(bucket_name).blob(file_path)
    content = blob.download_as_text()

    # Parse from memory; any extra columns in the file are dropped here.
    return pd.read_csv(StringIO(content))[['speaker', 'transcript']]

def get_prompt(file_name, **kwargs):
    """Load a prompt template from ./prompt and fill in its placeholders.

    Args:
        file_name: Name of the template file inside the ./prompt directory
            (resolved relative to the current working directory).
        **kwargs: Values substituted into the template's str.format fields.

    Returns:
        The formatted prompt text.

    Raises:
        FileNotFoundError: If the template file does not exist.
        KeyError: If the template references a field that was not supplied.
    """
    # Decode as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on
    # Windows) can fail or garble non-ASCII characters in the prompt text.
    with open(f'./prompt/{file_name}', 'r', encoding='utf-8') as file:
        prompt_template = file.read()

    # str.format treats every {...} as a field, so literal braces in a template
    # must be written as {{ and }}.
    return prompt_template.format(**kwargs)

def generate_prompt_formatted(layer: int, interaction_id: str, list_of_option):
    """Build the classification prompt for one interaction at the given layer.

    The interaction's transcription CSV is loaded, missing cells are replaced
    with empty strings, and the rows (as a list of dicts) are inserted into the
    template `disposition_code_cleaned_layer_{layer}.txt` together with the
    comma-joined candidate options.

    Args:
        layer: Layer number used to pick the prompt template.
        interaction_id: Identifier used to locate the transcription CSV.
        list_of_option: Iterable of candidate option strings.

    Returns:
        The formatted prompt string.
    """
    csv_path = f"athena-ws4a/athena-qapm/transcription/{interaction_id}/transcription_results.csv"
    raw_transcript = load_transcription_csv(csv_path)
    # Replace nulls with '' so the records contain no NaN values.
    filled_transcript = raw_transcript.where(pd.notnull(raw_transcript), '')
    records = filled_transcript.to_dict(orient='records')
    template_name = f'disposition_code_cleaned_layer_{layer}.txt'
    return get_prompt(
        file_name=template_name,
        transcript=records,
        list_of_option=','.join(list_of_option),
    )

# LLM MODEL

In [2]:
# Only the temperature is set explicitly (0); top_k and top_p are not passed,
# so the SDK's own defaults apply to them.
generation_config = GenerationConfig(temperature=0)

# Shared model handle used by generate_content().
model = GenerativeModel('gemini-1.5-flash-002')

def generate_content(prompt):
    """Send `prompt` to the module-level `model` and return its raw response.

    The hate-speech, harassment, dangerous-content and sexually-explicit harm
    categories are all set to BLOCK_NONE, and the module-level
    `generation_config` is applied.

    Args:
        prompt: Prompt contents passed straight to `model.generate_content`.

    Returns:
        Whatever `model.generate_content` returns; callers read its `.text`.
    """
    unblocked_categories = (
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
    )
    safety_settings = {
        category: HarmBlockThreshold.BLOCK_NONE for category in unblocked_categories
    }
    return model.generate_content(
        contents=prompt,
        safety_settings=safety_settings,
        generation_config=generation_config,
    )

# BATCH PROCESS

In [5]:
import json

# Layer-1 action -> options offered at layer 2 for that action.
# JSON is read as UTF-8 explicitly instead of relying on the platform's
# default encoding.
with open('../../clean_data/action_to_product.json', encoding='utf-8') as f:
    map_action_to_product = json.load(f)

# The layer-1 candidates are exactly the keys of that mapping.
list_of_action = list(map_action_to_product)

# "action|product" key -> options offered at layer 3 for that pair.
with open('../../clean_data/action_product_to_detail.json', encoding='utf-8') as f:
    map_action_product_to_detail = json.load(f)

def _parse_disposition_response(response_text: str):
    """Decode a model reply as JSON after removing Markdown code-fence markers."""
    cleaned = response_text.replace("```json", "").replace("```", "").strip()
    return json.loads(cleaned)


def _classify_layer(layer: int, interaction_id: str, list_of_option):
    """Prompt the model for one layer and return its parsed JSON answer."""
    prompt = generate_prompt_formatted(layer, interaction_id, list_of_option)
    return _parse_disposition_response(generate_content(prompt).text)


def get_layer_1(interaction_id: str, list_of_option):
    """Classify the layer-1 label of an interaction.

    Args:
        interaction_id: Identifier of the interaction to classify.
        list_of_option: Candidate layer-1 labels offered to the model.

    Returns:
        The 'value' field of the first item in the model's JSON answer.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        IndexError, KeyError, TypeError: If the JSON is not a non-empty list
            of objects carrying a 'value' key.
    """
    return _classify_layer(1, interaction_id, list_of_option)[0]['value']


def get_layer_2(layer_1: str, interaction_id: str):
    """Classify the layer-2 label given the chosen layer-1 label.

    Args:
        layer_1: Layer-1 label; must be a key of `map_action_to_product`.
        interaction_id: Identifier of the interaction to classify.

    Returns:
        The string "<layer_1>|<layer-2 value>".

    Raises:
        KeyError: If `layer_1` is not a key of `map_action_to_product`.
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    list_of_product = map_action_to_product[layer_1]
    disposition_result = _classify_layer(2, interaction_id, list_of_product)
    return layer_1 + "|" + disposition_result[0]['value']


def get_layer_3(layer_2: str, interaction_id: str):
    """Classify the layer-3 candidates given the "layer_1|layer_2" key.

    Args:
        layer_2: Combined key; must be a key of `map_action_product_to_detail`.
        interaction_id: Identifier of the interaction to classify.

    Returns:
        The full parsed JSON answer of the model (not just the first item).

    Raises:
        KeyError: If `layer_2` is not a key of `map_action_product_to_detail`.
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    list_of_conversational_context = map_action_product_to_detail[layer_2]
    return _classify_layer(3, interaction_id, list_of_conversational_context)

# Interaction ids that are skipped by the batch loop below.
list_of_unknown_csv = [9151919416110000090, 9151919416110000091, 9151919416110000092]
# One dict per successfully classified interaction.
output_data = []

for interaction_id in range(9151919416110000051, 9151919416110000100):
    if interaction_id in list_of_unknown_csv:
        continue
    try:
        # Each layer narrows the option list offered to the next one.
        layer_1 = get_layer_1(interaction_id, list_of_action)
        layer_2 = get_layer_2(layer_1, interaction_id)
        disposition_result = get_layer_3(layer_2, interaction_id)

        record = {'interaction_id': interaction_id}
        for rank in range(3):
            # The first candidate is mandatory (a missing one raises and the
            # interaction is reported as an error); the second and third are
            # optional and default to None.
            if rank == 0 or len(disposition_result) > rank:
                candidate = disposition_result[rank]
                option = layer_2 + "|" + candidate['value']
                explanation = candidate['field3']
            else:
                option = None
                explanation = None
            record[f'option_{rank + 1}'] = option
            record[f'explanation_{rank + 1}'] = explanation
        output_data.append(record)

        print(f"Processing interaction_id: {interaction_id}")
    except Exception as e:
        # Best effort: report the failure and continue with the next id.
        print(f"Error: {e} at interaction_id: {interaction_id}")

Processing interaction_id: 9151919416110000051
Processing interaction_id: 9151919416110000052
Processing interaction_id: 9151919416110000053
Processing interaction_id: 9151919416110000054
Processing interaction_id: 9151919416110000055
Processing interaction_id: 9151919416110000056
Processing interaction_id: 9151919416110000057
Processing interaction_id: 9151919416110000058
Processing interaction_id: 9151919416110000059
Processing interaction_id: 9151919416110000060
Processing interaction_id: 9151919416110000061
Processing interaction_id: 9151919416110000062
Processing interaction_id: 9151919416110000063
Processing interaction_id: 9151919416110000064
Processing interaction_id: 9151919416110000065
Processing interaction_id: 9151919416110000066
Processing interaction_id: 9151919416110000067
Processing interaction_id: 9151919416110000068
Processing interaction_id: 9151919416110000069
Processing interaction_id: 9151919416110000070
Processing interaction_id: 9151919416110000071
Processing in

In [6]:
# Turn the records collected by the batch loop into a DataFrame
# (one row per appended record) and display it.
df = pd.DataFrame(output_data)
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3
0,9151919416110000051,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah melaporkan tra...,Complain|Credit Card|Kendala Transaksi,Penjelasan klasifikasi: Agen memproses lapora...,Complain|Credit Card|Transaksi,Penjelasan klasifikasi: Nasabah mengajukan san...
1,9151919416110000052,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah melaporkan tra...,Complain|Credit Card|Pergantian Kartu,Penjelasan klasifikasi: Nasabah meminta pencet...,Complain|Credit Card|Perubahan Data,Penjelasan klasifikasi: Nasabah mengalami peru...
2,9151919416110000053,Complain|ATM|Gagal Transaksi,Penjelasan klasifikasi: Nasabah mengalami gaga...,Complain|ATM|Other,Penjelasan klasifikasi: Masalah dengan transak...,,
3,9151919416110000054,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah menanyakan tra...,Complain|Credit Card|Kendala Transaksi,Penjelasan klasifikasi: Nasabah mengalami tra...,Complain|Credit Card|Tagihan,Penjelasan klasifikasi: Proses verifikasi PIN...
4,9151919416110000055,Complain|OCTO Mobile|Kendala Login,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain|OCTO Mobile|Kendala Registrasi Perang...,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain|OCTO Mobile|Kendala Transaksi,Penjelasan klasifikasi: Nasabah mengalami kend...
5,9151919416110000056,Inquiry|Debit Card|Biaya/Bunga/Denda,Penjelasan klasifikasi: Nasabah menanyakan bia...,Inquiry|Debit Card|Fitur,Penjelasan klasifikasi: Nasabah ingin mengetah...,Inquiry|Debit Card|Other,Penjelasan klasifikasi: Nasabah menanyakan car...
6,9151919416110000057,Inquiry|KPR|Status,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry|KPR|Other,Penjelasan klasifikasi: Informasi sisa tagihan...,,
7,9151919416110000058,Inquiry|Credit Card|Limit,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry|Credit Card|Tagihan,Penjelasan klasifikasi: Nasabah menanyakan pen...,Inquiry|Credit Card|Transaksi,Penjelasan klasifikasi: Nasabah bertanya tenta...
8,9151919416110000059,Complain|Credit Card|Pembayaran Tagihan Credit...,Penjelasan klasifikasi: Nasabah menanyakan sta...,Complain|Credit Card|Limit,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain|Credit Card|Naik/ Turun Limit Permanen,Penjelasan klasifikasi: Nasabah menanyakan car...
9,9151919416110000060,Inquiry|Credit Card|Annual Fee,Penjelasan klasifikasi: Nasabah menanyakan sta...,Inquiry|Credit Card|Status,Penjelasan klasifikasi: Nasabah menanyakan de...,Inquiry|Credit Card|Aktivasi dan PIN,Penjelasan klasifikasi: Verifikasi identitas ...


# Combine data with ground truth

In [7]:
# read json file
import json

# Read the labels as UTF-8 explicitly rather than with the platform's default
# encoding, which is not UTF-8 everywhere.
with open('./ground_truth/ground_truth.json', encoding='utf-8') as f:
    ground_truth = json.load(f)

# map with ground truth
# JSON object keys are always strings, so the ids are cast to str before the
# lookup; ids missing from the file end up as NaN in 'ground_truth'.
df['interaction_id'] = df['interaction_id'].astype(str)
df['ground_truth'] = df['interaction_id'].map(ground_truth)

# save to csv
df.to_csv('output.csv', index=False)

# Read the output.csv file

In [14]:
import pandas as pd
df = pd.read_csv('output.csv')

# A row is a match when the ground truth equals any of the three candidates.
# Comparisons against missing values are False, so rows without a ground truth
# (or without a second/third candidate) never match through the empty cell.
option_columns = ['option_1', 'option_2', 'option_3']
df['is_match'] = df[option_columns].eq(df['ground_truth'], axis=0).any(axis=1)

df_view = df[['interaction_id', *option_columns, 'ground_truth', 'is_match']]
df_view

Unnamed: 0,interaction_id,option_1,option_2,option_3,ground_truth,is_match
0,9151919416110000051,Complain|Credit Card|Sanggahan Transaksi,Complain|Credit Card|Kendala Transaksi,Complain|Credit Card|Transaksi,Complain|ATM|Gagal Tarik,False
1,9151919416110000052,Complain|Credit Card|Sanggahan Transaksi,Complain|Credit Card|Pergantian Kartu,Complain|Credit Card|Perubahan Data,Complain|Credit Card|Sanggahan Transaksi,True
2,9151919416110000053,Complain|ATM|Gagal Transaksi,Complain|ATM|Other,,Complain|OCTO Mobile|Kendala Transaksi,False
3,9151919416110000054,Complain|Credit Card|Sanggahan Transaksi,Complain|Credit Card|Kendala Transaksi,Complain|Credit Card|Tagihan,Complain|Credit Card|Sanggahan Transaksi,True
4,9151919416110000055,Complain|OCTO Mobile|Kendala Login,Complain|OCTO Mobile|Kendala Registrasi Perang...,Complain|OCTO Mobile|Kendala Transaksi,Complain|OCTO Mobile|Kendala Login,True
5,9151919416110000056,Inquiry|Debit Card|Biaya/Bunga/Denda,Inquiry|Debit Card|Fitur,Inquiry|Debit Card|Other,Inquiry|Tabungan|Other,False
6,9151919416110000057,Inquiry|KPR|Status,Inquiry|KPR|Other,,Inquiry|KPR|Other,True
7,9151919416110000058,Inquiry|Credit Card|Limit,Inquiry|Credit Card|Tagihan,Inquiry|Credit Card|Transaksi,Inquiry|Credit Card|Limit,True
8,9151919416110000059,Complain|Credit Card|Pembayaran Tagihan Credit...,Complain|Credit Card|Limit,Complain|Credit Card|Naik/ Turun Limit Permanen,Inquiry|Credit Card|Pembayaran Tagihan Credit ...,False
9,9151919416110000060,Inquiry|Credit Card|Annual Fee,Inquiry|Credit Card|Status,Inquiry|Credit Card|Aktivasi dan PIN,,False


# CALCULATE ACCURACY

In [1]:
# Overall accuracy in percent: mean of "is_match" over the rows that have a
# ground-truth label (unlabelled rows are excluded).
labelled_rows = df['ground_truth'].notna()
accuracy = df.loc[labelled_rows, 'is_match'].mean()
accuracy * 100

NameError: name 'df' is not defined

In [29]:
# FIND WHICH LAYER PRODUCED THE WRONG PREDICTION
def split_option(df, column_name):
    """Split a 'layer_1|layer_2|layer_3' column on '|' into three new columns.

    Adds `<column_name>_layer_1`, `<column_name>_layer_2` and
    `<column_name>_layer_3` to `df` in place. Missing values, and layers that
    are absent from a shorter string, become NaN.

    Args:
        df: DataFrame to extend (mutated in place).
        column_name: Name of the pipe-delimited column to split.

    Returns:
        The same DataFrame object, for chaining.
    """
    # Cast to object first: a column that is entirely NaN is read back from CSV
    # as float64, and the .str accessor rejects float columns outright.
    parts = df[column_name].astype(object).str.split('|')
    # Split once and reuse the result for all three layers.
    for position in range(3):
        df[f'{column_name}_layer_{position + 1}'] = parts.str[position]
    return df

# Break every candidate and the ground truth into their three layers.
for pipe_column in ('option_1', 'option_2', 'option_3', 'ground_truth'):
    df = split_option(df, pipe_column)

# Layers 1 and 2 are compared against the first candidate only; layer 3 counts
# as a match when any of the three candidates agrees with the ground truth.
df['is_match_layer_1'] = df['ground_truth_layer_1'].eq(df['option_1_layer_1'])
df['is_match_layer_2'] = df['ground_truth_layer_2'].eq(df['option_1_layer_2'])
layer_3_candidates = ['option_1_layer_3', 'option_2_layer_3', 'option_3_layer_3']
df['is_match_layer_3'] = (
    df[layer_3_candidates].eq(df['ground_truth_layer_3'], axis=0).any(axis=1)
)

In [30]:
# Side-by-side view of the layer-1 label of each candidate and the ground truth.
layer_1_columns = [
    f'{prefix}_layer_1'
    for prefix in ('option_1', 'option_2', 'option_3', 'ground_truth')
]
df_view = df[['interaction_id', *layer_1_columns, 'is_match_layer_1']]
df_view

Unnamed: 0,interaction_id,option_1_layer_1,option_2_layer_1,option_3_layer_1,ground_truth_layer_1,is_match_layer_1
0,9151919416110000051,Complain,Complain,Complain,Complain,True
1,9151919416110000052,Complain,Complain,Complain,Complain,True
2,9151919416110000053,Complain,Complain,,Complain,True
3,9151919416110000054,Complain,Complain,Complain,Complain,True
4,9151919416110000055,Complain,Complain,Complain,Complain,True
5,9151919416110000056,Inquiry,Inquiry,Inquiry,Inquiry,True
6,9151919416110000057,Inquiry,Inquiry,,Inquiry,True
7,9151919416110000058,Inquiry,Inquiry,Inquiry,Inquiry,True
8,9151919416110000059,Complain,Complain,Complain,Inquiry,False
9,9151919416110000060,Inquiry,Inquiry,Inquiry,,False


In [32]:
# Layer-1 accuracy over the rows that have a layer-1 ground truth.
has_layer_1_truth = df['ground_truth_layer_1'].notna()
accuracy_layer_1 = df.loc[has_layer_1_truth, 'is_match_layer_1'].mean()
print(f"Layer 1 accuracy: {accuracy_layer_1}")
# Show the first candidate's layer-1 label next to the ground truth.
df[['interaction_id', 'option_1_layer_1', 'ground_truth_layer_1', 'is_match_layer_1']]

Layer 1 accuracy: 0.7894736842105263


Unnamed: 0,interaction_id,option_1_layer_1,ground_truth_layer_1,is_match_layer_1
0,9151919416110000051,Complain,Complain,True
1,9151919416110000052,Complain,Complain,True
2,9151919416110000053,Complain,Complain,True
3,9151919416110000054,Complain,Complain,True
4,9151919416110000055,Complain,Complain,True
5,9151919416110000056,Inquiry,Inquiry,True
6,9151919416110000057,Inquiry,Inquiry,True
7,9151919416110000058,Inquiry,Inquiry,True
8,9151919416110000059,Complain,Inquiry,False
9,9151919416110000060,Inquiry,,False


In [33]:
# Layer-2 accuracy over the rows that have a layer-2 ground truth.
has_layer_2_truth = df['ground_truth_layer_2'].notna()
accuracy_layer_2 = df.loc[has_layer_2_truth, 'is_match_layer_2'].mean()
print(f"Layer 2 accuracy: {accuracy_layer_2}")
layer_2_view = [
    'interaction_id',
    'option_1_layer_2',
    'option_2_layer_2',
    'option_3_layer_2',
    'ground_truth_layer_2',
    'is_match_layer_2',
]
df[layer_2_view]

Layer 2 accuracy: 0.7894736842105263


Unnamed: 0,interaction_id,option_1_layer_2,option_2_layer_2,option_3_layer_2,ground_truth_layer_2,is_match_layer_2
0,9151919416110000051,Credit Card,Credit Card,Credit Card,ATM,False
1,9151919416110000052,Credit Card,Credit Card,Credit Card,Credit Card,True
2,9151919416110000053,ATM,ATM,,OCTO Mobile,False
3,9151919416110000054,Credit Card,Credit Card,Credit Card,Credit Card,True
4,9151919416110000055,OCTO Mobile,OCTO Mobile,OCTO Mobile,OCTO Mobile,True
5,9151919416110000056,Debit Card,Debit Card,Debit Card,Tabungan,False
6,9151919416110000057,KPR,KPR,,KPR,True
7,9151919416110000058,Credit Card,Credit Card,Credit Card,Credit Card,True
8,9151919416110000059,Credit Card,Credit Card,Credit Card,Credit Card,True
9,9151919416110000060,Credit Card,Credit Card,Credit Card,,False


In [34]:
# Layer-3 accuracy over the rows that have a layer-3 ground truth.
has_layer_3_truth = df['ground_truth_layer_3'].notna()
accuracy_layer_3 = df.loc[has_layer_3_truth, 'is_match_layer_3'].mean()
print(f"Layer 3 accuracy: {accuracy_layer_3}")
layer_3_view = [
    'interaction_id',
    'option_1_layer_3',
    'option_2_layer_3',
    'option_3_layer_3',
    'ground_truth_layer_3',
    'is_match_layer_3',
]
df[layer_3_view]

Layer 3 accuracy: 0.6578947368421053


Unnamed: 0,interaction_id,option_1_layer_3,option_2_layer_3,option_3_layer_3,ground_truth_layer_3,is_match_layer_3
0,9151919416110000051,Sanggahan Transaksi,Kendala Transaksi,Transaksi,Gagal Tarik,False
1,9151919416110000052,Sanggahan Transaksi,Pergantian Kartu,Perubahan Data,Sanggahan Transaksi,True
2,9151919416110000053,Gagal Transaksi,Other,,Kendala Transaksi,False
3,9151919416110000054,Sanggahan Transaksi,Kendala Transaksi,Tagihan,Sanggahan Transaksi,True
4,9151919416110000055,Kendala Login,Kendala Registrasi Perangkat Baru,Kendala Transaksi,Kendala Login,True
5,9151919416110000056,Biaya/Bunga/Denda,Fitur,Other,Other,True
6,9151919416110000057,Status,Other,,Other,True
7,9151919416110000058,Limit,Tagihan,Transaksi,Limit,True
8,9151919416110000059,Pembayaran Tagihan Credit Card,Limit,Naik/ Turun Limit Permanen,Pembayaran Tagihan Credit Card,True
9,9151919416110000060,Annual Fee,Status,Aktivasi dan PIN,,False
