# MODEL LLM


In [13]:
from vertexai.generative_models import (
    GenerativeModel,
    GenerationConfig,
    HarmCategory,
    HarmBlockThreshold
)

model = GenerativeModel('gemini-1.5-flash-002')
generation_config = GenerationConfig(
    temperature=0.1,  # Example temperature value
    top_k=1,         # Example top-k value
    top_p=0.1        # Example top-p value
)

def generate_content(prompt):
    result = model.generate_content(
        contents=prompt,
        safety_settings={
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        },
        generation_config=generation_config
    )
    return result

# FUNCTION

In [14]:
import pandas as pd
from io import StringIO
from google.cloud import storage
import os
BASE_DIR = os.getcwd()

def load_transcription_csv(bucket_name, file_path):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(file_path)
    content = blob.download_as_text()

    return pd.read_csv(StringIO(content))[['speaker', 'transcript']]

def get_prompt(**kwargs):
    with open('prompt/disposition_code.txt', 'r') as file:
        prompt_template = file.read()

    return prompt_template.format(**kwargs)

def get_ref_table():
    excel_path = "disposition_code_ref_table_cleaned.xlsx"
    ref_table_df = pd.read_excel(os.path.join(BASE_DIR, excel_path))
    ref_table_df.drop(columns=["Konkatenasi_Layer123"], inplace=True)
    ref_table_dict = ref_table_df.to_dict(orient='records')
    return ref_table_dict

def generate_prompt(bucket_name, file_path, interaction_id: str):
    transcript_df = load_transcription_csv(bucket_name,f"{file_path}/transcription/{interaction_id}/transcription_results.csv")
    transcript_df = transcript_df.where(pd.notnull(transcript_df), 'silence')
    # json_dict = {
    #     'speaker': transcript_df['speaker'].tolist(),
    #     'transcript': transcript_df['transcript'].tolist()
    # }
    json_dict = transcript_df.to_dict(orient='records')
    prompt = get_prompt(
        transcript=json_dict,
        disposition_code_ref_table=get_ref_table()
    )
    return prompt

In [15]:
# generate_prompt('athena-nonprod-gcs', 'athena-ws4a/athena-qapm', '9151919416110000051')

# BATCH PROCESS

In [16]:
import json

list_of_unknown_csv = [9151919416110000090, 9151919416110000091, 9151919416110000092]
output_data = []
for i in range(9151919416110000051, 9151919416110000099+1):
    if i in list_of_unknown_csv:
        continue
    try:
        prompt = generate_prompt('athena-nonprod-gcs', 'athena-ws4a/athena-qapm', i)
        response = generate_content(prompt).text
        disposition_result = json.loads(response.replace("```json", "").replace("```", "").strip())
        output_data.append({
            'interaction_id': i,
            'option_1': disposition_result[0]['value'],
            'explanation_1': disposition_result[0]['field3'],
            'option_2': disposition_result[1]['value'] if len(disposition_result) > 1 else None,
            'explanation_2': disposition_result[1]['field3'] if len(disposition_result) > 1 else None,
            'option_3': disposition_result[2]['value'] if len(disposition_result) > 2 else None,
            'explanation_3': disposition_result[2]['field3'] if len(disposition_result) > 2 else None,
        })
        print(f"Processed interaction_id: {i}")
    except Exception as e:
        print(f"Error: {e} at interaction_id: {i}")

df = pd.DataFrame(output_data)


Processed interaction_id: 9151919416110000051
Processed interaction_id: 9151919416110000052
Processed interaction_id: 9151919416110000053
Processed interaction_id: 9151919416110000054
Processed interaction_id: 9151919416110000055
Processed interaction_id: 9151919416110000056
Processed interaction_id: 9151919416110000057
Processed interaction_id: 9151919416110000058
Processed interaction_id: 9151919416110000059
Processed interaction_id: 9151919416110000060
Processed interaction_id: 9151919416110000061
Processed interaction_id: 9151919416110000062
Processed interaction_id: 9151919416110000063
Processed interaction_id: 9151919416110000064
Processed interaction_id: 9151919416110000065
Processed interaction_id: 9151919416110000066
Processed interaction_id: 9151919416110000067
Processed interaction_id: 9151919416110000068
Processed interaction_id: 9151919416110000069
Processed interaction_id: 9151919416110000070
Processed interaction_id: 9151919416110000071
Processed interaction_id: 91519194

In [17]:
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3
0,9151919416110000051,Complain|ATM|Gagal Tarik,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,
1,9151919416110000052,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah melaporkan tra...,Request|Credit Card|Pergantian Kartu,Penjelasan klasifikasi: Nasabah meminta pencet...,,
2,9151919416110000053,Complain|OCTO Mobile|Kendala Transaksi,Penjelasan klasifikasi: Nasabah mengalami kend...,,,,
3,9151919416110000054,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,
4,9151919416110000055,Complain|OCTO Mobile|Kendala Login,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah komplain sangg...,Complain|Credit Card|Biaya/ Bunga/ Denda,Penjelasan klasifikasi: Nasabah dikenakan dend...
5,9151919416110000056,Inquiry|Debit Card|Other,Penjelasan klasifikasi: Nasabah menanyakan det...,Inquiry|OCTO Mobile|Transaksi,Penjelasan klasifikasi: Nasabah menanyakan per...,Request|OCTO Mobile|Other,Penjelasan klasifikasi: Nasabah ingin menonakt...
6,9151919416110000057,Inquiry|KPR|Other,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,
7,9151919416110000058,Inquiry|Credit Card|Limit,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry|Credit Card|Other,Penjelasan klasifikasi: Nasabah ingin mengetah...,,
8,9151919416110000059,Complain|Credit Card|Pembayaran Tagihan Credit...,Penjelasan klasifikasi: Nasabah menanyakan sta...,Complain|Credit Card|Limit,Penjelasan klasifikasi: Nasabah mengalami keti...,Inquiry|Credit Card|Naik/ Turun Limit Permanen,Penjelasan klasifikasi: Nasabah meminta inform...
9,9151919416110000060,Request|Credit Card|Annual Fee,Penjelasan klasifikasi: Nasabah meminta pengha...,,,,


# Combine data with ground truth

In [18]:
# read json file
import json

with open('./ground_truth/ground_truth.json') as f:
    ground_truth = json.load(f)

# map with ground truth
df['interaction_id'] = df['interaction_id'].astype(str)
df['ground_truth'] = df['interaction_id'].map(ground_truth)

# save to csv
df.to_csv('output.csv', index=False)

# Read the output.csv file

In [19]:
df = pd.read_csv('output.csv')
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3,ground_truth
0,9151919416110000051,Complain|ATM|Gagal Tarik,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,,Complain|ATM|Gagal Tarik
1,9151919416110000052,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah melaporkan tra...,Request|Credit Card|Pergantian Kartu,Penjelasan klasifikasi: Nasabah meminta pencet...,,,Complain|Credit Card|Sanggahan Transaksi
2,9151919416110000053,Complain|OCTO Mobile|Kendala Transaksi,Penjelasan klasifikasi: Nasabah mengalami kend...,,,,,Complain|OCTO Mobile|Kendala Transaksi
3,9151919416110000054,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,,Complain|Credit Card|Sanggahan Transaksi
4,9151919416110000055,Complain|OCTO Mobile|Kendala Login,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah komplain sangg...,Complain|Credit Card|Biaya/ Bunga/ Denda,Penjelasan klasifikasi: Nasabah dikenakan dend...,Complain|OCTO Mobile|Kendala Login
5,9151919416110000056,Inquiry|Debit Card|Other,Penjelasan klasifikasi: Nasabah menanyakan det...,Inquiry|OCTO Mobile|Transaksi,Penjelasan klasifikasi: Nasabah menanyakan per...,Request|OCTO Mobile|Other,Penjelasan klasifikasi: Nasabah ingin menonakt...,Inquiry|Tabungan|Other
6,9151919416110000057,Inquiry|KPR|Other,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,,Inquiry|KPR|Other
7,9151919416110000058,Inquiry|Credit Card|Limit,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry|Credit Card|Other,Penjelasan klasifikasi: Nasabah ingin mengetah...,,,Inquiry|Credit Card|Limit
8,9151919416110000059,Complain|Credit Card|Pembayaran Tagihan Credit...,Penjelasan klasifikasi: Nasabah menanyakan sta...,Complain|Credit Card|Limit,Penjelasan klasifikasi: Nasabah mengalami keti...,Inquiry|Credit Card|Naik/ Turun Limit Permanen,Penjelasan klasifikasi: Nasabah meminta inform...,Inquiry|Credit Card|Pembayaran Tagihan Credit ...
9,9151919416110000060,Request|Credit Card|Annual Fee,Penjelasan klasifikasi: Nasabah meminta pengha...,,,,,


# CALCULATE ACCURACY

In [27]:
import pandas as pd

# check whether option_1 or option_2 or option_3 is equal to ground_truth, the result is True or False in "is_match" column
df['is_match'] = (df['ground_truth'] == df['option_1']) | (df['ground_truth'] == df['option_2']) | (df['ground_truth'] == df['option_3'])
df_view = df[['interaction_id', 'option_1', 'option_2', 'option_3', 'ground_truth', 'is_match']]
df_view


Unnamed: 0,interaction_id,option_1,option_2,option_3,ground_truth,is_match
0,9151919416110000051,Complain|ATM|Gagal Tarik,,,Complain|ATM|Gagal Tarik,True
1,9151919416110000052,Complain|Credit Card|Sanggahan Transaksi,Request|Credit Card|Pergantian Kartu,,Complain|Credit Card|Sanggahan Transaksi,True
2,9151919416110000053,Complain|OCTO Mobile|Kendala Transaksi,,,Complain|OCTO Mobile|Kendala Transaksi,True
3,9151919416110000054,Complain|Credit Card|Sanggahan Transaksi,,,Complain|Credit Card|Sanggahan Transaksi,True
4,9151919416110000055,Complain|OCTO Mobile|Kendala Login,Complain|Credit Card|Sanggahan Transaksi,Complain|Credit Card|Biaya/ Bunga/ Denda,Complain|OCTO Mobile|Kendala Login,True
5,9151919416110000056,Inquiry|Debit Card|Other,Inquiry|OCTO Mobile|Transaksi,Request|OCTO Mobile|Other,Inquiry|Tabungan|Other,False
6,9151919416110000057,Inquiry|KPR|Other,,,Inquiry|KPR|Other,True
7,9151919416110000058,Inquiry|Credit Card|Limit,Inquiry|Credit Card|Other,,Inquiry|Credit Card|Limit,True
8,9151919416110000059,Complain|Credit Card|Pembayaran Tagihan Credit...,Complain|Credit Card|Limit,Inquiry|Credit Card|Naik/ Turun Limit Permanen,Inquiry|Credit Card|Pembayaran Tagihan Credit ...,False
9,9151919416110000060,Request|Credit Card|Annual Fee,,,,False


In [28]:
# calculate the mean of "is_match" column where ground_truth is not NaN
accuracy = df[df['ground_truth'].notnull()]['is_match'].mean()
accuracy*100

np.float64(55.26315789473685)

In [22]:
# GET WHICH LAYER GET THE WRONG PREDICTION
def split_option(df, column_name):
    """Memisahkan kolom berdasarkan '|' dan membuat kolom baru."""
    df[column_name + '_layer_1'] = df[column_name].str.split('|').str[0]
    df[column_name + '_layer_2'] = df[column_name].str.split('|').str[1]
    df[column_name + '_layer_3'] = df[column_name].str.split('|').str[2]
    return df

df = split_option(df, 'option_1')
df = split_option(df, 'option_2')
df = split_option(df, 'option_3')
df = split_option(df, 'ground_truth')

# check if layer 1, layer 2, layer 3 match with ground_truth layer 1, layer 2, layer 3
df['is_match_layer_1'] = (df['ground_truth_layer_1'] == df['option_1_layer_1']) | (df['ground_truth_layer_1'] == df['option_2_layer_1']) | (df['ground_truth_layer_1'] == df['option_3_layer_1'])
df['is_match_layer_2'] = (df['ground_truth_layer_2'] == df['option_1_layer_2']) | (df['ground_truth_layer_2'] == df['option_2_layer_2']) | (df['ground_truth_layer_2'] == df['option_3_layer_2'])
df['is_match_layer_3'] = (df['ground_truth_layer_3'] == df['option_1_layer_3']) | (df['ground_truth_layer_3'] == df['option_2_layer_3']) | (df['ground_truth_layer_3'] == df['option_3_layer_3'])

In [23]:
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3,ground_truth,is_match,option_1_layer_1,...,option_2_layer_3,option_3_layer_1,option_3_layer_2,option_3_layer_3,ground_truth_layer_1,ground_truth_layer_2,ground_truth_layer_3,is_match_layer_1,is_match_layer_2,is_match_layer_3
0,9151919416110000051,Complain|ATM|Gagal Tarik,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,,Complain|ATM|Gagal Tarik,True,Complain,...,,,,,Complain,ATM,Gagal Tarik,True,True,True
1,9151919416110000052,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah melaporkan tra...,Request|Credit Card|Pergantian Kartu,Penjelasan klasifikasi: Nasabah meminta pencet...,,,Complain|Credit Card|Sanggahan Transaksi,True,Complain,...,Pergantian Kartu,,,,Complain,Credit Card,Sanggahan Transaksi,True,True,True
2,9151919416110000053,Complain|OCTO Mobile|Kendala Transaksi,Penjelasan klasifikasi: Nasabah mengalami kend...,,,,,Complain|OCTO Mobile|Kendala Transaksi,True,Complain,...,,,,,Complain,OCTO Mobile,Kendala Transaksi,True,True,True
3,9151919416110000054,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,,Complain|Credit Card|Sanggahan Transaksi,True,Complain,...,,,,,Complain,Credit Card,Sanggahan Transaksi,True,True,True
4,9151919416110000055,Complain|OCTO Mobile|Kendala Login,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain|Credit Card|Sanggahan Transaksi,Penjelasan klasifikasi: Nasabah komplain sangg...,Complain|Credit Card|Biaya/ Bunga/ Denda,Penjelasan klasifikasi: Nasabah dikenakan dend...,Complain|OCTO Mobile|Kendala Login,True,Complain,...,Sanggahan Transaksi,Complain,Credit Card,Biaya/ Bunga/ Denda,Complain,OCTO Mobile,Kendala Login,True,True,True
5,9151919416110000056,Inquiry|Debit Card|Other,Penjelasan klasifikasi: Nasabah menanyakan det...,Inquiry|OCTO Mobile|Transaksi,Penjelasan klasifikasi: Nasabah menanyakan per...,Request|OCTO Mobile|Other,Penjelasan klasifikasi: Nasabah ingin menonakt...,Inquiry|Tabungan|Other,False,Inquiry,...,Transaksi,Request,OCTO Mobile,Other,Inquiry,Tabungan,Other,True,False,True
6,9151919416110000057,Inquiry|KPR|Other,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,,Inquiry|KPR|Other,True,Inquiry,...,,,,,Inquiry,KPR,Other,True,True,True
7,9151919416110000058,Inquiry|Credit Card|Limit,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry|Credit Card|Other,Penjelasan klasifikasi: Nasabah ingin mengetah...,,,Inquiry|Credit Card|Limit,True,Inquiry,...,Other,,,,Inquiry,Credit Card,Limit,True,True,True
8,9151919416110000059,Complain|Credit Card|Pembayaran Tagihan Credit...,Penjelasan klasifikasi: Nasabah menanyakan sta...,Complain|Credit Card|Limit,Penjelasan klasifikasi: Nasabah mengalami keti...,Inquiry|Credit Card|Naik/ Turun Limit Permanen,Penjelasan klasifikasi: Nasabah meminta inform...,Inquiry|Credit Card|Pembayaran Tagihan Credit ...,False,Complain,...,Limit,Inquiry,Credit Card,Naik/ Turun Limit Permanen,Inquiry,Credit Card,Pembayaran Tagihan Credit Card,True,True,True
9,9151919416110000060,Request|Credit Card|Annual Fee,Penjelasan klasifikasi: Nasabah meminta pengha...,,,,,,False,Request,...,,,,,,,,False,False,False


In [24]:
accuracy_layer_1 = df[df['ground_truth_layer_1'].notnull()]['is_match_layer_1'].mean()
print(f"Layer 1 accuracy: {accuracy_layer_1}")
# show df for layer 1
# df[['interaction_id', 'option_1_layer_1','option_2_layer_1','option_3_layer_1', 'ground_truth_layer_1', 'is_match_layer_1']]
df[['interaction_id', 'option_1_layer_1', 'ground_truth_layer_1', 'is_match_layer_1']]

Layer 1 accuracy: 0.9736842105263158


Unnamed: 0,interaction_id,option_1_layer_1,ground_truth_layer_1,is_match_layer_1
0,9151919416110000051,Complain,Complain,True
1,9151919416110000052,Complain,Complain,True
2,9151919416110000053,Complain,Complain,True
3,9151919416110000054,Complain,Complain,True
4,9151919416110000055,Complain,Complain,True
5,9151919416110000056,Inquiry,Inquiry,True
6,9151919416110000057,Inquiry,Inquiry,True
7,9151919416110000058,Inquiry,Inquiry,True
8,9151919416110000059,Complain,Inquiry,True
9,9151919416110000060,Request,,False


In [25]:
accuracy_layer_2 = df[df['ground_truth_layer_2'].notnull()]['is_match_layer_2'].mean()
print(f"Layer 2 accuracy: {accuracy_layer_2}")
df[['interaction_id','option_1_layer_2','option_2_layer_2','option_3_layer_2', 'ground_truth_layer_2', 'is_match_layer_2']]

Layer 2 accuracy: 0.868421052631579


Unnamed: 0,interaction_id,option_1_layer_2,option_2_layer_2,option_3_layer_2,ground_truth_layer_2,is_match_layer_2
0,9151919416110000051,ATM,,,ATM,True
1,9151919416110000052,Credit Card,Credit Card,,Credit Card,True
2,9151919416110000053,OCTO Mobile,,,OCTO Mobile,True
3,9151919416110000054,Credit Card,,,Credit Card,True
4,9151919416110000055,OCTO Mobile,Credit Card,Credit Card,OCTO Mobile,True
5,9151919416110000056,Debit Card,OCTO Mobile,OCTO Mobile,Tabungan,False
6,9151919416110000057,KPR,,,KPR,True
7,9151919416110000058,Credit Card,Credit Card,,Credit Card,True
8,9151919416110000059,Credit Card,Credit Card,Credit Card,Credit Card,True
9,9151919416110000060,Credit Card,,,,False


In [26]:
accuracy_layer_3 = df[df['ground_truth_layer_3'].notnull()]['is_match_layer_3'].mean()
print(f"Layer 3 accuracy: {accuracy_layer_3}")
df[['interaction_id','option_1_layer_3','option_2_layer_3','option_3_layer_3', 'ground_truth_layer_3', 'is_match_layer_3']]

Layer 3 accuracy: 0.6842105263157895


Unnamed: 0,interaction_id,option_1_layer_3,option_2_layer_3,option_3_layer_3,ground_truth_layer_3,is_match_layer_3
0,9151919416110000051,Gagal Tarik,,,Gagal Tarik,True
1,9151919416110000052,Sanggahan Transaksi,Pergantian Kartu,,Sanggahan Transaksi,True
2,9151919416110000053,Kendala Transaksi,,,Kendala Transaksi,True
3,9151919416110000054,Sanggahan Transaksi,,,Sanggahan Transaksi,True
4,9151919416110000055,Kendala Login,Sanggahan Transaksi,Biaya/ Bunga/ Denda,Kendala Login,True
5,9151919416110000056,Other,Transaksi,Other,Other,True
6,9151919416110000057,Other,,,Other,True
7,9151919416110000058,Limit,Other,,Limit,True
8,9151919416110000059,Pembayaran Tagihan Credit Card,Limit,Naik/ Turun Limit Permanen,Pembayaran Tagihan Credit Card,True
9,9151919416110000060,Annual Fee,,,,False
