# FUNCTION

In [74]:
import pandas as pd
from io import StringIO
from google.cloud import storage
import requests

def load_transcription_csv(bucket_name, file_path):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(file_path)
    content = blob.download_as_text()

    return pd.read_csv(StringIO(content))

def call_disposition_api(payload):
    url = "http://localhost:8000/rtaa/api/v1/disposition_code"
    auth = ('', '')

    try:
        response = requests.post(
            url,
            json=payload,
            auth=auth
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        return None

def generate_csv_testing(bucket_name, file_path, interaction_id: str):
    transcript_df = load_transcription_csv(bucket_name,f"{file_path}/transcription/{interaction_id}/transcription_results.csv")
    transcript_df = transcript_df.where(pd.notnull(transcript_df), '')
    json_dict = transcript_df.to_dict(orient='records')

    payload = {
        "interaction_id": interaction_id,
        "subtitle": json_dict
    }
    response = call_disposition_api(payload)
    return response

# BATCH PROCESS

In [75]:
list_of_unknown_csv = [9151919416110000090, 9151919416110000091, 9151919416110000092]
output_data = []
for i in range(9151919416110000051, 9151919416110000100):
    if i in list_of_unknown_csv:
        continue
    try:
        response = generate_csv_testing('athena-nonprod-gcs', 'athena-ws4a/athena-qapm', str(i))
        dispositions = response['result']['disposition_result']
        output_data.append({
            'interaction_id': i,
            'option_1': dispositions[0]['value'],
            'explanation_1': dispositions[0]['field3'],
            'option_2': dispositions[1]['value'] if len(dispositions) > 1 else None,
            'explanation_2': dispositions[1]['field3'] if len(dispositions) > 1 else None,
            'option_3': dispositions[2]['value'] if len(dispositions) > 2 else None,
            'explanation_3': dispositions[2]['field3'] if len(dispositions) > 2 else None,
        })
        print(f"Processed interaction_id: {i}")
    except Exception as e:
        print(f"Error: {e} at interaction_id: {i}")

df = pd.DataFrame(output_data)


[{'interaction_id': 9151919416110000051, 'option_1': 'Complain (General Customer)|Branchless|BLB: ATM Gagal Tarik', 'explanation_1': "Penjelasan klasifikasi: Nasabah mengalami gagal tarik tunai kartu kredit di ATM BNI. Transkripsi relevan: 'Saya kan ini, kemarin tarik tunai nih, Rp600.000, tapi uangnya tidak keluar dari ATM, sementara saldo berkurang.'", 'option_2': None, 'explanation_2': None, 'option_3': None, 'explanation_3': None}]
[{'interaction_id': 9151919416110000051, 'option_1': 'Complain (General Customer)|Branchless|BLB: ATM Gagal Tarik', 'explanation_1': "Penjelasan klasifikasi: Nasabah mengalami gagal tarik tunai kartu kredit di ATM BNI. Transkripsi relevan: 'Saya kan ini, kemarin tarik tunai nih, Rp600.000, tapi uangnya tidak keluar dari ATM, sementara saldo berkurang.'", 'option_2': None, 'explanation_2': None, 'option_3': None, 'explanation_3': None}, {'interaction_id': 9151919416110000052, 'option_1': 'Complain (General Customer)|Credit Card|CRD: Fraud', 'explanation_1

In [76]:
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3
0,9151919416110000051,Complain (General Customer)|Branchless|BLB: AT...,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,
1,9151919416110000052,Complain (General Customer)|Credit Card|CRD: F...,Penjelasan klasifikasi: Nasabah melaporkan tra...,Request (General Customer)|Credit Card|CRD: Ca...,Penjelasan klasifikasi: Nasabah meminta pemblo...,Request (General Customer)|Credit Card|CRD: Di...,Penjelasan klasifikasi: Nasabah meminta proses...
2,9151919416110000053,Complain (General Customer)|Branchless|BLB: Au...,Penjelasan klasifikasi: Nasabah mengalami kend...,Inquiry (General Customer)|Branchless|BLB: ATM...,Penjelasan klasifikasi: Nasabah menanyakan kem...,Request (General Customer)|Branchless|BLB: Pel...,Penjelasan klasifikasi: Agen membuat pelaporan...
3,9151919416110000054,Complain (General Customer)|Credit Card|CRD: T...,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,
4,9151919416110000055,Complain (General Customer)|Branchless|BLB: OC...,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain (General Customer)|Credit Card|CRD: S...,Penjelasan klasifikasi: Nasabah komplain terka...,Request (General Customer)|Credit Card|CRD: Pe...,Penjelasan klasifikasi: Nasabah meminta peruba...
5,9151919416110000056,Inquiry (General Customer)|Branchless|BLB: Inq...,Penjelasan klasifikasi: Nasabah menanyakan det...,Request (General Customer)|Branchless|BLB: Req...,Penjelasan klasifikasi: Nasabah menanyakan car...,Inquiry (General Customer)|Branchless|BLB: Inq...,Penjelasan klasifikasi: Nasabah menanyakan det...
6,9151919416110000057,Inquiry (General Customer)|Lending|LND: KPR Ou...,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry (General Customer)|Lending|LND: KPR In...,Penjelasan klasifikasi: Nasabah membutuhkan in...,Inquiry (General Customer)|Lending|LND: KPR Br...,Penjelasan klasifikasi: Nasabah ingin mengeta...
7,9151919416110000058,Inquiry (General Customer)|Credit Card|CRD: In...,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry (General Customer)|Credit Card|CRD: In...,Penjelasan klasifikasi: Nasabah menanyakan dam...,Inquiry (General Customer)|Credit Card|CRD: In...,Penjelasan klasifikasi: Nasabah menanyakan kon...
8,9151919416110000059,Complain (General Customer)|Credit Card|CRD: P...,Penjelasan klasifikasi: Nasabah menanyakan men...,Request|Credit Card|CRD: Kenaikan Limit,Penjelasan klasifikasi: Nasabah meminta kenaik...,Inquiry|Credit Card|CRD: Proses Update Saldo,Penjelasan klasifikasi: Nasabah menanyakan pro...
9,9151919416110000060,Request|Credit Card|CRD Credit Cards Annual Fee,Penjelasan klasifikasi: Nasabah meminta pengha...,,,,


# Combine data with ground truth

In [77]:
# read json file
import json

with open('../ground_truth_dirty/ground_truth.json') as f:
    ground_truth = json.load(f)

# map with ground truth
df['interaction_id'] = df['interaction_id'].astype(str)
df['ground_truth'] = df['interaction_id'].map(ground_truth)

# check whether option_1 or option_2 or option_3 is equal to ground_truth, the result is True or False in "is_match" column
df['is_match'] = (df['ground_truth'].isin(df['option_1'])) | \
                 (df['ground_truth'].isin(df['option_2'])) | \
                 (df['ground_truth'].isin(df['option_3']))

# save to csv
df.to_csv('output.csv', index=False)

# Read the output.csv file

In [114]:
df = pd.read_csv('output.csv')
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3,ground_truth,is_match
0,9151919416110000051,Complain (General Customer)|Branchless|BLB: AT...,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,,Complain (General Customer)|Branchless|BLB: AT...,True
1,9151919416110000052,Complain (General Customer)|Credit Card|CRD: F...,Penjelasan klasifikasi: Nasabah melaporkan tra...,Request (General Customer)|Credit Card|CRD: Ca...,Penjelasan klasifikasi: Nasabah meminta pemblo...,Request (General Customer)|Credit Card|CRD: Di...,Penjelasan klasifikasi: Nasabah meminta proses...,Complain (General Customer)|Credit Card|CRD: C...,False
2,9151919416110000053,Complain (General Customer)|Branchless|BLB: Au...,Penjelasan klasifikasi: Nasabah mengalami kend...,Inquiry (General Customer)|Branchless|BLB: ATM...,Penjelasan klasifikasi: Nasabah menanyakan kem...,Request (General Customer)|Branchless|BLB: Pel...,Penjelasan klasifikasi: Agen membuat pelaporan...,Complain (General Customer)|Branchless|BLB: OC...,False
3,9151919416110000054,Complain (General Customer)|Credit Card|CRD: T...,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,,Complain (General Customer)|Credit Card|CRD: C...,False
4,9151919416110000055,Complain (General Customer)|Branchless|BLB: OC...,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain (General Customer)|Credit Card|CRD: S...,Penjelasan klasifikasi: Nasabah komplain terka...,Request (General Customer)|Credit Card|CRD: Pe...,Penjelasan klasifikasi: Nasabah meminta peruba...,Complain (General Customer)|Branchless|BLB: OC...,True
5,9151919416110000056,Inquiry (General Customer)|Branchless|BLB: Inq...,Penjelasan klasifikasi: Nasabah menanyakan det...,Request (General Customer)|Branchless|BLB: Req...,Penjelasan klasifikasi: Nasabah menanyakan car...,Inquiry (General Customer)|Branchless|BLB: Inq...,Penjelasan klasifikasi: Nasabah menanyakan det...,Inquiry (General Customer)|Funding (FND)|FND: ...,False
6,9151919416110000057,Inquiry (General Customer)|Lending|LND: KPR Ou...,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry (General Customer)|Lending|LND: KPR In...,Penjelasan klasifikasi: Nasabah membutuhkan in...,Inquiry (General Customer)|Lending|LND: KPR Br...,Penjelasan klasifikasi: Nasabah ingin mengeta...,Inquiry (General Customer)|Lending|LND: KPR Ot...,False
7,9151919416110000058,Inquiry (General Customer)|Credit Card|CRD: In...,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry (General Customer)|Credit Card|CRD: In...,Penjelasan klasifikasi: Nasabah menanyakan dam...,Inquiry (General Customer)|Credit Card|CRD: In...,Penjelasan klasifikasi: Nasabah menanyakan kon...,Inquiry (General Customer)|Credit Card|CRD: Cr...,False
8,9151919416110000059,Complain (General Customer)|Credit Card|CRD: P...,Penjelasan klasifikasi: Nasabah menanyakan men...,Request|Credit Card|CRD: Kenaikan Limit,Penjelasan klasifikasi: Nasabah meminta kenaik...,Inquiry|Credit Card|CRD: Proses Update Saldo,Penjelasan klasifikasi: Nasabah menanyakan pro...,Inquiry (General Customer)|Credit Card|CRD: Pe...,False
9,9151919416110000060,Request|Credit Card|CRD Credit Cards Annual Fee,Penjelasan klasifikasi: Nasabah meminta pengha...,,,,,Feedback/Report (General Customer)|Credit Card...,False


# CALCULATE ACCURACY

In [85]:
# calculate the mean of "is_match" column where ground_truth is not NaN
accuracy = df[df['ground_truth'].notnull()]['is_match'].mean()
accuracy

np.float64(0.14634146341463414)

In [111]:
# GET WHICH LAYER GET THE WRONG PREDICTION
def split_option(df, column_name):
    """Memisahkan kolom berdasarkan '|' dan membuat kolom baru."""
    df[column_name + '_layer_1'] = df[column_name].str.split('|').str[0]
    df[column_name + '_layer_2'] = df[column_name].str.split('|').str[1]
    df[column_name + '_layer_3'] = df[column_name].str.split('|').str[2]
    return df

df = split_option(df, 'option_1')
df = split_option(df, 'option_2')
df = split_option(df, 'option_3')
df = split_option(df, 'ground_truth')

# check if layer 1, layer 2, layer 3 match with ground_truth layer 1, layer 2, layer 3
df['is_match_layer_1'] = df['ground_truth_layer_1'].isin(df['option_1_layer_1']) | \
                         df['ground_truth_layer_1'].isin(df['option_2_layer_1']) | \
                         df['ground_truth_layer_1'].isin(df['option_3_layer_1'])
df['is_match_layer_2'] = df['ground_truth_layer_2'].isin(df['option_1_layer_2']) | \
                            df['ground_truth_layer_2'].isin(df['option_2_layer_2']) | \
                            df['ground_truth_layer_2'].isin(df['option_3_layer_2'])
df['is_match_layer_3'] = df['ground_truth_layer_3'].isin(df['option_1_layer_3']) | \
                            df['ground_truth_layer_3'].isin(df['option_2_layer_3']) | \
                            df['ground_truth_layer_3'].isin(df['option_3_layer_3'])

In [113]:
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3,ground_truth,is_match,option_1_layer_1,...,option_2_layer_3,option_3_layer_1,option_3_layer_2,option_3_layer_3,ground_truth_layer_1,ground_truth_layer_2,ground_truth_layer_3,is_match_layer_1,is_match_layer_2,is_match_layer_3
0,9151919416110000051,Complain (General Customer)|Branchless|BLB: AT...,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,,Complain (General Customer)|Branchless|BLB: AT...,True,Complain (General Customer),...,,,,,Complain (General Customer),Branchless,BLB: ATM Gagal Tarik,True,True,True
1,9151919416110000052,Complain (General Customer)|Credit Card|CRD: F...,Penjelasan klasifikasi: Nasabah melaporkan tra...,Request (General Customer)|Credit Card|CRD: Ca...,Penjelasan klasifikasi: Nasabah meminta pemblo...,Request (General Customer)|Credit Card|CRD: Di...,Penjelasan klasifikasi: Nasabah meminta proses...,Complain (General Customer)|Credit Card|CRD: C...,False,Complain (General Customer),...,CRD: Card Replacement,Request (General Customer),Credit Card,CRD: Dispute Transaction,Complain (General Customer),Credit Card,CRD: Credit Cards Sanggahan Transaksi,True,True,False
2,9151919416110000053,Complain (General Customer)|Branchless|BLB: Au...,Penjelasan klasifikasi: Nasabah mengalami kend...,Inquiry (General Customer)|Branchless|BLB: ATM...,Penjelasan klasifikasi: Nasabah menanyakan kem...,Request (General Customer)|Branchless|BLB: Pel...,Penjelasan klasifikasi: Agen membuat pelaporan...,Complain (General Customer)|Branchless|BLB: OC...,False,Complain (General Customer),...,BLB: ATM Penarikan Tunai,Request (General Customer),Branchless,BLB: Pelaporan Kendala Transaksi,Complain (General Customer),Branchless,BLB: OCTO Mobile Kendala Transaksi,True,True,False
3,9151919416110000054,Complain (General Customer)|Credit Card|CRD: T...,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,,Complain (General Customer)|Credit Card|CRD: C...,False,Complain (General Customer),...,,,,,Complain (General Customer),Credit Card,CRD: Credit Cards Sanggahan Transaksi,True,True,False
4,9151919416110000055,Complain (General Customer)|Branchless|BLB: OC...,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain (General Customer)|Credit Card|CRD: S...,Penjelasan klasifikasi: Nasabah komplain terka...,Request (General Customer)|Credit Card|CRD: Pe...,Penjelasan klasifikasi: Nasabah meminta peruba...,Complain (General Customer)|Branchless|BLB: OC...,True,Complain (General Customer),...,CRD: Sanggahan Transaksi,Request (General Customer),Credit Card,CRD: Permintaan Perubahan Data,Complain (General Customer),Branchless,BLB: OCTO Mobile Kendala Login,True,True,True
5,9151919416110000056,Inquiry (General Customer)|Branchless|BLB: Inq...,Penjelasan klasifikasi: Nasabah menanyakan det...,Request (General Customer)|Branchless|BLB: Req...,Penjelasan klasifikasi: Nasabah menanyakan car...,Inquiry (General Customer)|Branchless|BLB: Inq...,Penjelasan klasifikasi: Nasabah menanyakan det...,Inquiry (General Customer)|Funding (FND)|FND: ...,False,Inquiry (General Customer),...,BLB: Request Nonaktifkan Kartu Debit Fisik,Inquiry (General Customer),Branchless,BLB: Inquiry Biaya Transfer Antar Bank,Inquiry (General Customer),Funding (FND),FND: Tabungan Others,True,False,False
6,9151919416110000057,Inquiry (General Customer)|Lending|LND: KPR Ou...,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry (General Customer)|Lending|LND: KPR In...,Penjelasan klasifikasi: Nasabah membutuhkan in...,Inquiry (General Customer)|Lending|LND: KPR Br...,Penjelasan klasifikasi: Nasabah ingin mengeta...,Inquiry (General Customer)|Lending|LND: KPR Ot...,False,Inquiry (General Customer),...,LND: KPR Information,Inquiry (General Customer),Lending,LND: KPR Branch Visit Procedure,Inquiry (General Customer),Lending,LND: KPR Others,True,True,False
7,9151919416110000058,Inquiry (General Customer)|Credit Card|CRD: In...,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry (General Customer)|Credit Card|CRD: In...,Penjelasan klasifikasi: Nasabah menanyakan dam...,Inquiry (General Customer)|Credit Card|CRD: In...,Penjelasan klasifikasi: Nasabah menanyakan kon...,Inquiry (General Customer)|Credit Card|CRD: Cr...,False,Inquiry (General Customer),...,CRD: Inquiry Pembayaran Cicilan,Inquiry (General Customer),Credit Card,CRD: Inquiry Transaksi Over Limit,Inquiry (General Customer),Credit Card,CRD: Credit Cards Limit,True,True,False
8,9151919416110000059,Complain (General Customer)|Credit Card|CRD: P...,Penjelasan klasifikasi: Nasabah menanyakan men...,Request|Credit Card|CRD: Kenaikan Limit,Penjelasan klasifikasi: Nasabah meminta kenaik...,Inquiry|Credit Card|CRD: Proses Update Saldo,Penjelasan klasifikasi: Nasabah menanyakan pro...,Inquiry (General Customer)|Credit Card|CRD: Pe...,False,Complain (General Customer),...,CRD: Kenaikan Limit,Inquiry,Credit Card,CRD: Proses Update Saldo,Inquiry (General Customer),Credit Card,CRD: Pembayaran Tagihan Kartu Kredit,True,True,False
9,9151919416110000060,Request|Credit Card|CRD Credit Cards Annual Fee,Penjelasan klasifikasi: Nasabah meminta pengha...,,,,,Feedback/Report (General Customer)|Credit Card...,False,Request,...,,,,,Feedback/Report (General Customer),Credit Card,CRD: Credit Cards,False,True,False


In [112]:
accuracy_layer_1 = df[df['ground_truth_layer_1'].notnull()]['is_match_layer_1'].mean()
print(f"Layer 1 accuracy: {accuracy_layer_1}")
# show df for layer 1
# df[['interaction_id', 'option_1_layer_1','option_2_layer_1','option_3_layer_1', 'ground_truth_layer_1', 'is_match_layer_1']]
df[['interaction_id', 'option_1_layer_1', 'ground_truth_layer_1', 'is_match_layer_1']]

Layer 1 accuracy: 0.975609756097561


Unnamed: 0,interaction_id,option_1_layer_1,ground_truth_layer_1,is_match_layer_1
0,9151919416110000051,Complain (General Customer),Complain (General Customer),True
1,9151919416110000052,Complain (General Customer),Complain (General Customer),True
2,9151919416110000053,Complain (General Customer),Complain (General Customer),True
3,9151919416110000054,Complain (General Customer),Complain (General Customer),True
4,9151919416110000055,Complain (General Customer),Complain (General Customer),True
5,9151919416110000056,Inquiry (General Customer),Inquiry (General Customer),True
6,9151919416110000057,Inquiry (General Customer),Inquiry (General Customer),True
7,9151919416110000058,Inquiry (General Customer),Inquiry (General Customer),True
8,9151919416110000059,Complain (General Customer),Inquiry (General Customer),True
9,9151919416110000060,Request,Feedback/Report (General Customer),False


In [106]:
accuracy_layer_2 = df[df['ground_truth_layer_2'].notnull()]['is_match_layer_2'].mean()
print(f"Layer 2 accuracy: {accuracy_layer_2}")
df[['interaction_id','option_1_layer_2','option_2_layer_2','option_3_layer_2', 'ground_truth_layer_2', 'is_match_layer_2']]

Layer 2 accuracy: 0.9512195121951219


Unnamed: 0,interaction_id,option_1_layer_2,ground_truth_layer_2,is_match_layer_2
0,9151919416110000051,Branchless,Branchless,True
1,9151919416110000052,Credit Card,Credit Card,True
2,9151919416110000053,Branchless,Branchless,True
3,9151919416110000054,Credit Card,Credit Card,True
4,9151919416110000055,Branchless,Branchless,True
5,9151919416110000056,Branchless,Funding (FND),False
6,9151919416110000057,Lending,Lending,True
7,9151919416110000058,Credit Card,Credit Card,True
8,9151919416110000059,Credit Card,Credit Card,True
9,9151919416110000060,Credit Card,Credit Card,True


In [108]:
accuracy_layer_3 = df[df['ground_truth_layer_3'].notnull()]['is_match_layer_3'].mean()
print(f"Layer 3 accuracy: {accuracy_layer_3}")
df[['interaction_id','option_1_layer_3','option_2_layer_3','option_3_layer_3', 'ground_truth_layer_3', 'is_match_layer_3']]

Layer 3 accuracy: 0.21951219512195122


Unnamed: 0,interaction_id,option_1_layer_3,option_2_layer_3,option_3_layer_3,ground_truth_layer_3,is_match_layer_3
0,9151919416110000051,BLB: ATM Gagal Tarik,,,BLB: ATM Gagal Tarik,True
1,9151919416110000052,CRD: Fraud,CRD: Card Replacement,CRD: Dispute Transaction,CRD: Credit Cards Sanggahan Transaksi,False
2,9151919416110000053,BLB: Auto Mobile Gagal Transaksi,BLB: ATM Penarikan Tunai,BLB: Pelaporan Kendala Transaksi,BLB: OCTO Mobile Kendala Transaksi,False
3,9151919416110000054,CRD: Transaksi Bermasalah,,,CRD: Credit Cards Sanggahan Transaksi,False
4,9151919416110000055,BLB: OCTO Mobile Kendala Login,CRD: Sanggahan Transaksi,CRD: Permintaan Perubahan Data,BLB: OCTO Mobile Kendala Login,True
5,9151919416110000056,BLB: Inquiry Biaya/Admin,BLB: Request Nonaktifkan Kartu Debit Fisik,BLB: Inquiry Biaya Transfer Antar Bank,FND: Tabungan Others,False
6,9151919416110000057,LND: KPR Outstanding Balance,LND: KPR Information,LND: KPR Branch Visit Procedure,LND: KPR Others,False
7,9151919416110000058,CRD: Inquiry Limit Kartu Kredit,CRD: Inquiry Pembayaran Cicilan,CRD: Inquiry Transaksi Over Limit,CRD: Credit Cards Limit,False
8,9151919416110000059,CRD: Pembayaran Belum Masuk,CRD: Kenaikan Limit,CRD: Proses Update Saldo,CRD: Pembayaran Tagihan Kartu Kredit,False
9,9151919416110000060,CRD Credit Cards Annual Fee,,,CRD: Credit Cards,False
