# FUNCTION

In [87]:
import pandas as pd
from io import StringIO
from google.cloud import storage
import requests

def load_transcription_csv(bucket_name, file_path):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(file_path)
    content = blob.download_as_text()

    return pd.read_csv(StringIO(content))

def call_disposition_api(payload):
    url = "http://localhost:8000/rtaa/api/v1/disposition_code"
    auth = ('', '')

    try:
        response = requests.post(
            url,
            json=payload,
            auth=auth
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        return None

def generate_csv_testing(bucket_name, file_path, interaction_id: str):
    transcript_df = load_transcription_csv(bucket_name,f"{file_path}/transcription/{interaction_id}/transcription_results.csv")
    transcript_df = transcript_df.where(pd.notnull(transcript_df), '')
    json_dict = transcript_df.to_dict(orient='records')

    payload = {
        "interaction_id": interaction_id,
        "subtitle": json_dict
    }
    response = call_disposition_api(payload)
    return response

# BATCH PROCESS

In [88]:
list_of_unknown_csv = [9151919416110000090, 9151919416110000091, 9151919416110000092]
output_data = []
for i in range(9151919416110000051, 9151919416110000100):
    if i in list_of_unknown_csv:
        continue
    try:
        response = generate_csv_testing('athena-nonprod-gcs', 'athena-ws4a/athena-qapm', str(i))
        dispositions = response['result']['disposition_result']
        output_data.append({
            'interaction_id': i,
            'option_1': dispositions[0]['value'],
            'explanation_1': dispositions[0]['field3'],
            'option_2': dispositions[1]['value'] if len(dispositions) > 1 else None,
            'explanation_2': dispositions[1]['field3'] if len(dispositions) > 1 else None,
            'option_3': dispositions[2]['value'] if len(dispositions) > 2 else None,
            'explanation_3': dispositions[2]['field3'] if len(dispositions) > 2 else None,
        })
        print(f"Processed interaction_id: {i}")
    except Exception as e:
        print(f"Error: {e} at interaction_id: {i}")

df = pd.DataFrame(output_data)


Processed interaction_id: 9151919416110000051
Processed interaction_id: 9151919416110000052
Processed interaction_id: 9151919416110000053
Processed interaction_id: 9151919416110000054
Processed interaction_id: 9151919416110000055
Processed interaction_id: 9151919416110000056
Processed interaction_id: 9151919416110000057
Processed interaction_id: 9151919416110000058
Processed interaction_id: 9151919416110000059
Processed interaction_id: 9151919416110000060
Processed interaction_id: 9151919416110000061
Processed interaction_id: 9151919416110000062
Processed interaction_id: 9151919416110000063
Processed interaction_id: 9151919416110000064
Processed interaction_id: 9151919416110000065
Processed interaction_id: 9151919416110000066
Processed interaction_id: 9151919416110000067
Processed interaction_id: 9151919416110000068
Processed interaction_id: 9151919416110000069
Processed interaction_id: 9151919416110000070
Processed interaction_id: 9151919416110000071
Processed interaction_id: 91519194

In [89]:
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3
0,9151919416110000051,Complain|ATM|Gagal Tarik,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,
1,9151919416110000052,Complain|Kartu Kredit|Transaksi Tidak Dikenal,Penjelasan klasifikasi: Nasabah melaporkan tra...,Request|Kartu Kredit|Penggantian Kartu,Penjelasan klasifikasi: Nasabah meminta pembua...,Request|Kartu Kredit|Penyanggahan Transaksi,Penjelasan klasifikasi: Nasabah meminta inform...
2,9151919416110000053,Complain|GoPay|Gagal Transaksi,Penjelasan klasifikasi: Nasabah mengalami kend...,Inquiry|ATM|Penarikan Tunai,Penjelasan klasifikasi: Nasabah menanyakan kem...,Complain|GoPay|Pelaporan,Penjelasan klasifikasi: Laporan dibuat terkai...
3,9151919416110000054,Complain|Kartu Kredit|Transaksi Ganda,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,
4,9151919416110000055,Complain|OCTO Mobile|Kendala Login,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain|Mobile Banking|Update Data,Penjelasan klasifikasi: Nasabah komplain terka...,Complain|Kartu Kredit|Penyanggahan Transaksi,Penjelasan klasifikasi: Nasabah komplain trans...
5,9151919416110000056,Inquiry|Account|Biaya Rekening,Penjelasan klasifikasi: Nasabah menanyakan det...,Inquiry|Debit Card|Biaya Admin Bulanan,Penjelasan klasifikasi: Nasabah ingin mengetah...,Inquiry|Transfer|Metode Transfer,Penjelasan klasifikasi: Nasabah menanyakan per...
6,9151919416110000057,Inquiry|KPR|Sisa Tagihan,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,
7,9151919416110000058,Inquiry|Kartu Kredit|Sisa Limit,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry|Kartu Kredit|Pembayaran Cicilan,Penjelasan klasifikasi: Nasabah menanyakan dam...,Inquiry|Kartu Kredit|Over Limit,Penjelasan klasifikasi: Nasabah menanyakan kon...
8,9151919416110000059,Complain|Kartu Kredit|Pembayaran Belum Masuk,Penjelasan klasifikasi: Nasabah menanyakan sta...,Request|Kartu Kredit|Kenaikan Limit,Penjelasan klasifikasi: Nasabah meminta kenaik...,Inquiry|Kartu Kredit|Update Transaksi,Penjelasan klasifikasi: Nasabah menanyakan men...
9,9151919416110000060,Request|Credit Card|Annual Fee,Penjelasan klasifikasi: Nasabah meminta pengha...,,,,


# Combine data with ground truth

In [90]:
# read json file
import json

with open('./ground_truth/ground_truth.json') as f:
    ground_truth = json.load(f)

# map with ground truth
df['interaction_id'] = df['interaction_id'].astype(str)
df['ground_truth'] = df['interaction_id'].map(ground_truth)

# check whether option_1 or option_2 or option_3 is equal to ground_truth, the result is True or False in "is_match" column
df['is_match'] = (df['ground_truth'].isin(df['option_1'])) | \
                 (df['ground_truth'].isin(df['option_2'])) | \
                 (df['ground_truth'].isin(df['option_3']))

# save to csv
df.to_csv('output.csv', index=False)

# Read the output.csv file

In [2]:
import pandas as pd

df = pd.read_csv('output.csv')

# check whether option_1 or option_2 or option_3 is equal to ground_truth, the result is True or False in "is_match" column
df['is_match'] = (df['ground_truth'] == df['option_1']) | \
                 (df['ground_truth'] == df['option_2']) | \
                 (df['ground_truth']== df['option_3'])
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3,ground_truth,is_match
0,9151919416110000051,Complain|ATM|Gagal Tarik,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,,Complain|ATM|Gagal Tarik,True
1,9151919416110000052,Complain|Kartu Kredit|Transaksi Tidak Dikenal,Penjelasan klasifikasi: Nasabah melaporkan tra...,Request|Kartu Kredit|Penggantian Kartu,Penjelasan klasifikasi: Nasabah meminta pembua...,Request|Kartu Kredit|Penyanggahan Transaksi,Penjelasan klasifikasi: Nasabah meminta inform...,Complain|Credit Card|Sanggahan Transaksi,False
2,9151919416110000053,Complain|GoPay|Gagal Transaksi,Penjelasan klasifikasi: Nasabah mengalami kend...,Inquiry|ATM|Penarikan Tunai,Penjelasan klasifikasi: Nasabah menanyakan kem...,Complain|GoPay|Pelaporan,Penjelasan klasifikasi: Laporan dibuat terkai...,Complain|OCTO Mobile|Kendala Transaksi,False
3,9151919416110000054,Complain|Kartu Kredit|Transaksi Ganda,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,,Complain|Credit Card|Sanggahan Transaksi,False
4,9151919416110000055,Complain|OCTO Mobile|Kendala Login,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain|Mobile Banking|Update Data,Penjelasan klasifikasi: Nasabah komplain terka...,Complain|Kartu Kredit|Penyanggahan Transaksi,Penjelasan klasifikasi: Nasabah komplain trans...,Complain|OCTO Mobile|Kendala Login,True
5,9151919416110000056,Inquiry|Account|Biaya Rekening,Penjelasan klasifikasi: Nasabah menanyakan det...,Inquiry|Debit Card|Biaya Admin Bulanan,Penjelasan klasifikasi: Nasabah ingin mengetah...,Inquiry|Transfer|Metode Transfer,Penjelasan klasifikasi: Nasabah menanyakan per...,Inquiry|Tabungan|Other,False
6,9151919416110000057,Inquiry|KPR|Sisa Tagihan,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,,Inquiry|KPR|Other,False
7,9151919416110000058,Inquiry|Kartu Kredit|Sisa Limit,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry|Kartu Kredit|Pembayaran Cicilan,Penjelasan klasifikasi: Nasabah menanyakan dam...,Inquiry|Kartu Kredit|Over Limit,Penjelasan klasifikasi: Nasabah menanyakan kon...,Inquiry|Credit Card|Limit,False
8,9151919416110000059,Complain|Kartu Kredit|Pembayaran Belum Masuk,Penjelasan klasifikasi: Nasabah menanyakan sta...,Request|Kartu Kredit|Kenaikan Limit,Penjelasan klasifikasi: Nasabah meminta kenaik...,Inquiry|Kartu Kredit|Update Transaksi,Penjelasan klasifikasi: Nasabah menanyakan men...,Inquiry|Credit Card|Pembayaran Tagihan Credit ...,False
9,9151919416110000060,Request|Credit Card|Annual Fee,Penjelasan klasifikasi: Nasabah meminta pengha...,,,,,,False


# CALCULATE ACCURACY

In [3]:
# calculate the mean of "is_match" column where ground_truth is not NaN
accuracy = df[df['ground_truth'].notnull()]['is_match'].mean()
accuracy

np.float64(0.05263157894736842)

In [93]:
df

Unnamed: 0,interaction_id,option_1,explanation_1,option_2,explanation_2,option_3,explanation_3,ground_truth,is_match
0,9151919416110000051,Complain|ATM|Gagal Tarik,Penjelasan klasifikasi: Nasabah mengalami gaga...,,,,,Complain|ATM|Gagal Tarik,True
1,9151919416110000052,Complain|Kartu Kredit|Transaksi Tidak Dikenal,Penjelasan klasifikasi: Nasabah melaporkan tra...,Request|Kartu Kredit|Penggantian Kartu,Penjelasan klasifikasi: Nasabah meminta pembua...,Request|Kartu Kredit|Penyanggahan Transaksi,Penjelasan klasifikasi: Nasabah meminta inform...,Complain|Credit Card|Sanggahan Transaksi,False
2,9151919416110000053,Complain|GoPay|Gagal Transaksi,Penjelasan klasifikasi: Nasabah mengalami kend...,Inquiry|ATM|Penarikan Tunai,Penjelasan klasifikasi: Nasabah menanyakan kem...,Complain|GoPay|Pelaporan,Penjelasan klasifikasi: Laporan dibuat terkai...,Complain|OCTO Mobile|Kendala Transaksi,True
3,9151919416110000054,Complain|Kartu Kredit|Transaksi Ganda,Penjelasan klasifikasi: Nasabah menanyakan tra...,,,,,Complain|Credit Card|Sanggahan Transaksi,False
4,9151919416110000055,Complain|OCTO Mobile|Kendala Login,Penjelasan klasifikasi: Nasabah mengalami kend...,Complain|Mobile Banking|Update Data,Penjelasan klasifikasi: Nasabah komplain terka...,Complain|Kartu Kredit|Penyanggahan Transaksi,Penjelasan klasifikasi: Nasabah komplain trans...,Complain|OCTO Mobile|Kendala Login,True
5,9151919416110000056,Inquiry|Account|Biaya Rekening,Penjelasan klasifikasi: Nasabah menanyakan det...,Inquiry|Debit Card|Biaya Admin Bulanan,Penjelasan klasifikasi: Nasabah ingin mengetah...,Inquiry|Transfer|Metode Transfer,Penjelasan klasifikasi: Nasabah menanyakan per...,Inquiry|Tabungan|Other,False
6,9151919416110000057,Inquiry|KPR|Sisa Tagihan,Penjelasan klasifikasi: Nasabah menanyakan sis...,,,,,Inquiry|KPR|Other,False
7,9151919416110000058,Inquiry|Kartu Kredit|Sisa Limit,Penjelasan klasifikasi: Nasabah menanyakan sis...,Inquiry|Kartu Kredit|Pembayaran Cicilan,Penjelasan klasifikasi: Nasabah menanyakan dam...,Inquiry|Kartu Kredit|Over Limit,Penjelasan klasifikasi: Nasabah menanyakan kon...,Inquiry|Credit Card|Limit,False
8,9151919416110000059,Complain|Kartu Kredit|Pembayaran Belum Masuk,Penjelasan klasifikasi: Nasabah menanyakan sta...,Request|Kartu Kredit|Kenaikan Limit,Penjelasan klasifikasi: Nasabah meminta kenaik...,Inquiry|Kartu Kredit|Update Transaksi,Penjelasan klasifikasi: Nasabah menanyakan men...,Inquiry|Credit Card|Pembayaran Tagihan Credit ...,False
9,9151919416110000060,Request|Credit Card|Annual Fee,Penjelasan klasifikasi: Nasabah meminta pengha...,,,,,,False


In [100]:
# GET WHICH LAYER GET THE WRONG PREDICTION
def split_option(df, column_name):
    """Memisahkan kolom berdasarkan '|' dan membuat kolom baru."""
    df[column_name + '_layer_1'] = df[column_name].str.split('|').str[0]
    df[column_name + '_layer_2'] = df[column_name].str.split('|').str[1]
    df[column_name + '_layer_3'] = df[column_name].str.split('|').str[2]
    return df

df = split_option(df, 'option_1')
df = split_option(df, 'option_2')
df = split_option(df, 'option_3')
df = split_option(df, 'ground_truth')

# check if layer 1, layer 2, layer 3 match with ground_truth layer 1, layer 2, layer 3
df['is_match_layer_1'] = df['ground_truth_layer_1'].isin(df['option_1_layer_1']) | \
                         df['ground_truth_layer_1'].isin(df['option_2_layer_1']) | \
                         df['ground_truth_layer_1'].isin(df['option_3_layer_1'])
df['is_match_layer_2'] = df['ground_truth_layer_2'].isin(df['option_1_layer_2']) | \
                            df['ground_truth_layer_2'].isin(df['option_2_layer_2']) | \
                            df['ground_truth_layer_2'].isin(df['option_3_layer_2'])
df['is_match_layer_3'] = df['ground_truth_layer_3'].isin(df['option_1_layer_3']) | \
                            df['ground_truth_layer_3'].isin(df['option_2_layer_3']) | \
                            df['ground_truth_layer_3'].isin(df['option_3_layer_3'])

In [101]:
accuracy_layer_1 = df[df['ground_truth_layer_1'].notnull()]['is_match_layer_1'].mean()
print(f"Layer 1 accuracy: {accuracy_layer_1}")
# show df for layer 1
df[['interaction_id', 'option_1_layer_1','option_2_layer_1','option_3_layer_1', 'ground_truth_layer_1', 'is_match_layer_1']]
# df[['interaction_id', 'option_1_layer_1', 'ground_truth_layer_1', 'is_match_layer_1']]

Layer 1 accuracy: 1.0


Unnamed: 0,interaction_id,option_1_layer_1,option_2_layer_1,option_3_layer_1,ground_truth_layer_1,is_match_layer_1
0,9151919416110000051,Complain,,,Complain,True
1,9151919416110000052,Complain,Request,Request,Complain,True
2,9151919416110000053,Complain,Inquiry,Complain,Complain,True
3,9151919416110000054,Complain,,,Complain,True
4,9151919416110000055,Complain,Complain,Complain,Complain,True
5,9151919416110000056,Inquiry,Inquiry,Inquiry,Inquiry,True
6,9151919416110000057,Inquiry,,,Inquiry,True
7,9151919416110000058,Inquiry,Inquiry,Inquiry,Inquiry,True
8,9151919416110000059,Complain,Request,Inquiry,Inquiry,True
9,9151919416110000060,Request,,,,True


In [103]:
accuracy_layer_2 = df[df['ground_truth_layer_2'].notnull()]['is_match_layer_2'].mean()
print(f"Layer 2 accuracy: {accuracy_layer_2}")
df[['interaction_id','option_1_layer_2','option_2_layer_2','option_3_layer_2', 'ground_truth_layer_2', 'is_match_layer_2']]

Layer 2 accuracy: 0.868421052631579


Unnamed: 0,interaction_id,option_1_layer_2,option_2_layer_2,option_3_layer_2,ground_truth_layer_2,is_match_layer_2
0,9151919416110000051,ATM,,,ATM,True
1,9151919416110000052,Kartu Kredit,Kartu Kredit,Kartu Kredit,Credit Card,True
2,9151919416110000053,GoPay,ATM,GoPay,OCTO Mobile,True
3,9151919416110000054,Kartu Kredit,,,Credit Card,True
4,9151919416110000055,OCTO Mobile,Mobile Banking,Kartu Kredit,OCTO Mobile,True
5,9151919416110000056,Account,Debit Card,Transfer,Tabungan,False
6,9151919416110000057,KPR,,,KPR,True
7,9151919416110000058,Kartu Kredit,Kartu Kredit,Kartu Kredit,Credit Card,True
8,9151919416110000059,Kartu Kredit,Kartu Kredit,Kartu Kredit,Credit Card,True
9,9151919416110000060,Credit Card,,,,True


In [104]:
accuracy_layer_3 = df[df['ground_truth_layer_3'].notnull()]['is_match_layer_3'].mean()
print(f"Layer 3 accuracy: {accuracy_layer_3}")
df[['interaction_id','option_1_layer_3','option_2_layer_3','option_3_layer_3', 'ground_truth_layer_3', 'is_match_layer_3']]

Layer 3 accuracy: 0.4473684210526316


Unnamed: 0,interaction_id,option_1_layer_3,option_2_layer_3,option_3_layer_3,ground_truth_layer_3,is_match_layer_3
0,9151919416110000051,Gagal Tarik,,,Gagal Tarik,True
1,9151919416110000052,Transaksi Tidak Dikenal,Penggantian Kartu,Penyanggahan Transaksi,Sanggahan Transaksi,False
2,9151919416110000053,Gagal Transaksi,Penarikan Tunai,Pelaporan,Kendala Transaksi,True
3,9151919416110000054,Transaksi Ganda,,,Sanggahan Transaksi,False
4,9151919416110000055,Kendala Login,Update Data,Penyanggahan Transaksi,Kendala Login,True
5,9151919416110000056,Biaya Rekening,Biaya Admin Bulanan,Metode Transfer,Other,True
6,9151919416110000057,Sisa Tagihan,,,Other,True
7,9151919416110000058,Sisa Limit,Pembayaran Cicilan,Over Limit,Limit,False
8,9151919416110000059,Pembayaran Belum Masuk,Kenaikan Limit,Update Transaksi,Pembayaran Tagihan Credit Card,False
9,9151919416110000060,Annual Fee,,,,True
