In [1]:
import pandas as pd

# Load the annotated sentences. The cells below read the 'gpt', 'stanza' and
# 'manual' columns of every row and parse each as a "token/TAG ..." string.
# NOTE(review): the path has no directory component, so it is resolved against
# the notebook's working directory; the file name must match the file on disk
# exactly (including the doubled "g" in "taggingg").
df = pd.read_excel("MATCH_final-anotasi-pos-taggingg.xlsx")

# Normalisation table applied to every tag before two taggings are compared:
# each key is a tag as it appears in the data, each value is the label it is
# collapsed into. Tags that are not listed here are left unchanged by
# map_tag().
#
# NOTE(review): this table used to list 'NSP' twice ('NSP': 'NSB' followed by
# 'NSP': 'NSD'). Python keeps the last value of a repeated key, so 'NSD' was
# the mapping actually in effect; the dead 'NSB' entry has been removed.
# Confirm that 'NSD' (and not 'NSB', like 'NSM'/'NSV') is the intended target.
label_mapping = {
    'PAD': 'PAD',
    'ADV': 'ADV',
    'ASP': 'ADV',
    'B--': 'B--',
    'CC-': 'CC-',
    'CCA': 'CC-',
    'CCS': 'CC-',
    'CD': 'CD',
    'X--': 'F--',
    'F--': 'F--',
    'DET': 'DET',
    'NSB': 'NSB',
    'NSD': 'NSD',
    'NSM': 'NSB',
    'NSV': 'NSB',
    'NSP': 'NSD',
    'NUM': 'NUM',
    'ORD': 'NUM',
    'Z--': 'Z--',
    'O--': 'O--',
    'S--': 'S--',
    'T--': 'T--',
    'R--': 'ADV',
    'PRD': 'PRD',
    'PRF': 'PRF',
    'PRP': 'PRP',
    'PRT': 'PRT',
    'PR-': 'PRD',
    'PRN': 'PRN',
    'W--': 'PRQ',
    'PRQ': 'PRQ',
    'PP1': 'PRP',
    'PS2': 'PRP',
    'PS3': 'PRF',
    'G--': 'PRF',
    'ASS': 'ASS',
    'VSB': 'VSB',
    'VSP': 'VSP',
    'VBI': 'VBI',
    'VBL': 'VBL',
    'VBT': 'VBT',
    'VSA': 'VSA',
    'CO': 'CO',
    'H--': 'H--',
    'M--': 'M--',
}


def parse_tagged_sentence(tagged_text):
    """Turn a whitespace-separated ``token/TAG`` string into ``(token, tag)`` pairs.

    Every chunk is split on its *last* ``/`` only, so the token part may itself
    contain slashes. Chunks that contain no ``/`` at all are skipped.

    Args:
        tagged_text: string such as ``"Bagian/NSD dari/ADV"``.

    Returns:
        List of ``(token, tag)`` tuples in their original order.
    """
    split_chunks = (chunk.rsplit('/', 1) for chunk in tagged_text.split())
    # A chunk without any '/' yields a single-element list and is dropped.
    return [tuple(pieces) for pieces in split_chunks if len(pieces) == 2]

def map_tag(tag):
    """Return the normalised label for ``tag``.

    Looks ``tag`` up in ``label_mapping``; a tag that has no entry there
    (including ``None``) is returned unchanged.
    """
    if tag in label_mapping:
        return label_mapping[tag]
    return tag

def evaluate_tags(gpt_tags, stanza_tags, manual_tags):
    """Compare the GPT and Stanza taggings with the manual one, position by position.

    The three sequences are aligned purely by index. A sequence that is shorter
    than the longest one is padded with ``(None, None)``.

    A prediction is correct only when the position has a manual token, the
    predicted token equals the manual token, and both tags are equal after
    ``map_tag`` normalisation. Positions beyond the end of the manual sequence
    are never marked correct (two padded entries would otherwise compare equal
    as ``None == None``).

    Args:
        gpt_tags: list of ``(token, tag)`` pairs from GPT.
        stanza_tags: list of ``(token, tag)`` pairs from Stanza.
        manual_tags: list of ``(token, tag)`` pairs from the manual annotation.

    Returns:
        List of dicts, one per aligned position, with the keys ``index``,
        ``manual_token``, ``manual_tag``, ``gpt_token``, ``gpt_tag``,
        ``stanza_token``, ``stanza_tag``, ``gpt_correct`` and
        ``stanza_correct``. The ``*_tag`` values are normalised labels, or
        ``None`` when the raw tag is missing or empty.
    """
    padding = (None, None)

    def _pair_at(pairs, position):
        # Entry at `position`, or the padding pair once the sequence has ended.
        return pairs[position] if position < len(pairs) else padding

    max_len = max(len(gpt_tags), len(stanza_tags), len(manual_tags))

    results = []
    for idx in range(max_len):
        gpt_tok, gpt_tag = _pair_at(gpt_tags, idx)
        stanza_tok, stanza_tag = _pair_at(stanza_tags, idx)
        manual_tok, manual_tag = _pair_at(manual_tags, idx)

        # Normalise each tag once and reuse it for comparison and reporting.
        gpt_label = map_tag(gpt_tag)
        stanza_label = map_tag(stanza_tag)
        manual_label = map_tag(manual_tag)

        # Without a manual token there is nothing to be correct against.
        has_reference = manual_tok is not None
        gpt_match = has_reference and gpt_tok == manual_tok and gpt_label == manual_label
        stanza_match = has_reference and stanza_tok == manual_tok and stanza_label == manual_label

        results.append({
            'index': idx,
            'manual_token': manual_tok,
            'manual_tag': manual_label if manual_tag else None,
            'gpt_token': gpt_tok,
            'gpt_tag': gpt_label if gpt_tag else None,
            'stanza_token': stanza_tok,
            'stanza_tag': stanza_label if stanza_tag else None,
            'gpt_correct': gpt_match,
            'stanza_correct': stanza_match,
        })
    return results

# Print a token-by-token comparison of the three taggings for every row of df.
for i, row in df.iterrows():
    # Parse the three annotation columns in the order gpt, stanza, manual.
    gpt, stanza, manual = (
        parse_tagged_sentence(row[column]) for column in ('gpt', 'stanza', 'manual')
    )
    evaluated = evaluate_tags(gpt, stanza, manual)

    # Sentence header; i is the row's index label in df.
    print(f"\n=== Kalimat ke-{i+1} ===")
    for r in evaluated:
        line = (f"Index {r['index']}: manual=({r['manual_token']}/{r['manual_tag']}), "
                f"gpt=({r['gpt_token']}/{r['gpt_tag']}) [{'✅' if r['gpt_correct'] else '❌'}], "
                f"stanza=({r['stanza_token']}/{r['stanza_tag']}) [{'✅' if r['stanza_correct'] else '❌'}]")
        print(line)



=== Kalimat ke-1 ===
Index 0: manual=(Bagian/NSD), gpt=(Bagian/NSD) [✅], stanza=(Bagian/NSD) [✅]
Index 1: manual=(dari/ADV), gpt=(dari/ADV) [✅], stanza=(dari/ADV) [✅]
Index 2: manual=(bidang/NSD), gpt=(bidang/NSD) [✅], stanza=(bidang/NSD) [✅]
Index 3: manual=(biologi/NSD), gpt=(biologi/NSD) [✅], stanza=(biologi/NSD) [✅]
Index 4: manual=(yang/S--), gpt=(yang/S--) [✅], stanza=(yang/S--) [✅]
Index 5: manual=(membutuhkan/VSA), gpt=(membutuhkan/VSA) [✅], stanza=(membutuhkan/VSA) [✅]
Index 6: manual=(penggunaan/NSD), gpt=(penggunaan/NSD) [✅], stanza=(penggunaan/NSD) [✅]
Index 7: manual=(mikroskop/F--), gpt=(mikroskop/NSD) [❌], stanza=(mikroskop/F--) [✅]
Index 8: manual=(,/Z--), gpt=(,/Z--) [✅], stanza=(,/Z--) [✅]
Index 9: manual=(sebagai/ADV), gpt=(sebagai/ADV) [✅], stanza=(sebagai/ADV) [✅]
Index 10: manual=(contoh/NSD), gpt=(contoh/NSD) [✅], stanza=(contoh/NSD) [✅]

=== Kalimat ke-2 ===
Index 0: manual=(Dalam/ADV), gpt=(Dalam/ADV) [✅], stanza=(Dalam/NSD) [❌]
Index 1: manual=(studi/NSD), gp

In [2]:
# Corpus-level accuracy of GPT and Stanza against the manual annotation.
# Only positions that have a manual token are scored; the denominator is the
# total number of manual tokens over all rows.
total_manual_tokens = 0
total_gpt_correct = 0
total_stanza_correct = 0

for i, row in df.iterrows():
    gpt, stanza, manual = (
        parse_tagged_sentence(row[column]) for column in ('gpt', 'stanza', 'manual')
    )
    evaluated = evaluate_tags(gpt, stanza, manual)

    # Drop padded positions that lie beyond the end of the manual sequence.
    scored = [r for r in evaluated if r['manual_token'] is not None]
    total_manual_tokens += len(scored)
    total_gpt_correct += sum(1 for r in scored if r['gpt_correct'])
    total_stanza_correct += sum(1 for r in scored if r['stanza_correct'])

if total_manual_tokens:
    gpt_accuracy = total_gpt_correct / total_manual_tokens
    stanza_accuracy = total_stanza_correct / total_manual_tokens
else:
    # No manual tokens at all: report 0 rather than dividing by zero.
    gpt_accuracy = 0
    stanza_accuracy = 0

print(f"Akurasi GPT: {gpt_accuracy:.2%}")
print(f"Akurasi Stanza: {stanza_accuracy:.2%}")

Akurasi GPT: 62.68%
Akurasi Stanza: 61.60%
