In [17]:
import spacy
import json
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report as clf_report_sklearn
from spacy.training.example import Example
from seqeval.metrics import classification_report as seqeval_classification_report

In [18]:
# Load the previously trained spaCy pipeline from the directory in `model_path`.
# NOTE(review): the directory name suggests a checkpoint saved at training
# iteration 40 — confirm this is the checkpoint intended for evaluation.
model_path = "models5/model_iter40"
nlp = spacy.load(model_path)
print("Model berhasil dimuat.")

Model berhasil dimuat.


In [19]:
# Step 2: read the test set from disk. The records consumed by the evaluation
# loop as (text, annotation) pairs live under the top-level "annotations" key.
with open("data_test.json", encoding="utf-8") as f:
    data_json = json.loads(f.read())
TEST_DATA = data_json["annotations"]

In [20]:
# Evaluate the loaded pipeline on TEST_DATA in two ways:
#   1. BIO tag sequences scored with seqeval. Note that seqeval decodes the
#      tag sequences back into entity chunks, so the reported
#      precision/recall/F1 are entity-level scores, not per-token accuracy.
#   2. Exact-match span scoring on (start_char, end_char, label) triples.


def _predicted_bio_tags(doc):
    """Return one BIO tag per token, read from the pipeline's predictions.

    Tokens without an entity type get "O"; tokens whose ``ent_iob_`` is "B"
    get ``B-<type>``; every other entity token gets ``I-<type>``.
    """
    tags = []
    for token in doc:
        if not token.ent_type_:
            tags.append("O")
            continue
        prefix = "B" if token.ent_iob_ == "B" else "I"
        tags.append(f"{prefix}-{token.ent_type_}")
    return tags


def _gold_bio_tags(doc, entities):
    """Project gold character spans onto the tokens of ``doc`` as BIO tags.

    Args:
        doc: Tokenised document; each token exposes its start offset as ``idx``.
        entities: Iterable of ``(start, end, label)`` character spans.

    Returns:
        A list with one tag per token. A token belongs to a span when its
        start offset lies in ``[start, end)``; the first such token is tagged
        ``B-<label>`` and the following ones ``I-<label>``. When spans
        overlap, later spans overwrite the tags written by earlier ones.
    """
    tags = ["O"] * len(doc)
    for start, end, label in entities:
        # The first covered token always opens the entity, so a span can never
        # begin with a dangling "I-" tag.
        prefix = "B"
        for i, token in enumerate(doc):
            # Token offsets grow through the document, so nothing past the
            # end of the span can still belong to it.
            if token.idx >= end:
                break
            if token.idx >= start:
                tags[i] = f"{prefix}-{label}"
                prefix = "I"
    return tags


def _precision_recall_f1(tp, fp, fn):
    """Return ``(precision, recall, f1)`` from raw counts.

    Each score falls back to 0 when its denominator is 0, so empty
    predictions or an empty gold set do not raise ZeroDivisionError.
    """
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    return precision, recall, f1


# Approach 1: per-document BIO tag sequences (gold and predicted) for seqeval.
true_entities_list = []
pred_entities_list = []

# Approach 2: running counts of true positives, false positives, false negatives.
tp, fp, fn = 0, 0, 0

for text, annot in TEST_DATA:
    doc = nlp(text)

    # === BIO sequences ===
    # Gold spans are aligned against the same tokenisation the model produced,
    # so both sequences have exactly one tag per token of `doc`.
    pred_bio = _predicted_bio_tags(doc)
    true_bio = _gold_bio_tags(doc, annot["entities"])
    pred_entities_list.append(pred_bio)
    true_entities_list.append(true_bio)

    # === Span-level evaluation ===
    # A prediction counts as correct only if start, end and label all match.
    gold_entities = {(start, end, label) for start, end, label in annot["entities"]}
    pred_entities = {(ent.start_char, ent.end_char, ent.label_) for ent in doc.ents}

    tp += len(gold_entities & pred_entities)
    fp += len(pred_entities - gold_entities)
    fn += len(gold_entities - pred_entities)

# ========== Evaluation results ==========
print("\n=== [Pendekatan 1] BIO Tagging dengan Seqeval ===")
print(seqeval_classification_report(true_entities_list, pred_entities_list))

print("\n=== [Pendekatan 2] Span-level Evaluation ===")
precision, recall, f1 = _precision_recall_f1(tp, fp, fn)

print(f"Precision: {precision:.2f}")
print(f"Recall   : {recall:.2f}")
print(f"F1 Score : {f1:.2f}")


=== [Pendekatan 1] BIO Tagging dengan Seqeval ===
              precision    recall  f1-score   support

         APP       0.89      0.87      0.88       357
        DESC       0.88      0.75      0.81       142

   micro avg       0.89      0.84      0.86       499
   macro avg       0.88      0.81      0.85       499
weighted avg       0.89      0.84      0.86       499


=== [Pendekatan 2] Span-level Evaluation ===
Precision: 0.89
Recall   : 0.84
F1 Score : 0.86
