In [3]:
from pathlib import Path 
import json
from transformers import pipeline

from src.nli import classify_intent
from src.util import load_config

# Build the zero-shot classification pipeline; the checkpoint name is read from
# the project config and falls back to BART-large-MNLI when the key is absent.
config = load_config(Path("configs/config.yaml"))
nli_model_name = config.get("nli_classifier", "facebook/bart-large-mnli")
model = pipeline("zero-shot-classification", model=nli_model_name)

# Read the evaluation set; the next cell iterates it as (text, expected_intent) pairs.
evaluation_path = Path("data/evaluation/intent_classification.json")
data = json.loads(evaluation_path.read_text(encoding="utf-8"))


In [4]:

# Classify every evaluation sample, keeping the predicted and expected intents
# in two parallel lists (same order as `data`).
predictions = []
references = []
for sample_text, gold_intent in data:
    # classify_intent returns an (intent, score) pair; only the intent is kept.
    predicted_intent, _confidence = classify_intent(sample_text, model)
    predictions += [predicted_intent]
    references += [gold_intent]
print(f"classified  {len(predictions)}  documents")

[38;20m2024-01-28 13:29:39,601 - src.nli - INFO - classified intent: situation 0.73 (nli.py:21)[0m
[38;20m2024-01-28 13:29:40,272 - src.nli - INFO - classified intent: situation 0.70 (nli.py:21)[0m
[38;20m2024-01-28 13:29:41,052 - src.nli - INFO - classified intent: situation 0.77 (nli.py:21)[0m
[38;20m2024-01-28 13:29:41,797 - src.nli - INFO - classified intent: situation 0.60 (nli.py:21)[0m
[38;20m2024-01-28 13:29:42,600 - src.nli - INFO - classified intent: situation 0.82 (nli.py:21)[0m
[38;20m2024-01-28 13:29:43,314 - src.nli - INFO - classified intent: situation 0.71 (nli.py:21)[0m
[38;20m2024-01-28 13:29:43,979 - src.nli - INFO - classified intent: situation 0.80 (nli.py:21)[0m
[38;20m2024-01-28 13:29:44,665 - src.nli - INFO - classified intent: situation 0.67 (nli.py:21)[0m
[38;20m2024-01-28 13:29:45,336 - src.nli - INFO - classified intent: situation 0.83 (nli.py:21)[0m
[38;20m2024-01-28 13:29:46,144 - src.nli - INFO - classified intent: situation 0.74 (nli.p

classified  54  documents


In [5]:
from sklearn.metrics import f1_score, confusion_matrix, recall_score
from src.nli import Intent
import pandas as pd 

# Fixed label order: a label's position in this list is the integer id passed to sklearn.
classes = ["deckbuilding", "rules", "conversation"]
label_ids = list(range(len(classes)))

# Encode the string labels as positions in `classes`; list.index raises
# ValueError for any label that is not listed above.
prediction_classes = list(map(classes.index, predictions))
reference_classes = list(map(classes.index, references))

print(classes)

# average=None returns one score per entry of `labels`, so both score arrays
# line up with `classes`.
metric_kwargs = dict(
    y_true=reference_classes,
    y_pred=prediction_classes,
    labels=label_ids,
    average=None,
)
f1_scores = f1_score(**metric_kwargs)
recall_scores = recall_score(**metric_kwargs)

# Per-class summary table, weakest class (lowest F1) first.
evaluation = pd.DataFrame(
    {"labels": classes, "f1": f1_scores, "recall": recall_scores}
).sort_values(by="f1")
evaluation

['deckbuilding', 'rules', 'conversation']


Unnamed: 0,labels,f1,recall
2,conversation,0.26087,0.15
1,rules,0.666667,1.0
0,deckbuilding,0.967742,0.9375


In [6]:
# Confusion matrix over the integer-encoded intents. sklearn places the true
# labels (y_true) on the rows and the predictions (y_pred) on the columns.
matrix = confusion_matrix(y_true=reference_classes, y_pred=prediction_classes, labels=list(range(len(classes))))
# Name both axes so the rendered table states which side is the reference and
# which is the prediction; without this the two are indistinguishable.
pd.DataFrame(matrix, columns=classes, index=classes).rename_axis(index="reference", columns="predicted")

Unnamed: 0,deckbuilding,rules,conversation
deckbuilding,15,1,0
rules,0,18,0
conversation,0,17,3
