In [1]:
from pathlib import Path 
import json
from transformers import pipeline

from src.nli import classify_intent
from src.util import load_config

# Build the zero-shot classifier. The checkpoint name comes from the project
# config ("nli_classifier" key); if the key is absent, the default below is used.
config_path = Path("configs/config.yaml")
config = load_config(config_path)
nli_checkpoint = config.get("nli_classifier", "facebook/bart-large-mnli")
model = pipeline("zero-shot-classification", model=nli_checkpoint)

# Read the intent-classification test set from disk.
test_set_path = "data/test_data/intent_classification2.json"
with open(test_set_path, "r", encoding="utf-8") as infile:
    data = json.load(infile)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Classify every test sample and keep the predicted intent aligned with the
# expected one. The score returned by the classifier is not used further here.
predictions = []
references = []
for sample_text, expected_intent in data:
    intent, score = classify_intent(sample_text, model)
    predictions.append(intent)
    references.append(expected_intent)

print("classified ", len(predictions), " documents")

[38;20m2024-01-28 12:16:54,568 - src.nli - INFO - classified intent: rules 0.50 (nli.py:20)[0m
[38;20m2024-01-28 12:16:55,111 - src.nli - INFO - classified intent: conversation 0.62 (nli.py:20)[0m
[38;20m2024-01-28 12:16:55,587 - src.nli - INFO - classified intent: conversation 0.47 (nli.py:20)[0m
[38;20m2024-01-28 12:16:56,095 - src.nli - INFO - classified intent: deckbuilding 0.41 (nli.py:20)[0m
[38;20m2024-01-28 12:16:56,645 - src.nli - INFO - classified intent: rules 0.43 (nli.py:20)[0m
[38;20m2024-01-28 12:16:57,160 - src.nli - INFO - classified intent: conversation 0.58 (nli.py:20)[0m
[38;20m2024-01-28 12:16:57,615 - src.nli - INFO - classified intent: rules 0.41 (nli.py:20)[0m
[38;20m2024-01-28 12:16:58,097 - src.nli - INFO - classified intent: rules 0.59 (nli.py:20)[0m
[38;20m2024-01-28 12:16:58,583 - src.nli - INFO - classified intent: conversation 0.43 (nli.py:20)[0m
[38;20m2024-01-28 12:16:59,099 - src.nli - INFO - classified intent: rules 0.53 (nli.py:20)

classified  53  documents


In [4]:
from sklearn.metrics import f1_score, confusion_matrix, recall_score
from src.nli import Intent
import pandas as pd 

# Encode each label as its position in `classes`, so the metric functions
# receive integer class ids in a fixed order.
classes = [member.value for member in Intent]
prediction_classes = list(map(classes.index, predictions))
reference_classes = list(map(classes.index, references))

print(classes)

# Both metrics share the same inputs; average=None requests one score per class.
metric_kwargs = dict(
    y_true=reference_classes,
    y_pred=prediction_classes,
    labels=list(range(len(classes))),
    average=None,
)
f1_scores = f1_score(**metric_kwargs)
recall_scores = recall_score(**metric_kwargs)

# One row per class, ordered by ascending F1.
evaluation = pd.DataFrame(
    {"labels": classes, "f1": f1_scores, "recall": recall_scores}
).sort_values("f1")
evaluation

['deckbuilding', 'rules', 'conversation']


Unnamed: 0,labels,f1,recall
2,conversation,0.75,0.6
1,rules,0.8,0.941176
0,deckbuilding,0.941176,1.0


In [6]:
# Confusion matrix over all class ids, shown with the class names on both axes.
class_ids = list(range(len(classes)))
matrix = confusion_matrix(
    y_true=reference_classes,
    y_pred=prediction_classes,
    labels=class_ids,
)
pd.DataFrame(data=matrix, index=classes, columns=classes)

Unnamed: 0,deckbuilding,rules,conversation
deckbuilding,16,0,0
rules,1,16,0
conversation,1,7,12
