# Imports and initialization

In [1]:
import json
import numpy as np

In [2]:
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.svm import SVC

In [3]:
from checklist.test_suite import TestSuite

In [4]:
# Every intent name, in index order. The position in this list is significant:
# integer labels and predictions read from the test suite are turned into
# names with INTENTS[index], so entries must not be reordered.
INTENTS = [
    # 0-2: booking
    "book-appointment",
    "book-hotel",
    "book-restaurant",
    # 3-8: changes
    "change-appointment",
    "change-car-rental",
    "change-driver",
    "change-hotel",
    "change-reminder",
    "change-restaurant",
    # 9-10
    "contact-provider",
    "context",
    # 11-23: searches
    "find-Property",
    "find-Recipe",
    "find-Trip",
    "find-activity",
    "find-around-me",
    "find-car-rental",
    "find-driver",
    "find-flight",
    "find-hotel",
    "find-information",
    "find-itinerary",
    "find-restaurant",
    "find-train",
    # 24-26
    "inform",
    "irrelevant",
    "other",
    # 27-32: information providers
    "provide-agenda",
    "provide-news",
    "provide-showtimes",
    "provide-translation",
    "provide-tv-guide",
    "provide-weather",
    # 33-36
    "purchase",
    "set-reminder",
    "smalltalk",
    "unsupported",
]

# Reduced set of intent names, passed as `labels=` to classification_report
# and confusion_matrix so that only these classes (in this order) are reported.
INTENTS_REDUCT = [
    "find-around-me",
    "find-flight",
    "find-hotel",
    "find-restaurant",
    "find-train",
    "irrelevant",
    "provide-showtimes",
    "purchase",
]

In [5]:
# Augmentation types to report on; each name is interpolated into the test
# names looked up in the suite (e.g. "MFT original <type>", "INV <type>").
TYPES_AUG = ["keyboard_aug", "letter_aug", "letter_del"]

In [6]:
# Load the test suite from disk. Test results are read from it further down,
# so presumably it was saved after being run — TODO confirm.
# NOTE(review): the .pkl extension suggests pickle-based deserialization;
# only load suite files that come from a trusted source.
suite = TestSuite.from_file("testsuite/suite.pkl")

In [7]:
def get_true_intents(type_aug):
    """Return the ground-truth intent names for one augmentation type.

    Looks up the test named ``MFT original {type_aug}`` in the global
    ``suite`` and maps each of its labels to the matching entry of
    ``INTENTS``.

    Args:
        type_aug: Augmentation name used to build the test name.

    Returns:
        list: One intent name per label of the test, in the same order.
    """
    original_test = suite.tests.get(f"MFT original {type_aug}")
    intent_names = []
    for label_index in original_test.labels:
        intent_names.append(INTENTS[label_index])
    return intent_names

In [8]:
def _print_mft_report(y_true, y_pred):
    """Print the classification report and confusion matrix restricted to INTENTS_REDUCT."""
    print(classification_report(y_true=y_true, y_pred=y_pred, labels=INTENTS_REDUCT))
    print("\n-- Confusion matrix -- \n")
    print(confusion_matrix(y_true=y_true, y_pred=y_pred, normalize=None, labels=INTENTS_REDUCT))


def visualize(type_aug):
    """Print metrics and confusion matrices for one augmentation type.

    Reads three tests from the global ``suite``:

    * ``MFT original {type_aug}``: predictions on the original sentences,
    * ``MFT {type_aug}``: predictions on the augmented sentences,
    * ``INV {type_aug}``: pairs of predictions; element 0 of each pair is
      reported as "original" and element 1 as "aug".

    For both MFT tests a classification report and a confusion matrix
    (limited to ``INTENTS_REDUCT``) are printed. For the INV test the
    element-wise difference ``original - aug`` of the two confusion matrices
    is printed.

    Args:
        type_aug: Augmentation name used to build the three test names.

    Returns:
        None. All results are written to stdout.
    """
    y_true = get_true_intents(type_aug=type_aug)

    # MFT original
    test_original = suite.tests.get(f'MFT original {type_aug}')
    y_pred_original = [INTENTS[pred] for pred in test_original.results.get('preds')]

    print(f" =============================== {type_aug} ====================================== \n")
    print(f" ------------------- MFT original ----------------\n")
    _print_mft_report(y_true, y_pred_original)

    # MFT aug
    test_aug = suite.tests.get(f'MFT {type_aug}')
    y_pred_aug = [INTENTS[pred] for pred in test_aug.results.get('preds')]

    print(f"\n ------------------- MFT {type_aug} ----------------\n")
    _print_mft_report(y_true, y_pred_aug)

    # INV
    test_inv = suite.tests.get(f'INV {type_aug}')
    inv_preds = test_inv.results.get('preds')
    y_pred_INV_original = [INTENTS[pred[0]] for pred in inv_preds]
    y_pred_INV_aug = [INTENTS[pred[1]] for pred in inv_preds]

    # Both matrices must share one label set. Without an explicit `labels=`,
    # each call derives its labels from its own y_true/y_pred, so the two
    # matrices can differ in shape or in what a given row/column means.
    inv_labels = sorted(set(y_true) | set(y_pred_INV_original) | set(y_pred_INV_aug))
    confusion_matrix_original = confusion_matrix(
        y_true=y_true, y_pred=y_pred_INV_original, labels=inv_labels)
    confusion_matrix_aug = confusion_matrix(
        y_true=y_true, y_pred=y_pred_INV_aug, labels=inv_labels)

    print(f"\n ------------------- INV {type_aug} ----------------\n")
    print(" Difference of confusion matrix (original - aug)")
    # Plain array subtraction keeps rows = true class, columns = predicted
    # class (the previous nested comprehension printed the transpose).
    print(confusion_matrix_original - confusion_matrix_aug)
    print("\n\n")

# Visuals

In [9]:
# Print the textual summary of the suite: test-case counts, failure rates and
# example failures for each test.
suite.summary()

letter_del

MFT original letter_del
Test cases:      200
Fails (rate):    0 (0.0%)


MFT letter_del
Test cases:      200
Fails (rate):    4 (2.0%)

Example fails:
25 (0.0) À quelle heure le premier trai arrivera-t-il demain matin à nantes ?
----
25 (0.0) Conseille moi un retaurant tibétain
----
25 (0.0) passe comande sur amazon
----


INV letter_del
Test cases:      200
Fails (rate):    3 (1.5%)

Example fails:
22 (1.0) Conseille moi un restaurant tibétain
25 (0.0) Conseille moi un retaurant tibétain

----
22 (1.0) Coucou mon génie, tu as un resto a me conseiller sur oulan bator ?
25 (0.0) Coucou mon génie, tu as un esto a me conseiller sur oulan bator ?

----
23 (1.0) À quelle heure le premier train arrivera-t-il demain matin à nantes ?
25 (0.0) À quelle heure le premier trai arrivera-t-il demain matin à nantes ?

----




keyboard_aug

MFT original keyboard_aug
Test cases:      200
Fails (rate):    1 (0.5%)

Example fails:
25 (0.4) j'ai besoin que tu me cherche un b&b pour le dernier

In [10]:
# Display the suite's visual summary table (shown as a SuiteSummarizer widget).
suite.visual_summary_table()

Please wait as we prepare the table data...


SuiteSummarizer(stats={'npassed': 0, 'nfailed': 0, 'nfiltered': 0}, test_infos=[{'name': 'MFT original keyboar…

In [11]:
# Print the full report (MFT original, MFT augmented, INV) for every
# augmentation type.
for type_aug in TYPES_AUG:
    visualize(type_aug=type_aug)


 ------------------- MFT original ----------------

                   precision    recall  f1-score   support

   find-around-me       1.00      1.00      1.00        10
      find-flight       1.00      1.00      1.00         3
       find-hotel       1.00      0.92      0.96        13
  find-restaurant       1.00      1.00      1.00        15
       find-train       1.00      1.00      1.00         4
       irrelevant       0.99      1.00      1.00       131
provide-showtimes       1.00      1.00      1.00         2
         purchase       1.00      1.00      1.00        22

         accuracy                           0.99       200
        macro avg       1.00      0.99      0.99       200
     weighted avg       1.00      0.99      0.99       200


-- Confusion matrix -- 

[[ 10   0   0   0   0   0   0   0]
 [  0   3   0   0   0   0   0   0]
 [  0   0  12   0   0   1   0   0]
 [  0   0   0  15   0   0   0   0]
 [  0   0   0   0   4   0   0   0]
 [  0   0   0   0   0 131   0   0]
