# Imports and initialization

In [7]:
import json
import numpy as np

In [8]:
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.svm import SVC

In [9]:
from checklist.test_suite import TestSuite

In [22]:
# Full intent vocabulary. visualize_MFT / visualize_INV turn integer labels and
# predictions into names with INTENTS[index], so the ORDER of this list is
# significant: inserting, removing or reordering entries changes every report.
# NOTE(review): the order looks ASCII-sorted (capitalised 'find-Property' comes
# before 'find-activity'), presumably mirroring the label encoding used when the
# model was trained -- confirm before editing.
INTENTS = [
    'book-appointment',
    'book-hotel',
    'book-restaurant',
    'change-appointment',
    'change-car-rental',
    'change-driver',
    'change-hotel',
    'change-reminder',
    'change-restaurant',
    'contact-provider',
    'context',
    'find-Property',
    'find-Recipe',
    'find-Trip',
    'find-activity',
    'find-around-me',
    'find-car-rental',
    'find-driver',
    'find-flight',
    'find-hotel',
    'find-information',
    'find-itinerary',
    'find-restaurant',
    'find-train',
    'inform',
    'irrelevant',
    'other',
    'provide-agenda',
    'provide-news',
    'provide-showtimes',
    'provide-translation',
    'provide-tv-guide',
    'provide-weather',
    'purchase',
    'set-reminder',
    'smalltalk',
    'unsupported',
]

# Subset of intents passed as `labels=` to confusion_matrix: these are the rows
# and columns of the printed confusion matrices, in this order. Samples whose
# intent is not listed here do not appear in those matrices.
INTENTS_REDUCT = [
    'find-around-me',
    'find-flight',
    'find-hotel',
    'find-restaurant',
    'find-train',
    'irrelevant',
    'provide-showtimes',
    'purchase',
]

# Load the test suite

In [235]:
# Load the saved CheckList test suite and display the tests it contains.
# NOTE(review): a .pkl file is presumably unpickled by TestSuite.from_file, which
# can execute arbitrary code -- only load suites that come from a trusted source.
suite = TestSuite.from_file("testsuite/suite_force1.pkl")
suite.tests

OrderedDict([('MFT original', <checklist.test_types.MFT at 0x7fda3dfb1668>),
             ('MFT keyboard_augmentation',
              <checklist.test_types.MFT at 0x7fda3c1fde10>),
             ('INV keyboard_augmentation',
              <checklist.test_types.INV at 0x7fda3b92d240>),
             ('MFT letter_augmentation',
              <checklist.test_types.MFT at 0x7fda3c146e48>),
             ('INV letter_augmentation',
              <checklist.test_types.INV at 0x7fda38e64b00>),
             ('MFT letter_deletion',
              <checklist.test_types.MFT at 0x7fda3e8ad5f8>),
             ('INV letter_deletion',
              <checklist.test_types.INV at 0x7fda392ed5f8>),
             ('MFT letter_swap', <checklist.test_types.MFT at 0x7fda3bae77f0>),
             ('INV letter_swap', <checklist.test_types.INV at 0x7fda3b40d7f0>),
             ('MFT bert_swap', <checklist.test_types.MFT at 0x7fda39ee1e80>),
             ('INV bert_swap', <checklist.test_types.INV at 0x7fda39ee1f60>)]

In [236]:
# Derive the augmentation names from the names of the tests stored in the suite.
# The un-augmented baseline 'MFT original' is dropped first: it is not an
# augmentation and has no INV counterpart.
types_test = list(suite.tests)
types_test.remove("MFT original")

# Each augmentation appears once as 'MFT <name>' and once as 'INV <name>'; keep the
# MFT entries only and strip the 4-character 'MFT ' prefix to get the bare name.
types_aug = [test_name[4:] for test_name in types_test if test_name.startswith('MFT')]
print(f'Types of augmentation : {types_aug}')

Types of augmentation : ['keyboard_augmentation', 'letter_augmentation', 'letter_deletion', 'letter_swap', 'bert_swap']


# Visuals

In [1]:
def visualize_MFT(type_aug):
    """Print the classification report and confusion matrix of one MFT test.

    Looks up ``'MFT {type_aug}'`` in the module-level ``suite``, converts its
    labels and stored predictions (indices into ``INTENTS``) to intent names,
    and prints:

    * the scikit-learn classification report over the intents that occur;
    * a confusion matrix restricted to ``INTENTS_REDUCT`` and normalised over
      each true-label row (``normalize='true'``).

    ``confusion_matrix`` raises ``ValueError`` when none of the requested
    ``labels`` occurs in ``y_true``. In that situation an all-zero matrix of
    the same shape is printed instead, so that a single test cannot abort a
    loop over every augmentation.

    Args:
        type_aug: Test name without its ``'MFT '`` prefix, e.g. ``'letter_swap'``.

    Returns:
        None. Everything is written to stdout.
    """
    test_mft = suite.tests.get(f'MFT {type_aug}')
    y_true = [INTENTS[label] for label in test_mft.labels]
    y_pred_aug = [INTENTS[pred] for pred in test_mft.results.get('preds')]

    print(f"\n ------------------- {test_mft.name} ----------------\n")
    print(classification_report(y_true=y_true, y_pred=y_pred_aug))
    print("\n-- Confusion matrix -- \n")

    if set(y_true).intersection(INTENTS_REDUCT):
        matrix = confusion_matrix(y_true=y_true, y_pred=y_pred_aug,
                                  normalize='true', labels=INTENTS_REDUCT)
    else:
        # No true label belongs to the displayed subset: scikit-learn would raise
        # here, whereas the matrix restricted to that subset is simply empty.
        # Float zeros match the dtype of a normalised confusion matrix.
        size = len(INTENTS_REDUCT)
        matrix = np.zeros((size, size))
    print(matrix)
    print()

def visualize_INV(type_aug):
    """Print a failure breakdown for the INV test of one augmentation.

    Reads two tests from the module-level ``suite``: ``'INV {type_aug}'`` for the
    pass/fail flags and predictions, and ``'MFT {type_aug}'`` for the true labels
    (both are indexed by the same test-case position here).

    Printed output:

    * "Fail rate": cases whose ``passed`` flag equals ``False``, as a percentage
      of ``len(test_inv.data)``.
    * "Correct original prediction rate": cases that failed AND whose first
      stored prediction equals the true label, as a percentage of ALL cases.
      It is therefore bounded by the fail rate and is not the overall accuracy
      on the original sentences.
    * A confusion matrix over the failed cases only, restricted to
      ``INTENTS_REDUCT`` (rows: true label, columns: prediction on the
      augmented sentence).

    Both percentages are reported as 0.00% for a test without data, and an
    all-zero matrix is printed when no failed case has a true label in
    ``INTENTS_REDUCT`` (``confusion_matrix`` raises ``ValueError`` in that case).

    Args:
        type_aug: Test name without its ``'INV '`` prefix, e.g. ``'letter_swap'``.

    Returns:
        None. Everything is written to stdout.
    """
    y_true = suite.tests.get(f'MFT {type_aug}').labels
    test_inv = suite.tests.get(f'INV {type_aug}')
    passed = test_inv.results.get("passed")
    preds = test_inv.results.get('preds')  # fetched once instead of once per sample
    nb_cases = len(test_inv.data)

    def _percent(count):
        # Share of all test cases; 0.0 for an empty test instead of ZeroDivisionError.
        return count / nb_cases * 100 if nb_cases else 0.0

    # Explicit `== False`: a flag that is neither True nor False (e.g. None) is
    # not counted as a failure, which a plain `not flag` would do.
    index_non_passed = [i for i in range(len(passed)) if passed[i] == False]  # noqa: E712
    y_true_non_passed = [INTENTS[y_true[i]] for i in index_non_passed]
    # preds[i][0] is used as the prediction on the original sentence and
    # preds[i][1] as the prediction on its augmented version.
    y_pred_non_passed = [INTENTS[preds[i][1]] for i in index_non_passed]
    nb_original_good = sum(1 for i in index_non_passed if preds[i][0] == y_true[i])

    print(f" ------------------- {test_inv.name} ----------------\n")
    print(f"Fail rate : {_percent(len(index_non_passed)):0.2f}%")
    print(f"Correct original prediction rate : {_percent(nb_original_good):0.2f}%")
    print("Confusion matrix on non passed samples - line : true label - column : pred on aug \n")

    if set(y_true_non_passed).intersection(INTENTS_REDUCT):
        matrix = confusion_matrix(y_true=y_true_non_passed, y_pred=y_pred_non_passed,
                                  labels=INTENTS_REDUCT)
    else:
        # Nothing failed, or no failure concerns a displayed intent: scikit-learn
        # would raise, whereas the restricted matrix is simply all zeros.
        size = len(INTENTS_REDUCT)
        matrix = np.zeros((size, size), dtype=int)
    print(matrix)
    print("\n\n")

In [238]:
# Print the suite's text summary: for each test, the number of test cases, the
# number and rate of fails, and a few example failing sentences.
suite.summary()

keyboard_augmentation

MFT keyboard_augmentation
Test cases:      5
Fails (rate):    1 (20.0%)

Example fails:
25 (0.0) Connais - tu un resYaurant réputé pour le caviar à Las Vegas?
----


INV keyboard_augmentation
Test cases:      5
Fails (rate):    1 (20.0%)

Example fails:
22 (1.0) Connais-tu un restaurant réputé pour le caviar à Las Vegas ?
25 (0.0) Connais - tu un resYaurant réputé pour le caviar à Las Vegas?

----




bert_swap

MFT bert_swap
Test cases:      5
Fails (rate):    0 (0.0%)


INV bert_swap
Test cases:      5
Fails (rate):    0 (0.0%)




letter_deletion

MFT letter_deletion
Test cases:      5
Fails (rate):    0 (0.0%)


INV letter_deletion
Test cases:      5
Fails (rate):    0 (0.0%)




letter_swap

MFT letter_swap
Test cases:      5
Fails (rate):    0 (0.0%)


INV letter_swap
Test cases:      5
Fails (rate):    0 (0.0%)




original

MFT original
Test cases:      5
Fails (rate):    0 (0.0%)




letter_augmentation

MFT letter_augmentation
Test cases:      5
Fails (

In [221]:
# Build the interactive summary table; in the notebook this renders a
# SuiteSummarizer widget listing the tests of the suite.
suite.visual_summary_table()

Please wait as we prepare the table data...


SuiteSummarizer(stats={'npassed': 0, 'nfailed': 0, 'nfiltered': 0}, test_infos=[{'name': 'MFT letter_deletion'…

In [246]:
# Full report: for every augmentation, the MFT metrics followed by the INV
# failure breakdown.
# The un-augmented baseline can be inspected by uncommenting the next line. It
# only exists as an MFT test (the suite holds no 'INV original'), so
# visualize_INV cannot be called for it.
#visualize_MFT(type_aug="original")
for type_aug in types_aug :
    print(f" =============================== {type_aug} ====================================== \n")
    visualize_MFT(type_aug=type_aug)
    visualize_INV(type_aug=type_aug)



 ------------------- MFT keyboard_augmentation ----------------

                 precision    recall  f1-score   support

find-restaurant       0.00      0.00      0.00         1
     irrelevant       0.80      1.00      0.89         4

       accuracy                           0.80         5
      macro avg       0.40      0.50      0.44         5
   weighted avg       0.64      0.80      0.71         5


-- Confusion matrix -- 

[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]

 ------------------- INV keyboard_augmentation ----------------

Fail rate : 20.00%
Correct original prediction rate : 20.00%
Confusion matrix on non passed samples - line : true label - column : pred on aug 

[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 