# Imports and initialization

In [1]:
import json
import numpy as np

In [2]:
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.svm import SVC

In [3]:
from checklist.test_suite import TestSuite

In [4]:
# Full list of intent names. Integer labels and predictions stored in the test
# suite are translated to names by indexing into this list (see
# get_true_intents and visualize), so the ORDER of the entries is significant.
# NOTE(review): presumably this order mirrors the label encoding used when the
# suite was built — confirm before adding, removing or reordering entries.
INTENTS = [
    'book-appointment',
    'book-hotel',
    'book-restaurant',
    'change-appointment',
    'change-car-rental',
    'change-driver',
    'change-hotel',
    'change-reminder',
    'change-restaurant',
    'contact-provider',
    'context',
    'find-Property',
    'find-Recipe',
    'find-Trip',
    'find-activity',
    'find-around-me',
    'find-car-rental',
    'find-driver',
    'find-flight',
    'find-hotel',
    'find-information',
    'find-itinerary',
    'find-restaurant',
    'find-train',
    'inform',
    'irrelevant',
    'other',
    'provide-agenda',
    'provide-news',
    'provide-showtimes',
    'provide-translation',
    'provide-tv-guide',
    'provide-weather',
    'purchase',
    'set-reminder',
    'smalltalk',
    'unsupported',
]

# Subset of INTENTS passed as `labels=` to classification_report and
# confusion_matrix in visualize; it fixes which intents appear in those
# reports and in which row/column order.
INTENTS_REDUCT = [
    'find-around-me',
    'find-flight',
    'find-hotel',
    'find-restaurant',
    'find-train',
    'irrelevant',
    'provide-showtimes',
    'purchase',
]

In [5]:
# Augmentation type names. Each one is interpolated into the test names looked
# up in the suite: "MFT original {type}", "MFT {type}" and "INV {type}".
TYPES_AUG = ["keyboard_aug", "letter_aug", "letter_del"]

In [6]:
# Load the previously saved checklist test suite; the functions below read it
# through the module-level name `suite`.
# NOTE(review): the .pkl extension suggests pickle-style deserialization —
# only load suite files from a trusted source.
suite = TestSuite.from_file("testsuite/suite.pkl")

In [7]:
def get_true_intents(type_aug):
    """Return the expected intent names for one augmentation type.

    Looks up the test named ``"MFT original {type_aug}"`` in the module-level
    ``suite`` and translates each entry of its ``labels`` into an intent name
    by indexing into ``INTENTS``.

    Args:
        type_aug: Augmentation type name used to build the test name.

    Returns:
        A list of intent names, one per label of the test.
    """
    original_test = suite.tests.get(f"MFT original {type_aug}")
    intent_names = []
    for label_idx in original_test.labels:
        intent_names.append(INTENTS[label_idx])
    return intent_names

In [8]:
def _print_classification_summary(header, y_true, y_pred):
    """Print `header`, then the classification report and confusion matrix.

    Both the report and the matrix are restricted to (and ordered by)
    ``INTENTS_REDUCT``.
    """
    print(header)
    print(classification_report(y_true=y_true, y_pred=y_pred, labels=INTENTS_REDUCT))
    print("\n-- Confusion matrix -- \n")
    print(confusion_matrix(y_true=y_true, y_pred=y_pred, normalize=None, labels=INTENTS_REDUCT))


def visualize(type_aug):
    """Print MFT and INV reports for one augmentation type.

    Three tests are read from the module-level ``suite``:

    * ``"MFT original {type_aug}"`` and ``"MFT {type_aug}"``: a classification
      report and a confusion matrix are printed for each, limited to
      ``INTENTS_REDUCT``.
    * ``"INV {type_aug}"``: the element-wise difference between the confusion
      matrix of the first prediction of each pair and that of the second one
      is printed.

    The expected intents for all three come from ``get_true_intents``.

    Args:
        type_aug: Augmentation type name used to build the test names.

    Returns:
        None. Everything is written to stdout.
    """
    y_true = get_true_intents(type_aug=type_aug)

    print(f" =============================== {type_aug} ====================================== \n")

    # MFT original
    test_original = suite.tests.get(f'MFT original {type_aug}')
    y_pred_original = [INTENTS[pred] for pred in test_original.results.get('preds')]
    _print_classification_summary(
        " ------------------- MFT original ----------------\n", y_true, y_pred_original
    )

    # MFT aug
    test_aug = suite.tests.get(f'MFT {type_aug}')
    y_pred_aug = [INTENTS[pred] for pred in test_aug.results.get('preds')]
    _print_classification_summary(
        f"\n ------------------- MFT {type_aug} ----------------\n", y_true, y_pred_aug
    )

    # INV
    test_inv = suite.tests.get(f'INV {type_aug}')
    inv_preds = test_inv.results.get('preds')
    # Each entry is indexed with [0] and [1]; presumably the pair is
    # (prediction on original text, prediction on augmented text) — confirm
    # against how the INV test was built.
    y_pred_INV_original = [INTENTS[pred[0]] for pred in inv_preds]
    y_pred_INV_aug = [INTENTS[pred[1]] for pred in inv_preds]

    # Build both matrices over ONE shared, sorted label set. Without an
    # explicit `labels=`, each call infers its own labels, so the two matrices
    # can differ in shape or row order and cannot be subtracted cell by cell.
    inv_labels = sorted(set(y_true) | set(y_pred_INV_original) | set(y_pred_INV_aug))
    confusion_matrix_original = confusion_matrix(
        y_true=y_true, y_pred=y_pred_INV_original, labels=inv_labels
    )
    confusion_matrix_aug = confusion_matrix(
        y_true=y_true, y_pred=y_pred_INV_aug, labels=inv_labels
    )

    print(f"\n ------------------- INV {type_aug} ----------------\n")
    print(" Difference of confusion matrix (original - aug)")
    print(f" Labels (row/column order): {inv_labels}")
    # Direct array subtraction keeps rows = true intent, columns = predicted
    # intent (the previous nested comprehension printed the transpose).
    print(confusion_matrix_original - confusion_matrix_aug)
    print("\n\n")

# Visuals

In [9]:
# Text summary of the suite: for each test, the number of test cases, the
# failure count/rate and a few example failures.
suite.summary()

keyboard_aug

MFT original keyboard_aug
Test cases:      1346
Fails (rate):    3 (0.2%)

Example fails:
25 (0.4) j'ai besoin que tu me cherche un b&b pour le dernier weekend du mois à saint-petersbourg
----
25 (0.0) J'ai une soirée élégante dans 15 jours, j'ai trouvé la robe parfaite chez Maje, reste plus qu'à faire la commande et comme je n'ai pas le temps il faut que tu t'en charges. Voici les références : ROBE LONGUE PORTEFEUILLE rouge taille 1. Tu ne peux pas la louper elle est longue et raffinée. Juste superbe. Tiens moi au jus cocotte quand tu as besoin de mes numéros de carte.
----
25 (0.0) quels son les mots començant ou finissant avec "la"
----


MFT keyboard_aug
Test cases:      1346
Fails (rate):    42 (3.1%)

Example fails:
25 (0.0) Bien le bonjour! Je suis à la rechercbe d ' un ventilateur: )
----
15 (0.4) Dis - moi, Est ce quil ya des AubeTges aux alentours de Séoul
----
33 (0.0) J ' aimerais que tu cherches trois billets (aviln) au depart de Londres vers Dublin. Du 21 / 

In [10]:
# Widget view of the same suite results (displays a SuiteSummarizer object).
suite.visual_summary_table()

Please wait as we prepare the table data...


SuiteSummarizer(stats={'npassed': 0, 'nfailed': 0, 'nfiltered': 0}, test_infos=[{'name': 'MFT original keyboar…

In [11]:
# Print the MFT / INV reports for every augmentation type in turn.
for type_aug in TYPES_AUG:
    visualize(type_aug)


 ------------------- MFT original ----------------

                   precision    recall  f1-score   support

   find-around-me       1.00      1.00      1.00        90
      find-flight       1.00      0.96      0.98        28
       find-hotel       1.00      0.99      0.99        74
  find-restaurant       1.00      1.00      1.00       104
       find-train       1.00      1.00      1.00        28
       irrelevant       1.00      1.00      1.00       853
provide-showtimes       1.00      1.00      1.00        31
         purchase       1.00      0.99      1.00       138

         accuracy                           1.00      1346
        macro avg       1.00      0.99      1.00      1346
     weighted avg       1.00      1.00      1.00      1346


-- Confusion matrix -- 

[[ 90   0   0   0   0   0   0   0]
 [  0  27   0   0   0   1   0   0]
 [  0   0  73   0   0   1   0   0]
 [  0   0   0 104   0   0   0   0]
 [  0   0   0   0  28   0   0   0]
 [  0   0   0   0   0 853   0   0]
