In [1]:
!pip install checklist &> /dev/null

In [2]:
from datetime import datetime
import json
import requests
import numpy as np

import checklist
from checklist.expect import Expect
from checklist.test_types import MFT, INV, DIR

In [3]:
INTENTS = [
    'book-appointment',
    'book-hotel',
    'book-restaurant',
    'change-appointment',
    'change-car-rental',
    'change-driver',
    'change-hotel',
    'change-reminder',
    'change-restaurant',
    'contact-provider',
    'context',
    'find-Property',
    'find-Recipe',
    'find-Trip',
    'find-activity',
    'find-around-me',
    'find-car-rental',
    'find-driver',
    'find-flight',
    'find-hotel',
    'find-information',
    'find-itinerary',
    'find-restaurant',
    'find-train',
    'inform',
    'irrelevant',
    'other',
    'provide-agenda',
    'provide-news',
    'provide-showtimes',
    'provide-translation',
    'provide-tv-guide',
    'provide-weather',
    'purchase',
    'set-reminder',
    'smalltalk',
    'unsupported',
]

In [4]:
def get_classif(classification):
    return INTENTS.index(max(classification, key=classification.get))

In [5]:
def get_softmax(classification):
    return [c/100 for c in classification.values()]

In [6]:
def get_classification(sentences):
    URL = "http://teaching-pfe.francecentral.azurecontainer.io/api/classification"  
    LANG = "fr-FR"
    classifs = [requests.get(f"{URL}?iso_language={LANG}&sentence={sentence}").json().get("classification") for sentence in sentences]
    return (np.array([get_classif(c) for c in classifs]), np.array([get_softmax(c) for c in classifs]))

In [9]:
with open("dataset_aug.json", "r") as dataset_file:
   data = json.load(dataset_file)

print(data[0])
data= data[:10]

{'intent': 'irrelevant', 'sentence': 'Est ce que tu peux me dire en détails ce que tu sais faire ?', 'keyboard_aug': ['Est ce que tu pejx me dire en détails ce que tu sais Gaire?', 'Est ce que tu àeux me dire en détails ce que tu sais daire?', 'Est ce que tu peux me dite en détails ce que tu sais faiEe?'], 'wordswap_aug': ['Est ce peux que tu me dire en ce détails tu que sais faire?', 'Est ce que tu peux me en dire détails ce tu que sais faire?', 'Est ce que peux tu me en dire ce détails tu que sais faire?']}


# Create and run tests

## MFT

### Original dataset

In [10]:
data_mft_original = { "data": [elem.get("sentence") for elem in data],
                      "labels": [INTENTS.index(elem.get("intent")) for elem in data]}

label_0 = data_mft_original.get('labels')[0]
print(f" Sample - data : {data_mft_original.get('data')[0]}; label : {label_0}/{INTENTS[label_0]}")

 Sample - data : Est ce que tu peux me dire en détails ce que tu sais faire ?; label : 25/irrelevant


In [11]:
test_MFT_original = MFT(**data_mft_original,name="original dataset", capability="None", description=" ")

In [12]:
start = datetime.now()
test_MFT_original.run(get_classification, overwrite=True)
end = datetime.now()
print(f"Execution time : {end - start}") # estimation : 30min

Predicting 10 examples
Execution time : 0:00:56.952825


In [13]:
test_MFT_original.summary()

Test cases:      10
Fails (rate):    1 (10.0%)

Example fails:
35 (0.0) Est ce que tu peux me dire en détails ce que tu sais faire ?
----


### Typographie faults


In [14]:
data_mft_typo = { "data": [keyboard_sentence for elem in data
                                            for keyboard_sentence in elem.get("keyboard_aug")],
                  "labels": [INTENTS.index(elem.get("intent")) for elem in data 
                                                              for keyboard_sentence in elem.get("keyboard_aug")]}

label_0 = data_mft_typo.get('labels')[0]
print(f" Sample - data : {data_mft_typo.get('data')[0]}; label : {label_0}/{INTENTS[label_0]}")

 Sample - data : Est ce que tu pejx me dire en détails ce que tu sais Gaire?; label : 25/irrelevant


In [15]:
test_MFT_typo = MFT(**data_mft_typo, name="typo", capability="None", description="MFT data aug with typo")

In [16]:
start = datetime.now()
test_MFT_typo.run(get_classification, overwrite=True)
end = datetime.now()
print(f"Execution time : {end - start}") # estimation : 2h15min

Predicting 30 examples
Execution time : 0:03:00.212069


In [99]:
#test_MFT_typo.summary()

### Switching words' positions

In [124]:
data_mft_switch = { "data": [switch_sentence for elem in data
                                               for switch_sentence in elem.get("wordswap_aug")],
                    "labels": [INTENTS.index(elem.get("intent")) for elem in data 
                                                                 for switch_sentence in elem.get("wordswap_aug")]}

label_0 = data_mft_switch.get('labels')[0]
print(f" Sample - data : {data_mft_switch.get('data')[0]}; label : {label_0}/{INTENTS[label_0]}")

 Sample - data : Ce est tu que peux me dire en détails ce que tu sais faire?; label : 25/irrelevant


In [125]:
test_MFT_switch = MFT(**data_mft_switch, name="switch", capability="None", description=" data aug with switch")

In [126]:
start = datetime.now()
test_MFT_switch.run(get_classification, overwrite=True)
end = datetime.now()
print(f"Execution time : {end - start}")  # estimation : 2h15min

Predicting 26975 examples


KeyboardInterrupt: 

In [105]:
test_MFT_switch.summary()

Test cases:      10
Fails (rate):    5 (50.0%)

Example fails:
35 (0.1) Ce est que me tu peux dire en détails que ce tu sais faire?
----
35 (0.0) Est ce que tu peux dire me en que détails ce tu faire sais?
----
35 (0.0) Est ce tu me que peux en dire détails ce que tu sais faire?
----


## INV

### Typographie faults

In [127]:
t_typo = { "data":[[item.get("sentence"), keyboard_sentence] for item in data 
                                                            for keyboard_sentence in item.get("keyboard_aug")]}

print(f"samples - {t_typo.get('data')[:2]}")

samples - [['Est ce que tu peux me dire en détails ce que tu sais faire ?', 'Est ce que tu pDux me dLre en cé$ails ce que tu sais Baire?'], ['Est ce que tu peux me dire en détails ce que tu sais faire ?', 'Est ce que tu peuw me Eire en détaLlx ce que tu sais fa1re?']]


In [128]:
test_INV_typo = INV(**t_typo, name="typo augmentation", description="test with data augmentation with typo")

In [None]:
start = datetime.now()
test_INV_typo.run(get_classification, overwrite=True)
end = datetime.now()
print(f"Execution time : {end - start}")  # estimation : 5h

Predicting 53950 examples


In [110]:
#test_INV_typo.summary()

### Switching words' positions

In [22]:
t_switch = { "data":[[item.get("sentence"), swap_sentence] for item in data 
                     for swap_sentence in item.get("wordswap_aug")] }

print(f"samples - {t_switch.get('data')[:2]}")

samples - [["Trop envie de me remplir la panse ce midi, t'as des bons plans à proposer ? 😏", "Trop de envie me remplir la ce panse midi, ' t bons des as plans à proposer? 😏"], ["Trop envie de me remplir la panse ce midi, t'as des bons plans à proposer ? 😏", "Trop envie de remplir la me panse ce midi, t as ' des plans bons à proposer? 😏"]]


In [23]:
test_INV_switch = INV(**t_switch, name="switch augmentation", description="test with data augmentation with swap words")

Predicting 20 examples


In [None]:
start = datetime.now()
test_INV_switch.run(get_classification, overwrite=True)
end = datetime.now()
print(f"Execution time : {end - start}")  # estimation : 2h15min# estimation : 5h

In [24]:
test_INV_switch.summary()

Test cases:      10
Fails (rate):    0 (0.0%)


# Visualisation

## Original dataset

In [111]:
test_MFT_original.visual_summary()

TestSummarizer(stats={'npassed': 1, 'nfailed': 1, 'nfiltered': 0}, summarizer={'name': 'original dataset', 'de…

## Typo

In [112]:
test_MFT_typo.visual_summary()

TestSummarizer(stats={'npassed': 2, 'nfailed': 8, 'nfiltered': 0}, summarizer={'name': 'typo', 'description': …

In [113]:
test_INV_typo.visual_summary()

TestSummarizer(stats={'npassed': 6, 'nfailed': 4, 'nfiltered': 0}, summarizer={'name': 'typo augmentation', 'd…

## Switch



In [130]:
test_MFT_switch.visual_summary()

Exception: No results. Run run() first

In [115]:
test_INV_switch.visual_summary()

TestSummarizer(stats={'npassed': 10, 'nfailed': 0, 'nfiltered': 0}, summarizer={'name': 'switch augmentation',…

TestSummarizer(stats={'npassed': 6, 'nfailed': 4, 'nfiltered': 0}, summarizer={'name': 'typo augmentation', 'd…