# Analyse des performances du modèle
Ce notebook a pour but la visualisation plus approfondie des différentes performances du modèle.
### Imports

In [1]:
import json

import numpy as np
import requests
from bokeh.io import output_notebook, show
from bokeh.plotting import figure

In [2]:
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.svm import SVC

In [3]:
# Load the labelled evaluation samples.
# The encoding is given explicitly: the sentences contain accented characters,
# and the platform default encoding used by open() is not UTF-8 everywhere.
with open("../data/testing_set.json", "r", encoding="utf-8") as testing_set_file:
    testing_set = json.load(testing_set_file)

In [4]:
# Report how many samples were loaded.
testing_set_size = len(testing_set)
print(f'Nombre de phrases dans le testing set : {testing_set_size}')

Nombre de phrases dans le testing set : 1207


## Inférence
Nous avons, au préalable, exécuté l'image Docker permettant d'accéder au modèle.

In [5]:
def get_classification_idiwii(sentence):
    """Ask the intent-classification service to score one sentence.

    Args:
        sentence: Text to classify.

    Returns:
        The decoded JSON body of the response. In the output recorded in this
        notebook it is a dict mapping each intent name to a score.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If the service does not answer in time.
    """
    URL = "http://0.0.0.0:8080/api/intent"
    # Pass the sentence through `params` so that requests URL-encodes it.
    # Interpolating it into the URL breaks on characters such as '&', '#',
    # '+' or '%', which silently changes the text the service receives.
    response = requests.get(URL, params={"sentence": sentence}, timeout=30)
    # Fail loudly instead of treating an error payload as a classification.
    response.raise_for_status()
    return response.json()

In [6]:
# Classify every sentence of the testing set (one service call per sample),
# then show the first sentence next to its scores as a sanity check.
sentences = [sample.get('sentence') for sample in testing_set]
y_classif = [get_classification_idiwii(sentence) for sentence in sentences]
print(sentences[0], y_classif[0])

hébergements (pas cher) nouvelle calédonie {'find-around-me': 0.0002909251779783517, 'find-flight': 0.044978659600019455, 'find-hotel': 7.517455378547311e-05, 'find-restaurant': 0.0006114518619142473, 'find-train': 0.04666263610124588, 'irrelevant': 0.8721067309379578, 'provide-showtimes': 0.002411704044789076, 'purchase': 0.032862719148397446}


## Mapping des intents

In [7]:
# Intent labels used throughout the notebook. The position of a label in this
# list is the integer class index stored in the y_true / y_pred vectors.
INTENTS = [
    "find-around-me", "find-flight", "find-hotel", "find-restaurant",
    "find-train", "irrelevant", "provide-showtimes", "purchase",
]

In [8]:
def get_intent_index(classif, treshold = 1.0):
    """Turn one score dict into a class index, applying a score threshold.

    Args:
        classif: Dict mapping an intent label to its score.
        treshold: Minimum score the top-scoring intent must reach to be kept.
            With the default of 1.0, any top score strictly below 1.0 is
            mapped to 'irrelevant'.

    Returns:
        The position in INTENTS of the top-scoring intent, or the position of
        'irrelevant' when that top score is strictly below ``treshold``.
    """
    best_intent = max(classif, key=classif.get)
    # Guard clause: not confident enough, fall back to 'irrelevant'.
    if classif[best_intent] < treshold:
        return INTENTS.index('irrelevant')
    return INTENTS.index(best_intent)

In [9]:
# y_xxx       : index (position in INTENTS) of the true/predicted intent
# y_xxx_label : text label of the true/predicted intent

y_true_label = [sample.get('intent') for sample in testing_set]
y_true = [INTENTS.index(label) for label in y_true_label]

In [10]:
# Prediction without any threshold: the top-scoring intent of each sample.
y_pred_label = [max(classif, key=classif.get) for classif in y_classif]
y_pred = [INTENTS.index(label) for label in y_pred_label]

## Visualisation des performances

In [11]:
# Per-intent precision / recall / F1 of the unthresholded predictions.
report = classification_report(
    y_true=y_true_label,
    y_pred=y_pred_label,
    labels=INTENTS,
)
print(report)

                   precision    recall  f1-score   support

   find-around-me       0.78      0.71      0.75        70
      find-flight       0.97      0.91      0.94        33
       find-hotel       0.82      0.87      0.85        63
  find-restaurant       0.92      0.93      0.92        96
       find-train       0.92      0.88      0.90        40
       irrelevant       0.95      0.97      0.96       755
provide-showtimes       0.90      0.95      0.92        19
         purchase       0.88      0.82      0.85       131

         accuracy                           0.92      1207
        macro avg       0.89      0.88      0.88      1207
     weighted avg       0.92      0.92      0.92      1207



In [12]:
# Raw-count confusion matrix, rows and columns in INTENTS order.
# Each row sums to the support of the corresponding true intent.
confusion_matrix(
    y_true=y_true_label,
    y_pred=y_pred_label,
    labels=INTENTS,
    normalize=None,
)

array([[ 50,   0,   2,   1,   0,  13,   0,   4],
       [  0,  30,   0,   0,   2,   1,   0,   0],
       [  2,   0,  55,   0,   0,   5,   0,   1],
       [  1,   0,   1,  89,   0,   5,   0,   0],
       [  0,   1,   1,   0,  35,   2,   1,   0],
       [  6,   0,   6,   4,   0, 729,   1,   9],
       [  0,   0,   1,   0,   0,   0,  18,   0],
       [  5,   0,   1,   3,   1,  14,   0, 107]])

## Calcul suivant un seuil (threshold)

### Global

In [13]:
# 100 evenly spaced candidate thresholds covering [0, 1], both ends included.
tresholds = np.linspace(start=0.0, stop=1.0, num=100)

In [14]:
# For every threshold, re-derive the predicted class index of each sample,
# then the matching text labels (same nesting: threshold -> sample).
y_preds = [
    [get_intent_index(scores, treshold=cutoff) for scores in y_classif]
    for cutoff in tresholds
]
y_preds_label = [
    [INTENTS[idx] for idx in preds_at_cutoff]
    for preds_at_cutoff in y_preds
]

In [15]:
def _macro_scores(metric):
    """Evaluate a sklearn metric, macro-averaged, once per threshold."""
    return [
        metric(y_true=y_true, y_pred=preds, average='macro')
        for preds in y_preds
    ]


# One value per entry of `tresholds`, in the same order.
recalls = _macro_scores(recall_score)
precisions = _macro_scores(precision_score)
f1_scores = _macro_scores(f1_score)

In [16]:
output_notebook()

# Macro-averaged recall / precision / F1 as a function of the threshold.
# `width` / `height` are used instead of `plot_width` / `plot_height`,
# which were removed in Bokeh 3.0.
p = figure(width=900, height=500, title="Metrics for differents tresholds")

p.line(tresholds, recalls, line_width=2, legend_label='recall', color='limegreen')
p.line(tresholds, precisions, line_width=2, legend_label='precision', color='darkblue')
p.line(tresholds, f1_scores, line_width=2, legend_label='f1_score', color='deeppink')

p.xaxis.axis_label = 'Treshold'
# Label the y axis so the figure can be read on its own.
p.yaxis.axis_label = 'Score'
p.legend.location = 'bottom_left'
# Clicking a legend entry hides or shows the corresponding curve.
p.legend.click_policy = "hide"
show(p)

### Par intent

In [17]:
def plot_metrics_treshold(metric_name, metric_values):
    """Plot one metric against the threshold, with one curve per intent.

    Args:
        metric_name: Name of the metric; used in the title and as y-axis label.
        metric_values: Dict mapping each intent label of INTENTS to the list
            of metric values, one per entry of the global ``tresholds``.
    """
    output_notebook()
    # `width` / `height` are used instead of `plot_width` / `plot_height`,
    # which were removed in Bokeh 3.0.
    p = figure(width=900, height=500, title=f"{metric_name} for differents intents")
    colors = ['limegreen','blue','deeppink', 'slategray','orange','orangered','gold','darkmagenta']

    for i, intent in enumerate(INTENTS):
        # Wrap around the palette so that adding intents beyond the number of
        # colors reuses colors instead of raising IndexError.
        color = colors[i % len(colors)]
        p.line(tresholds, metric_values.get(intent), legend_label=str(intent), color=color)

    p.xaxis.axis_label = 'Treshold'
    p.yaxis.axis_label = metric_name
    p.legend.location = "bottom_left"
    # Clicking a legend entry hides or shows the corresponding curve.
    p.legend.click_policy = "hide"
    show(p)

In [18]:
# One classification report (as a nested dict) per threshold.
evals = [
    classification_report(
        y_true=y_true_label,
        y_pred=labels_at_cutoff,
        labels=INTENTS,
        output_dict=True,
    )
    for labels_at_cutoff in y_preds_label
]

In [19]:
def _metric_per_intent(metric_key):
    """Collect, for each intent, the value of one report metric per threshold."""
    return {
        str(intent): [report.get(intent).get(metric_key) for report in evals]
        for intent in INTENTS
    }


# Intent label -> list of values, in the same order as `evals`.
precisions_per_class = _metric_per_intent('precision')
recalls_per_class = _metric_per_intent('recall')
f1_scores_per_class = _metric_per_intent('f1-score')

In [20]:
# F1-score of each intent as the threshold varies.
plot_metrics_treshold("f1-score", f1_scores_per_class)

In [21]:
# Precision of each intent as the threshold varies.
plot_metrics_treshold("precision", precisions_per_class)

In [22]:
# Recall of each intent as the threshold varies.
plot_metrics_treshold("recall", recalls_per_class)