# Import and load

In [1]:
import json
import requests
import numpy as np 

from bokeh.plotting import figure
from bokeh.io import output_notebook, show

from sklearn.metrics import (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, confusion_matrix,classification_report)
from sklearn.svm import SVC

In [2]:
def get_classification(sentence):
    URL = "http://0.0.0.0:8080/api/intent"
    classif = requests.get(f"{URL}?sentence={sentence}").json()
    return classif

In [3]:
sentence = "Pierrick pue du cul"
print(get_classification(sentence))

{'find-train': 0.0336722768843174, 'irrelevant': 0.5826996564865112, 'find-flight': 0.04238372668623924, 'find-restaurant': 0.061728451400995255, 'purchase': 0.09524249285459518, 'find-around-me': 0.045582037419080734, 'provide-showtimes': 0.06193576380610466, 'find-hotel': 0.07675562053918839}


In [4]:
with open("../data/testing_set.json", "r") as testing_set_file:
    testing_set = json.load(testing_set_file)

In [5]:
print(len(testing_set))

1065


# Inférence

In [6]:
y_classif = [get_classification(sample.get('sentence')) for sample in testing_set]
print(y_classif[0])

{'find-train': 0.019063251093029976, 'irrelevant': 0.38808488845825195, 'find-flight': 0.02717919461429119, 'find-restaurant': 0.053374409675598145, 'purchase': 0.39877641201019287, 'find-around-me': 0.0745929405093193, 'provide-showtimes': 0.010972157120704651, 'find-hotel': 0.02795671485364437}


# Mapping des intents

In [7]:
INTENTS = [
    'find-around-me',
    'find-flight',
    'find-hotel',
    'find-restaurant',
    'find-train',
    'irrelevant',
    'provide-showtimes',
    'purchase',
]

In [8]:
def get_intent_index(classif, treshold = 1.0):
    if classif.get(max(classif, key=classif.get)) < treshold :
        return INTENTS.index('irrelevant')
    else :
        return INTENTS.index(max(classif, key=classif.get))

In [9]:
y_true = [INTENTS.index(sample.get('intent')) for sample in testing_set]
y_true_label = [sample.get('intent') for sample in testing_set]

In [10]:
y_pred_conf = [list(classif.values()) for classif in y_classif]

In [11]:
y_pred = [INTENTS.index(max(classif, key=classif.get)) for classif in y_classif]
y_pred_label = [INTENTS[pred] for pred in y_pred]

# Visualisation des performances

In [12]:
print(f"Accuracy : {accuracy_score(y_true=y_true,y_pred=y_pred)*100:.2f}%")


print(" \n --- Micro --- ")
print(f"Recall : {recall_score(y_true=y_true,y_pred=y_pred,average='micro'):.3f}")
print(f"Precision : {precision_score(y_true=y_true,y_pred=y_pred,average='micro'):.3f}")
print(f"f1-score : {f1_score(y_true=y_true,y_pred=y_pred,average='micro'):.3f}")

print(" \n --- Macro --- ")
print(f"Recall : {recall_score(y_true=y_true,y_pred=y_pred,average='macro'):.3f}")
print(f"Precision : {precision_score(y_true=y_true,y_pred=y_pred,average='macro'):.3f}")
print(f"f1-score : {f1_score(y_true=y_true,y_pred=y_pred,average='macro'):.3f}")


Accuracy : 80.47%
 
 --- Micro --- 
Recall : 0.805
Precision : 0.805
f1-score : 0.805
 
 --- Macro --- 
Recall : 0.523
Precision : 0.853
f1-score : 0.624


In [13]:
roc_auc_score(y_true=y_true, y_score=y_pred_conf, average='macro', multi_class='ovo')

0.5710505291736722

In [14]:
confusion_matrix(y_true=y_true_label,y_pred=y_pred_label, normalize=None,labels=INTENTS)

array([[ 29,   0,   0,   1,   0,  32,   0,   5],
       [  0,   7,   0,   0,   0,  14,   1,   2],
       [  2,   0,  21,   0,   0,  30,   0,   2],
       [  0,   0,   1,  52,   0,  39,   0,   1],
       [  0,   0,   0,   0,  14,   6,   0,   1],
       [  1,   0,   5,   0,   1, 663,   0,   7],
       [  1,   0,   0,   0,   0,   9,   4,   0],
       [  0,   1,   0,   0,   0,  46,   0,  67]])

# Calcul suivant un treshold

In [15]:
tresholds = np.linspace(0.0, 1.0, num = 100)

In [16]:
y_preds = [[get_intent_index(classif, treshold = tresh) for classif in y_classif] for tresh in tresholds ]
y_preds_label = [[INTENTS[index] for index in y_pred] for y_pred in y_preds]   

In [17]:
recalls = [recall_score(y_true = y_true, y_pred = y_pred, average = 'macro') for y_pred in y_preds]
precisions = [precision_score(y_true = y_true, y_pred = y_pred, average = 'macro') for y_pred in y_preds]
f1_scores = [f1_score(y_true = y_true, y_pred = y_pred, average = 'macro') for y_pred in y_preds]

In [18]:
output_notebook()

p = figure(plot_width=900, plot_height=500,title="Metrics for differents tresholds")

# add a line renderer

p.line(tresholds, recalls, line_width=2, legend_label='recall', color = 'limegreen')
p.line(tresholds, precisions, line_width=2, legend_label='precision', color = 'darkblue')
p.line(tresholds, f1_scores, line_width=2, legend_label='f1_score', color = 'deeppink')

p.xaxis.axis_label = 'Treshold'
p.legend.click_policy="hide"
show(p)

In [19]:
evals = [classification_report(y_true= y_true_label, y_pred = y_pred_label,labels=INTENTS,output_dict=True) for y_pred_label in y_preds_label]
print(evals[0])

{'find-around-me': {'precision': 0.8787878787878788, 'recall': 0.43283582089552236, 'f1-score': 0.58, 'support': 67}, 'find-flight': {'precision': 0.875, 'recall': 0.2916666666666667, 'f1-score': 0.43750000000000006, 'support': 24}, 'find-hotel': {'precision': 0.7777777777777778, 'recall': 0.38181818181818183, 'f1-score': 0.5121951219512196, 'support': 55}, 'find-restaurant': {'precision': 0.9811320754716981, 'recall': 0.5591397849462365, 'f1-score': 0.7123287671232875, 'support': 93}, 'find-train': {'precision': 0.9333333333333333, 'recall': 0.6666666666666666, 'f1-score': 0.7777777777777778, 'support': 21}, 'irrelevant': {'precision': 0.7902264600715138, 'recall': 0.9793205317577548, 'f1-score': 0.8746701846965699, 'support': 677}, 'provide-showtimes': {'precision': 0.8, 'recall': 0.2857142857142857, 'f1-score': 0.4210526315789473, 'support': 14}, 'purchase': {'precision': 0.788235294117647, 'recall': 0.5877192982456141, 'f1-score': 0.6733668341708543, 'support': 114}, 'accuracy': 0.

In [20]:
precisions_per_class = {str(intent):[eval.get(intent).get('precision') for eval in evals] for intent in INTENTS}
f1_scores_per_class = {str(intent):[eval.get(intent).get('f1-score') for eval in evals] for intent in INTENTS}

In [21]:
output_notebook()

p = figure(plot_width=900, plot_height=500,title="f1-score for differents intents")

# add a line renderer
colors = ['limegreen','blue','deeppink', 'slategray','orange','orangered','gold','darkmagenta']
for i in range(len(INTENTS)):
    intent = INTENTS[i]
    p.line(tresholds, f1_scores_per_class.get(intent), legend_label = str(intent), color = colors[i])
    

p.xaxis.axis_label = 'Treshold'
p.yaxis.axis_label = 'f1-score'
p.legend.click_policy="hide"
show(p)

In [22]:
output_notebook()

p = figure(plot_width=900, plot_height=500,title="precision for differents intents")

# add a line renderer
colors = ['limegreen','blue','deeppink', 'slategray','orange','orangered','gold','darkmagenta']
for i in range(len(INTENTS)):
    intent = INTENTS[i]
    p.line(tresholds, precisions_per_class.get(intent), legend_label = str(intent), color = colors[i])
    

p.xaxis.axis_label = 'Treshold'
p.yaxis.axis_label = 'Precision'
p.legend.click_policy="hide"
p.legend.location = "bottom_left"
show(p)