## Evaluation part 1


In [3]:

%matplotlib inline
import numpy as np
import pandas as pd
import re
import sklearn
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report


Get data set from CSV and pandas dataframe

In [5]:
data = pd.read_csv("evalution_data_set.csv")
data.head()
data = data.sample(frac=1).reset_index(drop=True)
data

Unnamed: 0,label,expression
0,intent_flight_search,please help me to search for a flight
1,intent_flight_status,check status of flight leaving [today](Departu...
2,intent_provide_flight_info,on [next friday](Departure.EstimatedDate|tomor...
3,intent_flight_status,help me check status of flight departing from ...
4,intent_flight_status,will my flight leave ontime from [SLC](Departu...
...,...,...
73,intent_flight_search,can you please help me to search for a flight ...
74,intent_flight_status,tell me flight status for [762(FlightNumber|75...
75,intent_provide_flight_info,we are on flight [234](FlightNumber|600|666|92...
76,intent_flight_status,flight status


Extract statement and entities from expression using the given function

In [6]:
def _entity_string_to_dict(entity_string):
    entity_extract_pattern = re.compile(r'(?P<entity>\[(?P<value>.+?)\]\((?P<name_and_synonyms>.+?)\))')
    new_string = ''
    start = 0
    output = dict()
    output['entities'] = []
    for item in re.finditer(entity_extract_pattern, entity_string):
        d = dict()
        new_string += entity_string[start:item.start()]
        start = item.start()
        d['span_start'] = len(new_string)
        new_string += item.group('value')
        d['span_end'] = len(new_string)
        start += len(item.group('entity'))
        d['entity_value'] = item.group('value')

        syn_items = item.group('name_and_synonyms').split('|')
        d['entity_type'] = syn_items[0]
        d['synonyms'] = list()
        if len(syn_items) > 1:
            d['synonyms'] += [t for t in syn_items[1:]]

        output['entities'].append(d)
    new_string += entity_string[start:]
    output['statement'] = new_string
    return output


data['statement'] = data.apply(lambda x: _entity_string_to_dict(x['expression'])['statement'], axis=1)
data['entities'] = data.apply(lambda x: _entity_string_to_dict(x['expression'])['entities'], axis=1)
data.head()

Unnamed: 0,label,expression,statement,entities
0,intent_flight_search,please help me to search for a flight,please help me to search for a flight,[]
1,intent_flight_status,check status of flight leaving [today](Departu...,check status of flight leaving today,"[{'span_start': 31, 'span_end': 36, 'entity_va..."
2,intent_provide_flight_info,on [next friday](Departure.EstimatedDate|tomor...,on next friday I'm taking flight 874 from Calg...,"[{'span_start': 3, 'span_end': 14, 'entity_val..."
3,intent_flight_status,help me check status of flight departing from ...,help me check status of flight departing from ...,"[{'span_start': 46, 'span_end': 51, 'entity_va..."
4,intent_flight_status,will my flight leave ontime from [SLC](Departu...,will my flight leave ontime from SLC?,"[{'span_start': 33, 'span_end': 36, 'entity_va..."


Simple split function to split the data in to test data and training data samples

In [7]:
def simple_split(data,y,length,split_mark=0.7):
    if split_mark > 0. and split_mark < 1.0:
        n = int(split_mark*length)
    else:
        n = int(split_mark)
    x_train = data[:n].copy()
    x_test = data[n:].copy()
    y_train = y[:n].copy()
    y_test = y[n:].copy()
    return x_train,x_test,y_train,y_test

Create count verctorizer and split the data set

In [11]:
vectorizer = CountVectorizer()
x_train,x_test,y_train,y_test = simple_split(data.statement,data.label,len(data))
print(x_train.shape,x_test.shape)

(54,) (24,)


Number of data points from each intent class

In [12]:
data.groupby('label').count()

Unnamed: 0_level_0,expression,statement,entities
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
intent_flight_search,25,25,25
intent_flight_status,28,28,28
intent_provide_flight_info,25,25,25



Train the count vectorizer uning train dataset from tranform and fit function, also tranform the test data.

In [13]:
x_train = vectorizer.fit_transform(x_train)
x_test = vectorizer.transform(x_test)

Features extracted from train dataset using the count vectorizer

In [14]:
feature_names = vectorizer.get_feature_names()
print("Number of features: {}".format(len(feature_names)))
print("Number of features: {}".format(feature_names[:80]))

Number of features: 91
Number of features: ['100', '12th', '214', '400', '412', '417', '431', '433', '451', '456', '473', '562', '571', '581', '688', '781', '874', '944', '981', 'able', 'about', 'airport', 'airportcode', 'are', 'be', 'calgary', 'can', 'check', 'cmb', 'colombo', 'depart', 'departe', 'departing', 'departure', 'does', 'dubai', 'find', 'flight', 'flights', 'flying', 'fo', 'for', 'fran', 'francisco', 'fransisco', 'friday', 'from', 'going', 'heathrow', 'help', 'how', 'is', 'jfk', 'june', 'lax', 'leave', 'leaving', 'me', 'monday', 'my', 'need', 'next', 'number', 'of', 'on', 'ontime', 'our', 'planning', 'please', 'quick', 'san', 'search', 'sfo', 'slc', 'status', 'sunday', 'taking', 'the', 'time', 'to']


In [15]:
vectorizer.vocabulary_

{'please': 68,
 'help': 49,
 'me': 57,
 'to': 79,
 'search': 71,
 'for': 41,
 'flight': 37,
 'check': 27,
 'status': 74,
 'of': 63,
 'leaving': 56,
 'today': 80,
 'on': 64,
 'next': 61,
 'friday': 45,
 'taking': 76,
 '874': 16,
 'from': 46,
 'calgary': 25,
 'heathrow': 48,
 'departing': 32,
 'dubai': 35,
 'will': 88,
 'my': 59,
 'leave': 55,
 'ontime': 65,
 'slc': 73,
 'wanna': 82,
 'quick': 69,
 '473': 10,
 'lax': 54,
 'can': 26,
 'you': 89,
 'find': 36,
 'when': 87,
 'be': 24,
 'tomorrow': 81,
 'planning': 67,
 'sunday': 75,
 'flying': 39,
 'sfo': 72,
 'we': 84,
 'are': 23,
 '12th': 1,
 'june': 53,
 'want': 83,
 'flights': 38,
 'cmb': 28,
 'number': 62,
 '456': 9,
 'does': 34,
 '214': 2,
 'depart': 30,
 'fo': 40,
 'yyz': 90,
 'jfk': 52,
 'were': 85,
 '688': 14,
 'is': 51,
 '562': 11,
 'what': 86,
 'the': 77,
 '100': 0,
 '431': 6,
 'colombo': 29,
 '412': 4,
 '400': 3,
 '781': 15,
 'monday': 58,
 'how': 50,
 'about': 20,
 'our': 66,
 '417': 5,
 'departure': 33,
 'airport': 21,
 'airpor

## Logistic regression
Calculate the cross validation score for logistic regression for the dataset 

In [17]:
scores = cross_val_score(LogisticRegression( max_iter=100),vectorizer.transform(data.statement),data.label,cv=5)
print("Mean cross validation accuracy: {:.2f}".format(np.mean(scores)))

Mean cross validation accuracy: 0.91


Train logistic regression model with the train data set with lables. then test and extact the prediction accuracy on the test dataset

In [18]:
logreg = LogisticRegression(max_iter=1000)
logreg.fit(x_train,y_train)
print("Training set score: {:.3f}".format(logreg.score(x_train,y_train)))
print("Testing set score: {:.3f}".format(logreg.score(x_test,y_test)))

Training set score: 1.000
Testing set score: 0.958


 Confusion metrix to check how prediction result differ from their actual lable for each class

In [20]:
sorted_labels = sorted(
    data.label.unique(), 
    key=lambda name: (name[1:], name[0])
)

pred_logreg = logreg.predict(x_test)
confusion_logreg = confusion_matrix(y_test,pred_logreg)
print("Confusion matrix:\n ")

header = pd.MultiIndex.from_product([['Predicted label'],
                                     sorted_labels],names=['','Actual label'])
confusion_mtrix_df =  pd.DataFrame(confusion_logreg,index=sorted_labels, 
                  columns=header)
confusion_mtrix_df

Confusion matrix:
 


Unnamed: 0_level_0,Predicted label,Predicted label,Predicted label
Actual label,intent_flight_search,intent_flight_status,intent_provide_flight_info
intent_flight_search,9,0,0
intent_flight_status,0,8,1
intent_provide_flight_info,0,0,6


Using trained logistic regression model to predict intent lable from sample test cases

In [21]:
ans = logreg.predict(vectorizer.transform(["can you help me to find a flight ?","we are leaving from San Francisco","how about the status of our flight departing from next Friday)"]))
print("prediction:\n {}".format(ans))

prediction:
 ['intent_flight_search' 'intent_provide_flight_info'
 'intent_flight_status']


Following is the precision, recall and f1-score for the trained logistic regression model

In [22]:
print(classification_report(y_test, pred_logreg, target_names=sorted_labels))

                            precision    recall  f1-score   support

      intent_flight_search       1.00      1.00      1.00         9
      intent_flight_status       1.00      0.89      0.94         9
intent_provide_flight_info       0.86      1.00      0.92         6

                  accuracy                           0.96        24
                 macro avg       0.95      0.96      0.95        24
              weighted avg       0.96      0.96      0.96        24



## Stochastic gradient descent model

Calculate cross validation score for the data set using stochastic gradient descent model

In [23]:
scores = cross_val_score(SGDClassifier(loss="hinge", penalty="l2", max_iter=1000),vectorizer.transform(data.statement),data.label,cv=5)
print("Mean cross validation accuracy: {:.2f}".format(np.mean(scores)))

Mean cross validation accuracy: 0.86


Train gradient descent model using train dataset and test the accuracy using the test dataset

In [24]:
clf = SGDClassifier(loss="hinge", penalty="l2", max_iter=1000)
clf.fit(x_train,y_train)
print("Training set score: {:.3f}".format(clf.score(x_train,y_train)))
print("Testing set score: {:.3f}".format(clf.score(x_test,y_test)))

Training set score: 1.000
Testing set score: 0.958


Confusion metrix to check how prediction result differ from their actual lable for each class

In [25]:
pred_sgd = clf.predict(x_test)
confusion_sgd = confusion_matrix(y_test,pred_sgd)
print("Confusion matrix:\n")

header = pd.MultiIndex.from_product([['Predicted label'],
                                     sorted_labels],names=['','Actual label'])
confusion_mtrix_df =  pd.DataFrame(confusion_sgd,index=sorted_labels, 
                  columns=header)
confusion_mtrix_df

Confusion matrix:



Unnamed: 0_level_0,Predicted label,Predicted label,Predicted label
Actual label,intent_flight_search,intent_flight_status,intent_provide_flight_info
intent_flight_search,9,0,0
intent_flight_status,0,8,1
intent_provide_flight_info,0,0,6


Following is the precision, recall and f1-score for the trained  stochastic gradient descent model

In [26]:
print(classification_report(y_test, pred_sgd, target_names=sorted_labels))

                            precision    recall  f1-score   support

      intent_flight_search       1.00      1.00      1.00         9
      intent_flight_status       1.00      0.89      0.94         9
intent_provide_flight_info       0.86      1.00      0.92         6

                  accuracy                           0.96        24
                 macro avg       0.95      0.96      0.95        24
              weighted avg       0.96      0.96      0.96        24



## Multilayer perceptron model
Using Multilayer perceptron model to the same tests

In [20]:
scores = cross_val_score(MLPClassifier(random_state=1, max_iter=1000),vectorizer.transform(data.statement),data.label,cv=5)
print("Mean cross validation accuracy: {:.2f}".format(np.mean(scores)))

Mean cross validation accuracy: 0.87


In [21]:
mlp = MLPClassifier(random_state=1, max_iter=500)
mlp.fit(x_train,y_train)
print("Training set score: {:.3f}".format(mlp.score(x_train,y_train)))
print("Testing set score: {:.3f}".format(mlp.score(x_test,y_test)))


pred_mlp = mlp.predict(x_test)
confusion_mlp = confusion_matrix(y_test,pred_mlp)
print("Confusion matrix:\n")

header = pd.MultiIndex.from_product([['Predicted label'],
                                     sorted_labels],names=['','Actual label'])
confusion_mtrix_df =  pd.DataFrame(confusion_mlp,index=sorted_labels, 
                  columns=header)
confusion_mtrix_df

Training set score: 1.000
Testing set score: 0.833
Confusion matrix:



Unnamed: 0_level_0,Predicted label,Predicted label,Predicted label
Actual label,intent_flight_search,intent_flight_status,intent_provide_flight_info
intent_flight_search,9,0,0
intent_flight_status,0,6,1
intent_provide_flight_info,1,2,5


In [22]:
print(classification_report(y_test, pred_mlp, target_names=sorted_labels))

                            precision    recall  f1-score   support

      intent_flight_search       0.90      1.00      0.95         9
      intent_flight_status       0.75      0.86      0.80         7
intent_provide_flight_info       0.83      0.62      0.71         8

                  accuracy                           0.83        24
                 macro avg       0.83      0.83      0.82        24
              weighted avg       0.83      0.83      0.83        24

