In [29]:
####### Importing Libraries and Packages required #######

import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, precision_recall_fscore_support, confusion_matrix, accuracy_score

In [10]:
## Reading data from the file ##

# Train_Data
train_data = pd.read_csv(r'C:\Users\dell\Downloads\Clustering_Assignment\Final Project\atis_intents_train.csv', header=None)
train_data.rename(columns={0:'Label', 1:'Text'}, inplace=True)

# Test Data
test_data = pd.read_csv(r'C:\Users\dell\Downloads\Clustering_Assignment\Final Project\atis_intents_test.csv',header=None)
test_data.rename(columns={0:'Label', 1:'Text'}, inplace=True)

### Data Preparation

In [20]:
########### Preparation of the Data #############

def pre_process(data):
    
    # Removing stopwords and numbers 
    words = set(stopwords.words('english'))
    data['Text']=data['Text'].apply(lambda x:' '.join([word for word in x.split()if word not in (words)]))
    data['Text']= data['Text'].str.replace('\d+','')

    text = data['Text']
    labels = data['Label']
    
    return text,labels

# Function for Pre-processing of Data
train_text, train_labels = pre_process(train_data)
test_text, test_labels = pre_process(test_data)

print('Number of Classes : ',train_labels.nunique())
print('Number of training entries :',len(train_text.index))
print('Number of testing entries :',len(test_text.index))

Number of Classes :  8
Number of training entries : 4834
Number of testing entries : 800


### Transform to TF-IDF

In [21]:
####### Transformation using TF-IDF ############

## Create a TF-IDF vectorizer to convert the text data into numerical features ##
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(train_text)
X_test_vec = vectorizer.transform(test_text)

### Model Building

In [24]:
## Train and Test the model on the data ##
def model_data(model, train_data, train_labels, test_data):
    model.fit(train_data, train_labels)
    predicted_label = model.predict(test_data)
    return predicted_label

In [37]:
## Model1 : SVM ##

# Train an SVM classifier on the training data
clf = SVC(kernel='linear', C=1.0)
predicted_svc = model_data(clf, X_train_vec, train_labels, X_test_vec)

## Model2 : NB ##

# Train an NB classifier on the training data
clf_nb =  MultinomialNB()
predicted_nb = model_data(clf_nb, X_train_vec, train_labels, X_test_vec)


# Train an SVM classifier on the training data
clf_mlp = MLPClassifier()
predicted_mlp = model_data(clf, X_train_vec, train_labels, X_test_vec)

### Evaluation and Error Analysis

In [38]:
####### Evaluation ############

## Evaluate the data using different models ##
def evaluate_classifier(test_labels, predicted_labels):
    print("Training and Evaluating all the models:")
    accuracy = accuracy_score(test_labels, predicted_labels)
    precision, recall, fscore, support = precision_recall_fscore_support(test_labels, predicted_labels, average='macro')
    print('Classification Metrics:')
    print('Precision : {}'.format(precision))
    print( 'Recall    : {}'.format(recall))
    print('F-score   : {}'.format(fscore))
    print('Accuracy  : {}'.format(accuracy))
    print("Classification Report: ")
    print(classification_report(test_labels,predicted_labels))

In [39]:
## SVM Evaluation Results ##
svc_results = evaluate_classifier(test_labels, predicted_svc)
svc_results

Training and Evaluating all the models:
Classification Metrics:
Precision : 0.8653822288409034
Recall    : 0.9386585289941386
F-score   : 0.8802353741308633
Accuracy  : 0.96625
Classification Report: 
                     precision    recall  f1-score   support

  atis_abbreviation       1.00      0.79      0.88        33
      atis_aircraft       0.67      0.89      0.76         9
       atis_airfare       0.90      0.96      0.93        48
       atis_airline       1.00      0.89      0.94        38
        atis_flight       0.98      0.98      0.98       632
   atis_flight_time       1.00      1.00      1.00         1
atis_ground_service       1.00      1.00      1.00        36
      atis_quantity       0.38      1.00      0.55         3

           accuracy                           0.97       800
          macro avg       0.87      0.94      0.88       800
       weighted avg       0.97      0.97      0.97       800



In [40]:
## NB Evaluation Results ##
nb_results = evaluate_classifier(test_labels, predicted_nb)
nb_results

Training and Evaluating all the models:
Classification Metrics:
Precision : 0.6762031693052497
Recall    : 0.47181003909851343
F-score   : 0.512505455710278
Accuracy  : 0.895
Classification Report: 
                     precision    recall  f1-score   support

  atis_abbreviation       1.00      0.79      0.88        33
      atis_aircraft       0.62      0.56      0.59         9
       atis_airfare       0.92      0.25      0.39        48
       atis_airline       1.00      0.18      0.31        38
        atis_flight       0.89      1.00      0.94       632
   atis_flight_time       0.00      0.00      0.00         1
atis_ground_service       0.97      1.00      0.99        36
      atis_quantity       0.00      0.00      0.00         3

           accuracy                           0.90       800
          macro avg       0.68      0.47      0.51       800
       weighted avg       0.90      0.90      0.87       800



In [41]:
## MLP Evaluation Results ##
mlp_results = evaluate_classifier(test_labels, predicted_mlp)
mlp_results

Training and Evaluating all the models:
Classification Metrics:
Precision : 0.8653822288409034
Recall    : 0.9386585289941386
F-score   : 0.8802353741308633
Accuracy  : 0.96625
Classification Report: 
                     precision    recall  f1-score   support

  atis_abbreviation       1.00      0.79      0.88        33
      atis_aircraft       0.67      0.89      0.76         9
       atis_airfare       0.90      0.96      0.93        48
       atis_airline       1.00      0.89      0.94        38
        atis_flight       0.98      0.98      0.98       632
   atis_flight_time       1.00      1.00      1.00         1
atis_ground_service       1.00      1.00      1.00        36
      atis_quantity       0.38      1.00      0.55         3

           accuracy                           0.97       800
          macro avg       0.87      0.94      0.88       800
       weighted avg       0.97      0.97      0.97       800



In [44]:
##### Error Analysis #######

def results(test_data, actual_labels, predicted_labels):
    data=[]
    actual = []
    predicted = []
    
    for i in range(0, len(test_data)):
        if(actual_labels[i]!=predicted_labels[i]):
            data.append(test_data[i])
            actual.append(actual_labels[i])
            predicted.append(predicted_labels[i])
                   
    df = pd.DataFrame()
    df['Text'] = data
    df['Actual_Label'] = actual
    df['Predicted_Label'] = predicted
                   
    return df

In [45]:
## SVC Analysis ##
svc_error = results(test_text, test_labels, predicted_svc)
svc_error

Unnamed: 0,Text,Actual_Label,Predicted_Label
0,expensive one way fare detroit westchester county,atis_airfare,atis_flight
1,show connecting flights boston denver types ai...,atis_flight,atis_aircraft
2,different airlines go las vegas new york city,atis_airline,atis_flight
3,charlotte airport many different types aircraf...,atis_aircraft,atis_quantity
4,would like airline flies toronto detroit st. l...,atis_airline,atis_flight
5,show airlines flights toronto detroit detroit ...,atis_airline,atis_flight
6,phl,atis_abbreviation,atis_flight
7,mci,atis_abbreviation,atis_flight
8,flights dallas phoenix using dc aircraft,atis_flight,atis_aircraft
9,want fly nashville seattle want cheapest fare ...,atis_flight,atis_airfare


In [46]:
## NB Analysis ##
nb_error = results(test_text, test_labels, predicted_nb)
nb_error

Unnamed: 0,Text,Actual_Label,Predicted_Label
0,april first need ticket tacoma san jose departing,atis_airfare,atis_flight
1,expensive one way fare detroit westchester county,atis_airfare,atis_flight
2,airlines fly detroit westchester county,atis_airline,atis_flight
3,departure times detroit westchester county,atis_flight_time,atis_flight
4,airline flies boston san diego,atis_airline,atis_flight
5,show connecting flights boston denver types ai...,atis_flight,atis_aircraft
6,different airlines go las vegas new york city,atis_airline,atis_flight
7,airlines love field june sixth,atis_airline,atis_flight
8,many canadian airlines international flights u...,atis_quantity,atis_flight
9,many canadian airlines international flights u...,atis_quantity,atis_aircraft


In [47]:
## MLP Analysis ##
mlp_error = results(test_text, test_labels, predicted_mlp)
mlp_error

Unnamed: 0,Text,Actual_Label,Predicted_Label
0,expensive one way fare detroit westchester county,atis_airfare,atis_flight
1,show connecting flights boston denver types ai...,atis_flight,atis_aircraft
2,different airlines go las vegas new york city,atis_airline,atis_flight
3,charlotte airport many different types aircraf...,atis_aircraft,atis_quantity
4,would like airline flies toronto detroit st. l...,atis_airline,atis_flight
5,show airlines flights toronto detroit detroit ...,atis_airline,atis_flight
6,phl,atis_abbreviation,atis_flight
7,mci,atis_abbreviation,atis_flight
8,flights dallas phoenix using dc aircraft,atis_flight,atis_aircraft
9,want fly nashville seattle want cheapest fare ...,atis_flight,atis_airfare


In [None]:
###############################################################################################################################################