In [653]:
# standard library
import os
# saving model as pickle
import pickle
import re

# third-party
import numpy as np
import pandas as pd
# for NLP related tasks
import spacy
# for mongodb operations
from pymongo import MongoClient

pd.set_option('display.max_colwidth', 200)

global nlp
nlp=spacy.load('en_core_web_sm')

In [654]:
# Read the labelled inventory messages from the CSV export and preview them.
inventory_csv = r'C:/Users/DAG9KOR/Downloads/ProjectMulticlasstextclassification/inventory.csv'
df = pd.read_csv(inventory_csv)
print('Shape -->', df.shape)
df.head()

Shape --> (814, 3)


Unnamed: 0,text,label,action
0,add 5 kg of Biscuits,ham,add
1,play music,spam,play
2,add 2 litres of milk,ham,add
3,who is prime minister,spam,none
4,remove 1kg of fruits,ham,remove


In [655]:
df['text'].sample(5)

136             provide exisitng items in inventory
144             provide exisitng items in inventory
48                   what is the dollar price today
102    add 2 bottles of softdrinks in food category
389          subtract 50 kg of Sugar from inventory
Name: text, dtype: object

In [656]:
df['label'].value_counts(normalize=True)

ham     0.7457
spam    0.2543
Name: label, dtype: float64

In [657]:
df['action'].value_counts(normalize=False)

none        112
remove       60
show         59
return       58
add          56
update       55
give         54
play         54
provide      51
display      49
offer        49
subtract     45
get          38
sing         37
dispense     31
push          6
Name: action, dtype: int64

In [658]:
unique_actions = df['action'].nunique()
unique_actions

16

In [659]:
def text_cleaner(text):
    """Normalise a raw message into a space-separated string of lemmas.

    The message is stripped of ``@mentions`` and ``http...`` links,
    lower-cased, reduced to the letters a-z (every other run of characters,
    whitespace included, becomes one space) and then passed through the
    module-level spaCy pipeline ``nlp``; stop words are dropped and the
    remaining tokens are replaced by their lemmas.

    Parameters
    ----------
    text : str
        Raw input message. ``None`` and NaN (e.g. an empty CSV cell) are
        treated as an empty message; any other non-string is converted
        with ``str()``.

    Returns
    -------
    str
        Lemmas joined by single spaces; ``""`` when nothing survives cleaning.
    """
    # Missing values would otherwise make re.sub raise TypeError.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    text = str(text)

    # remove user mentions
    text = re.sub(r'@[A-Za-z0-9]+', '', text)

    # hashtags are deliberately kept: the '#' is dropped below, the word stays

    # remove links
    text = re.sub(r'http\S+', '', text)

    # converting text to lower case
    text = text.lower()

    # keep only words; this also collapses whitespace runs, and strip() removes
    # the leading/trailing space left behind by digits or punctuation so that
    # spaCy does not emit a whitespace token for it
    text = re.sub(r"[^a-z]+", " ", text).strip()
    if not text:
        return ""

    # creating doc object
    doc = nlp(text)

    # remove stopwords (and any whitespace tokens) and lemmatize the text
    tokens = [
        token.lemma_
        for token in doc
        if not token.is_stop and not token.is_space
    ]

    # join tokens by space
    return " ".join(tokens)

In [660]:
# perform text cleaning
df['clean_text']= df['text'].apply(text_cleaner)

In [661]:
df['clean_text'].sample(5)

279                              offer
808     dispense kg sandwich inventory
752                                   
690          add kg salt food category
144    provide exisitng item inventory
Name: clean_text, dtype: object

In [662]:
text   = df['clean_text'].values
labels = df['label'].values
actions = df['action'].values

In [663]:
labels[:5]

array(['ham', 'spam', 'ham', 'spam', 'ham'], dtype=object)

In [664]:
actions[:5]

array(['add', 'play', 'add', 'none', 'remove'], dtype=object)

### Label Encoding

In [665]:
#importing label encoder
from sklearn.preprocessing import LabelEncoder

# One encoder per target, so predictions of each classifier can be mapped
# back to their original strings independently.
le = LabelEncoder()    # spam/ham label
le1 = LabelEncoder()   # inventory action

# Fit on the raw string columns of `df` instead of on `labels`/`actions`
# themselves: re-running this cell then never refits the encoders on
# already-encoded integers (which would make inverse_transform return
# integers instead of 'ham'/'spam' and the action names).
labels = le.fit_transform(df['label'].values)
actions = le1.fit_transform(df['action'].values)

In [666]:
labels[:10]

array([0, 1, 0, 1, 0, 1, 0, 0, 0, 1])

In [667]:
actions[:10]

array([ 0,  7,  0,  5, 10,  5,  0, 10,  0,  5])

In [668]:
le.inverse_transform(labels)

array(['ham', 'spam', 'ham', 'spam', 'ham', 'spam', 'ham', 'ham', 'ham',
       'spam', 'ham', 'spam', 'ham', 'spam', 'ham', 'spam', 'ham', 'ham',
       'spam', 'spam', 'spam', 'spam', 'ham', 'ham', 'ham', 'spam', 'ham',
       'ham', 'ham', 'ham', 'spam', 'ham', 'ham', 'ham', 'ham', 'spam',
       'spam', 'spam', 'spam', 'spam', 'spam', 'spam', 'ham', 'ham',
       'ham', 'ham', 'ham', 'ham', 'spam', 'spam', 'spam', 'ham', 'ham',
       'ham', 'ham', 'ham', 'ham', 'ham', 'spam', 'spam', 'spam', 'ham',
       'ham', 'ham', 'ham', 'ham', 'ham', 'ham', 'ham', 'spam', 'ham',
       'spam', 'ham', 'spam', 'ham', 'ham', 'ham', 'spam', 'ham', 'spam',
       'ham', 'spam', 'ham', 'spam', 'ham', 'ham', 'spam', 'spam', 'spam',
       'spam', 'ham', 'ham', 'ham', 'spam', 'ham', 'ham', 'ham', 'ham',
       'spam', 'ham', 'ham', 'ham', 'ham', 'spam', 'spam', 'spam', 'spam',
       'spam', 'spam', 'spam', 'ham', 'ham', 'ham', 'ham', 'ham', 'ham',
       'spam', 'spam', 'spam', 'ham', 'ham', 'ham',

In [669]:
le1.inverse_transform(actions)

array(['add', 'play', 'add', 'none', 'remove', 'none', 'add', 'remove',
       'add', 'none', 'add', 'none', 'add', 'none', 'add', 'none', 'add',
       'add', 'none', 'none', 'sing', 'add', 'show', 'display', 'offer',
       'add', 'provide', 'return', 'subtract', 'add', 'none', 'add',
       'add', 'add', 'add', 'none', 'none', 'none', 'none', 'none',
       'none', 'none', 'push', 'add', 'add', 'display', 'get', 'remove',
       'none', 'play', 'sing', 'offer', 'provide', 'get', 'show', 'push',
       'return', 'subtract', 'none', 'play', 'sing', 'offer', 'provide',
       'get', 'show', 'push', 'return', 'subtract', 'add', 'play', 'add',
       'none', 'remove', 'none', 'add', 'remove', 'add', 'none', 'add',
       'none', 'add', 'none', 'add', 'none', 'add', 'add', 'none', 'none',
       'sing', 'add', 'show', 'display', 'offer', 'add', 'provide',
       'return', 'subtract', 'add', 'none', 'add', 'add', 'add', 'add',
       'none', 'none', 'none', 'none', 'none', 'none', 'none', 

In [670]:
valid = le.inverse_transform([0,1])
valid

array(['ham', 'spam'], dtype=object)

In [671]:
# Spam/Ham classifier: build the training and validation sets
from sklearn.model_selection import train_test_split

# 70/30 split of the cleaned text, stratified on the encoded spam/ham label
# and seeded so the partition is reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    text,
    labels,
    test_size=0.30,
    stratify=labels,
    shuffle=True,
    random_state=0,
)

In [672]:
print('x_train:',x_train.shape,'y_train:',y_train.shape)
print('x_val:',x_val.shape,'y_val:',y_val.shape)

x_train: (569,) y_train: (569,)
x_val: (245,) y_val: (245,)


In [673]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [674]:
word_vectorizer = TfidfVectorizer(max_features=1000)

In [675]:
word_vectorizer.fit(x_train)

TfidfVectorizer(max_features=1000)

In [676]:
# Persist the fitted spam/ham vectorizer. The context manager closes the file
# handle, which the bare open(...) passed to pickle.dump never did.
with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(word_vectorizer, vectorizer_file)

In [677]:
# create TF-IDF vectors for Train Set
train_word_features = word_vectorizer.transform(x_train)
train_word_features

<569x76 sparse matrix of type '<class 'numpy.float64'>'
	with 1655 stored elements in Compressed Sparse Row format>

In [678]:
# create TF-IDF vectors for Validation Set
val_word_features = word_vectorizer.transform(x_val)
val_word_features

<245x76 sparse matrix of type '<class 'numpy.float64'>'
	with 707 stored elements in Compressed Sparse Row format>

In [679]:
# Action classifier: build the training and validation sets
from sklearn.model_selection import train_test_split

# 70/30 split of the cleaned text, stratified on the encoded action label
# and seeded so the partition is reproducible.
x_train_action, x_val_action, y_train_action, y_val_action = train_test_split(
    text,
    actions,
    test_size=0.30,
    stratify=actions,
    shuffle=True,
    random_state=0,
)


In [680]:
# Report the shapes of the action split itself (the spam/ham arrays were
# printed here before, under the action labels).
print('x_train_action:',x_train_action.shape,'y_train_action:',y_train_action.shape)
print('x_val_action:',x_val_action.shape,'y_val_action:',y_val_action.shape)

x_train_action: (569,) y_train_action: (569,)
x_val_action: (245,) y_val_action: (245,)


In [681]:
word_vectorizer_action = TfidfVectorizer(max_features=1000)

In [682]:
word_vectorizer_action.fit(x_train_action)

TfidfVectorizer(max_features=1000)

In [683]:
# Persist the fitted action vectorizer. The context manager closes the file
# handle, which the bare open(...) passed to pickle.dump never did.
with open("vectorizer_action.pkl", "wb") as vectorizer_file:
    pickle.dump(word_vectorizer_action, vectorizer_file)

In [684]:
# create TF-IDF vectors for action Train Set
train_word_features_action = word_vectorizer_action.transform(x_train_action)
train_word_features_action

<569x76 sparse matrix of type '<class 'numpy.float64'>'
	with 1666 stored elements in Compressed Sparse Row format>

In [685]:
# create TF-IDF vectors for action Validation Set
val_word_features_action = word_vectorizer_action.transform(x_val_action)
val_word_features_action

<245x76 sparse matrix of type '<class 'numpy.float64'>'
	with 695 stored elements in Compressed Sparse Row format>

## Model building

### Naive Bayes

In [686]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import f1_score

In [687]:
# Training model
nb_model=MultinomialNB().fit(train_word_features,y_train)
nb_model

MultinomialNB()

In [688]:
# save model to pickle file (the with-block closes the handle once written)
with open('nb_model.pkl', 'wb') as model_file:
    pickle.dump(nb_model, model_file)

In [689]:
# read model from pickle file
# NOTE: pickle.load can execute arbitrary code; only load files written by
# this notebook.
with open('nb_model.pkl', 'rb') as model_file:
    pickled_model = pickle.load(model_file)

# sanity check: the reloaded model predicts on the training features
pickled_model.predict(train_word_features)


array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,

In [690]:
# Make predictions for train set
train_pred_nb=nb_model.predict(train_word_features)

In [691]:
train_pred_nb

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,

In [692]:
# Evaluating on Training Set
f1_nb_train = f1_score(y_train,train_pred_nb,average="weighted")
print("F1-score on Train Set:",f1_nb_train)

F1-score on Train Set: 0.8711067098077112


In [693]:
# Make predictions for validation set
val_pred_nb=nb_model.predict(val_word_features)

# Evaluating on Validation Set
f1_nb_val = f1_score(y_val,val_pred_nb,average="weighted")
print("F1-score on Validation Set:",f1_nb_val)

F1-score on Validation Set: 0.9402734107997266


In [694]:
# Training action model
nb_model_action=MultinomialNB().fit(train_word_features_action,y_train_action)
nb_model_action

MultinomialNB()

In [695]:
y_train_action

array([ 1, 13,  8,  6, 10,  8, 10,  0,  5,  5, 12,  5, 12,  4, 12,  8,  3,
        5, 14, 13,  4,  1,  2, 11,  0,  4,  5,  9, 11,  4,  0,  2, 12,  7,
       12,  6,  8, 11,  0,  0, 13,  4,  1,  7, 10,  2,  5,  4,  6,  3,  5,
       15,  1,  8, 15,  0,  4, 12,  6, 15,  7,  5, 11,  4,  1, 15,  2,  3,
        0,  6,  4,  6,  1, 12,  8, 15, 11, 14,  5, 10,  5,  5,  4, 11, 11,
        5,  0,  4, 12,  2,  3, 12,  2,  5,  2, 11,  0, 13, 11, 12,  7, 10,
        1, 12, 12,  5, 11, 14,  1,  7, 12, 10, 14,  3, 13,  8,  5, 10,  2,
        5,  5, 14, 10,  0,  4, 14, 11, 14, 13, 12,  4,  5,  5,  4,  0,  1,
        0,  7,  4,  0,  5, 13,  0,  7,  7, 12, 14,  7,  2,  2,  3, 14, 12,
        2,  6,  4, 14, 12,  7, 15,  4, 13, 13,  4, 15,  1,  0,  6,  2, 12,
        8,  0,  5, 15,  3, 14, 11,  0,  7, 10, 13, 15,  6,  7,  7,  6, 14,
       10, 15, 13,  5,  6,  5, 12,  5, 14, 15, 14,  8, 11, 14,  7,  8, 12,
       10, 10,  8,  5, 13,  4, 11, 11, 13,  7, 13,  3, 12, 15, 12,  5, 14,
        0,  4,  5,  5,  5

In [696]:
# save action model to pickle file (the with-block closes the handle once written)
with open('nb_model_action.pkl', 'wb') as model_file:
    pickle.dump(nb_model_action, model_file)

In [697]:
# read action model from pickle file
# NOTE: pickle.load can execute arbitrary code; only load files written by
# this notebook.
with open('nb_model_action.pkl', 'rb') as model_file:
    pickled_model_action = pickle.load(model_file)

# sanity check: the reloaded model predicts on the action training features
pickled_model_action.predict(train_word_features_action)

array([ 1, 13,  8,  6, 10,  8, 10,  0,  5,  5, 12,  5, 12, 10, 12,  8,  3,
        5, 14, 13,  4,  1,  2, 11,  0, 10,  5,  4, 11,  4,  0,  2, 12,  7,
       12,  6,  8, 11,  0,  0, 13,  4,  1,  7, 10,  2,  5,  4,  6,  3,  5,
       15,  1,  8, 15,  0, 14, 12,  6, 15,  7,  5, 11,  4,  1, 15,  2,  3,
        0,  6, 10,  6,  1, 12,  8, 15, 11, 14,  5, 10,  5,  5,  4, 11, 11,
        5,  0, 10, 12,  2,  3, 12,  2,  5,  2, 11,  0, 13, 11, 12,  7, 10,
        1, 12, 12,  5, 11, 14,  1,  7, 12, 10, 14,  3, 13,  8,  5, 10,  2,
        5,  5, 14, 10,  0, 15, 14, 11, 14, 13, 12, 10,  5,  5, 10,  0,  1,
        0,  7,  4,  0,  5, 13,  0,  7,  7, 12, 14,  7,  2,  2,  3, 14, 12,
        2,  6, 14, 14, 12,  7, 15, 10, 13, 13,  4, 15,  1,  0,  6,  2, 12,
        8,  0,  5, 15,  3, 14, 11,  0,  7, 10, 13, 15,  6,  7,  7,  6, 14,
       10, 15, 13,  5,  6,  5, 12,  5, 14, 15, 14,  8, 11, 14,  7,  8, 12,
       10, 10,  8,  5, 13, 10, 11, 11, 13,  7, 13,  3, 12, 15, 12,  5, 14,
        0, 10,  5,  5,  5

In [698]:
# Make predictions for train set
train_pred_nb_action=nb_model_action.predict(train_word_features_action)

In [699]:
# Evaluating on Training Set
f1_nb_train_action = f1_score(y_train_action,train_pred_nb_action,average="weighted")
print("The F1-score on Training data: ",f1_nb_train_action)

The F1-score on Training data:  0.9363981491082348


In [700]:
# Make predictions for validation set
val_pred_nb_action=nb_model_action.predict(val_word_features_action)

# Evaluating on Validation Set
f1_nb_val_action = f1_score(y_val_action,val_pred_nb_action,average="weighted")
print("F1-score on Validation Set:",f1_nb_val_action)

F1-score on Validation Set: 0.939365923129026


### Logistic Regression

In [701]:
from sklearn.linear_model import LogisticRegression

In [702]:
# Training model
lr_model=LogisticRegression().fit(train_word_features, y_train)
lr_model

LogisticRegression()

In [703]:
# Make predictions for train set
train_pred_lr=lr_model.predict(train_word_features)
train_pred_lr

array([0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1,
       1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,

In [704]:
# Evaluating on Training Set
f1_lr_train = f1_score(y_train,train_pred_lr,average="weighted")
print("F1-score on Train Set:",f1_lr_train)

F1-score on Train Set: 1.0


In [705]:
# Make predictions for validation set
val_pred_lr=lr_model.predict(val_word_features)

# Evaluating on Validation Set
f1_lr_val = f1_score(y_val,val_pred_lr,average="weighted")
print("F1-score on Validation Set:", f1_lr_val)

F1-score on Validation Set: 1.0


In [706]:
# Training action model
lr_model_action=LogisticRegression().fit(train_word_features_action, y_train_action)

In [707]:
# Make predictions for train set
train_pred_lr_action=lr_model_action.predict(train_word_features_action)
train_pred_lr_action



array([ 1, 13,  8,  6, 10,  8, 10,  0,  5,  5, 12,  5, 12,  4, 12,  8,  3,
        5, 14, 13,  4,  1,  2, 11,  0,  4,  5,  9, 11,  4,  0,  2, 12,  7,
       12,  6,  8, 11,  0,  0, 13,  4,  1,  7, 10,  2,  5,  4,  6,  3,  5,
       15,  1,  8, 15,  0,  4, 12,  6, 15,  7,  5, 11,  4,  1, 15,  2,  3,
        0,  6,  4,  6,  1, 12,  8, 15, 11, 14,  5, 10,  5,  5,  4, 11, 11,
        5,  0,  4, 12,  2,  3, 12,  2,  5,  2, 11,  0, 13, 11, 12,  7, 10,
        1, 12, 12,  5, 11, 14,  1,  7, 12, 10, 14,  3, 13,  8,  5, 10,  2,
        5,  5, 14, 10,  0,  4, 14, 11, 14, 13, 12,  4,  5,  5,  4,  0,  1,
        0,  7,  4,  0,  5, 13,  0,  7,  7, 12, 14,  7,  2,  2,  3, 14, 12,
        2,  6,  4, 14, 12,  7, 15,  4, 13, 13,  4, 15,  1,  0,  6,  2, 12,
        8,  0,  5, 15,  3, 14, 11,  0,  7, 10, 13, 15,  6,  7,  7,  6, 14,
       10, 15, 13,  5,  6,  5, 12,  5, 14, 15, 14,  8, 11, 14,  7,  8, 12,
       10, 10,  8,  5, 13,  4, 11, 11, 13,  7, 13,  3, 12, 15, 12,  5, 14,
        0,  4,  5,  5,  5

In [708]:
# Evaluating on Training Set
f1_lr_train_action = f1_score(y_train_action,train_pred_lr_action,average="weighted")
print("F1-score on Train Set:",f1_lr_train_action)

F1-score on Train Set: 0.9981225942740726


In [709]:
# Make predictions for validation set
val_pred_lr_action=lr_model_action.predict(val_word_features_action)

# Evaluating on Validation Set
f1_lr_val_action = f1_score(y_val_action,val_pred_lr_action,average="weighted")
print("F1-score on Validation Set:", f1_lr_val_action)

F1-score on Validation Set: 0.9870713999885669


### Linear SVC

In [710]:
from sklearn.svm import LinearSVC
lsvc = LinearSVC()

In [711]:
lsvc.fit(train_word_features,y_train)

LinearSVC()

In [712]:
preds_val_lsvc = lsvc.predict(val_word_features)
preds_train_lsvc = lsvc.predict(train_word_features)

In [713]:
print("F1-score on Train Set:",f1_score(y_train,preds_train_lsvc,average="weighted"))
print("F1-score on Validation Set:",f1_score(y_val,preds_val_lsvc,average="weighted"))

train_lsvc_f1 = f1_score(y_train,preds_train_lsvc,average="weighted")
val_lsvc_f1 = f1_score(y_val,preds_val_lsvc,average="weighted")
train_lsvc_f1

F1-score on Train Set: 1.0
F1-score on Validation Set: 1.0


1.0

In [714]:
# training action model
# Fit a fresh estimator: lsvc.fit(...) returns lsvc itself, so reusing it
# would overwrite the spam/ham classifier trained above and leave `lsvc`
# and `lsvc_model_action` pointing at the same (action) model.
lsvc_model_action = LinearSVC().fit(train_word_features_action,y_train_action)

In [715]:
preds_val_lsvc_action = lsvc_model_action.predict(val_word_features_action)
preds_train_lsvc_action = lsvc_model_action.predict(train_word_features_action)

In [716]:
print("F1-score on Train Set:",f1_score(y_train_action,preds_train_lsvc_action,average="weighted"))
print("F1-score on Validation Set:",f1_score(y_val_action,preds_val_lsvc_action,average="weighted"))

train_lsvc_f1_action = f1_score(y_train_action,preds_train_lsvc_action,average="weighted")
val_lsvc_f1_action = f1_score(y_val_action,preds_val_lsvc_action,average="weighted")

F1-score on Train Set: 1.0
F1-score on Validation Set: 0.9958601622467168


### XGBoost

In [717]:
import xgboost as xgb

In [718]:
xgb_cl = xgb.XGBClassifier()
xgb_cl.fit(train_word_features,y_train)

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, ...)

In [719]:
preds_val = xgb_cl.predict(val_word_features)
preds_train = xgb_cl.predict(train_word_features)

In [720]:
print("F1-score on Train Set:",f1_score(y_train,preds_train,average="weighted"))
print("F1-score on Validation Set:",f1_score(y_val,preds_val,average="weighted"))

train_xg_f1 = f1_score(y_train,preds_train,average="weighted")
val_xg_f1 = f1_score(y_val,preds_val,average="weighted")

F1-score on Train Set: 0.9982445129952306
F1-score on Validation Set: 0.995929102599944


In [721]:
# train action classifier
xgb_cl_action = xgb.XGBClassifier()
xgb_cl_action.fit(train_word_features_action,y_train_action)

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=None, num_parallel_tree=None,
              objective='multi:softprob', predictor=None, ...)

In [722]:
preds_val_action = xgb_cl_action.predict(val_word_features_action)
preds_train_action = xgb_cl_action.predict(train_word_features_action)

In [723]:
print("F1-score on Train Set:",f1_score(y_train_action,preds_train_action,average="weighted"))
print("F1-score on Validation Set:",f1_score(y_val_action,preds_val_action,average="weighted"))

train_xg_f1_action = f1_score(y_train_action,preds_train_action,average="weighted")
val_xg_f1_action = f1_score(y_val_action,preds_val_action,average="weighted")

F1-score on Train Set: 1.0
F1-score on Validation Set: 0.9917462637228804


## Spam classification Model Building Summary

In [724]:
f1_df = {"model":['Naive Bayes','Logistic Regression','Linear SVC','XGBooster'],
         'train_F1_score':[f1_nb_train,f1_lr_train,train_lsvc_f1,train_xg_f1],
         'val_F1_score':[f1_nb_val,f1_lr_val,val_lsvc_f1,val_xg_f1]}

In [725]:
model_df = pd.DataFrame(f1_df)
model_df

Unnamed: 0,model,train_F1_score,val_F1_score
0,Naive Bayes,0.871107,0.940273
1,Logistic Regression,1.0,1.0
2,Linear SVC,1.0,1.0
3,XGBooster,0.998245,0.995929


## Action classifier summary

In [726]:
f1_df_action = {"action_model":['Naive Bayes','Logistic Regression','Linear SVC','XGBooster'],
         'train_F1_score':[f1_nb_train_action,f1_lr_train_action,train_lsvc_f1_action,train_xg_f1_action],
         'val_F1_score':[f1_nb_val_action,f1_lr_val_action,val_lsvc_f1_action,val_xg_f1_action]}

In [727]:
action_df = pd.DataFrame(f1_df_action)
action_df

Unnamed: 0,action_model,train_F1_score,val_F1_score
0,Naive Bayes,0.936398,0.939366
1,Logistic Regression,0.998123,0.987071
2,Linear SVC,1.0,0.99586
3,XGBooster,1.0,0.991746


In [728]:
def _load_pickle(path):
    """Return the object pickled at `path`, closing the file afterwards."""
    # NOTE: pickle.load can execute arbitrary code; only load files written
    # by this notebook.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Reload the persisted spam/ham classifier and its vectorizer.
pickled_model = _load_pickle('nb_model.pkl')
pickled_vectorizer = _load_pickle('vectorizer.pkl')

# Reload the persisted action classifier and its vectorizer.
pickled_model_action = _load_pickle('nb_model_action.pkl')
pickled_vectorizer_action = _load_pickle('vectorizer_action.pkl')

## Database operations

In [729]:
#input_message = 'give 300 kg of Sandwich from inventory'
input_message = 'add 5 kg of Sugar to stocks inventory'
#input_message = 'remove 5 kg of Fish to food inventory'
#input_message = 'what is the gdp of india'
#input_message = 'please add me to your fb account'
#input_message = "remove 12 kg of Sugar to food category"
#input_message = "update inventory by 5 kg of Sugar"
#input_message = 'what do you offer for me'
#input_message = 'display the existing data'


# predicting the label from input message
processed = text_cleaner(input_message)
vector = pickled_vectorizer.transform([processed])
pred = pickled_model.predict(vector)

label = le.inverse_transform(np.array(pred))

# predicting the action from input message
vector_action = pickled_vectorizer_action.transform([processed])
pred_action = pickled_model_action.predict(vector_action)
action_label = le1.inverse_transform(np.array(pred_action))[0]
print('action label: ', action_label)


# available menu
menu = ['Biscuits','Milk','Sandwich','Fruits','Wheat','Sugar','Salt','Bread','Detergent','Softdrinks','Sweets']

# actions that can be performed with inventory
add_action = ['add','append','push']
remove_action = ['remove','delete','subtract']
display_action = ['display','provide','show','offer','retrieve','extract','get']
give_action = ['give','dispatch','dispense']

# units the inventory documents are keyed on, and the spellings folded onto them
SUPPORTED_UNITS = ('kg', 'liter', 'NA')
UNIT_ALIASES = {
    'kgs': 'kg', 'kilo': 'kg', 'kilos': 'kg', 'kilogram': 'kg', 'kilograms': 'kg',
    'l': 'liter', 'liters': 'liter', 'litre': 'liter', 'litres': 'liter',
}


def _extract_quantity(doc):
    """Return ``{'item_quantity': int, 'units': str}`` read from ``doc.ents``.

    Entities labelled QUANTITY contribute their first run of digits and their
    first run of non-digits (normalised through ``UNIT_ALIASES``); entities
    labelled CARDINAL contribute an integer with units ``'NA'``. Entities that
    hold no parsable number are skipped. When several entities qualify the
    last one wins. Returns ``{}`` when none qualifies.
    """
    found = {}
    for ent in doc.ents:
        if ent.label_ == 'QUANTITY':
            number = re.search(r'\d+', ent.text)
            if number is None:
                # e.g. a spelled-out amount: nothing to convert
                continue
            unit_text = re.search(r'\D+', ent.text)
            units = unit_text.group().strip().lower() if unit_text else ''
            found = {
                'item_quantity': int(number.group()),
                'units': UNIT_ALIASES.get(units, units) or 'NA',
            }
        elif ent.label_ == 'CARDINAL':
            try:
                number = int(ent.text)
            except ValueError:
                # e.g. "five" or "1,000": not a plain integer literal
                continue
            found = {'item_quantity': number, 'units': 'NA'}
    return found


def _find_menu_item(doc, menu):
    """Return the menu entry matching a token of `doc` (case-insensitive).

    The last matching token wins; ``None`` when no token is on the menu.
    """
    by_lower_name = {name.lower(): name for name in menu}
    item = None
    for token in doc:
        item = by_lower_name.get(token.text.lower(), item)
    return item


def _withdraw_stock(collection, search_filter, amount):
    """Subtract `amount` from the matching document without going negative.

    Returns ``(status, available)`` where `status` is ``'missing'`` (no
    document matches `search_filter`), ``'insufficient'`` (stock is lower
    than `amount`) or ``'ok'`` (stock was decremented), and `available` is
    the quantity read before the update (``None`` when missing).
    """
    existing = collection.find_one(search_filter)
    if existing is None:
        return 'missing', None

    # The stock check is part of the update filter, so the decrement only
    # happens when enough stock is present; no upsert, so a withdrawal can
    # never create a document.
    guarded_filter = dict(search_filter)
    guarded_filter['item_quantity'] = {'$gte': amount}
    result = collection.update_one(guarded_filter, {'$inc': {'item_quantity': -amount}})
    status = 'ok' if result.matched_count else 'insufficient'
    return status, existing.get('item_quantity', 0)


# metadata extracted from the message (name kept for later cells; note that it
# would shadow the stdlib `json` module if that were ever imported)
json = {}

# database connection
# The URI can be supplied through INVENTORY_MONGODB_URI.
# NOTE(review): the fallback still embeds credentials in the notebook; remove
# it once the environment variable is set wherever this runs.
# The client is created outside the 'ham' branch so `collection` is always
# defined for later cells; MongoClient connects lazily, on first operation.
uri = os.environ.get("INVENTORY_MONGODB_URI", "mongodb://dhanu:dhanu@localhost:27072/?authSource=admin")
client = MongoClient(uri)
db = client['inventory']
collection = db['products']

try:

    if label[0] == 'ham':
        print(f"The input message '{input_message}' is valid")

        # spaCy object creation
        doc = nlp(input_message)

        # identifying the quantity entities using NER
        json.update(_extract_quantity(doc))

        # extracting the item from input message
        item = _find_menu_item(doc, menu)
        if item is not None:
            json['item'] = item

        # identifying the action from input message
        action = [token.text for token in doc if token.pos_ == 'VERB']
        if action:
            print("The action from input message: ",action[0])
        else:
            print("The action from input message: ", "(no verb found)")

        # display action processing
        if action_label in display_action:
            print("The following items are present in the inventory:\n")
            item_list = list(collection.find({},{'_id':0}))
            df_items = pd.DataFrame(item_list)
            print(df_items)

        # input products check in the inventory
        elif json.get('item') is None:
            print("The specified item from input message is not in the Menu. The available menu: \n", menu)

        else:
            print("The metadata extracted from input message:\n", json)

            if action_label not in add_action + remove_action + give_action:
                print("There is no action from input message")

            elif 'item_quantity' not in json:
                print("No quantity was found in the input message")

            elif json['units'] not in SUPPORTED_UNITS:
                print(f"The units '{json['units']}' are not supported. Supported units: {SUPPORTED_UNITS}")

            else:
                # filter for searching the item
                search_filter = {'item':json['item'], 'units':json['units']}
                amount = json['item_quantity']

                # add action process
                if action_label in add_action:
                    # creates the document on first use, increments it afterwards
                    collection.update_one(search_filter, {'$inc':{'item_quantity':amount}}, upsert=True)
                    print("The items are updated in database")

                # delete and dispatch action process
                else:
                    status, available = _withdraw_stock(collection, search_filter, amount)

                    if status == 'missing':
                        print(f"The desired item '{json['item']}' is not available. Please add to inventory")
                    else:
                        if action_label in give_action:
                            shown_units = '' if json['units'] == 'NA' else json['units']
                            print("Available {} stock: {} {}".format(json['item'], available, shown_units))

                        if status == 'insufficient':
                            print("Insufficient items in inventory")
                        elif action_label in give_action:
                            print("The items are available and ready to dispense")
                        else:
                            print("The items are updated in database")

    else:
        print(f"The input message '{input_message}' was not valid")

except Exception as error:
    # best-effort cell: report database/NLP failures instead of aborting the run
    print("The exception is --->", error)
    

action label:  add
The input message 'add 5 kg of Sugar to stocks inventory' is valid
The action from input message:  add
The metadata extracted from input message:
 {'item_quantity': 5, 'units': 'kg', 'item': 'Sugar'}
The items are updated in database


In [730]:
# Retrieve the documents from the database: find() is called with no filter,
# and each returned document is printed on its own line.
# `cursor` and `itr` are left as notebook-level names after this cell runs.
cursor = collection.find()
for itr in cursor:
    print(itr)

{'_id': ObjectId('63c7e7f76213d5f5b7afa7eb'), 'item': 'Wheat', 'units': 'kg', 'item_quantity': 7}
{'_id': ObjectId('63c7e9d66213d5f5b7afa860'), 'item': 'Fruits', 'units': 'kg', 'item_quantity': -118}
{'_id': ObjectId('63c7eba36213d5f5b7afa8c5'), 'item': 'Salt', 'units': 'kg', 'item_quantity': 10}
{'_id': ObjectId('63c7eda76213d5f5b7afa936'), 'item': 'Detergent', 'units': 'kg', 'item_quantity': 4}
{'_id': ObjectId('63c7ef506213d5f5b7afa9a1'), 'item': 'Sweets', 'units': 'kg', 'item_quantity': 18}
{'_id': ObjectId('63c8d73c6213d5f5b7afacca'), 'item': 'Sugar', 'units': 'kg', 'item_quantity': 92}
{'_id': ObjectId('63c925926213d5f5b7afb5f9'), 'item': 'Softdrinks', 'units': 'liter', 'item_quantity': 20}
{'_id': ObjectId('63c925ad6213d5f5b7afb608'), 'item': 'Milk', 'units': 'liter', 'item_quantity': 88}
{'_id': ObjectId('63c93adf6213d5f5b7afb899'), 'item': 'Milk', 'units': 'NA', 'item_quantity': 54}
{'_id': ObjectId('63c94e82069026c63931ad4c'), 'item': 'Sugar', 'units': 'NA', 'item_quantity': 

In [731]:
# Look up a single 'Sandwich' document stored with units 'NA' and report
# whether the lookup returned anything truthy ('yes') or not ('no').
search_filter = {'item':'Sandwich', 'units':'NA'}
cursor = collection.find_one(search_filter)  # result of find_one, despite the name
print('yes' if cursor else 'no')

no


In [732]:
# Delete at most one document matching item 'Sandwich' with units 'kg'.
# NOTE(review): the lookup cell just above searches units 'NA', while this
# filter uses units 'kg' -- confirm which variant is meant to be removed.
del_filter = dict(item='Sandwich', units='kg')

# NOTE(review): `del_cursor` holds whatever delete_one() returns (a result
# object rather than a cursor, per the pymongo docs); it is not inspected here.
del_cursor = collection.delete_one(del_filter)

In [733]:
# Retrieve the documents from the database again, after the delete_one call in
# the previous cell, and print each one on its own line so the collection's
# current contents can be inspected.
cursor = collection.find()
for itr in cursor:
    print(itr)

{'_id': ObjectId('63c7e7f76213d5f5b7afa7eb'), 'item': 'Wheat', 'units': 'kg', 'item_quantity': 7}
{'_id': ObjectId('63c7e9d66213d5f5b7afa860'), 'item': 'Fruits', 'units': 'kg', 'item_quantity': -118}
{'_id': ObjectId('63c7eba36213d5f5b7afa8c5'), 'item': 'Salt', 'units': 'kg', 'item_quantity': 10}
{'_id': ObjectId('63c7eda76213d5f5b7afa936'), 'item': 'Detergent', 'units': 'kg', 'item_quantity': 4}
{'_id': ObjectId('63c7ef506213d5f5b7afa9a1'), 'item': 'Sweets', 'units': 'kg', 'item_quantity': 18}
{'_id': ObjectId('63c8d73c6213d5f5b7afacca'), 'item': 'Sugar', 'units': 'kg', 'item_quantity': 92}
{'_id': ObjectId('63c925926213d5f5b7afb5f9'), 'item': 'Softdrinks', 'units': 'liter', 'item_quantity': 20}
{'_id': ObjectId('63c925ad6213d5f5b7afb608'), 'item': 'Milk', 'units': 'liter', 'item_quantity': 88}
{'_id': ObjectId('63c93adf6213d5f5b7afb899'), 'item': 'Milk', 'units': 'NA', 'item_quantity': 54}
{'_id': ObjectId('63c94e82069026c63931ad4c'), 'item': 'Sugar', 'units': 'NA', 'item_quantity': 