In [737]:
# standard library
import os
import pickle  # saving model as pickle
import re

# third-party
import numpy as np
import pandas as pd
import spacy  # for NLP related tasks
from pymongo import MongoClient  # for mongodb operations

pd.set_option('display.max_colwidth', 200)

# shared spaCy pipeline used by text_cleaner and the inference cell
global nlp
nlp=spacy.load('en_core_web_sm')

In [818]:
# Labelled utterance dataset. NOTE(review): absolute path on the author's machine;
# point this at your own copy of inventory.csv.
INVENTORY_CSV = r'C:/Users/DAG9KOR/Downloads/ProjectMulticlasstextclassification/inventory.csv'

df = pd.read_csv(INVENTORY_CSV)
print('Shape -->', df.shape)
df.head()

Shape --> (687, 3)


Unnamed: 0,text,label,action
0,add 5 kg of Biscuits,ham,add
1,play music,spam,play
2,add 2 litres of milk,ham,add
3,who is prime minister,spam,none
4,remove 1kg of fruits,ham,remove


In [819]:
df['text'].sample(5)

492    return the exisitng items in inventory
274                  what do you offer for me
290                  what do you offer for me
561    remove 2 kg of Sandwich from inventory
347        show me what is there in inventory
Name: text, dtype: object

In [820]:
df['label'].value_counts(normalize=True)

ham     0.809316
spam    0.190684
Name: label, dtype: float64

In [821]:
df['action'].value_counts(normalize=False)

remove      60
show        59
return      58
update      55
give        54
play        54
provide     51
offer       49
display     49
subtract    45
get         38
sing        37
none        36
add         36
push         6
Name: action, dtype: int64

In [822]:
# Number of distinct values in the 'action' column (the classes of the action classifier).
unique_actions = df['action'].nunique()
unique_actions

15

In [823]:
def text_cleaner(text):
    """Normalise a raw message into a space-joined string of lemmas.

    Steps, in order: strip ``@mentions`` and ``http...`` links, lowercase,
    replace every run of non-letter characters with one space, run the
    module-level spaCy pipeline ``nlp``, drop stop words and whitespace
    tokens, and lemmatise what is left.

    Parameters
    ----------
    text : str
        Raw input message.

    Returns
    -------
    str
        Lemmas of the remaining tokens joined by single spaces.
    """
    # remove user mentions
    text = re.sub(r'@[A-Za-z0-9]+', '', text)

    # remove links
    text = re.sub(r'http\S+', '', text)

    # converting text to lower case
    text = text.lower()

    # Keep letters only. Every run of other characters (digits, punctuation,
    # whitespace) collapses to ONE space, so no separate whitespace-squeezing
    # pass is needed; strip() drops the space left by a leading/trailing run.
    text = re.sub(r'[^a-z]+', ' ', text).strip()

    # creating doc object
    doc = nlp(text)

    # remove stopwords (and any whitespace tokens) and lemmatize the text
    tokens = [
        token.lemma_
        for token in doc
        if not token.is_stop and not token.is_space
    ]

    # join tokens by space
    return " ".join(tokens)

In [824]:
# perform text cleaning: store text_cleaner's output for every row of 'text'
# in a new 'clean_text' column (runs the spaCy pipeline once per row)
df['clean_text']= df['text'].apply(text_cleaner)

In [825]:
df['clean_text'].sample(5)

178    provide exisitng item inventory
644           update kg salt inventory
236                          sing poem
264                              offer
87             highest rate imdb movie
Name: clean_text, dtype: object

In [826]:
# Model input (cleaned text) and the two targets, pulled out of the frame as arrays.
text, labels, actions = (
    df[column].values for column in ('clean_text', 'label', 'action')
)

In [827]:
labels[:5]

array(['ham', 'spam', 'ham', 'spam', 'ham'], dtype=object)

In [828]:
actions[:5]

array(['add', 'play', 'add', 'none', 'remove'], dtype=object)

### Label Encoding

In [829]:
#importing label encoder
from sklearn.preprocessing import LabelEncoder

# One encoder per target. Both are kept: the inference cell decodes predictions
# with le.inverse_transform / le1.inverse_transform.
le = LabelEncoder()     # spam/ham label
le1 = LabelEncoder()    # action

# Encode straight from the DataFrame columns rather than from `labels`/`actions`.
# The cell previously overwrote its own inputs, so running it a second time fitted
# the encoders on integers and inverse_transform no longer returned the strings.
labels = le.fit_transform(df['label'].values)
actions = le1.fit_transform(df['action'].values)

In [830]:
labels[:10]

array([0, 1, 0, 1, 0, 1, 0, 0, 0, 1])

In [831]:
actions[:10]

array([0, 6, 0, 4, 9, 4, 0, 9, 0, 4])

In [832]:
le.inverse_transform(labels)

array(['ham', 'spam', 'ham', 'spam', 'ham', 'spam', 'ham', 'ham', 'ham',
       'spam', 'ham', 'spam', 'ham', 'spam', 'ham', 'spam', 'ham', 'ham',
       'spam', 'spam', 'spam', 'spam', 'ham', 'ham', 'ham', 'spam', 'ham',
       'ham', 'ham', 'ham', 'spam', 'ham', 'ham', 'ham', 'ham', 'spam',
       'spam', 'spam', 'spam', 'spam', 'spam', 'spam', 'ham', 'ham',
       'ham', 'ham', 'ham', 'ham', 'spam', 'spam', 'spam', 'ham', 'ham',
       'ham', 'ham', 'ham', 'ham', 'ham', 'spam', 'spam', 'spam', 'ham',
       'ham', 'ham', 'ham', 'ham', 'ham', 'ham', 'ham', 'spam', 'ham',
       'spam', 'ham', 'spam', 'ham', 'ham', 'ham', 'spam', 'ham', 'spam',
       'ham', 'spam', 'ham', 'spam', 'ham', 'ham', 'spam', 'spam', 'spam',
       'spam', 'ham', 'ham', 'ham', 'spam', 'ham', 'ham', 'ham', 'ham',
       'spam', 'ham', 'ham', 'ham', 'ham', 'spam', 'spam', 'spam', 'spam',
       'spam', 'spam', 'spam', 'ham', 'ham', 'ham', 'ham', 'ham', 'ham',
       'spam', 'spam', 'spam', 'ham', 'ham', 'ham',

In [833]:
le1.inverse_transform(actions)

array(['add', 'play', 'add', 'none', 'remove', 'none', 'add', 'remove',
       'add', 'none', 'add', 'none', 'add', 'none', 'add', 'none', 'add',
       'add', 'none', 'none', 'sing', 'add', 'show', 'display', 'offer',
       'add', 'provide', 'return', 'subtract', 'add', 'none', 'add',
       'add', 'add', 'add', 'none', 'none', 'none', 'none', 'none',
       'none', 'none', 'push', 'add', 'add', 'display', 'get', 'remove',
       'none', 'play', 'sing', 'offer', 'provide', 'get', 'show', 'push',
       'return', 'subtract', 'none', 'play', 'sing', 'offer', 'provide',
       'get', 'show', 'push', 'return', 'subtract', 'add', 'play', 'add',
       'none', 'remove', 'none', 'add', 'remove', 'add', 'none', 'add',
       'none', 'add', 'none', 'add', 'none', 'add', 'add', 'none', 'none',
       'sing', 'add', 'show', 'display', 'offer', 'add', 'provide',
       'return', 'subtract', 'add', 'none', 'add', 'add', 'add', 'add',
       'none', 'none', 'none', 'none', 'none', 'none', 'none', 

In [834]:
valid = le.inverse_transform([0,1])
valid

array(['ham', 'spam'], dtype=object)

In [835]:
# Spam/ham classifier: 70/30 train/validation split of the cleaned text.
from sklearn.model_selection import train_test_split

# Stratified on the label so both parts keep the same ham/spam ratio;
# random_state pins the shuffle.
x_train, x_val, y_train, y_val = train_test_split(
    text,
    labels,
    test_size=0.30,
    stratify=labels,
    shuffle=True,
    random_state=0,
)

In [836]:
# Sanity check: features and targets of each part of the spam/ham split must have matching lengths.
print('x_train:',x_train.shape,'y_train:',y_train.shape)
print('x_val:',x_val.shape,'y_val:',y_val.shape)

x_train: (480,) y_train: (480,)
x_val: (207,) y_val: (207,)


In [837]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [838]:
# TF-IDF vectorizer for the spam/ham classifier; vocabulary capped at 1000 terms.
word_vectorizer = TfidfVectorizer(max_features=1000)

In [839]:
word_vectorizer.fit(x_train)

TfidfVectorizer(max_features=1000)

In [840]:
# Persist the fitted spam/ham vectorizer. The context manager closes (and flushes)
# the file deterministically instead of leaving the handle to the garbage collector.
with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(word_vectorizer, vectorizer_file)

In [841]:
# create TF-IDF vectors for Train Set (transform only: the vectorizer was already fitted on x_train)
train_word_features = word_vectorizer.transform(x_train)
train_word_features

<480x79 sparse matrix of type '<class 'numpy.float64'>'
	with 1525 stored elements in Compressed Sparse Row format>

In [842]:
# create TF-IDF vectors for Validation Set, reusing the vocabulary learned from the train set
val_word_features = word_vectorizer.transform(x_val)
val_word_features

<207x79 sparse matrix of type '<class 'numpy.float64'>'
	with 632 stored elements in Compressed Sparse Row format>

In [843]:
# Action classifier: 70/30 train/validation split of the same cleaned text.
from sklearn.model_selection import train_test_split

# Stratified on the action so every action class appears in both parts.
x_train_action, x_val_action, y_train_action, y_val_action = train_test_split(
    text,
    actions,
    test_size=0.30,
    stratify=actions,
    shuffle=True,
    random_state=0,
)


In [844]:
# Sanity check of the ACTION split. The cell previously printed the shapes of the
# spam/ham split (x_train, y_train, ...) under the action labels.
print('x_train_action:',x_train_action.shape,'y_train_action:',y_train_action.shape)
print('x_val_action:',x_val_action.shape,'y_val_action:',y_val_action.shape)

x_train_action: (480,) y_train_action: (480,)
x_val_action: (207,) y_val_action: (207,)


In [845]:
# Separate TF-IDF vectorizer for the action classifier; vocabulary capped at 1000 terms.
word_vectorizer_action = TfidfVectorizer(max_features=1000)

In [846]:
word_vectorizer_action.fit(x_train_action)

TfidfVectorizer(max_features=1000)

In [847]:
# Persist the fitted action vectorizer; `with` guarantees the file is closed and flushed.
with open("vectorizer_action.pkl", "wb") as vectorizer_file:
    pickle.dump(word_vectorizer_action, vectorizer_file)

In [848]:
# create TF-IDF vectors for action Train Set (transform only: fitted on x_train_action above)
train_word_features_action = word_vectorizer_action.transform(x_train_action)
train_word_features_action

<480x78 sparse matrix of type '<class 'numpy.float64'>'
	with 1506 stored elements in Compressed Sparse Row format>

In [849]:
# create TF-IDF vectors for action Validation Set, reusing the action train vocabulary
val_word_features_action = word_vectorizer_action.transform(x_val_action)
val_word_features_action

<207x78 sparse matrix of type '<class 'numpy.float64'>'
	with 649 stored elements in Compressed Sparse Row format>

## Model building

### Naive Bayes

In [850]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import f1_score

In [851]:
# Spam/ham Naive Bayes classifier trained on the train-set TF-IDF features.
nb_model = MultinomialNB()
nb_model.fit(train_word_features, y_train)
nb_model

MultinomialNB()

In [852]:
# save model to pickle file; `with` closes and flushes the file deterministically
with open('nb_model.pkl', 'wb') as model_file:
    pickle.dump(nb_model, model_file)

In [853]:
# read model from pickle file (closing the handle explicitly).
# NOTE: pickle.load executes arbitrary code from the file; only load pickles this
# notebook wrote itself.
with open('nb_model.pkl', 'rb') as model_file:
    pickled_model = pickle.load(model_file)

# round-trip check: the reloaded model should predict like nb_model
pickled_model.predict(train_word_features)


array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,

In [854]:
# Make predictions for train set with the spam/ham Naive Bayes model
train_pred_nb=nb_model.predict(train_word_features)

In [855]:
train_pred_nb

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,

In [856]:
# Evaluating on Training Set: weighted F1 (per-class F1 averaged by class support)
f1_nb_train = f1_score(y_train,train_pred_nb,average="weighted")
print("F1-score on Train Set:",f1_nb_train)

F1-score on Train Set: 0.9915954415954414


In [857]:
# Spam/ham Naive Bayes on the held-out validation features.
val_pred_nb = nb_model.predict(val_word_features)

# Weighted F1 against the validation labels.
f1_nb_val = f1_score(y_true=y_val, y_pred=val_pred_nb, average="weighted")
print("F1-score on Validation Set:", f1_nb_val)

F1-score on Validation Set: 1.0


In [858]:
# Action Naive Bayes classifier trained on the action train-set TF-IDF features.
nb_model_action = MultinomialNB()
nb_model_action.fit(train_word_features_action, y_train_action)
nb_model_action

MultinomialNB()

In [859]:
y_train_action

array([12, 13, 14, 10, 14,  9,  1,  9,  1,  1, 11, 14, 14,  0,  6,  5, 10,
        7,  7, 14,  0,  2, 10, 12,  6,  7,  7,  1,  4,  5,  6,  7,  9, 12,
       11, 13,  2,  3, 12,  9, 11,  9,  4, 12,  6,  6, 10, 13,  5, 13,  9,
       11, 14, 14,  6,  1,  7,  7,  1, 11,  7, 13,  9, 10,  7,  1, 10, 14,
        2,  9,  9, 12,  1,  2, 12,  5,  7,  1,  6, 10,  7,  9,  6,  7,  5,
        3,  6, 12,  2, 14, 13,  1,  0,  0, 13,  2,  7,  4,  5, 14,  6,  3,
       12, 11,  6,  4, 12,  6,  7, 13,  4,  7,  1,  3,  9, 11, 14,  7,  2,
        6, 10,  1, 12,  7,  5, 11, 11,  9,  6,  3,  2,  0,  3, 11, 12,  9,
       14,  9,  1,  5, 14,  8, 10,  1, 10, 11,  1, 11, 11,  6,  3,  5, 14,
       11, 10, 12,  3, 10,  0,  3, 12,  2,  4,  7,  2,  9, 12,  4, 13,  0,
        4, 12,  9,  9,  3, 11,  6,  6,  4,  5,  7,  3,  6,  5, 11,  0, 13,
        0,  0, 10,  7,  2, 11,  3,  5,  9,  1,  3, 10,  5, 10,  4, 14,  0,
        4, 13, 14, 14,  1,  9, 10, 14,  1, 11,  9,  4,  3, 10,  6,  3, 13,
       14, 11,  6,  0,  2

In [860]:
# save action model to pickle file; `with` closes and flushes the file deterministically
with open('nb_model_action.pkl', 'wb') as model_file:
    pickle.dump(nb_model_action, model_file)

In [861]:
# read action model from pickle file (closing the handle explicitly).
# NOTE: pickle.load executes arbitrary code from the file; only load pickles this
# notebook wrote itself.
with open('nb_model_action.pkl', 'rb') as model_file:
    pickled_model_action = pickle.load(model_file)

# round-trip check: the reloaded model should predict like nb_model_action
pickled_model_action.predict(train_word_features_action)

array([12, 13, 14, 10, 14,  9,  1,  9,  1,  1, 11, 14, 14,  0,  6,  5, 10,
        7,  7, 14,  0,  2, 10, 12,  6,  7,  7,  1,  4,  5,  6,  7,  9, 12,
       11, 13,  2,  3, 12,  9, 11,  9,  4, 12,  6,  6, 10, 13,  5, 13,  9,
       11, 14, 14,  6,  1,  7,  7,  1, 11,  7, 13,  9, 10,  7,  1, 10, 14,
        2,  9,  9, 12,  1,  2, 12,  5,  7,  1,  6, 10,  7,  9,  6,  7,  5,
        3,  6, 12,  2, 14, 13,  1,  0,  0, 13,  2,  7,  4,  5, 14,  6,  3,
       12, 11,  6,  4, 12,  6,  7, 13,  9,  7,  1,  3,  9, 11, 14,  7,  2,
        6, 10,  1, 12,  7,  5, 11, 11,  9,  6,  3,  2,  0,  3, 11, 12,  9,
       14,  9,  1,  5, 14,  9, 10,  1, 10, 11,  1, 11, 11,  6,  3,  5, 14,
       11, 10, 12,  3, 10,  0,  3, 12,  2,  4,  7,  2,  9, 12,  4, 13,  0,
        4, 12,  9,  9,  3, 11,  6,  6,  4,  5,  7,  3,  6,  5, 11,  0, 13,
        0,  0, 10,  7,  2, 11,  3,  5,  9,  1,  3, 10,  5, 10,  4, 14,  0,
        4, 13, 14, 14,  1,  9, 10, 14,  1, 11,  9,  4,  3, 10,  6,  3, 13,
       14, 11,  6,  0,  2

In [862]:
# Make predictions for train set with the action Naive Bayes model
train_pred_nb_action=nb_model_action.predict(train_word_features_action)

In [863]:
# Evaluating on Training Set: weighted F1 of the action Naive Bayes model
f1_nb_train_action = f1_score(y_train_action,train_pred_nb_action,average="weighted")
print("The F1-score on Training data: ",f1_nb_train_action)

The F1-score on Training data:  0.9835526589125607


In [864]:
# Action Naive Bayes on the held-out action validation features.
val_pred_nb_action = nb_model_action.predict(val_word_features_action)

# Weighted F1 against the validation actions.
f1_nb_val_action = f1_score(
    y_true=y_val_action, y_pred=val_pred_nb_action, average="weighted"
)
print("F1-score on Validation Set:", f1_nb_val_action)

F1-score on Validation Set: 0.9759929281668411


### Logistic Regression

In [865]:
from sklearn.linear_model import LogisticRegression

In [866]:
# Spam/ham logistic regression (default hyper-parameters) on the train TF-IDF features.
lr_model = LogisticRegression()
lr_model.fit(train_word_features, y_train)
lr_model

LogisticRegression()

In [867]:
# Make predictions for train set with the spam/ham logistic regression
train_pred_lr=lr_model.predict(train_word_features)
train_pred_lr

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,

In [868]:
# Evaluating on Training Set: weighted F1 of the spam/ham logistic regression
f1_lr_train = f1_score(y_train,train_pred_lr,average="weighted")
print("F1-score on Train Set:",f1_lr_train)

F1-score on Train Set: 0.9915954415954414


In [869]:
# Spam/ham logistic regression on the held-out validation features.
val_pred_lr = lr_model.predict(val_word_features)

# Weighted F1 against the validation labels.
f1_lr_val = f1_score(y_true=y_val, y_pred=val_pred_lr, average="weighted")
print("F1-score on Validation Set:", f1_lr_val)

F1-score on Validation Set: 1.0


In [870]:
# Training action model: logistic regression (default hyper-parameters) on the action TF-IDF features
lr_model_action=LogisticRegression().fit(train_word_features_action, y_train_action)

In [871]:
# Make predictions for train set with the action logistic regression
train_pred_lr_action=lr_model_action.predict(train_word_features_action)
train_pred_lr_action



array([12, 13, 14, 10, 14,  9,  1,  9,  1,  1, 11, 14, 14,  0,  6,  5, 10,
        7,  7, 14,  0,  2, 10, 12,  6,  7,  7,  1,  4,  5,  6,  7,  9, 12,
       11, 13,  2,  3, 12,  9, 11,  9,  4, 12,  6,  6, 10, 13,  5, 13,  9,
       11, 14, 14,  6,  1,  7,  7,  1, 11,  7, 13,  9, 10,  7,  1, 10, 14,
        2,  9,  9, 12,  1,  2, 12,  5,  7,  1,  6, 10,  7,  9,  6,  7,  5,
        3,  6, 12,  2, 14, 13,  1,  0,  0, 13,  2,  7,  4,  5, 14,  6,  3,
       12, 11,  6,  4, 12,  6,  7, 13,  4,  7,  1,  3,  9, 11, 14,  7,  2,
        6, 10,  1, 12,  7,  5, 11, 11,  9,  6,  3,  2,  0,  3, 11, 12,  9,
       14,  9,  1,  5, 14,  8, 10,  1, 10, 11,  1, 11, 11,  6,  3,  5, 14,
       11, 10, 12,  3, 10,  0,  3, 12,  2,  4,  7,  2,  9, 12,  4, 13,  0,
        4, 12,  9,  9,  3, 11,  6,  6,  4,  5,  7,  3,  6,  5, 11,  0, 13,
        0,  0, 10,  7,  2, 11,  3,  5,  9,  1,  3, 10,  5, 10,  4, 14,  0,
        4, 13, 14, 14,  1,  9, 10, 14,  1, 11,  9,  4,  3, 10,  6,  3, 13,
       14, 11,  6,  0,  2

In [872]:
# Evaluating on Training Set: weighted F1 of the action logistic regression
f1_lr_train_action = f1_score(y_train_action,train_pred_lr_action,average="weighted")
print("F1-score on Train Set:",f1_lr_train_action)

F1-score on Train Set: 1.0


In [873]:
# Action logistic regression on the held-out action validation features.
val_pred_lr_action = lr_model_action.predict(val_word_features_action)

# Weighted F1 against the validation actions.
f1_lr_val_action = f1_score(
    y_true=y_val_action, y_pred=val_pred_lr_action, average="weighted"
)
print("F1-score on Validation Set:", f1_lr_val_action)

F1-score on Validation Set: 1.0


### Linear SVC

In [874]:
from sklearn.svm import LinearSVC
# Linear support-vector classifier with default hyper-parameters (spam/ham model).
lsvc = LinearSVC()

In [875]:
lsvc.fit(train_word_features,y_train)

LinearSVC()

In [876]:
# Spam/ham LinearSVC predictions on the validation and train features.
preds_val_lsvc = lsvc.predict(val_word_features)
preds_train_lsvc = lsvc.predict(train_word_features)

In [877]:
# Weighted F1 of the spam/ham LinearSVC; computed once, then reported and kept
# for the summary table.
train_lsvc_f1 = f1_score(y_train, preds_train_lsvc, average="weighted")
val_lsvc_f1 = f1_score(y_val, preds_val_lsvc, average="weighted")

print("F1-score on Train Set:", train_lsvc_f1)
print("F1-score on Validation Set:", val_lsvc_f1)

train_lsvc_f1

F1-score on Train Set: 1.0
F1-score on Validation Set: 1.0


1.0

In [878]:
# training action model
# Use a fresh estimator: `lsvc.fit(...)` returns `lsvc` itself, so refitting it here
# made `lsvc_model_action is lsvc` and silently replaced the spam/ham model held
# in `lsvc` with the action model.
lsvc_model_action = LinearSVC().fit(train_word_features_action,y_train_action)

In [879]:
# Action LinearSVC predictions on the action validation and train features.
preds_val_lsvc_action = lsvc_model_action.predict(val_word_features_action)
preds_train_lsvc_action = lsvc_model_action.predict(train_word_features_action)

In [880]:
# Weighted F1 of the action LinearSVC; computed once, then reported and kept
# for the summary table.
train_lsvc_f1_action = f1_score(
    y_train_action, preds_train_lsvc_action, average="weighted"
)
val_lsvc_f1_action = f1_score(
    y_val_action, preds_val_lsvc_action, average="weighted"
)

print("F1-score on Train Set:", train_lsvc_f1_action)
print("F1-score on Validation Set:", val_lsvc_f1_action)

F1-score on Train Set: 1.0
F1-score on Validation Set: 1.0


### XGBoost

In [881]:
import xgboost as xgb

In [882]:
# Spam/ham gradient-boosted classifier with default hyper-parameters.
xgb_cl = xgb.XGBClassifier()
xgb_cl.fit(X=train_word_features, y=y_train)

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, ...)

In [883]:
# Spam/ham XGBoost predictions on the validation and train features.
preds_val = xgb_cl.predict(val_word_features)
preds_train = xgb_cl.predict(train_word_features)

In [884]:
# Weighted F1 of the spam/ham XGBoost model; computed once, then reported and
# kept for the summary table.
train_xg_f1 = f1_score(y_train, preds_train, average="weighted")
val_xg_f1 = f1_score(y_val, preds_val, average="weighted")

print("F1-score on Train Set:", train_xg_f1)
print("F1-score on Validation Set:", val_xg_f1)

F1-score on Train Set: 1.0
F1-score on Validation Set: 1.0


In [885]:
# Action gradient-boosted classifier with default hyper-parameters.
xgb_cl_action = xgb.XGBClassifier()
xgb_cl_action.fit(X=train_word_features_action, y=y_train_action)

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=None, num_parallel_tree=None,
              objective='multi:softprob', predictor=None, ...)

In [886]:
# Action XGBoost predictions on the action validation and train features.
preds_val_action = xgb_cl_action.predict(val_word_features_action)
preds_train_action = xgb_cl_action.predict(train_word_features_action)

In [887]:
# Weighted F1 of the action XGBoost model; computed once, then reported and
# kept for the summary table.
train_xg_f1_action = f1_score(
    y_train_action, preds_train_action, average="weighted"
)
val_xg_f1_action = f1_score(y_val_action, preds_val_action, average="weighted")

print("F1-score on Train Set:", train_xg_f1_action)
print("F1-score on Validation Set:", val_xg_f1_action)

F1-score on Train Set: 1.0
F1-score on Validation Set: 1.0


## Spam classification Model Building Summary

In [888]:
# Column-oriented summary of the spam/ham models: one list entry per model,
# in the same order in every column.
f1_df = {
    "model": [
        'Naive Bayes',
        'Logistic Regression',
        'Linear SVC',
        'XGBooster',
    ],
    'train_F1_score': [f1_nb_train, f1_lr_train, train_lsvc_f1, train_xg_f1],
    'val_F1_score': [f1_nb_val, f1_lr_val, val_lsvc_f1, val_xg_f1],
}

In [889]:
# Tabulate train vs. validation weighted F1 of the four spam/ham models.
model_df = pd.DataFrame(f1_df)
model_df

Unnamed: 0,model,train_F1_score,val_F1_score
0,Naive Bayes,0.991595,1.0
1,Logistic Regression,0.991595,1.0
2,Linear SVC,1.0,1.0
3,XGBooster,1.0,1.0


## Action classifier summary

In [890]:
# Column-oriented summary of the action models: one list entry per model,
# in the same order in every column.
f1_df_action = {
    "action_model": [
        'Naive Bayes',
        'Logistic Regression',
        'Linear SVC',
        'XGBooster',
    ],
    'train_F1_score': [
        f1_nb_train_action,
        f1_lr_train_action,
        train_lsvc_f1_action,
        train_xg_f1_action,
    ],
    'val_F1_score': [
        f1_nb_val_action,
        f1_lr_val_action,
        val_lsvc_f1_action,
        val_xg_f1_action,
    ],
}

In [891]:
# Tabulate train vs. validation weighted F1 of the four action models.
action_df = pd.DataFrame(f1_df_action)
action_df

Unnamed: 0,action_model,train_F1_score,val_F1_score
0,Naive Bayes,0.983553,0.975993
1,Logistic Regression,1.0,1.0
2,Linear SVC,1.0,1.0
3,XGBooster,1.0,1.0


## Database operations

In [892]:
# Example messages; exactly one assignment is active at a time.
#input_message = 'give 300 kg of Sandwich from inventory'
#input_message = 'update 20 kg of Biscuits to stocks inventory'
input_message = 'remove 5 kg of Fish to food inventory'
#input_message = 'what is the gdp of india'
#input_message = 'please add me to your fb account'
#input_message = "remove 12 kg of Sugar to food category"
#input_message = "update inventory by 5 kg of Sugar"
#input_message = 'what do you offer for me'
#input_message = 'display the existing data'


# Spelling variants folded onto the two measured units this cell filters on.
# Without this, "2 litres of milk" yielded units 'litres' and never matched 'liter'.
_UNIT_ALIASES = {
    'kg': 'kg', 'kgs': 'kg', 'kilo': 'kg', 'kilos': 'kg',
    'kilogram': 'kg', 'kilograms': 'kg',
    'liter': 'liter', 'liters': 'liter', 'litre': 'liter', 'litres': 'liter',
}

# Units the inventory operations accept; 'NA' marks a bare count (CARDINAL entity).
_SUPPORTED_UNITS = ('kg', 'liter', 'NA')


def _extract_quantity(doc):
    """Return {'item_quantity': int, 'units': str} read from doc's entities, or {}.

    QUANTITY entities supply a number and a unit; CARDINAL entities supply a
    number with units 'NA'. Entities without a parsable integer are skipped
    instead of raising. When several entities qualify, the last one wins.
    """
    found = {}
    for ent in doc.ents:
        if ent.label_ == 'QUANTITY':
            number = re.search(r'\d+', ent.text)
            if number is None:
                # e.g. a spelled-out amount: nothing to convert
                continue
            unit = re.search(r'\D+', ent.text)
            unit_text = unit.group().strip() if unit else ''
            found['item_quantity'] = int(number.group())
            if unit_text:
                found['units'] = _UNIT_ALIASES.get(unit_text.lower(), unit_text)
            else:
                found['units'] = 'NA'
        elif ent.label_ == 'CARDINAL':
            try:
                count = int(ent.text)
            except ValueError:
                # entity text is not a plain integer literal
                continue
            found['item_quantity'] = count
            found['units'] = 'NA'
    return found


def _extract_item(doc, menu):
    """Return the menu entry matching a token of doc case-insensitively, else None.

    When several tokens match, the last one wins.
    """
    by_lower = {name.lower(): name for name in menu}
    item = None
    for token in doc:
        item = by_lower.get(token.text.lower(), item)
    return item


def _change_stock(collection, item, units, delta, upsert):
    """Add delta to item_quantity of the (item, units) document.

    Returns True when a document was matched or newly inserted, False otherwise.
    """
    result = collection.update_one(
        {'item': item, 'units': units},
        {'$inc': {'item_quantity': delta}},
        upsert=upsert,
    )
    return result.matched_count > 0 or result.upserted_id is not None


# predicting the label from input message
processed = text_cleaner(input_message)
vector = word_vectorizer.transform([processed])
pred = pickled_model.predict(vector)

label = le.inverse_transform(np.array(pred))

# predicting the action from input message
vector_action = word_vectorizer_action.transform([processed])
pred_action = pickled_model_action.predict(vector_action)
action_label = le1.inverse_transform(np.array(pred_action))[0]
print('action label: ', action_label)


# available menu
menu = ['Biscuits','Milk','Sandwich','Fruits','Wheat','Sugar','Salt','Bread','Detergent','Softdrinks','Sweets']

# actions that can be performed with inventory
add_action = ['add','append','push']
remove_action = ['remove','delete','subtract']
display_action = ['display','provide','show','offer','retrieve','extract','get']
give_action = ['give','dispatch','dispense']

# Metadata extracted from the message (item, item_quantity, units).
# NOTE(review): the name shadows the stdlib `json` module; it is kept unchanged
# in case other cells read it.
json = {}

try:

    # `label` is a one-element array; compare its single value explicitly
    if label[0] == 'ham':
        print(f"The input message '{input_message}' is valid")

        # database connection
        # SECURITY: credentials belong in the environment (MONGODB_URI). The literal
        # fallback only preserves the notebook's previous behaviour; remove it once
        # the variable is set.
        uri = os.environ.get(
            'MONGODB_URI',
            "mongodb://dhanu:dhanu@localhost:27072/?authSource=admin",
        )
        client = MongoClient(uri)
        db = client['inventory']
        collection = db['products']

        # spaCy object creation
        doc = nlp(input_message)

        # identifying the quantity entities using NER
        json.update(_extract_quantity(doc))

        # extracting the item from input message
        item1 = _extract_item(doc, menu)
        if item1 is not None:
            json['item'] = item1

        # identifying the action from input message
        action = [token.text for token in doc if token.pos_ == 'VERB']
        if action:
            print("The action from input message: ",action[0])
        else:
            # previously action[0] raised IndexError and aborted the whole cell
            print("No verb was found in the input message")

        item = json.get('item')
        units = json.get('units')
        amount = json.get('item_quantity')

        # display action processing
        if action_label in display_action:
            print("The following items are present in the inventory:\n")
            cursor = collection.find({},{'_id':0})
            item_list = list(cursor)

            df_items = pd.DataFrame(item_list)
            print(df_items)

        else:
            # input products check in the inventory
            if item is None:
                print("The specified item from input message is not in the Menu. The available menu: \n", menu)
            else:
                print("The metadata extracted from input message:\n", json)

            is_add = action_label in add_action
            is_remove = action_label in remove_action
            is_give = action_label in give_action

            if not (is_add or is_remove or is_give):
                print("There is no action from input message")

            elif amount is None:
                # previously json['units'] raised KeyError here
                print("No quantity was found in the input message")

            elif item is None or units not in _SUPPORTED_UNITS:
                print("The product from input message was not available in inventory")

            # add action process
            elif is_add:
                # upsert: adding stock for a new (item, units) pair creates its document
                _change_stock(collection, item, units, amount, upsert=True)
                print("The items are updated in database")

            # delete action process
            elif is_remove:
                # no upsert: removing from a missing document must not create one
                # holding a negative quantity
                if _change_stock(collection, item, units, -amount, upsert=False):
                    print("The items are updated in database")
                else:
                    print(f"The desired item '{item}' is not available. Please add to inventory")

            # dispatch action processing
            else:
                # filter for searching the item
                search_filter = {'item':item, 'units':units}

                # fetching the document from db (find_one returns a dict or None)
                cursor = collection.find_one(search_filter)
                if cursor:
                    db_quantity = cursor['item_quantity']
                    if units == 'NA':
                        print("Available {} stock: {} ".format(item,db_quantity))
                    else:
                        print("Available {} stock: {} {}".format(item,db_quantity,units))

                    if amount > db_quantity:
                        print("Insufficient items in inventory")
                    else:
                        print("The items are available and ready to dispense")
                        _change_stock(collection, item, units, -amount, upsert=False)
                else:
                    print(f"The desired item '{item}' is not available. Please add to inventory")

    else:
        print(f"The input message '{input_message}' was not valid")

except Exception as error:
     print("The exception is --->", error)
    

action label:  remove
The input message 'remove 5 kg of Fish to food inventory' is valid
The action from input message:  remove
The specified item from input message is not in the Menu. The available menu: 
 ['Biscuits', 'Milk', 'Sandwich', 'Fruits', 'Wheat', 'Sugar', 'Salt', 'Bread', 'Detergent', 'Softdrinks', 'Sweets']
The product from input message was not available in inventory


In [893]:
# retrieving the documents from database
# NOTE(review): `collection` is only defined by the previous cell when the message
# was classified as 'ham'.
cursor = collection.find()
for itr in cursor:
    print(itr)

{'_id': ObjectId('63c54ca3a2389fc49b021ed3'), 'item': 'Sandwich', 'units': 'kg', 'item_quantity': 221}
{'_id': ObjectId('63c7e7f76213d5f5b7afa7eb'), 'item': 'Wheat', 'units': 'kg', 'item_quantity': 8}
{'_id': ObjectId('63c7e9d66213d5f5b7afa860'), 'item': 'Fruits', 'units': 'kg', 'item_quantity': 2}
{'_id': ObjectId('63c7eba36213d5f5b7afa8c5'), 'item': 'Salt', 'units': 'kg', 'item_quantity': 2}
{'_id': ObjectId('63c7eda76213d5f5b7afa936'), 'item': 'Detergent', 'units': 'kg', 'item_quantity': 4}
{'_id': ObjectId('63c7ef506213d5f5b7afa9a1'), 'item': 'Sweets', 'units': 'kg', 'item_quantity': 18}
{'_id': ObjectId('63c8d73c6213d5f5b7afacca'), 'item': 'Sugar', 'units': 'kg', 'item_quantity': 12}
{'_id': ObjectId('63c925926213d5f5b7afb5f9'), 'item': 'Softdrinks', 'units': 'liter', 'item_quantity': 0}
{'_id': ObjectId('63c925ad6213d5f5b7afb608'), 'item': 'Milk', 'units': 'liter', 'item_quantity': 84}
{'_id': ObjectId('63c93adf6213d5f5b7afb899'), 'item': 'Milk', 'units': 'NA', 'item_quantity': 4

In [814]:
# Check whether a document exists for a given item/units pair.
search_filter = {'item':'Sandwich', 'units':'NA'}

# find_one returns the matching document (a dict) or None.
cursor = collection.find_one(search_filter)
print('yes' if cursor else 'no')

no
