In [409]:
import pandas as pd
pd.set_option('display.max_colwidth', 200)

import numpy as np
import re

# for NLP related tasks
import spacy

# Module-level spaCy pipeline. It is read as a plain global by the cells below
# (text_cleaner and the database cell), so no `global` statement is needed:
# `global` only has an effect inside a function body.
nlp = spacy.load('en_core_web_sm')

# for mongodb operations
from pymongo import MongoClient

# saving model as pickle
import pickle

In [521]:
# Location of the labelled messages (columns: text, label).
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
INVENTORY_CSV = r'C:/Users/DAG9KOR/Downloads/ProjectMulticlasstextclassification\inventory.csv'

df = pd.read_csv(INVENTORY_CSV)
print('Shape -->', df.shape)
df.head()

Shape --> (42, 2)


Unnamed: 0,text,label
0,add 5 kg of Biscuits,ham
1,play music,spam
2,add 2 litres of milk,ham
3,who is prime minister,spam
4,remove 1kg of fruits,ham


In [412]:
df['text'].sample(5)

12               add 2 kg bread in food category
24                      what do you offer for me
40                                      who am i
33    add 2 liter of softdrinks in food category
16               add 2 kg bread in food category
Name: text, dtype: object

In [413]:
df['label'].value_counts(normalize=True)

ham     0.52381
spam    0.47619
Name: label, dtype: float64

In [414]:
def text_cleaner(text):
    """Normalise a raw message into a space-joined string of lemmas.

    Steps, in order: drop ``@mentions`` and ``http...`` links, lower-case,
    replace every run of non-letters (digits, punctuation, ``#``) with one
    space, trim, run the module-level spaCy pipeline ``nlp`` and keep the
    lemma of every token that is neither a stop word nor whitespace.

    Parameters
    ----------
    text : str
        Raw input message.

    Returns
    -------
    str
        Lemmas joined by single spaces; ``""`` when nothing survives cleaning.
    """
    # remove user mentions
    text = re.sub(r'@[A-Za-z0-9]+', '', text)

    # remove links
    text = re.sub(r'http\S+', '', text)

    # convert text to lower case
    text = text.lower()

    # keep letters only (this also turns '#' of a hashtag into a space,
    # so the hashtag word itself is kept)
    text = re.sub(r"[^a-z]+", " ", text)

    # collapse whitespace and trim the ends: removing digits/punctuation can
    # leave leading or trailing spaces, which would otherwise reach the tokenizer
    text = re.sub(r"\s+", " ", text).strip()

    # create the spaCy doc object
    doc = nlp(text)

    # remove stop words and lemmatize; whitespace-only tokens are skipped so
    # they cannot leak into the joined result
    tokens = [
        token.lemma_
        for token in doc
        if not token.is_stop and not token.is_space
    ]

    # join tokens by space
    return " ".join(tokens)

In [415]:
# perform text cleaning
df['clean_text']= df['text'].apply(text_cleaner)

In [501]:
df['clean_text'].sample(5)

9                               today
23    display exisitng item inventory
30                            climate
13              nearby petrol station
35          schedule bengaluru flight
Name: clean_text, dtype: object

In [502]:
text   = df['clean_text'].values
labels = df['label'].values

In [503]:
labels[:5]

array(['ham', 'spam', 'ham', 'spam', 'ham'], dtype=object)

### Label Encoding

In [419]:
#importing label encoder
from sklearn.preprocessing import LabelEncoder

#define label encoder
le = LabelEncoder()

# Fit and transform the target strings to numbers.
# The encoder is fitted on the raw string column instead of on `labels`
# itself: re-running this cell would otherwise refit it on the already
# encoded integers, after which le.inverse_transform no longer returns
# the 'ham'/'spam' strings.
labels = le.fit_transform(df['label'].values)

In [420]:
labels[:10]

array([0, 1, 0, 1, 0, 1, 0, 0, 0, 1])

In [535]:
le.inverse_transform([0,1])

array(['ham', 'spam'], dtype=object)

In [536]:
# Label that marks a message as an inventory request (the inventory-related
# samples in the data set are labelled 'ham'). It is named explicitly rather
# than taken from position 0 of the encoder, which only matched by
# alphabetical coincidence.
valid = 'ham'
assert valid in le.classes_, "label encoder was not fitted on the 'ham'/'spam' strings"

In [465]:
from sklearn.model_selection import train_test_split

# Stratified 70/30 hold-out split of the cleaned texts and encoded labels,
# shuffled with a fixed seed.
x_train, x_val, y_train, y_val = train_test_split(
    text,
    labels,
    test_size=0.30,
    shuffle=True,
    stratify=labels,
    random_state=0,
)

In [466]:
print('x_train:',x_train.shape,'y_train:',y_train.shape)
print('x_val:',x_val.shape,'y_val:',y_val.shape)

x_train: (29,) y_train: (29,)
x_val: (13,) y_val: (13,)


In [467]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [468]:
word_vectorizer = TfidfVectorizer(max_features=1000)

In [469]:
word_vectorizer.fit(x_train)

TfidfVectorizer(max_features=1000)

In [470]:
# Persist the fitted vectorizer. The context manager closes the file so the
# pickle is fully written to disk before the cell finishes.
with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(word_vectorizer, vectorizer_file)

In [471]:
# create TF-IDF vectors for Train Set
train_word_features = word_vectorizer.transform(x_train)
train_word_features

<29x48 sparse matrix of type '<class 'numpy.float64'>'
	with 87 stored elements in Compressed Sparse Row format>

In [472]:
# create TF-IDF vectors for Validation Set
val_word_features = word_vectorizer.transform(x_val)
val_word_features

<13x48 sparse matrix of type '<class 'numpy.float64'>'
	with 24 stored elements in Compressed Sparse Row format>

## Model building

### Naive Bayes

In [473]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import f1_score

In [474]:
# Training model
nb_model=MultinomialNB().fit(train_word_features,y_train)
nb_model

MultinomialNB()

In [475]:
# save model to pickle file (the context manager closes the file so the
# pickle is fully written to disk before it is read back)
with open('nb_model.pkl', 'wb') as model_file:
    pickle.dump(nb_model, model_file)

In [476]:
# read model from pickle file
# NOTE: pickle.load can execute arbitrary code - only load files written by
# this notebook, never pickles from an untrusted source.
with open('nb_model.pkl', 'rb') as model_file:
    pickled_model = pickle.load(model_file)

# sanity check: the restored model predicts on the training features
pickled_model.predict(train_word_features)


array([1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 0, 0])

In [477]:
# Make predictions for train set
train_pred_nb=nb_model.predict(train_word_features)

In [478]:
train_pred_nb

array([1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 0, 0])

In [479]:
# Evaluating on Training Set
f1_nb_train = f1_score(y_train,train_pred_nb,average="weighted")
print("F1-score on Train Set:",f1_nb_train)

F1-score on Train Set: 0.9305371352785144


In [480]:
# Make predictions for validation set
val_pred_nb=nb_model.predict(val_word_features)

# Evaluating on Validation Set
f1_nb_val = f1_score(y_val,val_pred_nb,average="weighted")
print("F1-score on Validation Set:",f1_nb_val)

F1-score on Validation Set: 0.6495726495726496


### Logistic Regression

In [481]:
from sklearn.linear_model import LogisticRegression

In [493]:
# Training model
lr_model=LogisticRegression().fit(train_word_features, y_train)
lr_model

LogisticRegression()

In [494]:
# Make predictions for train set
train_pred_lr=lr_model.predict(train_word_features)
train_pred_lr

array([1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 0])

In [495]:
# Evaluating on Training Set
f1_lr_train = f1_score(y_train,train_pred_lr,average="weighted")
print("F1-score on Train Set:",f1_lr_train)

F1-score on Train Set: 1.0


In [533]:
# Make predictions for validation set
val_pred_lr=lr_model.predict(val_word_features)

# Evaluating on Validation Set
f1_lr_val = f1_score(y_val,val_pred_lr,average="weighted")
print("F1-score on Validation Set:", f1_lr_val)

F1-score on Validation Set: 1.0


### Linear SVC

In [528]:
from sklearn.svm import LinearSVC
lsvc = LinearSVC()

In [529]:
lsvc_model = lsvc.fit(train_word_features,y_train)

In [530]:
preds_val_lsvc = lsvc.predict(val_word_features)
preds_train_lsvc = lsvc.predict(train_word_features)

In [531]:
# Weighted F1 of the LinearSVC predictions; computed once, then reported.
train_lsvc_f1 = f1_score(y_train, preds_train_lsvc, average="weighted")
val_lsvc_f1 = f1_score(y_val, preds_val_lsvc, average="weighted")

print("F1-score on Train Set:", train_lsvc_f1)
print("F1-score on Validation Set:", val_lsvc_f1)

F1-score on Train Set: 1.0
F1-score on Validation Set: 1.0


In [532]:
import xgboost as xgb

In [523]:
xgb_cl = xgb.XGBClassifier()
xgb_cl.fit(train_word_features,y_train)

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, ...)

In [524]:
preds_val = xgb_cl.predict(val_word_features)
preds_train = xgb_cl.predict(train_word_features)

In [525]:
# Weighted F1 of the XGBoost predictions; computed once, then reported.
train_xg_f1 = f1_score(y_train, preds_train, average="weighted")
val_xg_f1 = f1_score(y_val, preds_val, average="weighted")

print("F1-score on Train Set:", train_xg_f1)
print("F1-score on Validation Set:", val_xg_f1)

F1-score on Train Set: 0.8958101594364107
F1-score on Validation Set: 0.7608391608391608


## Model Building Summary

In [526]:
# Column-oriented summary of the weighted F1 scores, one entry per model
# (same order in all three lists).
model_names = ['Naive Bayes', 'Logistic Regression', 'Linear SVC', 'XGBooster']
train_scores = [f1_nb_train, f1_lr_train, train_lsvc_f1, train_xg_f1]
val_scores = [f1_nb_val, f1_lr_val, val_lsvc_f1, val_xg_f1]

f1_df = {
    "model": model_names,
    'train_F1_score': train_scores,
    'val_F1_score': val_scores,
}

In [527]:
model_df = pd.DataFrame(f1_df)
model_df

Unnamed: 0,model,train_F1_score,val_F1_score
0,Naive Bayes,0.930537,0.649573
1,Logistic Regression,1.0,1.0
2,Linear SVC,1.0,1.0
3,XGBooster,0.89581,0.760839


## Database operations

In [637]:
import os  # local to this cell: only needed to read the MongoDB URI from the environment

input_message = 'give 300 kg of Sandwich from inventory'
# Other messages that were used to exercise this cell:
#   'add 20 kg of Biscuits to stocks inventory'
#   'add 5 kg of Fish to food inventory'
#   'what is the gdp of india'
#   'please add me to your fb account'
#   "update 12 kg of Sugar to food category"
#   "update inventory by 5 kg of Sugar"
#   'what do u offer for me'
#   'provide the existing stocks'

# available menu
menu = ['Biscuits','Milk','Sandwich','Fruits','Wheat','Sugar','Salt','Bread','Detergent','Softdrinks','Sweets']

# actions that can be performed with inventory
add_action = ['add','append','push']
remove_action = ['remove','delete','subtract']
display_action = ['display','provide','show','offer','retrieve','extract','get']
give_action = ['give','dispatch','dispense']

# Unit spellings accepted in a message -> unit string stored in the database.
UNIT_ALIASES = {
    'kg': 'kg', 'kgs': 'kg', 'kilogram': 'kg', 'kilograms': 'kg',
    'liter': 'liter', 'liters': 'liter', 'litre': 'liter', 'litres': 'liter',
}
# Unit values a stock document may carry ('NA' = plain count without a unit).
SUPPORTED_UNITS = ('kg', 'liter', 'NA')


def extract_metadata(doc):
    """Collect quantity, units and menu item from a spaCy doc.

    Parameters
    ----------
    doc : spaCy Doc
        The parsed input message (result of ``nlp(message)``).

    Returns
    -------
    dict
        May contain ``'item_quantity'`` (int), ``'units'`` (str, normalised
        through ``UNIT_ALIASES``; ``'NA'`` when the message gives a bare
        number) and ``'item'`` (the menu spelling). Keys are absent when the
        message does not provide them. When several entities or menu items
        occur, the last one wins.
    """
    metadata = {}

    # identifying the quantity entities using NER
    for ent in doc.ents:
        if ent.label_ == 'QUANTITY':
            number = re.search(r'\d+', ent.text)
            if number is None:
                # spelled-out quantity (no digits): nothing we can parse
                continue
            unit = re.search(r'[A-Za-z]+', ent.text)
            unit_text = unit.group() if unit else 'NA'
            metadata['item_quantity'] = int(number.group())
            metadata['units'] = UNIT_ALIASES.get(unit_text.lower(), unit_text)
        elif ent.label_ == 'CARDINAL' and re.fullmatch(r'\d+', ent.text.strip()):
            # only plain digit strings are converted; other cardinals are ignored
            metadata['item_quantity'] = int(ent.text.strip())
            metadata['units'] = 'NA'

    # extracting the item from input message (case-insensitive menu match)
    menu_lookup = {name.lower(): name for name in menu}
    for token in doc:
        name = menu_lookup.get(token.text.lower())
        if name is not None:
            metadata['item'] = name

    return metadata


def stock_filter(metadata):
    """Return the MongoDB filter ``{'item', 'units'}`` for *metadata*.

    Returns ``None`` when the item, the quantity or a supported unit is
    missing, i.e. when no stock document can be addressed.
    """
    if (
        metadata.get('item')
        and 'item_quantity' in metadata
        and metadata.get('units') in SUPPORTED_UNITS
    ):
        return {'item': metadata['item'], 'units': metadata['units']}
    return None


# predicting the input message label
processed = text_cleaner(input_message)
vector = word_vectorizer.transform([processed])
pred = pickled_model.predict(vector)

label = le.inverse_transform(np.array(pred))[0]

# Metadata extracted from the message. The variable name is kept from the
# original cell; note that it would shadow the stdlib `json` module if that
# were ever imported in this notebook.
json = {}

if label == valid:
    print(f"The input message '{input_message}' is valid")

    # database connection - credentials are read from the environment
    # instead of being stored in the notebook
    uri = os.environ.get('MONGO_URI')
    if not uri:
        raise RuntimeError(
            "Set the MONGO_URI environment variable, e.g. "
            "mongodb://<user>:<password>@localhost:27072/?authSource=admin"
        )
    # The client is intentionally left open: later cells query `collection`.
    client = MongoClient(uri)
    db = client['inventory']
    collection = db['products']

    # spaCy object creation
    doc = nlp(input_message)

    json = extract_metadata(doc)

    # identifying the action from input message: the first verb decides
    action = [token.text for token in doc if token.pos_ == 'VERB']
    verb = action[0].lower() if action else None

    if verb is None:
        # no verb was tagged at all
        print("There is no action from input message")

    else:
        print("The action from input message: ", action[0])

        # display action processing
        if verb in display_action:
            print("The following items are present in the inventory:\n")
            item_list = list(collection.find({}, {'_id': 0}))
            df_items = pd.DataFrame(item_list)
            print(df_items)

        else:
            # input products check in the inventory
            if json.get('item') is None:
                print("The specified item from input message is not in the Menu. The available menu: \n", menu)
            else:
                print("The metadata extracted from input message:\n", json)

            search_filter = stock_filter(json)

            if verb not in add_action and verb not in remove_action and verb not in give_action:
                print("There is no action from input message")

            elif search_filter is None:
                # item missing from the menu, no quantity, or unsupported units
                print("The product from input message was not available in inventory")

            # add action process
            elif verb in add_action:
                quantity = {'$inc': {'item_quantity': json['item_quantity']}}
                # upsert: the first delivery of an item creates its document
                collection.update_one(search_filter, quantity, upsert=True)
                print("The items are updated in database")

            # delete action process
            elif verb in remove_action:
                quantity = {'$inc': {'item_quantity': -json['item_quantity']}}
                # no upsert here: removing an unknown item must not create a
                # document with negative stock
                # NOTE(review): an existing item can still be driven below
                # zero by a remove request - confirm whether that is intended.
                result = collection.update_one(search_filter, quantity)
                if result.matched_count:
                    print("The items are updated in database")
                else:
                    print(f"The desired item '{json['item']}' is not available. Please add to inventory")

            # dispatch action processing
            else:
                # fetching the document from db
                cursor = collection.find_one(search_filter)
                if cursor:
                    db_quantity = cursor['item_quantity']
                    unit_suffix = '' if json['units'] == 'NA' else json['units']
                    print("Available {} stock: {} {}".format(json['item'], db_quantity, unit_suffix))

                    if json['item_quantity'] > db_quantity:
                        print("Insufficient items in inventory")
                    else:
                        print("The items are available and ready to dispense")
                        quantity = {'$inc': {'item_quantity': -json['item_quantity']}}
                        collection.update_one(search_filter, quantity)
                else:
                    print(f"The desired item '{json['item']}' is not available. Please add to inventory")

else:
    print(f"The input message '{input_message}' was not valid")
    

The input message 'give 300 kg of Sandwich from inventory' is valid
The action from input message:  give
The metadata extracted from input message:
 {'item_quantity': 300, 'units': 'kg', 'item': 'Sandwich'}
Available Sandwich stock: 246 kg
Insufficient items in inventory


In [630]:
# retrieving the documents from database
cursor = collection.find()
for itr in cursor:
    print(itr)

{'_id': ObjectId('63c54ca3a2389fc49b021ed3'), 'item': 'Sandwich', 'units': 'kg', 'item_quantity': 246}
{'_id': ObjectId('63c7e7f76213d5f5b7afa7eb'), 'item': 'Wheat', 'units': 'kg', 'item_quantity': 8}
{'_id': ObjectId('63c7e9d66213d5f5b7afa860'), 'item': 'Fruits', 'units': 'kg', 'item_quantity': 2}
{'_id': ObjectId('63c7eba36213d5f5b7afa8c5'), 'item': 'Salt', 'units': 'kg', 'item_quantity': 2}
{'_id': ObjectId('63c7eda76213d5f5b7afa936'), 'item': 'Detergent', 'units': 'kg', 'item_quantity': 4}
{'_id': ObjectId('63c7ef506213d5f5b7afa9a1'), 'item': 'Sweets', 'units': 'kg', 'item_quantity': 18}
{'_id': ObjectId('63c8d73c6213d5f5b7afacca'), 'item': 'Sugar', 'units': 'kg', 'item_quantity': 24}
{'_id': ObjectId('63c925926213d5f5b7afb5f9'), 'item': 'Softdrinks', 'units': 'liter', 'item_quantity': 0}
{'_id': ObjectId('63c925ad6213d5f5b7afb608'), 'item': 'Milk', 'units': 'liter', 'item_quantity': 84}
{'_id': ObjectId('63c93adf6213d5f5b7afb899'), 'item': 'Milk', 'units': 'NA', 'item_quantity': 4

In [635]:
# Check whether a unit-less ('NA') Sandwich stock document exists.
search_filter = {'item': 'Sandwich', 'units': 'NA'}
cursor = collection.find_one(search_filter)  # a single document or None
print('yes' if cursor else 'no')

no
