In [34]:
import pandas as pd 
import numpy as np

In [35]:
# Load the labelled training queries; later cells read its 'query' and 'tag' columns.
qdf = pd.read_csv('query_dataset.csv')

In [36]:
# Training inputs: the raw query strings as a plain Python list.
x = qdf.loc[:, 'query'].to_list()
x

['what is the basic distance for free delivery?',
 'How far you delivery for free?',
 'Is free delivery available?',
 'what is in the menu',
 'Where is the shop?',
 "what's the address?",
 'where it is located?',
 'Is there any offer?',
 'Is there any offer on 2 pizza and one choco lava?',
 'Do you have non-veg ?',
 "Can you tell me what's in the menu?"]

In [37]:
# Training targets: the tag string for each query, in the same row order.
y = qdf.loc[:, 'tag'].to_list()
y

['delivery',
 'delivery',
 'delivery',
 'menu',
 'address',
 'address',
 'address',
 'offers',
 'offers',
 'menu',
 'menu']

In [38]:
from sklearn.feature_extraction.text import CountVectorizer

# Build the bag-of-words vocabulary from the training queries and encode them.
# English stop words are dropped, so only the remaining words become feature columns.
vectorizer = CountVectorizer(stop_words='english')
# Fit on the original query strings taken from qdf rather than on `x` itself:
# this cell rebinds `x` to the encoded matrix, so fitting on `x` would feed that
# matrix back into the vectorizer if the cell were run a second time.
x = vectorizer.fit_transform(qdf['query'].tolist())
print(x.toarray())  # dense view: one row per query, one column per vocabulary word

[[0 0 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 1 0 0 0 0 1 0 0 0 1 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0]]


In [39]:
# List the vocabulary words in column order of the encoded matrix.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() when it exists and fall back on old versions.
if hasattr(vectorizer, 'get_feature_names_out'):
    # .tolist() turns the returned array into a plain list so the printout
    # keeps the same list form as the legacy method.
    feature_names = vectorizer.get_feature_names_out().tolist()
else:
    feature_names = vectorizer.get_feature_names()
print(feature_names)

['address', 'available', 'basic', 'choco', 'delivery', 'distance', 'far', 'free', 'lava', 'located', 'menu', 'non', 'offer', 'pizza', 'shop', 'tell', 'veg']


In [40]:
from sklearn.preprocessing import LabelBinarizer
# One-hot encode the tag strings: one row per query, one column per distinct tag.
lb = LabelBinarizer()
# Fit on the original tag strings taken from qdf rather than on `y` itself:
# this cell rebinds `y` to the encoded matrix, so fitting on `y` would re-fit the
# binarizer on that matrix (not on the tag names) if the cell were run again.
y = lb.fit_transform(qdf['tag'].tolist())
y

array([[0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 1, 0],
       [0, 0, 1, 0]])

In [41]:
# Show the distinct tag names the binarizer learned from the training tags.
print(lb.classes_)

['address' 'delivery' 'menu' 'offers']


In [46]:
from sklearn.ensemble import RandomForestClassifier # main algorithm for bot

In [47]:
# Fit a random forest on the bag-of-words matrix `x` against the one-hot tag matrix `y`.
# random_state is pinned so the fit is reproducible.
# NOTE(review): `y` is a 2-D indicator matrix, so the forest presumably treats each
# tag column as its own binary output — confirm that an all-zero prediction row
# is handled sensibly when decoding with lb.inverse_transform.
model = RandomForestClassifier(random_state=1)
model.fit(x,y) # algorithm will learn to identify tags

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=1, verbose=0, warm_start=False)

In [52]:
# testing: classify one query and print the predicted tag
text = 'what is in the menu?'
# Encode with the already-fitted vocabulary (transform only, no refit).
input_data = vectorizer.transform([text])
result = model.predict(input_data) # answer
# Map the predicted indicator row back to its tag name.
print(lb.inverse_transform(result))

['menu']


In [67]:
# Classify one query and keep the predicted tag in `ai_tag`, which the
# response-lookup cell uses to filter the bot replies.
text = 'do you deliver for free?'
input_data = vectorizer.transform([text])
result = model.predict(input_data)
predicted_tags = lb.inverse_transform(result)
ai_tag = predicted_tags[0]  # one query was encoded, so take the first (only) tag

In [54]:
# Classify an offer-related query and print the decoded tag.
text = 'is there any offer on pizza?'
input_data = vectorizer.transform([text])
result = model.predict(input_data)
predicted_tags = lb.inverse_transform(result)
print(predicted_tags)

['offers']


In [66]:
# Classify a query about opening hours, a topic the training tags do not cover.
# NOTE(review): none of this query's words appear in the vocabulary printed by the
# feature-names cell, so the encoded row carries no known words, yet a tag is
# still printed. Consider treating such input as "unknown" before using the tag.
text = 'does it open at 9am?'
input_data = vectorizer.transform([text])
result = model.predict(input_data)
predicted_tags = lb.inverse_transform(result)
print(predicted_tags)

['address']


TypeError: 'NoneType' object is not subscriptable

 # Bot responses

In [59]:
# Load the canned bot replies; later cells read its 'tag' and 'response' columns.
rdf = pd.read_csv('bot_response_dataset.csv')

In [62]:
# Keep only the reply rows whose tag equals the predicted tag.
resultdf = rdf.loc[rdf['tag'].eq(ai_tag)]

In [71]:
# Candidate reply strings for the predicted tag, as a plain list.
responses = resultdf['response'].tolist()

In [72]:
import random

In [73]:
# Pick one candidate reply at random; no seed is set in the visible cells, so
# the choice can differ between runs.
# random.choice raises IndexError when `responses` is empty.
random.choice(responses)

'Free delivery within 10 km'

In [74]:
import pickle

In [75]:
# Bundle everything needed at inference time into one dict for pickling:
# the label binarizer, the fitted model, the fitted vectorizer, and the
# class names as a plain list.
model_data = dict(
    binarizer=lb,
    model=model,
    vectorizer=vectorizer,
    classes=lb.classes_.tolist(),
)

In [76]:
# Serialise the bundle and write the bytes to model.pkl (binary write mode).
with open('model.pkl', 'wb') as f:
    f.write(pickle.dumps(model_data))

# testing saved model

In [79]:
# Start from an empty dict, then replace it with the bundle read back from disk.
model_loaded_data = {}

# NOTE: unpickling can execute code embedded in the file; only load a model.pkl
# that comes from a trusted source.
with open('model.pkl', 'rb') as f:
    model_loaded_data = pickle.load(f)