In [1]:
import nltk
import string

In [2]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [3]:
import pandas as pd

In [4]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

In [5]:
def text_process(mess):
    """
    Takes in a string of text, then performs the following:
    1. Remove all punctuation
    2. Remove all stopwords
    3. Returns a list of the cleaned text
    """
    # Check characters to see if they are in punctuation
    nopunc = [char for char in mess if char not in string.punctuation]

    # Join the characters again to form the string.
    nopunc = ''.join(nopunc)
    
    # Now just remove any stopwords
    return [word for word in nopunc.split() if word.lower() not in stopwords.words('english')]

In [7]:
dataSet = pd.read_csv("intentsfile.csv", names=["inputs", "intents"])
print(dataSet)

                                                inputs  \
0                        Kindly provide delivery time?   
1         In how many days it will be delivered to me?   
2                          How many days will it take?   
3                          How many days will it take?   
4                        And when it will be delivered   
5                    How many days u take for delivery   
6           Is it possible you can deliver on monday ?   
7                                 I want it on tuesday   
8                    How many days u take for delivery   
9                               Kb tk place ho jye gaa   
10          Is it possible you can deliver on monday ?   
11                          Price kya ha or kb mily gA   
12   Then wts the price of full set along with name...   
13                   How many days u take for delivery   
14                          Price kya ha or kb mily gA   
15      How many days it will take to reach Rawalpindi   
16            

In [3]:
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

In [8]:
dataSet['inputs'] = dataSet['inputs'].apply(text_process)

In [9]:
dataSet.head()

Unnamed: 0,inputs,intents
0,"[Kindly, provide, delivery, time]",Delivery Time
1,"[many, days, delivered]",Delivery Time
2,"[many, days, take]",Delivery Time
3,"[many, days, take]",Delivery Time
4,[delivered],Delivery Time


In [10]:
# Might take awhile...
bow_transformer = CountVectorizer(analyzer=text_process).fit(dataSet['inputs'])

# Print total number of vocab words
print(len(bow_transformer.vocabulary_))

338


In [11]:
dataset3 = dataSet['inputs'][3]
print(dataset3)

['many', 'days', 'take']


In [12]:
pipeline = Pipeline([
    ('bow', CountVectorizer(analyzer=text_process)),  # strings to token integer counts
    ('tfidf', TfidfTransformer()),  # integer counts to weighted TF-IDF scores
    ('classifier', MultinomialNB()),  # train on TF-IDF vectors w/ Naive Bayes classifier
])

In [13]:
msg_train, msg_test, label_train, label_test = \
train_test_split(dataSet['inputs'], dataSet['intents'], test_size=0.3)

print(len(msg_train), len(msg_test), len(msg_train) + len(msg_test))

287 123 410


In [14]:
pipeline.fit(msg_train,label_train)

Pipeline(steps=[('bow', CountVectorizer(analyzer=<function text_process at 0x00000036F777F488>,
        binary=False, decode_error='strict', dtype=<class 'numpy.int64'>,
        encoding='utf-8', input='content', lowercase=True, max_df=1.0,
        max_features=None, min_df=1, ngram_range=(1, 1), preprocesso...f=False, use_idf=True)), ('classifier', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])

In [15]:
predictions = pipeline.predict(msg_test)

In [111]:
predictions = pipeline.predict(["what is delivery charges ?"])

In [81]:
type(msg_test)

pandas.core.series.Series

In [16]:
print(predictions)

['Price' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Delivery Time'
 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order'
 'Order' 'Price' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order'
 'Order' 'Order' 'Order' 'Order' 'Delivery Time' 'Delivery Charges' 'Order'
 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order'
 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order'
 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order'
 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order'
 'Order' 'Order' 'Order' 'Order' 'Order' 'Price' 'Delivery Time' 'Order'
 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order'
 'Order' 'Order' 'Order' 'Delivery Charges' 'Order' 'Order' 'Order' 'Order'
 'Price' 'Price' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order' 'Order'
 'Delivery Time' 'Order' 'Order' 'Order' 'Order' 'Delivery Time' 'Order'
 'Order' 'Order' 'Order' 'Order' 'Order' 'Ord

In [17]:
print(classification_report(predictions,label_test))

                                                 precision    recall  f1-score   support

                               Delivery Charges       0.33      0.50      0.40         2
                                Delivery Method       0.00      0.00      0.00         0
                                  Delivery Time       0.15      0.67      0.24         6
                                          Order       0.96      0.23      0.37       110
                                   Order Status       0.00      0.00      0.00         0
                             Order cancellation       0.00      0.00      0.00         0
                                          Price       0.20      0.80      0.32         5
                                     Total Cost       0.00      0.00      0.00         0
imp questions/ queries to be answered by goshop       0.00      0.00      0.00         0

                                    avg / total       0.88      0.28      0.36       123



  'recall', 'true', average, warn_for)


In [104]:
predictions

array(['Price', 'Price', 'Price', 'Price', 'Total Cost', 'Price', 'Price',
       'Price', 'Price', 'Price', 'Price', 'Price', 'Price', 'Price',
       'Price', 'Price', 'Price', 'Price', 'Price', 'Price', 'Price',
       'Price', 'Price', 'Price', 'Delivery Time', 'Price'],
      dtype='<U16')

In [88]:
msg_test

305                                           [discount]
215                   [u, send, original, pic, GSW, 504]
237                                               [much]
380                      [product, liked, delivery, thn]
186                           [Exchange, return, policy]
132                                               [want]
1                                [many, days, delivered]
109              [Prices, delivery, charges, n, karachi]
264                        [Ok, rough, idea, abt, price]
2                                     [many, days, take]
375                                 [guarattee, leather]
295                    [price, brown, wallet, name, itt]
377                                            [genuine]
4                                            [delivered]
61                                         [Ok, kab, ga]
409                              [change, order, GSW565]
198             [Different, sizes, one, size, Available]
272              [Normal, walle

In [45]:
label_test

360    imp questions/ queries to be answered by goshop
101                                   Delivery Charges
351    imp questions/ queries to be answered by goshop
186                                              Order
8                                        Delivery Time
395    imp questions/ queries to be answered by goshop
226                                       Order Status
116                                    Delivery Method
379    imp questions/ queries to be answered by goshop
104                                   Delivery Charges
86                                       Delivery Time
295                                              Price
54                                       Delivery Time
313                                         Total Cost
329                                         Total Cost
202                                              Order
191                                              Order
150                                              Order
187       

In [26]:
labelTestDF = pd.DataFrame(label_test)
predictionsDF = pd.DataFrame(predictions)
print(labelTestDF.shape)
print(predictionsDF.shape)

(123, 1)
(123, 1)


In [53]:
labelTestDF

Unnamed: 0,intents
360,imp questions/ queries to be answered by goshop
101,Delivery Charges
351,imp questions/ queries to be answered by goshop
186,Order
8,Delivery Time
395,imp questions/ queries to be answered by goshop
226,Order Status
116,Delivery Method
379,imp questions/ queries to be answered by goshop
104,Delivery Charges


In [54]:
predictionsDF

Unnamed: 0,0
0,Order
1,Order
2,Order
3,Order
4,Delivery Time
5,Order
6,Order
7,Order
8,Order
9,Order


In [30]:
result = pd.concat([predictionsDF[0], labelTestDF1['intents'], testmDF['inputs']], axis=1, ignore_index=False)
result.head(25)

Unnamed: 0,0,intents,inputs
0,Price,Price,
1,Order,Delivery Time,"[many, days, delivered]"
2,Order,Order,
3,Order,Delivery Time,
4,Order,imp questions/ queries to be answered by goshop,
5,Order,imp questions/ queries to be answered by goshop,"[many, days, u, take, delivery]"
6,Order,Order,
7,Delivery Time,Delivery Time,
8,Order,Delivery Time,
9,Order,Delivery Time,


In [67]:
res1 = pd.DataFrame(columns=["pred", "label"], data=[predictionsDF[0], labelTestDF['intents']], index=None)

In [68]:
res1.head()

Unnamed: 0,pred,label
0,,
intents,,


In [55]:
print(result)

                                                   0  \
0                                      Delivery Time   
1                                      Delivery Time   
2                                      Delivery Time   
3                                      Delivery Time   
4                                      Delivery Time   
5                                      Delivery Time   
6                                      Delivery Time   
7                                              Price   
8                                      Delivery Time   
9                                      Delivery Time   
10                                     Delivery Time   
11                                     Delivery Time   
12                                     Delivery Time   
13   imp questions/ queries to be answered by goshop   
14                                     Delivery Time   
15                                     Delivery Time   
16                                     Delivery 

In [27]:
labelTestDF1 = labelTestDF.reset_index()

In [28]:
labelTestDF1 = labelTestDF.reset_index()

Unnamed: 0,index,intents
0,257,Price
1,77,Delivery Time
2,126,Order
3,54,Delivery Time
4,359,imp questions/ queries to be answered by goshop


In [29]:
testmDF = pd.DataFrame(msg_test)
testmDF.head()

Unnamed: 0,inputs
257,"[price, wallet]"
77,"[u, snd, pictrs, gsw, 547nd, delivery, days]"
126,"[want, product]"
54,"[kub, tuk, mil, jy, ga]"
359,[u]


In [23]:
testmDF = testmDF.reset_index()

In [24]:
testmDF.head()

Unnamed: 0,index,inputs
0,257,"[price, wallet]"
1,77,"[u, snd, pictrs, gsw, 547nd, delivery, days]"
2,126,"[want, product]"
3,54,"[kub, tuk, mil, jy, ga]"
4,359,[u]
