In [1]:
#import and loading json file

import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import json
import pickle
import numpy as np
# from keras.models import Sequential
# from keras.layers import Dense, Activation, Dropout
# from keras.optimizers import SGD
import random

In [2]:
# Accumulators that the preprocessing loop fills in.
words = []
classes = []
documents = []
# Punctuation tokens that are excluded from the vocabulary.
ignore_words = ['?', '!']
# Read the intents file through a context manager so the handle is closed as
# soon as the text is in memory (a bare open(...).read() leaves it open).
with open('grp7.json') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)
intents

{'intents': [{'tag': 'PY1',
   'patterns': ['python'],
   'responses': ['Is python a programming or scripting language?'],
   'options': ['Programming', 'scripting', 'both', 'none'],
   'right_key': 'programming',
   'wrong_key': 'programming',
   'answer': 'programming',
   'r_reward': 10},
  {'tag': 'PY2',
   'patterns': ['programming'],
   'responses': ['Is Python case sensitive when dealing with identifiers?'],
   'options': ['yes', 'no', 'machine dependent', 'none of the mentioned'],
   'right_key': 'case sensitive',
   'wrong_key': 'case sensitive',
   'answer': 'yes',
   'r_reward': 10},
  {'tag': 'PY3',
   'patterns': ['case sensitive'],
   'responses': ['Which of the following symbols are used for comments in Python?'],
   'options': ['//', '*/', '/**/', '#'],
   'right_key': 'comments',
   'wrong_key': 'comments',
   'answer': '#',
   'r_reward': 10},
  {'tag': 'PY4',
   'patterns': ['comments'],
   'responses': ['Which of the following operators has the highest precedence?']

In [3]:
#preprocessing 

for entry in intents['intents']:
    for phrase in entry['patterns']:
        # split the pattern into word tokens and grow the raw vocabulary
        tokens = nltk.word_tokenize(phrase)
        words.extend(tokens)
        # remember which tag this tokenised pattern belongs to
        label = entry['tag']
        documents.append((tokens, label))
        # register each tag once, in first-seen order
        if label in classes:
            continue
        classes.append(label)

In [4]:
classes

['PY1', 'PY2', 'PY3', 'PY4', 'PY5', 'PY6', 'PY7', 'PY8', 'PY9', 'PY10']

In [5]:
documents

[(['python'], 'PY1'),
 (['programming'], 'PY2'),
 (['case', 'sensitive'], 'PY3'),
 (['comments'], 'PY4'),
 (['precedence'], 'PY5'),
 (['keyword'], 'PY6'),
 (['method'], 'PY7'),
 (['data', 'type'], 'PY8'),
 (['file', 'handling'], 'PY9'),
 (['list'], 'PY10')]

In [6]:
# Normalise the vocabulary: skip ignored punctuation, lower-case and lemmatize
# every token, then keep one sorted copy of each distinct word.
words = sorted({lemmatizer.lemmatize(token.lower()) for token in words if token not in ignore_words})
len(words)

13

In [7]:
words

['case',
 'comment',
 'data',
 'file',
 'handling',
 'keyword',
 'list',
 'method',
 'precedence',
 'programming',
 'python',
 'sensitive',
 'type']

In [8]:
# De-duplicate the tags and keep them in sorted (string) order
classes = sorted(set(classes))
classes

['PY1', 'PY10', 'PY2', 'PY3', 'PY4', 'PY5', 'PY6', 'PY7', 'PY8', 'PY9']

In [9]:
# documents = one (tokens, tag) pair per pattern
print("%d documents" % len(documents))

10 documents


In [10]:
# classes = the intent tags
print("%d classes %s" % (len(classes), classes))

10 classes ['PY1', 'PY10', 'PY2', 'PY3', 'PY4', 'PY5', 'PY6', 'PY7', 'PY8', 'PY9']


In [11]:
# words = the full lemmatized vocabulary
print("%d unique lemmatized words %s" % (len(words), words))

13 unique lemmatized words ['case', 'comment', 'data', 'file', 'handling', 'keyword', 'list', 'method', 'precedence', 'programming', 'python', 'sensitive', 'type']


In [12]:
# Persist the vocabulary and the class list. The context managers flush and
# close each file, so the pickles are complete on disk before they are read back.
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

In [13]:
# rows of [bag, one-hot label] are collected here
training = list()
# all-zero label template, one slot per class
output_empty = [0 for _ in classes]
output_empty

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [14]:
# training set, bag of words for each sentence
for doc in documents:
    # list of tokenized words for the pattern
    pattern_words = doc[0]
    # lemmatize each word so it can be compared with the lemmatized vocabulary
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]

    print(pattern_words)

    # bag of words: one 0/1 flag per vocabulary word, in vocabulary order
    bag = []
    for w in words:
        flag = 1 if w in pattern_words else 0
        print(w, flag)
        bag.append(flag)

    # output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])
# shuffle our features and turn into np.array
random.shuffle(training)
# Each row pairs a len(words)-long bag with a len(classes)-long label, so the
# nesting is ragged. dtype=object keeps an (n_documents, 2) array of lists;
# without it recent NumPy releases raise ValueError instead of building the array.
training = np.array(training, dtype=object)


['python']
case 0
comment 0
data 0
file 0
handling 0
keyword 0
list 0
method 0
precedence 0
programming 0
python 1
sensitive 0
type 0
['programming']
case 0
comment 0
data 0
file 0
handling 0
keyword 0
list 0
method 0
precedence 0
programming 1
python 0
sensitive 0
type 0
['case', 'sensitive']
case 1
comment 0
data 0
file 0
handling 0
keyword 0
list 0
method 0
precedence 0
programming 0
python 0
sensitive 1
type 0
['comment']
case 0
comment 1
data 0
file 0
handling 0
keyword 0
list 0
method 0
precedence 0
programming 0
python 0
sensitive 0
type 0
['precedence']
case 0
comment 0
data 0
file 0
handling 0
keyword 0
list 0
method 0
precedence 1
programming 0
python 0
sensitive 0
type 0
['keyword']
case 0
comment 0
data 0
file 0
handling 0
keyword 1
list 0
method 0
precedence 0
programming 0
python 0
sensitive 0
type 0
['method']
case 0
comment 0
data 0
file 0
handling 0
keyword 0
list 0
method 1
precedence 0
programming 0
python 0
sensitive 0
type 0
['data', 'type']
case 0
comment 0
data 1

In [15]:
training

array([[list([0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]),
        list([0, 0, 0, 0, 0, 0, 0, 0, 0, 1])],
       [list([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]),
        list([0, 1, 0, 0, 0, 0, 0, 0, 0, 0])],
       [list([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
        list([0, 0, 0, 0, 0, 0, 0, 0, 1, 0])],
       [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]),
        list([1, 0, 0, 0, 0, 0, 0, 0, 0, 0])],
       [list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),
        list([0, 0, 0, 0, 0, 0, 1, 0, 0, 0])],
       [list([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]),
        list([0, 0, 0, 1, 0, 0, 0, 0, 0, 0])],
       [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]),
        list([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])],
       [list([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
        list([0, 0, 0, 0, 1, 0, 0, 0, 0, 0])],
       [list([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]),
        list([0, 0, 0, 0, 0, 0, 0, 1, 0, 0])],
       [list([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]),
        list([0, 0, 0, 0, 0

In [16]:
bag

[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]

In [17]:
# Split the [bag, label] pairs column-wise. X - patterns, Y - intents
train_x = [pair[0] for pair in training]
train_y = [pair[1] for pair in training]
print("Training data created")

Training data created


In [18]:
train_x

[[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]]

In [19]:
train_y


[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]]

In [20]:
# Convert every one-hot row of train_y into the integer class index SVC expects.
# The position is computed from scratch for each row, so one malformed row can
# no longer shift the labels of the rows after it, and a row without a 1 raises
# ValueError instead of silently leaving train_svm_y shorter than train_x.
train_svm_y = []
for i in range(len(train_y)):
    train_svm_y.append(list(train_y[i]).index(1))

train_svm_y

[9, 1, 8, 0, 6, 3, 2, 4, 7, 5]

In [21]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV,KFold

# Grid-search an RBF-kernel SVM over C and gamma with 2-fold cross-validation.
pipeline = Pipeline([('clf', SVC(kernel='rbf'))])
parameters = {
    'clf__gamma': (0.01, 0.03, 0.1, 0.3, 1),
    'clf__C': (0.1, 0.3, 1, 3, 10, 30,100),
}
# Fixed seed for the fold shuffle. This used to be `random_state=i`, where `i`
# was whatever an earlier cell's loop left behind (9 for the ten training rows),
# so the split depended on hidden kernel state and broke on a fresh kernel if
# that loop changed.
inner_cv = KFold(n_splits=2, shuffle=True, random_state=9)
# NOTE(review): in the run shown every label occurs exactly once, so each
# validation fold only holds classes its training fold never saw; the accuracy
# scores probably cannot rank the candidates - confirm before trusting C/gamma.
grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1,verbose=1,cv=inner_cv, scoring='accuracy')
grid_search.fit(train_x, train_svm_y)
print('Best parameters set:')
best_parameters = grid_search.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
    print ('\t%s: %r' % (param_name, best_parameters[param_name]))

best_c = grid_search.best_params_['clf__C']
best_gamma = grid_search.best_params_['clf__gamma']
#     predictions = grid_search.predict(X_test)
#     print (classification_report(y_test, predictions) )

# Refit a standalone classifier with the selected hyper-parameters on all rows;
# this `model` is what the chatbot cells use for prediction.
model=SVC(C=best_c,gamma=best_gamma)
model.fit(train_x, train_svm_y)

Fitting 2 folds for each of 35 candidates, totalling 70 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Best parameters set:
	clf__C: 0.1
	clf__gamma: 0.01


[Parallel(n_jobs=-1)]: Done  62 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done  70 out of  70 | elapsed:    1.7s finished


SVC(C=0.1, gamma=0.01)

In [22]:
# # Create model - 3 layers. First layer 128 neurons, second layer 64 neurons and 3rd output layer contains number of neurons
# # equal to number of intents to predict output intent with softmax
# model = Sequential()
# model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(len(train_y[0]), activation='softmax'))

# # Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
# sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
# model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# #fitting and saving the model 
# hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
# model.save('chatbot_model.h5', hist)

# print("model created")


In [33]:
# a=model.predict([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]])
# a

array([0])

In [24]:
import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import pickle
import numpy as np

# from keras.models import load_model
# model = load_model('chatbot_model.h5')
import json
import random
# Reload the intents plus the vocabulary/class pickles. Context managers close
# each file as soon as it has been read.
with open('grp7.json') as intents_file:
    intents = json.load(intents_file)
# NOTE: pickle.load can execute arbitrary code from the file; only load pickles
# that this notebook wrote itself.
with open('words.pkl', 'rb') as words_file:
    words = pickle.load(words_file)
with open('classes.pkl', 'rb') as classes_file:
    classes = pickle.load(classes_file)

In [51]:
def clean_up_sentence(sentence):
    """Tokenize *sentence* and return its tokens lower-cased and lemmatized.

    Uses the module-level ``nltk`` tokenizer and ``lemmatizer``; the result is
    a list with one normalised string per token, in sentence order.
    """
    return [lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(sentence)]

def bow(sentence, words, show_details=True):
    """Encode *sentence* as a 0/1 bag-of-words vector over *words*.

    Args:
        sentence: raw text; it is normalised with ``clean_up_sentence``.
        words: the vocabulary; position ``k`` of the result refers to the
            k-th vocabulary word.
        show_details: when true, print a line for every vocabulary hit.

    Returns:
        A NumPy array with 1 where the vocabulary word occurs in the sentence
        and 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)
    # start from an all-zero vector, one slot per vocabulary word
    presence = [0 for _ in range(len(words))]
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            # this vocabulary slot is present in the sentence
            presence[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)
    return np.array(presence)

def predict_class(sentence,model):
    """Predict the intent tag(s) for *sentence* with a fitted classifier.

    Args:
        sentence: raw text to classify.
        model: object whose ``predict`` accepts a 2-D array holding one
            bag-of-words row and returns an iterable of integer class indices.

    Returns:
        A list with one ``{"intent": tag}`` dict per prediction; each tag is
        looked up in the module-level ``classes`` list.
    """
    # bow() yields a flat vector; wrap it so the model sees a one-row batch
    p = bow(sentence, words, show_details=False)
    res = model.predict(np.array([p]))
    # Use the loop value itself: the previous code indexed res[0] for every
    # element, repeating the first prediction instead of reporting each one.
    return [{"intent": classes[i]} for i in res]


def _find_intent(ints, intents_json):
    """Return the first intent entry whose tag equals the top prediction.

    Args:
        ints: list of ``{"intent": tag}`` dicts; only the first is used.
        intents_json: dict with an ``'intents'`` list of entries keyed by ``'tag'``.

    Raises:
        IndexError: if ``ints`` is empty.
        LookupError: if no entry carries the predicted tag (previously this
            surfaced as an UnboundLocalError on ``result``).
    """
    tag = ints[0]['intent']
    for entry in intents_json['intents']:
        if entry['tag'] == tag:
            return entry
    raise LookupError('no intent with tag %r' % (tag,))


def getResponse(ints, intents_json):
    """Return one randomly chosen response text for the predicted intent."""
    return random.choice(_find_intent(ints, intents_json)['responses'])

def getOptions(ints, intents_json):
    """Return the answer options stored for the predicted intent."""
    return _find_intent(ints, intents_json)['options']

def getRightKeyword(ints, intents_json):
    """Return the ``right_key`` value stored for the predicted intent."""
    return _find_intent(ints, intents_json)['right_key']

def getWrongKeyword(ints, intents_json):
    """Return the ``wrong_key`` value stored for the predicted intent."""
    return _find_intent(ints, intents_json)['wrong_key']

def getAnswer(ints, intents_json):
    """Return the ``answer`` value stored for the predicted intent."""
    return _find_intent(ints, intents_json)['answer']

def getReward(ints, intents_json):
    """Return the ``r_reward`` value stored for the predicted intent."""
    return _find_intent(ints, intents_json)['r_reward']


# Module-level quiz state; each name starts as its own empty list.
answer_list, reward_list, user_answer_list = [], [], []

def getTotalReward(intents_json):
    """Return the sum of ``r_reward`` over every entry in ``intents_json['intents']``."""
    return sum(entry['r_reward'] for entry in intents_json['intents'])

def _show_result(pass_mark=70):
    """Show the accumulated reward total with a Pass/Fail verdict via byefn."""
    total = sum(reward_list)
    res = 'Pass' if total >= pass_mark else 'Fail'
    byefn('Total Rewards:'+str(total)+'\nResult: '+res)


def chatbot_response(msg,user_answer,reward):
    """Handle one answer click: record its reward and show the next screen.

    Args:
        msg: keyword used to predict the next question; ``"no"`` and ``"end"``
            end the quiz instead.
        user_answer: label of the clicked option (not used by this function).
        reward: reward for the clicked option; appended to ``reward_list``
            once the next question has been looked up successfully.

    The next question is looked up first. A LookupError there (the IndexError
    caught before, or a KeyError from an intent entry missing a field) prints
    'INVALID KEYWORD'. If the quiz is not ending and no question was found, the
    result screen is shown; previously this path fell through to ``button``
    with unbound variables and raised NameError.
    """
    question = None
    try:
        ints = predict_class(msg, model)
        question = (
            getResponse(ints, intents),
            getOptions(ints, intents),
            getRightKeyword(ints, intents),
            getWrongKeyword(ints, intents),
            getReward(ints, intents),
            getAnswer(ints, intents),
        )

        #answer_list.append(answer)
        reward_list.append(int(reward))
    except LookupError:
        print ('INVALID KEYWORD')
    if msg == "no":
        byefn("BYE!")
    elif msg == "end":
        _show_result()
    elif question is None:
        # nothing to ask: close the quiz with the score collected so far
        print ('Response Not Found!')
        _show_result()
    else:
        try:
            button(*question)
        except IndexError:
            print ('Response Not Found!')
            _show_result()

In [52]:
import tkinter
from tkinter import Tk,Frame,Canvas,Label,Button,BOTTOM

# Main window: light-blue background, 1000x500 pixels.
root = Tk()
root.config(bg='lightblue')
root.geometry("1000x500")

    
# Header frame holding the 'INTERVIEW' banner.
f1=Frame(root,bg='lightblue',width=400, height=500, relief='raised', borderwidth=1)
f1.pack()
Label(f1,text='INTERVIEW',bg='lightblue',fg='black',font=('Times 30 bold')).pack()


# Canvas that hosts the stacked question / result frames created below.
c1=Canvas(root,bg='lightblue')
c1.pack()


# Welcome frame; the START button is packed into it further down.
f2=Frame(root,bg='lightblue')
f2.pack()
Label(f2,text='Welcome Guest',bg='lightblue',fg='black',font=('Times 20 bold')).pack()


# Three frames gridded into the same cell (row 0, column 0) of the canvas.
# byefn() raises f5 to show the final message; button() builds a fresh local
# frame named f4 on every call, so this module-level f4 is not the one it fills.
f3=Frame(c1,bg='lightblue')
f3.grid(row=0,column=0,stick='nsew')
f4=Frame(c1,bg='lightblue')
f4.grid(row=0,column=0,stick='nsew')
f5=Frame(c1,bg='lightblue')
f5.grid(row=0,column=0,stick='nsew')


def start_ques():
    """Start (or restart) the quiz: show the opening prompt and reset the score."""
    button("Are you ready to begin",["Yes","No"],"python","no",answer="Yes")
    # Reset the shared list in place. The old `reward_list = [0]` only bound a
    # function-local name, so rewards from an earlier run were never cleared.
    reward_list[:] = [0]

# START (inside the welcome frame f2) opens the quiz through start_ques().
Button(f2,text='START',bd=0,bg='green',font=('Times 30 bold'),fg='black',relief='flat',activebackground='gray',activeforeground='green',command=lambda: start_ques()).pack(side=BOTTOM)
 
    
# QUIT (on the root window) destroys the window.
Button(root,text='QUIT',bd=0,bg='green',font=('Times 30 bold'),fg='black',relief='flat',activebackground='gray',activeforeground='green',command=lambda: root.destroy()).pack(side=BOTTOM)
 

def byefn(rew):
    """Bring frame ``f5`` to the front and pack a new label showing *rew* in it."""
    f5.tkraise()
    Label(
        f5,
        pady=65,
        text=rew,
        fg='black',
        bg='lightblue',
        font=('Times 30 bold'),
    ).pack()

def button(res,ops,right_key,wrong_key="no",right_reward=0,answer="python"):
    """Show question *res* with one clickable button per option in *ops*.

    Args:
        res: question text.
        ops: iterable of option strings, shown numbered from 1.
        right_key: keyword passed on when the correct option is clicked.
        wrong_key: keyword passed on for any other option.
        right_reward: reward attached to the correct option; every other
            option carries a reward of 1.
        answer: text of the correct option.

    An option is correct when it equals *answer* exactly. If no option matches
    exactly, the comparison falls back to ignoring case. Otherwise data such
    as option 'Programming' with answer 'programming' leaves the question with
    no correct choice at all.
    """
    f4=Frame(c1,bg='lightblue')
    f4.grid(row=0,column=0,stick='nsew')
    q1=Label(f4,text=res,fg='black',bg='lightblue',font=('Times 15 bold'))
    q1.pack()
    ops = list(ops)
    # Prefer exact matching so options differing only by case stay distinct.
    exact = answer in ops

    def _is_answer(op):
        if exact:
            return op == answer
        return str(op).lower() == str(answer).lower()

    for ch, op in enumerate(ops, start=1):
        if _is_answer(op):
            key = right_key
            reward = right_reward
        else:
            key = wrong_key
            reward = 1
        button_options(f4,str(ch)+'. '+op,key,reward)
    
def button_options(fr_no,op,key,reward):
    """Pack one option button into *fr_no*; a click forwards (key, op, reward) to chatbot_response."""
    def _on_click():
        return chatbot_response(key, op, reward)

    choice = Button(
        fr_no,
        text=op,
        bd=0,
        fg='green',
        font=('Times 15 bold'),
        bg='black',
        relief='flat',
        activebackground='gray',
        activeforeground='green',
        command=_on_click,
    )
    choice.pack()
        
# Set the window title, then hand control to the Tk event loop.
root.title('GoFree Passport')
root.mainloop()

In [None]:
# x=[7]
# x.reshape(-1,1)
# x

In [None]:
# table=[]
# for r in range(0):
#     row=[0]*len(classes)
#     for c in range():
#         row.append(0)
# table.append(row)
# table

In [None]:
# a=[7]
# b=[]
# b.append(a)
# b

In [None]:
# a=len(classes)
# a

In [None]:
# res=[7]

In [None]:
# res_svm=[[0 for i in range(len(classes))] for j in range(1)]
# res_svm

In [None]:
# for i in res_svm:
#     i[res[0]]=1
    
# res_svm

In [None]:
# res = model.predict([[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]])

In [None]:
# aa=np.array(res)

In [48]:
# c=[{'intent': 'PY1'}]
# ttag=c['intent']
# ttag

TypeError: list indices must be integers or slices, not str