In [71]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/codespace/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [72]:
import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import json
import pickle 
import warnings 
warnings.filterwarnings("ignore")


In [73]:
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD
import random

In [74]:
words=[]
classes = []
documents = []
ignore_words = ['?', '!']
data_file = open('intents/intents.json').read()
intents = json.loads(data_file)

In [75]:
for intent in intents['intents']:
    for pattern in intent['patterns']:
        
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        
        documents.append((w, intent['tag']))
        
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

In [76]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     /home/codespace/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [77]:
nltk.download('omw-1.4')

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /home/codespace/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [78]:
#lemmatize, lower each word and remove duplicates
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]

words = sorted(list(set(words)))

#sort classes
classes = sorted(list(set(classes)))

#documents = combination between patterns and intents
print (len(documents), "documents\n", documents, "\n")

#classes = intents[tags]
print (len(classes), "classes\n", classes, "\n")

#words = all words, vocabulary
print (len(words), "unique lemmatized words", words, "\n")

pickle.dump(words,open('words.pkl','wb'))
pickle.dump(classes,open('classes.pkl','wb'))

405 documents
 [(['Hi'], 'greeting'), (['How', 'are', 'you', '?'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['What', "'s", 'up'], 'greeting'), (['how', 'are', 'ya'], 'greeting'), (['heyy'], 'greeting'), (['whatsup'], 'greeting'), (['?', '?', '?', '?', '?', '?', '?', '?'], 'greeting'), (['cya'], 'goodbye'), (['see', 'you'], 'goodbye'), (['bye', 'bye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['I', 'am', 'Leaving'], 'goodbye'), (['Bye'], 'goodbye'), (['Have', 'a', 'Good', 'day'], 'goodbye'), (['talk', 'to', 'you', 'later'], 'goodbye'), (['ttyl'], 'goodbye'), (['i', 'got', 'to', 'go'], 'goodbye'), (['gtg'], 'goodbye'), (['what', 'is', 'the', 'name', 'of', 'your', 'developers'], 'creator'), (['what', 'is', 'the', 'name', 'of', 'your', 'creators'], 'creator'), (['what', 'is', 'the', 'name', 'of', 'the', 'developers'], 'creator'), (['what', 'is', 'the', 'name', 'of', 'the', 'creato

In [79]:
#creating our training data 
training = []

# create an empty array for our output
output_empty = [0] * len(classes)

# training set, bag of words for each sentence
for doc in documents:
    
    # initialize our bag of words
    bag = []
    
    # list of tokenized words for the pattern
    pattern_words = doc[0]
    
    # lemmatize each word - create base word, in attempt to represent related words
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]
    
    # create our bag of words array with 1, if word match found in current pattern
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)
    
    # output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    
    output_row[classes.index(doc[1])] = 1
    
    training.append([bag, output_row])

# shuffle our features and turn into list
random.shuffle(training)

# create train and test lists. X - patterns, Y - intents
train_x = [pattern[0] for pattern in training]
train_y = [pattern[1] for pattern in training]
print("Training data created")

Training data created


In [80]:
from tensorflow.python.framework import ops
ops.reset_default_graph()

In [81]:
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))
print("First layer: ", model.layers[0].get_weights()[0])

First layer:  [[-0.03075065 -0.0859829  -0.00761569 ...  0.08692342  0.02949463
  -0.10834949]
 [ 0.04449113  0.05554867  0.00067722 ... -0.11950869  0.11958717
   0.08889368]
 [ 0.04230911 -0.0370267  -0.09099003 ... -0.06066165  0.02549605
  -0.0911328 ]
 ...
 [ 0.05493507  0.0927335   0.01580124 ... -0.04722486  0.03800164
   0.05989435]
 [ 0.05401814  0.05534828  0.08459885 ... -0.10615571 -0.0533525
   0.01408841]
 [-0.02343772 -0.0479252   0.02184777 ...  0.10483916 -0.09252248
  -0.0702171 ]]


In [82]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [83]:
#fitting and saving the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=100, batch_size=5, verbose=1)
model.save('chatbot_model.h5', hist)

print("model created")

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

In [84]:
from keras.models import load_model
model = load_model('chatbot_model.h5')

In [85]:
intents = json.loads(open('/workspaces/Song_Rec/chatbot_test/intents/intents.json').read())

words = pickle.load(open('/workspaces/Song_Rec/chatbot_test/words.pkl','rb'))

classes = pickle.load(open('/workspaces/Song_Rec/chatbot_test/classes.pkl','rb'))

In [86]:
def clean_up_sentences(sentences):
    # tokenize the pattern - split words into array

    sentence_words = nltk.word_tokenize(sentences)
    #print(sentence_words)
    # stem each word - create short form for word

    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
    #print(sentence_words)

    return sentence_words

In [87]:
def bow(sentence, words, show_details=True):

    # tokenize the pattern

    def clean_up_sentence(sentence):
        sentence_words = nltk.word_tokenize(sentence)
        sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
        return sentence_words
    #print(bag)

    for s in clean_up_sentence(sentence):  
        for i,w in enumerate(words):
            if w == s: 
                # assign 1 if current word is in the vocabulary position
                bag[i] = 1
                if show_details:
                    print ("found in bag: %s" % w)
                #print ("found in bag: %s" % w)
    #print(bag)
    return(np.array(bag))

In [88]:
def predict_class(sentence, model):

    # filter out predictions below a threshold

    p = bow(sentence, words,show_details=False)
    #print(p)

    res = model.predict(np.array([p]))[0]
    #print(res)

    ERROR_THRESHOLD = 0.25

    results = [[i,r] for i,r in enumerate(res) if r>ERROR_THRESHOLD]
    #print(results)
    # sort by strength of probability

    results.sort(key=lambda x: x[1], reverse=True)
    #print(results)

    return_list = []

    for r in results:
        return_list.append({"intent": classes[r[0]], "probability": str(r[1])})

    return return_list
    #print(return_list)


In [89]:
def getResponse(ints, intents_json):

    tag = ints[0]['intent']
    #print(tag)

    list_of_intents = intents_json['intents']
    #print(list_of_intents)

    for i in list_of_intents:
        if(i['tag']== tag):
            result = random.choice(i['responses'])
            break
    return result


In [90]:
def chatbot_response(text): 
    
    ints = predict_class(text, model)

    #print(ints)
    res = getResponse(ints, intents)
    #print(res)
    return res

In [91]:
chatbot_response("Hello")



'HODs differ for each branch, please be more specific like: (HOD it)'