In [1]:
import nltk
# nltk.download('punkt')  # run once: nltk.word_tokenize needs the punkt tokenizer models
from nltk.stem.lancaster import  LancasterStemmer
from nltk.stem import WordNetLemmatizer
# nltk.download('wordnet') 
import numpy as np
import tensorflow as tf
import tflearn
import random
import json
import pandas as pd

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mogam\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\mogam\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Instructions for updating:
non-resource variables are not supported in the long term
curses is not supported on this machine (please install/reinstall curses for an optimal experience)


# Import Data

In [3]:
# Path to the intents JSON file (despite the name, this is a file path, not a directory).
data_dir = 'data/intents.json'
# Load the file into a DataFrame; later cells iterate over its 'intents' column.
intents = pd.read_json(data_dir)
# Preview the first rows as this cell's output.
intents.head()

Unnamed: 0,intents
0,"{'tag': 'greeting', 'patterns': ['Hi', 'Hey', ..."
1,"{'tag': 'morning', 'patterns': ['Good morning'..."
2,"{'tag': 'afternoon', 'patterns': ['Good aftern..."
3,"{'tag': 'evening', 'patterns': ['Good evening'..."
4,"{'tag': 'night', 'patterns': ['Good night'], '..."


## Tokenizing Words

In [4]:
# Accumulators filled while walking the intents:
#   words     - every token seen in any pattern (duplicates kept for now)
#   classes   - each intent tag, in first-seen order
#   documents - (token list, tag) pairs, one per pattern
words = []
classes = []
documents = []
# Tokens dropped later when the vocabulary is built.
ignore = ['?']

# Tokenize every pattern of every intent and record it under the intent's tag.
for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        words += tokens
        documents.append((tokens, tag))
        # Checked per pattern, so an intent without patterns adds no class.
        if tag not in classes:
            classes.append(tag)

## Lemmatization

In [5]:
lemmatizer = WordNetLemmatizer()

# Build the vocabulary: drop ignored tokens, lower-case and lemmatize the rest,
# then de-duplicate and sort so every word has a stable index in the bag-of-words vectors.
words = sorted({lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore})
# Same treatment for the class labels: unique and sorted.
classes = sorted(set(classes))

print(len(documents), 'documents')
print(len(classes), 'classes', classes)
# The tokens are lemmatized (WordNetLemmatizer), not stemmed, so label them accordingly.
print(len(words), 'unique lemmatized words', words)

506 documents
243 classes ['', ' know  social anxiety? ', ' many issues', ' think about death all the time', 'Addiction,Substance Abuse,Anxiety', 'All I can do is cry and hate myself', 'Am I being picky when it comes to my boyfriend? ', 'Am I going to be alone forever? ', 'Am I somehow stressing myself out? ', 'Am I unworthy of being in a meaningful relationship? ', 'Anger Management', 'Anger Management,Depression,Relationships', 'Anger Management,Domestic Violence', 'Anger Management,Family Conflict', 'Anger Management,Relationships', 'Anger Management,Relationships,Social Relationships', 'Anger Management,Sleep Improvement', 'Anger Management,Social Relationships,Relationships', 'Anger Management,behavioural Change', 'Anxiety, hearing', 'Anxiety,Career counselling', 'Anxiety,behavioural Change', 'Bipolar', 'Can I change my feeling of being worthless to everyone? ', 'Can i learn to be happy alone? ', 'Career counselling,Professional Ethics', 'Crave depression', 'Deal with OCD', 'Depre

## Creating Training Data

In [6]:
# Each training row is [bag-of-words vector, one-hot class vector].
training = []
output = []
# Template for the one-hot class vector; copied for every document.
output_empty = [0] * len(classes)

for doc in documents:
    # Normalize the pattern's tokens the same way the vocabulary was built;
    # a set makes the per-word membership test below constant time.
    pattern_words = {lemmatizer.lemmatize(word.lower()) for word in doc[0]}
    # 1 where the vocabulary word occurs in this pattern, 0 otherwise.
    bag = [1 if w in pattern_words else 0 for w in words]

    # One-hot encode the document's tag.
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])

random.shuffle(training)

# Split features and labels straight from the shuffled rows. Slicing the object
# array instead would change layout when len(words) == len(classes), because
# numpy then builds a 3-D array rather than a 2-D array of lists.
train_x = [bag for bag, _ in training]
train_y = [output_row for _, output_row in training]

# Kept so `training` is still available as an object array, as before.
training = np.array(training, dtype=object)

## Creating Model & Training

In [7]:
# Reset TensorFlow's default graph (TF1 compat API) before the network is defined below.
tf.compat.v1.reset_default_graph()

In [8]:
# Feed-forward classifier: bag-of-words input -> two 10-unit dense layers -> softmax over the classes.
n_features = len(train_x[0])
n_classes = len(train_y[0])

net = tflearn.input_data(shape=[None, n_features])
for hidden_units in (10, 10):
    net = tflearn.fully_connected(net, hidden_units)
net = tflearn.fully_connected(net, n_classes, activation='softmax')
# Attach the training op using tflearn's default regression settings.
net = tflearn.regression(net)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [9]:
# Wrap the network in a tflearn DNN model; TensorBoard logs are written to 'tflearn_logs'.
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')

In [10]:
# Train for 1000 epochs in mini-batches of 8 samples, showing the accuracy metric while training.
model.fit(train_x, train_y, n_epoch = 1000, batch_size=8, show_metric=True)

Training Step: 63999  | total loss: [1m[32m0.04101[0m[0m | time: 0.455s
| Adam | epoch: 1000 | loss: 0.04101 - acc: 0.9617 -- iter: 504/506
Training Step: 64000  | total loss: [1m[32m0.05863[0m[0m | time: 0.461s
| Adam | epoch: 1000 | loss: 0.05863 - acc: 0.9655 -- iter: 506/506
--


In [11]:
# Persist the trained model under the 'tlmodel/model.tflearn' checkpoint path.
model.save('tlmodel/model.tflearn')

INFO:tensorflow:c:\Users\mogam\Desktop\Work\Mental Health Chatbot\mental-health-chatbot-app\py_model\tlmodel\model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


In [12]:
import pickle

# Persist the vocabulary, class labels and training vectors next to the model.
# The context manager closes the file even if pickling fails.
with open('data/training_data', 'wb') as training_file:
    pickle.dump(
        {'words': words, 'classes': classes, 'train_x': train_x, 'train_y': train_y},
        training_file,
    )

## Chatbot Testing

In [13]:
# Reload the artefacts written by the cells above.
# NOTE: pickle.load can execute arbitrary code, so only read files this notebook wrote itself.
with open('data/training_data', 'rb') as training_file:
    data = pickle.load(training_file)
words = data['words']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']

# Restore the trained weights into the model object built earlier.
model.load('tlmodel/model.tflearn')

INFO:tensorflow:Restoring parameters from c:\Users\mogam\Desktop\Work\Mental Health Chatbot\mental-health-chatbot-app\py_model\tlmodel\model.tflearn


## Creating methods for calling chatbot

In [14]:
def clean_up_sentence(sentence):
    """Tokenize ``sentence`` and return its tokens, lower-cased and lemmatized, in order."""
    normalized = []
    for token in nltk.word_tokenize(sentence):
        normalized.append(lemmatizer.lemmatize(token.lower()))
    return normalized

def bow(sentence, words, show_details = False):
    """Encode ``sentence`` as a bag-of-words vector over ``words``.

    Args:
        sentence: Raw text; it is normalized with ``clean_up_sentence``.
        words: Vocabulary list; position ``i`` of the result corresponds to ``words[i]``.
        show_details: When true, print every vocabulary word found in the sentence.

    Returns:
        A numpy array with 1 at the position of each vocabulary word that occurs
        in the sentence and 0 everywhere else.
    """
    tokens = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            bag[position] = 1
            if show_details:
                print('found in bag: %s' % vocab_word)
    return np.array(bag)

In [15]:
# Per-user conversation state: response() stores an intent's 'context_set' value here, keyed by userID.
context = {}

# Minimum predicted probability a class must exceed to be kept by classify().
ERROR_THRESHOLD = 0.25
def classify(sentence):
    """Predict the intent classes for ``sentence``.

    Returns:
        A list of ``(class_name, probability)`` tuples for every class whose
        predicted probability exceeds ``ERROR_THRESHOLD``, most probable first.
        The list is empty when no class clears the threshold.
    """
    # The model takes a batch; send a single bag-of-words vector and take its row.
    probabilities = model.predict([bow(sentence, words)])[0]
    # Keep only the confident predictions, remembering each class index.
    candidates = [
        (class_index, probability)
        for class_index, probability in enumerate(probabilities)
        if probability > ERROR_THRESHOLD
    ]
    # Strongest prediction first.
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    return [(classes[class_index], probability) for class_index, probability in ranked]

def response(sentence, userID='123', show_details=False):
    """Pick a reply for ``sentence``.

    The predicted tags are tried from most to least probable. For each tag the
    matching intents are scanned in order: an intent with 'context_set' updates
    ``context[userID]``, and the first intent that has no 'context_filter', or
    whose filter equals the user's current context, supplies the reply.

    Args:
        sentence: The user's raw input text.
        userID: Key under which conversation context is stored.
        show_details: When true, print the context and tag that were used.

    Returns:
        A randomly chosen entry of the selected intent's 'responses', or None
        when classification yields nothing or no intent is applicable.
    """
    for tag, _probability in classify(sentence):
        for intent in intents['intents']:
            if intent['tag'] != tag:
                continue

            # Record the context this intent establishes, if any.
            if 'context_set' in intent:
                if show_details:
                    print('context:', intent['context_set'])
                context[userID] = intent['context_set']

            # Contextual intents only apply when the user's stored context matches.
            applies = 'context_filter' not in intent or (
                userID in context and intent['context_filter'] == context[userID]
            )
            if applies:
                if show_details:
                    print('tag:', intent['tag'])
                return random.choice(intent['responses'])

    return None

## Testing Chatbot

In [16]:
# Smoke test: send one question through response() and print the reply.
print(response('What is your name?'))

You can call me Well-being Buddy. 


In [17]:
# define a function to start the chatbot
def chatbot():
    """Run an interactive console chat loop.

    Reads lines from the user and prints the bot's reply for each one. The loop
    ends when the user types ``quit`` (case-insensitive, surrounding whitespace
    ignored) or when input is closed or interrupted.
    """
    print('Start talking with the chatbot (type quit to stop)!')
    while True:
        try:
            inp = input('You: ')
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the session cleanly
            # instead of surfacing a traceback.
            break
        if inp.strip().lower() == 'quit':
            break
        response_output = response(inp)
        # response() returns None when no intent clears the threshold or passes
        # its context filter; show a readable fallback rather than "Bot: None".
        if response_output is None:
            response_output = "Sorry, I didn't understand you."
        print('Bot:', response_output)

# Start the interactive session; the call blocks until the user types "quit".
chatbot()

Start talking with the chatbot (type quit to stop)!
Bot: Sorry, I didn't understand you. 
Bot: Sorry, I didn't understand you. 
