### Helpful links

* Code example: https://www.section.io/engineering-education/creating-chatbot-using-natural-language-processing-in-python/

* Another code example: https://towardsdatascience.com/a-simple-chatbot-in-python-with-deep-learning-3e8669997758

### Step one: Importing libraries

In [58]:
import json
import string
import random
import nltk

nltk.download("omw-1.4")
import numpy as num
from nltk.stem import WordNetLemmatizer  # It has the ability to lemmatize.
import tensorflow as tensorF  # A multidimensional array of elements is represented by this symbol.
from tensorflow.keras import Sequential # Sequential groups a linear stack of layers into a tf.keras.Model
from tensorflow.keras.layers import Dense, Dropout

nltk.download("punkt")  # required package for tokenization
nltk.download("wordnet")  # word database

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\StephenJeranyama\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\StephenJeranyama\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\StephenJeranyama\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

### Step two: Creating a JSON file

In [50]:
data = {
    "intents": [
        {
            "tag": "age",
            "patterns": ["how old are you?"],
            "responses": ["I am 2 years old and my birthday was yesterday"],
        },
        {
            "tag": "greeting",
            "patterns": ["Hi", "Hello", "Hey"],
            "responses": ["Hi there", "Hello", "Hi :)"],
        },
        {
            "tag": "goodbye",
            "patterns": ["bye", "later"],
            "responses": ["Bye", "take care"],
        },
        {
            "tag": "name",
            "patterns": ["what's your name?", "who are you?"],
            "responses": [
                "I have no name yet," "You can give me one, and I will appreciate it"
            ],
        },
        {
            "tag": "coach",
            "patterns": ["Who is our data coach?", "Who is our Associate coach"],
            "responses": ["Data Coach, Associate coach"]
        },
        {
            "tag": "task",
            "patterns": ["What is my task?", "show me a task"],
            "responses": ["Data Coach, Associate coach"]
        },
        {
            "tag": "tomorrow",
            "patterns": ["What is my task tomorrow?", "show me a task next day", "What are we covering in class tomorrow?"],
            "responses": ["Data Coach, Associate coach"]
        },
         {
            "tag": "schedule",
            "patterns": ["What is my task tomorrow?", "show me a task next day", "What are we covering in class tomorrow?"],
            "responses": ["Data Coach, Associate coach"]
        },
    ]
}

In [72]:
import pprint
pprint.pprint(data["intents"])

[{'patterns': ['how old are you?'],
  'responses': ['I am 2 years old and my birthday was yesterday'],
  'tag': 'age'},
 {'patterns': ['Hi', 'Hello', 'Hey'],
  'responses': ['Hi there', 'Hello', 'Hi :)'],
  'tag': 'greeting'},
 {'patterns': ['bye', 'later'],
  'responses': ['Bye', 'take care'],
  'tag': 'goodbye'},
 {'patterns': ["what's your name?", 'who are you?'],
  'responses': ['I have no name yet,You can give me one, and I will appreciate '
                'it'],
  'tag': 'name'},
 {'patterns': ['Who is our data coach?', 'Who is our Associate coach'],
  'responses': ['Data Coach, Associate coach'],
  'tag': 'coach'},
 {'patterns': ['What is my task?', 'show me a task'],
  'responses': ['Data Coach, Associate coach'],
  'tag': 'task'},
 {'patterns': ['What is my task tomorrow?',
               'show me a task next day',
               'What are we covering in class tomorrow?'],
  'responses': ['Data Coach, Associate coach'],
  'tag': 'tomorrow'}]


### Step three: Processing data

In [66]:
lm = WordNetLemmatizer()  # for getting words
# lists
ourClasses = []
newWords = []
documentX = []
documentY = []
# Each intent is tokenized into words and the patterns and their associated tags are added to their respective lists.
for intent in data["intents"]:
    for pattern in intent["patterns"]:
        print(f"pattern: {pattern}")
        ournewTkns = nltk.word_tokenize(pattern)  # tokenize the patterns
        newWords.extend(ournewTkns)  # extends the tokens
        documentX.append(pattern) 
        documentY.append(intent["tag"]) 

    if (
        intent["tag"] not in ourClasses
    ):  # add unexisting tags to their respective classes
        ourClasses.append(intent["tag"])

newWords = [
    lm.lemmatize(word.lower()) for word in newWords if word not in string.punctuation
]  # set words to lowercase if not in punctuation
newWords = sorted(set(newWords))  # sorting words
ourClasses = sorted(set(ourClasses))  # sorting classes

pattern how old are you?
pattern Hi
pattern Hello
pattern Hey
pattern bye
pattern later
pattern what's your name?
pattern who are you?
pattern Who is our data coach?
pattern Who is our Associate coach
pattern What is my task?
pattern show me a task
pattern What is my task tomorrow?
pattern show me a task next day
pattern What are we covering in class tomorrow?


In [62]:
print(f"classes: {ourClasses}")
print()
print(f"New words: {newWords}")
print()
print(f'documentX: {documentX}')
print()
print(f'documentY: {documentY}')


classes: ['age', 'coach', 'goodbye', 'greeting', 'name', 'task', 'tomorrow']

New words: ["'s", 'a', 'are', 'associate', 'bye', 'class', 'coach', 'covering', 'data', 'day', 'hello', 'hey', 'hi', 'how', 'in', 'is', 'later', 'me', 'my', 'name', 'next', 'old', 'our', 'show', 'task', 'tomorrow', 'we', 'what', 'who', 'you', 'your']

documentX: ['how old are you?', 'Hi', 'Hello', 'Hey', 'bye', 'later', "what's your name?", 'who are you?', 'Who is our data coach?', 'Who is our Associate coach', 'What is my task?', 'show me a task', 'What is my task tomorrow?', 'show me a task next day', 'What are we covering in class tomorrow?']

documentY: ['age', 'greeting', 'greeting', 'greeting', 'goodbye', 'goodbye', 'name', 'name', 'coach', 'coach', 'task', 'task', 'tomorrow', 'tomorrow', 'tomorrow']


### Step four: Designing a neural network model

In [73]:
trainingData = []  # training list array
outEmpty = [0] * len(ourClasses)
# bow model
for idx, doc in enumerate(documentX):
    bagOfwords = []
    text = lm.lemmatize(doc.lower())
    print(f"doc: {doc} lem text: {text}")
    for word in newWords:
        bagOfwords.append(1) if word in text else bagOfwords.append(0)

    print(f"bagOfwords: {bagOfwords}")
    outputRow = list(outEmpty)
    outputRow[ourClasses.index(documentY[idx])] = 1
    trainingData.append([bagOfwords, outputRow])

random.shuffle(trainingData)
trainingData = num.array(
    trainingData, dtype=object
)  # converting our data into an array after shuffling

x = num.array(list(trainingData[:, 0]))  # first training phase
y = num.array(list(trainingData[:, 1]))  # second training phase

doc: how old are you? lem text: how old are you?
bagOfwords: [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0]
doc: Hi lem text: hi
bagOfwords: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
doc: Hello lem text: hello
bagOfwords: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
doc: Hey lem text: hey
bagOfwords: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
doc: bye lem text: bye
bagOfwords: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
doc: later lem text: later
bagOfwords: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
doc: what's your name? lem text: what's your name?
bagOfwords: [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1]
doc: who are you? lem text: who are y

In [74]:
print(trainingData)

[[list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
  list([0, 0, 0, 1, 0, 0, 0])]
 [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
  list([0, 0, 0, 1, 0, 0, 0])]
 [list([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0])
  list([0, 0, 0, 0, 0, 1, 0])]
 [list([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
  list([0, 0, 1, 0, 0, 0, 0])]
 [list([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
  list([0, 0, 1, 0, 0, 0, 0])]
 [list([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1])
  list([0, 0, 0, 0, 1, 0, 0])]
 [list([0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0])
  list([0, 1, 0, 0, 0, 0, 0])]
 [list([0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1

### Step five: Building useful features

In [51]:
iShape = (len(x[0]),)
oShape = len(y[0])
# parameter definition
ourNewModel = Sequential()
# In the case of a simple stack of layers, a Sequential model is appropriate

# Dense function adds an output layer
ourNewModel.add(Dense(128, input_shape=iShape, activation="relu"))
# The activation function in a neural network is in charge of converting the node's summed weighted input into activation of the node or output for the input in question
ourNewModel.add(Dropout(0.5))
# Dropout is used to enhance visual perception of input neurons
ourNewModel.add(Dense(64, activation="relu"))
ourNewModel.add(Dropout(0.3))
ourNewModel.add(Dense(oShape, activation="softmax"))
# below is a callable that returns the value to be used with no arguments
md = tensorF.keras.optimizers.legacy.Adam(learning_rate=0.01, decay=1e-6)
# Below line improves the numerical stability and pushes the computation of the probability distribution into the categorical crossentropy loss function.
ourNewModel.compile(loss="categorical_crossentropy", optimizer=md, metrics=["accuracy"])
# Output the model in summary
print(ourNewModel.summary())
# Whilst training your Neural Network, you have the option of making the output verbose or simple.
ourNewModel.fit(x, y, epochs=200, verbose=1)
# By epochs, we mean the number of times you repeat a training set.

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 128)               4096      
                                                                 
 dropout_8 (Dropout)         (None, 128)               0         
                                                                 
 dense_13 (Dense)            (None, 64)                8256      
                                                                 
 dropout_9 (Dropout)         (None, 64)                0         
                                                                 
 dense_14 (Dense)            (None, 7)                 455       
                                                                 
Total params: 12,807
Trainable params: 12,807
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/2

<keras.callbacks.History at 0x15a768c4f40>

In [52]:
def ourText(text):
    newtkns = nltk.word_tokenize(text)
    newtkns = [lm.lemmatize(word) for word in newtkns]
    return newtkns


def wordBag(text, vocab):
    newtkns = ourText(text)
    bagOwords = [0] * len(vocab)
    for w in newtkns:
        for idx, word in enumerate(vocab):
            if word == w:
                bagOwords[idx] = 1
    return num.array(bagOwords)


def Pclass(text, vocab, labels):
    bagOwords = wordBag(text, vocab)
    ourResult = ourNewModel.predict(num.array([bagOwords]))[0]
    newThresh = 0.2
    yp = [[idx, res] for idx, res in enumerate(ourResult) if res > newThresh]

    yp.sort(key=lambda x: x[1], reverse=True)
    newList = []
    for r in yp:
        newList.append(labels[r[0]])
    return newList


def getRes(firstlist, fJson):
    tag = firstlist[0]
    listOfIntents = fJson["intents"]
    for i in listOfIntents:
        if i["tag"] == tag:
            ourResult = random.choice(i["responses"])
            break
    return ourResult


In [53]:
# while True:
#     newMessage = input("")
#     intents = Pclass(newMessage, newWords, ourClasses)
#     ourResult = getRes(intents, data)
#     print(ourResult)

In [54]:
import pandas as pd

df = pd.read_excel("Schedule_Cleaned.xlsm")
df

Unnamed: 0,Date,Day,Coach/Associate,Name,Week,Cohort,Task,AM,PM,EOD
0,2022-11-28,Monday,Coach,,W12,C6,Final Project,Waseem Career Talks 11-12,,
1,2022-11-29,Tuesday,Coach,,W12,C6,Final Project,,,
2,2022-11-30,Wednesday,Coach,,W12,C6,Academy + (1),,,
3,2022-12-01,Thursday,Coach,,W12,C6,Academy + (1),,,
4,2022-12-02,Friday,Coach,,W12,C6,Academy + (1),,,
5,2022-11-28,Monday,Associate,Nathan,W12,C6,Final Project,Dev,Project,Admin
6,2022-11-29,Tuesday,Associate,Nathan,W12,C6,Final Project,Project,Project,Admin
7,2022-11-30,Wednesday,Associate,Nathan,W12,C6,Academy + (1),Set on Academy Portfolio Building,Dev,Admin
8,2022-12-01,Thursday,Associate,Nathan,W12,C6,Academy + (1),Dev,Dev,Admin
9,2022-12-02,Friday,Associate,Nathan,W12,C6,Academy + (1),C7 Presentation,C7 Presentation,Admin


In [55]:
intents = Pclass("What are we class tomorrow?", newWords, ourClasses)
intents



['tomorrow']

In [57]:
if intents[0] == 'tomorrow':
    display(df[["Coach/Associate", "Name"]])

if intents == "task":
    #TODO what is the poandas stuff i need to answer the question 
    pass
if intents =="another task":
    pass
    # TODO more pandas stuff

Unnamed: 0,Coach/Associate,Name
0,Coach,
1,Coach,
2,Coach,
3,Coach,
4,Coach,
5,Associate,Nathan
6,Associate,Nathan
7,Associate,Nathan
8,Associate,Nathan
9,Associate,Nathan
