# LLM with a ChatBot
# Problem statement
To develop a simple chatbot that can answer basic questions about a specific topic.

# Setup environment

In [3]:
# import libraries
import nltk
from nltk.stem.lancaster import LancasterStemmer

import numpy as np
import json
import random
import pickle
import requests

import tensorflow as tf

# Load and Preprocess Data

**Discussion**
* The intents file contains the curated information about a smaller subset of Winnipeg Libraries in a Question and Answer format
* 'patterns' contains the usual patterns that users would input to query the related information
* 'responses' contains the expected results from the organization to answer the query
reference: https://wpl.winnipeg.ca/library/

In [4]:
# load data
# define the URL of the JSON file
url = "https://github.com/santosh-gyan/llm-chatbot/blob/main/intents_lib.json"

# make a GET request to the URL
response = requests.get(url)

# check if the request was successful
if response.status_code == 200:
    # parse the response as JSON
    raw_data = response.json()
    # print the raw data
    print(raw_data)
else:
    # handle the error
    print(f"Request failed with status code {response.status_code}")

{'intents': [{'tag': 'greeting', 'patterns': ['Hi there', 'How are you', 'Is anyone there?', 'Hey', 'Hola', 'Hello', 'Good day'], 'responses': ['Hello, thanks for asking', 'Good to see you again', 'Hi there, how can I help?'], 'context': ['']}, {'tag': 'goodbye', 'patterns': ['Bye', 'See you later', 'Goodbye', 'Nice chatting to you, bye', 'Till next time'], 'responses': ['See you!', 'Have a nice day', 'Bye! Come back again soon.'], 'context': ['']}, {'tag': 'thanks', 'patterns': ['Thanks', 'Thank you', "That's helpful", 'Awesome, thanks', 'Thanks for helping me'], 'responses': ['Happy to help!', 'Any time!', 'My pleasure'], 'context': ['']}, {'tag': 'noanswer', 'patterns': [], 'responses': ["Sorry, can't understand you", 'Please give me more info', 'Not sure I understand'], 'context': ['']}, {'tag': 'librarycard', 'patterns': ['why library card is important', 'why need a library card'], 'responses': ['Winnipeg Public Library has 20 locations serving the citizens of Winnipeg. The Librar

In [5]:
stemmer = LancasterStemmer()

In [6]:
# reminder to delete the pickle file if you change the intents file
try:
    with open('data.pickle', 'rb') as data_file:
        words, labels, training, output = pickle.load(data_file)
except:
# get the words and labels
    words = []
    labels = []
    docs_x = []
    docs_y = []

    for intent in raw_data['intents']:
        for pattern in intent['patterns']:
            tokenized_words = nltk.word_tokenize(pattern)
            words.extend(tokenized_words)
            docs_x.append(tokenized_words)
            docs_y.append(intent['tag'])

        if intent['tag'] not in labels:
            labels.append(intent['tag'])

    # stem the words
    words = [stemmer.stem(w.lower()) for w in words if w != '?']
    words = sorted(list(set(words)))
    labels = sorted(labels)

    # create training and output data
    training = []
    output = []

    out_empty = [0 for _ in range(len(labels))]

    # one hot encoding
    for x, doc in enumerate(docs_x):
        bag = []

        stemmed_words = [stemmer.stem(w.lower()) for w in doc]

        for w in words:
            if w in stemmed_words:
                bag.append(1)
            else:
                bag.append(0)

        output_row = out_empty[:]
        output_row[labels.index(docs_y[x])] = 1

        training.append(bag)
        output.append(output_row)

    # convert to numpy arrays
    training = np.array(training)
    output = np.array(output)

    # save data
    with open('data.pickle', 'wb') as data_file:
        pickle.dump((words, labels, training, output), data_file)

# Train the model

In [7]:
# build the model
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(8, input_shape=[len(training[0])]))
model.add(tf.keras.layers.Dense(8))
model.add(tf.keras.layers.Dense(len(output[0]), activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# reminder to delete the model file if you change the model
try:
    model.load_weights('model.keras')
except:
    model.fit(training, output, epochs=1000, batch_size=8)
    model.save('model.keras')

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [8]:
# create a bag of words function to be used in the chat
def bag_of_words(s, words):
    bag = [0 for _ in range(len(words))]

    tokenized_words = nltk.word_tokenize(s)
    stemmed_words = [stemmer.stem(w.lower()) for w in tokenized_words]

    for w in stemmed_words:
        for i, word in enumerate(words):
            if word == w:
                bag[i] = 1

    return np.array(bag)

In [9]:
# create a chat function
def chat():
    print('Start talking with the bot! (type quit to stop)')
    while True:
        inp = input('You: ')
        if inp.lower() == 'quit':
            break

        results = model.predict(np.array([bag_of_words(inp, words)]), verbose=0)
        results_index = np.argmax(results)
        tag = labels[results_index]

        for intent in raw_data['intents']:
            if intent['tag'] == tag:
                responses = intent['responses']

        print('You: ' + inp)
        print('Bot: ' + random.choice(responses))

In [10]:
# start the chat
chat()

Start talking with the bot! (type quit to stop)
You: hello
Bot: Hi there, how can I help?
You: how to get library card
Bot: To get a card, visit one of our 20 branch locations. Staff in the branch will be happy to assist you in becoming a library member.
You: where is the library
Bot: Bill and Helen Norrie; Charleswood; Millennium; Munroe; Osborne
You: norrie?
Bot: Hi there, how can I help?
You: norrie
Bot: Hello, thanks for asking
You: munroe
Bot: Good to see you again
You: how to go to osborne
Bot: 625 Osborne St.
You: thank you
Bot: Any time!
