### Libraries

In [2]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.13.0-cp310-cp310-macosx_12_0_arm64.whl (1.9 kB)
Collecting tensorflow-macos==2.13.0
  Downloading tensorflow_macos-2.13.0-cp310-cp310-macosx_12_0_arm64.whl (189.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.3/189.3 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:02[0m
[?25hCollecting google-pasta>=0.1.1
  Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.5/57.5 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-estimator<2.14,>=2.13.0
  Downloading tensorflow_estimator-2.13.0-py2.py3-none-any.whl (440 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m440.8/440.8 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3
  Downloading protobuf-4.23.4-cp37-abi3-macosx_10_9_universal2.whl (4

In [20]:
import random
import json
import pickle
import numpy as np

import nltk
from nltk.stem import WordNetLemmatizer

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense , Activation , Dropout
from tensorflow.keras.optimizers import SGD
import tensorflow as tf
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.models import load_model

In [3]:
nltk.download('omw-1.4')

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/sherywalid/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [4]:
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()

### Extracting and Load Data

In [5]:
# load the json file with the bot responses
chat_data = json.loads(open("chatbot_data.json").read())

### Transforming Data

In [6]:
lemmatizer = WordNetLemmatizer()

In [7]:
chat_data

{'intents': [{'tag': 'greeting',
   'patterns': ['Hi',
    'How are you?',
    'Is anyone there?',
    'Hello',
    'Good day',
    "What's up",
    'how are ya',
    'heyy',
    'whatsup',
    '??? ??? ??'],
   'responses': ['Hello!',
    'Good to see you again!',
    'Hi there, how can I help?'],
   'context_set': ''},
  {'tag': 'goodbye',
   'patterns': ['cya',
    'see you',
    'bye bye',
    'See you later',
    'Goodbye',
    'I am Leaving',
    'Bye',
    'Have a Good day',
    'talk to you later',
    'ttyl',
    'i got to go',
    'gtg'],
   'responses': ['Sad to see you go :(',
    'Talk to you later',
    'Goodbye!',
    'Come back soon'],
   'context_set': ''},
  {'tag': 'creator',
   'patterns': ['what is the name of your developers',
    'what is the name of your creators',
    'what is the name of the developers',
    'what is the name of the creators',
    'who created you',
    'your developers',
    'your creators',
    'who are your developers',
    'developers',
  

In [8]:
words = []
classes = []
documents = []

ignore_symbols = [';',',','.','?','!','|','(',')']

In [9]:
# Iterate over all the intents in the chat data
for intent in chat_data['intents']:
    
    # Iterate over all the patterns in the current intent
    for pattern in intent['patterns']:
        
        # Tokenize the current pattern into a list of words
        words_list = nltk.tokenize.word_tokenize(pattern)
        
        # Add the words in the pattern to the global list of words
        words.extend(words_list)
        
        # Add the current pattern and its corresponding intent tag to the list of documents
        documents.append((words_list, intent['tag']))
        
        # Add the current intent tag to the list of classes if it's not already there
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

In [10]:
print(documents)

[(['Hi'], 'greeting'), (['How', 'are', 'you', '?'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['What', "'s", 'up'], 'greeting'), (['how', 'are', 'ya'], 'greeting'), (['heyy'], 'greeting'), (['whatsup'], 'greeting'), (['?', '?', '?', '?', '?', '?', '?', '?'], 'greeting'), (['cya'], 'goodbye'), (['see', 'you'], 'goodbye'), (['bye', 'bye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['I', 'am', 'Leaving'], 'goodbye'), (['Bye'], 'goodbye'), (['Have', 'a', 'Good', 'day'], 'goodbye'), (['talk', 'to', 'you', 'later'], 'goodbye'), (['ttyl'], 'goodbye'), (['i', 'got', 'to', 'go'], 'goodbye'), (['gtg'], 'goodbye'), (['what', 'is', 'the', 'name', 'of', 'your', 'developers'], 'creator'), (['what', 'is', 'the', 'name', 'of', 'your', 'creators'], 'creator'), (['what', 'is', 'the', 'name', 'of', 'the', 'developers'], 'creator'), (['what', 'is', 'the', 'name', 'of', 'the', 'creators'], 'creator'

In [11]:
# Lemmatize each word in the words list using WordNetLemmatizer
# and remove any words that are in the ignore_symbols list
words = [lemmatizer.lemmatize(w) for w in words if w not in ignore_symbols]

# Sort the list of words in lexicographic order and remove duplicates
words = sorted(set(words))

In [12]:
words

["'s",
 'AC',
 'AI/ML',
 'AI/Ml',
 'About',
 'Ac',
 'Are',
 'Bye',
 'Cafetaria',
 'Can',
 'Chemical',
 'Civil',
 'College',
 'Computer',
 'Contact',
 'Do',
 'Does',
 'Engineering',
 'Events',
 'Food',
 'Give',
 'Good',
 'Goodbye',
 'HOD',
 'Have',
 'Hello',
 'Hi',
 'Holiday',
 'How',
 'I',
 'IT',
 'Information',
 'Is',
 'Leaving',
 'Library',
 'Mechanical',
 'See',
 'Semester',
 'Sports',
 'Syllabus',
 'Technology',
 'Tell',
 'Thank',
 'Thanks',
 'UNI',
 'What',
 'Whats',
 'When',
 'Where',
 'Which',
 'Who',
 'Will',
 'You',
 'a',
 'about',
 'active',
 'activity',
 'address',
 'admision',
 'admission',
 'against',
 'allotment',
 'am',
 'an',
 'and',
 'antiragging',
 'any',
 'anyone',
 'are',
 'as',
 'asshole',
 'at',
 'attend',
 'automobile',
 'available',
 'average',
 'be',
 'between',
 'big',
 'bitch',
 'book',
 'boy',
 'branch',
 'bring',
 'building',
 'by',
 'bye',
 'cafetaria',
 'call',
 'called',
 'campus',
 'can',
 'canteen',
 'capacity',
 'case',
 'casuals',
 'ce',
 'chatting',

In [14]:
# Save the words variable to a file called 'word.pkl'
# using the pickle.dump() function and binary mode ('wb')
pickle.dump(words, open('word.pkl', 'wb'))

# Save the classes variable to a file called 'classes.pkl'
# using the pickle.dump() function and binary mode ('wb')
pickle.dump(classes, open('classes.pkl', 'wb'))

In [15]:
# Initialize an empty list called 'training'
training = []

# Create a list of zeros with length equal to the number of classes
# This will be used as a default output vector for each training example
output_empty = [0] * len(classes)

In [16]:
# Iterate over each document in the list of documents
for document in documents:
    
    # Initialize an empty list called 'bag'
    bag = []
    
    # Get the word patterns and lemmatize them
    word_patterns = document[0]
    word_patterns = [lemmatizer.lemmatize(word.lower()) for word in word_patterns]
    
    # Iterate over all the words in the global list 'words'
    # and add 1 to the 'bag' list if the word is in 'word_patterns'
    for word in words:
        if word in word_patterns:
            bag.append(1)
        else:
            bag.append(0)
    
    # Create an output row by copying the 'output_empty' list
    # and setting the appropriate index to 1 for the current document's intent class
    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    
    # Append the bag-of-words representation and the output row to the 'training' list
    training.append([bag, output_row])

In [17]:
# Shuffle the 'training' list randomly to ensure that the training examples are presented in a random order
random.shuffle(training)

# Convert the 'training' list to a NumPy array for easier indexing
training = np.array(training)

# Separate the input and output vectors into separate lists
train_x = list(training[:, 0])
train_y = list(training[:, 1])

  training = np.array(training)


### Neural Network Model

In [18]:
# Initialize a new sequential neural network model
model = Sequential()

# Add a fully connected dense layer with 128 nodes, using ReLU activation function
# The input shape is the length of the 'train_x' list (i.e., the number of words in the vocabulary)
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))

# Add a dropout layer with a rate of 0.5
# Dropout is a regularization technique that randomly drops out some nodes during training
model.add(Dropout(0.5))

# Add another fully connected dense layer with 64 nodes, using ReLU activation function
model.add(Dense(64, activation='relu'))

# Add another dropout layer with a rate of 0.5
model.add(Dropout(0.5))

# Add a fully connected dense layer with a number of nodes equal to the number of classes in 'train_y'
# Use softmax activation function to output a probability distribution over the classes
model.add(Dense(len(train_y[0]), activation='softmax'))

In [19]:
def lr_schedule(epoch, lr):
    """
    Learning rate schedule function that decreases the learning rate over time.
    The function takes an epoch number and a learning rate as input, and returns a new learning rate.
    """
    decay_rate = 1e-6
    return lr / (1 + decay_rate * epoch)

learning_rate = 0.01
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)

lr_callback = LearningRateScheduler(lr_schedule)

model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1, callbacks=[lr_callback])
model.save('chatbotmodel.h5', hist)
print("done")



Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200


Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200


Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
done


  saving_api.save_model(


### Build ChatBot App

In [21]:
# Load the chatbot data from a JSON file and store it in a Python dictionary
chat_data = json.loads(open("chatbot_data.json").read())

# Load the vocabulary of words used to train the neural network model from a pickle file
words = pickle.load(open('word.pkl','rb'))

# Load the list of intent classes used to train the neural network model from a pickle file
classes = pickle.load(open('classes.pkl','rb'))

# Load the trained neural network model from a saved HDF5 file
model = load_model('chatbotmodel.h5')

In [24]:
def clean_sentence(sentence):
    """
    Tokenizes and lemmatizes the words in a sentence.

    Args:
    - sentence: A string representing the sentence to be tokenized and lemmatized.

    Returns:
    - A list of strings representing the lemmatized words in the sentence.
    """
    # Tokenize the sentence into individual words using the NLTK library
    sentence_words = nltk.word_tokenize(sentence)
    
    # Lemmatize each word using the WordNetLemmatizer from the NLTK library
    sentence_words = [lemmatizer.lemmatize(word) for word in sentence_words]
    
    # Return the lemmatized words
    return sentence_words

In [23]:

def bag_of_words(sentence):
    """
    Converts a sentence into a bag of words representation using the vocabulary of words.

    Args:
    - sentence: A string representing the sentence to be converted into a bag of words representation.

    Returns:
    - A 2D numpy array representing the bag of words representation of the sentence. The array has shape (1, len(words)),
      where `len(words)` is the length of the vocabulary of words used to train the neural network model.
      The array is a binary vector where each element is 1 if the corresponding word in the vocabulary is present in the
      sentence, and 0 otherwise.
    """
    # Tokenize and lemmatize the sentence using the `clean_sentence` function
    sentence_words = clean_sentence(sentence)
    
    # Initialize a binary vector of length `len(words)` with all elements set to 0
    bag = [0]*len(words)
    
    # Convert the sentence into a bag of words representation using the vocabulary of words
    for w in sentence_words:
        for i , word in enumerate(words):
            if word == w:
                bag[i] = 1
    
    # Return the bag of words representation as a 2D numpy array with shape (1, len(words))
    return np.array([bag])


In [25]:
def predict_class(sentence):
    """
    Predicts the intent of a sentence and returns a list of likely intents and their probabilities.

    Args:
    - sentence: A string representing the sentence for which the intent is to be predicted.

    Returns:
    - A list of dictionaries representing the likely intents and their probabilities. Each dictionary has two keys:
      - 'intent': A string representing the name of the predicted intent.
      - 'probability': A string representing the probability of the predicted intent, rounded to 2 decimal places.
    """
    # Convert the sentence into a bag of words representation using the `bag_of_words` function
    bow = bag_of_words(sentence)
    
    # Use the trained neural network model to predict the intent of the sentence
    res = model.predict(np.array(bow))[0]
    
    # Set an error threshold to filter out low-probability intents
    error_threshold = 0.25
    
    # Filter out low-probability intents
    result = [[i,r] for i, r in enumerate(res) if r > error_threshold]
    
    # Sort the intents by probability in descending order
    result.sort(key = lambda x : x[1], reverse = True)
    
    # Create a list of likely intents and their probabilities
    return_list = []
    for r in result:
        # Round the probability to 2 decimal places and store it as a string
        probability = str(round(r[1], 2))
        
        # Create a dictionary with keys 'intent' and 'probability' and add it to the return list
        return_list.append({'intent': classes[r[0]], 'probability' : probability})
        
    # Return the list of likely intents and their probabilities
    return return_list

In [26]:
def _get_response(ints, intents_json):
    """
    Retrieves a response for a given intent from a JSON file.

    Args:
    - ints: A list of dictionaries representing the likely intents and their probabilities. Each dictionary has two keys:
      - 'intent': A string representing the name of the predicted intent.
      - 'probability': A string representing the probability of the predicted intent, rounded to 2 decimal places.
    - intents_json: A dictionary containing the intents and their corresponding responses.

    Returns:
    - A string representing the response for the predicted intent. If the predicted intent is not found in the
      `intents_json` dictionary, a default response is returned.
    """
    try:
        # Get the tag of the predicted intent from the list of likely intents
        tag = ints[0]['intent']
        
        # Get the list of intents and their corresponding responses from the `intents_json` dictionary
        list_of_intents = intents_json['intents']
        
        # Find the intent that matches the predicted tag and choose a random response from its list of responses
        for i in list_of_intents:
            if i['tag'] == tag:
                result = random.choice(i['responses'])
                break
        else:
            # Default response when the predicted intent is not found in the `intents_json` dictionary
            result = "I don't have a response for that."
    except (IndexError, KeyError):
        # Default response when an error occurs while retrieving the response from the `intents_json` dictionary
        result = "I don't understand!"
        
    # Return the response for the predicted intent
    return result

In [None]:
while True:
    message = input("")
    ints = predict_class(message)
    res = _get_response(ints, chat_data)
    print(res)

Heyy
Good to see you again!
any libraries
There is one huge and spacious library.timings are 8am to 6pm and for more visit <a target="blank" href="ADD LIBRARY DETAIL LINK">here</a>
thank you
welcome, anything else i can assist you with?
no bye
Sad to see you go :(
