<a href="https://colab.research.google.com/github/salllwaaa/ChatBot/blob/main/Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Importing Libraries**

In [None]:
import numpy as np
import json
import re
import tensorflow as tf
import random
import spacy
from pathlib import Path
from collections import defaultdict


Load spaCy's pretrained English pipeline `en_core_web_sm`,
which can perform various NLP tasks such as tokenization, part-of-speech tagging, named entity recognition, dependency parsing, and more on English text data.

In [None]:
# Load spaCy's small English pipeline; Chatbot_RESPONSE calls `nlp(...)` to split the user's input into tokens.
nlp = spacy.load('en_core_web_sm')

**Reading DataSet**

In [None]:
# Location of the intents dataset (the /content directory is where Colab places uploaded files).
DataSet_file = Path('/content/Intent.json')

# Pin the encoding: JSON text is UTF-8, and without it open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII characters.
with open(DataSet_file, encoding='utf-8') as D:
    intents = json.load(D)

# **Preprocessing data**

- cleaning data
- split them into inputs and targets
- build a tokenizer dictionary
- turn sentences into sequences


In [None]:
def preprocccessing (sent):
  """Clean a sentence before it is tokenized.

  Every character that is neither a letter nor one of ``. ! ? '`` is replaced
  by a space; runs of whitespace are then collapsed to a single space and
  leading/trailing whitespace is removed.

  Args:
    sent: The raw sentence (str).

  Returns:
    The cleaned sentence (str).
  """
  allowed_punctuation = ".!?'"

  # Filter character by character, not word by word: a word-level test such as
  # "you?".isalpha() is False and would discard the whole word instead of
  # keeping "you?" intact.
  sent = ''.join(ch if ch.isalpha() or ch in allowed_punctuation else ' ' for ch in sent)

  # split() without arguments splits on any whitespace run, so re-joining with
  # a single space collapses repeated spaces and trims both ends.
  sent = ' '.join(sent.split())
  return sent

In [None]:
# Preview only the first two intents; displaying the entire dataset floods the cell output.
intents['intents'][:2]

[{'intent': 'Greeting',
  'text': ['Hi',
   'Hi there',
   'Hola',
   'Hello',
   'Hello there',
   'Hya',
   'Hya there'],
  'responses': ['Hi human, please tell me your GeniSys user',
   'Hello human, please tell me your GeniSys user',
   'Hola human, please tell me your GeniSys user'],
  'extension': {'function': '', 'entities': False, 'responses': []},
  'context': {'in': '', 'out': 'GreetingUserRequest', 'clear': False},
  'entityType': 'NA',
  'entities': []},
 {'intent': 'GreetingResponse',
  'text': ['My user is Adam',
   'This is Adam',
   'I am Adam',
   'It is Adam',
   'My user is Bella',
   'This is Bella',
   'I am Bella',
   'It is Bella'],
  'responses': ['Great! Hi <HUMAN>! How can I help?',
   'Good! Hi <HUMAN>, how can I help you?',
   'Cool! Hello <HUMAN>, what can I do for you?',
   'OK! Hola <HUMAN>, how can I help you?',
   'OK! hi <HUMAN>, what can I do for you?'],
  'extension': {'function': 'extensions.gHumans.updateHuman',
   'entities': True,
   'responses':

In [None]:
inputs, targets = [], []
classes = []
intentDic = {}

# Walk the dataset once, collecting training pairs and the per-intent responses.
for entry in intents['intents']:
    label = entry['intent']

    # Record each intent label the first time it is seen.
    if label not in classes:
        classes.append(label)

    # Fetch (or create) the response list kept under this intent label.
    response_bucket = intentDic.setdefault(label, [])

    # Every cleaned example utterance becomes an input paired with its intent label.
    cleaned_utterances = [preprocccessing(utterance) for utterance in entry['text']]
    inputs.extend(cleaned_utterances)
    targets.extend([label] * len(cleaned_utterances))

    # Keep all candidate responses for this intent.
    response_bucket.extend(entry['responses'])

**Tokenizing**

In [None]:
import tensorflow as tf

def tokenize_data(input_list):
    """Fit a tokenizer on the given texts and encode them as a padded matrix.

    Args:
        input_list: List of text strings to build the vocabulary from.

    Returns:
        A tuple ``(tokenizer, padded_input_seq)``: the fitted tokenizer and the
        texts converted to integer sequences, padded at the front ('pre') so
        that all rows have the same length.
    """
    text_api = tf.keras.preprocessing.text
    sequence_api = tf.keras.preprocessing.sequence

    # No character filtering; words missing from the vocabulary map to '<unk>'.
    tokenizer = text_api.Tokenizer(oov_token='<unk>', filters='')
    tokenizer.fit_on_texts(input_list)

    # Encode the same texts with the fitted vocabulary, then pad in one step.
    padded_input_seq = sequence_api.pad_sequences(
        tokenizer.texts_to_sequences(input_list),
        padding='pre',
    )
    return tokenizer, padded_input_seq




**Tokenize the input data and get the tokenizer object and the padded input tensor**


In [None]:
# Build the vocabulary from the cleaned utterances and get them back as a pre-padded integer matrix.
tokenizer, input_tensor = tokenize_data(inputs)

**Preprocess the list of target labels by converting them into a one-hot (categorical) tensor**

In [None]:
def create_categorical_target(targets):
    """One-hot encode a list of target labels.

    Labels are numbered in order of first appearance, starting at 0.

    Args:
        targets: List of hashable labels, one per training example.

    Returns:
        A tuple ``(categorical_tensor, trg_index_word)``: the one-hot encoded
        labels (dtype int32) and a dict mapping each index back to its label.
    """
    label_to_index = {}
    encoded = []
    for label in targets:
        # A label seen for the first time gets the next free index.
        if label not in label_to_index:
            label_to_index[label] = len(label_to_index)
        encoded.append(label_to_index[label])

    categorical_tensor = tf.keras.utils.to_categorical(
        encoded,
        num_classes=len(label_to_index),
        dtype='int32',
    )

    # Invert the mapping so predicted indices can be decoded to labels.
    trg_index_word = {index: label for label, index in label_to_index.items()}
    return categorical_tensor, trg_index_word

# Encode the intent labels: one-hot target matrix plus the index -> label
# lookup that is later used to decode the model's predictions.
target_tensor, trg_index_word = create_categorical_target(targets)

In [None]:
# Report the shapes of the encoded inputs and one-hot targets.
print(f'input shape: {input_tensor.shape} and output shape: {target_tensor.shape}')

input shape: (143, 9) and output shape: (143, 22)


# **Building The Model**

In [None]:
# hyperparameters

epochs = 50                                 # The number of times the model will iterate over the entire training dataset
vocab_size = len(tokenizer.word_index) + 1  # Vocabulary size; +1 because index 0 is left for the padding value
EmbeddDim = 512                             # The dimensionality of the word embedding
units = 128                                 # The number of units/neurons in the LSTM layer
TensorLength = target_tensor.shape[1]       # The width of the one-hot target tensor (number of output classes)

# Build RNN Model with tensorflow

model = tf.keras.models.Sequential([
    # Embedding layer that maps input word ids to dense vectors of fixed size
    tf.keras.layers.Embedding(vocab_size, EmbeddDim),

    # Bidirectional LSTM layer that captures the context from both directions
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units, dropout=0.2)),

    # Dense layer with 'units' neurons and ReLU activation function
    tf.keras.layers.Dense(units, activation='relu'),

    # Dropout layer to prevent overfitting by randomly setting a fraction of input units to 0
    tf.keras.layers.Dropout(0.5),

    # Output layer with 'TensorLength' neurons (one per intent class) and softmax activation
    tf.keras.layers.Dense(TensorLength, activation='softmax')
])

# Adam optimizer with learning rate of 0.01. Use the `learning_rate` keyword:
# the old `lr` alias is deprecated and is rejected by newer Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 512)         57344     
                                                                 
 bidirectional (Bidirection  (None, 256)               656384    
 al)                                                             
                                                                 
 dense (Dense)               (None, 128)               32896     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 22)                2838      
                                                                 
Total params: 749462 (2.86 MB)
Trainable params: 749462 (2.86 MB)
Non-trainable params: 0 (0.00 Byte)
____________________

In [None]:
# Stop training once the monitored training loss has not improved for 4 consecutive epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=4)




# This is formatted as code




In [None]:
# Train on the whole dataset (no validation data is passed); the early_stop
# callback may end the run before `epochs` is reached.
model.fit(input_tensor, target_tensor, epochs=epochs, callbacks=[early_stop])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7a6d8316a3b0>

**Function Chatbot_RESPONSE(): tokenizes a sentence, predicts its intent class with the trained model, and returns a random response for that class**

In [None]:
def Chatbot_RESPONSE(sentence, tokenizer, model, intent_doc, trg_index_word):
    """Predict the intent of ``sentence`` and pick a reply for it.

    Args:
        sentence: Raw user text.
        tokenizer: Fitted tokenizer exposing ``word_index`` (word -> id); the
            mapping must contain the ``'<unk>'`` out-of-vocabulary entry.
        model: Classifier called on a tensor of shape (1, n_tokens) holding
            word ids; its result must provide ``.numpy()`` with one row of
            per-class scores.
        intent_doc: Mapping of intent label -> list of candidate responses.
        trg_index_word: Mapping of class index -> intent label.

    Returns:
        Tuple ``(response, intent_label)``; ``response`` is chosen at random
        from the responses stored for the predicted intent.
    """
    vocab = tokenizer.word_index
    unk_id = vocab['<unk>']

    # The Keras tokenizer lowercases text by default, so its vocabulary keys
    # are lowercase; mirror that here or capitalised input ("Hello") would
    # always fall through to <unk>. Honour `lower=False` if it was set.
    lowercase = getattr(tokenizer, 'lower', True)

    # Tokenize the text itself. Wrapping it in repr() would surround the
    # sentence with quote characters, which spaCy turns into extra tokens that
    # never occur in the training sequences.
    sent_seq = []
    for token in nlp(str(sentence)):
        # Whitespace-only tokens carry no content and have no vocabulary entry.
        if token.is_space:
            continue
        word = token.text.lower() if lowercase else token.text
        sent_seq.append(vocab.get(word, unk_id))

    # A blank sentence yields no tokens; feed a single <unk> so the model
    # still receives a non-empty sequence.
    if not sent_seq:
        sent_seq = [unk_id]

    # Add the batch dimension and predict the category of the input sentence.
    pred = model(tf.expand_dims(sent_seq, 0))
    pred_class = int(np.argmax(pred.numpy(), axis=1)[0])
    intent_label = trg_index_word[pred_class]

    # Choose a random response for the predicted intent.
    return random.choice(intent_doc[intent_label]), intent_label


# **Chat with Chatbot**

In [None]:
print(" Enter 'Quit' to exit chat")
while True:
    # Strip surrounding whitespace so inputs such as "quit " still end the chat.
    input_ = input('me: ').strip()
    if input_.lower() == 'quit':
        break
    # Nothing to classify on a blank line; prompt again.
    if not input_:
        continue
    res, typ = Chatbot_RESPONSE(input_, tokenizer, model, intentDic, trg_index_word)
    print('Bot: {} -- TYPE: {}'.format(res, typ))
    print()

 Enter 'Quit' to exit chat
me: hello
Bot: Hello human, please tell me your GeniSys user -- TYPE: Greeting

me: what is my name 
Bot: Your name is <HUMAN>, how can I help you? -- TYPE: CurrentHumanQuery

me: what is your name 
Bot: GeniSys -- TYPE: RealNameQuery

me: how old are you
Bot: Hi, I am good thank you, how are you? Please tell me your GeniSys user -- TYPE: CourtesyGreeting

me: how are you
Bot: Hi, good thank you, how are you? Please tell me your GeniSys user -- TYPE: CourtesyGreeting

me: what is the clock
Bot: One moment -- TYPE: TimeQuery

me: do you understand what i am saying ?
Bot: I read you loud and clear! -- TYPE: UnderstandQuery

me: i am talking to you
Bot: OK -- TYPE: NotTalking2U

me: be quite 
Bot: I am sorry to disturb you -- TYPE: Shutup

me: thanks 
Bot: Not a problem! Have a nice day -- TYPE: CourtesyGoodBye

me: bye
Bot: Not a problem! Have a nice day -- TYPE: CourtesyGoodBye

me: quite
Bot: Hello, how are you? I am great thanks! Please tell me your GeniSys 