In [1]:
import numpy as np
import pandas as pd
import sklearn
import tensorflow 
import nltk
import spacy
import json

In [2]:
from tensorflow import keras
from tensorflow.keras import layers

In [3]:
with open('D:\\HWAN\\Data\\Main_chatbot_dataset.json','r') as f:
    data = json.load(f)

In [4]:
df = pd.DataFrame(data['intents'])
df

Unnamed: 0,tag,patterns,responses,context_set
0,greeting,"[Hi, Hello, Hey, Good morning, Good afternoon,...","[Hi there! How can I assist you today?, Hello!...",
1,symptom_inquiry,"[I'm not feeling well, I have some symptoms, W...",[I'm here to help you assess your symptoms. Le...,symptom_checker
2,fitness_recommendations,"[I want to exercise, Can you recommend some wo...",[Staying active is important for your health! ...,fitness_recommendations
3,stress_support,"[I'm feeling stressed, How can I relax?, What ...",[It's important to prioritize your mental well...,stress_support
4,menu_options,"[What can you do?, What are your capabilities?...","[I can assist you with a variety of tasks, inc...",
5,feature_request,"[I wish this app could do X, It would be great...",[Thank you for your suggestion! I'll pass it a...,
6,bug_report,"[I found a bug, There's a problem with the app...",[Thank you for letting us know! Please provide...,
7,app_tour,"[Can you show me around the app?, How does the...","[Sure! Our app has several sections, including...",
8,app_feedback,"[I have some feedback about the app, Can I pro...",[We value your feedback! Please feel free to s...,
9,user_engagement,"[How can I make the most out of this app?, Wha...","[To make the most out of the app, try to use i...",


In [5]:
df_patterns = df[['patterns', 'tag']]
df_patterns

Unnamed: 0,patterns,tag
0,"[Hi, Hello, Hey, Good morning, Good afternoon,...",greeting
1,"[I'm not feeling well, I have some symptoms, W...",symptom_inquiry
2,"[I want to exercise, Can you recommend some wo...",fitness_recommendations
3,"[I'm feeling stressed, How can I relax?, What ...",stress_support
4,"[What can you do?, What are your capabilities?...",menu_options
5,"[I wish this app could do X, It would be great...",feature_request
6,"[I found a bug, There's a problem with the app...",bug_report
7,"[Can you show me around the app?, How does the...",app_tour
8,"[I have some feedback about the app, Can I pro...",app_feedback
9,"[How can I make the most out of this app?, Wha...",user_engagement


In [6]:
tags = {}

for i,tag in enumerate(df_patterns['tag']):
    tags[tag] = i
tags

{'greeting': 0,
 'symptom_inquiry': 1,
 'fitness_recommendations': 2,
 'stress_support': 3,
 'menu_options': 4,
 'feature_request': 5,
 'bug_report': 6,
 'app_tour': 7,
 'app_feedback': 8,
 'user_engagement': 9,
 'privacy_policy': 10,
 'about_app': 11}

In [7]:
for i,tag in enumerate(df_patterns['tag']):
    df_patterns['tag'].loc[i] = i

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_patterns['tag'].loc[i] = i
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_patterns['tag'].loc[i] = i


In [8]:
# Ignore The eror above

In [9]:
df_patterns

Unnamed: 0,patterns,tag
0,"[Hi, Hello, Hey, Good morning, Good afternoon,...",0
1,"[I'm not feeling well, I have some symptoms, W...",1
2,"[I want to exercise, Can you recommend some wo...",2
3,"[I'm feeling stressed, How can I relax?, What ...",3
4,"[What can you do?, What are your capabilities?...",4
5,"[I wish this app could do X, It would be great...",5
6,"[I found a bug, There's a problem with the app...",6
7,"[Can you show me around the app?, How does the...",7
8,"[I have some feedback about the app, Can I pro...",8
9,"[How can I make the most out of this app?, Wha...",9


In [10]:
from nltk import PorterStemmer

In [11]:
Stemmer = PorterStemmer()
nlp = spacy.load('en_core_web_lg')

In [12]:
def clean(sentence):
    a = []
    doc = nlp(sentence)
    for token in doc:
        if token.is_alpha:
            a.append(token.lemma_.lower())
    return ' '.join(a)

def lemmatize(sentence):
    doc = nlp(sentence)
    return ' '.join([token.lemma_ for token in doc])

def stem(sentence):
    porter = nltk.PorterStemmer()
    return ' '.join([porter.stem(word) for word in nltk.word_tokenize(sentence)])

In [13]:
sentence = '''Our app is designed to help you with your health and wellness journey. It provides resources and support in various areas, including symptom management, fitness, and stress relief. Let me know if you have any specific questions or if there's anything else I can assist you with.'''
lemmatize(clean(sentence))

'our app be design to help you with your health and wellness journey it provide resource and support in various area include symptom management fitness and stress relief let I know if you have any specific question or if there anything else I can assist you with'

In [14]:
stem(clean(sentence))

'our app be design to help you with your health and well journey it provid resourc and support in variou area includ symptom manag fit and stress relief let i know if you have ani specif question or if there anyth els i can assist you with'

In [15]:
import random
from nltk.corpus import wordnet

In [16]:
# Data Augmentation
def random_deletion(words, p=0.5):
    augmented_sentence = []
    for word in words:
        if random.uniform(0, 1) < p:
            continue
        augmented_sentence.append(word)
    return ' '.join(augmented_sentence)

def synonym_replacement(words, n=1):
    augmented_sentences = []
    for _ in range(n):
        augmented_sentence = words.copy()
        for i, token in enumerate(augmented_sentence):
            synsets = wordnet.synsets(token)
            if synsets:
                synonyms = []
                for synset in synsets:
                    for lemma in synset.lemmas():
                        synonyms.append(lemma.name())
                if synonyms:
                    synonym = random.choice(synonyms)
                    augmented_sentence[i] = synonym
        augmented_sentences.append(' '.join(augmented_sentence))
    return augmented_sentences[0]

In [17]:
random_deletion(nltk.word_tokenize(sentence))

"Our app designed to with wellness . provides resources and support in various including symptom management , , and relief . know if you specific questions or if there 's anything else I can you with ."

In [18]:
synonym_replacement(nltk.word_tokenize(sentence))

"Our app equal design to help you with your wellness and health journey . information_technology allow_for imagination and endorse inwards various area , admit symptom management , fittingness , and accent relief . lease Pine_Tree_State know if you have any specific wonder operating_theater if in_that_location 's anything else i fundament aid you with ."

In [19]:
for i,patterns in enumerate(df_patterns['patterns']):
    Pre_proccessed_List = []
    Existing_sents_List = df_patterns['patterns'].loc[i]
    for sentence in Existing_sents_List:
        print(sentence)
        Pre_proccessed_List.append(lemmatize(clean(sentence)))
        Pre_proccessed_List.append(stem(clean(sentence)))
        Pre_proccessed_List.append(clean(random_deletion(nltk.word_tokenize(sentence))))
        Pre_proccessed_List.append(clean(synonym_replacement(nltk.word_tokenize(sentence))))
    Existing_sents_List.extend(Pre_proccessed_List)

Hi
Hello
Hey
Good morning
Good afternoon
Good evening
I'm not feeling well
I have some symptoms
What should I do if I feel sick?
I think I might be sick
I want to exercise
Can you recommend some workouts?
How can I stay fit?
I need help with my fitness routine
I'm feeling stressed
How can I relax?
What should I do if I'm anxious?
Dealing with anxiety
What can you do?
What are your capabilities?
Show me the options
How can you help me?
I wish this app could do X
It would be great if I could X
Can you add X feature?
More features please
I found a bug
There's a problem with the app
I encountered an issue
The app isn't working properly
Can you show me around the app?
How does the app work?
Give me a tour of the app
What can I do in this app?
I have some feedback about the app
Can I provide feedback?
I want to share my thoughts about the app
How can I make the most out of this app?
What are some tips for using this app effectively?
I want to get the best results from this app
What's your pr

In [20]:
df_patterns_train = df_patterns.explode('patterns')

In [21]:
df_patterns_train.reset_index(drop=True, inplace=True)
print(df_patterns_train)

                        patterns tag
0                             Hi   0
1                          Hello   0
2                            Hey   0
3                   Good morning   0
4                 Good afternoon   0
..                           ...  ..
225  what the design of this app  11
226         who develop this app  11
227          who develop thi app  11
228                 develop this  11
229             develop this app  11

[230 rows x 2 columns]


In [22]:
X_train = []
for sentence in df_patterns_train['patterns']:
    doc = nlp(sentence)
    X_train.append(doc.vector)

In [23]:
X_train[0]

array([ 11.618   ,  -7.7895  ,   4.4404  ,  -1.2964  ,  -3.7925  ,
         0.13122 ,   3.0552  ,  -4.0384  ,  -0.93526 ,   4.2015  ,
        -5.3589  ,   4.5183  ,  -4.6501  ,   3.7295  ,   0.51439 ,
        -7.0579  ,  -0.80911 ,   1.4454  ,  -1.8873  ,   3.5586  ,
         5.1142  ,   3.3712  ,  -0.17961 ,  -8.7045  ,   3.4782  ,
        -1.5345  ,  -2.2227  ,  -4.4678  ,  -7.1788  ,  -7.5381  ,
        -5.7588  ,  -3.9471  ,  -4.0918  ,   4.5009  ,   1.2282  ,
         4.2638  ,  -7.6763  ,  -3.975   ,  -0.12942 ,  -0.10574 ,
         2.7062  ,   3.2303  ,  -3.2014  ,   0.87593 ,   3.8434  ,
        -2.3847  ,  -3.9484  ,   2.0605  ,  -4.26    ,   0.72341 ,
         5.2853  ,  -4.1931  ,  -3.3609  ,  -6.4365  ,   2.6142  ,
        -3.7905  ,  -0.65833 ,  -1.9956  ,   0.41714 ,  -1.4288  ,
         4.9303  ,   6.6759  ,  -2.3289  ,   0.88461 ,  -2.9958  ,
        -0.20061 ,   0.8433  ,   5.0334  ,   1.7636  ,   1.0861  ,
         3.4015  ,   4.4063  ,  -5.4782  ,  -8.2889  ,   4.693

In [24]:
X_train[0].shape

(300,)

In [25]:
X_train[0].shape

(300,)

In [26]:
X_train = np.array(X_train)

In [27]:
X_train

array([[11.618     , -7.7895    ,  4.4404    , ..., -4.1385    ,
         0.049123  ,  3.5613    ],
       [ 4.5354    , -2.1455    ,  2.068     , ...,  0.99953   ,
        -0.89679   ,  1.69      ],
       [ 2.9       ,  0.48218   , -2.2693    , ...,  4.1293    ,
         2.247     ,  0.35988   ],
       ...,
       [-1.099095  ,  1.3987875 ,  0.34864998, ..., -1.413715  ,
        -2.8751    ,  2.37323   ],
       [-0.55210006,  2.9096498 , -1.67515   , ..., -1.4742501 ,
        -3.1082    ,  2.38475   ],
       [ 1.6170667 ,  2.82      ,  0.6730667 , ..., -2.1270335 ,
        -3.3191335 ,  4.0084996 ]], dtype=float32)

In [28]:
X_train_reshaped = np.reshape(X_train, (X_train.shape[0], 1, -1))

In [29]:
X_train_reshaped[0].shape

(1, 300)

In [30]:
y_train = []

for tag in df_patterns_train['tag']:
    y_train.append(int(tag))

In [31]:
y_train

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11]

In [32]:
y_train = np.array(y_train)

In [33]:
y_train

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,
        5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,
        6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,
        7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
        8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,
        9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10,
       10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11,
       11, 11, 11, 11, 11

In [34]:
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D, Dense, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard

In [1]:
import tensorflow as tf
import datetime


model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(filters=64, kernel_size=5, activation='relu', input_shape=(300, 1)),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(12, activation='softmax')  # Output layer with 12 units and softmax activation for multi-class classification
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

logdir="logs/fit/" + datetime.time().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
# Train the model with TensorBoard callback
model.fit(X_train, y_train, epochs=400, callbacks=[tensorboard_callback])

%tensorboard --logdir logs/scalars

model.save('D:\HAWN\Model')

  model.save('D:\HAWN\Model')
  model.save('D:\HAWN\Model')

KeyboardInterrupt



In [39]:
sentence = 'Can you show me around the app?'
sentence = stem(clean(sentence))
doc = nlp(sentence)
vector = doc.vector
vector = np.reshape(vector, (1, 300,1))
np.argmax(model.predict(vector))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


7

In [50]:
# all the sentences used in testing are different except the first one

In [45]:
while True:
    User_input = input('User: ')
    sentence = stem(clean(User_input))
    doc = nlp(sentence)
    vector = doc.vector
    vector = np.reshape(vector, (1, 300,1))
    predictions = model.predict(vector)
    predicted_class_index = np.argmax(predictions)
    predicted_class_probability = predictions[0][predicted_class_index]
    if predicted_class_probability > 0.6:
        print(predicted_class_index)
    else:
        print('I dont Understand', predicted_class_probability, predicted_class_index)

User:  Tell me more about this app


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
11


User:  Tell me more about this app


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
11


User:  What can this app do?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
I dont Understand 0.40357852 5


User:  How does this app work? 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
7


User:  What are the features of this app? 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
1


User:  Is there anything interesting about this app?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
1


User:  I'm curious about this app, can you elaborate?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
I dont Understand 0.36335185 11


KeyboardInterrupt: Interrupted by user

In [47]:
# clearly better after removing clean
while True:
    User_input = input('User: ')
    sentence = stem(User_input)
    doc = nlp(sentence)
    vector = doc.vector
    vector = np.reshape(vector, (1, 300,1))
    predictions = model.predict(vector)
    predicted_class_index = np.argmax(predictions)
    predicted_class_probability = predictions[0][predicted_class_index]
    if predicted_class_probability > 0.6:
        print(predicted_class_index)
    else:
        print('I dont Understand', predicted_class_probability, predicted_class_index)

User:  Tell me more about this app


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
11


User:  How does this app work?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
4


User:  What are the features of this app? 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
9


User:  Is there anything interesting about this app? 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
I dont Understand 0.34150308 4


User:  I'm curious about this app, can you elaborate?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
I dont Understand 0.37147018 11


KeyboardInterrupt: Interrupted by user

In [48]:
while True:
    User_input = input('User: ')
    sentence = lemmatize(User_input)
    doc = nlp(sentence)
    vector = doc.vector
    vector = np.reshape(vector, (1, 300,1))
    predictions = model.predict(vector)
    predicted_class_index = np.argmax(predictions)
    predicted_class_probability = predictions[0][predicted_class_index]
    if predicted_class_probability > 0.6:
        print(predicted_class_index)
    else:
        print('I dont Understand', predicted_class_probability, predicted_class_index)

User:  Tell me more about this app


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
11


User:  What can this app do?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
I dont Understand 0.51486975 7


User:  How does this app work? 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
4


User:  What are the features of this app? 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
6


User:  Is there anything interesting about this app? 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
I dont Understand 0.4556609 4


User:  I'm curious about this app, can you elaborate? 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
I dont Understand 0.24178007 7


KeyboardInterrupt: Interrupted by user

In [51]:
# Poor Using Lemmatization

In [53]:
while True:
    User_input = input('User: ')
    sentence = lemmatize(clean(User_input))
    doc = nlp(sentence)
    vector = doc.vector
    vector = np.reshape(vector, (1, 300,1))
    predictions = model.predict(vector)
    predicted_class_index = np.argmax(predictions)
    predicted_class_probability = predictions[0][predicted_class_index]
    if predicted_class_probability > 0.6:
        print(predicted_class_index)
    else:
        print('I dont Understand', predicted_class_probability, predicted_class_index)

User:  Tell me More about this app


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
11


User:  How does this app work? 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
7


User:  What are the features of this app?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
I dont Understand 0.44931814 6


User:  Is there anything interesting about this app? 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
1


User:  I'm curious about this app, can you elaborate?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
I dont Understand 0.5859932 7


KeyboardInterrupt: Interrupted by user

In [54]:
# Clearly Using Stemming only is the better option

In [55]:
model.save('D:\HWAN\Models\Main_chatbot_Model\M_CBOT_M.keras')

  model.save('D:\HWAN\Models\Main_chatbot_Model\M_CBOT_M.keras')
