In [6]:
import tensorflow as tf 
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras.preprocessing.sequence import pad_sequences 
import numpy as np 
import random 
import json 
  
import warnings 
warnings.filterwarnings('ignore')

In [7]:
import json

with open('Intent.json', 'r') as f: 
    data = json.load(f)

print("Keys in the data:", data.keys())
print("Type of 'intents':", type(data['intents']))
print("Number of intents:", len(data['intents']))
print("Keys in the first intent:", data['intents'][0].keys())
print("Last intent data:", data['intents'][-1])


Keys in the data: dict_keys(['intents'])
Type of 'intents': <class 'list'>
Number of intents: 22
Keys in the first intent: dict_keys(['intent', 'text', 'responses', 'extension', 'context', 'entityType', 'entities'])
Last intent data: {'intent': 'SelfAware', 'text': ['Can you prove you are self-aware', 'Can you prove you are self aware', 'Can you prove you have a conscious', 'Can you prove you are self-aware please', 'Can you prove you are self aware please', 'Can you prove you have a conscious please', 'prove you have a conscious'], 'responses': ['That is an interesting question, can you prove that you are?', 'That is an difficult question, can you prove that you are?', 'That depends, can you prove that you are?'], 'extension': {'function': '', 'entities': False, 'responses': []}, 'context': {'in': '', 'out': '', 'clear': False}, 'entityType': 'NA', 'entities': []}


In [8]:
def clean(line): 
	cleaned_line = '' 
	for char in line: 
		if char.isalpha(): 
			cleaned_line += char 
		else: 
			cleaned_line += ' '
	cleaned_line = ' '.join(cleaned_line.split()) 
	return cleaned_line


In [9]:
intents = []
unique_intents = []
text_input = []
response_for_intent = {}

for intent in data['intents']:
    unique_intents.append(intent['intent'])

    for text in intent['text']:
        text_input.append(clean(text))
        intents.append(intent['intent'])
    
    response_for_intent[intent['intent']] = intent['responses']

unique_intents = list(set(unique_intents))  # Remove duplicates from unique_intents


In [10]:
print("Intent :",intents[0]) 
print("Number of Intent:",len(intents)) 
print("Sample Input:", text_input[0]) 
print('Length of text_input:',len(text_input)) 
print("Sample Response: ", response_for_intent[intents[0]])

Intent : Greeting
Number of Intent: 143
Sample Input: Hi
Length of text_input: 143
Sample Response:  ['Hi human, please tell me your GeniSys user', 'Hello human, please tell me your GeniSys user', 'Hola human, please tell me your GeniSys user']


In [11]:
tokenizer = Tokenizer(filters='',oov_token='<unk>') 
tokenizer.fit_on_texts(text_input) 
sequences = tokenizer.texts_to_sequences(text_input) 
padded_sequences = pad_sequences(sequences, padding='pre') 
print('Shape of Input Sequence:',padded_sequences.shape) 
padded_sequences[:5]

Shape of Input Sequence: (143, 9)


array([[ 0,  0,  0,  0,  0,  0,  0,  0, 52],
       [ 0,  0,  0,  0,  0,  0,  0, 52, 53],
       [ 0,  0,  0,  0,  0,  0,  0,  0, 68],
       [ 0,  0,  0,  0,  0,  0,  0,  0, 39],
       [ 0,  0,  0,  0,  0,  0,  0, 39, 53]])

In [12]:
intent_to_index = {}
categorical_target = []

for index, intent in enumerate(set(intents)):
    intent_to_index[intent] = index

categorical_target = [intent_to_index[intent] for intent in intents]

num_classes = len(intent_to_index)
print('Number of Intents:', num_classes)

index_to_intent = {index: intent for intent, index in intent_to_index.items()}
index_to_intent


Number of Intents: 22


{0: 'CourtesyGoodBye',
 1: 'UnderstandQuery',
 2: 'PodBayDoorResponse',
 3: 'SelfAware',
 4: 'NameQuery',
 5: 'Greeting',
 6: 'Jokes',
 7: 'NotTalking2U',
 8: 'GreetingResponse',
 9: 'Gossip',
 10: 'PodBayDoor',
 11: 'Thanks',
 12: 'TimeQuery',
 13: 'GoodBye',
 14: 'Shutup',
 15: 'RealNameQuery',
 16: 'Clever',
 17: 'CourtesyGreetingResponse',
 18: 'CurrentHumanQuery',
 19: 'WhoAmI',
 20: 'Swearing',
 21: 'CourtesyGreeting'}

In [13]:
categorical_vec = tf.keras.utils.to_categorical(categorical_target, num_classes=num_classes)
print('Shape of categorical vector:', categorical_vec.shape)
print(categorical_vec[:5])


Shape of categorical vector: (143, 22)
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [14]:
epochs=100
embed_dim=300
lstm_num=50
output_dim=categorical_vec.shape[1] 
input_dim=len(unique_intents) 
print("Input Dimension :{},\nOutput Dimension :{}".format(input_dim,output_dim))

Input Dimension :22,
Output Dimension :22


In [15]:
import tensorflow as tf

# Define the parameters
embed_dim = 300
lstm_num = 50
output_dim = 22

# Assuming tokenizer.word_index is already defined
vocab_size = len(tokenizer.word_index) + 1

model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(vocab_size, embed_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm_num, dropout=0.1, return_sequences=False)),
    tf.keras.layers.Dense(lstm_num, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(output_dim, activation='softmax')
])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


In [17]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import random
import json
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split

# Load the data
with open('Intent.json', 'r') as f:
    data = json.load(f)

# Function to clean the text data
def clean(line):
    cleaned_line = ''
    for char in line:
        if char.isalpha():
            cleaned_line += char
        else:
            cleaned_line += ' '
    cleaned_line = ' '.join(cleaned_line.split())
    return cleaned_line

# Prepare the data
intents = []
unique_intents = []
text_input = []
response_for_intent = {}

for intent in data['intents']:
    unique_intents.append(intent['intent'])
    for text in intent['text']:
        text_input.append(clean(text))
        intents.append(intent['intent'])
    response_for_intent[intent['intent']] = intent['responses']

unique_intents = list(set(unique_intents))  # Remove duplicates from unique_intents

# Tokenize the text data
tokenizer = Tokenizer(filters='', oov_token='<unk>')
tokenizer.fit_on_texts(text_input)
sequences = tokenizer.texts_to_sequences(text_input)
padded_sequences = pad_sequences(sequences, padding='pre')

# Prepare the target data
intent_to_index = {}
for index, intent in enumerate(set(intents)):
    intent_to_index[intent] = index

categorical_target = [intent_to_index[intent] for intent in intents]
num_classes = len(intent_to_index)
categorical_vec = tf.keras.utils.to_categorical(categorical_target, num_classes=num_classes)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(padded_sequences, categorical_vec, test_size=0.2, random_state=42)

# Define the model parameters
embed_dim = 300
lstm_num = 50
output_dim = num_classes  # Use the number of classes for output_dim

# Define the model
vocab_size = len(tokenizer.word_index) + 1
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(vocab_size, embed_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm_num, dropout=0.1, return_sequences=False)),
    tf.keras.layers.Dense(lstm_num, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(output_dim, activation='softmax')
])

# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()

# Fit the model
history = model.fit(
    X_train, y_train,
    epochs=20,  # Adjust the number of epochs as needed
    batch_size=32,  # Adjust the batch size as needed
    validation_data=(X_val, y_val)
)


Epoch 1/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 243ms/step - accuracy: 0.0572 - loss: 3.0913 - val_accuracy: 0.0345 - val_loss: 3.0992
Epoch 2/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.0697 - loss: 3.0692 - val_accuracy: 0.0345 - val_loss: 3.0992
Epoch 3/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.1185 - loss: 3.0554 - val_accuracy: 0.0345 - val_loss: 3.0929
Epoch 4/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.1904 - loss: 3.0155 - val_accuracy: 0.0690 - val_loss: 3.0897
Epoch 5/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.1642 - loss: 3.0371 - val_accuracy: 0.1034 - val_loss: 3.0767
Epoch 6/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.1800 - loss: 2.9843 - val_accuracy: 0.1034 - val_loss: 3.0612
Epoch 7/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━

In [18]:
model.fit(padded_sequences, categorical_vec, epochs=epochs, verbose=0)


<keras.src.callbacks.history.History at 0x28b9fed8510>

In [19]:
test_text_inputs = ["Hello",  
                    "my name is adam",  
                    "how are you?",  
                    "can you guess my name?", 
                    "Do you get me","Adios"] 
  
test_intents = ["Greeting", 
                "GreetingResponse", 
                "CourtesyGreeting", 
                "CurrentHumanQuery", 
                "UnderstandQuery", 
                "GoodBye"] 
  
test_sequences = tokenizer.texts_to_sequences(test_text_inputs) 
test_padded_sequences = pad_sequences(test_sequences,  padding='pre') 
test_labels = np.array([unique_intents.index(intent) for intent in test_intents]) 
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=num_classes) 
loss, accuracy = model.evaluate(test_padded_sequences, test_labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 782ms/step - accuracy: 0.8333 - loss: 0.4778


In [20]:
def response(sentence): 
    sent_tokens = [] 
    # Split the input sentence into words 
    words = sentence.split() 
    # Convert words to their corresponding word indices 
    for word in words:                                            
        if word in tokenizer.word_index: 
            sent_tokens.append(tokenizer.word_index[word]) 
        else: 
            # Handle unknown words 
            sent_tokens.append(tokenizer.word_index['<unk>']) 
    sent_tokens = tf.expand_dims(sent_tokens, 0) 
    #predict numerical category 
    pred = model(sent_tokens)     
    #category to intent 
    pred_class = np.argmax(pred.numpy(), axis=1)                 
    # random response to that intent 
    return random.choice( 
        response_for_intent[index_to_intent[pred_class[0]]]), index_to_intent[pred_class[0]]

In [None]:
print("Note: Enter 'quit' to break the loop.")    
while True:                                                 
    query = input('You: ') 
    if query.lower() == 'quit': 
        break
    bot_response, typ = response(query) 
    print('Geek: {} -- TYPE: {}'.format(bot_response, typ)) 
    print()

Note: Enter 'quit' to break the loop.
You: how are you
Geek: Hello, I am good thank you, how are you? Please tell me your GeniSys user -- TYPE: CourtesyGreeting

You: i study in college
Geek: Let me see -- TYPE: WhoAmI

You: who am i
Geek: You are <HUMAN>! How can I help? -- TYPE: CurrentHumanQuery

You: goof
Geek: Jim, I just don't have the power -- TYPE: PodBayDoorResponse

You: what is your name
Geek: Call me Geni -- TYPE: NameQuery

You: nice name!
Geek: Mary said I a question and I answer then I ask him a question and he answer. -- TYPE: Gossip

You: gossip
Geek: Peter said he seems to him that I can not be very sure on anything. -- TYPE: Gossip

