In [None]:
# Colab-only cell: opens the browser file picker and copies the chosen files
# into the runtime's working directory.
# NOTE(review): presumably used to provide intents.json, which a later cell
# opens by relative path — confirm.
from google.colab import files
files.upload()

KeyboardInterrupt: ignored

In [None]:
import json
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import nltk
from tensorflow import keras
# Fetch the NLTK data used below: tokenizer models for word_tokenize, the
# WordNet corpus for the lemmatizer, and the stopword lists.
nltk.download('punkt')
# NOTE(review): newer NLTK releases load word_tokenize data from 'punkt_tab'
# instead of 'punkt'; downloading both keeps Restart & Run All working across
# versions — confirm against the installed NLTK.
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('stopwords')

# Load the intents data.
# JSON text is UTF-8 by specification; naming the encoding keeps the read
# independent of the platform's default locale.
with open("intents.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Preprocessing
lemmatizer = WordNetLemmatizer()

# Build the stopword set once; the original rebuilt it for every token.
english_stopwords = set(stopwords.words("english"))

vocab = set()      # unique lemmatized tokens seen in any pattern
classes = set()    # unique intent tags
documents = []     # (token list, intent tag) pairs, one per pattern

# Tokenize, lemmatize, and collect vocabulary and documents
for intent in data["intents"]:
    tag = intent["tag"]
    classes.add(tag)
    for pattern in intent["patterns"]:
        pattern = pattern.rstrip("?")  # Remove trailing question marks
        # Lower-case BEFORE the stopword test: NLTK's English stopwords are
        # lower-case, so testing the raw token let capitalized stopwords
        # ("What", "I", ...) slip into the vocabulary.
        lowered = (raw_token.lower() for raw_token in word_tokenize(pattern))
        tokens = [
            lemmatizer.lemmatize(word)
            for word in lowered
            if word not in english_stopwords
        ]
        vocab.update(tokens)
        documents.append((tokens, tag))

# Freeze both collections into alphabetically sorted lists; list positions are
# used as feature / class indices further down.
vocab = sorted(vocab)
classes = sorted(classes)

# Create training data
# Precompute position lookups so each token / tag resolves in O(1) instead of
# a linear list.index() scan per lookup.
vocab_index = {term: position for position, term in enumerate(vocab)}
class_index = {label: position for position, label in enumerate(classes)}

X = []
y = []

# Convert documents into bag-of-words vectors and one-hot encode the intent tags
for doc_tokens, doc_tag in documents:
    bag = [0] * len(vocab)  # binary presence vector over the vocabulary
    for token in doc_tokens:
        bag[vocab_index[token]] = 1

    output_row = [0] * len(classes)  # one-hot vector over the intent tags
    output_row[class_index[doc_tag]] = 1

    X.append(bag)
    y.append(output_row)

X = np.array(X)  # one row per pattern, one column per vocabulary entry
y = np.array(y)  # one row per pattern, one column per intent tag

# Seed NumPy and TensorFlow so weight initialisation and shuffling are more
# repeatable from run to run (the split below was already seeded with 42).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Split the data into training and testing sets (80/20, stratified on the labels)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

# Build the model: bag-of-words input -> 128 ReLU -> dropout -> 64 ReLU -> softmax over tags
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation="relu"))
model.add(Dropout(0.5))  # Dropout layer to reduce overfitting
model.add(Dense(64, activation="relu"))
model.add(Dense(y_train.shape[1], activation="softmax"))

# Labels are one-hot rows, hence categorical (not sparse) cross-entropy.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Stop once val_loss has not improved for 20 epochs and roll back to the best weights.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
    verbose=1,
    mode='auto',
    restore_best_weights=True,
)

# Train the model. `callbacks` is documented as a list of callbacks, so wrap
# the single callback rather than passing the bare object.
model.fit(
    X_train,
    y_train,
    epochs=200,
    validation_split=0.2,
    batch_size=5,
    callbacks=[early_stop],
)

# Evaluate the model on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Loss: {loss}, Accuracy: {accuracy}")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 41: early stopping
Loss: 0.499512642621994, Accuracy: 0.8275862336158752


In [None]:
def predict_intent(text, threshold=0.2):
    """Predict the intent tag for a piece of user text.

    The text is preprocessed the same way the visible training code treats
    patterns (strip trailing "?", tokenize, drop stopwords, lower-case and
    lemmatize), turned into a binary bag-of-words vector over the module-level
    ``vocab`` and passed to the module-level ``model``.

    Args:
        text: Raw user input.
        threshold: Minimum confidence the top class must exceed for its tag
            to be returned. Defaults to 0.2, the value previously hard-coded.

    Returns:
        The entry of ``classes`` with the highest predicted score, or ``None``
        when that score is not strictly greater than ``threshold``.
    """
    text = text.rstrip("?")  # Remove trailing question marks

    # Build the stopword set once per call instead of once per token.
    stop_words = set(stopwords.words("english"))
    tokens = [
        lemmatizer.lemmatize(token.lower())
        for token in word_tokenize(text)
        if token not in stop_words
    ]

    # Map each vocabulary entry to its first position so lookups are O(1);
    # setdefault keeps list.index() semantics (first occurrence wins).
    position_of = {}
    for position, term in enumerate(vocab):
        position_of.setdefault(term, position)

    bag = [0] * len(vocab)
    for token in tokens:
        position = position_of.get(token)
        if position is not None:  # tokens outside the vocabulary are ignored
            bag[position] = 1

    input_data = np.array([bag])  # batch containing this single example
    prediction = model.predict(input_data)

    intent_index = np.argmax(prediction)  # index of the highest-scoring class
    intent_confidence = prediction[0][intent_index]

    if intent_confidence > threshold:
        return classes[intent_index]
    return None

def get_response(tag):
    """Pick a reply for the given intent tag.

    Scans ``data["intents"]`` in order and, for the first entry whose "tag"
    equals ``tag``, returns one of its "responses" chosen at random.

    Returns:
        A response string, or ``None`` when no intent carries that tag.
    """
    matching_intent = next(
        (entry for entry in data["intents"] if entry["tag"] == tag),
        None,
    )
    if matching_intent is None:
        return None
    return random.choice(matching_intent["responses"])


In [None]:
def bag_of_words(sentence, words):
    """Encode a sentence as a binary bag-of-words vector over ``words``.

    The sentence is stripped of trailing "?", tokenized, filtered against the
    English stopword list, lower-cased and lemmatized. Lemmatization uses the
    lemmatizer's default part of speech so the tokens line up with the
    vocabulary, which this notebook builds with ``lemmatizer.lemmatize(token)``
    and no ``pos`` argument. The previous ``pos='v'`` left plural nouns such
    as "hours" unreduced, so they never matched their vocabulary entry.

    Args:
        sentence: Raw user input.
        words: Vocabulary list; position i of the result corresponds to
            ``words[i]``.

    Returns:
        A list of 0/1 ints with the same length as ``words``; an entry is 1
        when that vocabulary word occurs among the processed tokens.
    """
    sentence = sentence.rstrip("?")  # Remove trailing question marks

    # Build the stopword set once per call instead of once per token.
    stop_words = set(stopwords.words("english"))

    # A set gives O(1) membership tests in place of the nested scan.
    present_tokens = {
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(sentence)
        if token not in stop_words
    }

    return [1 if word in present_tokens else 0 for word in words]


In [None]:
# Reply used whenever the bot cannot (or deliberately does not) answer.
FALLBACK_MESSAGE = "I am evolving constantly. I apologize that I could not help you with your query. Please give us a call at 669-224-0833 and our team will be happy to assist."
# Minimum score the top class must exceed before its response is used.
CONFIDENCE_THRESHOLD = 0.5

print("Hi there, this is Genie. I’m so happy to chat with you today! 😊 \nCould you please be more specific with your questions.\nI’m always trying to improve my communication skills and learn from your feedback. (type quit to end the conversation):")
while True:
    inp = input("You: ")  # Get user input

    # "quit" (any case, surrounding whitespace ignored) ends the conversation
    # with a random "goodbye" response, if such an intent exists.
    if inp.strip().lower() == "quit":
        farewell = get_response("goodbye")
        if farewell is not None:
            print("Bot: " + farewell)
        break

    # Predict the intent; the model expects a 2-D array (batch of one row).
    # verbose=0 keeps Keras' per-call progress bar out of the conversation.
    features = np.array([bag_of_words(inp, vocab)])
    results = model.predict(features, verbose=0)[0]
    results_index = np.argmax(results)
    tag = classes[results_index]

    # A confident "goodbye" prediction for anything other than "quit" is
    # answered with the fallback message, as in the original flow. The old
    # `inp == 'quit'` branch is gone because "quit" never reaches this point.
    reply = None
    if results[results_index] > CONFIDENCE_THRESHOLD and tag != "goodbye":
        reply = get_response(tag)

    print("Bot: " + (reply if reply is not None else FALLBACK_MESSAGE))


Hi there, this is Genie. I’m so happy to chat with you today! 😊 
Could you please be more specific with your questions.
I’m always trying to improve my communication skills and learn from your feedback. (type quit to end the conversation):
You: delivery
Bot: Yes, we provide home delivery through Doordash, Uber Eats, Grubhub, and LemonHat
You: order food
Bot: We have many great options to choose from, You can place your order online at https://www.suggioota.com/order-online
You: operational hours
Bot: I am evolving constantly. I apologize that I could not help you with your query. Please give us a call at 669-224-0833 and our team will be happy to assist
You: what can i buy today
Bot: Our daily changing menu features only fresh, made-from-scratch meals, using the best seasonal ingredients. Please visit https://www.suggioota.com/order-online to explore our menu.
You: what can i buy
Bot: I am evolving constantly. I apologize that I could not help you with your query. Please give us a call

In [None]:
# Write the trained model to 'Genie.h5', a path relative to the current
# working directory.
model.save('Genie.h5') # save the model