In [None]:
# Install necessary libraries (only needed for first-time setup)
!pip install tensorflow numpy nltk

import numpy as np
import tensorflow as tf
import nltk
import json
import random
import string

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, SpatialDropout1D

# Fetch the NLTK 'punkt' tokenizer data (network access on first run).
# NOTE(review): nothing in the visible code calls an nltk tokenizer -- the
# text is tokenized with the Keras Tokenizer -- so this download looks
# unnecessary. Confirm no other cell relies on 'punkt' before removing it.
nltk.download('punkt')

# Intent dataset. Each intent has:
#   "tag"       - class label the model learns to predict,
#   "patterns"  - example user utterances used as training inputs,
#   "responses" - candidate replies; one is chosen at random for the tag.
#
# Because the reply is picked at random among a tag's responses, every
# response of an intent must be a valid answer to every pattern of that
# intent. The time / date / capital / president questions therefore each get
# their own intent instead of sharing a single catch-all tag (which could
# answer "What time is it?" with "The capital of France is Paris.").
data = {
    "intents": [
        {"tag": "greeting",
         "patterns": ["Hi", "Hello", "Hey", "Howdy", "What's up?", "Good morning", "Good evening", "Hey there"],
         "responses": ["Hello!", "Hey!", "Hi there!", "Howdy!", "Good to see you!", "Hey, how's it going?"]},

        {"tag": "goodbye",
         "patterns": ["Bye", "See you later", "Goodbye", "Take care", "Catch you later", "See you soon"],
         "responses": ["Goodbye!", "Take care!", "See you later!", "Have a nice day!", "See you next time!"]},

        {"tag": "thanks",
         "patterns": ["Thanks", "Thank you", "That's helpful", "I appreciate it", "Thanks a lot", "Many thanks"],
         "responses": ["You're welcome!", "Glad to help!", "No problem!", "Anytime!", "Always happy to help!"]},

        {"tag": "name",
         "patterns": ["What is your name?", "Who are you?", "Tell me your name", "What's your name?"],
         "responses": ["I am a chatbot!", "You can call me ChatBot.", "I'm your friendly AI assistant!"]},

        {"tag": "age",
         "patterns": ["How old are you?", "What is your age?", "When were you created?", "Are you old?"],
         "responses": ["I'm timeless!", "Age is just a number, and I don’t have one!", "I was created recently."]},

        {"tag": "weather",
         "patterns": ["What's the weather like?", "Tell me the weather", "How's the weather today?", "Is it raining?"],
         "responses": ["I can't check the weather yet, but you can try a weather app!",
                       "Weather changes all the time! You might want to check an online forecast."]},

        {"tag": "restaurant",
         "patterns": ["Can you suggest a good restaurant?", "Where can I eat?", "Recommend a place to eat", "Best food places?"],
         "responses": ["I can't taste food, but I hear Italian and Chinese restaurants are great choices!",
                       "Try searching online for top-rated restaurants near you.",
                       "A highly-rated local restaurant is always a great choice!"]},

        {"tag": "jokes",
         "patterns": ["Tell me a joke", "Make me laugh", "Say something funny", "Do you know any jokes?"],
         "responses": ["Why don’t skeletons fight each other? Because they don’t have the guts!",
                       "I told my wife she should embrace her mistakes. She gave me a hug.",
                       "Why did the scarecrow win an award? Because he was outstanding in his field!"]},

        # The four intents below replace the former "general_knowledge" tag.
        # A few paraphrased patterns were added so that no class is trained
        # from a single example.
        {"tag": "time",
         "patterns": ["What time is it?", "Tell me the time", "Do you know the time?"],
         "responses": ["I'm not connected to the real-time clock, but you can check your phone or watch!"]},

        {"tag": "date",
         "patterns": ["What's today's date?", "Tell me the date", "What day is it today?"],
         "responses": ["Today's date depends on your timezone! You can check your calendar."]},

        {"tag": "capital_of_france",
         "patterns": ["What is the capital of France?", "What's the capital of France?", "Capital of France?"],
         "responses": ["The capital of France is Paris."]},

        {"tag": "president",
         "patterns": ["Who is the president?", "Who's the current president?", "Name the president"],
         "responses": ["Presidents change over time! You might want to check the latest news."]},

        {"tag": "help",
         "patterns": ["I need help", "Can you help me?", "Help me please", "I need assistance"],
         "responses": ["Sure! How can I assist you?", "I'm here to help! What do you need?", "Tell me what you need help with."]},
    ]
}

# Flatten the intents into parallel training lists: all_sentences holds one
# lowercased pattern per entry and all_tags holds that pattern's intent tag at
# the same position. tag_responses maps each tag to its candidate replies.
all_sentences = [
    pattern.lower()
    for intent in data["intents"]
    for pattern in intent["patterns"]
]
all_tags = [
    intent["tag"]
    for intent in data["intents"]
    for _ in intent["patterns"]
]
tag_responses = {intent["tag"]: intent["responses"] for intent in data["intents"]}

# Fit a word-level tokenizer on the training sentences. Punctuation characters
# are passed as the filter set; at most the 2000 most frequent words are kept.
tokenizer = Tokenizer(filters=string.punctuation, num_words=2000)
tokenizer.fit_on_texts(all_sentences)
# +1 because word indices start at 1; index 0 is the padding value.
vocab_size = 1 + len(tokenizer.word_index)

# Encode every sentence as a list of word indices, then right-pad them all to
# the length of the longest training sentence.
sequences = tokenizer.texts_to_sequences(all_sentences)
max_length = len(max(sequences, key=len))
padded_sequences = pad_sequences(sequences, padding='post', maxlen=max_length)

# Map every intent tag to a class index and back.
# The tags are sorted so that the tag -> index assignment is the same on every
# run: iterating a plain set of strings can yield a different order in each
# interpreter session (string hash randomization), which would make class
# indices -- and therefore any saved weights or mappings -- irreproducible.
unique_tags = sorted(set(all_tags))
tag_to_index = {tag: index for index, tag in enumerate(unique_tags)}
index_to_tag = dict(enumerate(unique_tags))

# Integer class label for each training sentence, aligned with all_tags.
tag_sequences = np.array([tag_to_index[tag] for tag in all_tags])

# Build chatbot model: word embeddings -> LSTM sentence encoder -> dense
# softmax classifier with one output unit per intent tag.
# `input_length` is deliberately not passed to Embedding: the argument is
# deprecated in Keras 3 (it only triggers a warning), and the LSTM does not
# need a fixed sequence length -- the model is built from the shape of the
# padded training data on the first fit() call.
model = Sequential([
    Embedding(vocab_size, 128),
    SpatialDropout1D(0.2),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(128, activation="relu"),
    Dense(len(unique_tags), activation="softmax")
])

# Labels are integer class indices, hence the sparse categorical loss.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train model
model.fit(padded_sequences, tag_sequences, epochs=100, verbose=1)

# Function to get chatbot response
def get_response(user_input, min_confidence=0.0):
    """Return a reply for the intent predicted from *user_input*.

    The text is lowercased, tokenized and padded the same way as the training
    sentences, classified by the model, and a random response of the predicted
    tag is returned.

    Args:
        user_input: Raw text typed by the user.
        min_confidence: Minimum softmax probability the top intent must reach.
            Below it the fallback message is returned. The default of 0.0
            always accepts the top prediction.

    Returns:
        A response string, or "Sorry, I didn't understand that." when the
        input contains no word known to the tokenizer or the prediction is
        below *min_confidence*.
    """
    fallback = "Sorry, I didn't understand that."

    seq = tokenizer.texts_to_sequences([user_input.lower()])
    # Words unknown to the tokenizer are dropped. If nothing is left, the model
    # would classify pure padding, so answer with the fallback instead.
    if not seq[0]:
        return fallback

    padded = pad_sequences(seq, maxlen=max_length, padding='post')
    # verbose=0 keeps the per-call progress bar out of the chat transcript.
    probabilities = model.predict(padded, verbose=0)[0]
    # argmax over the softmax output is always a valid class index, so no
    # range check is needed; confidence is what separates unclear input.
    tag_index = int(np.argmax(probabilities))

    if probabilities[tag_index] < min_confidence:
        return fallback

    tag = index_to_tag[tag_index]
    return random.choice(tag_responses.get(tag, ["I'm not sure how to respond."]))

# Chatbot interaction loop: read a line, answer it, and repeat until the user
# types "exit" or "quit" (case-insensitive, surrounding whitespace ignored).
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt ends the chat cleanly instead of
        # surfacing a traceback.
        print("\nChatbot: Goodbye!")
        break

    if user_input.lower() in ("exit", "quit"):
        print("Chatbot: Goodbye!")
        break

    # Nothing was typed: prompt again rather than asking the model to
    # classify an empty message.
    if not user_input:
        continue

    bot_response = get_response(user_input)
    print("Chatbot:", bot_response)


Epoch 1/100


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 59ms/step - accuracy: 0.1201 - loss: 2.3003
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.1713 - loss: 2.2959
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.1609 - loss: 2.2887
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.1954 - loss: 2.2804
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.1849 - loss: 2.2733
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.1745 - loss: 2.2584 
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.1849 - loss: 2.2433
Epoch 8/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - accuracy: 0.1745 - loss: 2.2202
Epoch 9/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m