In [1]:
# # Install required libraries (run this only if not already installed)
# !pip install nltk spacy scikit-learn transformers torch streamlit

# # For SpaCy, download the English model (run this only if not already installed)
# # !python -m spacy download en_core_web_sm?

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ----- ---------------------------------- 1.8/12.8 MB 9.1 MB/s eta 0:00:02
     ------------------ --------------------- 6.0/12.8 MB 18.4 MB/s eta 0:00:01
     --------------------------- ------------ 8.9/12.8 MB 16.8 MB/s eta 0:00:01
     ------------------------------------- - 12.3/12.8 MB 16.4 MB/s eta 0:00:01
     --------------------------------------  12.6/12.8 MB 15.2 MB/s eta 0:00:01
     --------------------------------------- 12.8/12.8 MB 11.2 MB/s eta 0:00:00
[38;5;2m[+] Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [2]:
# Import libraries: NLTK for text, Spacy for NLP, and others for various tasks.
import nltk
import spacy
import random
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from transformers import pipeline
import warnings
warnings.filterwarnings("ignore")

# Download NLTK data: tokenizers, lemmatizer, and stopwords for text processing.
nltk.download('punkt')        # Original tokenizer
nltk.download('punkt_tab')    # Newer tokenizer data
nltk.download('wordnet')      # Lemmatizer
nltk.download('stopwords')    # Stopwords

nlp = spacy.load("en_core_web_sm")
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
# Define intents: a dictionary mapping user inputs to chatbot responses.
intents = {
    "greeting": {
        "patterns": ["hello", "hi", "hey", "good morning"],
        "responses": ["Hi there!", "Hello!", "Hey! How can I help?", "Good morning!"]
    },
    "farewell": {
        "patterns": ["bye", "goodbye", "see you", "take care"],
        "responses": ["Goodbye!", "See you later!", "Take care!"]
    },
    "how_are_you": {
        "patterns": ["how are you", "how you doing", "are you okay"],
        "responses": ["I'm just a chatbot, but I'm doing great!", "All good here, thanks!"]
    },
    "default": {
        "patterns": [],
        "responses": ["Sorry, I don’t understand.", "Can you rephrase that?"]
    }
}


In [4]:
# Set up preprocessing tools: lemmatizer and stopwords to clean text.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))

# Preprocessing function: cleans text by tokenizing, removing stopwords, and lemmatizing.
def preprocess_text(text):
    # Tokenize
    tokens = word_tokenize(text.lower())
    # Remove stopwords and lemmatize
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalnum() and token not in stop_words]
    return " ".join(tokens)

# Test preprocessing: shows how raw text becomes processed for matching.
sample_text = "Hello, how are you doing today?"
processed_text = preprocess_text(sample_text)
print(f"Original: {sample_text}")
print(f"Processed: {processed_text}")

# Preprocess intents: clean all patterns in the intents dictionary for later use.
preprocessed_intents = {}
for intent, data in intents.items():
    preprocessed_intents[intent] = {
        "patterns": [preprocess_text(pattern) for pattern in data["patterns"]],
        "responses": data["responses"]
    }

# Display preprocessed intents: prints cleaned patterns for debugging.
print("\nPreprocessed Intents:")
for intent, data in preprocessed_intents.items():
    print(f"{intent}: {data['patterns']}")


Original: Hello, how are you doing today?
Processed: hello today

Preprocessed Intents:
greeting: ['hello', 'hi', 'hey', 'good morning']
farewell: ['bye', 'goodbye', 'see', 'take care']
how_are_you: ['', '', 'okay']
default: []


In [5]:
# Import tqdm: adds progress bars to loops for visual feedback.
from tqdm import tqdm

# First chatbot function: uses preprocessed text to match intents with tqdm.
def simple_chatbot(text):
    # Preprocess the input text
    processed_text = preprocess_text(text)
    
    # Iterate over intents with tqdm to find a match and show progress.
    for intent, data in tqdm(preprocessed_intents.items(), desc="Matching Intent"):
        if processed_text in data["patterns"]:
            return random.choice(data["responses"])
    
    # If no match, return a default response randomly.
    return random.choice(preprocessed_intents["default"]["responses"])

# Test the first chatbot: runs it with sample inputs and shows responses.
test_inputs = ["hello", "bye", "how are you", "what's up"]
print("Testing Rule-Based Chatbot:")
for input_text in tqdm(test_inputs, desc="Processing Inputs"):
    response = simple_chatbot(input_text)
    print(f"User: {input_text} | Bot: {response}")



Testing Rule-Based Chatbot:


Processing Inputs:   0%|                                                                         | 0/4 [00:00<?, ?it/s]
Matching Intent:   0%|                                                                           | 0/4 [00:00<?, ?it/s][A


User: hello | Bot: Hi there!



Matching Intent:  25%|████████████████▌                                                 | 1/4 [00:00<00:00, 221.13it/s][A


User: bye | Bot: Take care!



Matching Intent:  50%|█████████████████████████████████████▌                                     | 2/4 [00:00<?, ?it/s][A


User: how are you | Bot: All good here, thanks!



Matching Intent:  50%|█████████████████████████████████████▌                                     | 2/4 [00:00<?, ?it/s][A
Processing Inputs: 100%|█████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 80.72it/s]

User: what's up | Bot: I'm just a chatbot, but I'm doing great!





In [6]:
# Second chatbot function: uses raw text (just lowercase) for simpler matching.
def simple_chatbot(text):
    text = text.lower()  # Just lowercase, no heavy preprocessing
    
    # Loop through intents with tqdm to match raw input to patterns.
    for intent, data in tqdm(intents.items(), desc="Matching Intent"):  # Use original intents, not preprocessed
        if text in data["patterns"]:
            return random.choice(data["responses"])
    
    # Return a default response if no pattern matches the input.
    return random.choice(intents["default"]["responses"])

# Test the second chatbot: tries it with the same inputs for comparison.
test_inputs = ["hello", "bye", "how are you", "what's up"]
print("Testing Rule-Based Chatbot:")
for input_text in tqdm(test_inputs, desc="Processing Inputs"):
    response = simple_chatbot(input_text)
    print(f"User: {input_text} | Bot: {response}")

Testing Rule-Based Chatbot:


Processing Inputs:   0%|                                                                         | 0/4 [00:00<?, ?it/s]
Matching Intent:   0%|                                                                           | 0/4 [00:00<?, ?it/s][A


User: hello | Bot: Hello!



Matching Intent:  25%|██████████████████▊                                                        | 1/4 [00:00<?, ?it/s][A


User: bye | Bot: Take care!



Matching Intent:  50%|█████████████████████████████████████▌                                     | 2/4 [00:00<?, ?it/s][A


User: how are you | Bot: I'm just a chatbot, but I'm doing great!



Matching Intent: 100%|███████████████████████████████████████████████████████████████████████████| 4/4 [00:00<?, ?it/s][A
Processing Inputs: 100%|█████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 91.38it/s]

User: what's up | Bot: Sorry, I don’t understand.



