In [29]:
# Import libraries
import nltk
import spacy
import random
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from transformers import pipeline
import warnings
from tqdm import tqdm
warnings.filterwarnings("ignore")
# Load SpaCy model
nlp = spacy.load("en_core_web_sm")

In [30]:
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))  # Fix missing stop_words


In [20]:
def preprocess_text(text):
    # Tokenize
    tokens = word_tokenize(text.lower())
    # Remove stopwords and lemmatize
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalnum() and token not in stop_words]
    return " ".join(tokens)

# Test preprocessing
sample_text = "Hello, how are you doing today?"
processed_text = preprocess_text(sample_text)
print(f"Original: {sample_text}")
print(f"Processed: {processed_text}")

# Preprocess all patterns in intents
preprocessed_intents = {}
for intent, data in intents.items():
    preprocessed_intents[intent] = {
        "patterns": [preprocess_text(pattern) for pattern in data["patterns"]],
        "responses": data["responses"]
    }

# Display preprocessed intents
print("\nPreprocessed Intents:")
for intent, data in preprocessed_intents.items():
    print(f"{intent}: {data['patterns']}")

Original: Hello, how are you doing today?
Processed: hello today

Preprocessed Intents:
greeting: ['hello', 'hi', 'hey', 'good morning']
farewell: ['bye', 'goodbye', 'see', 'take care']
how_are_you: ['', '', 'okay']
default: []


In [21]:
# Define intents: maps user phrases to chatbot responses for training and replies.
intents = {
    "greeting": {
        "patterns": ["hello", "hi", "hey", "good morning"],
        "responses": ["Hi there!", "Hello!", "Hey! How can I help?", "Good morning!"]
    },
    "farewell": {
        "patterns": ["bye", "goodbye", "see you", "take care"],
        "responses": ["Goodbye!", "See you later!", "Take care!"]
    },
    "how_are_you": {
        "patterns": ["how are you", "how you doing", "are you okay"],
        "responses": ["I'm just a chatbot, but I'm doing great!", "All good here, thanks!"]
    },
    "default": {
        "patterns": [],
        "responses": ["Sorry, I don’t understand.", "Can you rephrase that?"]
    }
}



In [28]:


# Prepare training data from intents
training_sentences = []
labels = []

# Use tqdm to track data preparation
for intent, data in tqdm(intents.items(), desc="Preparing Training Data"):
    if intent != "default":  # Skip default intent for training
        for pattern in data["patterns"]:
            training_sentences.append(pattern)
            labels.append(intent)

# Print prepared data
print(f"Training Sentences: {training_sentences}")
print(f"Labels: {labels}")

# Vectorize text using TF-IDF
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(training_sentences)

# Train SVM model
model = SVC(kernel="linear", probability=True)
model.fit(X, labels)

# ML-based chatbot function
def ml_chatbot(text):
    # Vectorize the input
    text_vector = vectorizer.transform([text])
    
    # Predict intent
    predicted_intent = model.predict(text_vector)[0]
    
    # Return a random response from the predicted intent
    return random.choice(intents[predicted_intent]["responses"])

# Test the ML chatbot with tqdm
test_inputs = ["hi there", "goodbye", "how you doing", "what's this"]
print("\nTesting ML-Based Chatbot:")
for input_text in tqdm(test_inputs, desc="Processing ML Inputs"):
    response = ml_chatbot(input_text)
    print(f"User: {input_text} | Bot: {response}")

Preparing Training Data: 100%|███████████████████████████████████████████████████████████████████| 4/4 [00:00<?, ?it/s]


Training Sentences: ['hello', 'hi', 'hey', 'good morning', 'bye', 'goodbye', 'see you', 'take care', 'how are you', 'how you doing', 'are you okay']
Labels: ['greeting', 'greeting', 'greeting', 'greeting', 'farewell', 'farewell', 'farewell', 'farewell', 'how_are_you', 'how_are_you', 'how_are_you']

Testing ML-Based Chatbot:


Processing ML Inputs: 100%|█████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 973.38it/s]

User: hi there | Bot: Hello!
User: goodbye | Bot: Take care!
User: how you doing | Bot: All good here, thanks!
User: what's this | Bot: Hi there!



