# Imports

In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

In [2]:
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")
nltk.download("punkt_tab")
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [3]:
def preprocess(text):
    text = text.lower()
    tokens = word_tokenize(text)
    tokens = [lemmatizer.lemmatize(w) for w in tokens if w.isalpha() and w not in stop_words]
    return tokens

preprocess("Hello! How are you doing today?")

['hello', 'today']

In [4]:
preprocess("Hiee My name is rushi and I am doing code.!")

['hiee', 'name', 'rushi', 'code']

# STEP 4

##  TF-IDF Vectorization + 1st ML Intent Classifier

In [5]:
# ‚ö†Ô∏è This is NOT final data ‚Äî just to learn the pipeline.
training_sents = ["Hello", "Hi there", "good morning",
                "bye", "see you later",
                "what can you do later", "how you can help me",
                "thanks", "thanks you so much"]

# Intents
training_labels = ["greeting", "greeting", "greeting",
                  "goodbye", "goodbye", 
                   "ask_capabilities", "ask_capabilities",
                  "thanks", "thanks"]

## TF-IDF Vectorization
We give raw sentences ‚Üí get numeric vectors

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(tokenizer=preprocess, ngram_range=(1,2)) 
# preprocess ‚Üí our function, (1,2) ‚Üí unigrams + bigrams

X = vectorizer.fit_transform(training_sents)
# X is now numrical matrix

print(X.shape)
# X.shape ‚Üí (nums_of_sents, nums_of_features)



(9, 13)


# STEP 5 ‚Äî Train First ML Intent Classifier - 

# LogisticRegression

In [7]:
from sklearn.linear_model import LogisticRegression

lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X, training_labels)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


## Test the Chatbot

In [8]:
test_text = ["Hi Buddy", "will see you tommorrow", "how you do it", "how can i help you"]
test_vector = vectorizer.transform(test_text)
prediction = lr_model.predict(test_vector)

print('predicted Intent: ',prediction)


predicted Intent:  ['greeting' 'goodbye' 'greeting' 'ask_capabilities']


In [9]:
# checking probabilities

probs = lr_model.predict_proba(test_vector)
intents = lr_model.classes_

for intent, prob in zip(intents, probs[0]):
    print(f"{intent} : {prob:.2f} | ")

ask_capabilities : 0.17 | 
goodbye : 0.17 | 
greeting : 0.51 | 
thanks : 0.16 | 


## Define a Confidence Threshold

In [10]:
CONFIDENCE_THRESHOLD = 0.3

In [11]:
max_prob = max(probs[2])
best_intent = intents[probs[2].argmax()]

if max_prob < CONFIDENCE_THRESHOLD:
    final_intent  = "Fallback"
else:
    final_intent  = best_intent

print("final intent: ", final_intent)
print("Confidence: ", max_prob)

final intent:  greeting
Confidence:  0.3522010027393277


In [12]:
# Why This Step Is CRUCIAL
# ‚ÄúMy chatbot uses prediction probabilities with a confidence threshold to avoid incorrect responses 
# and handle unknown user inputs gracefully.‚Äù

# STEP 6 ‚Äî Response Mapping (Make Chatbot Talk)

In [13]:
responces = {"greeting":["Hello! üòä", "Hi there!", "Hiee! How can I help you?"],
            "goodbye": ["Good Byee!", "See you soo!", "Take care!"],
            "ask_capabilities":["I can chat with you and answer basic questions.", "I can help you understand how this chatbot works."],
            "thanks":["Happy to helpüòä", "You are welcome"],
            "fallback":["Sorry! I didn't understand that.", "Can you please rephrase?"]
            }

## 6.2 ‚Äî Select Response Randomly

In [14]:
import random

def get_response(intent):
    return random.choice(responces[intent])

In [15]:
user_input = "Bye! meet you tommarrow"

vector = vectorizer.transform([user_input])
probs = lr_model.predict_proba(vector)[0]

max_prob = max(probs)
lr_model.classes_[probs.argmax()]

if max_prob < CONFIDENCE_THRESHOLD:
    intent = "fallback"

print(get_response(intent))

Happy to helpüòä
