In [None]:
# Step 1: Import libraries
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import TreebankWordTokenizer
import string

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 2: Download stopwords (run once)
# nltk.download('stopwords')

# Step 3: Define FAQ data
# Each pair is (question, answer); the records below expose them under the
# "question" and "answer" keys.
_FAQ_PAIRS = (
    ("What is the battery capacity of the smartphone?",
     "The smartphone has a 4500mAh battery that supports fast charging."),
    ("Does this phone support 5G connectivity?",
     "Yes, this smartphone supports 5G networks for faster internet speeds."),
    ("What is the warranty period for the phone?",
     "The phone comes with a standard 1-year manufacturer warranty."),
    ("Is the phone water-resistant?",
     "Yes, it has an IP68 rating, making it water and dust resistant."),
    ("Can I expand the storage with a microSD card?",
     "No, this model does not support expandable storage via microSD."),
    ("What operating system does the phone run?",
     "It runs on the latest Android 13 operating system."),
    ("Does the phone support wireless charging?",
     "Yes, wireless charging is supported with compatible chargers."),
    ("How many cameras does the phone have?",
     "The phone features a quad-camera setup on the back and a single front camera."),
)

faqs = [
    {"question": question, "answer": answer}
    for question, answer in _FAQ_PAIRS
]

# Step 4: Preprocessing setup
# Tokenizer shared by every call to preprocess().
tokenizer = TreebankWordTokenizer()

# The stopword corpus is a separate NLTK data package; on a machine where it
# has never been downloaded, stopwords.words() raises LookupError. Fetch it
# on demand so the script works on a fresh environment.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))

def preprocess(text):
    """Normalise *text* into a space-separated string of content tokens.

    The text is lowercased and split with the module-level ``tokenizer``;
    tokens found in ``stop_words`` and tokens made up solely of punctuation
    characters are discarded.

    Args:
        text: Raw question string.

    Returns:
        The remaining tokens joined by single spaces (empty string if
        nothing survives the filtering).
    """
    # Lowercase and tokenize
    tokens = tokenizer.tokenize(text.lower())
    # Remove stopwords and punctuation. Punctuation is tested character by
    # character: ``word in string.punctuation`` is a substring test, which
    # misses multi-character tokens such as "..." or "--".
    filtered = [
        word
        for word in tokens
        if word not in stop_words
        and not all(ch in string.punctuation for ch in word)
    ]
    return " ".join(filtered)

# Step 5: Preprocess FAQ questions
# Normalise each stored FAQ question with preprocess().
processed_questions = list(
    map(lambda entry: preprocess(entry['question']), faqs)
)

# Step 6: Vectorize questions with TF-IDF
# Fit the vectorizer on the FAQ questions and encode them in a single call;
# the fitted vectorizer is kept so later queries can be transformed with it.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(processed_questions)

# Step 7: Function to find best matching FAQ answer
def get_response(user_question, min_similarity=0.0):
    """Return the answer of the FAQ whose question best matches the input.

    The question is preprocessed, projected into the fitted TF-IDF space and
    compared against every FAQ question by cosine similarity.

    Args:
        user_question: Free-text question typed by the user.
        min_similarity: Score the best match must exceed to be accepted.
            The default of 0.0 rejects only inputs that share no vocabulary
            term with any FAQ question.

    Returns:
        The best-matching FAQ's answer string, or a fallback message when no
        FAQ scores above ``min_similarity``.
    """
    user_processed = preprocess(user_question)
    user_vec = vectorizer.transform([user_processed])
    # One query row was passed in, so take the single row of scores.
    scores = cosine_similarity(user_vec, tfidf_matrix)[0]
    best_idx = int(scores.argmax())
    # When every score is 0 (e.g. a misspelled or empty query), argmax
    # returns index 0; without this guard the first FAQ's answer would be
    # given for any unrecognised question.
    if scores[best_idx] <= min_similarity:
        return ("Sorry, I couldn't find an answer to that. "
                "Please try rephrasing your question.")
    return faqs[best_idx]['answer']

# Step 8: Chat loop
print("Welcome to Smartphone FAQ Chatbot! Type 'exit' to quit.\n")

# Read questions until the user types 'exit' or the input stream ends.
while True:
    try:
        # Strip surrounding whitespace so " exit " still quits.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin closed or Ctrl-C: leave cleanly instead of with a traceback.
        print("\nChatbot: Goodbye!")
        break
    if not user_input:
        # Nothing to match against; prompt again.
        continue
    if user_input.lower() == 'exit':
        print("Chatbot: Goodbye!")
        break
    answer = get_response(user_input)
    print("Chatbot:", answer)


Welcome to Smartphone FAQ Chatbot! Type 'exit' to quit.



You:  warenty?


Chatbot: The smartphone has a 4500mAh battery that supports fast charging.


You:  water resistancy?


Chatbot: Yes, it has an IP68 rating, making it water and dust resistant.


You:  conectivity?


Chatbot: The smartphone has a 4500mAh battery that supports fast charging.
