# In [1]:
import nltk
# Fetch the NLTK data this script relies on. The stop-word lists are used by
# preprocess_text; word_tokenize additionally needs the Punkt tokenizer data
# and raises LookupError when it is missing. Older NLTK releases look for the
# 'punkt' resource and newer ones for 'punkt_tab', so both are requested.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Sample knowledge base: each customer question maps to its canned answer.
faq_data = {
    "What is your return policy?": (
        "Our return policy allows for returns within 30 days of purchase."
    ),
    "How can I track my order?": (
        "You can track your order by logging into your account."
    ),
    "Do you offer international shipping?": (
        "Yes, we offer international shipping to most countries."
    ),
}

# Preprocessing helpers
# Stop-word sets already loaded from the NLTK corpus, keyed by language name.
_STOP_WORDS_CACHE = {}


def _get_stop_words(language='english'):
    """Return the NLTK stop words for *language* as a cached frozenset."""
    cached = _STOP_WORDS_CACHE.get(language)
    if cached is None:
        # Load the word list once; later calls reuse the same frozenset
        # instead of rebuilding it for every piece of text.
        cached = frozenset(stopwords.words(language))
        _STOP_WORDS_CACHE[language] = cached
    return cached


def preprocess_text(text):
    """Normalize *text* for TF-IDF matching.

    The text is lowercased and split with NLTK's ``word_tokenize``. Tokens
    are kept only if they are purely alphanumeric (``str.isalnum``) and are
    not English stop words; this drops punctuation tokens as well.

    Args:
        text: The raw string to normalize.

    Returns:
        The surviving tokens joined by single spaces. The result is an empty
        string when no token survives the filter.
    """
    stop_words = _get_stop_words()
    tokens = word_tokenize(text.lower())
    return ' '.join(
        token for token in tokens
        if token.isalnum() and token not in stop_words
    )

# Normalize each FAQ question; the cleaned text becomes the lookup key and the
# original answer stays attached to it.
preprocessed_faqs = dict(
    zip(map(preprocess_text, faq_data.keys()), faq_data.values())
)

# Parallel lists: position i of faq_questions corresponds to faq_answers[i].
faq_questions = [*preprocessed_faqs]
faq_answers = [*preprocessed_faqs.values()]

# Learn the TF-IDF vocabulary and weights from the cleaned FAQ questions.
vectorizer = TfidfVectorizer().fit(faq_questions)

# Function to get FAQ answer
# Returned when the question shares no vocabulary with any FAQ entry.
_NO_MATCH_ANSWER = "Sorry, I couldn't find an answer to that question."


def get_faq_answer(user_question, fallback=_NO_MATCH_ANSWER):
    """Return the answer of the FAQ entry most similar to *user_question*.

    The question is normalized with ``preprocess_text``, converted to a
    TF-IDF vector with the fitted ``vectorizer``, and compared against every
    preprocessed FAQ question using cosine similarity.

    Args:
        user_question: The raw question text.
        fallback: Value returned when no FAQ entry has a positive similarity
            to the question.

    Returns:
        The answer paired with the highest-scoring FAQ question, or
        *fallback* when the best score is not positive.
    """
    query = preprocess_text(user_question)
    query_vector = vectorizer.transform([query])
    faq_matrix = vectorizer.transform(faq_questions)

    # cosine_similarity yields a (1, n_faqs) array; take its single row.
    scores = cosine_similarity(query_vector, faq_matrix)[0]
    best_idx = scores.argmax()

    # When every score is zero, argmax() is 0 and would select the first FAQ
    # entry for a completely unrelated (or empty) question.
    if scores[best_idx] <= 0:
        return fallback
    return faq_answers[best_idx]

# Example usage
# Ask a question that appears verbatim as a key in faq_data and print the
# answer that get_faq_answer returns for it.
user_question = "Do you offer international shipping?"
answer = get_faq_answer(user_question)
print(answer)


# Output:
# Yes, we offer international shipping to most countries.
#
# [nltk_data] Downloading package stopwords to D:\nltk_data...
# [nltk_data]   Package stopwords is already up-to-date!
