##### Importing packages

In [15]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


#### Applying the TF-IDF vectorizer and cosine similarity to the structured dataset


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Load the structured FAQ dataset.
# The code below reads the columns 'question', 'answer' and 'synonyms'.
df = pd.read_csv("faq.csv")

# Step 2: Build faq_data dictionary (question -> answer).
# If the same question text occurs in more than one row, the last row's answer wins.
faq_data = dict(zip(df['question'], df['answer']))

# Step 3: Create a pool of all possible question variations (question + synonyms).
# question_list[i] and answer_list[i] always describe the same FAQ entry, so an
# index into one list can be used directly to look up the other.
question_list = []
answer_list = []

for idx, row in df.iterrows():
    # Add main question
    question_list.append(row['question'].lower())
    answer_list.append(row['answer'])

    # Add synonyms (if any); the cell is treated as a comma-separated list.
    if pd.notna(row['synonyms']):
        # Skip blank fragments produced by trailing or doubled commas
        # (e.g. "delivery, shipping,"): an empty string is not a real question
        # variation and must not be paired with an answer.
        synonyms = [
            s.strip().lower()
            for s in row['synonyms'].split(',')
            if s.strip()
        ]
        question_list.extend(synonyms)
        answer_list.extend([row['answer']] * len(synonyms))

# Step 4: Fit TF-IDF vectorizer on all question variations.
# stop_words='english' makes the vectorizer ignore common English stop words.
vectorizer = TfidfVectorizer(stop_words='english')
question_vectors = vectorizer.fit_transform(question_list)


# Step 5: Define the answer retrieval function (TF-IDF variant)
def get_faq_answer(user_query: str, threshold: float = 0.45) -> str:
    """Return the stored answer whose question best matches ``user_query``.

    The query is lower-cased and stripped, vectorised with the module-level
    ``vectorizer``, and compared against ``question_vectors`` by cosine
    similarity.

    Note: a later cell in this notebook defines another function with the
    same name; whichever definition was executed last is the one in effect.

    Args:
        user_query: Free-text question from the user.
        threshold: Minimum cosine similarity required to accept the best match.

    Returns:
        The entry of ``answer_list`` at the best-matching position when its
        score is at least ``threshold``; otherwise a fixed apology message.
    """
    cleaned_query = user_query.lower().strip()
    query_vector = vectorizer.transform([cleaned_query])

    # Only one query is transformed, so the similarity matrix has a single row;
    # take that row and work with it directly.
    scores = cosine_similarity(query_vector, question_vectors)[0]
    top_index = scores.argmax()
    top_score = scores[top_index]

    if top_score >= threshold:
        return answer_list[top_index]
    return "Sorry, I couldn’t find a suitable answer to your question."





In [None]:
# Step 5 (alternative): set up sentence-embedding retrieval.
# This cell reuses question_list built in the earlier cell, and the function
# defined below it rebinds the name get_faq_answer.
from sentence_transformers import SentenceTransformer, util

# Load the SentenceTransformer model identified by the name "all-MiniLM-L6-v2"
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed all question variations once, up front, as a tensor so each query
# only needs to be compared against these precomputed embeddings.
question_embeddings = model.encode(question_list, convert_to_tensor=True)

def get_faq_answer(user_query: str, threshold: float = 0.7) -> str:
    """Return the stored answer whose question embedding is closest to the query.

    The query is encoded with the module-level ``model`` and scored against
    ``question_embeddings`` using cosine similarity.

    Args:
        user_query: Free-text question from the user.
        threshold: Minimum cosine similarity required to accept the best match.

    Returns:
        The entry of ``answer_list`` at the best-scoring position when its
        score is at least ``threshold``; otherwise a fixed apology message.
    """
    query_embedding = model.encode(user_query, convert_to_tensor=True)

    # Row 0 holds the scores of the single query against every stored question.
    scores = util.pytorch_cos_sim(query_embedding, question_embeddings)[0]
    top_index = scores.argmax().item()
    top_score = scores[top_index].item()

    if top_score >= threshold:
        return answer_list[top_index]
    return "Sorry, I couldn’t find a suitable answer to your question."

In [31]:
# Try the retriever on a few sample customer questions, one answer per line.
sample_queries = (
    "Do you offer home delivery?",
    "Where is your shop?",
    "Do you have makeups for sale?",
    "How do I pay you?",
)

for query in sample_queries:
    print(get_faq_answer(query))



Yes, we provide delivery service for all orders above 500 BDT.
We are located at 123 Main Road, Dhaka.
Yes, we frequently offer promotions. Subscribe to our newsletter to stay updated.
We accept cash on delivery, bKash, and all major credit/debit cards.
