In [36]:
import pandas as pd

# Load the scored reviews parquet file
df = pd.read_parquet('scored_reviews_hybrid.parquet')

In [28]:
import re

def parse_query(query):
    query = query.lower()

    aspects = {
        "durability": ["durable", "long lasting", "sturdy", "good"],
        "quality": ["quality", "well made", "excellent", "good"],
        "functionality": ["functional", "works", "effective", "reliable", "good"],
        "value": ["cheap", "affordable", "worth", "expensive", "value", "good"]
    }

    desired_aspects = []
    matched_terms = []

    # Match aspect keywords
    for aspect, terms in aspects.items():
        for term in terms:
            if re.search(rf"\b{re.escape(term)}\b", query):
                desired_aspects.append(aspect)
                matched_terms.append(term)

    # Remove matched aspect words
    for term in set(matched_terms):
        query = re.sub(rf"\b{re.escape(term)}\b", '', query)

    # Remove filler words
    filler_words = ["recommend", "me", "a", "an", "the", "and", "please", "i", "want", "to", "find", "but"]
    query = re.sub(rf"\b({'|'.join(filler_words)})\b", '', query)

    # Final cleanup
    query = re.sub(r'[^a-zA-Z0-9 ]', '', query)  # remove punctuation
    query = re.sub(r'\s+', ' ', query).strip()

    return query, list(set(desired_aspects))





In [29]:
parse_query("I want a good mouth wash")

('mouth wash', ['quality', 'functionality', 'durability', 'value'])

In [30]:
def recommend(query, df, top_n=1):
    product_keyword, desired_aspects = parse_query(query)

    if not product_keyword:
        return "Sorry, I couldn't figure out what product you're looking for."

    # Filter products by title
    matches = df[df['title'].str.contains(product_keyword, case=False, na=False)].copy()

    if matches.empty:
        return f"Sorry, no products found matching '{product_keyword}'."

    # Score products by desired aspects
    if desired_aspects:
        aspect_cols = [f"{a}_score" for a in desired_aspects if f"{a}_score" in df.columns]
        matches["aspect_score"] = matches[aspect_cols].mean(axis=1)
    else:
        matches["aspect_score"] = 0.5  # neutral fallback

    # Combine scores
    matches["final_score"] = (
        0.4 * matches["aspect_score"] +
        0.3 * matches["overall_sentiment_score"] +
        0.3 * (matches["average_rating"] / 5)
    )

    top_match = matches.sort_values(by="final_score", ascending=False).iloc[0]

    # Compose short paragraph
    product = top_match["title"]
    rating = top_match["average_rating"]
    sentiment = top_match["overall_sentiment_score"]
    price = top_match["price"]
    
    sentence = f"I recommend **{product}**."
    sentence += f" It has an average rating of {rating:.1f}/5"
    if desired_aspects:
        aspect_descriptions = []
        for aspect in desired_aspects:
            score = top_match.get(f"{aspect}_score")
            if score is not None:
                if score > 0.7:
                    phrase = f"strong {aspect}"
                elif score < 0.4:
                    phrase = f"weaker {aspect}"
                else:
                    phrase = f"moderate {aspect}"
                aspect_descriptions.append(phrase)
        if aspect_descriptions:
            sentence += f" and shows {', '.join(aspect_descriptions)} based on customer feedback"
    if sentiment > 0.7:
        sentence += ". Reviews are mostly positive"
    elif sentiment < 0.4:
        sentence += ". Reviews are mixed to negative"
    else:
        sentence += ". Reviews are neutral overall"

    if not pd.isna(price):
        sentence += f", and it's priced at ${price:.2f}."
    else:
        sentence += "."

    return sentence


In [34]:
print(recommend("Recommend me a good mouth wash", df, top_n=2))

I recommend **Propolinse Mouth Wash Travel Packets - 10 x Travel Packet (12ml / 0.41 fl oz)**. It has an average rating of 4.1/5 and shows moderate quality, strong functionality, moderate durability, strong value based on customer feedback. Reviews are mostly positive, and it's priced at $21.99.
