In [None]:
# Install fuzzywuzzy together with its optional "speedup" extra (python-Levenshtein).
# %pip installs into the environment of the running kernel, the version is pinned
# so a re-run resolves the same release, and the quotes keep the shell from
# treating the square brackets as a glob pattern.
%pip install "fuzzywuzzy[speedup]==0.18.0"


Collecting fuzzywuzzy[speedup]
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Collecting python-levenshtein>=0.12 (from fuzzywuzzy[speedup])
  Downloading python_levenshtein-0.27.3-py3-none-any.whl.metadata (3.9 kB)
Collecting Levenshtein==0.27.3 (from python-levenshtein>=0.12->fuzzywuzzy[speedup])
  Downloading levenshtein-0.27.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.7 kB)
Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein==0.27.3->python-levenshtein>=0.12->fuzzywuzzy[speedup])
  Downloading rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading python_levenshtein-0.27.3-py3-none-any.whl (9.5 kB)
Downloading levenshtein-0.27.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (153 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.3/153.3 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)

In [None]:
# Imports
import pandas as pd
import re
from fuzzywuzzy import process, fuzz

# Load the precomputed CSV that already carries Flair, VADER and TextBlob sentiments
df = pd.read_csv('reviews_with_flair.csv')

# Distinct product names; rows with a missing name are skipped so the
# list only ever contains real names (no NaN entry to match against)
unique_products = df['product_name'].dropna().unique()


In [None]:
import pandas as pd

# Rebuild the array of distinct product names from the already-loaded df,
# skipping rows whose name is missing
unique_products = df['product_name'].dropna().unique()

# Numbered listing, one product per line (numbering starts at 1)
for position in range(len(unique_products)):
    print(f"{position + 1}. {unique_products[position]}")

# Compact alternative: print the array itself
print(unique_products)


1. Candes 12 L Room/Personal Air Cooler??????(White, Black, Elegant High Speed-Honey Comb Cooling Pad & Ice Chamber, Blower)
2. Candes 60 L Room/Personal Air Cooler??????(White, Black, CRETA)
3. MAHARAJA WHITELINE 65 L Desert Air Cooler??????(White, Grey, Rambo Grey / AC-303)
4. Crompton 75 L Desert Air Cooler??????(White, Teal, ACGC-DAC751)
5. boAt Rockerz 510 Super Extra Bass Bluetooth Headset??????(Molten Orange, On the Ear)
6. Aroma NB119 Titanium - 48 Hours Playtime Bluetooth Neckband Bluetooth Headset??????(Green, In the Ear)
7. OnePlus Bullets Wireless Z2 with Fast Charge, 30 Hrs Battery Life, Earphones with mic Bluetooth Headset??????(Magico Black, In the Ear)
8. OnePlus Bullets Wireless Z2 Bluetooth Headset??????(Acoustic Red, In the Ear)
9. Mivi Roam2 5 W Bluetooth Speaker??????(Black, Mono Channel)
10. etmax NANO BLACK 30 W Bluetooth Home Theatre??????(Black, Stereo Channel)
11. Mivi Fort S16 Soundbar with 2 full range drivers, Made in India 16 W Bluetooth Soundbar??????(Bla

In [None]:
# Drop reviews that are missing, empty/whitespace-only, or the 'not specified' placeholder.
# The mask is computed on a stripped copy of the text, so the stored review
# strings themselves are left exactly as loaded.
review_text = df['Review'].fillna('').astype(str).str.strip()
keep_review = (review_text != '') & (review_text.str.lower() != 'not specified')

# Filter and renumber the rows 0..n-1 in one step, building a new frame
# rather than mutating the existing one in place
df = df[keep_review].reset_index(drop=True)

# Check
print(df[['product_name', 'Review', 'flair_sentiment']].head())
print(f"Remaining reviews: {len(df)}")



                                        product_name           Review  \
0  Candes 12 L Room/Personal Air Cooler??????(Whi...           super!   
1  Candes 12 L Room/Personal Air Cooler??????(Whi...          awesome   
2  Candes 12 L Room/Personal Air Cooler??????(Whi...             fair   
3  Candes 12 L Room/Personal Air Cooler??????(Whi...  useless product   
4  Candes 12 L Room/Personal Air Cooler??????(Whi...             fair   

  flair_sentiment  
0        positive  
1        positive  
2        negative  
3        negative  
4        negative  
Remaining reviews: 35877


In [None]:
# Normalize strings for safer matching
def normalize(text):
    """
    Return a lowercase, punctuation-free, single-spaced version of `text`.

    Every character other than a-z, 0-9 or whitespace is replaced by a space,
    runs of whitespace are collapsed to one space, and the result is stripped.

    Missing values (None, NaN, pd.NA) give an empty string. Any other
    non-string scalar (for example a numeric product name) is converted with
    str() first instead of failing on `.lower()`.
    """
    if not isinstance(text, str):
        if pd.isna(text):
            return ""
        text = str(text)
    text = text.lower()
    # Punctuation becomes a space (not nothing) so "room/personal" stays two tokens
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    return re.sub(r'\s+', ' ', text).strip()

# Normalized form of every product name, computed once and kept
# index-aligned with unique_products
normalized_products = list(map(normalize, unique_products))


In [None]:
def products_by_phrase_or_token(keyword):
    """
    Return product names where the full phrase exists OR all tokens present.

    The keyword is normalized with `normalize` and compared against the
    module-level `normalized_products`, which is paired position-by-position
    with `unique_products`.

    1. Phrase match: the normalized keyword occurs as a substring of the
       normalized product name. If any product matches, only these are returned.
    2. Token match (fallback): every whitespace-separated token of the keyword
       equals some whole token of the normalized product name, in any order.

    Returns a list of original (un-normalized) product names; the list is
    empty when the keyword normalizes to nothing or when nothing matches.
    """
    k = normalize(keyword)
    if not k:
        return []

    catalog = list(zip(unique_products, normalized_products))

    # Phrase match
    phrase_matches = [prod for prod, nprod in catalog if k in nprod]
    if phrase_matches:
        return phrase_matches

    # Token match: split each product name once and test all keyword tokens
    # with a single subset check instead of re-splitting for every token
    wanted_tokens = set(k.split())
    return [prod for prod, nprod in catalog if wanted_tokens.issubset(nprod.split())]


In [None]:
# ------------------ Helper: Recommend similar products using flair ------------------
def recommend_similar_among_matches(selected_product, matched_products, keyword, top_n=3):
    """
    Print the top matched products (excluding the selected one), ranked by
    their number of positive Flair reviews.

    Parameters
    ----------
    selected_product : product name to leave out of the recommendations.
    matched_products : iterable of candidate product names.
    keyword : the search keyword; accepted so existing callers keep working,
        but not used for filtering or ranking here.
    top_n : maximum number of products to list (default 3).

    Reads the module-level `df` (columns 'product_name' and 'flair_sentiment').
    Nothing is returned; the ranking, or a short message when there is nothing
    to recommend, is printed.
    """
    # Exclude the selected product
    other_products = [p for p in matched_products if p != selected_product]

    if not other_products:
        print("No other products to recommend.")
        return

    # Keep only the reviews that belong to the remaining candidate products
    candidate_reviews = df[df['product_name'].isin(other_products)]

    # Compare the label case-insensitively, the same way the search function
    # filters on flair_sentiment
    is_positive = candidate_reviews['flair_sentiment'].astype(str).str.lower() == 'positive'
    df_positive = candidate_reviews[is_positive]

    if df_positive.empty:
        print("No positive reviews found among other matched products.")
        return

    # Count positive reviews per product; a stable sort keeps the ordering of
    # products with equal counts reproducible between runs
    positive_counts = (
        df_positive.groupby('product_name')
        .size()
        .sort_values(ascending=False, kind='mergesort')
    )

    # Take top N
    top_recommendations = positive_counts.head(top_n)

    print(f"\n--- Top {top_n} similar products based on positive Flair reviews ---")
    for idx, (prod, count) in enumerate(top_recommendations.items(), 1):
        print(f"{idx}. {prod} → Positive Reviews: {count}")



In [None]:
# ------------------ Main search function using only Flair sentiment ------------------
def search_reviews_by_sentiment_flair(threshold=70, max_fuzzy_matches=10):
    """
    Interactive review search driven by the Flair sentiment column.

    Steps:
      1. Prompt for a keyword and find products with
         `products_by_phrase_or_token`; if that finds nothing, fall back to
         fuzzy matching (`fuzz.token_set_ratio`) on normalized names.
      2. Prompt for one of the listed products, or 'all'.
      3. Collect reviews of the selected product(s) plus any review whose text
         contains the keyword (literal, case-insensitive).
      4. Prompt for a sentiment and, unless it is 'all', keep only reviews
         whose `flair_sentiment` equals it.
      5. Print the reviews per product, the other keyword-matched reviews,
         percentage stats of `flair_sentiment`, and recommendations among the
         matched products that were not selected.

    Parameters
    ----------
    threshold : minimum fuzzy score for a product to count as a fuzzy match.
    max_fuzzy_matches : maximum number of fuzzy matches kept.

    Reads the module-level `df`, `unique_products` and `normalized_products`.
    Returns None; everything is printed.
    """
    # Ask keyword and remember it
    keyword = input("Enter product name or keyword to search: ").strip()
    if not keyword:
        print("Empty keyword. Exiting.")
        return

    # Phrase/token matching
    matched_products = products_by_phrase_or_token(keyword)

    # Fuzzy fallback if no matches
    if not matched_products:
        print("No exact/phrase/token matches found — using fuzzy fallback.")
        # Normalize the keyword once rather than once per product
        normalized_keyword = normalize(keyword)
        fuzzy_results = [
            (prod, fuzz.token_set_ratio(normalized_keyword, nprod))
            for prod, nprod in zip(unique_products, normalized_products)
        ]
        fuzzy_results.sort(key=lambda x: x[1], reverse=True)
        fuzzy_matches = [prod for prod, score in fuzzy_results if score >= threshold]
        matched_products = fuzzy_matches[:max_fuzzy_matches]

    if not matched_products:
        print(f"No products found for keyword '{keyword}' (even after fuzzy).")
        return

    # Show matches
    print(f"\nFound {len(matched_products)} matches for '{keyword}':")
    for idx, p in enumerate(matched_products, 1):
        print(f"{idx}. {p}")

    # Let user select product(s)
    while True:
        choice = input("\nEnter the number of the product to see reviews, or 'all' to see all: ").strip().lower()
        if choice == "all":
            selected_products = matched_products
            break
        elif choice.isdigit() and 1 <= int(choice) <= len(matched_products):
            selected_products = [matched_products[int(choice)-1]]
            break
        else:
            print("Invalid input! Enter a valid number or 'all'.")

    # Filter reviews
    selected_norms = {normalize(p) for p in selected_products}
    # Normalize each distinct product name once and map it back onto the rows,
    # instead of running normalize() on every single review row
    names = df['product_name'].fillna('')
    norm_by_name = {name: normalize(name) for name in names.unique()}
    mask_prod = names.map(norm_by_name).isin(selected_norms)
    # regex=False: the keyword is user input and must be matched literally;
    # as a regex, input such as "c++" or "(" would raise or match wrongly
    mask_review = df['Review'].fillna('').str.contains(keyword, case=False, na=False, regex=False)
    filtered = df[mask_prod | mask_review].copy()
    if filtered.empty:
        print("No reviews found for selected products.")
        return

    # Sentiment filter (only Flair sentiment)
    chosen_sentiment = input("Enter sentiment (positive/negative/neutral/all): ").strip().lower()
    if chosen_sentiment != 'all':
        filtered = filtered[filtered['flair_sentiment'].str.lower() == chosen_sentiment]
        if filtered.empty:
            print(f"No reviews found with sentiment '{chosen_sentiment}'.")
            return

    # Display reviews
    print(f"\n--- Reviews for selected products (Flair sentiment = '{chosen_sentiment}') ---\n")
    for prod in selected_products:
        sub = filtered[filtered['product_name'].str.lower() == prod.lower()]
        if sub.empty:
            continue
        print(f"Product: {prod}   ({len(sub)} reviews)\n" + "-"*60)
        for i, row in enumerate(sub.itertuples(), 1):
            print(f"{i}. ({row.flair_sentiment}) {row.Review}")
        print("\n")

    # Show other reviews that matched keyword in review text
    other = filtered[~filtered['product_name'].str.lower().isin([p.lower() for p in selected_products])]
    if not other.empty:
        print("Other reviews matched by keyword in review text:\n" + "-"*60)
        for i, row in enumerate(other.itertuples(), 1):
            print(f"{i}. ({row.flair_sentiment}) [{row.product_name}] {row.Review}")
        print("\n")

    # Sentiment stats
    stats = filtered['flair_sentiment'].value_counts(normalize=True) * 100
    print("\n--- Combined Flair Sentiment Stats (% of matched reviews) ---")
    print(stats.round(2))

    # Recommend top products among the matched products that were NOT selected.
    # Removing every selected product (not only the first one) means choosing
    # 'all' no longer recommends products whose reviews were just displayed.
    remaining_products = [p for p in matched_products if p not in selected_products]
    recommend_similar_among_matches(selected_products[0], remaining_products, keyword=keyword, top_n=3)


In [None]:
# Start the interactive search with its default threshold and fuzzy-match limit;
# it prompts for a keyword, a product choice and a sentiment
search_reviews_by_sentiment_flair()

Enter product name or keyword to search: cooler

Found 4 matches for 'cooler':
1. Candes 12 L Room/Personal Air Cooler??????(White, Black, Elegant High Speed-Honey Comb Cooling Pad & Ice Chamber, Blower)
2. Candes 60 L Room/Personal Air Cooler??????(White, Black, CRETA)
3. MAHARAJA WHITELINE 65 L Desert Air Cooler??????(White, Grey, Rambo Grey / AC-303)
4. Crompton 75 L Desert Air Cooler??????(White, Teal, ACGC-DAC751)

Enter the number of the product to see reviews, or 'all' to see all: 1
Enter sentiment (positive/negative/neutral/all): all

--- Reviews for selected products (Flair sentiment = 'all') ---

Product: Candes 12 L Room/Personal Air Cooler??????(White, Black, Elegant High Speed-Honey Comb Cooling Pad & Ice Chamber, Blower)   (10 reviews)
------------------------------------------------------------
1. (positive) super!
2. (positive) awesome
3. (negative) fair
4. (negative) useless product
5. (negative) fair
6. (positive) awesome
7. (positive) highly recommended
8. (positive)