<a href="https://colab.research.google.com/github/preetbhagat7/LovePreet-Portfolio/blob/main/searching.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import re
from collections import Counter
import matplotlib.pyplot as plt


In [None]:
# Colab-only upload widget. Outside Colab the import is unavailable, so fall
# back to an empty mapping and rely on Searches.xlsx already being on disk.
try:
    from google.colab import files
except ImportError:
    uploaded = {}
else:
    uploaded = files.upload()

In [None]:
# Read the search log workbook into the raw DataFrame.
SEARCHES_FILE = 'Searches.xlsx'
df = pd.read_excel(SEARCHES_FILE)

In [None]:
# Two independent deep copies of the loaded data:
#   df_real - untouched reference copy
#   df_work - copy that later cells are free to modify
df_real, df_work = df.copy(deep=True), df.copy(deep=True)

In [None]:
# ---------------- MASTER CATEGORY MAP ----------------
# category name -> keywords that assign a search query to that category.
CATEGORY_MAP = {
    "fashion": [
        "shirt", "jeans", "tshirt", "kurti", "saree", "dress",
        "hoodie", "watch", "shoes", "sandal", "bag",
    ],
    "home & kitchen": [
        "mixer", "grinder", "utensils", "bottle", "cup", "pan",
        "stove", "gas stove", "tawa", "bed sheet", "pillow",
    ],
    "sports": [
        "bat", "ball", "football", "cricket",
        "treadmill", "dumbbell", "cycling", "skating",
    ],
    "baby products": [
        "baby", "diaper", "stroller", "crib",
        "toy", "baby pillow", "baby carrier", "tricycle",
    ],
    "electronics": [
        "phone", "mobile", "charger", "earphones",
        "laptop", "tablet", "tv",
    ],
}

In [None]:
# Invert CATEGORY_MAP into a flat keyword -> category lookup.
# A keyword listed under several categories resolves to the last one listed.
WORD_TO_CATEGORY = {
    keyword: category
    for category, keywords in CATEGORY_MAP.items()
    for keyword in keywords
}

In [None]:
# ---------------- LOAD YOUR DATA ----------------
df = df_work.copy()  # analysis frame; df_work itself is left untouched
# Blank out missing queries before the string cast: astype(str) would
# otherwise turn them into the literal text "nan", which then gets counted
# and can be looked up as if it were a real search term.
df['query'] = df['query'].fillna("").astype(str)


In [None]:
# ---------------- CLEAN TEXT ----------------
# Merge every query into one lowercase string, replace anything that is not
# a-z or whitespace with a space, then tokenise on whitespace.
all_text = " ".join(df['query'].tolist()).lower()
cleaned = re.compile(r'[^a-z\s]').sub(" ", all_text)
words = cleaned.split()

# Distinct tokens in alphabetical order.
unique_words = list(set(words))
unique_words.sort()


In [None]:
# ---------------- COUNT KEYWORD FREQUENCY ----------------
# token -> number of queries containing it as a whole word (case-insensitive).
keyword_counts = {
    kw: df['query'].str.contains(rf"\b{kw}\b", case=False, na=False).sum()
    for kw in unique_words
}

In [None]:
# ---------------- CATEGORY TOTALS ----------------
# category -> number of queries that mention at least one of its keywords.
#
# Each query is counted once per category, however many of that category's
# keywords it contains, so the total really is a number of searches. Matching
# against the full query text (rather than single tokens) also lets multi-word
# keywords such as "gas stove" or "bed sheet" take part.
category_totals = {}
for cat, cat_keywords in CATEGORY_MAP.items():
    if not cat_keywords:
        continue  # an empty alternation would match every query
    # Non-capturing group: str.contains warns when a pattern has capture groups.
    cat_pattern = r"\b(?:" + "|".join(re.escape(k) for k in cat_keywords) + r")\b"
    hits = int(df['query'].str.contains(cat_pattern, case=False, na=False).sum())
    if hits:
        # Categories with no hits are left out so they do not become
        # zero-width pie slices.
        category_totals[cat] = hits

In [None]:
# ---------------- FUNCTION FOR PIE CHART ----------------
def show_pie_chart():
    """Draw a pie chart of the per-category totals in ``category_totals``.

    Reads the module-level ``category_totals`` mapping (category -> count)
    and renders one labelled slice per category with its percentage share.
    When the mapping is empty or sums to zero, a short notice is printed
    instead of plotting.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    # Materialise the dict views: matplotlib hands the sizes to numpy, which
    # does not treat a dict view as a sequence.
    labels = list(category_totals.keys())
    sizes = list(category_totals.values())

    # Nothing meaningful to draw (and normalising by a zero sum is undefined).
    if not sizes or sum(sizes) <= 0:
        print("\n No categorised searches to plot.\n")
        return

    plt.figure(figsize=(5, 5))
    plt.pie(
        sizes,
        labels=labels,
        autopct="%1.1f%%",
        startangle=140
    )
    plt.title("Category-wise Total Search Distribution")
    plt.show()

In [None]:
# ---------------- KEYWORD LOOKUP ----------------
def lookup_keyword(user_kw=None):
    """Report how often a keyword was searched and show the category chart.

    The keyword is matched as a whole word, case-insensitively, against the
    ``query`` column of the module-level ``df``. Matching queries are listed
    with their occurrence counts, followed by the keyword's category (from
    ``WORD_TO_CATEGORY``) and that category's total (from
    ``category_totals``). The pie chart is drawn via ``show_pie_chart()``.

    Args:
        user_kw: Keyword to look up. When ``None`` (the default) the user is
            prompted with ``input()``, so existing no-argument calls behave
            as before. Passing a value lets the cell run without a prompt.

    Returns:
        None. All results are printed / plotted.
    """
    if user_kw is None:
        user_kw = input("Enter keyword to search: ")
    user_kw = str(user_kw).strip().lower()

    # An empty keyword would build the pattern r"\b\b", which matches at any
    # word boundary and therefore "finds" almost every query.
    if not user_kw:
        print("\n Please enter a non-empty keyword.\n")
        return

    # re.escape keeps characters such as "+" or "." in the keyword literal.
    pattern = rf"\b{re.escape(user_kw)}\b"

    mask = df['query'].str.contains(pattern, case=False, na=False)

    if not mask.any():
        print("\n Never Searched Before\n")
        show_pie_chart()
        return

    # Group identical queries (ignoring case) and count each one.
    matched_queries = df.loc[mask, 'query'].str.lower().tolist()
    query_counter = Counter(matched_queries)

    print(f"\nKeyword: {user_kw}")
    print(f"Total Searches (exact match): {len(matched_queries)}\n")
    print("Matching Queries:")
    for q, c in query_counter.items():
        print(f"  {q} → {c}")

    # Category details
    category = WORD_TO_CATEGORY.get(user_kw)
    if category:
        print(f"\nCategory: {category}")
        print(f"Total Searches in this Category: {category_totals.get(category, 0)}")
    else:
        print("\n No category found for this keyword.")

    # Always show pie chart
    print("\n Category Search Distribution:\n")
    show_pie_chart()

In [None]:
# ---------------- RUN ----------------
# Interactive entry point: prompts for a keyword on stdin, prints the match
# summary and draws the category pie chart.
lookup_keyword()
