<a href="https://colab.research.google.com/github/pvsk0226/Guvi-Final-project/blob/main/Google_Maps_Reviews_LLM20_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install langchain==0.2.15 langchain_community transformers keybert torch gradio textblob nltk requests pandas



In [2]:
!pip install keybert



In [3]:
import os
import re
import getpass
import requests
import pandas as pd
import gradio as gr
from datetime import datetime, timedelta
from collections import Counter
from textblob import TextBlob
import nltk
import matplotlib.pyplot as plt
from nltk.sentiment import SentimentIntensityAnalyzer
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from keybert import KeyBERT
import torch

# --- Setup ---
nltk.download("vader_lexicon", quiet=True)
nltk.download("punkt", quiet=True)
nltk.download("stopwords", quiet=True)
nltk.download("wordnet", quiet=True)
sia = SentimentIntensityAnalyzer()
kw_model = KeyBERT()

# ==========================================
# 1Ô∏è‚É£ SerpAPI Setup
# ==========================================
SERPAPI_API_KEY = os.getenv("serpapi")
if not SERPAPI_API_KEY:
    print("üîê Enter SerpAPI key (optional):")
    SERPAPI_API_KEY = getpass.getpass("SerpAPI key: ").strip()
if not SERPAPI_API_KEY:
    print("‚ö†Ô∏è No SerpAPI key found ‚Äî running in offline demo mode.")

# ==========================================
# 2Ô∏è‚É£ Load Summarizer
# ==========================================
try:
    summarizer_pipe = pipeline(
        "summarization",
        model="facebook/bart-base",
        tokenizer="facebook/bart-base",
        max_length=120,
        min_length=30,
        device=-1
    )
    print("‚úÖ Summarizer loaded.")
except Exception as e:
    print(f"‚ò† Summarizer load failed: {e}")
    summarizer_pipe = None

# ==========================================
# 3Ô∏è‚É£ Load Transformer Sentiment Model
# ==========================================
try:
    bert_tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
    bert_model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
    print("‚úÖ Transformer Sentiment Model Loaded.")
except Exception as e:
    bert_model = None
    print("‚ö†Ô∏è Transformer sentiment model not loaded:", e)

# ==========================================
# 4Ô∏è‚É£ Fetch Google Reviews
# ==========================================
def fetch_google_reviews(place_id: str, max_reviews: int = 50) -> pd.DataFrame:
    if not SERPAPI_API_KEY:
        print("‚ö†Ô∏è Demo mode ‚Äî using sample data.")
        return pd.DataFrame([
            {"user": "Alice", "rating": 5, "date": "2024-06-10", "review_text": "Excellent coffee and friendly staff!"},
            {"user": "Bob", "rating": 2, "date": "2024-06-12", "review_text": "Slow service, long wait times."},
            {"user": "Clara", "rating": 4, "date": "2024-06-13", "review_text": "Good atmosphere but slightly pricey."},
            {"user": "Dan", "rating": 1, "date": "2024-05-22", "review_text": "Terrible hygiene, won‚Äôt come again."},
            {"user": "Magesh", "rating": 1, "date": "2024-05-20", "review_text": "Poor service. If you are specific about the price, the service standards should be good."},
        ])

    try:
        # DEBUG print to confirm SERPAPI_API_KEY value
        print(f"DEBUG: SERPAPI_API_KEY is {'set' if SERPAPI_API_KEY else 'NOT SET'}. Value starts with: {SERPAPI_API_KEY[:5]}..." if SERPAPI_API_KEY else "DEBUG: SERPAPI_API_KEY is NOT SET.")
        res = requests.get("https://serpapi.com/search.json", params={
            "engine": "google_maps_reviews",
            "place_id": place_id,
            "hl": "en",
            "api_key": SERPAPI_API_KEY
        }, timeout=30)
        res.raise_for_status()
        data = res.json().get("reviews", [])
        if not data:
            return pd.DataFrame()
        return pd.DataFrame([
            {
                "user": r.get("user", {}).get("name", "Anonymous"),
                "rating": r.get("rating", 0),
                "date": r.get("date", ""),
                "review_text": r.get("snippet", "")
            } for r in data
        ])
    except Exception as e:
        print(f"‚ùå SerpAPI error: {e}")
        return pd.DataFrame()

# ==========================================
# 5Ô∏è‚É£ Helpers
# ==========================================
def parse_relative_date(text):
    now = datetime.now()
    text = text.lower()
    for pattern, mult in [
        (r'(\d+)\s*years?\s*ago', 365),
        (r'(\d+)\s*months?\s*ago', 30),
        (r'(\d+)\s*weeks?\s*ago', 7),
        (r'(\d+)\s*days?\s*ago', 1),
        (r'yesterday', 1),
    ]:
        m = re.search(pattern, text)
        if m:
            days = int(m.group(1)) if m.group(1).isdigit() else 1
            return now - timedelta(days=days * mult)
    return pd.NaT

def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-z0-9\s.,!?]", "", text)
    return re.sub(r"\s+", " ", text).strip()

# ==========================================
# 6Ô∏è‚É£ Improved Sentiment Analysis
# ==========================================
def analyze_sentiment(text):
    if not text or len(text.strip()) == 0:
        return "Neutral ‚ò∂", 0, 0

    vader = sia.polarity_scores(text)["compound"]
    blob = TextBlob(text).sentiment.polarity
    avg_score = (vader * 0.6 + blob * 0.4)

    transformer_score = 0
    if bert_model:
        inputs = bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            logits = bert_model(**inputs).logits
        probs = torch.nn.functional.softmax(logits, dim=1)
        rating = torch.argmax(probs, dim=1).item() + 1
        transformer_score = (rating - 3) / 2  # scale to [-1, 1]

    final_score = (avg_score * 0.4 + transformer_score * 0.6)

    if final_score > 0.15:
        label = "Positive ‚ò∫Ô∏è"
    elif final_score < -0.15:
        label = "Negative ‚òπÔ∏è"
    else:
        label = "Neutral ‚ò∂"

    confidence = round(abs(final_score) * 100, 1)
    return label, final_score, confidence

# ==========================================
# 7Ô∏è‚É£ Keywords, Summaries & Recommendations
# ==========================================
def extract_keywords(text, n=5):
    if len(text) < 20:
        return []
    try:
        kws = kw_model.extract_keywords(text, top_n=n, stop_words="english")
        return [kw[0] for kw in kws]
    except Exception:
        return []

def summarize_reviews(text):
    if not summarizer_pipe or len(text) < 100:
        return None
    try:
        result = summarizer_pipe(text, max_length=150, min_length=50, do_sample=False)
        return result[0]["summary_text"]
    except Exception:
        return None

def fallback_summary(df):
    total = len(df)
    avg = df["rating"].mean()
    pos = len(df[df["sentiment_label"] == "Positive ‚ò∫Ô∏è"])
    neg = len(df[df["sentiment_label"] == "Negative ‚òπÔ∏è"])
    sentiment = "Positive" if pos > neg else "Negative" if neg > pos else "Mixed"
    keywords = Counter(sum(df["keywords"], []))
    top_kw = ", ".join([k for k, _ in keywords.most_common(5)]) or "service, quality"
    return f"Summary of {total} Reviews\nAverage Rating: {avg:.1f}/5\nSentiment: {sentiment}\nCommon Themes: {top_kw}"

# ‚úÖ FIXED: Actionable Recommendations (Negative-first, else Positive)
def generate_ai_recommendations(df):
    """
    Generates actionable recommendations.
    - If negative reviews found ‚Üí suggest improvements.
    - If only positive reviews found ‚Üí highlight what to maintain.
    """
    neg = df[df["sentiment_label"] == "Negative ‚òπÔ∏è"]
    pos = df[df["sentiment_label"] == "Positive ‚ò∫Ô∏è"]

    # Case 1: Negative reviews found ‚Üí suggest improvements
    if not neg.empty:
        keywords = Counter(sum(neg["keywords"], []))
        issues = [k for k, _ in keywords.most_common(3)] or ["service", "quality", "wait time"]
        return (
            f"Recommendations:\n"
            f"1. Address {issues[0].title()} issues.\n"
            f"2. Improve {issues[1].title()} quality.\n"
            f"3. Monitor {issues[2].title()} feedback."
        )

    # Case 2: No negatives ‚Üí reinforce positives
    elif not pos.empty:
        keywords = Counter(sum(pos["keywords"], []))
        top_pos = [k for k, _ in keywords.most_common(3)] or ["service", "staff", "experience"]
        return (
            f"Recommendations:\n"
            f"1. Maintain excellent {top_pos[0].title()} standards.\n"
            f"2. Continue ensuring {top_pos[1].title()} satisfaction.\n"
            f"3. Enhance {top_pos[2].title()} consistency for long-term loyalty."
        )

    # Fallback (no data)
    else:
        return "No sufficient review data for actionable insights."

# ==========================================
# 8Ô∏è‚É£ Sentiment Trend Plot
# ==========================================
def plot_sentiment_trend(df):
    df_plot = df.dropna(subset=["date_parsed"])
    if df_plot.empty:
        return None
    trend = df_plot.groupby("month_year")["sentiment_score"].mean()
    plt.figure(figsize=(6, 3))
    plt.plot(trend.index, trend.values, marker="o", linestyle="-")
    plt.xticks(rotation=45)
    plt.title("üìà Sentiment Trend Over Time")
    plt.xlabel("Month-Year")
    plt.ylabel("Average Sentiment Score")
    plt.grid(True)
    plt.tight_layout()
    return plt

# ==========================================
# 9Ô∏è‚É£ Main Analysis
# ==========================================
def run_analysis(place_id):
    df = fetch_google_reviews(place_id)
    if df.empty:
        return "‚ùå No reviews found", {}, pd.DataFrame(), "No Data", "N/A", None

    df["clean_text"] = df["review_text"].apply(clean_text)
    df["date_parsed"] = df["date"].apply(lambda d: parse_relative_date(d) if isinstance(d, str) and "ago" in d else pd.to_datetime(d, errors="coerce"))
    df["month_year"] = df["date_parsed"].dt.strftime("%b %Y")

    results = df["clean_text"].apply(analyze_sentiment)
    df["sentiment_label"] = results.apply(lambda x: x[0])
    df["sentiment_score"] = results.apply(lambda x: x[1])
    df["keywords"] = df["clean_text"].apply(lambda x: extract_keywords(x, 3))

    all_text = " ".join(df["review_text"].tolist())[:2500]
    summary = summarize_reviews(all_text) or fallback_summary(df)
    suggestions = generate_ai_recommendations(df)
    sentiment_stats = df["sentiment_label"].value_counts().to_dict()

    overall = "‚ò∫Ô∏è Positive" if sentiment_stats.get("Positive ‚ò∫Ô∏è", 0) > sentiment_stats.get("Negative ‚òπÔ∏è", 0) else "‚òπÔ∏è Negative" if sentiment_stats.get("Negative ‚òπÔ∏è", 0) > sentiment_stats.get("Positive ‚ò∫Ô∏è", 0) else "‚ò∂ Mixed"
    trend_plot = plot_sentiment_trend(df)

    df_display = df[["user", "rating", "month_year", "sentiment_label", "review_text"]].rename(
        columns={
            "user": "User",
            "rating": "Rating",
            "month_year": "Date",
            "sentiment_label": "Sentiment",
            "review_text": "Review"
        }
    )

    return summary, sentiment_stats, df_display, overall, suggestions, trend_plot

# ==========================================
# üîü Gradio Dashboard
# ==========================================
with gr.Blocks(title="Google Reviews Dashboard", theme=gr.themes.Soft()) as app:
    gr.Markdown("## üè™ Business Reputation & Insights Analyzer using Google Maps Reviews + LLMs")

    with gr.Row():
        place_id = gr.Textbox(label="Google Place ID", placeholder="e.g. ChIJN1t_tDeuEmsRUsoyG83frY4", scale=3)
        analyze_btn = gr.Button("üöÄ Analyze Reviews", scale=1)
    status = gr.Textbox(label="Status", value="Ready", interactive=False)

    with gr.Tabs():
        with gr.Tab("üìä Overview"):
            summary_box = gr.Textbox(label="AI Summary", lines=6)
            sentiment_json = gr.JSON(label="Sentiment Breakdown")
            overall_box = gr.Textbox(label="Overall Sentiment")
            trend_plot = gr.Plot(label="Sentiment Trend")
        with gr.Tab("üí¨ Reviews"):
            reviews_table = gr.DataFrame(label="Detailed Reviews", wrap=True)
        with gr.Tab("üí° Recommendations"):
            suggest_box = gr.Textbox(label="Actionable Recommendations", lines=8)

    def set_status():
        return "Analyzing... Please wait ‚è≥"

    analyze_btn.click(set_status, outputs=status).then(
        run_analysis,
        inputs=place_id,
        outputs=[summary_box, sentiment_json, reviews_table, overall_box, suggest_box, trend_plot]
    ).then(lambda: "‚úÖ Analysis complete", outputs=status)

# ==========================================
# üöÄ Run App
# ==========================================
if __name__ == "__main__":
    print("üöÄ Launching Dashboard: http://localhost:7860")
    app.launch(debug=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

üîê Enter SerpAPI key (optional):
SerpAPI key: ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


‚úÖ Summarizer loaded.


tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/669M [00:00<?, ?B/s]

‚úÖ Transformer Sentiment Model Loaded.


  with gr.Blocks(title="Google Reviews Dashboard", theme=gr.themes.Soft()) as app:


üöÄ Launching Dashboard: http://localhost:7860
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://560e9a0fb2c8d97ed6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


DEBUG: SERPAPI_API_KEY is set. Value starts with: ef05e...


  plt.tight_layout()
  plt.savefig(output_bytes, format=fmt)


DEBUG: SERPAPI_API_KEY is set. Value starts with: ef05e...


  plt.tight_layout()
  plt.savefig(output_bytes, format=fmt)


DEBUG: SERPAPI_API_KEY is set. Value starts with: ef05e...


  plt.tight_layout()
  plt.savefig(output_bytes, format=fmt)


DEBUG: SERPAPI_API_KEY is set. Value starts with: ef05e...


  plt.tight_layout()
  plt.savefig(output_bytes, format=fmt)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://560e9a0fb2c8d97ed6.gradio.live
