In [17]:
import os
import re
from typing import Optional
from urllib.parse import unquote

import requests
from dotenv import load_dotenv

# Load SerpAPI key from .env
# load_dotenv() loads variables from a .env file into the process environment,
# so the key itself never has to appear in the notebook.
load_dotenv()
# os.getenv returns None when SERPAPI_KEY is not set; the key is read later by
# fetch_reviews_by_data_id as a module-level global.
serpapi_key = os.getenv("SERPAPI_KEY") 


In [29]:
def extract_data_id_from_url(url: str) -> Optional[str]:
    """
    Extracts the Google Maps data_id from a full Google Maps URL.

    Looks for the pattern ``!1s0x<hex>:0x<hex>`` after percent-decoding the
    URL, so links whose ``!``, ``:`` or ``/`` characters arrive encoded
    (``%21``, ``%3A``, ``%2F``) are handled as well.

    Args:
        url: The Google Maps place URL (shared or copied from the address bar).

    Returns:
        The data_id (e.g. ``"0x54959e215df5e1b7:0x524bf6f735f45390"``) with
        its original letter case, or ``None`` when ``url`` is empty/None or
        contains no data_id.
    """
    if not url:
        return None

    # Decode every percent-escape, not just %2F: copied links frequently
    # encode the ':' separating the two halves of the data_id.
    decoded = unquote(url)

    # Accept upper- and lower-case hex digits; only the '0x' prefix is fixed.
    match = re.search(r"!1s(0x[0-9a-fA-F]+:0x[0-9a-fA-F]+)", decoded)
    return match.group(1) if match else None


def fetch_reviews_by_data_id(data_id: str, max_reviews: int = 100, sort_by: str = "newestFirst", lang: str = "en", timeout: float = 30.0) -> list:
    """
    Fetches up to `max_reviews` Google Maps reviews using SerpAPI.

    Pages through the `google_maps_reviews` engine by following
    `serpapi_pagination.next_page_token` until enough reviews have been
    collected, the API stops returning a token, or a page comes back empty.

    Args:
        data_id: Google Maps data_id of the place.
        max_reviews: Upper bound on the number of reviews returned.
            Values <= 0 return an empty list without calling the API.
        sort_by: Value passed through as the `sort_by` request parameter.
        lang: Value passed through as the `hl` request parameter.
        timeout: Seconds to wait for each HTTP request before giving up,
            so a stalled connection cannot hang the notebook indefinitely.

    Returns:
        A list of at most `max_reviews` review dicts as returned by the API.

    Raises:
        requests.HTTPError: If a response has an error status code.
        requests.RequestException: On connection problems or timeouts.
    """
    if max_reviews <= 0:
        return []

    base_url = "https://serpapi.com/search.json"
    all_reviews = []
    page_token = None

    while len(all_reviews) < max_reviews:
        params = {
            "engine": "google_maps_reviews",
            "data_id": data_id,
            "api_key": serpapi_key,  # module-level key loaded from the environment
            "hl": lang,
            "sort_by": sort_by,
        }

        # Only use num and next_page_token on follow-up pages
        if page_token:
            params["next_page_token"] = page_token
            params["num"] = 20  # Max allowed per follow-up page

        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        new_reviews = data.get("reviews") or []
        if not new_reviews:
            # An empty page means there is nothing left to collect; stopping
            # here prevents looping (and spending API credits) on a token
            # that keeps yielding no reviews.
            break
        all_reviews.extend(new_reviews)

        # Break if no more pages; `or {}` tolerates an explicit null value.
        page_token = (data.get("serpapi_pagination") or {}).get("next_page_token")
        if not page_token:
            break

    # The last page may overshoot the requested amount.
    return all_reviews[:max_reviews]



In [30]:
# Place to analyse: the data_id is embedded in the URL's `data=` segment.
maps_url = "https://www.google.com/maps/place/Safeway/@45.4281317,-122.5607997,14z/data=!4m8!3m7!1s0x54959e215df5e1b7:0x524bf6f735f45390!8m2!3d45.428135!4d-122.5401986!9m1!1b1!16s%2Fg%2F1w455b36?entry=ttu"
data_id = extract_data_id_from_url(maps_url)
print("✅ Extracted data_id:", data_id)

if not data_id:
    # Nothing to fetch without an id; report and stop.
    print("❌ Could not extract data_id from URL.")
else:
    reviews = fetch_reviews_by_data_id(data_id, max_reviews=100, sort_by="newestFirst")
    print(f"\n📄 Retrieved {len(reviews)} reviews")

    # Preview the first five reviews; rating-only reviews carry no snippet.
    for i, r in enumerate(reviews[:5], start=1):
        print(f"\n{i}. ⭐ {r.get('rating')} — {r.get('date')}")
        print(r.get('snippet', '(no text)'))


✅ Extracted data_id: 0x54959e215df5e1b7:0x524bf6f735f45390

📄 Retrieved 100 reviews

1. ⭐ 5.0 — a week ago
(no text)

2. ⭐ 3.0 — a week ago
(no text)

3. ⭐ 4.0 — 2 weeks ago
(no text)

4. ⭐ 5.0 — 3 weeks ago
Nice place to visit and meet people. The food is excellent, but just know that they want you to purchase it before you eat it.

5. ⭐ 5.0 — 3 weeks ago
(no text)


In [33]:
# Sanity check: number of reviews actually retrieved (capped by max_reviews).
len(reviews)

100

In [34]:
# Inspect the raw record structure on a small sample only: dumping the whole
# list floods the notebook and stores reviewer names, profile links and
# contributor ids in the saved output.
reviews[:3]

[{'link': 'https://www.google.com/maps/reviews/data=!4m8!14m7!1m6!2m5!1sChdDSUhNMG9nS0VJQ0FnTURJaEpTZ3lnRRAB!2m1!1s0x0:0x524bf6f735f45390!3m1!1s2@1:CIHM0ogKEICAgMDIhJSgygE%7CCgwIyIPNvwYQwKe71QM%7C?hl=en-US',
  'rating': 5.0,
  'date': 'a week ago',
  'iso_date': '2025-04-07T03:08:56Z',
  'iso_date_of_last_edit': '2025-04-07T03:08:56Z',
  'source': 'Google',
  'review_id': 'ChdDSUhNMG9nS0VJQ0FnTURJaEpTZ3lnRRAB',
  'user': {'name': 'iva martinez',
   'link': 'https://www.google.com/maps/contrib/106887247089577412636?hl=en-US',
   'contributor_id': '106887247089577412636',
   'thumbnail': 'https://lh3.googleusercontent.com/a-/ALV-UjWAT1fOQ0-UT33Me-HSpXLXFJUphjbqWT7TGqa93u1bxyDl5ktZEw=s120-c-rp-mo-ba4-br100',
   'local_guide': True,
   'reviews': 13,
   'photos': 220},
  'response': {'date': 'a week ago',
   'iso_date': '2025-04-07T03:19:16Z',
   'iso_date_of_last_edit': '2025-04-07T03:19:16Z',
   'snippet': "Hello iva martinez, thank you for the 5 star review! We're committed to providing

In [None]:
def analyze_keyword_mentions(reviews, keyword_string):
    """
    Analyzes keyword mentions in review text (case-insensitive).

    - Each keyword is only counted once per review, even if it is listed
      more than once in `keyword_string`.
    - Keyword must be matched as a whole word (not inside another word).
      This also works for keywords that start or end with punctuation
      (e.g. "c++"), which a plain ``\\b`` boundary cannot delimit.

    Args:
        reviews: Iterable-with-len of review dicts; the text is read from
            each review's "snippet" key (missing/None/blank counts as empty).
        keyword_string: Comma-separated keywords, e.g. "chill, happy,bread".

    Returns:
        dict with:
          keyword_counts          {keyword: number of reviews mentioning it}
          raw_percentages         {keyword: % of all reviews}
          normalized_percentages  {keyword: % of non-empty reviews}
          reviews_with_any_keyword, raw_any_percentage, norm_any_percentage
          empty_count, empty_percentage, total, non_empty
        Percentages are 0 when their denominator is 0.
    """
    def percent(part, whole):
        # Guard against division by zero for empty inputs.
        return (part / whole * 100) if whole else 0

    # dict.fromkeys drops duplicate keywords while keeping first-seen order;
    # duplicates would otherwise be counted twice for the same review.
    keywords = list(dict.fromkeys(
        kw.strip().lower() for kw in keyword_string.split(",") if kw.strip()
    ))

    # Compile once per keyword instead of once per (review, keyword) pair.
    # Lookarounds require "no word character on either side", which equals
    # \b for ordinary words and still behaves for keywords like "c++".
    patterns = {
        kw: re.compile(r"(?<!\w)" + re.escape(kw) + r"(?!\w)")
        for kw in keywords
    }

    total_reviews = len(reviews)
    keyword_counts = dict.fromkeys(keywords, 0)
    reviews_with_any_keyword = 0
    non_empty_reviews = 0

    for review in reviews:
        text = (review.get("snippet") or "").strip().lower()
        if not text:
            continue

        non_empty_reviews += 1
        matched = [kw for kw, pattern in patterns.items() if pattern.search(text)]
        for kw in matched:
            keyword_counts[kw] += 1
        if matched:
            reviews_with_any_keyword += 1

    empty_reviews = total_reviews - non_empty_reviews

    return {
        "keyword_counts": keyword_counts,
        "raw_percentages": {
            kw: percent(count, total_reviews) for kw, count in keyword_counts.items()
        },
        "normalized_percentages": {
            kw: percent(count, non_empty_reviews) for kw, count in keyword_counts.items()
        },
        "reviews_with_any_keyword": reviews_with_any_keyword,
        "raw_any_percentage": percent(reviews_with_any_keyword, total_reviews),
        "norm_any_percentage": percent(reviews_with_any_keyword, non_empty_reviews),
        "empty_count": empty_reviews,
        "empty_percentage": percent(empty_reviews, total_reviews),
        "total": total_reviews,
        "non_empty": non_empty_reviews,
    }

In [48]:
# Analyze keywords: keyword_string is a comma-separated list; surrounding
# whitespace around each keyword is ignored by analyze_keyword_mentions.
keyword_string = "chill, happy,delicious, stinky, bread"
stats = analyze_keyword_mentions(reviews, keyword_string)

# Corpus overview: reviews without text are reported separately from those with text.
print(f"\n🔍 Total reviews: {stats['total']}")
print(f"📝 Reviews with no text: {stats['empty_count']}")
print(f"📝 Non-empty reviews: {stats['non_empty']}")

# Raw %: share of ALL reviews (including text-less ones) mentioning the keyword.
print("\n📊 Keyword Mentions (Raw %):")
for kw in stats['keyword_counts']:
    print(f"- {kw}: {stats['keyword_counts'][kw]} ({stats['raw_percentages'][kw]:.1f}%)")

# Normalized %: share of reviews that actually contain text.
print("\n📊 Keyword Mentions (Normalized to non-empty reviews):")
for kw in stats['keyword_counts']:
    print(f"- {kw}: {stats['normalized_percentages'][kw]:.1f}%")

# Share of reviews mentioning at least one of the keywords, on both bases.
print(f"\n📈 % of all reviews mentioning any keyword: {stats['raw_any_percentage']:.1f}%")
print(f"📈 % of non-empty reviews mentioning any keyword: {stats['norm_any_percentage']:.1f}%")



🔍 Total reviews: 100
📝 Reviews with no text: 44
📝 Non-empty reviews: 56

📊 Keyword Mentions (Raw %):
- chill: 0 (0.0%)
- happy: 1 (1.0%)
- delicious: 0 (0.0%)
- stinky: 0 (0.0%)
- bread: 1 (1.0%)

📊 Keyword Mentions (Normalized to non-empty reviews):
- chill: 0.0%
- happy: 1.8%
- delicious: 0.0%
- stinky: 0.0%
- bread: 1.8%

📈 % of all reviews mentioning any keyword: 2.0%
📈 % of non-empty reviews mentioning any keyword: 3.6%
