### import packages and env variables

In [5]:
import googlemaps
import os
import pandas as pd
import time
from dotenv import load_dotenv
import yaml
import json

In [2]:
load_dotenv()

True

### load config

In [6]:
# load in yaml file
with open('config/neighborhoods.yaml', 'r') as file:
    config = yaml.safe_load(file)

In [None]:
# flatten yaml file's contents
neighborhoods = []

for region, neighborhood in config.items():
    for name, details in neighborhood.items():
        details['neighborhood_name'] = name
        details['region'] = region
        neighborhoods.append(details)

print(f'Client initialized and cafe reviews for {len(neighborhoods)} neighborhoods will be extracted')

In [10]:
config['central']['cbd']

{'center': [1.2840355, 103.8420201], 'radius': 1200, 'region': 'central'}

In [9]:
config

{'central': {'cbd': {'center': [1.2840355, 103.8420201],
   'radius': 1200,
   'region': 'central'},
  'bras_basah': {'center': [1.2958975, 103.8547797],
   'radius': 1200,
   'region': 'central'},
  'orchard': {'center': [1.3028628, 103.8477971],
   'radius': 1200,
   'region': 'central'}},
 'north': {'balestier': {'center': [1.3234477, 103.8480894],
   'radius': 1200,
   'region': 'north'},
  'upper_thomson': {'center': [1.3545179, 103.8351714],
   'radius': 1200,
   'region': 'north'},
  'serangoon_gardens': {'center': [1.360303, 103.8686115],
   'radius': 1200,
   'region': 'north'}},
 'east': {'geylang': {'center': [1.314752, 103.8837181],
   'radius': 1200,
   'region': 'east'},
  'katong': {'center': [1.3088691, 103.9044623],
   'radius': 1200,
   'region': 'east'},
  'siglap': {'center': [1.3135075, 103.9256521],
   'radius': 1200,
   'region': 'east'}},
 'west': {'holland_village': {'center': [1.3120885, 103.7958535],
   'radius': 1200,
   'region': 'west'},
  'tiong_bahru': {

### initialize client

In [5]:
# initialize client (have a separate dto for this later)
gmaps = googlemaps.Client(key=os.getenv('GOOGLE_MAPS_API_KEY'))

API reference: https://googlemaps.github.io/google-maps-services-python/docs/index.html

### test

In [7]:
# Basic nearby search for cafes
cafes_result = gmaps.places_nearby(
    location=(1.3015, 103.8983),  # home
    radius=5000,  # in meters
    type='cafe',  # search key
    language='en'  # english
)

# convert results to list
cafes = cafes_result.get('results', [])
print(f"Found {len(cafes)} cafes")

Found 20 cafes


In [None]:
'reviews', 'rating', 'adr_address', 'opening_hours', 'name', 'wheelchair_accessible_entrance', 'geometry/viewport/northeast', 'serves_brunch', 
'geometry/viewport/southwest', 'current_opening_hours', 'geometry/viewport/northeast/lat', 'geometry/viewport/southwest/lng', 'utc_offset', 
'user_ratings_total', 'international_phone_number', 'secondary_opening_hours', 'review', 'serves_dinner', 'geometry/viewport/northeast/lng', 
'editorial_summary', 'geometry/viewport', 'geometry/location/lat', 'place_id', 'icon', 'serves_lunch', 'takeout', 'photo', 'vicinity', 
'curbside_pickup', 'delivery', 'dine_in', 'serves_wine', 'url', 'plus_code', 'serves_vegetarian_food', 'formatted_address', 'geometry/location/lng', 
'business_status', 'serves_beer', 'price_level', 'geometry/location', 'formatted_phone_number', 'website', 
'address_component', 'serves_breakfast', 'permanently_closed', 'geometry/viewport/southwest/lat', 'reservable', 'type', 'geometry'

In [26]:
# get unique identifier for each cafe
place_id = cafes[0]['place_id']

details = gmaps.place(
    place_id = place_id,
    fields = [
        # metadata
        'name', 
        'formatted_address',
        'geometry/location', # coordinates
        'opening_hours',

        # reviews/ratings
        'rating',
        'user_ratings_total',
        'reviews',

        # food
        'serves_brunch',
        'serves_lunch',
        'serves_dinner',
        'serves_breakfast',
        
        # others
        'price_level',  # 0-4 scale
        'type', 
        'business_status'
    ],
    language='en'
)

reviews[].text: actual content -- others: author_name, language, rating, relative_time_description, time
- next steps: filter out OLD reviews


In [28]:
place_info = details.get('result', [])
reviews = place_info.get('reviews', [])
reviews

[{'author_name': 'Angel Somohano',
  'author_url': 'https://www.google.com/maps/contrib/110743587808350855164/reviews',
  'language': 'en',
  'original_language': 'en',
  'profile_photo_url': 'https://lh3.googleusercontent.com/a-/ALV-UjVssnuYCdCcDk1Kjtk7JcaxqWxAmB2QDnrqt9ooKm6osFFm_-6m=s128-c0x00000000-cc-rp-mo-ba3',
  'rating': 5,
  'relative_time_description': 'in the last week',
  'text': 'It Is Paradise for coffee lovers. We had a very delicious breakfast. Eggs, salmón Toast and pumpkin mushnroom Toast (incredible). The very foamy nitro cold brew deserves a #1 prize. I love this place!',
  'time': 1758681760,
  'translated': False},
 {'author_name': 'Eric Lee',
  'author_url': 'https://www.google.com/maps/contrib/113775188408100673442/reviews',
  'language': 'en',
  'original_language': 'en',
  'profile_photo_url': 'https://lh3.googleusercontent.com/a-/ALV-UjU2epHzdoE79ycVleKwVsaMMIVU5JaAP2-flhYJz8G_-sYdj13R=s128-c0x00000000-cc-rp-mo-ba6',
  'rating': 4,
  'relative_time_descriptio

### full test scripts

In [4]:
"""Test script to query one neighborhood"""

from src.data.query import CafeQuery
import pandas as pd
from datetime import datetime

def test_single_neighborhood():
    # Initialize the query class
    print("Initializing cafe query...")
    cafe_query = CafeQuery()
    
    # Pick one neighborhood to test (Holland Village)
    test_neighborhood = {
        'center': [1.3120885, 103.7958535],
        'radius': 1200,
        'neighborhood_name': 'katong',
        'region': 'west'
    }
    
    print(f"\nTesting with {test_neighborhood['neighborhood_name']}...")
    print(f"Center: {test_neighborhood['center']}")
    print(f"Radius: {test_neighborhood['radius']}m")
    
    # Collect cafes
    try:
        df = cafe_query.collect_cafes(test_neighborhood)
        
        if not df.empty:
            print(f"\n✅ Success! Found {len(df)} cafes")
            print("\nSample data:")
            print(df[['name', 'rating', 'user_ratings_total']].head())
            
            # Save test results
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            output_path = f'data/test_holland_village_{timestamp}.parquet'
            df.to_parquet(output_path)
            print(f"\nData saved to: {output_path}")
            
            # Show summary
            print("\nSummary:")
            print(f"- Total cafes: {len(df)}")
            print(f"- Avg rating: {df['rating'].mean():.2f}")
            print(f"- Has price info: {df['price_level'].notna().sum()}")
            
        else:
            print("❌ No cafes found")
            
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    test_single_neighborhood()

Initializing cafe query...
Client initialized and cafe reviews for 12 neighborhoods will be extracted

Testing with katong...
Center: [1.3120885, 103.7958535]
Radius: 1200m

✅ Success! Found 60 cafes

Sample data:
                                  name  rating  user_ratings_total
0                      The Daily Scoop     4.3               857.0
1  Tai Cheong Bakery (Holland Village)     4.1               877.0
2       Chang Cheng Mee Wah Coffeeshop     3.8               195.0
3            Attap House - Buona Vista     3.4                98.0
4                            Fun Toast     3.3               142.0

Data saved to: data/test_holland_village_20250929_234839.parquet

Summary:
- Total cafes: 60
- Avg rating: 4.13
- Has price info: 14


In [5]:
df_katong = pd.read_parquet('data/test_katong_20250929_234839.parquet')
df_katong

Unnamed: 0,business_status,geometry,icon,icon_background_color,icon_mask_base_uri,name,opening_hours,photos,place_id,plus_code,rating,reference,scope,types,user_ratings_total,vicinity,price_level,permanently_closed,neighborhood,region
0,OPERATIONAL,"{'location': {'lat': 1.3105313, 'lng': 103.796...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,The Daily Scoop,{'open_now': False},"[{'height': 3000, 'html_attributions': ['<a hr...",ChIJSf_v4moa2jERfL_gQn4nX7k,"{'compound_code': '8Q6W+6F Singapore', 'global...",4.3,ChIJSf_v4moa2jERfL_gQn4nX7k,GOOGLE,"[cafe, store, food, point_of_interest, establi...",857.0,"118 Holland Avenue, #02-04 Raffles",,,katong,west
1,OPERATIONAL,"{'location': {'lat': 1.3106268, 'lng': 103.794...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Tai Cheong Bakery (Holland Village),{'open_now': False},"[{'height': 1200, 'html_attributions': ['<a hr...",ChIJ_wM9jJEZ2jERUe-5RzyA24w,"{'compound_code': '8Q6V+7X Singapore', 'global...",4.1,ChIJ_wM9jJEZ2jERUe-5RzyA24w,GOOGLE,"[cafe, bakery, store, food, point_of_interest,...",877.0,31 Lorong Liput,2.0,,katong,west
2,OPERATIONAL,"{'location': {'lat': 1.3072268, 'lng': 103.793...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Chang Cheng Mee Wah Coffeeshop,{'open_now': True},"[{'height': 2304, 'html_attributions': ['<a hr...",ChIJH3Fr7UEa2jERuwMcr9cFUvw,"{'compound_code': '8Q4V+V9 Singapore', 'global...",3.8,ChIJH3Fr7UEa2jERuwMcr9cFUvw,GOOGLE,"[cafe, store, food, point_of_interest, establi...",195.0,"Blk 40 Holland Drive #01-39, Block 40 Holland ...",1.0,,katong,west
3,OPERATIONAL,"{'location': {'lat': 1.3056275, 'lng': 103.791...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Attap House - Buona Vista,{'open_now': False},"[{'height': 3840, 'html_attributions': ['<a hr...",ChIJOxwfQEUa2jERPpaLevc5gyo,"{'compound_code': '8Q4R+7J Singapore', 'global...",3.4,ChIJOxwfQEUa2jERPpaLevc5gyo,GOOGLE,"[restaurant, cafe, food, point_of_interest, es...",98.0,"9 North Buona Vista Drive, #01-17 The Metropol...",,,katong,west
4,OPERATIONAL,"{'location': {'lat': 1.307062, 'lng': 103.7880...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Fun Toast,{'open_now': False},"[{'height': 3024, 'html_attributions': ['<a hr...",ChIJp0JVlEIa2jERu8As-QXcW1Q,"{'compound_code': '8Q4Q+R6 Singapore', 'global...",3.3,ChIJp0JVlEIa2jERu8As-QXcW1Q,GOOGLE,"[cafe, store, food, point_of_interest, establi...",142.0,"1 Vista Exchange Green, #02-03 The Star Vista",,,katong,west
5,OPERATIONAL,"{'location': {'lat': 1.3070419, 'lng': 103.787...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Caffè Affogato,{'open_now': False},"[{'height': 2268, 'html_attributions': ['<a hr...",ChIJAYFXlgYa2jERzXR4KdyTEM4,"{'compound_code': '8Q4Q+R4 Singapore', 'global...",4.1,ChIJAYFXlgYa2jERzXR4KdyTEM4,GOOGLE,"[cafe, store, food, point_of_interest, establi...",397.0,"1 Vista Exchange Green, #01-02 The Star Vista",2.0,,katong,west
6,OPERATIONAL,"{'location': {'lat': 1.3032732, 'lng': 103.791...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Bodacious bar & bistro,{'open_now': False},"[{'height': 2592, 'html_attributions': ['<a hr...",ChIJ225l_0Ma2jERCNNkY0Fmqr0,"{'compound_code': '8Q3R+8P Singapore', 'global...",4.1,ChIJ225l_0Ma2jERCNNkY0Fmqr0,GOOGLE,"[cafe, restaurant, food, point_of_interest, es...",368.0,"70 Biopolis Street, #01-05",2.0,,katong,west
7,OPERATIONAL,"{'location': {'lat': 1.3023818, 'lng': 103.792...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Nanyang Old Coffee,{'open_now': False},"[{'height': 4000, 'html_attributions': ['<a hr...",ChIJITFuHkQa2jERJRPve-kqGKU,"{'compound_code': '8Q2V+X2 Singapore', 'global...",3.9,ChIJITFuHkQa2jERJRPve-kqGKU,GOOGLE,"[cafe, food, point_of_interest, establishment]",70.0,"Genome@Biopolis #01-05,, 60 Biopolis Street",,,katong,west
8,OPERATIONAL,"{'location': {'lat': 1.3021485, 'lng': 103.793...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Little Big Coffee Roasters,{'open_now': False},"[{'height': 2921, 'html_attributions': ['<a hr...",ChIJYWEEl0Ya2jERSazPjSyLxe4,"{'compound_code': '8Q2V+V6 Singapore', 'global...",4.7,ChIJYWEEl0Ya2jERSazPjSyLxe4,GOOGLE,"[cafe, food, point_of_interest, establishment]",193.0,"8 Biomedical Grove, #01-06",,,katong,west
9,OPERATIONAL,"{'location': {'lat': 1.305518, 'lng': 103.7877...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Starbucks Rochester Park,{'open_now': False},"[{'height': 2160, 'html_attributions': ['<a hr...",ChIJTyS910Ia2jERCTho_ldH97E,"{'compound_code': '8Q4Q+64 Singapore', 'global...",4.3,ChIJTyS910Ia2jERCTho_ldH97E,GOOGLE,"[cafe, store, food, point_of_interest, establi...",874.0,37 Rochester Drive,2.0,,katong,west


In [1]:
"""Test reviews collection for Holland Village cafes"""

from src.data.query import CafeQuery
import pandas as pd
from datetime import datetime

def test_reviews_collection():
    # Initialize
    print("Initializing...")
    cafe_query1 = CafeQuery()
    
    # First get cafes from one neighborhood
    test_neighborhood = {
        'center': [1.3088691, 103.9044623],
        'radius': 1200,
        'neighborhood_name': 'katong',
        'region': 'east'
    }
    
    print(f"\nGetting cafes from {test_neighborhood['neighborhood_name']}...")
    cafes_df = cafe_query1.collect_cafes(test_neighborhood)
    print(f"Found {len(cafes_df)} cafes")
    
    # Now get reviews for these cafes
    print(f"\nCollecting reviews for all cafes...")
    place_ids = cafes_df['place_id'].tolist()
    reviews_df = cafe_query1.collect_reviews(place_ids)
    
    print(f"\n✅ Collected {len(reviews_df)} total reviews")
    print(f"Average reviews per cafe: {len(reviews_df)/len(cafes_df):.1f}")
    
    # Merge neighborhood info into reviews
    reviews_df = reviews_df.merge(
        cafes_df[['place_id', 'neighborhood', 'region']],
        on='place_id',
        how='left'
    )
    
    # Save both datasets
    timestamp = datetime.now().strftime('%Y%m%d')
    
    cafes_path = f'data/cafes_katong_{timestamp}.parquet'
    reviews_path = f'data/reviews_katong_{timestamp}.parquet'
    
    cafes_df.to_parquet(cafes_path)
    reviews_df.to_parquet(reviews_path)
    
    print(f"\nData saved:")
    print(f"  Cafes: {cafes_path}")
    print(f"  Reviews: {reviews_path}")
    
    # Quick preview
    print("\nSample reviews:")
    print(reviews_df[['place_name', 'rating', 'text']].head(3))

if __name__ == "__main__":
    test_reviews_collection()



Initializing...
Client initialized and cafe reviews for 12 neighborhoods will be extracted

Getting cafes from katong...
Found 60 cafes

Collecting reviews for all cafes...

✅ Collected 266 total reviews
Average reviews per cafe: 4.4

Data saved:
  Cafes: data/cafes_katong_20250930.parquet
  Reviews: data/reviews_katong_20250930.parquet

Sample reviews:
                    place_name  rating  \
0  Choice Cuts Pizza & Records       5   
1  Choice Cuts Pizza & Records       4   
2  Choice Cuts Pizza & Records       5   

                                                text  
0  There’s a real hustle and bustle energy in the...  
1  Choice Cut nails the New York-style pizza vibe...  
2  I had a pizza from them and I enjoyed it! Pric...  


In [2]:
df_katong = pd.read_parquet('data/reviews_katong_20250930.parquet')
df_katong

Unnamed: 0,place_id,place_name,author_name,author_url,rating,text,time,relative_time_description,neighborhood,region
0,ChIJp0cob88Z2jER6mvxImemres,Choice Cuts Pizza & Records,Howcan Eats,https://www.google.com/maps/contrib/1122892009...,5,There’s a real hustle and bustle energy in the...,1756036634,a month ago,katong,east
1,ChIJp0cob88Z2jER6mvxImemres,Choice Cuts Pizza & Records,Yordan,https://www.google.com/maps/contrib/1117947906...,4,Choice Cut nails the New York-style pizza vibe...,1747588967,4 months ago,katong,east
2,ChIJp0cob88Z2jER6mvxImemres,Choice Cuts Pizza & Records,Marek B,https://www.google.com/maps/contrib/1101141077...,5,I had a pizza from them and I enjoyed it! Pric...,1757258839,3 weeks ago,katong,east
3,ChIJp0cob88Z2jER6mvxImemres,Choice Cuts Pizza & Records,Maila Cabatic,https://www.google.com/maps/contrib/1053578725...,5,Loved the NYC-style pizza. Felt like I was bac...,1752717664,2 months ago,katong,east
4,ChIJp0cob88Z2jER6mvxImemres,Choice Cuts Pizza & Records,Daniel Ng,https://www.google.com/maps/contrib/1079990664...,5,Lovely little New York style pizza joint!! The...,1746704597,4 months ago,katong,east
...,...,...,...,...,...,...,...,...,...,...
261,ChIJh9We8VkZ2jERoBWE-LhhvI0,Butter & Spice Katong,Andrew Augustine,https://www.google.com/maps/contrib/1169233140...,5,This spot is apparently popular among locals a...,1750160457,3 months ago,katong,east
262,ChIJh9We8VkZ2jERoBWE-LhhvI0,Butter & Spice Katong,Arnold Teo,https://www.google.com/maps/contrib/1162119341...,5,One of the best places in Singapore to get you...,1756654323,4 weeks ago,katong,east
263,ChIJh9We8VkZ2jERoBWE-LhhvI0,Butter & Spice Katong,Kester Chan,https://www.google.com/maps/contrib/1013444622...,5,Our usual go-to for some Indian cuisine fix!\n...,1733675060,9 months ago,katong,east
264,ChIJR34pKgAZ2jERF6MoGw4mk0A,Alishan Taiwanese Cuisine,Jed Lim,https://www.google.com/maps/contrib/1172530796...,3,Simple Taiwanese fare of braised pork belly ri...,1729856228,11 months ago,katong,east


columns needed: place_id, place_name, author_name, author_url (if author_url is unique, no need author_name), text, relative_time_description, neighborhood and region

In [3]:
sample_reviews = pd.read_parquet('data/raw/reviews_20250930.parquet')
sample_reviews.head()

Unnamed: 0,place_id,place_name,author_id,rating,text,review_date,relative_time_description,neighborhood,region
0,ChIJW-HHrWwZ2jERC16smRC_0NQ,Carlton City Hotel Singapore,reviews,5,"Great experience at the Carlton City. Clean, b...",2025-06-06,3 months ago,cbd,central
1,ChIJW-HHrWwZ2jERC16smRC_0NQ,Carlton City Hotel Singapore,reviews,4,Stayed at Carlton City Hotel from Saturday to ...,2025-09-15,2 weeks ago,cbd,central
2,ChIJW-HHrWwZ2jERC16smRC_0NQ,Carlton City Hotel Singapore,reviews,1,The rooms are ok. But a five star hotel should...,2025-08-18,a month ago,cbd,central
3,ChIJW-HHrWwZ2jERC16smRC_0NQ,Carlton City Hotel Singapore,reviews,3,We did encounter a couple of minor issues – th...,2025-07-19,2 months ago,cbd,central
4,ChIJW-HHrWwZ2jERC16smRC_0NQ,Carlton City Hotel Singapore,reviews,5,I personally really enjoyed our stay at Carlto...,2025-08-19,a month ago,cbd,central


In [6]:
len(sample_reviews)

2886

In [5]:
sample_cafes = pd.read_parquet('data/raw/cafes_20250930.parquet')
sample_cafes.head()

Unnamed: 0,name,place_id,rating,user_ratings_total,price_level,types,business_status,permanently_closed,neighborhood,region
0,Carlton City Hotel Singapore,ChIJW-HHrWwZ2jERC16smRC_0NQ,4.4,2272.0,,"[bar, cafe, lodging, restaurant, food, point_o...",OPERATIONAL,,cbd,central
1,The Book Cafe,ChIJm-22KpwZ2jERHav69tu9J48,4.3,1247.0,2.0,"[cafe, restaurant, food, point_of_interest, es...",OPERATIONAL,,cbd,central
2,Tiong Bahru Bakery,ChIJ--NsrnAZ2jER-Ry6WJB26Uo,4.3,3181.0,2.0,"[cafe, meal_delivery, meal_takeaway, bakery, r...",OPERATIONAL,,cbd,central
3,Nylon Coffee,ChIJE7ZtOWwZ2jERFcasWYmljJQ,4.7,2100.0,1.0,"[cafe, food, point_of_interest, store, establi...",OPERATIONAL,,cbd,central
4,The FernTree Café - International Buffet Singa...,ChIJk2JMPHYZ2jERS2cY70LXric,4.4,416.0,2.0,"[restaurant, cafe, food, point_of_interest, es...",OPERATIONAL,,cbd,central


In [7]:
len(sample_cafes)

658