In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import googlemaps
import time
from datetime import datetime
from typing import Dict, List, Tuple
import os
import json

In [None]:
# TODO: check how many free Google Maps API credits are available this month
# TODO: clean up the collect_and_cache_data function
# TODO: add comments/docstrings to the functions
# TODO: consider whether this would be better structured with classes


# TODO: collect features from all reviews

In [None]:
"""
Save JSON files of Google Maps queries for a few major areas in LA
Begin to get a handle on text sentiment analysis

Idea:
In spatial transcriptomics, we often define "niches" to be recurring neighborhoods of gene expression or cell types.
In one version of this, we look at the proportion of each cell type within a radius around each cell, and cluster these cell-type neighborhood compositions.

Here, based on the vibes of a bunch of shops (maybe just cafes, or a collection of cafes, bookstores, grocery stores, etc.),
compute the proportion of each type within a radius around each major city and determine the overall "vibe" of the city...
"""

'\nSave jsons of gmap queries for 100 major cities in LA\nBegin to get a handle on text sentiment analyses\n\nIdea:\nin spatial transcriptomics, we often define "niches" to be recurring neighborhoods of gene expression or cell types. \nIn one version of this, we look at the proportion of cells within a radius around each cell, and cluster these cell type neighborhood compositions. \n\nHere, based on the vibes of a bunch of shops, maybe just cafes, or a collection of cafes, bookstores, grocery stores etc, \ncompute the proportion of each type within a radius around each major city and determine the overall "vibe"of the city...\n'

In [2]:
# The API key is read from the environment so it never lands in the notebook
# or its version history. Any key that was previously committed in this cell
# should be treated as leaked and revoked/rotated.
_api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
if not _api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this notebook."
    )
gmaps = googlemaps.Client(key=_api_key)

# Search centres as name -> (latitude, longitude).
LA_AREAS = {
    'Downtown LA': (34.0522, -118.2437),
    'Santa Monica': (34.0195, -118.4912),
    'Hollywood': (34.0928, -118.3287),
    'Beverly Hills': (34.0736, -118.4004),
    'Venice': (33.9850, -118.4695)
}

# Values passed as the `type` filter of each nearby search.
BUSINESS_TYPES = ['cafe', 'restaurant', 'book_store']


In [3]:
# Directory (relative to the notebook) where query results are cached as JSON.
CACHE_DIR = './gmap_cache'
# Search radius intended for the nearby-place queries
# (presumably metres, as expected by the Places API -- TODO confirm).
RADIUS = 5_000

In [None]:
#helper functions
#def get_city_data()
#def get_business_details()

def _fetch_place_details(client, place: Dict, business_type: str) -> Dict:
    """Fetch the detail record for one nearby-search hit and bundle it with the hit itself."""
    details = client.place(
        place_id=place['place_id'],
        fields=['name', 'rating', 'reviews', 'price_level']  # editorial_summary
    )
    return {
        "basic_info": place,
        "detailed_info": details.get('result', {}),
        "search_type": business_type
    }


def _fetch_city_data(client, city: str, location: Tuple, business_types: List[str], radius: int) -> Dict:
    """Run one nearby search per business type around `location` and build the city record."""
    lat, lng = location
    city_data = {
        "city_info": {
            "name": city,
            "coordinates": {"lat": lat, "lng": lng},
            "search_radius": radius,
            "fetch_timestamp": datetime.now().isoformat()
        },
        "businesses": {}
    }

    for business_type in business_types:
        places_result = client.places_nearby(location=(lat, lng), radius=radius, type=business_type)
        # Only this single response's 'results' list is read; any continuation
        # token the API may return is ignored.
        places = places_result.get('results', [])

        business_details = []
        for place in places:
            business_details.append(_fetch_place_details(client, place, business_type))
            time.sleep(0.1)  # short pause between consecutive detail requests
        city_data["businesses"][business_type] = business_details
        time.sleep(1)  # longer pause between consecutive nearby searches

    return city_data


def collect_and_cache_data(cache_dir: str, areas: Dict[str, Tuple], business_types: List[str], radius: int = 5000, client=None):
    """Collect nearby businesses for each area, caching one JSON file per area.

    For every area, a per-area cache file ``<cache_dir>/<area_name>.json``
    (spaces replaced by underscores, lower-cased) is loaded if it exists;
    otherwise the area is queried through the Google Maps client and the
    result is written to that file. Finally the combined dataset is written
    to a timestamped ``full_collection_<timestamp>.json`` in ``cache_dir``.

    Note: an existing per-area cache file is reused as-is, even if it was
    created with a different ``radius`` or different ``business_types``.
    Delete the file to force a re-fetch.

    Args:
        cache_dir: Directory for the cache files; created if missing.
        areas: Mapping of area name to a ``(lat, lng)`` pair.
        business_types: Values used as the ``type`` filter of each nearby search.
        radius: Search radius passed to the nearby search.
        client: Object providing ``places_nearby`` and ``place``. When omitted,
            the module-level ``gmaps`` client is used; it is only looked up if
            an area actually has to be fetched.

    Returns:
        A dict with ``"collection_info"`` (timestamp, areas, business types,
        radius) and ``"areas"`` (area name -> city record).
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(cache_dir, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    session_cache_file = os.path.join(cache_dir, f"full_collection_{timestamp}.json")

    all_data = {
        "collection_info": {
            "timestamp": timestamp,
            "areas_searched": list(areas.keys()),
            "business_types": business_types,
            "radius": radius
        },
        "areas": {}
    }

    for city, location in areas.items():
        city_cache_file = os.path.join(cache_dir, f"{city.replace(' ', '_').lower()}.json")
        if os.path.exists(city_cache_file):
            print(f"  Loading cached data for {city}")
            with open(city_cache_file, 'r', encoding='utf-8') as f:
                city_data = json.load(f)
            all_data["areas"][city] = city_data
            continue

        if client is None:
            # Resolved lazily so fully-cached runs work without an API client.
            client = gmaps

        # Use the caller's business_types (not a module-level constant) so the
        # fetched data matches what is recorded in collection_info.
        city_data = _fetch_city_data(client, city, location, business_types, radius)

        with open(city_cache_file, 'w', encoding='utf-8') as f:
            json.dump(city_data, f, ensure_ascii=False, indent=2)
        all_data["areas"][city] = city_data
        print(f"  Cached data for {city} to {city_cache_file}")

    with open(session_cache_file, 'w', encoding='utf-8') as f:
        json.dump(all_data, f, ensure_ascii=False, indent=2)
    print(f"\nComplete dataset saved to: {session_cache_file}")

    return all_data


In [72]:
# Pass RADIUS explicitly so the configured constant, not the function's
# default, controls the search radius.
all_data = collect_and_cache_data(CACHE_DIR, LA_AREAS, BUSINESS_TYPES, radius=RADIUS)

  Cached data for Downtown LA to ./gmap_cache\downtown_la.json
  Cached data for Santa Monica to ./gmap_cache\santa_monica.json
  Cached data for Hollywood to ./gmap_cache\hollywood.json
  Cached data for Beverly Hills to ./gmap_cache\beverly_hills.json
  Cached data for Venice to ./gmap_cache\venice.json

Complete dataset saved to: ./gmap_cache\full_collection_20250703_214634.json
