In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import googlemaps
import time
from datetime import datetime
from typing import Dict, List, Tuple
import os
import json

In [None]:
# TODO: check how many free Google Maps API credits are available this month.
# TODO: clean up the collect_and_cache_data() function.
# TODO: consider restructuring the collection code with classes.

# TODO: collect features from all reviews.

In [None]:
"""
Idea:
In spatial transcriptomics, we often define "niches" as recurring neighborhoods of gene expression or cell types.
In one version of this, we look at the proportions of cell types within a radius around each cell, and cluster these cell-type neighborhood compositions.

Here, based on the vibes of a bunch of shops (maybe just cafes, or a collection of cafes, bookstores, grocery stores, etc.),
compute the proportion of each type within a radius around each major city and determine the overall "vibe" of the city...
"""

'\nSave jsons of gmap queries for 100 major cities in LA\nBegin to get a handle on text sentiment analyses\n\nIdea:\nin spatial transcriptomics, we often define "niches" to be recurring neighborhoods of gene expression or cell types. \nIn one version of this, we look at the proportion of cells within a radius around each cell, and cluster these cell type neighborhood compositions. \n\nHere, based on the vibes of a bunch of shops, maybe just cafes, or a collection of cafes, bookstores, grocery stores etc, \ncompute the proportion of each type within a radius around each major city and determine the overall "vibe"of the city...\n'

In [2]:
# Read the API key from the environment instead of embedding it in the notebook:
# a key saved in the notebook is exposed to everyone who can read the file.
# Export GOOGLE_MAPS_API_KEY before starting the kernel (a missing variable raises KeyError).
# NOTE(review): the key that was previously hardcoded here should be rotated.
gmaps = googlemaps.Client(key=os.environ['GOOGLE_MAPS_API_KEY'])

# Area name -> (latitude, longitude) used as the centre of each nearby search.
LA_AREAS = {
    'Downtown LA': (34.0522, -118.2437),
    'Santa Monica': (34.0195, -118.4912),
    'Hollywood': (34.0928, -118.3287),
    'Beverly Hills': (34.0736, -118.4004),
    'Venice': (33.9850, -118.4695)
}

# Place `type` values queried for every area.
BUSINESS_TYPES = ['cafe', 'restaurant', 'book_store']


In [3]:
# Directory where fetched place data is cached as JSON files.
CACHE_DIR = "./gmap_cache"

# Search radius around each area centre for the nearby-places queries.
RADIUS = 5_000

In [15]:
# NOTE(review): `places` is not assigned at notebook scope in any visible cell (it only
# appears as a local inside get_city_data), so this cell presumably relies on leftover
# kernel state and would fail after Restart & Run All -- confirm.
places

[{'business_status': 'OPERATIONAL',
  'geometry': {'location': {'lat': 34.04769639999999, 'lng': -118.2498964},
   'viewport': {'northeast': {'lat': 34.0490083302915,
     'lng': -118.2483218697085},
    'southwest': {'lat': 34.0463103697085, 'lng': -118.2510198302915}}},
  'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/shopping-71.png',
  'icon_background_color': '#4B96F3',
  'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/shopping_pinlet',
  'name': 'The Last Bookstore',
  'opening_hours': {'open_now': True},
  'photos': [{'height': 4032,
    'html_attributions': ['<a href="https://maps.google.com/maps/contrib/105222138379931931220">Sikun</a>'],
    'photo_reference': 'ATKogpdRz3e7Fds9yNwr8xEkghbi7R0CpDKOoLVjTmKOgS2MRBq9HcS2SIyJ7_utiXrtIYhxQTNfSPCg5qm2WHVURS5AfxVQ5Ic523r3xxUW3jjyDoWjiwes2NrbSWhuRoaAAgAX-FCWSBbz77BXxGh5ZZKH2AEvI5tvGQ2vYLAjGL-Y35eqpEOMmLiY2uKglPC9dCHkL4FxwzObk-sxwZH8L43LSkcbO0IT3Pd7YgVV26m7HFMAVxtPZBSjzVEhBgzJz3P4uuwpFA

In [None]:
def get_business_details(places: List[Dict], business_type: str):
    """Helper for get_city_data(): fetch a details record for every place.

    Each entry in ``places`` is looked up by its ``place_id`` through the
    module-level ``gmaps`` client, asking for the name, rating, reviews and
    price level. The original search entry and the details response are
    bundled together with the business type that produced the hit.

    Args:
        places (List[Dict]): search results, one dict per place; every dict
            must carry a 'place_id' key
        business_type (str): type of business the places were searched under
            (e.g., cafe, book_store,...)

    Returns:
        List[Dict]: one dict per place with the keys "basic_info" (the input
            entry), "detailed_info" (the 'result' part of the details
            response, or {} when absent) and "search_type"
    """
    detail_fields = ('name', 'rating', 'reviews', 'price_level')  # editorial_summary

    records = []
    for basic_info in places:
        response = gmaps.place(
            place_id=basic_info['place_id'],
            fields=list(detail_fields),
        )
        records.append({
            "basic_info": basic_info,
            "detailed_info": response.get('result', {}),
            "search_type": business_type,
        })
        # Short pause after every details request.
        time.sleep(0.1)
    return records

def get_city_data(location: Tuple, city: str, business_types: List[str], radius: int):
    """Helper for collect_and_cache_data(): gather all business data for one city.

    Runs one nearby search per business type through the module-level
    ``gmaps`` client, then expands the hits with get_business_details().

    Args:
        location (Tuple): (lat, lng) coordinates for the given city
        city (str): city name
        business_types (List[str]): list of types of businesses to query (e.g., cafe, book_store,...)
        radius (int): search radius around the city

    Returns:
        Dict: Nested dictionary containing city metadata and business data for the city
            Structure: {
                    "city_info": {name, coordinates, search_radius, fetch_timestamp},
                    "businesses": {
                        "business_type": [list of business records with basic + detailed info]
                    }
                }
    """
    lat, lng = location

    city_info = {
        "name": city,
        "coordinates": {"lat": lat, "lng": lng},
        "search_radius": radius,
        "fetch_timestamp": datetime.now().isoformat(),
    }

    businesses = {}
    for business_type in business_types:
        nearby = gmaps.places_nearby(location=(lat, lng), radius=radius, type=business_type)
        hits = nearby.get('results', [])
        businesses[business_type] = get_business_details(hits, business_type)
        # Pause between business types before issuing the next search.
        time.sleep(1)

    return {"city_info": city_info, "businesses": businesses}

def _cached_city_matches(city_data, business_types: List[str], radius: int) -> bool:
    """Return True when a cached city payload covers the requested radius and business types."""
    if not isinstance(city_data, dict):
        return False
    city_info = city_data.get("city_info")
    businesses = city_data.get("businesses")
    if not isinstance(city_info, dict) or not isinstance(businesses, dict):
        return False
    if city_info.get("search_radius") != radius:
        return False
    return all(business_type in businesses for business_type in business_types)


def collect_and_cache_data(cache_dir: str, areas:  Dict[str, Tuple], business_types: List[str], radius: int = 5000):
    """Extract all city data

    For each major city, collects data for selection of businesses within specified radius.
    Every city is cached in its own JSON file inside ``cache_dir``; a cached file is reused
    only when it was fetched with the same ``radius`` and contains every requested business
    type. A cache file that is unreadable as JSON or that was fetched with other parameters
    is refetched and overwritten, so the returned data always matches "collection_info".
    The combined result is additionally written to a timestamped
    ``full_collection_<timestamp>.json`` file in ``cache_dir``.

    Args:
        cache_dir (str): Path to directory for data caching (created if missing)
        areas (Dict[str, Tuple]): dictionary mapping city names to coordinates
        business_types (List[str]): list of types of business to query (e.g., cafe, book_store,...)
        radius (int): search radius around each city

    Returns:
        Dict: Nested dictionary containing collection metadata and business data organized by city.
              Structure: {
                  "collection_info": {metadata about the data collection session},
                  "areas": {
                      "city_name": {
                          "city_info": {city metadata and search parameters},
                          "businesses": {
                              "business_type": [list of business records with basic + detailed info]
                          }
                      }
                  }
              }
    """
    # exist_ok avoids the check-then-create race and also creates nested directories.
    os.makedirs(cache_dir, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    session_cache_file = os.path.join(cache_dir, f"full_collection_{timestamp}.json")

    all_data = {
        "collection_info": {
            "timestamp": timestamp,
            "areas_searched": list(areas.keys()),
            "business_types": business_types,
            "radius": radius
        },
        "areas": {}
    }

    for city, location in areas.items():
        city_cache_file = os.path.join(cache_dir, f"{city.replace(' ', '_').lower()}.json")

        if os.path.exists(city_cache_file):
            try:
                with open(city_cache_file, 'r', encoding='utf-8') as f:
                    cached_city = json.load(f)
            except json.JSONDecodeError:
                # A truncated/corrupt cache file (e.g. from an interrupted write) is refetched.
                cached_city = None

            if _cached_city_matches(cached_city, business_types, radius):
                print(f"  Loading cached data for {city}")
                all_data["areas"][city] = cached_city
                continue
            print(f"  Cached data for {city} does not match the requested radius/business types; refetching")

        city_data = get_city_data(location, city, business_types, radius)

        with open(city_cache_file, 'w', encoding='utf-8') as f:
            json.dump(city_data, f, ensure_ascii=False, indent=2)
        all_data["areas"][city] = city_data
        print(f"  Cached data for {city} to {city_cache_file}")

    with open(session_cache_file, 'w', encoding='utf-8') as f:
        json.dump(all_data, f, ensure_ascii=False, indent=2)
    print(f"\nComplete dataset saved to: {session_cache_file}")

    return all_data

In [9]:
# Pass RADIUS explicitly so the notebook-level constant, not the function's default, controls the search.
all_data = collect_and_cache_data(CACHE_DIR, LA_AREAS, BUSINESS_TYPES, radius=RADIUS)

  Loading cached data for Downtown LA
  Loading cached data for Santa Monica
  Loading cached data for Hollywood
  Loading cached data for Beverly Hills
  Loading cached data for Venice

Complete dataset saved to: ./gmap_cache\full_collection_20250704_171659.json


In [None]:
# trendy
# cozy
# upscale
# 

In [14]:
# Displays the entire nested collection (every area, business type and place record),
# which renders as a very large output.
# NOTE(review): consider showing all_data["collection_info"] or per-type record counts instead.
all_data

{'collection_info': {'timestamp': '20250704_171659',
  'areas_searched': ['Downtown LA',
   'Santa Monica',
   'Hollywood',
   'Beverly Hills',
   'Venice'],
  'business_types': ['cafe', 'restaurant', 'book_store'],
  'radius': 5000},
 'areas': {'Downtown LA': {'city_info': {'name': 'Downtown LA',
    'coordinates': {'lat': 34.0522, 'lng': -118.2437},
    'search_radius': 5000,
    'fetch_timestamp': '2025-07-03T21:46:34.343219'},
   'businesses': {'cafe': [{'basic_info': {'business_status': 'OPERATIONAL',
       'geometry': {'location': {'lat': 34.0470524, 'lng': -118.2565439},
        'viewport': {'northeast': {'lat': 34.0484688802915,
          'lng': -118.2551797697085},
         'southwest': {'lat': 34.0457709197085, 'lng': -118.2578777302915}}},
       'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/restaurant-71.png',
       'icon_background_color': '#FF9E67',
       'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/restaurant_pinle