In [1]:
import io
import json

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [2]:
def read_json_in_chunks(file_path, chunk_size=10000):
    """Read a line-delimited JSON file into a single DataFrame, chunk by chunk.

    The file is read line by line; every ``chunk_size`` non-blank lines are
    parsed with ``pd.read_json(..., lines=True)`` and the resulting frames are
    concatenated with a fresh integer index.

    Parameters
    ----------
    file_path : str or path-like
        Path to a file containing one JSON document per line.
    chunk_size : int, default 10000
        Number of lines parsed per ``pd.read_json`` call. Must be >= 1.

    Returns
    -------
    pandas.DataFrame
        All records from the file. An empty DataFrame is returned when the
        file contains no records.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    def _parse(lines):
        # Wrap the text in StringIO: newer pandas releases no longer accept a
        # literal JSON string as the first argument of read_json.
        return pd.read_json(io.StringIO("\n".join(lines)), lines=True)

    frames = []

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(file_path, "r", encoding="utf-8") as file:
        pending = []
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines carry no record.
                continue
            pending.append(line)

            if len(pending) >= chunk_size:
                frames.append(_parse(pending))
                pending = []

        # Parse whatever is left after the last full chunk.
        if pending:
            frames.append(_parse(pending))

    # pd.concat raises on an empty list, so handle the "no records" case here.
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [None]:
# Locations of the raw, line-delimited JSON exports.
reviews_file_path = "../data/raw/restaurant-reviews-cali-10.json"
restaurants_file_path = "../data/raw/cali-restaurants.json"

# Parse each export into its own DataFrame.
reviews_df = read_json_in_chunks(reviews_file_path)
restaurants_df = read_json_in_chunks(restaurants_file_path)

In [15]:
# Latitude and longitude ranges used below to select the Bay Area subset.
lat = (36.8, 38.3)
long = (-123.0, -121.5)

def regional_filtering(df, long, lat):
    """Return the rows of ``df`` that lie inside a latitude/longitude box.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``"latitude"`` and ``"longitude"`` columns.
    long : iterable of float
        Longitude values; the smallest and largest define the box edges.
    lat : iterable of float
        Latitude values; the smallest and largest define the box edges.

    Returns
    -------
    pandas.DataFrame
        The subset of ``df`` whose coordinates fall within the box, with both
        edges included.
    """
    south, north = min(lat), max(lat)
    west, east = min(long), max(long)

    # Series.between is inclusive on both ends by default.
    inside_lat = df["latitude"].between(south, north)
    inside_lon = df["longitude"].between(west, east)

    return df[inside_lat & inside_lon]

# Restaurants whose coordinates fall inside the lat/long ranges defined above.
bay_restaurants = regional_filtering(restaurants_df, long, lat)

# Keep only the reviews whose gmap_id belongs to one of those restaurants.
bay_gmap_ids = bay_restaurants["gmap_id"].unique()
bay_reviews = reviews_df.loc[reviews_df["gmap_id"].isin(bay_gmap_ids)]


In [6]:
# Load the sampled Bay Area dataset from its processed parquet file.
bay_df = pd.read_parquet(
    "../data/processed/bay_area/bayarea-sampled.parquet"
)

In [16]:
# Sanity check: (row count, column count) of the filtered reviews frame.
bay_reviews.shape

(3972640, 6)

In [8]:
import folium

def get_bounding_box_from_coords(long, lat):
    """
    Builds a bounding-box dictionary from longitude and latitude values.

    Note the argument order: longitudes first, latitudes second.

    Parameters
    ----------
    long : iterable of float
        Longitude values; only the smallest and largest are used.
    lat : iterable of float
        Latitude values; only the smallest and largest are used.

    Returns
    -------
    dict
        Mapping with the keys ``"min_lat"``, ``"max_lat"``, ``"min_lon"``
        and ``"max_lon"``.
    """
    south, north = min(lat), max(lat)
    west, east = min(long), max(long)

    return dict(min_lat=south, max_lat=north, min_lon=west, max_lon=east)

def visualize_bounding_box(coord1, coord2):
    """
    Visualizes a latitude/longitude bounding box using Folium.

    Parameters
    ----------
    coord1 : sequence of float
        Latitude extent of the box; the smallest and largest values are used.
    coord2 : sequence of float
        Longitude extent of the box; the smallest and largest values are used.

    Returns
    -------
    folium.Map
        Map fitted to the box, with the box drawn as a filled polygon and
        markers on its south-west ("Point 1") and north-east ("Point 2")
        corners.
    """
    # Pass the ranges by keyword: the helper's signature is (long, lat), so
    # handing it (latitudes, longitudes) positionally would swap the axes.
    bbox = get_bounding_box_from_coords(long=coord2, lat=coord1)

    # Folium expects every location as (latitude, longitude).
    south_west = (bbox["min_lat"], bbox["min_lon"])
    north_east = (bbox["max_lat"], bbox["max_lon"])

    # Define the four corners of the bounding box
    corners = [
        south_west,                          # Bottom-left
        (bbox["min_lat"], bbox["max_lon"]),  # Bottom-right
        north_east,                          # Top-right
        (bbox["max_lat"], bbox["min_lon"]),  # Top-left
        south_west,                          # Close the loop
    ]

    # Center of the map
    center_lat = (bbox["min_lat"] + bbox["max_lat"]) / 2
    center_lon = (bbox["min_lon"] + bbox["max_lon"]) / 2

    # Create map
    m = folium.Map(location=[center_lat, center_lon], zoom_start=13)
    # A fixed zoom level cannot suit every box size, so fit the viewport to
    # the box itself.
    m.fit_bounds([south_west, north_east])

    # Add the rectangle (bounding box)
    folium.Polygon(
        locations=corners,
        color="blue",
        weight=2,
        fill=True,
        fill_opacity=0.2,
        popup="Bounding Box"
    ).add_to(m)

    # Mark the two opposite corners. The raw inputs are per-axis ranges, not
    # (lat, lon) points, so they cannot be used as marker locations directly.
    folium.Marker(south_west, popup="Point 1", icon=folium.Icon(color="red")).add_to(m)
    folium.Marker(north_east, popup="Point 2", icon=folium.Icon(color="green")).add_to(m)

    return m


# Example usage: draw the Bay Area box and write it out as a standalone HTML page.
region_name = "bay_area"
lat = (36.8, 38.3)
long = (-123.0, -121.5)

# The latitude range is passed first, the longitude range second.
map_box = visualize_bounding_box(lat, long)
map_box.save(f"../bounding_boxes_visuals/{region_name}.html")
print(f"✅ Map saved as {region_name}.html")


✅ Map saved as bay_area.html
