<a href="https://colab.research.google.com/github/kavyajeetbora/foursquare_ai/blob/master/notebooks/10_PlantOSM_places.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Exporting OpenStreetMap Place Labels to PMTiles


1. [Foursquare's 104M Points of Interest](https://tech.marksblogg.com/foursquare-open-global-poi-dataset.html)

# Setup Environment

In [None]:
# Install the packages this notebook needs: DuckDB, jupysql (the %sql magic
# loaded further down) and duckdb-engine.
!pip install --quiet duckdb
!pip install --quiet jupysql
!pip install --quiet duckdb-engine

import duckdb
import os
import pandas as pd
import geopandas as gpd
import numpy as np
# NOTE(review): a bare `import urllib` is not guaranteed to load the
# `urllib.request` submodule that the download cell calls; consider
# `import urllib.request` instead.
import urllib
import subprocess
import json
## Setup
# Import jupysql Jupyter extension to create SQL cells
%load_ext sql

# SqlMagic options: presumably return results as pandas DataFrames and silence
# the status / connection echo -- confirm against the jupysql documentation.
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Point the %sql magic at an in-memory DuckDB database
%sql duckdb:///:memory:

## Installing Tippecanoe

In [None]:
%%shell

## Install duckdb CLI quietly
# -f makes curl fail on an HTTP error instead of piping an error page into sh;
# -L follows redirects.
curl -fsSL https://install.duckdb.org | sh > /dev/null 2>&1

## Install tippecanoe quietly

# Clone the tippecanoe repository from GitHub quietly.
# Skip the clone when the folder already exists so the cell can be re-run.
if [ ! -d tippecanoe ]; then
    git clone --quiet https://github.com/mapbox/tippecanoe.git
fi

# Build and install inside a subshell: the working directory is restored
# automatically and a failed step is reported instead of silently ignored.
(
    cd tippecanoe || exit 1
    # Cap parallel jobs at the CPU count; a bare -j places no limit on jobs
    make -j"$(nproc)" -s > /dev/null 2>&1 || exit 1
    # Install tippecanoe in the system silently
    make install -s > /dev/null 2>&1
) || echo "Warning: tippecanoe build or install failed" >&2

## Check if duckdb & tippecanoe are installed (minimal output)
echo "Installation complete."
if command -v tippecanoe > /dev/null 2>&1; then
    # Look the binary up on PATH (where `make install` put it) rather than a
    # Colab-specific absolute path. The version banner may be written to
    # stderr and the exit status may be non-zero, so merge the streams and
    # ignore the status.
    echo "Tippecanoe version: $(tippecanoe --version 2>&1 || true)"
else
    echo "Tippecanoe version: Not found"
fi

## Installing the PMTiles CLI
Release binaries are listed on the [go-pmtiles/releases](https://github.com/protomaps/go-pmtiles/releases) page.

In [None]:
%%shell
# Abort on the first failing step so an archive that fails the checksum
# (or a failed extract/move) never results in an installed binary.
set -e

ARCHIVE="go-pmtiles_1.28.3_Linux_x86_64.tar.gz"

# Download go-pmtiles for Linux x86_64 quietly.
# -O overwrites any stale archive left behind by an earlier failed run
# instead of saving the new download under a ".1" suffix.
wget -q -O "$ARCHIVE" "https://github.com/protomaps/go-pmtiles/releases/download/v1.28.3/$ARCHIVE"

# Verify SHA256 checksum (canonical "<hash>  <file>" format; a mismatch stops the cell)
echo "06cf492adc2c7fcd23c4f11a98a5292f4cbe04d3afc3a6b38a07bb47452daca2  $ARCHIVE" | sha256sum --check --quiet

# Extract quietly (tar prints nothing on success; errors stay visible)
tar -xzf "$ARCHIVE" -C /tmp/

# Install binary to /usr/local/bin (assuming binary is named 'pmtiles')
sudo mv /tmp/pmtiles /usr/local/bin/pmtiles

# Clean up
rm -f "$ARCHIVE"

# Check installation
echo "Installation complete."
pmtiles version

## Download the India OSM Extract


In [None]:
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.request` is loaded.
import urllib.request

osm_url = "https://download.geofabrik.de/asia/india-latest.osm.pbf"
osm_path = "india-latest.osm.pbf"

# Download only when the extract is not already on disk. The data is written
# to a ".part" file and renamed on success, so an interrupted download can
# never be mistaken for a complete extract on the next run.
if not os.path.exists(osm_path):
    partial_path = osm_path + ".part"
    print("Downloading India OSM extract (~1GB; may take 10-20 mins)...")
    try:
        urllib.request.urlretrieve(osm_url, partial_path)
        os.replace(partial_path, osm_path)
    finally:
        # After a successful rename this is a no-op; after a failure or an
        # interrupt it removes the truncated file.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download complete.")

In [None]:
# Export named OSM place nodes to one GeoJSON file per place type.
#
# Inputs : the PBF extract at `pbf_path`
# Outputs: `<folder_path>/<place_type>.geojson` for every key of `place_zoom_map`

# Assuming you have the PBF file, e.g., 'india-latest.osm.pbf' (regional for practicality)
pbf_path = 'india-latest.osm.pbf'  # Replace with your full path
folder_path = 'places_by_type'  # Output folder for individual GeoJSON files

# Zoom range per OSM `place` value, plus a DataFrame view of the same mapping
# that is registered with DuckDB below.
place_zoom_map = {
    'country': {'min_zoom': 0, 'max_zoom': 3},
    'state': {'min_zoom': 2, 'max_zoom': 6},
    'region': {'min_zoom': 3, 'max_zoom': 7},
    'county': {'min_zoom': 4, 'max_zoom': 8},
    'district': {'min_zoom': 4, 'max_zoom': 8},
    'borough': {'min_zoom': 5, 'max_zoom': 9},
    'city': {'min_zoom': 5, 'max_zoom': 10},
    'town': {'min_zoom': 7, 'max_zoom': 12},
    'suburb': {'min_zoom': 9, 'max_zoom': 13},
    'neighbourhood': {'min_zoom': 9, 'max_zoom': 13},
    'quarter': {'min_zoom': 10, 'max_zoom': 14},
    'village': {'min_zoom': 11, 'max_zoom': 16},
    'hamlet': {'min_zoom': 13, 'max_zoom': 17},
    'locality': {'min_zoom': 13, 'max_zoom': 17},
    'isolated_dwelling': {'min_zoom': 14, 'max_zoom': 18}
}
zoom_df = pd.DataFrame([
    {'place_type': k, 'min_zoom': v['min_zoom'], 'max_zoom': v['max_zoom']} for k, v in place_zoom_map.items()
])

# Create output folder if it doesn't exist
os.makedirs(folder_path, exist_ok=True)

con = duckdb.connect()
try:
    con.execute("INSTALL spatial;")
    con.execute("LOAD spatial;")

    # Register the zoom map DataFrame
    con.register('zoom_map', zoom_df)

    # Scan the PBF once and keep only the named place nodes of interest.
    # Re-reading the extract inside the loop would parse the whole file once
    # per place type.
    place_type_list = ", ".join(f"'{name}'" for name in place_zoom_map)
    con.execute(f"""
    CREATE OR REPLACE TEMP TABLE place_nodes AS
      SELECT
        tags['name'] AS place_name,
        tags['place'] AS place_type,
        TRY_CAST(tags['population'] AS BIGINT) AS population,
        ST_Point(CAST(lon AS DOUBLE), CAST(lat AS DOUBLE)) AS geom
      FROM ST_ReadOSM('{pbf_path}')
      WHERE
        tags['name'] IS NOT NULL
        AND tags['place'] IN ({place_type_list})
        AND kind = 'node'  -- Ensures point geometries (nodes)
        AND lat IS NOT NULL
        AND lon IS NOT NULL;
    """)

    # Export one GeoJSON per place type from the pre-filtered table
    for place_type in place_zoom_map:
        geojson_file = os.path.join(folder_path, f'{place_type}.geojson')

        # Remove output from an earlier run first: the GDAL GeoJSON driver is
        # understood not to overwrite existing files, which would make a
        # re-run of this cell fail.
        if os.path.exists(geojson_file):
            os.remove(geojson_file)

        con.execute(f"""
        COPY (
          SELECT place_name, place_type, population, geom
          FROM place_nodes
          WHERE place_type = '{place_type}'
        ) TO '{geojson_file}' WITH (FORMAT GDAL, DRIVER 'GeoJSON');
        """)

        print(f"Exported {place_type} places to {geojson_file}")
finally:
    # Release the connection even when a query fails part-way through
    con.close()

# Converting to PMTiles Using Tippecanoe

In [None]:
# Build one MBTiles layer per place type with tippecanoe, merge the layers
# with tile-join, then convert the merged archive to PMTiles.

# Paths
places_folder = 'places_by_type'  # Input GeoJSON folder
pmtiles_folder = 'pmtiles_by_type'  # Temp folder for individual MBTiles
merged_mbtiles = 'merged_places.mbtiles'  # Merged output (final MBTiles export)
final_pmtiles = 'india_places_combined.pmtiles'  # Final PMTiles

# Define place_zoom_map (min_zoom as -Z, max_zoom as -z)
place_zoom_map = {
    'country': {'min_zoom': 0, 'max_zoom': 3},
    'state': {'min_zoom': 2, 'max_zoom': 6},
    'region': {'min_zoom': 3, 'max_zoom': 7},
    'county': {'min_zoom': 4, 'max_zoom': 8},
    'district': {'min_zoom': 4, 'max_zoom': 8},
    'borough': {'min_zoom': 5, 'max_zoom': 9},
    'city': {'min_zoom': 5, 'max_zoom': 10},
    'town': {'min_zoom': 7, 'max_zoom': 12},
    'suburb': {'min_zoom': 9, 'max_zoom': 13},
    'neighbourhood': {'min_zoom': 9, 'max_zoom': 13},
    'quarter': {'min_zoom': 10, 'max_zoom': 14},
    'village': {'min_zoom': 11, 'max_zoom': 16},
    'hamlet': {'min_zoom': 13, 'max_zoom': 17},
    'locality': {'min_zoom': 13, 'max_zoom': 17},
    'isolated_dwelling': {'min_zoom': 14, 'max_zoom': 18}
}


def geojson_has_features(path):
    """Return True when the GeoJSON file at `path` contains at least one feature.

    The file is decoded as UTF-8 explicitly so that non-ASCII place names do
    not depend on the platform's default encoding.

    Raises:
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return len(data.get('features', [])) > 0


def build_tile_join_cmd(sources, output):
    """Return the tile-join argv that merges `sources` into `output`.

    `-f` overwrites an existing output. `--no-tile-size-limit` keeps tiles
    larger than 500K; the per-layer tippecanoe runs use the same flag, and
    without it tile-join would skip those tiles during the merge.
    """
    return ['tile-join', '-f', '--no-tile-size-limit', '-o', output] + list(sources)


# Create temp folder
os.makedirs(pmtiles_folder, exist_ok=True)

# List to hold individual MBTiles files
mbtiles_files = []

# Step 1: Generate individual MBTiles from GeoJSONs
for place_type, zoom_info in place_zoom_map.items():
    geojson_in = os.path.join(places_folder, f'{place_type}.geojson')
    if not os.path.exists(geojson_in):
        print(f"Warning: {geojson_in} not found, skipping {place_type}")
        continue

    # Check if GeoJSON is empty
    try:
        if not geojson_has_features(geojson_in):
            print(f"Warning: {geojson_in} is empty, skipping {place_type}")
            continue
    except json.JSONDecodeError:
        print(f"Warning: Invalid JSON in {geojson_in}, skipping {place_type}")
        continue

    mb_out = os.path.join(pmtiles_folder, f'{place_type}.mbtiles')
    layer_name = place_type

    # Tippecanoe to MBTiles with explicit zooms
    cmd_tippecanoe = [
        'tippecanoe',
        '-o', mb_out,
        '-l', layer_name,
        '-Z', str(zoom_info['min_zoom']),
        '-z', str(zoom_info['max_zoom']),
        '--force',
        '--read-parallel',
        '--no-feature-limit',
        '--no-tile-size-limit',
        geojson_in
    ]
    result_tip = subprocess.run(cmd_tippecanoe, capture_output=True, text=True)
    if result_tip.returncode != 0:
        print(f"Error in tippecanoe for {place_type}: {result_tip.stderr}")
        continue

    if not os.path.exists(mb_out) or os.path.getsize(mb_out) == 0:
        print(f"Warning: Empty MBTiles for {place_type}")
        if os.path.exists(mb_out):
            os.remove(mb_out)
        continue

    mbtiles_files.append(mb_out)
    print(f"Created {mb_out} (zoom {zoom_info['min_zoom']}-{zoom_info['max_zoom']})")

# Step 2: Merge MBTiles using tile-join
if mbtiles_files:
    cmd_join = build_tile_join_cmd(mbtiles_files, merged_mbtiles)
    result_join = subprocess.run(cmd_join, capture_output=True, text=True)
    if result_join.returncode != 0:
        print(f"Error in tile-join: {result_join.stderr}")
    else:
        print(f"Merged into {merged_mbtiles}")

        # Step 3: Convert merged MBTiles to PMTiles
        cmd_convert = ['pmtiles', 'convert', merged_mbtiles, final_pmtiles]
        result_convert = subprocess.run(cmd_convert, capture_output=True, text=True)
        if result_convert.returncode != 0:
            print(f"Error in final convert: {result_convert.stderr}")
        else:
            print(f"Final PMTiles: {final_pmtiles}")
            # Show metadata. Capture and print it explicitly: output written
            # directly by a child process is not reliably shown in notebook
            # cell output.
            result_show = subprocess.run(
                ['pmtiles', 'show', final_pmtiles], capture_output=True, text=True
            )
            if result_show.returncode != 0:
                print(f"Error in pmtiles show: {result_show.stderr}")
            else:
                print(result_show.stdout)

        # Cleanup: Remove individuals but KEEP merged MBTiles and PMTiles
        for mb in mbtiles_files:
            os.remove(mb)
        # Do NOT remove merged_mbtiles or final_pmtiles
else:
    print("No MBTiles to merge.")