<a href="https://colab.research.google.com/github/kavyajeetbora/foursquare_ai/blob/master/notebooks/10_PlantOSM_places.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Exploring Foursquare POI Data


1. [Foursquare's 104M Points of Interest](https://tech.marksblogg.com/foursquare-open-global-poi-dataset.html)

# Setup Environment

In [None]:
!pip install --quiet duckdb
!pip install --quiet jupysql
!pip install --quiet duckdb-engine

import os
import urllib
import urllib.request

import duckdb
import geopandas as gpd
import numpy as np
import pandas as pd

## Setup
# Load the jupysql Jupyter extension, which provides the %sql / %%sql magics
%load_ext sql

# SqlMagic options: hand results back as pandas DataFrames (autopandas) and
# turn off the feedback / connection-string echo that jupysql prints per query.
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Point the %sql magic at an in-memory DuckDB database (via duckdb-engine).
# NOTE(review): the export cell further down opens its own duckdb.connect(),
# so this connection is only used by %sql cells.
%sql duckdb:///:memory:

## Installing Tippecanoe

In [None]:
%%shell

## Install duckdb CLI quietly
# -f: fail on HTTP errors, -sS: no progress meter but still show errors, -L: follow redirects
curl -fsSL https://install.duckdb.org | sh > /dev/null 2>&1

## Install tippecanoe quietly

# NOTE(review): active tippecanoe development appears to have moved to the
# felt/tippecanoe fork - confirm whether this notebook should build that instead.

# Clone the tippecanoe repository only once so the cell can be re-run safely
if [ ! -d tippecanoe ]; then
  git clone --quiet https://github.com/mapbox/tippecanoe.git
fi

# Build and install inside a subshell: the working directory of the cell is
# never changed, and a failed `cd` cannot cause `make` to run in the wrong place.
# -j"$(nproc)" caps parallel jobs at the core count (a bare -j is unbounded and
# can exhaust memory); -s keeps make silent.
(
  cd tippecanoe \
    && make -j"$(nproc)" -s > /dev/null 2>&1 \
    && make install -s > /dev/null 2>&1
) || echo "Warning: tippecanoe build/install failed; re-run the make steps without output redirection to see why."

## Check if duckdb & tippecanoe are installed (minimal output)
echo "Installation complete."
# Look the binary up on PATH (where `make install` puts it) instead of a
# Colab-specific absolute path, and capture stderr too because the version
# banner may be written there.
if command -v tippecanoe > /dev/null 2>&1; then
  echo "Tippecanoe version: $(tippecanoe --version 2>&1 || true)"
else
  echo "Tippecanoe version: Not found"
fi

## Installing the PMTiles CLI
Refer to this page: [go-pmtiles/releases](https://github.com/protomaps/go-pmtiles/releases)

In [None]:
%%shell
# Abort on the first failing step. In particular, a checksum mismatch must stop
# the cell before the unverified binary is extracted and installed.
set -euo pipefail

VERSION="1.28.3"
ARCHIVE="go-pmtiles_${VERSION}_Linux_x86_64.tar.gz"
SHA256="06cf492adc2c7fcd23c4f11a98a5292f4cbe04d3afc3a6b38a07bb47452daca2"

# Always remove the downloaded archive, including when a step below fails
trap 'rm -f "$ARCHIVE"' EXIT

# Download go-pmtiles for Linux x86_64 quietly
wget -q -O "$ARCHIVE" "https://github.com/protomaps/go-pmtiles/releases/download/v${VERSION}/${ARCHIVE}"

# Verify SHA256 checksum (canonical "<hash>  <file>" format, two spaces)
echo "${SHA256}  ${ARCHIVE}" | sha256sum --check --quiet

# Extract into /tmp; errors are left visible so a bad archive is diagnosable
tar -xzf "$ARCHIVE" -C /tmp/

# Install binary to /usr/local/bin (assumes the archive ships a binary named 'pmtiles')
sudo mv /tmp/pmtiles /usr/local/bin/pmtiles

# Check installation
echo "Installation complete."
pmtiles version

## Download the OSM Extract


In [None]:
# Geofabrik's rolling India extract and the local file name it is saved under.
osm_url = "https://download.geofabrik.de/asia/india-latest.osm.pbf"
osm_path = "india-latest.osm.pbf"

# Skip the (large) download when the extract is already on disk.
if not os.path.exists(osm_path):
    print("Downloading India OSM extract (~1GB; may take 10-20 mins)...")
    # Download to a temporary name and rename only on success. Otherwise an
    # interrupted transfer would leave a truncated file at osm_path that the
    # existence check above would treat as a finished download on re-run.
    partial_path = osm_path + ".part"
    try:
        urllib.request.urlretrieve(osm_url, partial_path)
        os.replace(partial_path, osm_path)
    finally:
        # After a successful rename the partial file no longer exists;
        # this only cleans up after a failed or interrupted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download complete.")

In [None]:
# Input: the OSM PBF extract (same file name the download cell writes).
# Output: a GeoJSON file of named place nodes with per-feature zoom hints.
pbf_path = 'india-latest.osm.pbf'  # Replace with your full path
geojson_path = 'places_india.geojson'  # Output GeoJSON file

# Zoom range in which each OSM `place=*` value should be visible.
# Only place types listed here survive the INNER JOIN in the query below.
place_zoom_map = {
    'country': {'min_zoom': 0, 'max_zoom': 3},
    'state': {'min_zoom': 2, 'max_zoom': 6},
    'region': {'min_zoom': 3, 'max_zoom': 7},
    'county': {'min_zoom': 4, 'max_zoom': 8},
    'district': {'min_zoom': 4, 'max_zoom': 8},
    'borough': {'min_zoom': 5, 'max_zoom': 9},
    'city': {'min_zoom': 5, 'max_zoom': 10},
    'town': {'min_zoom': 7, 'max_zoom': 12},
    'suburb': {'min_zoom': 9, 'max_zoom': 13},
    'neighbourhood': {'min_zoom': 9, 'max_zoom': 13},
    'quarter': {'min_zoom': 10, 'max_zoom': 14},
    'village': {'min_zoom': 11, 'max_zoom': 16},
    'hamlet': {'min_zoom': 13, 'max_zoom': 17},
    'locality': {'min_zoom': 13, 'max_zoom': 17},
    'isolated_dwelling': {'min_zoom': 14, 'max_zoom': 18}
}
# Flatten the mapping into a (place_type, min_zoom, max_zoom) table so it can
# be registered with DuckDB and joined against.
zoom_df = pd.DataFrame([
    {'place_type': k, 'min_zoom': v['min_zoom'], 'max_zoom': v['max_zoom']} for k, v in place_zoom_map.items()
])

# Remove any output left by a previous run so the cell is re-runnable.
# NOTE(review): GDAL's GeoJSON driver is believed to refuse overwriting an
# existing file - confirm; removing it first is harmless either way.
if os.path.exists(geojson_path):
    os.remove(geojson_path)

con = duckdb.connect()
try:
    con.execute("INSTALL spatial;")
    con.execute("LOAD spatial;")

    # Register the zoom map DataFrame so SQL can reference it as `zoom_map`
    con.register('zoom_map', zoom_df)

    # Export every named node whose `place` tag appears in zoom_map.
    # NOTE(review): the zoom columns are written as ordinary feature properties
    # named "tippecanoe:minzoom"/"tippecanoe:maxzoom". Tippecanoe's README
    # documents per-feature zooms via a top-level "tippecanoe" object on each
    # Feature - verify that these property names are actually honored.
    con.execute(f"""
COPY (
  SELECT
    tags['name'] AS place_name,
    tags['place'] AS place_type,
    TRY_CAST(tags['population'] AS BIGINT) AS population,
    ST_Point(CAST(lon AS DOUBLE), CAST(lat AS DOUBLE)) AS geom,
    zm.min_zoom AS "tippecanoe:minzoom",
    zm.max_zoom AS "tippecanoe:maxzoom"
  FROM ST_ReadOSM('{pbf_path}')
  INNER JOIN zoom_map zm ON tags['place'] = zm.place_type
  WHERE
    tags['name'] IS NOT NULL
    AND tags['place'] IS NOT NULL
    AND kind = 'node'  -- Ensures point geometries (nodes)
    AND lat IS NOT NULL
    AND lon IS NOT NULL
  -- LIMIT 100  -- Uncomment for testing; remove for full export
) TO '{geojson_path}' WITH (FORMAT GDAL, DRIVER 'GeoJSON');
""")
finally:
    # Close the connection even when the install/load/export step raises,
    # so a failed run does not leave a DuckDB connection open in the kernel.
    con.close()

print(f"Exported filtered places to {geojson_path}")

# Converting to PMTiles Using Tippecanoe

In [None]:
%%shell
set -euo pipefail

# Verify tippecanoe is on PATH. `command -v` is used for the existence check so
# the result does not depend on the exit status of `--version`.
command -v tippecanoe > /dev/null 2>&1 || { echo "Error: tippecanoe not found."; exit 1; }
tippecanoe --version || true

IN="places_india.geojson"
MB_OUT="india_places.mbtiles"
PM_OUT="india_places.pmtiles"
LAYER="places"
MAX_Z="18"
# NOTE(review): the export cell's zoom map gives some place types a max zoom
# below 5 (e.g. country: 0-3); with MIN_Z=5 no tiles exist at those zooms -
# confirm this is intended.
MIN_Z="5"
# Tool output goes here instead of /dev/null so failures can be diagnosed
LOG="tippecanoe.log"

[[ -f "$IN" ]] || { echo "Error: input file $IN not found - run the export cell first."; exit 1; }

# Tile without feature/tile-size limits (no drops). Output is kept quiet on
# success but captured in $LOG; the explicit `if !` is needed because under
# `set -e` a bare failing command would abort before any error message.
if ! tippecanoe \
  -o "$MB_OUT" \
  -l "$LAYER" \
  -Z "$MIN_Z" -z "$MAX_Z" \
  --force \
  --read-parallel \
  --no-feature-limit \
  --no-tile-size-limit \
  "$IN" > "$LOG" 2>&1; then
  echo "Error: MBTiles not created - tippecanoe failed."
  tail -n 20 "$LOG"
  exit 1
fi

# Mirror tippecanoe's --force: drop any PMTiles file from an earlier run so the
# conversion does not trip over an existing output.
rm -f "$PM_OUT"

# Convert to PMTiles (quietly, no pipe to avoid SIGPIPE)
if ! pmtiles convert "$MB_OUT" "$PM_OUT" >> "$LOG" 2>&1; then
  echo "Error: pmtiles convert failed."
  tail -n 20 "$LOG"
  exit 1
fi

# Cleanup
# rm "$MB_OUT"

echo "PMTiles ready: $PM_OUT (all features preserved with zoom thresholds)"
pmtiles show "$PM_OUT"