<a href="https://colab.research.google.com/github/kavyajeetbora/foursquare_ai/blob/master/notebooks/10_PlantOSM_places.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Building OSM Place-Name PMTiles for India (Foursquare POI project)


1. [Foursquare's 104M Points of Interest](https://tech.marksblogg.com/foursquare-open-global-poi-dataset.html)

# Setup Environment

In [None]:
!pip install --quiet duckdb
!pip install --quiet jupysql
!pip install --quiet duckdb-engine

import duckdb
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import urllib
import subprocess
import json
## Setup
# Import jupysql Jupyter extension to create SQL cells
%load_ext sql

# SqlMagic options (see the JupySQL configuration documentation):
#   autopandas = True   -> query results come back as pandas DataFrames
#   feedback = False    -> turn off the magic's feedback messages
#   displaycon = False  -> do not display the connection string with each query
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Open the default %sql connection on an in-memory DuckDB database.
# NOTE(review): the GeoJSON conversion cell further down opens its own
# duckdb.connect() instead of using this connection — confirm whether this
# %sql connection is still needed.
%sql duckdb:///:memory:

In [None]:
%%shell
# Fetch the OSMNames planet dump, then keep the header row plus every row
# whose 16th tab-separated column equals "in", writing the result to india.tsv.
ARCHIVE=planet-latest_geonames.tsv.gz
COUNTRY=in

wget -O "$ARCHIVE" https://github.com/OSMNames/OSMNames/releases/download/v2.0.4/planet-latest_geonames.tsv.gz

gzip -dc "$ARCHIVE" \
  | awk -F '\t' -v OFS='\t' -v cc="$COUNTRY" 'NR == 1 || $16 == cc' \
  > india.tsv

## Installing Tippecanoe

In [None]:
%%shell
# Stop at the first failing command so a broken download or build is not
# followed by a misleading "Installation complete." message.
set -e

## Install duckdb CLI quietly
curl -fsSL https://install.duckdb.org | sh > /dev/null 2>&1

## Install tippecanoe quietly

# Clone the tippecanoe repository from GitHub quietly; skipped when the
# checkout already exists so the cell can be re-run.
[ -d tippecanoe ] || git clone --quiet https://github.com/mapbox/tippecanoe.git

# Build and install inside a subshell so the working directory of the rest of
# the cell is unchanged. A bare `make -j` starts an unlimited number of jobs,
# so the job count is capped at the number of CPU cores.
(
  cd tippecanoe
  make -j"$(nproc)" -s > /dev/null 2>&1
  make install -s > /dev/null 2>&1
)

## Check that tippecanoe is installed (minimal output)
echo "Installation complete."
# Look the binary up on PATH (that is how later cells invoke it) rather than
# at a hard-coded checkout location.
if command -v tippecanoe > /dev/null 2>&1; then
  # NOTE(review): tippecanoe appears to print its version on stderr and some
  # builds exit non-zero for --version, hence `2>&1` and `|| true` — confirm.
  echo "Tippecanoe version: $(tippecanoe --version 2>&1 || true)"
else
  echo "Tippecanoe version: Not found"
fi

## Installing the PMTiles CLI
Refer to this page: [go-pmtiles/releases](https://github.com/protomaps/go-pmtiles/releases)

In [None]:
%%shell
# Abort on the first failing step. In particular, a checksum mismatch must
# stop the cell before the archive is extracted and installed.
set -e

PMTILES_VERSION="1.28.3"
PMTILES_ARCHIVE="go-pmtiles_${PMTILES_VERSION}_Linux_x86_64.tar.gz"
# SHA256 of the release archive above; must be updated together with the version.
PMTILES_SHA256="06cf492adc2c7fcd23c4f11a98a5292f4cbe04d3afc3a6b38a07bb47452daca2"

# Download go-pmtiles for Linux x86_64 quietly. -O overwrites any archive left
# behind by an interrupted run instead of saving a ".1" copy next to it.
wget -q "https://github.com/protomaps/go-pmtiles/releases/download/v${PMTILES_VERSION}/${PMTILES_ARCHIVE}" -O "${PMTILES_ARCHIVE}"

# Verify SHA256 checksum (sha256sum exits non-zero on mismatch, which stops the cell)
echo "${PMTILES_SHA256}  ${PMTILES_ARCHIVE}" | sha256sum --check --quiet

# Extract quietly
tar -xzf "${PMTILES_ARCHIVE}" -C /tmp/ > /dev/null 2>&1

# Install binary to /usr/local/bin (assuming binary is named 'pmtiles')
sudo mv /tmp/pmtiles /usr/local/bin/pmtiles > /dev/null 2>&1

# Clean up
rm "${PMTILES_ARCHIVE}"

# Check installation
echo "Installation complete."
pmtiles version

In [None]:
# Download india.geojson (presumably India's boundary polygon — verify); a later
# cell passes this file to `pmtiles extract` as its --region argument.
!wget https://raw.githubusercontent.com/udit-001/india-maps-data/main/geojson/india.geojson -O india.geojson

## Extract the India Region from the Protomaps Basemap

In [None]:
%%shell
# Extract the part of the remote Protomaps build (20251207) that falls inside the
# region described by india.geojson and write it to my_extracted.pmtiles.
pmtiles extract https://build.protomaps.com/20251207.pmtiles my_extracted.pmtiles --region=india.geojson

## Download OSM


- OSMNames - https://github.com/OSMNames/OSMNames | To Download: https://osmnames.org/download/
- https://download.geofabrik.de/asia/india-latest.osm.pbf


In [None]:
# osm_url = "https://download.geofabrik.de/asia/india-latest.osm.pbf"
# osm_path = "india-latest.osm.pbf"

# if not os.path.exists(osm_path):
#     print("Downloading India OSM extract (~1GB; may take 10-20 mins)...")
#     urllib.request.urlretrieve(osm_url, osm_path)
#     print("Download complete.")

## Convert the Geometry Data to GeoJSON

In [None]:
# Read the India OSMNames extract, keep the place labels we want to render,
# and write them as one GeoJSON FeatureCollection whose features carry
# per-feature tippecanoe minzoom/maxzoom hints.
tsv_path = 'india.tsv'
out_folder = 'places_by_type_simple'
os.makedirs(out_folder, exist_ok=True)

con = duckdb.connect()
try:
    con.execute("INSTALL spatial;")
    con.execute("LOAD spatial;")

    # The OR between the two osm_type branches is parenthesised on purpose:
    # AND binds tighter than OR, so without the parentheses the name,
    # coordinate, Latin-script and place_type filters would apply only to the
    # relation branch and every node would pass unfiltered.
    # 'town' is part of the IN list so that it agrees with the pandas filter
    # below and with the min_zoom/max_zoom CASE mappings.
    df = con.execute(f"""
    SELECT
        name AS place_name,
        type AS place_type,
        west, south, east, north,
        place_rank,
        CASE place_type
            WHEN 'country'        THEN 2
            WHEN 'administrative' THEN 3
            WHEN 'city'           THEN 6
            WHEN 'town'           THEN 8
            WHEN 'village'        THEN 12
            WHEN 'suburb'         THEN 15
            WHEN 'locality'       THEN 15
            WHEN 'hamlet'         THEN 16
        END AS min_zoom,
        CASE place_type
            WHEN 'country'        THEN 4
            WHEN 'administrative' THEN 6
            WHEN 'city'           THEN 16
            WHEN 'town'           THEN 18
            WHEN 'village'        THEN 18
            WHEN 'suburb'         THEN 20
            WHEN 'locality'       THEN 20
            WHEN 'hamlet'         THEN 21
        END AS max_zoom,
        ST_AsGeoJSON(ST_Point(CAST(lon AS DOUBLE), CAST(lat AS DOUBLE))) AS geometry
    FROM read_csv('{tsv_path}', delim='\t', header=true)
    WHERE
        (
            osm_type = 'node'
            OR (osm_type = 'relation' AND place_type = 'administrative' AND place_rank = 8)
        )
        AND name IS NOT NULL
        AND lon IS NOT NULL AND lat IS NOT NULL
        AND name ~ '^[A-Za-z0-9\\s\\.,\\-\\(\\)]+$'   -- English/Latin names only
        AND place_type IN ('administrative', 'country', 'village', 'city', 'town')
    """).df()
finally:
    # Release the DuckDB connection even when the query raises.
    con.close()

# Keep only the rendered place types, and of the administrative entries only
# those with place_rank 8 (node-type administrative rows of other ranks can
# still get through the SQL filter above).
wanted_type = df['place_type'].isin(['administrative', 'country', 'village', 'city', 'town'])
other_rank_admin = (df['place_type'] == 'administrative') & (df['place_rank'] != 8)
df = df[wanted_type & ~other_rank_admin].copy()

# One GeoJSON Feature per row; the top-level "tippecanoe" object sets the zoom
# range in which tippecanoe includes the feature.
features = [
    {
        "type": "Feature",
        "tippecanoe": {
            "minzoom": int(row.min_zoom),
            "maxzoom": int(row.max_zoom)
        },
        "properties": {
            "place_name": row.place_name,
            "place_type": row.place_type,
            "west": row.west,
            "south": row.south,
            "east": row.east,
            "north": row.north
        },
        "geometry": json.loads(row.geometry)
    }
    for row in df.itertuples(index=False)
]

out_file = os.path.join(out_folder, 'osm_places_india.geojson')
geojson = {"type": "FeatureCollection", "features": features}

with open(out_file, 'w', encoding='utf-8') as f:
    json.dump(geojson, f, ensure_ascii=False, indent=2)

In [None]:
%%shell
# Stop at the first failing step so the "SUCCESS!" banner below is only
# printed when tippecanoe and the PMTiles conversion both worked.
set -e

# 1. Make sure the exact file name and path (fails the cell if the file is missing)
ls -lh places_by_type_simple/osm_places_india.geojson

# 2. Remove any old files
rm -f india_places.mbtiles india_places_final.pmtiles

# 3. Run tippecanoe — ONE SINGLE LINE, no comments inside the command
tippecanoe -o india_places.mbtiles -l places --drop-densest-as-needed --extend-zooms-if-still-dropping -r1 --force places_by_type_simple/osm_places_india.geojson

# 4. Report how many tiles were written. The query counts rows of the MBTiles
#    `tiles` table, i.e. tiles rather than input features. This is only a
#    diagnostic, so a missing sqlite3 binary must not abort the conversion.
echo "Tiles written by tippecanoe:"
sqlite3 india_places.mbtiles "SELECT COUNT(*) FROM tiles;" || echo "(tile count unavailable: sqlite3 query failed)"

# 5. Convert to PMTiles
pmtiles convert india_places.mbtiles india_places_final.pmtiles

# 6. Show result
echo ""
echo "SUCCESS! Final PMTiles created:"
pmtiles show india_places_final.pmtiles

In [None]:
# import shutil
# from google.colab import files

# # Define the folder to be zipped
# folder_to_zip = 'places_by_type_simple'

# output_filename = 'places_by_type_simple.zip'

# # Create a zip archive of the folder
# shutil.make_archive(folder_to_zip, 'zip', folder_to_zip)

# # Offer the zip file for download
# files.download(output_filename)

# print(f"Folder '{folder_to_zip}' has been zipped to '{output_filename}' and offered for download.")

# Converting to PMTiles Using Tippecanoe

In [None]:
# Build one MBTiles file per place type with tippecanoe, merge them with
# tile-join, and convert the merged file to PMTiles.
#
# Input: one GeoJSON per place type, named <place_type>.geojson, in this folder.
# NOTE(review): the GeoJSON export cell earlier in this notebook writes a single
# combined file (osm_places_india.geojson) into this folder, so on a fresh run
# the per-type files may be missing and every type is skipped. The merged output
# also reuses the name india_places.mbtiles written by the previous shell cell.
# Confirm both are intended.
geojson_folder = 'places_by_type_simple'

# Outputs
temp_folder    = 'temp_mbtiles'
merged_mbtiles = 'india_places.mbtiles'
final_pmtiles  = 'india_places.pmtiles'

os.makedirs(temp_folder, exist_ok=True)

# place type -> (min zoom, max zoom) passed to tippecanoe as -Z / -z
place_zoom_map = {
    'city':    (4,  16),
    'town':    (8,  18),
    'village': (12, 18)
}

# Paths of the per-type MBTiles that were built successfully
mbtiles_files = []

for place_type, (min_z, max_z) in place_zoom_map.items():
    geojson_path = os.path.join(geojson_folder, f'{place_type}.geojson')

    # Skip types without an input file
    if not os.path.exists(geojson_path):
        print(f"File not found: {geojson_path}")
        continue

    # Skip inputs that cannot be parsed or that contain no features
    try:
        with open(geojson_path) as f:
            data = json.load(f)
        has_features = bool(data.get('features'))
    except Exception as e:
        print(f"Cannot read {geojson_path}: {e}")
        continue
    if not has_features:
        print(f"Empty file: {geojson_path}")
        continue

    mb_path = os.path.join(temp_folder, f'{place_type}.mbtiles')

    print(f"Creating {place_type}.mbtiles (z{min_z}–{max_z})...")
    cmd = ['tippecanoe', '-o', mb_path, '-l', place_type]
    cmd += ['-Z', str(min_z), '-z', str(max_z)]
    cmd += [
        '--force',
        '--no-feature-limit',
        '--no-tile-size-limit',
        '--drop-densest-as-needed',
    ]
    cmd.append(geojson_path)

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"Tippecanoe failed for {place_type}:\n{result.stderr}")
        continue

    # Outputs smaller than ~4KB are treated as empty and discarded
    size_bytes = os.path.getsize(mb_path)
    if size_bytes < 4000:
        print(f"Empty MBTiles: {mb_path}")
        os.remove(mb_path)
        continue

    mbtiles_files.append(mb_path)
    print(f"Done: {mb_path} ({size_bytes/1024:.1f} KB)")

# Merge the per-type files and convert the result
if not mbtiles_files:
    print("No valid MBTiles created. Check the paths above.")
else:
    print(f"\nMerging {len(mbtiles_files)} MBTiles files...")
    join_cmd = ['tile-join', '-f', '-o', merged_mbtiles] + mbtiles_files
    subprocess.run(join_cmd, check=True)
    print(f"Merged → {merged_mbtiles}")

    print("Converting to PMTiles...")
    subprocess.run(['pmtiles', 'convert', merged_mbtiles, final_pmtiles], check=True)
    print(f"PMTiles ready → {final_pmtiles}")

    # Print the archive summary (failure here is not treated as an error)
    subprocess.run(['pmtiles', 'show', final_pmtiles])

# Optional: clean temp folder but keep final files
# for f in mbtiles_files:
#     os.remove(f)
# print("Temp files cleaned.")