# Exploring Foursquare POI Data


1. [Foursquare's 104M Points of Interest](https://tech.marksblogg.com/foursquare-open-global-poi-dataset.html)

In [None]:
!pip install --quiet duckdb
!pip install --quiet jupysql
!pip install --quiet duckdb-engine

In [None]:
import duckdb
import os
import pandas as pd
import geopandas as gpd

Foursquare POI count in India
1. Feb 2025 Release = `12.87` Lakhs
2. Aug 2025 Release = `13.67` Lakhs

In [None]:
!wget https://raw.githubusercontent.com/udit-001/india-maps-data/main/geojson/india.geojson -O india.geojson

In [None]:
# Load the downloaded boundaries into a GeoDataFrame.
gdf = gpd.read_file("india.geojson")

# dissolve() without a `by=` key merges every feature into a single row,
# giving one outline for the whole file.
india_gdf = gdf.dissolve()

# Persist the merged outline as Parquet, then preview it.
india_gdf.to_parquet("india.parquet")
india_gdf.plot()

In [None]:
# Import jupysql Jupyter extension to create SQL cells
%load_ext sql

%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

%sql duckdb:///:memory:

## Get Starbucks in India

In [None]:
# Overture Maps release tag; interpolated into the
# s3://overturemaps-us-west-2/release/<tag>/... paths queried further down.
latest_release = "2025-11-19.0"

In [None]:
# Preview ten rows of the Overture "divisions" theme: country code, a display
# name and the admin level, ordered by country and name.

# Open the connection in this cell: no earlier cell defines `con`, so without
# this the cell raises NameError on a fresh kernel (Restart & Run All).
con = duckdb.connect()

# The path is pinned to `latest_release`, the same dated release prefix the
# later divisions queries use, instead of a `latest` alias.
df = con.execute(f"""
  SELECT
    country,
    COALESCE(names->>'primary', names->>'common') AS name,
    admin_level
  FROM parquet_scan('s3://overturemaps-us-west-2/release/{latest_release}/theme=divisions/*/*.parquet')
  WHERE country IS NOT NULL
  ORDER BY country, name
  LIMIT 10;
""").df()

In [None]:
import duckdb
import os
import pandas as pd
import geopandas as gpd

# Sample up to 100 Overture "divisions" rows for India (country = 'IN') from
# the release pinned in `latest_release`.
#
# Produces:
#   all_columns_df  - every column (SELECT *), for inspecting the schema
#   full_schema_df  - the selected subset of columns used by the cells below

# Initialize an in-memory DuckDB connection
con = duckdb.connect()

# Define the output parquet file name
# NOTE(review): nothing in this cell writes to this path yet.
output_parquet_file = 'poi_india.parquet'

# Both queries read the same set of files, so build the glob once
divisions_glob = f"s3://overturemaps-us-west-2/release/{latest_release}/theme=divisions/*/*"

try:
    # Load the spatial extension if not already loaded
    con.execute("INSTALL spatial;")
    con.execute("LOAD spatial;")

    # Kept under its own name: it used to be assigned to `full_schema_df` and
    # was overwritten by the next query before anything could read it.
    all_columns_df = con.execute(f"""
        SELECT
            *
        FROM read_parquet('{divisions_glob}', union_by_name=True)
        WHERE country = 'IN'
        LIMIT 100
    """).df()

    full_schema_df = con.execute(f"""
      SELECT
        names.primary,
        class,
        subtype,
        region,
        hierarchies,
        population,
        cartography,
        geometry
      FROM read_parquet('{divisions_glob}', union_by_name=True)
      WHERE country = 'IN'
      LIMIT 100
    """).df()
finally:
    # Close the DuckDB connection even when a query fails
    con.close()

In [None]:
# Show the first five sampled rows
full_schema_df.head(5)

In [None]:
# Look at the `cartography` value of the first returned row
cartography_col = full_schema_df["cartography"]
cartography_col.iloc[0]