<a href="https://colab.research.google.com/github/kavyajeetbora/foursquare_ai/blob/master/notebooks/07_duckdb_ai_bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --quiet duckdb jupysql duckdb-engine

In [None]:
import duckdb
import os
import pandas as pd

In [None]:
# Build a Parquet extract of Foursquare OS Places located in India ('IN'),
# joined to their level-1 / level-2 category names, using an in-memory
# DuckDB database that reads the source Parquet files directly from S3.

# Source datasets (release dt=2025-09-09). The places path is a glob that
# matches every places-*.zstd.parquet shard of the release.
s3_places_path = 's3://fsq-os-places-us-east-1/release/dt=2025-09-09/places/parquet/places-*.zstd.parquet'
s3_categories_path = 's3://fsq-os-places-us-east-1/release/dt=2025-09-09/categories/parquet/categories.zstd.parquet'

# View definition:
#   * `places` unnests fsq_category_ids, so a place with several categories
#     yields one row per category; DISTINCT then drops fully duplicated rows.
#   * Rows without coordinates are filtered out before ST_Point builds `geom`.
#   * The inner JOIN keeps only rows whose category id exists in the
#     categories file.
# NOTE: the second CTE intentionally shares its name with the view; inside the
# statement the CTE name is the one being selected from.
create_view_sql = f"""
CREATE OR REPLACE VIEW places_with_categories AS
WITH places AS (
    SELECT
        DISTINCT UNNEST(P.fsq_category_ids) as fsq_category_id,
        name,
        postcode,
        address,
        region,
        ST_Point(longitude, latitude) AS geom
    FROM read_parquet('{s3_places_path}') AS P
    WHERE latitude IS NOT NULL AND longitude IS NOT NULL AND country='IN'
),
places_with_categories AS (
    SELECT
        P.name AS name,
        C.level1_category_name AS category_level_1,
        C.level2_category_name AS category_level_2,
        postcode,
        address,
        region,
        P.geom
    FROM places AS P
    JOIN read_parquet('{s3_categories_path}') AS C
    ON P.fsq_category_id = C.category_id
)
SELECT
    name,
    category_level_1,
    category_level_2,
    address,
    region,
    postcode,
    geom
FROM places_with_categories;
"""

# Export statement: materializes the view into a ZSTD-compressed Parquet file
# named 'output.geoparquet' in the current working directory.
export_sql = "COPY (SELECT * FROM places_with_categories) TO 'output.geoparquet' WITH (FORMAT PARQUET, CODEC ZSTD);"

# Initialize DuckDB connection (no path given, so the database is in-memory).
con = duckdb.connect()
try:
    # Load required extensions: httpfs for the s3:// reads, spatial for ST_Point.
    con.execute("INSTALL httpfs; LOAD httpfs; INSTALL spatial; LOAD spatial;")

    # Create the view, then export it. The view is lazy, so the S3 scan
    # actually happens during the COPY.
    con.execute(create_view_sql)
    con.execute(export_sql)

    # For reference: the author's earlier run of
    #   SELECT COUNT(*) FROM places_with_categories;
    # reported around 1358392 rows.
finally:
    # Always release the connection, even if the install, view creation,
    # or export fails part-way through.
    con.close()