<a href="https://colab.research.google.com/github/kavyajeetbora/foursquare_ai/blob/master/notebooks/16%20-%20Generate%20tiles%20db%20from%20overture.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --upgrade duckdb

Collecting duckdb
  Downloading duckdb-1.4.4-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl.metadata (4.3 kB)
Downloading duckdb-1.4.4-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl (20.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.4/20.4 MB[0m [31m57.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: duckdb
  Attempting uninstall: duckdb
    Found existing installation: duckdb 1.3.2
    Uninstalling duckdb-1.3.2:
      Successfully uninstalled duckdb-1.3.2
Successfully installed duckdb-1.4.4


In [2]:
import duckdb

# Show which DuckDB version the kernel actually imported.
print(duckdb.__version__)

# In-memory database; duckdb.connect(':memory:') is the explicit spelling.
con = duckdb.connect()

# Install both extensions first, then load them into this connection.
# httpfs is what s3:// paths normally rely on.
EXTENSIONS = ("spatial", "httpfs")
for ext in EXTENSIONS:
    con.sql(f"INSTALL {ext};")
for ext in EXTENSIONS:
    con.sql(f"LOAD {ext};")

1.4.4


In [9]:
# Build the on-disk database `tiles.db`:
#   1. read Overture building footprints from the public S3 release,
#   2. keep only rows whose bbox lies fully inside the lon/lat window below
#      (roughly New York City, judging by the coordinates) and that have
#      non-null subtype, class and height,
#   3. reproject the geometry from EPSG:4326 to EPSG:3857,
#   4. add an R-tree index on the geometry column.
#
# The `with` block closes the connection even when the remote read or the
# index build raises, so a failed run does not leave tiles.db held open by
# this kernel. After the block `con` is still bound, but closed.
with duckdb.connect('tiles.db') as con:  # or choose a new name e.g. 'nyc_buildings.db' to avoid confusion
    # This is a new database, so load the extensions here explicitly instead
    # of relying on autoloading or on the earlier in-memory connection.
    con.sql("INSTALL spatial; LOAD spatial;")   # ST_Transform + RTREE index
    con.sql("INSTALL httpfs; LOAD httpfs;")     # s3:// access

    # CREATE OR REPLACE keeps the cell re-runnable: an existing t1 is replaced.
    con.sql("""
CREATE OR REPLACE TABLE t1 AS (
  SELECT
    ST_Transform(geometry, 'EPSG:4326', 'EPSG:3857', always_xy := true) AS geometry,
    subtype,
    class,
    height
  FROM read_parquet(
    's3://overturemaps-us-west-2/release/2026-01-21.0/theme=buildings/type=building/*',
    filename = true,
    hive_partitioning = 1
  )
  WHERE
    bbox.xmin BETWEEN -74.2 AND -73.6
    AND bbox.ymin BETWEEN 40.5  AND 40.9
    AND bbox.xmax BETWEEN -74.2 AND -73.6
    AND bbox.ymax BETWEEN 40.5  AND 40.9
    AND subtype IS NOT NULL
    AND class   IS NOT NULL
    AND height  IS NOT NULL
);
""")

    # Spatial index on the reprojected geometry.
    con.sql("CREATE INDEX my_idx ON t1 USING RTREE (geometry);")

    # Quick reality check — should print a number > 0 if data loaded
    con.sql("SELECT COUNT(*) FROM t1").show()

    # Force a write to disk before the connection is closed on block exit.
    con.sql("CHECKPOINT")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│       425677 │
└──────────────┘



In [14]:
# Confirm the database file exists on disk and check its size.
!ls -lh tiles.db

-rw-r--r-- 1 root root 78M Jan 31 05:35 tiles.db


In [11]:
# Smoke test: reopen the database file and dump catalog metadata to confirm
# DuckDB sees it as non-empty.
con = duckdb.connect('tiles.db')

for probe in (
    "SELECT * FROM duckdb_tables()",      # lists user tables
    "SELECT * FROM duckdb_databases()",   # should show main db
    "PRAGMA database_size",               # rough size info
):
    print(con.sql(probe).fetchall())

# Also list the attached databases to spot any leftovers.
con.sql("SHOW DATABASES").show()

[('tiles', 609, 'main', 590, 't1', 593, None, {}, False, False, False, 425677, 4, 1, 0, 'CREATE TABLE t1(geometry GEOMETRY, subtype VARCHAR, "class" VARCHAR, height DOUBLE);')]
[('tiles', 609, '/content/tiles.db', None, {'storage_version': 'v1.0.0+'}, False, 'duckdb', False, False, None), ('system', 0, None, None, {}, True, 'duckdb', False, False, None), ('temp', 2010, None, None, {}, True, 'duckdb', False, False, None)]
[('tiles', '77.2 MiB', 262144, 309, 309, 0, '0 bytes', '256.0 KiB', '10.1 GiB')]
┌───────────────┐
│ database_name │
│    varchar    │
├───────────────┤
│ tiles         │
└───────────────┘



In [13]:
# Reopen the database file (it lives in Colab's /content/ directory) and
# run two summary queries against t1.
con = duckdb.connect('tiles.db')

# Total number of rows in t1.
row_count = con.sql("SELECT COUNT(*) FROM t1")
row_count.show()

# Row count and mean height per subtype, ten most frequent subtypes first.
by_subtype = con.sql("""
SELECT
  subtype,
  COUNT(*) AS cnt,
  AVG(height) AS avg_height
FROM t1
GROUP BY 1
ORDER BY cnt DESC
LIMIT 10
""")
by_subtype.show()

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│       425677 │
└──────────────┘

┌────────────────┬────────┬────────────────────┐
│    subtype     │  cnt   │     avg_height     │
│    varchar     │ int64  │       double       │
├────────────────┼────────┼────────────────────┤
│ residential    │ 408366 │  6.408758888182875 │
│ commercial     │   7015 │ 13.138676962733541 │
│ outbuilding    │   3105 │  4.355162292506768 │
│ education      │   2495 │ 16.342072511543037 │
│ industrial     │   1375 │ 7.1407133731547985 │
│ religious      │   1093 │ 12.793615347076086 │
│ service        │    751 │  9.868564686215194 │
│ civic          │    621 │ 11.731520634844994 │
│ transportation │    541 │ 15.351108213449368 │
│ medical        │    216 │ 31.118892931673255 │
├────────────────┴────────┴────────────────────┤
│ 10 rows                            3 columns │
└──────────────────────────────────────────────┘

