Dataset from:  
> Housing & Development Board. (2023). HDB Existing Building (2025) [Dataset]. data.gov.sg. Retrieved December 3, 2025 from https://data.gov.sg/datasets/d_16b157c52ed637edd6ba1232e026258d/view


#### 1. Creates the SQL script

```sql
CREATE TABLE IF NOT EXISTS public.hdb_blocks (
    objectid BIGINT,
    blk_no TEXT,
    street_code TEXT,
    entity_id BIGINT,
    postal_code TEXT,
    inc_crc TEXT,
    updated_at TEXT,
    shape_area DOUBLE PRECISION,
    shape_len DOUBLE PRECISION,
    geom_4326 geometry(MultiPolygon, 4326),
    geom_3414 geometry(MultiPolygon, 3414),
    centroid_4326 geometry(Point, 4326),
    centroid_3414 geometry(Point, 3414)
);
```

#### 2. Load the GeoJSON

In [1]:
import json
import pandas as pd
from shapely.geometry import shape
from shapely.wkt import dumps as wkt_dumps

geo_path = "data/HDBExistingBuilding.geojson"

with open(geo_path, "r") as f:
    gj = json.load(f)

rows = []
for feature in gj["features"]:
    geom = shape(feature["geometry"])
    props = feature["properties"]

    rows.append({
        "objectid": props.get("OBJECTID"),
        "blk_no": props.get("BLK_NO"),
        "street_code": props.get("ST_COD"),
        "entity_id": props.get("ENTITYID"),
        "postal_code": props.get("POSTAL_COD"),
        "inc_crc": props.get("INC_CRC"),
        "updated_at": props.get("FMEL_UPD_D"),
        "shape_area": props.get("SHAPE.AREA"),
        "shape_len": props.get("SHAPE.LEN"),
        "geom_wkt": wkt_dumps(geom),
        "centroid_wkt": wkt_dumps(geom.centroid)
    })

df = pd.DataFrame(rows)
df.head()

Unnamed: 0,objectid,blk_no,street_code,entity_id,postal_code,inc_crc,updated_at,shape_area,shape_len,geom_wkt,centroid_wkt
0,898584,514,BUS14E,8235,650514,74585E59F18D2E73,20130426120328,1033.685604,253.971941,POLYGON ((103.7529821808343655 1.3544208894918...,POINT (103.7525891826131073 1.3544681945247203)
1,898585,21,TEG02M,6192,600021,1550C06FF96161F6,20130426120305,1046.092028,397.417077,POLYGON ((103.7392355494047109 1.3236971900920...,POINT (103.7392896961472388 1.3236959487788726)
2,898586,53,MAM00B,9054,320053,6FBC002CCC25E0F0,20130426120341,465.684209,102.237582,POLYGON ((103.8560570595093111 1.3280968319452...,POINT (103.8560973613317202 1.3282785389346876)
3,898587,686B,CHC17Y,11347,682686,27633E9516C3217B,20130426120329,1164.381084,352.790348,POLYGON ((103.7483749037369449 1.4048411952521...,POINT (103.7485044884540741 1.4046647992030474)
4,898588,697,JUC08W,11051257,640697,6732FD889250ABAA,20201104102827,2044.932108,207.034765,POLYGON ((103.7081363495233575 1.3413769358491...,POINT (103.7080875361858290 1.3416050746094497)


#### 3. Insert into Postgres

In [2]:
from sqlalchemy import create_engine, text
import time

import requests
import pandas as pd
import time
from sqlalchemy import create_engine, text
from tqdm.auto import tqdm

# DB connection
engine = create_engine("postgresql://postgres:postgres@postgres-postgresql.postgres:5432/postgres")

BATCH_SIZE = 50
MAX_RETRIES = 3

def insert_batch(batch_df, retry_count=0):
    try:
        with engine.begin() as conn:
            for _, row in batch_df.iterrows():
                conn.execute(text("""
                    INSERT INTO public.hdb_blocks (
                        objectid, blk_no, street_code, entity_id, postal_code,
                        inc_crc, updated_at, shape_area, shape_len,
                        geom_4326, geom_3414,
                        centroid_4326, centroid_3414
                    )
                    VALUES (
                        :objectid, :blk_no, :street_code, :entity_id, :postal_code,
                        :inc_crc, :updated_at, :shape_area, :shape_len,
                        ST_SetSRID(ST_GeomFromText(:geom_4326), 4326),
                        ST_Transform(ST_SetSRID(ST_GeomFromText(:geom_4326), 4326), 3414),
                        ST_SetSRID(ST_GeomFromText(:centroid_4326), 4326),
                        ST_Transform(ST_SetSRID(ST_GeomFromText(:centroid_4326), 4326), 3414)
                    );
                """), {
                    "objectid": row["objectid"],
                    "blk_no": row["blk_no"],
                    "street_code": row["street_code"],
                    "entity_id": row["entity_id"],
                    "postal_code": row["postal_code"],
                    "inc_crc": row["inc_crc"],
                    "updated_at": row["updated_at"],
                    "shape_area": row["shape_area"],
                    "shape_len": row["shape_len"],
                    "geom_4326": row["geom_wkt"],
                    "centroid_4326": row["centroid_wkt"],
                })
        return True
    except Exception as e:
        if retry_count < MAX_RETRIES:
            print(f"⚠ Batch failed: {str(e)[:100]}... Retrying ({retry_count + 1}/{MAX_RETRIES})...")
            time.sleep(2 ** retry_count)  # Exponential backoff
            return insert_batch(batch_df, retry_count + 1)
        else:
            print(f"✗ Batch failed after {MAX_RETRIES} retries")
            return False

# Process in batches
total_batches = (len(df) + BATCH_SIZE - 1) // BATCH_SIZE
successful = 0

for i in range(0, len(df), BATCH_SIZE):
    batch = df.iloc[i:i+BATCH_SIZE]
    batch_num = i // BATCH_SIZE + 1
    print(f"Processing batch {batch_num}/{total_batches}...")
    
    if insert_batch(batch):
        successful += len(batch)
        print(f"  ✔ Batch {batch_num} complete ({successful}/{len(df)} total)")
    else:
        print(f"  ✗ Batch {batch_num} failed")

print(f"\n✔ Completed: {successful}/{len(df)} records inserted")

Processing batch 1/268...
  ✔ Batch 1 complete (50/13352 total)
Processing batch 2/268...
  ✔ Batch 2 complete (100/13352 total)
Processing batch 3/268...
  ✔ Batch 3 complete (150/13352 total)
Processing batch 4/268...
  ✔ Batch 4 complete (200/13352 total)
Processing batch 5/268...
  ✔ Batch 5 complete (250/13352 total)
Processing batch 6/268...
  ✔ Batch 6 complete (300/13352 total)
Processing batch 7/268...
  ✔ Batch 7 complete (350/13352 total)
Processing batch 8/268...
  ✔ Batch 8 complete (400/13352 total)
Processing batch 9/268...
  ✔ Batch 9 complete (450/13352 total)
Processing batch 10/268...
  ✔ Batch 10 complete (500/13352 total)
Processing batch 11/268...
  ✔ Batch 11 complete (550/13352 total)
Processing batch 12/268...
  ✔ Batch 12 complete (600/13352 total)
Processing batch 13/268...
  ✔ Batch 13 complete (650/13352 total)
Processing batch 14/268...
  ✔ Batch 14 complete (700/13352 total)
Processing batch 15/268...
  ✔ Batch 15 complete (750/13352 total)
Processing bat