Dataset from:  
> Housing & Development Board. (2018). HDB Property Information (2025) [Dataset]. data.gov.sg. Retrieved December 3, 2025 from https://data.gov.sg/datasets/d_17f5382f26140b1fdae0ba2ef6239d2f/view

#### 1. Create the table (SQL script)

```sql
-- One row per HDB block, with the same columns as the HDB Property Information CSV.
-- The geometry(...) column types require the PostGIS extension to be installed.
-- IF NOT EXISTS is used throughout so the script can be re-run without errors.
CREATE TABLE IF NOT EXISTS public.hdb_property_info (
    -- Block identity (blk_no is TEXT, not INT)
    blk_no TEXT,
    street TEXT,
    max_floor_lvl INT,
    year_completed INT,
    -- Usage flags (the sample rows in this notebook hold 'Y' / 'N')
    residential TEXT,
    commercial TEXT,
    market_hawker TEXT,
    miscellaneous TEXT,
    multistorey_carpark TEXT,
    precinct_pavilion TEXT,
    bldg_contract_town TEXT,
    -- Dwelling unit counts
    total_dwelling_units INT,
    room1_sold INT,
    room2_sold INT,
    room3_sold INT,
    room4_sold INT,
    room5_sold INT,
    exec_sold INT,
    multigen_sold INT,
    studio_apartment_sold INT,
    room1_rental INT,
    room2_rental INT,
    room3_rental INT,
    other_room_rental INT,
    -- Enrichment from OneMap (left NULL by the raw CSV insert; filled by the geocode step)
    postal TEXT,
    full_address TEXT,
    lon DOUBLE PRECISION,
    lat DOUBLE PRECISION,
    x DOUBLE PRECISION,
    y DOUBLE PRECISION,
    geom_4326 geometry(Point,4326),  -- SRID 4326: WGS 84 lon/lat
    geom_3414 geometry(Point,3414),  -- SRID 3414: SVY21 / Singapore TM

    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Lookup by block + street, by postal code, and spatial lookup on the SRID 3414 geometry.
CREATE INDEX IF NOT EXISTS hdb_property_info_block_idx ON public.hdb_property_info(blk_no, street);
CREATE INDEX IF NOT EXISTS hdb_property_postal_idx ON public.hdb_property_info(postal);
CREATE INDEX IF NOT EXISTS hdb_property_geom_idx ON public.hdb_property_info USING GIST (geom_3414);
```

#### 2. Load the CSV

In [1]:
import pandas as pd

# Load the raw HDB property CSV.
df = pd.read_csv("data/HDBPropertyInformation.csv")

# Normalize headers to snake_case so they line up with the table's column names.
normalized_cols = df.columns.str.strip().str.lower().str.replace(" ", "_")

# SQL identifiers cannot start with a digit: "1room_sold" -> "room1_sold", etc.
for room_count in range(1, 6):
    normalized_cols = normalized_cols.str.replace(
        f"{room_count}room", f"room{room_count}"
    )

df.columns = normalized_cols
df.head()

Unnamed: 0,blk_no,street,max_floor_lvl,year_completed,residential,commercial,market_hawker,miscellaneous,multistorey_carpark,precinct_pavilion,...,room3_sold,room4_sold,room5_sold,exec_sold,multigen_sold,studio_apartment_sold,room1_rental,room2_rental,room3_rental,other_room_rental
0,1,BEACH RD,16,1970,Y,Y,N,N,N,N,...,138,1,2,0,0,0,0,0,0,0
1,1,BEDOK STH AVE 1,14,1975,Y,N,N,Y,N,N,...,204,0,2,0,0,0,0,0,0,0
2,1,CANTONMENT RD,2,2010,N,Y,N,N,N,N,...,0,0,0,0,0,0,0,0,0,0
3,1,CHAI CHEE RD,15,1982,Y,N,N,N,N,N,...,0,10,92,0,0,0,0,0,0,0
4,1,CHANGI VILLAGE RD,4,1975,Y,Y,N,N,N,N,...,54,0,1,0,0,0,0,0,0,0


#### 3. Insert into Postgres

In [2]:
from sqlalchemy import create_engine, text

# DB connection.
# NOTE(review): the credentials are hard-coded in the URL; consider loading
# them from the environment before sharing this notebook.
engine = create_engine("postgresql://postgres:postgres@postgres-postgresql.postgres:5432/postgres")

# Parameterized insert of the raw CSV columns only; the enrichment columns
# (postal, lon/lat, geometries, ...) are not set here and stay NULL.
insert_sql = text("""
INSERT INTO public.hdb_property_info (
    blk_no, street, max_floor_lvl, year_completed,
    residential, commercial, market_hawker, miscellaneous,
    multistorey_carpark, precinct_pavilion, bldg_contract_town,
    total_dwelling_units, room1_sold, room2_sold, room3_sold,
    room4_sold, room5_sold, exec_sold, multigen_sold,
    studio_apartment_sold, room1_rental, room2_rental, room3_rental,
    other_room_rental
)
VALUES (
    :blk_no, :street, :max_floor_lvl, :year_completed,
    :residential, :commercial, :market_hawker, :miscellaneous,
    :multistorey_carpark, :precinct_pavilion, :bldg_contract_town,
    :total_dwelling_units, :room1_sold, :room2_sold, :room3_sold,
    :room4_sold, :room5_sold, :exec_sold, :multigen_sold,
    :studio_apartment_sold, :room1_rental, :room2_rental, :room3_rental,
    :other_room_rental
);
""")

# Build one parameter dict per row. Casting to object first lets missing
# cells (NaN) be replaced by None, so they are stored as SQL NULL instead of
# failing on INT columns or landing as the text 'NaN'.
records = df.astype(object).where(df.notna(), None).to_dict("records")

# One transaction; passing the whole list runs the statement in executemany
# mode rather than one round trip per row.
with engine.begin() as conn:
    # An empty list would execute the statement with no bound parameters.
    if records:
        conn.execute(insert_sql, records)

print("✔ Raw property info inserted.")


✔ Raw property info inserted.


#### 4. Geocode Enrichment

##### 4.1 Add canonical road name column

```sql
-- Add the column that holds the canonicalized street name used as the
-- geocoding join key. IF NOT EXISTS keeps the whole step re-runnable,
-- matching the CREATE INDEX IF NOT EXISTS below.
ALTER TABLE public.hdb_property_info
ADD COLUMN IF NOT EXISTS canonical_street TEXT;

-- canonicalize_road_name() is defined outside this notebook.
-- NOTE(review): presumably it is the same function used to fill
-- geocode_cache.canonical_street; confirm, since the join relies on equality.
UPDATE public.hdb_property_info
SET canonical_street = canonicalize_road_name(street);

-- Index the join key.
CREATE INDEX IF NOT EXISTS idx_hdb_canonical_street
ON public.hdb_property_info(canonical_street);
```

##### 4.2 Join on canonical street name + block number

```sql
-- Copy the geocoding results from geocode_cache onto every property row whose
-- block number and canonical street name both match, and stamp the update time.
UPDATE public.hdb_property_info AS prop
SET (
    postal, full_address,
    lon, lat,
    x, y,
    geom_4326, geom_3414,
    updated_at
) = (
    gc.postal, gc.address,
    gc.lon, gc.lat,
    gc.x, gc.y,
    gc.geom_4326, gc.geom_3414,
    NOW()
)
FROM public.geocode_cache AS gc
WHERE prop.canonical_street = gc.canonical_street
  AND prop.blk_no = gc.blk_no;

```