In [1]:
import os
from urllib.parse import quote_plus

import geopandas as gpd
import leafmap
import pandas as pd
from sqlalchemy import create_engine, text

# Download data from CSDI Portal

In [5]:
# Fetch the file-geodatabase (FGDB) archive from the CSDI portal and unpack it
# into storage_path via the project-local fgdbDL helper.
from fgdbDL import download_and_extract_gdb 

# CSDI static download link (FGDB format); the recorded output of this cell
# shows it extracts Building_Footprint.gdb.
url = "https://static.csdi.gov.hk/csdi-webpage/download/51d63757e2675874af80eef94afb6a35/fgdb"

# Destination folder: keep exactly one storage_path active for the machine in use.
# NOTE(review): the Windows path is active here while the later cells point at
# the Linux paths - confirm which machine this notebook is meant to run on.
#storage_path = "/home/steeb/Documents/GIS/"
storage_path = r"C:\Users\Steve_Lau\Desktop\LS Training\ls_project1"

download_and_extract_gdb(url, storage_path)

Downloaded file to C:\Users\Steve_Lau\Desktop\LS Training\ls_project1\downloaded_file.zip
Removed old version of Building_Footprint.gdb
Extracted Building_Footprint.gdb to C:\Users\Steve_Lau\Desktop\LS Training\ls_project1
Removed the original downloaded file C:\Users\Steve_Lau\Desktop\LS Training\ls_project1\downloaded_file.zip


In [2]:
# Fetch the shapefile archive from the CSDI portal and unpack it into
# storage_path via the project-local shpDL helper.
from shpDL import download_and_extract_shp

# CSDI static download link (SHP format).
# NOTE(review): presumably the BDBIAR dataset read later from shp_file - confirm.
url = 'https://static.csdi.gov.hk/csdi-webpage/download/0e55c533715b5da3ae0ca6e6024e90b4/shp'

# Destination folder: keep exactly one storage_path active for the machine in use.
storage_path = "/home/steeb/Documents/GIS/"
#storage_path = r"C:\Users\Steve_Lau\Desktop\LS Training\ls_project1"

download_and_extract_shp(url, storage_path)

Downloaded file to /home/steeb/Documents/GIS/downloaded_file.zip
Extracted shapefiles to /home/steeb/Documents/GIS/
Removed the original downloaded file /home/steeb/Documents/GIS/downloaded_file.zip


# Set up paths and layers + db connection parameters

In [2]:
# Define the paths and layer name (comment out either one gdb_path when not in use)
gdb_path = "/home/steeb/Documents/GIS/20240509/Building_Footprint.gdb"
#gdb_path = r"C:\Users\Steve_Lau\Desktop\LS Training\ls_project1\Building_Footprint.gdb"

shp_file = "/home/steeb/Documents/GIS/BDBIAR.shp"
#shp_file = r"C:\Users\Steve_Lau\Desktop\LS Training\ls_project1\BDBIAR.shp"

# Define the db connection parameters.
# Each value can be overridden through the standard libpq environment variables
# (PGUSER, PGPASSWORD, PGHOST, PGDATABASE, PGPORT) so that real credentials never
# have to be committed with the notebook. The fallbacks are the original
# local-development settings; set PGPASSWORD for anything beyond a local sandbox.
username = os.environ.get("PGUSER", "postgres")
password = os.environ.get("PGPASSWORD", "12345")
host = os.environ.get("PGHOST", "localhost")
dbname = os.environ.get("PGDATABASE", "Buildings")
port = os.environ.get("PGPORT", "5432")

# GDAL/OGR style connection string, passed to the ogr2ogr-based import helpers below.
pg_connection = f"PG:host={host} port={port} dbname={dbname} user={username} password={password}"

# SQLAlchemy engine used by the pandas / geopandas readers. User and password are
# percent-encoded so characters such as "@", ":" or "/" cannot corrupt the URL.
engine = create_engine(
    f"postgresql://{quote_plus(username)}:{quote_plus(password)}@{host}:{port}/{dbname}"
)

# Import into a PostgreSQL database using ogr2ogr

In [None]:
# Imports a GDB into a PostgreSQL database using ogr2ogr
# gdb_path and pg_connection are defined in the set-up cell; the work itself is
# delegated to the project-local gdb2pgsql helper.
from gdb2pgsql import transfer_gdb_to_postgis

transfer_gdb_to_postgis(gdb_path, pg_connection)

In [5]:
# Imports a SHP into a PostgreSQL database using ogr2ogr
# shp_file and pg_connection are defined in the set-up cell; the work itself is
# delegated to the project-local shp2pgsql helper.
from shp2pgsql import import_shapefile_to_postgresql

import_shapefile_to_postgresql(shp_file, pg_connection)




In [None]:
# Inspect the geodatabase at gdb_path with the project-local gdbList helper.
# NOTE(review): judging by its name the helper reports each layer together with
# its type - confirm against gdbList before relying on the output format.
from gdbList import list_layers_with_types

list_layers_with_types(gdb_path)

# Read data from PostgreSQL database into dataframes

In [3]:
# Names of the PostgreSQL tables this notebook reads.
table_op = "OCCUPATION_PERMIT"
table_op_blgstr = "OP_BUILDING_STRUCTURE"
table_blgstr = "BUILDING_STRUCTURE"
table_blgcat = "CT_BUILDING_CATEGORY"
table_bdbiar = "BDBIAR"


def _select_all(table_name):
    """Return a SQLAlchemy text clause selecting every row and column of table_name."""
    return text(f"SELECT * FROM {table_name}")


# One full-table query per source table.
sql_op = _select_all(table_op)
sql_op_blgstr = _select_all(table_op_blgstr)
sql_blstr = _select_all(table_blgstr)
sql_blgcat = _select_all(table_blgcat)
sql_bdbiar = _select_all(table_bdbiar)

In [4]:
# Share one connection across all reads and let the context manager close it
# afterwards; calling engine.connect() per read leaves every connection open.
with engine.connect() as conn:
    # Read the attribute-only tables into DataFrames
    df_op = pd.read_sql(sql_op, con=conn)
    df_op_blgstr = pd.read_sql(sql_op_blgstr, con=conn)
    df_blgcat = pd.read_sql(sql_blgcat, con=conn)

    # Read the tables with geometry into GeoDataFrames; geom_col names the
    # geometry column of each table.
    gdf_blgstr = gpd.read_postgis(sql_blstr, con=conn, geom_col="shape")
    gdf_bdbiar = gpd.read_postgis(sql_bdbiar, con=conn, geom_col="wkb_geometry")

In [5]:
# Keep only the permit number and permit date from df_op, parsing the date as a
# timezone-aware (UTC) datetime. Building the subset with .assign yields a new
# frame, so nothing is written onto a slice of df_op (the original attribute
# assignment triggered SettingWithCopyWarning).
df_op_subset = df_op[["opno", "opdate"]].assign(
    opdate=lambda frame: pd.to_datetime(frame["opdate"], utc=True)
)

# Attach the permit date to the permit/structure link table on "opno".
# how="right" keeps every permit row from df_op_subset, including permits that
# have no matching row in df_op_blgstr.
df_merge_op_blgstr = pd.merge(df_op_blgstr,
                              df_op_subset,
                              on="opno",
                              how="right")

In [6]:
# Reduce the permit/structure table to the join key and the permit date.
df_merge_op_blgstr_subset = df_merge_op_blgstr.loc[:, ["buildingstructureid", "opdate"]]

# Left-join the permit date onto the building-structure geometries so that every
# structure is kept, with or without a matching permit.
gdf_merge_blgstr = pd.merge(
    left=gdf_blgstr,
    right=df_merge_op_blgstr_subset,
    how="left",
    on="buildingstructureid",
)

In [7]:
# Build the category lookup: keep the code, description and note columns of
# df_blgcat and rename them so "category" becomes the join key and the text
# columns get distinct names.
df_blgcat_subset = (
    df_blgcat[["code", "description", "note"]]
    .rename(columns={"code": "category",
                     "description": "catdesc",
                     "note": "catnote"})
)

# Cast the structures' category column to int64 before joining.
# NOTE(review): presumably this aligns its dtype with the lookup's code column - confirm.
gdf_merge_blgstr["category"] = gdf_merge_blgstr["category"].astype("object").astype("int64")

# Left-join the category description and note onto the building structures.
gdf_merge_blgstr = pd.merge(
    left=gdf_merge_blgstr,
    right=df_blgcat_subset,
    how="left",
    on="category",
)

In [8]:
# Reference date for the age calculation: the current date as a UTC-aware
# timestamp with the time of day reset to midnight.
today = pd.to_datetime('today', utc=True).normalize()

# Stamp every row with the reference date.
gdf_merge_blgstr = gdf_merge_blgstr.assign(calcdate=today)

In [9]:
# Building age in years: elapsed time between the permit date and the reference
# date. Dividing by the average year length (365.25 days) keeps leap days from
# inflating the result, which a fixed 365-day year does by roughly one day
# every four years. Rows without a permit date yield a missing age.
gdf_merge_blgstr["age"] = (
    gdf_merge_blgstr["calcdate"] - gdf_merge_blgstr["opdate"]
) / pd.Timedelta(days=365.25)

In [10]:
# Columns carried forward for the building-structure layer; "shape" is the
# geometry column and must stay in the selection.
blgstr_keep_columns = [
    "buildingstructureid",
    "buildingcsuid",
    "buildingstructuretype",
    "catdesc",
    "catnote",
    "status",
    "officialbuildingnameen",
    "officialbuildingnametc",
    "numabovegroundstoreys",
    "numbasementstoreys",
    "topheight",
    "baseheight",
    "opdate",
    "age",
    "shape",
]

# Keep only relevant columns
gdf_merge_blgstr = gdf_merge_blgstr.loc[:, blgstr_keep_columns]

In [19]:
#gdf_merge_blgstr["opdate"] = gdf_merge_blgstr["opdate"].astype(str)

#shp_path = "/home/steeb/Documents/GIS/buildingstr.shp"

#shp_path = r"C:\Users\Steve_Lau\Desktop\LS Training\ls_project1\buildingstr.shp"

#gdf_merge_blgstr.to_file(shp_path, encoding='utf-8')

In [11]:
# Tally the BDBIAR records by the value of "nsearch4_e" and display the counts.
bdbiar_type_counts = gdf_bdbiar["nsearch4_e"].value_counts()
bdbiar_type_counts

nsearch4_e
Tower     44364
Podium     6168
Name: count, dtype: int64

# Filter Building structure and Building age by "Tower" type

In [18]:
# Keep only tower-type records from each layer. The explicit .copy() makes each
# result an independent frame, so later cells can reproject it or overwrite its
# columns without pandas warning about writing to a slice of the source frame.
gdf_blgstr_tower = gdf_merge_blgstr[gdf_merge_blgstr["buildingstructuretype"] == "T"].copy()
gdf_bdbiar_tower = gdf_bdbiar[gdf_bdbiar["nsearch4_e"] == "Tower"].copy()

In [21]:
# Reproject the tower records to EPSG:2326. Reassigning the returned frame,
# rather than using inplace=True, avoids mutating a frame that was produced by
# filtering gdf_bdbiar.
gdf_bdbiar_tower = gdf_bdbiar_tower.to_crs(epsg=2326)

In [22]:
# Cast the two nsearch3 columns to plain strings before export. Using .assign
# builds a new frame instead of writing columns onto a filtered slice, which is
# what raised SettingWithCopyWarning in the original form.
# NOTE(review): astype(str) also turns missing values into literal text such as
# "None" or "nan" - confirm that is acceptable in the exported shapefile.
gdf_bdbiar_tower = gdf_bdbiar_tower.assign(
    nsearch3_c=gdf_bdbiar_tower["nsearch3_c"].astype(str),
    nsearch3_e=gdf_bdbiar_tower["nsearch3_e"].astype(str),
)

# Output location (keep exactly one shp_path active for the machine in use).
shp_path = "/home/steeb/Documents/GIS/gdf_bdbiar_tower.shp"

#shp_path = r"C:\Users\Steve_Lau\Desktop\LS Training\ls_project1\gdf_bdbiar_tower.shp"

# Write the tower records to a shapefile with UTF-8 encoded attributes.
gdf_bdbiar_tower.to_file(shp_path, encoding='utf-8')

In [23]:
# Spatially join the BDBIAR tower attributes onto the tower building structures.
# how="left" keeps every building structure, matched or not.
gdf_sjoin_blgstr = gpd.sjoin(
    left_df=gdf_blgstr_tower,
    right_df=gdf_bdbiar_tower,
    how="left",
)

# Summarise the joined frame: row count, columns, non-null counts and dtypes.
gdf_sjoin_blgstr.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 333157 entries, 0 to 340950
Data columns (total 37 columns):
 #   Column                  Non-Null Count   Dtype              
---  ------                  --------------   -----              
 0   buildingstructureid     333157 non-null  int64              
 1   buildingcsuid           333157 non-null  object             
 2   buildingstructuretype   333157 non-null  object             
 3   catdesc                 333157 non-null  object             
 4   catnote                 333157 non-null  object             
 5   status                  333157 non-null  object             
 6   officialbuildingnameen  59893 non-null   object             
 7   officialbuildingnametc  59348 non-null   object             
 8   numabovegroundstoreys   41884 non-null   float64            
 9   numbasementstoreys      3934 non-null    float64            
 10  topheight               177236 non-null  float64            
 11  baseheight             

In [None]:
# Keep only relevant columns
# NOTE(review): gdf_sjoin_blg_site is not defined in any cell visible in this
# notebook (the spatial join above produces gdf_sjoin_blgstr), and the column
# names below differ from those selected for gdf_merge_blgstr. This cell has no
# execution count, so it looks like a leftover from another dataset - confirm
# the intended input frame and column list before running.
gdf_sjoin_blg_site = gdf_sjoin_blg_site.loc[:, ("buildingid",
                    "typeofbuildingblock",
                    "baselevel",
                    "rooflevel",
                    "buildingstatus",
                    "englishbuildingname",
                    "chinesebuildingname",
                    "sitestype",
                    "sitecode",
                    "shape")]

In [None]:
# Create an interactive leafmap map and add the joined buildings as a layer
# named "buildings".
# NOTE(review): this depends on gdf_sjoin_blg_site, which no visible cell defines - confirm.
m = leafmap.Map()

m.add_gdf(gdf_sjoin_blg_site, layer_name="buildings")
# The bare expression on the last line renders the map as the cell output.
m