This notebook provides a sample script to follow when loading data into the database for the first time. In this script we import NZGB place name data into our gazetteer database.

### Connect with the database

In [None]:
import psycopg2

import os
from getpass import getpass

# Take the password from the PGPASSWORD environment variable (the one libpq
# tools such as psql read) and fall back to an interactive prompt, so the
# credential is never stored in the notebook itself.
db_password = os.environ.get("PGPASSWORD") or getpass("PostgreSQL password: ")

# Connection shared by every cell below that goes through `conn` / `cur`.
conn = psycopg2.connect(
    database="biowhere-gazetteer",
    user="postgres",
    host="127.0.0.1",
    password=db_password,
    port=5432,
)

# Single shared cursor; the helper functions defined later read it as a global.
cur = conn.cursor()

### Load NZGB data to PostgreSQL tables with PostGIS
Official shapefiles for New Zealand geographic features can be accessed via [LINZ](https://data.linz.govt.nz/data/). First we need to import those shapefiles into separate tables in our DB before loading them into our unified gazetteer.

In [None]:
import geopandas as gpd
from sqlalchemy import create_engine

# (shapefile path, destination table) pairs, one per NZGB geometry layer.
shapefile_data = [
    ('path/to/nz-place-names-nzgb.shp', 'nzgb_point'),
    ('path/to/nz-place-names-lines-nzgb.shp', 'nzgb_line'),
    ('path/to/nz-place-names-polygons-nzgb.shp', 'nzgb_polygon')
]

# Template URL: substitute the real password and port before running.
db_connection_string = "postgresql://postgres:<password>@localhost:<port>/biowhere-gazetteer"

# The engine does not depend on the shapefile, so it is created once and
# reused for every import instead of being rebuilt on each iteration.
engine = create_engine(db_connection_string)

for shp_path, table_name in shapefile_data:
    gdf = gpd.read_file(shp_path)

    # Import the GeoDataFrame into PostGIS; a table that already exists
    # under this name is replaced.
    gdf.to_postgis(table_name, engine, if_exists='replace')
    print(f"Imported {shp_path} into {table_name}")

# Print a message when the loop is finished
print("Shapefile import process completed.")

### Merge NZGB data to the gazetteer

In [None]:
# Value written to the lastUpdateUser column by every INSERT issued by the
# functions defined below; replace the placeholder with your own name.
LAST_UPDATE_USER = "<YOUR-NAME>"

In [None]:
# Helper Functions
def execute_query_and_fetchone(query, params=None):
    """Run ``query`` on the shared cursor and return the first row of its result.

    ``params`` holds the values bound to the query's placeholders; a falsy
    value (``None`` or an empty sequence) is sent as an empty tuple.
    Returns whatever ``cur.fetchone()`` yields.
    """
    bound_values = params if params else ()
    cur.execute(query, bound_values)
    first_row = cur.fetchone()
    return first_row

In [None]:
def execute_query_and_return_id(query, params=None):
    """Run ``query`` and return the first column of its first result row.

    Meant for statements that hand back an id, such as
    ``INSERT ... RETURNING id``.
    """
    first_row = execute_query_and_fetchone(query, params)
    return first_row[0]

In [None]:
def insert_whakapapa_data(feature_name_id, name_id, data):
    """Link a feature name to one Whakapapa row per non-empty entry of ``data``.

    ``data`` maps a usage label to a whakapapa value; falsy values are
    skipped. For each remaining value, the Whakapapa row holding the same
    text is reused when one exists; otherwise a new row is inserted with the
    label as its whakapapaUsage. A FeatureName_Whakapapa row tying that
    Whakapapa row to ``feature_name_id`` is then inserted.

    ``name_id`` is only used in the progress message.
    """
    print(f"Inserting whakapapa data for {name_id}...")
    for usage_label, value in data.items():
        if not value:
            continue

        match = execute_query_and_fetchone(
            "SELECT id FROM Whakapapa WHERE whakapapa = %s", (value,)
        )
        if match:
            linked_id = match[0]
        else:
            linked_id = execute_query_and_return_id(
                "INSERT INTO Whakapapa (whakapapa, whakapapaUsage, lastUpdateDate, lastUpdateUser) "
                "VALUES (%s, %s, current_timestamp, %s) RETURNING id",
                (value, usage_label, LAST_UPDATE_USER)
            )

        link_values = (feature_name_id, linked_id, LAST_UPDATE_USER)
        cur.execute(
            "INSERT INTO FeatureName_Whakapapa (featureName_id, whakapapa_id, lastUpdateDate, lastUpdateUser) "
            "VALUES (%s, %s, current_timestamp, %s)",
            link_values
        )

In [None]:
def process_feature_geometry(geometry, feature_id, spatial_accuracy):
    """Store one geometry for a feature and return its representation id.

    Args:
        geometry: value passed unchanged to ``ST_GeometryType`` and to the
            ``geometry`` column of the type-specific table.
        feature_id: id of the Feature row the geometry belongs to.
        spatial_accuracy: two-item sequence; both items are rendered with
            ``repr()`` into the text stored in ``spatialAccuracy``.

    Returns:
        The id of the new SpatialGeometryRepresentation row.

    Raises:
        ValueError: if PostGIS reports a geometry type with no matching
            table. The type is checked before any INSERT, so a rejected
            geometry leaves no SpatialGeometryRepresentation row behind.
    """
    # Resolve the destination table first so unsupported geometries fail
    # before anything is written.
    geotype = execute_query_and_fetchone("SELECT ST_GeometryType(%s)", (geometry,))[0]
    table_name = {
        'ST_Point': 'SpatialGeometryRepresentation_point',
        'ST_LineString': 'SpatialGeometryRepresentation_line',
        'ST_MultiLineString': 'SpatialGeometryRepresentation_line',
        'ST_Polygon': 'SpatialGeometryRepresentation_polygon',
        'ST_MultiPolygon': 'SpatialGeometryRepresentation_polygon'
    }.get(geotype)
    if not table_name:
        raise ValueError(f"Unsupported geometry type: {geotype}")

    # Text of the form "('<first>', '<second>')".
    # NOTE(review): presumably read by PostgreSQL as a composite literal -
    # confirm against the spatialAccuracy column type.
    spatial_accuracy_str = f"({spatial_accuracy[0]!r}, {spatial_accuracy[1]!r})"
    spatial_geom_id = execute_query_and_return_id(
        "INSERT INTO SpatialGeometryRepresentation "
        "(lastUpdateDate, lastUpdateUser, timePeriod, spatialAccuracy, feature_id, localityDescription_id) "
        "VALUES (current_timestamp, %s, NULL, %s, %s, NULL) RETURNING id",
        (LAST_UPDATE_USER, spatial_accuracy_str, feature_id)
    )

    print(f"Inserting {geotype} into {table_name}...")
    # table_name comes from the fixed mapping above, never from input, so
    # interpolating it into the statement is safe.
    cur.execute(
        f"INSERT INTO {table_name} (geodeticReferenceSystem, geometry, lastUpdateDate, lastUpdateUser, spatialGeometryRepresentation_id) "
        "VALUES (%s, %s, current_date, %s, %s)",
        ("EPSG 4326", geometry, LAST_UPDATE_USER, spatial_geom_id)
    )
    return spatial_geom_id

In [None]:
def process_feature(name, name_id, info_descr, feat_type, geometry, spatial_accuracy, is_maori_name):
    """Create a new feature with its name, type, geometry and NZGB source row.

    Inserts, in order: a Feature row described by ``info_descr``; a
    FeatureName row for ``name`` (language 'mi' when ``is_maori_name`` is
    true, NULL otherwise); a FeatureType row for ``feat_type`` under the
    'nzgb_feat_type' scheme; the geometry via ``process_feature_geometry``;
    and a Source row that records ``name_id`` as the external id.

    Returns the id of the new FeatureName row.
    """
    print(f"Processing new feature: {name}")
    feature_id = execute_query_and_return_id(
        "INSERT INTO Feature (featureDescription, inferredFlag, lastUpdateDate, lastUpdateUser) "
        "VALUES (%s, NULL, current_timestamp, %s) RETURNING id",
        (info_descr, LAST_UPDATE_USER)
    )

    # Choose the FeatureName statement and its values, then run it once.
    if is_maori_name:
        print(f"Adding Maori name: {name}")
        name_sql = (
            "INSERT INTO FeatureName (featureName, language, feature_id, lastUpdateDate, lastUpdateUser) "
            "VALUES (%s, %s, %s, current_timestamp, %s) RETURNING id"
        )
        name_values = (name, 'mi', feature_id, LAST_UPDATE_USER)
    else:
        print(f"Inserting feature name for {name}...")
        name_sql = (
            "INSERT INTO FeatureName (featureName, language, feature_id, lastUpdateDate, lastUpdateUser) "
            "VALUES (%s, NULL, %s, current_timestamp, %s) RETURNING id"
        )
        name_values = (name, feature_id, LAST_UPDATE_USER)
    feature_name_id = execute_query_and_return_id(name_sql, name_values)

    print(f"Inserting feature type for {name}...")
    type_values = (feat_type, feature_id, LAST_UPDATE_USER)
    feature_type_id = execute_query_and_return_id(
        "INSERT INTO FeatureType (classificationScheme, featureClass, feature_id, lastUpdateDate, lastUpdateUser) "
        "VALUES ('nzgb_feat_type', %s, %s, current_timestamp, %s) RETURNING id",
        type_values
    )

    print(f"Inserting source for {name}...")
    spatial_geom_id = process_feature_geometry(geometry, feature_id, spatial_accuracy)
    source_values = (
        name_id, "NZGB", spatial_geom_id, feature_type_id, feature_name_id, LAST_UPDATE_USER
    )
    cur.execute(
        "INSERT INTO Source (externalSourceId, source, spatialGeometryRepresentation_id, featureType_id, "
        "featureName_id, localityDescription_id, lastUpdateDate, lastUpdateUser) "
        "VALUES (%s, %s, %s, %s, %s, NULL, current_date, %s)",
        source_values
    )
    return feature_name_id

In [None]:
def process_duplicate_feature(feature_id, feature_name_id, name_id, feat_type, geometry, spatial_accuracy):
    """Attach another geometry and Source row to a feature that already exists.

    Looks up a FeatureType row of ``feature_id``. When none exists, or the
    one found has a different featureClass than ``feat_type``, a new
    FeatureType row is inserted; otherwise the existing row is reused. The
    geometry is then stored via ``process_feature_geometry`` and a Source
    row is inserted linking ``name_id``, the geometry, the feature type and
    ``feature_name_id``.

    No Feature or FeatureName row is created here. Returns ``None``.
    """
    print(f"Processing duplicate feature: {name_id} -> {feature_name_id}.")
    existing_feature_type = execute_query_and_fetchone(
        "SELECT featureClass, id FROM FeatureType WHERE feature_id = %s", (feature_id,)
    )

    # fetchone() gives None when the feature has no FeatureType row at all;
    # treat that like a mismatch and record the incoming type instead of
    # failing on None[0].
    if existing_feature_type is None or existing_feature_type[0] != feat_type:
        print(f"New feature type for the existing feature: {feature_id} -> {feat_type}. Adding new feature type...")
        feature_type_id = execute_query_and_return_id(
            "INSERT INTO FeatureType (classificationScheme, featureClass, feature_id, lastUpdateDate, lastUpdateUser) "
            "VALUES ('nzgb_feat_type', %s, %s, current_timestamp, %s) RETURNING id",
            (feat_type, feature_id, LAST_UPDATE_USER))
    else:
        feature_type_id = existing_feature_type[1]

    spatial_geom_id = process_feature_geometry(geometry, feature_id, spatial_accuracy)
    print(f"Adding new source to existing feature: {name_id} -> {feature_name_id}.")
    cur.execute(
        "INSERT INTO Source (externalSourceId, source, spatialGeometryRepresentation_id, featureType_id, "
        "featureName_id, localityDescription_id, lastUpdateDate, lastUpdateUser) "
        "VALUES (%s, %s, %s, %s, %s, NULL, current_date, %s)",
        (name_id, "NZGB", spatial_geom_id, feature_type_id, feature_name_id, LAST_UPDATE_USER)
    )

In [None]:
def process_row(row):
    """Import one NZGB record into the gazetteer.

    ``row`` is a 16-item sequence in this order: info_descr, feat_type,
    geometry, feat_id, name_id, name, maori_name, info_ref, info_origi,
    info_note, feat_note, doc_gaz_re, treaty_leg, accuracy, rev_gaz_re,
    rev_treaty.

    The part of ``name`` before the first '/' is looked up in FeatureName.
    If a row is found and its Source row carries the same externalSourceId
    as ``name_id``, the record is added to that feature through
    ``process_duplicate_feature``. In every other case a new feature is
    created with ``process_feature`` and its whakapapa values are stored.
    """
    (info_descr, feat_type, geometry, feat_id, name_id, name, maori_name, info_ref,
     info_origi, info_note, feat_note, doc_gaz_re, treaty_leg, accuracy, rev_gaz_re, rev_treaty) = row

    print(f"Start Processing {name}...")
    spatial_accuracy = (accuracy, 'NZGB')
    names = name.split('/')
    is_new_feature = True

    existing_id = execute_query_and_fetchone(
        "SELECT id, feature_id FROM FeatureName WHERE featurename = %s", (names[0],)
    )
    if existing_id:
        print(f"Feature name already exists - details: {existing_id[0]}, {existing_id[1]}.")
        feature_name_id = existing_id[0]
        external_source_id = execute_query_and_fetchone(
            "SELECT externalSourceId FROM Source WHERE featurename_id = %s", (feature_name_id,)
        )

        # A FeatureName without a Source row yields None; that cannot be the
        # same NZGB record, so fall through to the new-feature path instead
        # of failing on None[0].
        if external_source_id is not None and external_source_id[0] == name_id:
            feature_id = existing_id[1]
            print(f"Feature already exists - name: {name} (id: {name_id}).")
            process_duplicate_feature(feature_id, feature_name_id, name_id, feat_type, geometry, spatial_accuracy)
            is_new_feature = False

    if is_new_feature:
        feature_name_id = process_feature(
            name, name_id, info_descr, feat_type, geometry, spatial_accuracy, maori_name == 'Yes'
        )
        # Keys become the whakapapaUsage of any Whakapapa row that gets created.
        whakapapa_data = {
            'info_ref': info_ref, 'info_origi': info_origi, 'info_note': info_note,
            'doc_gaz_re': doc_gaz_re, 'treaty_leg': treaty_leg, 'accuracy': accuracy,
            'rev_gaz_re': rev_gaz_re, 'rev_treaty': rev_treaty, 'feat_note': feat_note
        }
        insert_whakapapa_data(feature_name_id, name_id, whakapapa_data)

In [None]:
# Main Processing Loop
# Every staging table is merged inside one transaction: either all rows are
# committed at the end, or the whole import is rolled back.
shapefiles = ['nzgb_point', 'nzgb_line', 'nzgb_polygon']
try:
    for shapefile in shapefiles:
        # Table names come from the fixed list above, so interpolating them
        # into the statement is safe.
        cur.execute(
            "SELECT info_descr, feat_type, geometry, feat_id, name_id, name, maori_name, info_ref, "
            "info_origi, info_note, feat_note, doc_gaz_re, treaty_leg, accuracy, rev_gaz_re, rev_treaty "
            f"FROM {shapefile}"
        )
        # Fetch everything up front: process_row issues its own queries on
        # the same cursor, which would discard an unread result set.
        rows = cur.fetchall()
        print(f"Processing rows {len(rows)}")
        for row in rows:
            process_row(row)
except BaseException:
    # Covers database errors as well as interrupting the cell. Rolling back
    # clears the aborted transaction so the connection can be used again
    # when the cell is re-run.
    conn.rollback()
    raise
else:
    conn.commit()
    print("Changes committed to database.")

In [None]:
from sqlalchemy import create_engine, inspect, text

import os
from getpass import getpass
from urllib.parse import quote_plus

# Take the password from PGPASSWORD or prompt for it rather than storing it
# in the notebook; quote_plus keeps special characters valid inside the URL.
# NOTE(review): this database name differs from the "biowhere-gazetteer"
# database the other cells connect to - confirm it is the intended target.
db_password = os.environ.get("PGPASSWORD") or getpass("PostgreSQL password: ")
engine = create_engine(
    f"postgresql://postgres:{quote_plus(db_password)}@localhost:5432/biowheregazetteermanawatufinal"
)


# Create inspector to get schema and table names
inspector = inspect(engine)
schemas = inspector.get_schema_names()

# Print the row count of every table outside the system schemas.
# The connection is bound to `sa_conn`, not `conn`: `conn` is the psycopg2
# connection opened at the top of the notebook and closed in the last cell,
# and rebinding it here would leave that connection open.
with engine.connect() as sa_conn:
    for schema in schemas:
        if schema.startswith('pg_') or schema == 'information_schema':
            # Skip system schemas
            continue
        tables = inspector.get_table_names(schema=schema)
        for table in tables:
            result = sa_conn.execute(text(f'SELECT COUNT(*) FROM "{schema}"."{table}"'))
            count = result.scalar()
            print(f'Table "{schema}"."{table}": {count} rows')

In [None]:
# Release the cursor first, then close the connection bound to `conn`.
cur.close()
conn.close()