In [72]:
# Dependencies
# !pip install sqlalchemy 
# !pip install psycopg2-binary
# !pip install pandas
# !pip install openpyxl

In [73]:
# Imports
from sqlalchemy import create_engine, text, inspect
from dotenv import load_dotenv
import pandas as pd
import os

In [74]:
# Establish DB connection

load_dotenv()    # loads .env from current directory by default
db_url = os.getenv("DATABASE_URL") # make sure to get the proper .env file from kristina !
engine = create_engine(db_url)

with engine.connect() as conn:
    result = conn.execute(text("SELECT NOW();"))
    print(result.fetchone())


(datetime.datetime(2025, 11, 22, 21, 38, 54, 898070, tzinfo=datetime.timezone.utc),)


In [75]:
# verify connection by getting db table names
insp = inspect(engine)
tables = insp.get_table_names()

print(tables)

['custom_poi', 'spatial_ref_sys', 'osm2pgsql_properties', 'planet_osm_rels', 'planet_osm_nodes', 'planet_osm_ways', 'planet_osm_point', 'planet_osm_line', 'planet_osm_polygon', 'planet_osm_roads']


In [None]:
# To drop custom poi table

with engine.connect() as conn:
    conn.execute(text("DROP TABLE IF EXISTS custom_poi CASCADE;"))
    conn.commit()

In [None]:
# create the custom_poi table
create_sql = """
CREATE TABLE IF NOT EXISTS custom_poi (
    poi_id SERIAL PRIMARY KEY,
    name TEXT NOT NULL DEFAULT 'untitled poi',
    alternate_names TEXT[] DEFAULT NULL,
    abbr TEXT DEFAULT NULL,
    location_description TEXT DEFAULT NULL,
    type TEXT DEFAULT NULL,
    poi_desc TEXT DEFAULT NULL,
    website TEXT DEFAULT NULL,
    coordinates geometry(Point, 4326) DEFAULT NULL,
    osm_object BIGINT,
    osm_verified TEXT,
    UNIQUE (name, location_description)
);
"""

with engine.connect() as conn:
    conn.execute(text(create_sql))
    conn.commit()

print("custom_poi table created!")

custom_poi table created!


In [78]:
"""
find_osm_table
    Desc: Fxn to find osm table of a passed in osm_id 
    Inputs: 
        osm_id - entered osm id in "osm_object" field
        conn - database connection
    Outputs:
        Returns the table the osm object was found in or None if the osm object was not found
"""
def find_osm_table(osm_id, conn):
    # Names of tables of osm objects
    tables = ["planet_osm_polygon", "planet_osm_point", "planet_osm_line"]

    # Traverse through each table holding a osm object
    for table in tables:

        # Query for a row with the matching osm_id
        q = text(f"SELECT 1 FROM {table} WHERE osm_id = :oid LIMIT 1")

        # Execute query and fetch result
        result = conn.execute(q, {"oid": osm_id}).fetchone()

        # If we find result, return the name of the table
        if result:
            return table
        
    # There's no osm object with matching osm_id
    return None


In [79]:
# Read in the custom_poi file 
df = pd.read_excel("custom_pois.xlsx")

# OPTIONAL: strip whitespace
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)


In [80]:
# Insert SQL

# SQL to insert into db
# ON CONFLICT... basically says to not insert (no nothing) if we find a duplicate poi
insert_sql = text("""
INSERT INTO custom_poi (osm_object, name, location_description, website)
VALUES (:osm_object, :name, :location_description, :website)
ON CONFLICT (name, location_description) DO NOTHING
RETURNING poi_id;
""")

update_status_sql = text("""
UPDATE custom_poi SET osm_verified = :status WHERE poi_id = :pid
""")

# Insert each row
with engine.begin() as conn:  # auto-commit
    for _, row in df.iterrows():
        osm_id = row.get("osm_object")
        if pd.isna(osm_id):
            osm_id = None

        name = row.get("name")
        location = row.get("location_description")
        website = row.get("website")

        poi_id = conn.execute(insert_sql, {
            "osm_object": osm_id,
            "name": name,
            "location_description": location,
            "website": website
        }).scalar()

        # Determine osm status
        if pd.isna(osm_id):
            status = None
        else:
            status = "YES" if find_osm_table(int(osm_id), conn) else "NO"

        conn.execute(update_status_sql, {"status": status, "pid": poi_id})

print("Import completed!")

ProgrammingError: (psycopg2.errors.InvalidColumnReference) there is no unique or exclusion constraint matching the ON CONFLICT specification

[SQL: 
INSERT INTO custom_poi (osm_object, name, location_description, website)
VALUES (%(osm_object)s, %(name)s, %(location_description)s, %(website)s)
ON CONFLICT (name, location_description) DO NOTHING
RETURNING poi_id;
]
[parameters: {'osm_object': 32429365.0, 'name': 'Academic Advising', 'location_description': 'Sherman Hall, B-Wing, room 224', 'website': 'https://advising.umbc.edu/'}]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [None]:
# Function to check if osm_object id matches to one of the geometry tables
def find_osm_target(osm_id, engine):
    for table in ["osm_planet_polygon", "osm_planet_point", "osm_planet_line"]:
        q = f"SELECT 1 FROM {table} WHERE osm_id = :id LIMIT 1"
        if engine.execute(text(q), {"id": osm_id}).fetchone():
            return table
    return None
