## PostgreSQL and PostGIS for Nearest Neighbour Query / Proximity Search


Objective is to leverage PostGIS for NNQ.

In [84]:
%pip install --user psycopg2

Note: you may need to restart the kernel to use updated packages.


To enable PostGIS extension, run the following command on database
CREATE EXTENSION postgis;

In [28]:
import psycopg2
from psycopg2 import Error
import pandas as pd

In [8]:
def getConfigOrDefault(config_filename, config,label,default=None):
    if config.get(label) is not None:
        return config.get(label)
    if default is not None:
        return default
    print(f"{config_filename} file does not have {label} parameter!")
    return None

In [19]:
from dotenv import dotenv_values
config_filename = './postgresql.cfg'
config = dotenv_values(config_filename)

USER_NAME = getConfigOrDefault(config_filename, config, 'POSTGRES_UNAME')
USER_PWD  = getConfigOrDefault(config_filename, config, 'POSTGRES_UPWD')
POSTGRES_HOST = getConfigOrDefault(config_filename, config, 'POSTGRES_HOST', 'localhost')
POSTGRES_PORT = getConfigOrDefault(config_filename, config, 'POSTGRES_PORT', '5432')
POSTGRES_DB = getConfigOrDefault(config_filename, config, 'POSTGRES_DBNAME', 'test')

if USER_PWD is None or USER_PWD is None:
    print(f"{config_filename} file does not have parameters: POSTGRES_UNAME and/or POSTGRES_UPWD!")


In [20]:
try:
    # Connect to an existing database
    connection = psycopg2.connect(user=USER_NAME,
                                  password=USER_PWD,
                                  host=POSTGRES_HOST,
                                  port=POSTGRES_PORT,
                                  database=POSTGRES_DB)

    # Create a cursor to perform database operations
    cursor = connection.cursor()
    # Print PostgreSQL details
    print("PostgreSQL server information")
    print(connection.get_dsn_parameters(), "\n")
    # Executing a SQL query
    cursor.execute("SELECT version();")
    # Fetch result
    record = cursor.fetchone()
    print("You are connected to - ", record, "\n")

    #Closing the connection
    connection.close()

except (Exception, Error) as error:
    print("Error while connecting to PostgreSQL", error)
finally:
    if (connection):
       cursor.close()
       connection.close()
       print("PostgreSQL connection is closed")

PostgreSQL server information
{'user': 'test_user', 'channel_binding': 'prefer', 'dbname': 'postgres', 'host': '127.0.0.1', 'port': '5432', 'options': '', 'sslmode': 'prefer', 'sslcompression': '0', 'sslcertmode': 'allow', 'sslsni': '1', 'ssl_min_protocol_version': 'TLSv1.2', 'gssencmode': 'disable', 'krbsrvname': 'postgres', 'gssdelegation': '0', 'target_session_attrs': 'any', 'load_balance_hosts': 'disable'} 

You are connected to -  ('PostgreSQL 17.5 on x86_64-windows, compiled by msvc-19.44.35209, 64-bit',) 

PostgreSQL connection is closed


In [25]:
try:
    # Connect to an existing database
    connection = psycopg2.connect(user=USER_NAME,
                                  password=USER_PWD,
                                  host=POSTGRES_HOST,
                                  port=POSTGRES_PORT,
                                  database=POSTGRES_DB)

    # Create a cursor to perform database operations
    cursor = connection.cursor()
    # Executing a SQL query
    cursor.execute("SELECT version();")
    # Fetch result
    record = cursor.fetchone()
    print("You are connected to - ", record, "\n")

    cursor.execute(f"DROP TABLE IF EXISTS test.restaurants;")

    sql_stmt=f"""
    CREATE TABLE IF NOT EXISTS test.restaurants (
    id SERIAL PRIMARY KEY,
    name VARCHAR(255),
    longitude FLOAT,
    latitude FLOAT,
    location GEOMETRY(Point, 4326)
    )
    """
    cursor.execute(sql_stmt)

    #record = cursor.fetchone()
    #print("Result - ", record, "\n")

    connection.commit()

    sql_stmt="""
     select column_name
        , data_type
        , udt_catalog
        , udt_schema
        , udt_name
        , character_maximum_length
        , column_default
        , is_nullable
    from INFORMATION_SCHEMA.COLUMNS
    where table_name = 'restaurants';
    """
    cursor.execute(sql_stmt)

    record = cursor.fetchall()
    print(record)

    #Closing the connection
    connection.close()

except (Exception, Error) as error:
    print("Error while connecting to PostgreSQL", error)
finally:
    if (connection):
       cursor.close()
       connection.close()
       print("PostgreSQL connection is closed")

You are connected to -  ('PostgreSQL 17.5 on x86_64-windows, compiled by msvc-19.44.35209, 64-bit',) 

[('id', 'integer', 'postgres', 'pg_catalog', 'int4', None, "nextval('test.restaurants_id_seq'::regclass)", 'NO'), ('name', 'character varying', 'postgres', 'pg_catalog', 'varchar', 255, None, 'YES'), ('longitude', 'double precision', 'postgres', 'pg_catalog', 'float8', None, None, 'YES'), ('latitude', 'double precision', 'postgres', 'pg_catalog', 'float8', None, None, 'YES'), ('location', 'USER-DEFINED', 'postgres', 'public', 'geometry', None, None, 'YES')]
PostgreSQL connection is closed


In [None]:
df_raw=record
df_raw

In [30]:
column_names = [ 'column_name', 'data_type',
                 'udt_catalog', 'udt_schema', 'udt_name',
                 'character_maximum_length', 'column_default', 'is_nullable']
df = pd.DataFrame(df_raw,columns=column_names)
df

Unnamed: 0,column_name,data_type,udt_catalog,udt_schema,udt_name,character_maximum_length,column_default,is_nullable
0,id,integer,postgres,pg_catalog,int4,,nextval('test.restaurants_id_seq'::regclass),NO
1,name,character varying,postgres,pg_catalog,varchar,255.0,,YES
2,longitude,double precision,postgres,pg_catalog,float8,,,YES
3,latitude,double precision,postgres,pg_catalog,float8,,,YES
4,location,USER-DEFINED,postgres,public,geometry,,,YES


In [31]:
filename="./restaurants.csv"
df_loaded = pd.read_csv(filename)
df_loaded[:10]

Unnamed: 0,Name,Lon,Lat
0,Morris Park Bake Shop,-73.856077,40.848447
1,Wendy'S,-73.961704,40.662942
2,Riviera Caterer,-73.98242,40.579505
3,Tov Kosher Kitchen,-73.860115,40.731174
4,Brunos On The Boulevard,-73.880383,40.764312
5,Dj Reynolds Pub And Restaurant,-73.985136,40.767692
6,Wilken'S Fine Food,-73.906851,40.619903
7,Regina Caterers,-74.005289,40.628886
8,Taste The Tropics Ice Cream,-73.948261,40.640827
9,Kosher Island,-74.137729,40.611957


In [36]:
df_loaded['Rid'] = df_loaded.index
printdf = df_loaded[:10]
print(printdf.to_string(index=False))

                          Name        Lon       Lat  Rid    Cuisine
         Morris Park Bake Shop -73.856077 40.848447    0 indonesian
                       Wendy'S -73.961704 40.662942    1 vietnamese
               Riviera Caterer -73.982420 40.579505    2 vietnamese
            Tov Kosher Kitchen -73.860115 40.731174    3      cajun
       Brunos On The Boulevard -73.880383 40.764312    4      cajun
Dj Reynolds Pub And Restaurant -73.985136 40.767692    5    chinese
            Wilken'S Fine Food -73.906851 40.619903    6    mexican
               Regina Caterers -74.005289 40.628886    7    mexican
   Taste The Tropics Ice Cream -73.948261 40.640827    8  colombian
                 Kosher Island -74.137729 40.611957    9    zambian


In [37]:
import random
def assignCuisineRandomly(df,cuisine,cname):
    for i in range(df.shape[0]):
        df.at[i,cname] = cuisine[random.randint(0,len(cuisine)-1)]

In [38]:
cuisine = ['italian', 'chinese', 'french', 'zambian', 'egyptian', 'canadian', 'mexican', 'vietnamese', 'cajun', 'korean', 'thai', 'brazilian','colombian','peruvian','ecuadorian', 'japanese','indian','malaysian','russian', 'indonesian']
assignCuisineRandomly(df_loaded,cuisine,'Cuisine')
df_loaded[-4:]

Unnamed: 0,Name,Lon,Lat,Rid,Cuisine
4996,Wagner College - Hawk' Nest,-74.092853,40.615121,4996,italian
4997,Ellen Deli & Grocery,-74.00781,40.725708,4997,chinese
4998,Crepes On Columbus,-73.961831,40.801052,4998,french
4999,Capital Grille,-73.974723,40.751244,4999,chinese


In [81]:
def formatSQL(df):
    name = df['Name'].replace("'"," ")
    sql_stmt = f"INSERT INTO test.restaurants (name, longitude, latitude, location) VALUES ('{name}',{df['Lon']},{df['Lat']},ST_GeomFromText('POINT({df['Lon']} {df['Lat']})', 4326));"

    return sql_stmt



In [82]:
#print(formatSQL(df_loaded[0]))
idx=4996
print(type(df_loaded.loc[idx].to_dict()))
df_loaded.loc[idx].to_dict()
print(formatSQL(df_loaded.loc[idx].to_dict()))

<class 'dict'>
INSERT INTO test.restaurants (name, longitude, latitude, location) VALUES ('Wagner College - Hawk  Nest',-74.09285299999999,40.61512099999999,ST_GeomFromText('POINT(-74.09285299999999 40.61512099999999)', 4326));


In [83]:
# Load data to test.restaurants table
# using ST_GeomFromText('POINT($longitude $latitude)', 4326)
try:
        # Connect to an existing database
        connection = psycopg2.connect(user=USER_NAME,
                                      password=USER_PWD,
                                      host=POSTGRES_HOST,
                                      port=POSTGRES_PORT,
                                      database=POSTGRES_DB)

        # Create a cursor to perform database operations
        cursor = connection.cursor()
        # Executing a SQL query
        cursor.execute("SELECT version();")
        # Fetch result
        record = cursor.fetchone()
        print("You are connected to - ", record, "\n")
        sql_stmt = ""
        for i in range(df_loaded.shape[0]):
            #addARestaurant(connection, df_loaded.loc[i].to_dict())
            sql_stmt = formatSQL(df_loaded.loc[i].to_dict())
            print(sql_stmt)
            cursor.execute(sql_stmt)

        connection.commit()

        #Closing the connection
        connection.close()

except (Exception, Error) as error:
        print("Error: ", error)
finally:
        if (connection):
            cursor.close()
            connection.close()
        print("PostgreSQL connection is closed")

You are connected to -  ('PostgreSQL 17.5 on x86_64-windows, compiled by msvc-19.44.35209, 64-bit',) 

INSERT INTO test.restaurants (name, longitude, latitude, location) VALUES ('Morris Park Bake Shop',-73.856077,40.848447,ST_GeomFromText('POINT(-73.856077 40.848447)', 4326));
INSERT INTO test.restaurants (name, longitude, latitude, location) VALUES ('Wendy S',-73.961704,40.662942,ST_GeomFromText('POINT(-73.961704 40.662942)', 4326));
INSERT INTO test.restaurants (name, longitude, latitude, location) VALUES ('Riviera Caterer',-73.98241999999999,40.579505,ST_GeomFromText('POINT(-73.98241999999999 40.579505)', 4326));
INSERT INTO test.restaurants (name, longitude, latitude, location) VALUES ('Tov Kosher Kitchen',-73.8601152,40.7311739,ST_GeomFromText('POINT(-73.8601152 40.7311739)', 4326));
INSERT INTO test.restaurants (name, longitude, latitude, location) VALUES ('Brunos On The Boulevard',-73.8803827,40.7643124,ST_GeomFromText('POINT(-73.8803827 40.7643124)', 4326));
INSERT INTO test.re

In [3]:
try:
    # Connect to an existing database
    connection = psycopg2.connect(user="test_user",
                                  password="tester",
                                  host="127.0.0.1",
                                  port="5432",
                                  database="test")

    # Create a cursor to perform database operations
    cursor = connection.cursor()
    # Print PostgreSQL details
    print("PostgreSQL server information")
    print(connection.get_dsn_parameters(), "\n")
    # Executing a SQL query
    cursor.execute("SELECT version();")
    # Fetch result
    record = cursor.fetchone()
    print("You are connected to - ", record, "\n")

    #Closing the connection
    connection.close()

except (Exception, Error) as error:
    print("Error while connecting to PostgreSQL", error)
finally:
    if (connection):
       cursor.close()
       connection.close()
       print("PostgreSQL connection is closed")


PostgreSQL server information
{'user': 'test_user', 'channel_binding': 'prefer', 'dbname': 'test', 'host': '127.0.0.1', 'port': '5432', 'options': '', 'sslmode': 'prefer', 'sslcompression': '0', 'sslcertmode': 'allow', 'sslsni': '1', 'ssl_min_protocol_version': 'TLSv1.2', 'gssencmode': 'disable', 'krbsrvname': 'postgres', 'gssdelegation': '0', 'target_session_attrs': 'any', 'load_balance_hosts': 'disable'} 

You are connected to -  ('PostgreSQL 17.5 on x86_64-windows, compiled by msvc-19.44.35209, 64-bit',) 

PostgreSQL connection is closed
