# Dependências

In [21]:
#!pip install numba==0.49.1
#!pip install numba==0.55.0
#!pip install numba
#!pip uninstall numba -y
#!pip install llvmlite

In [22]:
#!pip install llvmlite --ignore-installed

In [23]:
#!pip install gensim==3.6.0
#!pip install binary gensim==3.6.0

In [24]:
#!pip3 uninstall numpy -y

In [25]:
#!pip install numpy

# Bibliotecas

In [2]:
import pandas as pd
import numpy as np
import psycopg2 as postgres
import psycopg2.extras
import math
import csv
import random

from gensim.models import Word2Vec
from tqdm import tqdm
from sshtunnel import SSHTunnelForwarder
from tqdm import tqdm

import numba
from numba import njit
#from numba import cuda

# Funções

## Utilidades

In [3]:
def executeQuery(conn, sql, params=None):
    """
    Executes a SQL query on a PostgreSQL database and fetches every row.

    Args:
        conn (psycopg2.extensions.connection): A connection object to the database.
        sql (str): The SQL query to execute.
        params (tuple | dict | None): Optional values bound by the driver to the
            placeholders in ``sql``. When omitted, ``sql`` is sent unchanged,
            which is how existing two-argument callers behave.

    Returns:
        list | None: The fetched records, or None when the database reported
        an error (the error is printed and the transaction rolled back).
    """
    record = None
    cur = None
    try:
        cur = conn.cursor()
        cur.execute(sql, params)
        record = cur.fetchall()
    except psycopg2.Error as e:
        print(e)
        # Roll back on the connection rather than through the cursor: the
        # cursor may not exist if conn.cursor() itself was what failed.
        conn.rollback()
    finally:
        # Release the cursor on both the success and the error path.
        if cur is not None:
            cur.close()
    return record

In [4]:
def executeInsert(conn, sql, params=None):
    """
    Executes an SQL INSERT statement on a PostgreSQL database and commits it.

    Args:
        conn (psycopg2.extensions.connection): A connection object to the database.
        sql (str): The SQL INSERT statement to execute.
        params (tuple | dict | None): Optional values bound by the driver to the
            placeholders in ``sql``. When omitted, ``sql`` is sent unchanged,
            which is how existing two-argument callers behave.

    Returns:
        bool: True if the statement was executed and committed, False if the
        database reported an error (the error is printed and the transaction
        rolled back).
    """
    success = False
    cur = None
    try:
        cur = conn.cursor()
        cur.execute(sql, params)
        conn.commit()
        success = True
    except psycopg2.Error as e:
        print(e)
        # Roll back on the connection rather than through the cursor: the
        # cursor may not exist if conn.cursor() itself was what failed.
        conn.rollback()
    finally:
        # Release the cursor on both the success and the error path.
        if cur is not None:
            cur.close()

    return success

In [5]:
def getPOIInformation(conn, business_id):
    """
    Retrieves information about a Point of Interest (POI) based on its ID.

    Args:
        conn (psycopg2.extensions.connection): A connection object to the database.
        business_id (str): The unique ID of the POI.

    Returns:
        list | None: Rows holding ``checkin_count`` and ``name`` for the POI,
        or None when ``executeQuery`` reports a database error.
    """
    # NOTE(security): the ID is concatenated into the SQL text, so this is only
    # safe for trusted IDs; do not pass user-supplied values.
    sql = """
        SELECT checkin_count, name FROM pois_information WHERE id  = \'"""+str(business_id)+ """\'
    ;"""

    result = executeQuery(conn, sql)
    return result

In [15]:
def getZoningInformation(conn, object_id):
    """
    Looks up the ``id`` values stored in ``zoning_pois`` for one ``objectid``.

    Args:
        conn (psycopg2.extensions.connection): A connection object to the database.
        object_id (str): The object ID to filter ``zoning_pois.objectid`` by;
            it is converted with ``str()`` before being placed in the query.

    Returns:
        list | None: Whatever ``executeQuery`` returns for the query: the
        matching rows, or None when the database reported an error.
    """
    object_id_text = str(object_id)
    query = (
        "\n        SELECT id FROM zoning_pois WHERE objectid  = '"
        + object_id_text
        + "'\n    ;"
    )
    return executeQuery(conn, query)

In [6]:
def getBinPOIsInformation(conn, business_id, bin_number):
    """
    Lists the context POIs recorded for a central POI in one bin, nearest first.

    Args:
        conn (psycopg2.extensions.connection): A connection object to the database.
        business_id (str): The unique ID of the central POI.
        bin_number (int): The bin number to filter on.

    Returns:
        list | None: Whatever ``executeQuery`` returns: rows ordered by
        ascending ``distance_m`` with the columns below, or None when the
        database reported an error.
            - id: ``fk_poi_id_context``, the ID of the context POI.
            - name: The name of the context POI.
            - level: The level of the context POI.
            - checkin_count: The check-in count for the context POI.
            - distance_m: The stored distance between the two POIs.
    """
    query_parts = [
        "\n        SELECT fk_poi_id_context as id, name, level, checkin_count, distance_m ",
        "\n        FROM bins_pois_information ",
        "\n        WHERE fk_poi_id_center = '",
        str(business_id),
        "' AND fk_bin_number = ",
        str(bin_number),
        " order by distance_m asc;",
    ]
    return executeQuery(conn, "".join(query_parts))

In [7]:
def getPOIsDistance(conn, center_poi_id, context_poi_id):
    """
    Retrieves the stored distance between two points of interest (POIs).

    Args:
        conn (psycopg2.extensions.connection): A connection object to the database.
        center_poi_id (str): The unique ID of the central POI.
        context_poi_id (str): The unique ID of the context POI.

    Returns:
        list | None: Rows holding ``distance_m`` for the pair, or None when
        ``executeQuery`` reports a database error.
    """
    # Both IDs are emitted as quoted SQL literals. The context ID used to be
    # interpolated bare, which is invalid SQL for string IDs.
    # NOTE(security): values are concatenated into the SQL text, so this is
    # only safe for trusted IDs; do not pass user-supplied values.
    sql = (
        "\n        SELECT distance_m"
        "\n        FROM has_context_poi "
        "\n        WHERE fk_poi_id_center = '" + str(center_poi_id) + "'"
        " AND fk_poi_id_context = '" + str(context_poi_id) + "';"
    )

    result = executeQuery(conn, sql)

    return result

In [11]:
# Greedy insertion heuristic: each waypoint in W is placed, in the order given,
# at the position of the current path where it adds the least extra distance.
# The result depends on the order of W and is not guaranteed to be the
# globally shortest path.

def generate_segment(W, L, POI_path, dist=None):
    """
    Inserts every waypoint of W into a copy of POI_path at its cheapest position.

    Args:
        W (list): Waypoints (indices into the distance matrix) to insert, in order.
        L (float): Total distance of the initial path (POI_path).
        POI_path (list): Initial path as indices into the distance matrix.
            It is not modified.
        dist: Optional pairwise distance table indexed as ``dist[i][j]``.
            Defaults to the notebook-level ``distances_matrix``.

    Returns:
        list: A new path containing POI_path plus all waypoints. The first and
        last elements of POI_path stay at the ends.

    Raises:
        ValueError: If a waypoint cannot be placed, i.e. the path has fewer
            than two points or no candidate position has a comparable cost.
    """
    if dist is None:
        # Existing callers rely on the matrix built in the notebook.
        dist = distances_matrix

    final_path = list(POI_path)

    for wp in W:
        shortest_path = np.inf
        best_position = None

        # Candidate positions are the gaps between consecutive path points, so
        # the endpoints never move. Evaluate each gap without copying the path.
        for j in range(1, len(final_path)):
            p_idx = final_path[j - 1]
            n_idx = final_path[j]

            # Path length once the direct p -> n leg is removed...
            segment_distance = L - dist[p_idx][n_idx]

            # ...and replaced by the two legs p -> wp -> n.
            dst = segment_distance + dist[p_idx][wp] + dist[wp][n_idx]

            # Strict comparison keeps the first of several equally good gaps.
            if dst < shortest_path:
                shortest_path = dst
                best_position = j

        if best_position is None:
            raise ValueError(
                "cannot insert waypoint: the path needs at least two points "
                "and finite distances"
            )

        final_path.insert(best_position, wp)
        L = shortest_path

    return final_path

In [12]:
# Numba-compiled variant of generate_segment.
# NOTE: @njit compiles this function to native CPU code; nothing here runs on a
# GPU (the numba.cuda import in this notebook is commented out).
# Each waypoint in W is placed, in the order given, at the position of the
# current path where it adds the least extra distance (greedy heuristic).

@njit
def generate_segment_numba(W, L, POI_path):
    """
    Inserts every waypoint of W into a copy of POI_path at its cheapest position.

    Reads the notebook-level ``distances_matrix``.
    NOTE(review): Numba is documented to treat globals as compile-time
    constants, so the matrix should exist before the first call and later
    rebinding would not be seen - confirm for the Numba version in use.

    Args:
        W (list): Waypoints (integer indices into the matrix) to insert, in order.
        L (float): Total distance of the initial path (POI_path).
        POI_path (list): Initial path as integer indices into the matrix.

    Returns:
        list: A new path containing POI_path plus all waypoints. The first and
        last elements of POI_path stay at the ends.

    Raises:
        ValueError: If a waypoint cannot be placed, i.e. the path has fewer
            than two points or no candidate position has a comparable cost.
    """
    final_path = [pt for pt in POI_path]

    for wp in W:
        shortest_path = np.inf
        # An integer sentinel (instead of a None-initialised list) keeps every
        # variable at a single type, which nopython compilation relies on.
        best_position = -1

        # Candidate positions are the gaps between consecutive path points, so
        # the endpoints never move. Evaluate each gap without copying the path.
        for j in range(1, len(final_path)):
            p_idx = final_path[j - 1]
            n_idx = final_path[j]

            # Path length once the direct p -> n leg is removed...
            segment_distance = L - distances_matrix[p_idx, n_idx]

            # ...and replaced by the two legs p -> wp -> n.
            dst = segment_distance + distances_matrix[p_idx, wp] + distances_matrix[wp, n_idx]

            # Strict comparison keeps the first of several equally good gaps.
            if dst < shortest_path:
                shortest_path = dst
                best_position = j

        if best_position < 0:
            raise ValueError("cannot insert waypoint: the path needs at least two points")

        final_path.insert(best_position, wp)
        L = shortest_path

    return final_path

In [47]:
# Orders the POIs of one zone: the two POIs that are furthest apart become the
# path endpoints and the remaining ones are greedily inserted between them.

def Shortest_Path_FT(object_id, df):
    """
    Builds a visiting order for the POIs that belong to one zone.

    The zone's POI ids are read from the database through an SSH tunnel. The
    two POIs that are furthest apart according to the notebook-level
    ``distances_matrix`` become the endpoints, and every other POI is inserted
    between them by ``generate_segment_numba``.

    Args:
        object_id: Zone identifier, matched against ``zoning_pois.objectid``.
        df (pandas.DataFrame): POI table with a ``business_id`` column. Its row
            positions must line up with the rows/columns of ``distances_matrix``.

    Returns:
        list: ``business_id`` values in path order. Empty when the zone has
        fewer than two distinct POIs or when any step fails (the error is
        printed rather than raised, so one bad zone does not abort a batch).
    """
    import os  # local import: only needed for the optional credential overrides

    POI_path_bi = []

    try:
        # NOTE(security): the defaults preserve the values this notebook has
        # always used; set the POI_* environment variables to override them
        # instead of editing credentials in the notebook.
        with SSHTunnelForwarder(
            ('localhost', 22),
            ssh_username=os.environ.get("POI_SSH_USER", "postgres"),
            ssh_password=os.environ.get("POI_SSH_PASSWORD", "root"),
            remote_bind_address=('localhost', 5432)) as server:

            # Entering the `with` block starts the tunnel, so no explicit
            # server.start() is needed here.
            params = {'database': os.environ.get("POI_DB_NAME", "austin_test"),
                      'user': os.environ.get("POI_DB_USER", "postgres"),
                      'password': os.environ.get("POI_DB_PASSWORD", "root"),
                      'host': 'localhost',
                      'port': server.local_bind_port
                      }

            # Dict-style rows let the `id` column be read by name; the default
            # cursor returns plain tuples.
            connection = psycopg2.connect(
                cursor_factory=psycopg2.extras.RealDictCursor, **params)
            try:
                # POI ids assigned to this zone (None if the query failed).
                zone_rows = getZoningInformation(connection, object_id)
            finally:
                # The database is not needed past this point; always release it.
                connection.close()

        if zone_rows is not None and len(zone_rows) >= 2:
            business_ids = df['business_id'].to_numpy()

            # First row position of every business_id. Positions (not index
            # labels) are what distances_matrix is addressed by.
            position_of = {}
            for position, business_id in enumerate(business_ids):
                position_of.setdefault(business_id, position)

            pois_idx = [position_of[str(row['id'])] for row in zone_rows]

            # Find Ps and Pe (the two most distant POIs); max() keeps the first
            # of several equally distant pairs.
            candidate_pairs = [(first, second)
                               for first in pois_idx
                               for second in pois_idx
                               if first != second]

            if candidate_pairs:
                start_idx, end_idx = max(
                    candidate_pairs,
                    key=lambda pair: distances_matrix[pair[0]][pair[1]])

                POI_path = [start_idx, end_idx]
                L = distances_matrix[start_idx][end_idx]

                # Every zone POI that is not already an endpoint.
                W = list(set(pois_idx).difference(POI_path))

                if len(W) > 0:
                    # Pure-Python alternative: generate_segment(W, L, POI_path)
                    POI_path_fl = generate_segment_numba(W, L, POI_path)
                else:
                    POI_path_fl = POI_path.copy()

                POI_path_bi = [business_ids[pp] for pp in POI_path_fl]

    except Exception as e:
        # Best effort: report which zone failed and return the empty path.
        print("Shortest_Path_FT failed for objectid " + str(object_id) + ": " + str(e))

    return POI_path_bi

# Corpus Generation Process

In [17]:
pois_file_name = './austin-ml-updated.csv'
poi_df = pd.read_csv(pois_file_name)
print(poi_df.shape)
# Drop incomplete rows, then renumber the index from 0. Shortest_Path_FT looks
# POIs up by index label and uses that label as a row/column position in
# distances_matrix, so labels must equal positions even when rows are dropped.
poi_df = poi_df.dropna().reset_index(drop=True)
print(poi_df.shape)
poi_df.head()

(22399, 7)
(22399, 7)


Unnamed: 0,business_id,city,state,latitude,longitude,categories,checkin_count
0,N3_Gs3DnX4k9SgpwJxdEfw,Austin,TX,30.346169,-97.711458,"Shopping, Jewelry Repair, Appraisal Services, ...",14
1,tXvdYGvlEceDljN8gt2_3Q,Austin,TX,30.172706,-97.79992,"Barbers, Beauty & Spas",1
2,nTIhpR7MhsALPwg_Hh14EA,Austin,TX,30.326377,-97.704543,"Hotels, Hotels & Travel, Event Planning & Serv...",475
3,8XyEpVdAO0o6iVkVxkWosQ,Austin,TX,30.246465,-97.778738,"Home Services, Real Estate, Property Management",0
4,NVfOn7TdnHbaGH97CVB_Qg,Austin,TX,30.244902,-97.857409,"Chiropractors, Health & Medical",33


In [18]:
# Pairwise distance matrix between all POIs: entry [i][j] is the distance
# between the i-th and j-th rows of poi_df. cdist is called with its default
# metric on the raw latitude/longitude columns, so the values are in
# coordinate units rather than meters.
from scipy.spatial.distance import cdist
poi_coordinates = poi_df[['latitude', 'longitude']].values
distances_matrix = cdist(poi_coordinates, poi_coordinates)
distances_matrix[0]
Código gerado por IA. Examine e use com cuidado.

In [51]:
# Read the zoning file (the zones, by objectid, that have POIs assigned) and
# show its size plus the first rows.
zoning_file_name = './zoning_with_pois.csv'
final_zoning_gdf = pd.read_csv(zoning_file_name)
print(final_zoning_gdf.shape)
final_zoning_gdf.head()

(105, 1)


Unnamed: 0,objectid
0,3521294
1,3521440
2,3515336
3,3519350
4,3514845


In [43]:
# Zone identifiers to process, as a plain list; preview the first five.
obj_id_list = [zone_id for zone_id in final_zoning_gdf['objectid']]
obj_id_list[:5]

[3521294, 3521440, 3515336, 3519350, 3514845]

## Parallel Execution

In [48]:
import multiprocessing as mp
from timeit import default_timer as timer

start = timer()

print("Number of processors: ", mp.cpu_count())

# Step 1: One task per zone; every task receives the full POI table.
items = [(object_id, poi_df) for object_id in obj_id_list]

# Step 2: Apply `Shortest_Path_FT()` to all zones in parallel worker processes.
# The `with` block releases the workers even if a task raises; starmap blocks
# until every result is back, so nothing is lost when the pool exits.
with mp.Pool(mp.cpu_count()) as pool:
    results = pool.starmap(Shortest_Path_FT, items)

# Step 3: Write one CSV row per zone (the ordered business ids of its path).
# The `with` block closes the file even if writing fails.
file_name = './austin-shortest_path-zoning-FT_gpu.csv'
with open(file_name, "w", newline='') as csv_file_02:
    writer_path = csv.writer(csv_file_02, delimiter=',')
    writer_path.writerows(results)

# Print the elapsed time. The label is kept as-is, but the work above runs in
# CPU worker processes.
print("Elapsed time with GPU:", timer() - start)

Number of processors:  20
with GPU: 160.79571105900686
