In [197]:
import os
import psycopg2
from psycopg2.extras import execute_values
import aiosql
from dotenv import load_dotenv

# import environment variables
# (override=True: values from the .env file take precedence over variables
# that are already set in the process environment)
load_dotenv(override=True)

# import sql from folder
queries = aiosql.from_path("./sql", psycopg2)

# prepare db connection
user = os.getenv('USER')
pwd  = os.getenv('PASS')
host = os.getenv('HOST')
port = os.getenv('PORT')
db   = os.getenv('DB')

# fail early with a readable message: unset keyword arguments are ignored by
# psycopg2, so without this check the driver would silently fall back to its
# own defaults instead of the intended server
missing = [
    name
    for name, value in (('USER', user), ('HOST', host), ('PORT', port), ('DB', db))
    if value is None
]
if missing:
    raise RuntimeError(f"missing database settings in environment/.env: {', '.join(missing)}")

# pass the settings as keyword arguments instead of interpolating them into a
# URI: characters such as '@', ':', '/' or '%' in the password or user name
# would otherwise corrupt the connection string
connect_db = psycopg2.connect(user=user, password=pwd, host=host, port=port, dbname=db)

#### cleanup and validate

In [117]:
# Run the `cleanup_processing_tables` query loaded from the ./sql folder.
# Using the psycopg2 connection as a context manager scopes the call to one
# transaction (committed on normal exit, rolled back if the call raises);
# the connection itself stays open for the following cells.
with connect_db as conn:
    queries.dml.cleanup_processing_tables(conn)

#### download sources and copy data to DB

#### preprocess data in the DB

**Select the list of POI IDs that will serve as the lower and upper bounds for the parallelized processing.**

In [186]:
# chunk size handed to the bounds query
chunk_size = 100000

# fetch all rows of the bounds query inside one transaction scope
with connect_db:
    bounds = list(queries.dml.find_lower_and_upper_bounds(connect_db, chunk_size=chunk_size))

# keep only the first column of every returned row
bounds_list = [row[0] for row in bounds]

print(len(bounds_list))
print(bounds_list)

243
[2595910006465660600, 2595959187947121400, 2595977728678422864, 2596038815262425256, 2596061951861706989, 2596508118642192148, 2596514704519076776, 2596522516798644529, 2596543751360019293, 2596572429496641822, 2596595174449681934, 2596615419343486848, 2596625763320190766, 2596636185404548508, 2596647617737599232, 2596660404125752225, 2596678111634635496, 2596695497984561871, 2596710285733230134, 2596721266333398667, 2596730858502529511, 2596742953945933948, 2596751055002619907, 2596764483599622573, 2596785362115815260, 2596808314629816447, 2596822723471149289, 2596828373580491376, 2596832337473543654, 2596839782064755084, 2596845959931253465, 2596849035572062493, 2596852002967843193, 2596854333143251726, 2596858893307217823, 2596868600776226529, 2596874565281446969, 2596880193962204418, 2596887614800008123, 2596892089021447258, 2596902392924902577, 2596905022678330574, 2596910492216030031, 2596915831228760127, 2596928972688798425, 2596947734876446947, 2596967901998901663, 25969840

In [198]:
# Run the `test` query once per pair of consecutive entries in bounds_list;
# each pair is passed as (lower_bound, upper_bound) and the fetched rows are
# printed.
# The loop runs inside `with connect_db:` so the transaction that psycopg2
# opens implicitly is ended when the loop finishes (commit on success,
# rollback on error). Without it the shared connection would stay in an open
# -- or, after a failing query, aborted -- transaction for the cells below.
with connect_db:
    for lower_bound, upper_bound in zip(bounds_list, bounds_list[1:]):
        counts = list(queries.dml.test(connect_db, lower_bound=lower_bound, upper_bound=upper_bound))
        print(counts)

[(2595910006465660600, 2595959187947121400, 100000)]
[(2595959187947121400, 2595977728678422864, 100000)]
[(2595977728678422864, 2596038815262425256, 100000)]
[(2596038815262425256, 2596061951861706989, 100000)]
[(2596061951861706989, 2596508118642192148, 100000)]
[(2596508118642192148, 2596514704519076776, 100000)]
[(2596514704519076776, 2596522516798644529, 100000)]
[(2596522516798644529, 2596543751360019293, 100000)]
[(2596543751360019293, 2596572429496641822, 100000)]
[(2596572429496641822, 2596595174449681934, 100000)]
[(2596595174449681934, 2596615419343486848, 100000)]
[(2596615419343486848, 2596625763320190766, 100000)]
[(2596625763320190766, 2596636185404548508, 100000)]
[(2596636185404548508, 2596647617737599232, 100000)]
[(2596647617737599232, 2596660404125752225, 100000)]
[(2596660404125752225, 2596678111634635496, 100000)]
[(2596678111634635496, 2596695497984561871, 100000)]
[(2596695497984561871, 2596710285733230134, 100000)]
[(2596710285733230134, 2596721266333398667, 10

In [68]:
# Manual run of the join_vertex_2_edge statement for a single pair of bounds.
# NOTE(review): the bounds are hard-coded, and the upper bound differs from the
# second entry of the bounds list output recorded in this notebook
# (2595959187947121400) -- confirm these are still the intended values.
lower_bound = 2595910006465660600
upper_bound = 2595959187942176414
params = (lower_bound, upper_bound)

# `with connect_db:` commits once the statement succeeded and rolls back if it
# raises. psycopg2 opens a transaction implicitly, so without this the work of
# the call would never be committed and the connection would be left inside an
# open transaction.
with connect_db:
    with connect_db.cursor() as cur:
        # the raw SQL text of the aiosql query is executed with positional parameters
        cur.execute(queries.dml.join_vertex_2_edge.sql, params)

call pgnetworks_staging.join_vertex_2_edge(%s, %s);
