# Trace route

This script is responsible to trace an optimized route!

In [1]:
%pip install psycopg2-binary

Note: you may need to restart the kernel to use updated packages.


In [2]:
from IPython.core.display import HTML
display(HTML("<style>pre { white-space: pre !important; }</style>"))

In [3]:
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
from pyspark.sql.functions import explode, from_unixtime, col, to_date, sum, avg, udf, lit, date_trunc, when, max
from pyspark.sql.types import DateType, TimestampType, StructType, StructField, IntegerType, FloatType, StringType

import requests
import json
from collections import defaultdict
import math
import random
import os
import re
import string
from glob import glob
from datetime import datetime, timedelta, date, tzinfo, timezone
import psycopg2

DB_URL = "jdbc:postgresql://postgres:5432/themeparkwizard"
PROPERTIES_CUSTOM = {"user": os.environ['POSTGRES_USER'],"password": os.environ['POSTGRES_PASSWORD'], "driver": "org.postgresql.Driver"}

spark = SparkSession.builder \
    .appName("MetricPredict") \
    .config("spark.jars", "jars/postgresql-42.7.7.jar") \
    .config("spark.sql.sources.partitionOverwriteMode", "dynamic") \
    .getOrCreate()

In [4]:
conn = psycopg2.connect(
        host="postgres",
        port='5432',
        database="themeparkwizard",
        user=os.environ['POSTGRES_USER'],
        password=os.environ['POSTGRES_PASSWORD']
    )

In [5]:
with conn.cursor() as cur:
    cur.execute("""SELECT distinct id FROM themeparkwizard.dim_park_entity""")
    park_id_list = cur.fetchall()
    predicted_data = {}
    for park_id in park_id_list:
        cur.execute(f"""
    with number_row as (
        select
            entity_id,
            name,
            entity_name,
            latitude,
            longitude,
            wait_time,
            extracted_at_time,
            rating,
            row_number() over (partition by entity_id order by extracted_at_time) as rn
        from themeparkwizard.predictions_table pt
        left join themeparkwizard.dim_park_entity dpe using(entity_id)
        where was_predicted = 1 and dpe.id = '{park_id[0]}'
    ),
    avg_by_entity AS (
        SELECT
            entity_id,
            AVG(avg_standby_waittime) as alltime_avg_waittime
        FROM themeparkwizard.agg_avg_time
        GROUP BY 1
    ),
    first_group as (
        select extracted_at_time,
               wait_time,
               entity_name,
               entity_id,
               latitude,
               longitude,
               name,
               rating,
               alltime_avg_waittime
        from number_row
                 left join avg_by_entity
                           using (entity_id)
        where rn <> 1
        order by extracted_at_time, entity_id
    )
    select
        extracted_at_time,
        a.entity_id as src_node,
        b.entity_id as dst_node,
        SQRT(POWER((b.latitude - a.latitude)*111, 2) + POWER((b.longitude - a.longitude)*111, 2)) AS euclidean_distance,
        b.wait_time,
        b.alltime_avg_waittime,
        b.rating
    from first_group a
    full join first_group b
    using(extracted_at_time, name)
    order by 1,2,3
        """)
    # extracted_at_time datetime 0
    # source_node (A) string 1
    # destination_node (B) string 2
    # euclidean distance (from A to B) float 3
    # wait time (queue B) float 4
    # all time wait time (queue B) float 5
    # rating (B) float 6
        predicted_data[park_id[0]] = cur.fetchall()

In [6]:
# len(predicted_data)
for k,v in predicted_data.items():
    print(k, len(v), sep='->')

75ea578a-adc8-4116-a54d-dccb60765ef9->44604
eb3f4560-2383-4a36-9152-6b3e5ed6bc57->0
bc4005c5-8c7e-41d7-b349-cdddf1796427->0
47f90d2c-e191-4239-a466-5892ef59a88b->18416
288747d1-8b4f-4a64-867e-ea7c9b27bad8->19167
1c84a229-8862-4648-9c71-378ddd2c7693->3072
267615cc-8943-4c2a-ae2c-5da728ca591f->0


In [24]:
def calculate_weight(rating: float, queue_i: float, dist: float, queue_avg: float):
    # function = 4**(10/r)*q*(d+1)/AVG(q)
    value = 4**(10/rating)*(queue_i*(dist+1))/queue_avg
    return value, round(queue_i + 10, -1)

def fill_time_matrix(matrix, len_a, bad_node):
    check_null = 0
    new_matrix = []
    for i in range(len_a):
        new_matrix.append([None]*len_a)
        for j in range(len_a):
            try:
                new_matrix[i][j] = matrix[i][j]
            except IndexError:
                new_matrix[i][j] = bad_node
            finally:
                if new_matrix[i][j][0] == math.inf:
                    check_null += 1
    is_empty = True if len_a**2 == check_null else False
    return new_matrix, is_empty

def create_map_attr(query_result):
    map_attr = defaultdict(dict)

    for park_id in query_result:
        end_creation = False
        dest_node = ''
        attr_idx = 0
        for row in query_result[park_id]:
        # Fill map_attractions with a symbol
            for i in range(1,3):
                if not row[i] in [k for k, _ in map_attr[park_id].values()]:
                    while True:
                        key_map = ''.join(random.choices(string.ascii_uppercase, k=2))
                        if not map_attr[park_id].get(key_map):
                            break
                    map_attr[park_id][key_map] = (row[i],attr_idx)
                    attr_idx += 1
            if row[1] != row[2] and not end_creation:
                dest_node = row[2]
                end_creation = True
            if row[1] == dest_node:
                break
    return map_attr

def create_cost_by_time(query_result, len_attr):
    INF_MAX = math.inf
    bad_node = (INF_MAX, 60*24*10)
    cost_time = defaultdict(dict)
    last_node = ''

    for park_id in query_result:
        tmp_range = []
        for row in query_result[park_id]:
            # Next row on matrix!
            if last_node != row[1]:
                if last_node != '':
                    tmp_range.append(inner_tmp_range.copy())
                last_node = row[1]
                inner_tmp_range = []
            # Checkout date matrix
            if not cost_time[park_id].get(row[0]):
                if tmp_range:
                    cost_time[park_id][row[0]], is_empty = fill_time_matrix(tmp_range, len_attr[park_id], bad_node)
                    if is_empty:
                        del cost_time[park_id][row[0]]
                tmp_range = []
            # Calculate cost
            if row[1] == row[2]:
                inner_tmp_range.append(bad_node)
            else:
                inner_tmp_range.append(calculate_weight(row[6],row[4],row[3],row[5]))
    return cost_time

In [25]:
map_attractions = create_map_attr(predicted_data)
len_attractions = {k:len(map_attractions[k]) for k in map_attractions}
map_by_time = create_cost_by_time(predicted_data, len_attractions)


In [26]:
predicted_data['75ea578a-adc8-4116-a54d-dccb60765ef9'][:6]

[(datetime.datetime(2025, 9, 2, 12, 30),
  '72c7343a-f7fb-4f66-95df-c91016de7338',
  '72c7343a-f7fb-4f66-95df-c91016de7338',
  0.0,
  73.90908,
  22.62457089522438,
  4.5),
 (datetime.datetime(2025, 9, 2, 12, 40),
  '0aae716c-af13-4439-b638-d75fb1649df3',
  '0aae716c-af13-4439-b638-d75fb1649df3',
  0.0,
  5.0,
  7.491143096868747,
  4.5),
 (datetime.datetime(2025, 9, 2, 12, 40),
  '0aae716c-af13-4439-b638-d75fb1649df3',
  '0d94ad60-72f0-4551-83a6-ebaecdd89737',
  0.05326642569080827,
  16.369368,
  31.410336671827185,
  4.5),
 (datetime.datetime(2025, 9, 2, 12, 40),
  '0aae716c-af13-4439-b638-d75fb1649df3',
  '273ddb8d-e7b5-4e34-8657-1113f49262a5',
  0.15956667721046144,
  5.947694,
  7.644595231291575,
  4.5),
 (datetime.datetime(2025, 9, 2, 12, 40),
  '0aae716c-af13-4439-b638-d75fb1649df3',
  '3cba0cb4-e2a6-402c-93ee-c11ffcb127ef',
  0.12293031745044146,
  5.9155884,
  12.801651151476657,
  4.6),
 (datetime.datetime(2025, 9, 2, 12, 40),
  '0aae716c-af13-4439-b638-d75fb1649df3',
  '72

In [27]:
for idx, e in enumerate(map_by_time['75ea578a-adc8-4116-a54d-dccb60765ef9'].keys()):
    print('*'*60)
    print(f"Matrix of weights and distances for the given time: {e} [datetime]")
    print('[')
    for k in map_by_time['75ea578a-adc8-4116-a54d-dccb60765ef9'][e]:
        print(k)
    print(']')
    if idx == 2:
        print('.', '.', '.', sep='\n')
        break

************************************************************
Matrix of weights and distances for the given time: 2025-09-02 12:40:00 [datetime]
[
[(inf, 14400), (11.951121270446503, 30.0), (19.642688960442747, 20.0), (10.56600557936027, 20.0), (84.32189415378464, 80.0), (26.032460382244455, 20.0), (26.97550744470707, 20.0), (18.488207149668412, 30.0), (16.74602325460325, 20.0), (10.86225910257492, 20.0), (14.0559282569146, 40.0), (3.2467333128155205, 20.0), (9.072823691750726, 20.0), (17.588786466687658, 20.0), (10.993459870050588, 20.0), (13.882279064974572, 20.0)]
[(inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400)]
[(inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf, 14400), (inf,

In [28]:
map_attractions

defaultdict(dict,
            {'75ea578a-adc8-4116-a54d-dccb60765ef9': {'XM': ('72c7343a-f7fb-4f66-95df-c91016de7338',
               0),
              'AG': ('0aae716c-af13-4439-b638-d75fb1649df3', 1),
              'FD': ('0d94ad60-72f0-4551-83a6-ebaecdd89737', 2),
              'UZ': ('273ddb8d-e7b5-4e34-8657-1113f49262a5', 3),
              'KA': ('3cba0cb4-e2a6-402c-93ee-c11ffcb127ef', 4),
              'UQ': ('7c5e1e02-3a44-4151-9005-44066d5ba1da', 5),
              'CW': ('8183f3f2-1b59-4b9c-b634-6a863bdf8d84', 6),
              'JA': ('86a41273-5f15-4b54-93b6-829f140e5161', 7),
              'UK': ('890fa430-89c0-4a3f-96c9-11597888005e', 8),
              'OM': ('924a3b2c-6b4b-49e5-99d3-e9dc3f2e8a48', 9),
              'MB': ('9d4d5229-7142-44b6-b4fb-528920969a2c', 10),
              'LO': ('b2260923-9315-40fd-9c6b-44dd811dbe64', 11),
              'YZ': ('d9d12438-d999-4482-894b-8955fdb20ccf', 12),
              'IK': ('f163ddcd-43e1-488d-8276-2381c1db0a39', 13),
             

In [30]:
import genetic_algorithm_tour as gat

POPULATION_SIZE = 10
NUM_GENERATIONS = 10
CROSSOVER_RATE = 0.8
MUTATION_RATE = 0.02

dict_results = defaultdict(dict)
for park_id in map_attractions.keys():
    print('*'*10,f'Procreating population from park : {park_id}','*'*10)
    dict_results[park_id]['best_gene'], dict_results[park_id]['best_cost'], dict_results[park_id]['initial_time'] = gat.genetic_algorithm(
        pop_size=POPULATION_SIZE*math.ceil(len(predicted_data[park_id])/5000)+10,
        num_generations=NUM_GENERATIONS*math.ceil(len(predicted_data[park_id])/5000),
        allele_map=map_attractions[park_id],
        predicted_map=map_by_time[park_id],
        crossover_rate=CROSSOVER_RATE,
        mutation_rate=MUTATION_RATE
    )
    print('*'*10,f'End population from park : {park_id}','*'*10)
print('What an end of an Era...')

********** Procreating population from park : 75ea578a-adc8-4116-a54d-dccb60765ef9 **********
['XM', 'AG', 'FD', 'UZ', 'KA', 'UQ', 'CW', 'JA', 'UK', 'OM', 'MB', 'LO', 'YZ', 'IK', 'HO', 'RQ']
2025-09-02 12:40:00
Generation 1:
Best Fitness to f(r, q, d) = 10.0
Best individual is ['XM', 'RQ', 'HO', 'FD', 'OM', 'UZ', 'UK', 'JA', 'AG', 'CW', 'LO', 'UQ', 'KA', 'IK', 'MB', 'YZ']
Generation 2:
Best Fitness to f(r, q, d) = 94.0
Best individual is ['XM', 'RQ', 'IK', 'KA', 'HO', 'FD', 'UQ', 'OM', 'UK', 'JA', 'MB', 'AG', 'CW', 'LO', 'YZ', 'UZ']
Generation 3:
Best Fitness to f(r, q, d) = 62.0
Best individual is ['XM', 'IK', 'KA', 'HO', 'UK', 'FD', 'UQ', 'OM', 'UZ', 'JA', 'MB', 'AG', 'CW', 'LO', 'YZ', 'RQ']
Generation 4:
Best Fitness to f(r, q, d) = 7.0
Best individual is ['XM', 'YZ', 'KA', 'HO', 'FD', 'UQ', 'OM', 'UK', 'UZ', 'JA', 'MB', 'AG', 'CW', 'LO', 'IK', 'RQ']
Generation 5:
Best Fitness to f(r, q, d) = 2.0
Best individual is ['XM', 'YZ', 'KA', 'HO', 'FD', 'UQ', 'OM', 'UK', 'UZ', 'JA', 'MB', '

In [31]:
result_set = []

for park_id in dict_results:
    initial_time = min([t for t in map_by_time[park_id].keys()])
    print(f'PARK {park_id} ROUTE OSCAR')
    print('...AND THE WINNER OF BEST GENE IS:')
    print(dict_results[park_id]['best_gene'])
    print('...Starring:')
    last = None

    for node in dict_results[park_id]['best_gene']:
        if not last:
            result_set.append((park_id, map_attractions[park_id][node][0], initial_time, 0.0))
            print(f'Go to {map_attractions[park_id][node][0]} at {initial_time}')
            last = node
            continue
        weight_result, time_passed = map_by_time[park_id][initial_time][map_attractions[park_id][last][1]][map_attractions[park_id][node][1]]
        initial_time += timedelta(minutes=time_passed)
        result_set.append((park_id, map_attractions[park_id][node][0], initial_time, time_passed))
        print(f'Go to {map_attractions[park_id][node][0]} at {initial_time}')
        last = node

PARK 75ea578a-adc8-4116-a54d-dccb60765ef9 ROUTE OSCAR
...AND THE WINNER OF BEST GENE IS:
['XM', 'YZ', 'KA', 'FD', 'UQ', 'UZ', 'OM', 'UK', 'HO', 'JA', 'MB', 'AG', 'IK', 'LO', 'RQ', 'CW']
...Starring:
Go to 72c7343a-f7fb-4f66-95df-c91016de7338 at 2025-09-02 12:40:00
Go to d9d12438-d999-4482-894b-8955fdb20ccf at 2025-09-02 13:00:00
Go to 3cba0cb4-e2a6-402c-93ee-c11ffcb127ef at 2025-09-02 14:20:00
Go to 0d94ad60-72f0-4551-83a6-ebaecdd89737 at 2025-09-02 14:40:00
Go to 7c5e1e02-3a44-4151-9005-44066d5ba1da at 2025-09-02 15:00:00
Go to 273ddb8d-e7b5-4e34-8657-1113f49262a5 at 2025-09-02 15:20:00
Go to 924a3b2c-6b4b-49e5-99d3-e9dc3f2e8a48 at 2025-09-02 15:40:00
Go to 890fa430-89c0-4a3f-96c9-11597888005e at 2025-09-02 16:00:00
Go to f5aad2d4-a419-4384-bd9a-42f86385c750 at 2025-09-02 16:20:00
Go to 86a41273-5f15-4b54-93b6-829f140e5161 at 2025-09-02 17:10:00
Go to 9d4d5229-7142-44b6-b4fb-528920969a2c at 2025-09-02 18:00:00
Go to 0aae716c-af13-4439-b638-d75fb1649df3 at 2025-09-02 18:40:00
Go to f16

In [32]:

DB_URL = "jdbc:postgresql://postgres:5432/themeparkwizard"
PROPERTIES_CUSTOM = {"user": os.environ['POSTGRES_USER'],"password": os.environ['POSTGRES_PASSWORD'], "driver": "org.postgresql.Driver"}

schema = StructType([
    StructField("park_id", StringType(), False),
    StructField("entity_id", StringType(), False),
    StructField("datetime_point", TimestampType(), False),
    StructField("waiting_time", FloatType(), False)
])
spark.createDataFrame(result_set, schema)\
    .orderBy('park_id', 'datetime_point')\
    .write.jdbc(url=DB_URL, table=f"themeparkwizard.best_route", mode='overwrite', properties=PROPERTIES_CUSTOM)

In [33]:
result_set

[('75ea578a-adc8-4116-a54d-dccb60765ef9',
  '72c7343a-f7fb-4f66-95df-c91016de7338',
  datetime.datetime(2025, 9, 2, 12, 40),
  0.0),
 ('75ea578a-adc8-4116-a54d-dccb60765ef9',
  'd9d12438-d999-4482-894b-8955fdb20ccf',
  datetime.datetime(2025, 9, 2, 13, 0),
  20.0),
 ('75ea578a-adc8-4116-a54d-dccb60765ef9',
  '3cba0cb4-e2a6-402c-93ee-c11ffcb127ef',
  datetime.datetime(2025, 9, 2, 14, 20),
  80.0),
 ('75ea578a-adc8-4116-a54d-dccb60765ef9',
  '0d94ad60-72f0-4551-83a6-ebaecdd89737',
  datetime.datetime(2025, 9, 2, 14, 40),
  20.0),
 ('75ea578a-adc8-4116-a54d-dccb60765ef9',
  '7c5e1e02-3a44-4151-9005-44066d5ba1da',
  datetime.datetime(2025, 9, 2, 15, 0),
  20.0),
 ('75ea578a-adc8-4116-a54d-dccb60765ef9',
  '273ddb8d-e7b5-4e34-8657-1113f49262a5',
  datetime.datetime(2025, 9, 2, 15, 20),
  20.0),
 ('75ea578a-adc8-4116-a54d-dccb60765ef9',
  '924a3b2c-6b4b-49e5-99d3-e9dc3f2e8a48',
  datetime.datetime(2025, 9, 2, 15, 40),
  20.0),
 ('75ea578a-adc8-4116-a54d-dccb60765ef9',
  '890fa430-89c0-4a3f-9

In [34]:
# Finish session
spark.stop()

In [35]:
%pip list

Package                       Version
----------------------------- ------------
alembic                       1.12.0
altair                        5.1.2
anyio                         4.0.0
argon2-cffi                   23.1.0
argon2-cffi-bindings          21.2.0
arrow                         1.3.0
asttokens                     2.4.0
async-generator               1.10
async-lru                     2.0.4
attrs                         23.1.0
Babel                         2.13.0
backcall                      0.2.0
backports.functools-lru-cache 1.6.5
beautifulsoup4                4.12.2
bleach                        6.1.0
blinker                       1.6.3
bokeh                         3.3.0
boltons                       23.0.0
Bottleneck                    1.3.7
Brotli                        1.1.0
cached-property               1.5.2
certifi                       2023.7.22
certipy                       0.1.3
cffi                          1.16.0
charset-normalizer            3.3.0
click   