# Trace route

This script is responsible for tracing an optimized route!

In [None]:
# Install the psycopg2 binary wheel; psycopg2 is imported and used for the direct PostgreSQL connection below.
%pip install psycopg2-binary

In [None]:
from IPython.core.display import HTML
# Inject CSS so <pre> blocks keep "white-space: pre" (no wrapping of long printed lines).
display(HTML("<style>pre { white-space: pre !important; }</style>"))

In [None]:
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
from pyspark.sql.functions import explode, from_unixtime, col, to_date, sum, avg, udf, lit, date_trunc, when, max
from pyspark.sql.types import DateType, TimestampType, StructType, StructField, IntegerType, FloatType, StringType

import requests
import json
from collections import defaultdict
import math
import random
import os
import re
import string
from glob import glob
from datetime import datetime, timedelta, date, tzinfo, timezone
import psycopg2

# JDBC endpoint and credentials used by Spark when talking to PostgreSQL.
DB_URL = "jdbc:postgresql://postgres:5432/themeparkwizard"
PROPERTIES_CUSTOM = {
    "user": os.environ['POSTGRES_USER'],
    "password": os.environ['POSTGRES_PASSWORD'],
    "driver": "org.postgresql.Driver",
}

# Create (or reuse) the Spark session with the PostgreSQL JDBC jar on the
# classpath and dynamic partition overwrite enabled.
spark = (
    SparkSession.builder
    .appName("MetricPredict")
    .config("spark.jars", "jars/postgresql-42.7.7.jar")
    .config("spark.sql.sources.partitionOverwriteMode", "dynamic")
    .getOrCreate()
)

In [None]:
# Direct psycopg2 connection to the themeparkwizard database; credentials are
# read from the environment, in the same order as before (user, then password).
pg_settings = {
    "host": "postgres",
    "port": '5432',
    "database": "themeparkwizard",
    "user": os.environ['POSTGRES_USER'],
    "password": os.environ['POSTGRES_PASSWORD'],
}
conn = psycopg2.connect(**pg_settings)

In [None]:
# Query returning, for a single park, one row per (timestamp, source attraction,
# destination attraction) pair built from the predicted wait times.
#
# - number_row: predicted rows (was_predicted = 1) of the requested park,
#   numbered per entity by extracted_at_time.
# - avg_by_entity: average of avg_standby_waittime per entity.
# - first_group: number_row without each entity's first row (rn <> 1), joined
#   with the per-entity average.
# - final select: self full join on (extracted_at_time, name), which pairs every
#   entity with every entity sharing the same timestamp and name.
#
# The park id is bound as a query parameter (%s) instead of being formatted
# into the SQL text, so quoting is handled by the driver.
#
# Result columns (by position):
#   0 extracted_at_time            datetime
#   1 source_node (A)              string
#   2 destination_node (B)         string
#   3 euclidean distance (A to B)  float
#     (degree deltas are multiplied by 111 -- presumably km per degree; confirm)
#   4 wait time (queue B)          float
#   5 all time wait time (queue B) float
#   6 rating (B)                   float
ROUTE_EDGES_SQL = """
    with number_row as (
        select
            entity_id,
            name,
            entity_name,
            latitude,
            longitude,
            wait_time,
            extracted_at_time,
            rating,
            row_number() over (partition by entity_id order by extracted_at_time) as rn
        from themeparkwizard.predictions_table pt
        left join themeparkwizard.dim_park_entity dpe using(entity_id)
        where was_predicted = 1 and dpe.id = %s
    ),
    avg_by_entity AS (
        SELECT
            entity_id,
            AVG(avg_standby_waittime) as alltime_avg_waittime
        FROM themeparkwizard.agg_avg_time
        GROUP BY 1
    ),
    first_group as (
        select extracted_at_time,
               wait_time,
               entity_name,
               entity_id,
               latitude,
               longitude,
               name,
               rating,
               alltime_avg_waittime
        from number_row
                 left join avg_by_entity
                           using (entity_id)
        where rn <> 1
        order by extracted_at_time, entity_id
    )
    select
        extracted_at_time,
        a.entity_id as src_node,
        b.entity_id as dst_node,
        SQRT(POWER((b.latitude - a.latitude)*111, 2) + POWER((b.longitude - a.longitude)*111, 2)) AS euclidean_distance,
        b.wait_time,
        b.alltime_avg_waittime,
        b.rating
    from first_group a
    full join first_group b
    using(extracted_at_time, name)
    order by 1,2,3
"""

with conn.cursor() as cur:
    # Every distinct park id; each fetched row is a 1-tuple.
    cur.execute("""SELECT distinct id FROM themeparkwizard.dim_park_entity""")
    park_id_list = cur.fetchall()

    # park id -> list of edge rows for that park (column layout documented above).
    predicted_data = {}
    for park_id in park_id_list:
        cur.execute(ROUTE_EDGES_SQL, (park_id[0],))
        predicted_data[park_id[0]] = cur.fetchall()

In [None]:
# Print "<park id>-><number of fetched edge rows>" for every park.
for fetched_park, fetched_rows in predicted_data.items():
    row_count = len(fetched_rows)
    print(fetched_park, row_count, sep='->')

In [None]:
def calculate_weight(rating: float, queue_i: float, dist: float, queue_avg: float):
    """Return the cost of moving to an attraction and the time it takes.

    The cost follows ``4**(10/r) * q * (d+1) / AVG(q)`` where ``r`` is the
    rating, ``q`` the current queue, ``d`` the distance and ``AVG(q)`` the
    average queue.

    Args:
        rating: rating of the destination attraction.
        queue_i: wait time of the destination queue.
        dist: distance from the source to the destination.
        queue_avg: average wait time of the destination queue.

    Returns:
        A ``(weight, elapsed)`` tuple; ``elapsed`` is ``queue_i + 10`` rounded
        to the nearest multiple of ten.
    """
    rating_factor = 4 ** (10 / rating)
    queue_load = queue_i * (dist + 1)
    weight = rating_factor * queue_load / queue_avg

    elapsed = round(queue_i + 10, -1)
    return weight, elapsed

def fill_time_matrix(matrix, len_a, bad_node):
    """Return a ``len_a`` x ``len_a`` copy of ``matrix`` padded with ``bad_node``.

    Cells that exist in ``matrix`` are copied; every missing cell (short row or
    missing row) is filled with ``bad_node``. Rows and columns beyond ``len_a``
    are dropped.

    Args:
        matrix: list of rows, possibly ragged or shorter than ``len_a``.
        len_a: side length of the resulting square matrix.
        bad_node: filler value for cells absent from ``matrix``.

    Returns:
        A new list of ``len_a`` lists, each holding ``len_a`` cells.
    """
    square = []
    available_rows = len(matrix)
    for row_idx in range(len_a):
        # Take what the source row offers (if the row exists at all) ...
        known = list(matrix[row_idx][:len_a]) if row_idx < available_rows else []
        # ... and pad the remainder of the row with the filler.
        square.append(known + [bad_node] * (len_a - len(known)))
    return square

def create_map_attr(query_result):
    """Assign every attraction of each park a random symbol and a matrix index.

    Rows are scanned in order; each entity id found in column 1 (source) or
    column 2 (destination) that has not been seen yet for the park receives a
    random two-letter uppercase symbol and the next free index (0, 1, 2, ...).
    Scanning of a park stops as soon as the source column reaches the first
    destination that differed from its source, i.e. once the first source's
    block of rows has been consumed.

    Args:
        query_result: mapping ``park_id -> rows``; only ``row[1]`` and
            ``row[2]`` of each row are read.

    Returns:
        ``defaultdict(dict)`` mapping ``park_id -> {symbol: (entity_id, index)}``.
        Parks without rows get no entry.

    Raises:
        RuntimeError: if a park needs more symbols than the 26 * 26 two-letter
            combinations available (the symbol search could never terminate).
    """
    max_symbols = len(string.ascii_uppercase) ** 2
    map_attr = defaultdict(dict)

    for park_id, rows in query_result.items():
        # Entity ids already mapped for this park; a set keeps the membership
        # test O(1) instead of rebuilding a list of all values for every row.
        seen_entities = set()
        end_creation = False
        dest_node = ''

        for row in rows:
            # Looked up lazily so that parks without any row stay absent.
            park_map = map_attr[park_id]

            # Give a symbol to the source and the destination when they are new.
            for entity_id in (row[1], row[2]):
                if entity_id in seen_entities:
                    continue
                if len(park_map) >= max_symbols:
                    raise RuntimeError(
                        f"park {park_id!r} has more than {max_symbols} attractions; "
                        "no free two-letter symbol is left"
                    )
                # Draw random symbols until an unused one comes up.
                while True:
                    key_map = ''.join(random.choices(string.ascii_uppercase, k=2))
                    if key_map not in park_map:
                        break
                # The next index equals the number of symbols assigned so far.
                park_map[key_map] = (entity_id, len(park_map))
                seen_entities.add(entity_id)

            # Remember the first destination that differs from its source ...
            if row[1] != row[2] and not end_creation:
                dest_node = row[2]
                end_creation = True
            # ... and stop once that node shows up as a source.
            if row[1] == dest_node:
                break
    return map_attr

def create_cost_by_time(query_result, len_attr):
    """Build one square cost matrix per park and timestamp.

    Args:
        query_result: mapping ``park_id -> rows`` where each row is
            ``(extracted_at_time, src_node, dst_node, distance, wait_time,
            alltime_avg_waittime, rating)``.
        len_attr: mapping ``park_id -> side length`` of that park's matrices.

    Returns:
        ``defaultdict(dict)`` such that ``cost_time[park_id][extracted_at_time]``
        is a ``len_attr[park_id]`` x ``len_attr[park_id]`` list of lists. Each
        cell is the ``(weight, elapsed)`` tuple from ``calculate_weight`` for
        that source/destination pair; self loops and cells with no data hold
        the "bad node" ``(inf, 60*24*10)``. Parks without rows get no entry.

    Note:
        Matrix rows follow the order in which sources first appear within a
        timestamp and columns the order of the destination rows of that
        source. This assumes the rows arrive ordered by (time, source,
        destination) and that every timestamp lists the attractions in the
        same order as the index map used by the caller -- TODO confirm.
    """
    # Cost used for self loops and for padding: infinite weight, ten days in minutes.
    bad_node = (math.inf, 60 * 24 * 10)
    cost_time = defaultdict(dict)

    for park_id, rows in query_result.items():
        # Group first, build afterwards: a matrix is only complete once every
        # row of its timestamp has been seen, and nothing may carry over from
        # one timestamp (or one park) to the next.
        rows_by_time = {}  # extracted_at_time -> {src_node: [cell, ...]}
        for row in rows:
            moment, src_node, dst_node = row[0], row[1], row[2]
            cells = rows_by_time.setdefault(moment, {}).setdefault(src_node, [])
            if src_node == dst_node:
                # Staying on the same attraction is never a valid move.
                cells.append(bad_node)
            else:
                cells.append(calculate_weight(row[6], row[4], row[3], row[5]))

        for moment, cells_by_src in rows_by_time.items():
            # dicts preserve insertion order, so the matrix rows keep the
            # order in which the sources appeared.
            cost_time[park_id][moment] = fill_time_matrix(
                list(cells_by_src.values()), len_attr[park_id], bad_node
            )
    return cost_time

In [None]:
# Symbol/index map per park, the number of attractions per park, and the
# per-timestamp cost matrices derived from the fetched rows.
map_attractions = create_map_attr(predicted_data)
len_attractions = {
    park: len(symbols) for park, symbols in map_attractions.items()
}
map_by_time = create_cost_by_time(predicted_data, len_attractions)


In [None]:
# Preview the first three cost matrices of one hard-coded park.
SAMPLE_PARK_ID = '1c84a229-8862-4648-9c71-378ddd2c7693'
sample_matrices = map_by_time[SAMPLE_PARK_ID]

for position, moment in enumerate(sample_matrices):
    print('*'*60)
    print(f"Matrix of weights and distances for the given time: {moment} [datetime]")
    print('[')
    for matrix_row in sample_matrices[moment]:
        print(matrix_row)
    print(']')
    # After the third matrix, print an ellipsis and stop.
    if position == 2:
        print('.', '.', '.', sep='\n')
        break

In [None]:
# Display the per-park mapping of symbol -> (entity id, matrix index).
map_attractions

In [None]:
import genetic_algorithm_tour as gat

# Base hyper-parameters of the genetic algorithm; population size and the
# number of generations are scaled per park by its amount of fetched rows.
POPULATION_SIZE = 10
NUM_GENERATIONS = 10
CROSSOVER_RATE = 0.8
MUTATION_RATE = 0.01

# park id -> {'best_gene': ..., 'best_cost': ..., 'initial_time': ...}
dict_results = defaultdict(dict)
for park_id in map_attractions:
    print('*'*10,f'Procreating population from park : {park_id}','*'*10)

    # One scaling step for every started block of 5000 rows.
    scale = math.ceil(len(predicted_data[park_id])/5000)
    best_gene, best_cost, start_time = gat.genetic_algorithm(
        pop_size=POPULATION_SIZE*scale+10,
        num_generations=NUM_GENERATIONS*scale,
        allele_map=map_attractions[park_id],
        predicted_map=map_by_time[park_id],
        crossover_rate=CROSSOVER_RATE,
        mutation_rate=MUTATION_RATE
    )
    park_result = dict_results[park_id]
    park_result['best_gene'] = best_gene
    park_result['best_cost'] = best_cost
    park_result['initial_time'] = start_time

    print('*'*10,f'End population from park : {park_id}','*'*10)
print('What an end of an Era...')

In [None]:
# Rows of (park id, entity id, arrival time, time spent on the leg) describing
# the best route found for every park.
result_set = []

for park_id in dict_results:
    # The route starts at the earliest timestamp that has a cost matrix.
    initial_time = min(map_by_time[park_id])
    winning_gene = dict_results[park_id]['best_gene']
    symbols = map_attractions[park_id]

    print(f'PARK {park_id} ROUTE OSCAR')
    print('...AND THE WINNER OF BEST GENE IS:')
    print(winning_gene)
    print('...Starring:')
    last = None

    for node in winning_gene:
        entity_id, node_idx = symbols[node]
        if last:
            # Cost cell of the leg last -> node, read from the matrix stored
            # under the current clock value; only the elapsed time is used.
            _, time_passed = map_by_time[park_id][initial_time][symbols[last][1]][node_idx]
            initial_time += timedelta(minutes=time_passed)
        else:
            # First stop of the route: no time has elapsed yet.
            time_passed = 0.0
        result_set.append((park_id, entity_id, initial_time, time_passed))
        print(f'Go to {entity_id} at {initial_time}')
        last = node

In [None]:

# JDBC endpoint and credentials for the write below.
DB_URL = "jdbc:postgresql://postgres:5432/themeparkwizard"
PROPERTIES_CUSTOM = {
    "user": os.environ['POSTGRES_USER'],
    "password": os.environ['POSTGRES_PASSWORD'],
    "driver": "org.postgresql.Driver",
}

# Column layout of the tuples collected in result_set; no column is nullable.
schema = StructType([
    StructField("park_id", StringType(), False),
    StructField("entity_id", StringType(), False),
    StructField("datetime_point", TimestampType(), False),
    StructField("waiting_time", FloatType(), False),
])

# Order the route rows by park and time, then overwrite the best_route table.
best_route_df = spark.createDataFrame(result_set, schema).orderBy('park_id', 'datetime_point')
best_route_df.write.jdbc(
    url=DB_URL,
    table="themeparkwizard.best_route",
    mode='overwrite',
    properties=PROPERTIES_CUSTOM,
)

In [None]:
# Display the collected (park_id, entity_id, datetime_point, waiting_time) tuples.
result_set

In [None]:
# Stop the Spark session created at the top of the notebook.
spark.stop()

In [None]:
# List the packages installed in the current environment.
%pip list