In [None]:
import pandas as pd
import numpy as np

import ipywidgets as widgets
from ipywidgets import interact

from datetime import timedelta

from src.data_load import load_tables, load_online_instance, load_directorio_hist_df
from src.filtering import flexible_filter
from src.online_algorithms import filter_dfs_for_insertion, get_drivers
from src.metrics import collect_hist_baseline_dfs
from src.experimentation_config import *
from src.config import *
from src.distance_utils import distance

# Notebook configuration: data root, instance to inspect and distance method.
data_path = '../data'

instance = 'instRD1'
distance_method = 'haversine'

# Load the base tables, the online instance split (real / static / dynamic labors)
# and the historical driver directory for this instance.
directorio_df, labors_raw_df, cities_df, duraciones_df, valid_cities = load_tables(data_path, generate_labors=False)
labors_real_df, labors_static_df, labors_dynamic_df = load_online_instance(data_path, instance, labors_raw_df)
directorio_hist_df = load_directorio_hist_df(data_path, instance)
# Latest acceptable arrival = scheduled time plus TIEMPO_GRACIA minutes
# (TIEMPO_GRACIA presumably comes from one of the star imports above — verify).
labors_dynamic_df['latest_arrival_time'] = labors_dynamic_df['schedule_date'] + timedelta(minutes=TIEMPO_GRACIA)

# fechas_map is not imported by name; presumably provided by a star import — verify.
fechas = fechas_map(instance)

# Load data

In [2]:
# Build the historical-baseline instance name by replacing the character at
# index 5 of `instance` with 'S' (e.g. 'instRD1' -> 'instRS1').
hist_inst = f'{instance[:5]}S{instance[6:]}'
# Historical baseline labors and moves for the same dates and distance method.
labors_hist_df, moves_hist_df = collect_hist_baseline_dfs(data_path, hist_inst, fechas, distance_method)

In [3]:
import pickle
import os

# Result sets to load: one pickle per metric name; `alphas` is passed through to the loader.
metrics = ['hybrid']
alphas = [0]

def collect_alpha_results_to_df(data_path: str, instance: str, dist_method: str, metrics: list, alphas: list):
    """Load pickled static-run results and stack them into two DataFrames.

    For each metric the file
    ``{data_path}/resultados/online_operation/{instance}/{dist_method}/res_{metric}_static.pkl``
    is read when it exists; missing files are skipped. Each pickle must hold a
    5-tuple ``(inc_values, duration, results_df, moves_df, metrics_df)``, of which
    only ``results_df`` and ``moves_df`` are used.

    Both frames are sorted by city/date/service_id/labor_id (when non-empty),
    their datetime columns are converted to the America/Bogota timezone, and
    their id-like columns are rendered as integer strings ('' for missing).

    Args:
        data_path: Root data directory.
        instance: Instance name used in the results path.
        dist_method: Distance method sub-folder name.
        metrics: Metric names; one pickle file per metric.
        alphas: Iterated over, but the value is not part of the file path.
            NOTE(review): with more than one alpha the same file is loaded
            once per alpha and its rows are duplicated — confirm intent.

    Returns:
        tuple: ``(labors_algo_df, moves_algo_df)``; both are empty DataFrames
        when no result file was found.
    """
    def _id_to_str(value):
        # Missing or blank ids become ''; everything else goes through
        # float -> int -> str so that e.g. 149.0 and '149' both yield '149'.
        if pd.isna(value) or value == '':
            return ''
        return str(int(float(value)))

    sort_cols = ["city", "date", "service_id", "labor_id"]
    datetime_cols = [
        "labor_created_at",
        "labor_start_date",
        "labor_end_date",
        "created_at",
        "schedule_date",
        "actual_start",
        "actual_end",
    ]
    id_cols = ['city', 'alfred', 'service_id', 'assigned_driver']

    # Collect the per-file frames and concatenate once at the end instead of
    # growing a DataFrame inside the loop.
    labors_frames = []
    moves_frames = []

    for metric in metrics:
        upload_path = f'{data_path}/resultados/online_operation/{instance}/{dist_method}/res_{metric}_static.pkl'

        for _alpha in alphas:
            if not os.path.exists(upload_path):
                continue
            with open(upload_path, "rb") as f:
                # NOTE: pickle.load can execute arbitrary code; only load
                # result files produced by this project.
                res = pickle.load(f)
            _inc_values, _duration, results_df, moves_df, _metrics_df = res

            if not results_df.empty:
                results_df = results_df.sort_values(sort_cols)
            if not moves_df.empty:
                moves_df = moves_df.sort_values(sort_cols)

            for df in (results_df, moves_df):
                # Normalize datetime columns to Bogotá tz
                for col in datetime_cols:
                    if col in df.columns:
                        df[col] = (
                            pd.to_datetime(df[col], errors="coerce", utc=True)
                            .dt.tz_convert("America/Bogota")
                        )
                for col in id_cols:
                    if col in df.columns:
                        df[col] = df[col].apply(_id_to_str)

            # labor_id is normalised for the labors frame only (the moves frame
            # keeps its original labor_id values). Guard the column so an empty
            # frame without it does not raise, and tolerate '' like the other ids.
            if 'labor_id' in results_df.columns:
                results_df['labor_id'] = results_df['labor_id'].apply(_id_to_str)

            labors_frames.append(results_df)
            moves_frames.append(moves_df)

    labors_algo_df = pd.concat(labors_frames) if labors_frames else pd.DataFrame()
    moves_algo_df = pd.concat(moves_frames) if moves_frames else pd.DataFrame()

    return labors_algo_df, moves_algo_df

# Load the static algorithm results using the notebook-wide `distance_method`
# (same setting used for the historical baseline) instead of a repeated literal.
labors_algo_static_df, moves_algo_static_df = collect_alpha_results_to_df(data_path, instance, distance_method, metrics, alphas)

# Control example

In [35]:
labors_algo_static_df.columns

Index(['service_id', 'labor_id', 'labor_type', 'labor_name', 'labor_category',
       'labor_price', 'labor_created_at', 'labor_start_date', 'labor_end_date',
       'alfred', 'shop', 'created_at', 'schedule_date', 'client_type',
       'paying_customer', 'state_service', 'start_address_id',
       'start_address_point', 'end_address_id', 'end_address_point', 'city',
       'address_id', 'address_point', 'address_name', 'map_start_point',
       'map_end_point', 'assigned_driver', 'actual_start', 'actual_end',
       'dist_km', 'date', 'n_drivers'],
      dtype='object')

In [4]:
moves_algo_static_df.columns

Index(['service_id', 'labor_id', 'labor_context_id', 'labor_name',
       'labor_category', 'assigned_driver', 'schedule_date', 'actual_start',
       'actual_end', 'start_point', 'end_point', 'distance_km', 'duration_min',
       'city', 'date'],
      dtype='object')

In [6]:
city = '149'
date = '2026-01-08'

In [7]:
# Drivers that appear as `assigned_driver` in the static solution for this city/date.
used_drivers = flexible_filter(labors_algo_static_df,
                city=city,
                schedule_date=date)['assigned_driver'].unique().tolist()

# Drivers (`alfred` column) listed in the historical directory for this city/date.
possible_drivers = flexible_filter(directorio_hist_df,
                city=city,
                date=date)['alfred'].unique().tolist()

In [8]:
# Directory drivers that received no labor in the static solution (directory order preserved).
unused_drivers = [i for i in possible_drivers if i not in used_drivers]
unused_drivers

['10449', '10500', '11714', '11988', '2331', '69861', '70431']

In [9]:
# Dynamic labors restricted to the control city and date.
labors_dynamic_filtered_df = flexible_filter(
    labors_dynamic_df,
    city=city,
    schedule_date=date)

### Nuevo servicio llega a las 7:52 a.m. para las 10:30 a.m. En este caso es una sola labor de transporte.

In [10]:
# Use the first filtered dynamic labor as the incoming service for this walkthrough.
# NOTE(review): raises IndexError if the filter returned no rows.
new_service = labors_dynamic_filtered_df.iloc[0,:]

In [11]:
# Summary of the incoming service: identifiers plus creation and scheduled times of day.
print(f'service_id: \t{new_service["service_id"]}')
print(f'labor_id: \t{new_service["labor_id"]}')
print(f'created_at: \t{new_service["created_at"].time()}')
print(f'schedule_date: \t{new_service["schedule_date"].time()}')


service_id: 	254688
labor_id: 	350824
created_at: 	07:52:33.675000
schedule_date: 	10:30:00


### Tomar un conductor y revisar sus labores

In [32]:
# Candidate drivers for the control city/date, built from the static solution
# and the historical directory.
drivers = get_drivers(
            labors_algo_df=labors_algo_static_df,
            directorio_hist_df=directorio_hist_df,
            city=city,
            fecha=date,
            get_all=True)

# Hard-coded pick (position 6) of the driver to inspect in the cells below.
driver = drivers[6]
print(f'Driver: {driver}')

Driver: 14671


In [33]:
# Moves of the selected driver for the control city/date, in chronological order.
# Uses the `city` / `date` variables defined earlier rather than repeating the
# literals, so changing the control example updates this cell too.
df = flexible_filter(moves_algo_static_df,
                city=city,
                schedule_date=date,
                assigned_driver=driver).sort_values(['schedule_date', 'actual_start'])
df[['labor_context_id', 'schedule_date', 'actual_start', 'actual_end', 'start_point', 'end_point']]

Unnamed: 0,labor_context_id,schedule_date,actual_start,actual_end,start_point,end_point
891,349568_free,2026-01-08 16:30:00-05:00,2026-01-08 09:00:00-05:00,2026-01-08 15:48:34.777186-05:00,POINT (-74.1122312 4.7384985),POINT (-74.1122312 4.7384985)
892,349568_move,2026-01-08 16:30:00-05:00,2026-01-08 15:48:34.777186-05:00,2026-01-08 16:00:00-05:00,POINT (-74.1122312 4.7384985),POINT (-74.065413 4.759951)
893,349568_labor,2026-01-08 16:30:00-05:00,2026-01-08 16:00:00-05:00,2026-01-08 16:50:12.898351-05:00,POINT (-74.065413 4.759951),POINT (-74.0588765 4.7905312)


In [34]:
# Labors and moves of the selected driver that are relevant for inserting the
# new service; the service's creation time is passed as the reference instant.
labors_driver_df, moves_driver_df = filter_dfs_for_insertion(
    labors_algo_static_df, 
    moves_algo_static_df,
    city=city,
    fecha=date,
    created_at=new_service['created_at'],
    driver=driver
)

# Show the key timing/position columns first, followed by the remaining columns.
order = ['labor_id','labor_context_id','schedule_date', 'actual_start', 'actual_end', 'start_point', 'end_point']
order += [j for j in moves_driver_df.columns.tolist() if j not in order]
display(labors_driver_df)
display(moves_driver_df[order])

Unnamed: 0,service_id,labor_id,labor_type,labor_name,labor_category,labor_price,labor_created_at,labor_start_date,labor_end_date,alfred,...,address_point,address_name,map_start_point,map_end_point,assigned_driver,actual_start,actual_end,dist_km,date,n_drivers
0,253543,349568,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-06-11 09:13:52.347000-05:00,2026-01-08 16:36:00-05:00,2026-01-08 20:13:00-05:00,5832,...,POINT (-74.0805306 4.755374499999999),casa,POINT (-74.065413 4.759951),POINT (-74.0588765 4.7905312),14671,2026-01-08 16:00:00-05:00,2026-01-08 16:50:12.898351-05:00,3.476648,2026-01-08,19


Unnamed: 0,labor_id,labor_context_id,schedule_date,actual_start,actual_end,start_point,end_point,service_id,labor_name,labor_category,assigned_driver,distance_km,duration_min,city,date
0,349568,349568_free,2026-01-08 16:30:00-05:00,2026-01-08 09:00:00-05:00,2026-01-08 15:48:34.777186-05:00,POINT (-74.1122312 4.7384985),POINT (-74.1122312 4.7384985),253543,FREE_TIME,FREE_TIME,14671,0.0,408.6,149,2026-01-08
1,349568,349568_move,2026-01-08 16:30:00-05:00,2026-01-08 15:48:34.777186-05:00,2026-01-08 16:00:00-05:00,POINT (-74.1122312 4.7384985),POINT (-74.065413 4.759951),253543,DRIVER_MOVE,DRIVER_MOVE,14671,5.71019,11.4,149,2026-01-08
2,349568,349568_labor,2026-01-08 16:30:00-05:00,2026-01-08 16:00:00-05:00,2026-01-08 16:50:12.898351-05:00,POINT (-74.065413 4.759951),POINT (-74.0588765 4.7905312),253543,Alfred Initial Transport,VEHICLE_TRANSPORTATION,14671,,50.2,149,2026-01-08


In [17]:
# Index into moves_driver_df of the row currently being examined.
labor_iter = 0

# Position and time taken from the `end_point` / `actual_end` of that row; they
# are used below as where and when the driver becomes available.
# NOTE(review): `series[labor_iter]` looks up by index label, so this relies on
# moves_driver_df having a 0..n-1 index — confirm.
available_from = moves_driver_df['end_point'][labor_iter]
available_at = moves_driver_df['actual_end'][labor_iter]

available_from, available_at

('POINT (-74.0286017 4.9203296)',
 Timestamp('2026-01-08 09:42:38.635063-0500', tz='America/Bogota'))

In [13]:
# Start position and scheduled time of the row three positions ahead.
# In the moves table shown above each labor contributes three rows
# (`_free`, `_move`, `_labor`), so +3 presumably lands on the driver's next labor — confirm.
next_service_start_pos = moves_driver_df['start_point'][labor_iter+3]
next_service_start_time = moves_driver_df['schedule_date'][labor_iter+3]

next_service_start_pos, next_service_start_time

('POINT (-74.0434496 4.703822299999999)',
 Timestamp('2026-01-08 09:00:00-0500', tz='America/Bogota'))

In [14]:
next_service_start_time <= new_service['schedule_date']

True

### Since the starting time of the next service is not later than the scheduled time of the new service, the new service cannot be inserted before it, so we move on to the driver's next labor.

In [15]:
# Advance three rows in moves_driver_df (presumably one labor = free + move + labor rows — confirm).
labor_iter += 3

In [16]:
# first_labor = labors_driver_df.iloc[labor_iter,:]

available_from = moves_driver_df['end_point'][labor_iter]
available_at = moves_driver_df['actual_end'][labor_iter]

available_from, available_at

('POINT (-74.028923 4.704466)',
 Timestamp('2026-01-08 09:17:25.029013-0500', tz='America/Bogota'))

In [17]:
next_service_start_pos = moves_driver_df['start_point'][labor_iter+3]
next_service_start_time = moves_driver_df['schedule_date'][labor_iter+3]

next_service_start_pos, next_service_start_time

('POINT (-74.0343054 4.692379399999999)',
 Timestamp('2026-01-08 10:00:00-0500', tz='America/Bogota'))

In [18]:
next_service_start_time <= new_service['schedule_date']

True

### Since the starting time of the next service is not later than the scheduled time of the new service, the new service cannot be inserted before it, so we move on to the driver's next labor.

In [19]:
labor_iter += 3

In [20]:
# first_labor = labors_driver_df.iloc[labor_iter,:]

available_from = moves_driver_df['end_point'][labor_iter]
available_at = moves_driver_df['actual_end'][labor_iter]

available_from, available_at

('POINT (-74.06596379999999 4.6401052)',
 Timestamp('2026-01-08 10:25:11.051557-0500', tz='America/Bogota'))

In [21]:
next_service_start_pos = moves_driver_df['start_point'][labor_iter+3]
next_service_start_time = moves_driver_df['schedule_date'][labor_iter+3]

next_service_start_pos, next_service_start_time

('POINT (-74.10500789999999 4.6304576)',
 Timestamp('2026-01-08 14:00:00-0500', tz='America/Bogota'))

In [22]:
next_service_start_time <= new_service['schedule_date']

False

### This is the breaking point... this means that the next labor of the driver is after the scheduled time of the new labor. This is the point where we could potentially assign the new labor. 

### Now we need to check whether the driver can make it from its current position to the new service in time

In [23]:
# Haversine distance from the driver's available position to the new service's
# start point; the second value returned by `distance` is not used.
distance_to_new_service, _ = distance(available_from, 
                                   new_service['start_address_point'],
                                   method='haversine')
# Travel time in minutes (it is fed to timedelta(minutes=...) below);
# assumes distance is in km and ALFRED_SPEED in km/h — TODO confirm.
travel_time = distance_to_new_service/ALFRED_SPEED*60

# Earliest moment the driver could reach the new service.
would_arrive_at = available_at + timedelta(minutes=travel_time)

In [25]:
would_arrive_at <= new_service['latest_arrival_time']

True

### The driver would be able to arrive at the service on time. Time to simulate the assignment and make sure he would also arrive on time at the next service

In [27]:
# Haversine distance between the new service's start and end points.
new_service_travel_distance, _ = distance(new_service['start_address_point'],
                                       new_service['end_address_point'], 'haversine')

# Driving time of the service itself, in minutes
# (assumes km and km/h for VEHICLE_TRANSPORT_SPEED — TODO confirm).
new_service_travel_time = new_service_travel_distance/VEHICLE_TRANSPORT_SPEED*60

# Total service duration in minutes: TIEMPO_ALISTAR + driving + TIEMPO_FINALIZACION
# (presumably preparation and wrap-up allowances — verify in src.config).
total_new_service_duration = TIEMPO_ALISTAR + new_service_travel_time + TIEMPO_FINALIZACION

# When and where the driver would be free again after serving the new service.
finish_new_service_time = would_arrive_at + timedelta(minutes=total_new_service_duration)
finish_new_service_pos = new_service['end_address_point']

In [28]:
finish_new_service_time

Timestamp('2026-01-08 11:23:01.646585-0500', tz='America/Bogota')

#### Compute driving time to the next scheduled service

In [33]:
# Haversine distance from where the new service ends to the start of the
# driver's next already-scheduled service.
travel_distance_to_next_service, _ = distance(finish_new_service_pos, 
                                              next_service_start_pos,
                                              'haversine')
# Driving time in minutes (assumes km and km/h for ALFRED_SPEED — TODO confirm).
travel_time_to_next_service = travel_distance_to_next_service/ALFRED_SPEED*60
# Arrival time at the next service: add the travel *time*. The previous version
# added the travel distance as if it were minutes.
would_arrive_to_next_service_at = finish_new_service_time + timedelta(minutes=travel_time_to_next_service)

In [38]:
would_arrive_to_next_service_at <= next_service_start_time + timedelta(minutes=TIEMPO_GRACIA)

True

In [None]:
### LOGICA COMPLETA

