In [15]:
import pandas as pd
import numpy as np

import ipywidgets as widgets
from ipywidgets import interact

from datetime import timedelta

from src.data_load import load_tables, load_online_instance, load_distances
from src.filtering import flexible_filter
from src.plotting import plot_metrics_comparison, plot_gantt_labors_by_driver, plot_results, plot_service_driver_distance, \
                         plot_gantt_by_services, plot_gantt_by_drivers
from src.metrics import collect_hist_baseline_dfs
from src.experimentation_config import *
from src.config import *
from src.distance_utils import distance

# Root folder holding the input tables and the stored results (relative to the notebook).
data_path = '../data'

# Instance identifier and the distance method handed to the loaders/metrics below.
instance = 'instAD1'
distance_method = 'haversine'

# Load the base tables; only labors_raw_df and valid_cities are used further down in this notebook.
directorio_df, labors_raw_df, cities_df, duraciones_df, valid_cities = load_tables(data_path, generate_labors=False)
# Split the instance into three labor tables (real / static / dynamic, judging by the names).
labors_real_df, labors_static_df, labors_dynamic_df = load_online_instance(data_path, instance, labors_raw_df)
# Latest admissible arrival = scheduled time plus the grace period.
# TIEMPO_GRACIA comes from one of the star imports above and is used as minutes here.
labors_dynamic_df['latest_arrival_time'] = labors_dynamic_df['schedule_date'] + timedelta(minutes=TIEMPO_GRACIA)

# Dates associated with this instance (fechas_dict also comes from a star import).
fechas = fechas_dict[instance]

# Load data

In [16]:
# Derive the baseline instance id by replacing the character at index 5 with 'S'
# (e.g. 'instAD1' -> 'instAS1').
hist_inst = f'{instance[:5]}S{instance[6:]}'
# Labors and moves of the historical baseline for the same dates and distance method.
labors_hist_df, moves_hist_df = collect_hist_baseline_dfs(data_path, hist_inst, fechas, distance_method)

In [17]:
import pickle
import os

# Metric names and alpha values that identify the stored result files
# (res_{metric}_{alpha:.1f}_static.pkl) to be loaded below.
metrics = ['hybrid']
alphas = [0]

def collect_alpha_results_to_df(data_path: str, instance: str, dist_method: str, metrics: list, alphas: list):
    """
    Load the stored static results for every (metric, alpha) pair and stack them.

    For each pair the file
    ``{data_path}/resultados/online_operation/{instance}/res_{metric}_{alpha:.1f}_static.pkl``
    is read if it exists; missing files are skipped silently.

    Parameters
    ----------
    data_path : str
        Root data folder.
    instance : str
        Instance identifier used in the results path.
    dist_method : str
        Accepted for signature compatibility; it is not used when building the path.
    metrics : list
        Metric names used in the file name.
    alphas : list
        Numeric alpha values, formatted with one decimal in the file name.

    Returns
    -------
    labors_algo_df, moves_algo_df : pd.DataFrame
        Concatenated labor results and driver moves (original indices kept).
        Both are empty DataFrames when no result file is found.
    """
    # Columns normalized to the Bogotá timezone whenever they are present.
    datetime_cols = (
        "labor_created_at",
        "labor_start_date",
        "labor_end_date",
        "created_at",
        "schedule_date",
        "actual_start",
        "actual_end",
    )
    sort_keys = ["city", "date", "service_id", "labor_id"]

    # Collect the pieces and concatenate once at the end instead of growing
    # a DataFrame inside the loop.
    labor_frames = []
    move_frames = []

    for metric in metrics:
        for alpha in alphas:
            upload_path = f'{data_path}/resultados/online_operation/{instance}/res_{metric}_{alpha:.1f}_static.pkl'

            if not os.path.exists(upload_path):
                continue

            # NOTE(security): pickle.load can execute arbitrary code; only read
            # result files produced by this project.
            with open(upload_path, "rb") as f:
                res = pickle.load(f)
            # The stored tuple has five entries; only the two DataFrames are used here.
            _inc_values, _duration, results_df, moves_df, _metrics_df = res

            if not results_df.empty:
                results_df = results_df.sort_values(sort_keys)
            if not moves_df.empty:
                moves_df = moves_df.sort_values(sort_keys)

            # Normalize datetime columns to Bogotá tz
            for df in (results_df, moves_df):
                for col in datetime_cols:
                    if col in df.columns:
                        df[col] = (
                            pd.to_datetime(df[col], errors="coerce", utc=True)
                            .dt.tz_convert("America/Bogota")
                        )

            labor_frames.append(results_df)
            move_frames.append(moves_df)

    labors_algo_df = pd.concat(labor_frames) if labor_frames else pd.DataFrame()
    moves_algo_df = pd.concat(move_frames) if move_frames else pd.DataFrame()

    return labors_algo_df, moves_algo_df

# Load the stored static plan. The literal 'haversine' repeats the value of `distance_method`;
# the function accepts this argument but does not use it.
labors_algo_df, moves_algo_df = collect_alpha_results_to_df(data_path, instance, 'haversine', metrics, alphas)

### Additional testing and eval

In [5]:
# Exploratory slice of the static plan: city '149', rows whose schedule_date falls on
# day-of-month 8 (the lambda checks only the day, not the month or year).
labors_algo_filtered_df = flexible_filter(
    labors_algo_df,
    city='149',
    schedule_date=lambda x: x.dt.day==8
    ).sort_values(['schedule_date', 'actual_start'])

# Same slice for the driver moves table.
moves_algo_filtered_df = flexible_filter(
    moves_algo_df,
    city='149',
    schedule_date=lambda x: x.dt.day==8
    ).sort_values(['schedule_date', 'actual_start'])
moves_algo_filtered_df

Unnamed: 0,service_id,labor_id,labor_name,labor_category,assigned_driver,schedule_date,actual_start,actual_end,start_point,end_point,distance_km,duration_min,city,date
656,251608,347503,Alfred Initial Transport,VEHICLE_TRANSPORTATION,Yeisson Beltran Garzon,2026-01-08 07:00:00-05:00,2026-01-08 06:30:00-05:00,2026-01-08 07:26:39.264235-05:00,POINT (-74.069493 4.619111000000001),POINT (-74.1148716 4.672371),,56.7,149,2026-01-08
658,252778,348738,Alfred Initial Transport,VEHICLE_TRANSPORTATION,Daniel Toro,2026-01-08 07:00:00-05:00,2026-01-08 06:30:00-05:00,2026-01-08 07:19:04.232744-05:00,POINT (-74.0529186 4.709332799999999),POINT (-74.028923 4.704466),,49.1,149,2026-01-08
657,254181,350283,Alfred Initial Transport,VEHICLE_TRANSPORTATION,Jorge Cortes Rojas,2026-01-08 07:00:00-05:00,2026-01-08 06:30:00-05:00,2026-01-08 07:44:16.120658-05:00,POINT (-74.13613 4.601941),POINT (-74.0458582 4.752600599999999),,74.3,149,2026-01-08
659,250945,346786,Alfred Initial Transport,VEHICLE_TRANSPORTATION,Julio Cesar Parra,2026-01-08 07:30:00-05:00,2026-01-08 07:00:00-05:00,2026-01-08 07:50:16.836738-05:00,POINT (-74.0671607 4.742149799999999),POINT (-74.05576889999999 4.712595600000001),,50.3,149,2026-01-08
660,247759,343358_free,FREE_TIME,FREE_TIME,Iván Darío Pinta,2026-01-08 08:00:00-05:00,2026-01-08 07:00:00-05:00,2026-01-08 07:24:00.968102-05:00,POINT (-74.0199021 4.666103),POINT (-74.0199021 4.666103),0.000000,24.0,149,2026-01-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
734,252481,348410_move,DRIVER_MOVE,DRIVER_MOVE,Julio Cesar Parra,2026-01-08 15:30:00-05:00,2026-01-08 14:57:15.259406-05:00,2026-01-08 15:00:00-05:00,POINT (-74.05576889999999 4.712595600000001),POINT (-74.044195 4.7081936),1.372838,2.7,149,2026-01-08
735,252481,348410,Alfred Transport,VEHICLE_TRANSPORTATION,Julio Cesar Parra,2026-01-08 15:30:00-05:00,2026-01-08 15:00:00-05:00,2026-01-08 15:45:00-05:00,POINT (-74.044195 4.7081936),POINT (-74.044195 4.7081936),,45.0,149,2026-01-08
739,253543,349568_free,FREE_TIME,FREE_TIME,Manuel Romero,2026-01-08 16:30:00-05:00,2026-01-08 07:00:00-05:00,2026-01-08 15:51:49.786853-05:00,POINT (-74.0888225 4.7315706),POINT (-74.0888225 4.7315706),0.000000,531.8,149,2026-01-08
740,253543,349568_move,DRIVER_MOVE,DRIVER_MOVE,Manuel Romero,2026-01-08 16:30:00-05:00,2026-01-08 15:51:49.786853-05:00,2026-01-08 16:00:00-05:00,POINT (-74.0888225 4.7315706),POINT (-74.065413 4.759951),4.085110,8.2,149,2026-01-08


In [7]:
# Dynamic labors for city '149' on day-of-month 8, ordered by creation time so they can
# be processed in arrival order. Later exploratory cells read this variable.
labors_dynamic_filtered_df = flexible_filter(
    labors_dynamic_df,
    city='149',
    schedule_date=lambda x: x.dt.day==8
    ).sort_values(['created_at', 'schedule_date', 'labor_start_date']).reset_index(drop=True)

labors_dynamic_filtered_df


Unnamed: 0,service_id,labor_id,labor_type,labor_name,labor_category,labor_price,labor_created_at,labor_start_date,labor_end_date,alfred,...,state_service,start_address_id,start_address_point,end_address_id,end_address_point,city,address_id,address_point,address_name,latest_arrival_time
0,254688,350824.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-06-13 07:52:33.735000-05:00,2026-01-08 10:10:00-05:00,2026-01-08 10:52:00-05:00,6229.0,...,COMPLETED,6814.0,POINT (-74.1098914 4.629117399999999),18702.0,POINT (-74.1103095 4.6459248),149,11727.0,POINT (-74.0583698 4.6510839),Casa,2026-01-08 10:45:00-05:00
1,254695,350831.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-06-13 08:10:32.491000-05:00,2026-01-08 10:20:00-05:00,2026-01-08 11:40:00-05:00,70934.0,...,COMPLETED,21923.0,POINT (-74.0678728 4.6711387),138194.0,POINT (-74.031745 4.7220171),149,128219.0,POINT (-74.19197489999999 4.6331451),Casa,2026-01-08 10:45:00-05:00
2,254696,350832.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,49485.0,2025-06-13 08:11:01.273000-05:00,2026-01-08 11:48:00-05:00,2026-01-08 12:42:00-05:00,70431.0,...,COMPLETED,33856.0,POINT (-74.043407 4.744333999999999),140269.0,POINT (-74.0566813 4.7039631),149,126994.0,POINT (-74.1988555 4.5810069),Casa,2026-01-08 12:15:00-05:00
3,254699,350835.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,49485.0,2025-06-13 08:14:58.477000-05:00,2026-01-08 09:29:00-05:00,2026-01-08 10:57:00-05:00,69860.0,...,COMPLETED,33850.0,POINT (-74.0389556 4.8005013),140270.0,POINT (-74.0497525 4.7045132),149,125736.0,POINT (-74.0313506 4.7138433),Apartamento,2026-01-08 10:15:00-05:00
4,254702,350839.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-06-13 08:18:49.653000-05:00,2026-01-08 11:52:00-05:00,2026-01-08 13:26:00-05:00,5366.0,...,COMPLETED,128927.0,POINT (-74.0566599 4.6942305),59037.0,POINT (-74.075518 4.6209114),149,93651.0,POINT (-74.08878779999999 4.7315578),ALFRED,2026-01-08 12:15:00-05:00
5,254705,350843.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,52988.0,2025-06-13 08:21:00.479000-05:00,2026-01-08 14:44:00-05:00,2026-01-08 16:29:00-05:00,10500.0,...,COMPLETED,1.0,POINT (-74.045085 4.777223),139198.0,POINT (-74.0478395 4.658269),149,16797.0,POINT (-74.118284 4.7485876),casa,2026-01-08 15:15:00-05:00
6,254714,350852.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-06-13 08:38:20.329000-05:00,2026-01-08 17:02:00-05:00,2026-01-08 17:40:00-05:00,70431.0,...,COMPLETED,15.0,POINT (-74.065413 4.759951),52629.0,POINT (-74.0487921 4.7004123),149,126994.0,POINT (-74.1988555 4.5810069),Casa,2026-01-08 17:15:00-05:00
7,254716,350854.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,49485.0,2025-06-13 08:47:29.258000-05:00,2026-01-08 11:46:00-05:00,2026-01-08 14:12:00-05:00,11988.0,...,COMPLETED,33856.0,POINT (-74.043407 4.744333999999999),140277.0,POINT (-74.0647912 4.7329571),149,20721.0,POINT (-74.0286017 4.9203296),casa,2026-01-08 12:45:00-05:00
8,254722,350860.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-06-13 09:07:32.358000-05:00,2026-01-08 16:29:00-05:00,2026-01-08 18:37:00-05:00,11712.0,...,COMPLETED,88962.0,POINT (-74.0664381 4.6704091),138479.0,POINT (-74.1014514 4.6367359),149,67588.0,POINT (-74.1563294 4.6057898),Casa,2026-01-08 16:45:00-05:00
9,254725,350865.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,80663.0,2025-06-13 09:14:26.337000-05:00,2026-01-08 10:17:00-05:00,2026-01-08 11:46:00-05:00,14671.0,...,COMPLETED,17793.0,POINT (-74.05576889999999 4.712595600000001),140286.0,POINT (-73.9971418 4.8929866),149,23068.0,POINT (-74.1122312 4.7384985),Casa,2026-01-08 10:45:00-05:00


# Online assignment

In [18]:
def filter_dynamic_df(labors_dynamic_df, city, fecha):
    """
    Select the dynamic labors of one city and date, ordered by arrival.

    The rows kept are whatever ``flexible_filter`` returns for ``city=city`` and
    ``schedule_date=fecha``; they are then sorted by creation time, scheduled
    time and labor start, and re-indexed from 0.
    """
    arrival_order = ['created_at', 'schedule_date', 'labor_start_date']

    selected_df = flexible_filter(labors_dynamic_df, city=city, schedule_date=fecha)
    ordered_df = selected_df.sort_values(arrival_order)

    return ordered_df.reset_index(drop=True)


def get_drivers(labors_algo_df, city, fecha):
    """
    List the drivers that have labors assigned in a given city and date.

    Parameters
    ----------
    labors_algo_df : pd.DataFrame
        Planned labors; must contain an ``assigned_driver`` column.
    city, fecha
        Values forwarded to ``flexible_filter`` as ``city`` and ``schedule_date``.

    Returns
    -------
    list
        Unique, non-null driver identifiers found in the filtered labors.
    """
    labors_algo_filtered_df = flexible_filter(
        labors_algo_df,
        city=city,
        schedule_date=fecha)

    # Read the drivers from the *filtered* frame: taking them from the full frame
    # returned every driver of every city and date.
    # Null entries (labors without a driver) are not valid candidates.
    drivers = labors_algo_filtered_df['assigned_driver'].dropna().unique().tolist()

    return drivers


def filter_dfs_for_new_labor(labors_algo_df, moves_algo_df, city, fecha, created_at):
    """
    Keep the part of the plan that is still open when a new labor is created.

    Both tables are narrowed with ``flexible_filter`` (``city`` and
    ``schedule_date=fecha``), then only rows whose ``actual_end`` is later than
    ``created_at`` are kept, sorted by scheduled time and actual start and
    re-indexed from 0.

    Returns the filtered labors table and the filtered moves table, in that order.
    """
    sort_keys = ["schedule_date", "actual_start"]
    open_frames = []

    for plan_df in (labors_algo_df, moves_algo_df):
        scoped_df = flexible_filter(plan_df, city=city, schedule_date=fecha)
        unfinished_mask = scoped_df["actual_end"] > created_at
        open_df = scoped_df[unfinished_mask].sort_values(sort_keys).reset_index(drop=True)
        open_frames.append(open_df)

    labors_algo_filtered_df, moves_algo_filtered_df = open_frames
    return labors_algo_filtered_df, moves_algo_filtered_df

    
def filter_dfs_for_insertion(labors_algo_df, moves_algo_df, driver, city=None, fecha=None):
    """
    Extract one driver's labors and moves, ordered chronologically.

    Parameters
    ----------
    labors_algo_df, moves_algo_df : pd.DataFrame
        Labor and move tables, normally already restricted to one city and date.
    driver
        Value matched against ``assigned_driver``.
    city, fecha : optional
        When given, additionally filter both tables by ``city`` / ``schedule_date``.
        Previously the moves filter read ``city`` and ``fecha`` from notebook
        globals, which failed on a fresh kernel and silently depended on the
        last loop iteration; they are now explicit and optional.

    Returns
    -------
    labors_algo_filtered_df, moves_algo_filtered_df : pd.DataFrame
        Both sorted by ``schedule_date`` and ``actual_start`` with a fresh 0..n-1 index.
    """
    filters = {'assigned_driver': driver}
    if city is not None:
        filters['city'] = city
    if fecha is not None:
        filters['schedule_date'] = fecha

    def _driver_schedule(df):
        # Same selection and ordering for both tables keeps them aligned.
        return (
            flexible_filter(df, **filters)
            .sort_values(['schedule_date', 'actual_start'])
            .reset_index(drop=True)
        )

    labors_algo_filtered_df = _driver_schedule(labors_algo_df)
    moves_algo_filtered_df = _driver_schedule(moves_algo_df)

    return labors_algo_filtered_df, moves_algo_filtered_df


In [19]:
# ============================================================
# --- Helper Functions --------------------------------------
# ============================================================

def get_driver_context(moves_driver_df, idx):
    """
    Look up where/when the driver is released at row ``idx`` and where/when the
    row three positions later is scheduled to start.

    Returns (current end time, current end point, next scheduled time, next start point).
    """
    following_idx = idx + 3
    return (
        moves_driver_df.loc[idx, 'actual_end'],
        moves_driver_df.loc[idx, 'end_point'],
        moves_driver_df.loc[following_idx, 'schedule_date'],
        moves_driver_df.loc[following_idx, 'start_point'],
    )


def compute_arrival_time(current_end_time, current_end_pos, target_pos, speed, distance_fn):
    """
    Estimate the arrival at ``target_pos`` when leaving ``current_end_pos`` at ``current_end_time``.

    ``distance_fn`` is called with ``method='haversine'`` and must return a pair whose
    first item is the distance; the travel time in minutes is ``distance / speed * 60``.

    Returns (arrival time, distance, travel time in minutes).
    """
    km_to_target, _ = distance_fn(current_end_pos, target_pos, method='haversine')
    minutes_on_road = km_to_target / speed * 60
    arrival = current_end_time + timedelta(minutes=minutes_on_road)
    return arrival, km_to_target, minutes_on_road


def adjust_for_early_arrival(would_arrive_at, scheduled_date, early_buffer=30):
    """
    Clamp an arrival time so it is never earlier than ``scheduled_date - early_buffer`` minutes.

    If the driver would get there before that limit, the limit itself is returned
    (the driver waits); otherwise the original arrival time is returned unchanged.
    """
    wait_until = scheduled_date - timedelta(minutes=early_buffer)
    if wait_until > would_arrive_at:
        return wait_until
    return would_arrive_at


def compute_service_end_time(arrival_time, start_pos, end_pos, 
                             vehicle_speed, prep_time, finish_time, distance_fn):
    """
    Work out when and where a service starting at ``arrival_time`` would finish.

    The duration in minutes is preparation + driving (``distance / vehicle_speed * 60``,
    distance taken from ``distance_fn`` with ``method='haversine'``) + finalization.

    Returns (finish time, finish position, total duration in minutes, driven distance).
    """
    service_km, _ = distance_fn(start_pos, end_pos, method='haversine')
    driving_minutes = service_km / vehicle_speed * 60
    duration_minutes = prep_time + driving_minutes + finish_time
    done_at = arrival_time + timedelta(minutes=duration_minutes)
    return done_at, end_pos, duration_minutes, service_km


def can_reach_next_labor(new_finish_time, new_finish_pos, next_start_time, next_start_pos, 
                         driver_speed, grace_time, distance_fn):
    """
    Decide whether the driver still makes the next scheduled labor after the new service.

    The driver leaves ``new_finish_pos`` at ``new_finish_time``; the insertion is acceptable
    when the arrival at ``next_start_pos`` is no later than ``next_start_time`` plus
    ``grace_time`` minutes.

    Returns (feasible flag, arrival time at next labor, distance, travel time in minutes).
    """
    km_to_next, _ = distance_fn(new_finish_pos, next_start_pos, method='haversine')
    minutes_to_next = km_to_next / driver_speed * 60
    arrival_at_next = new_finish_time + timedelta(minutes=minutes_to_next)
    deadline = next_start_time + timedelta(minutes=grace_time)
    on_time = arrival_at_next <= deadline
    return on_time, arrival_at_next, km_to_next, minutes_to_next


# ============================================================
# --- Main Evaluation Function -------------------------------
# ============================================================

def evaluate_driver_feasibility(
    new_labor,
    moves_driver_df,
    ALFRED_SPEED,
    VEHICLE_TRANSPORT_SPEED,
    TIEMPO_ALISTAR,
    TIEMPO_FINALIZACION,
    TIEMPO_GRACIA,
    distance_fn,
    EARLY_BUFFER=30
):
    """
    Evaluate if a driver can feasibly insert a new labor into their current schedule.

    The schedule is walked in steps of three rows (the moves table is expected to
    repeat labor, ``_free``, ``_move``) until the first gap whose following labor is
    scheduled after the new labor. If no such gap exists, the new labor is appended
    after the driver's last row, provided the driver can still arrive on time.

    Parameters
    ----------
    new_labor : mapping
        Must provide ``schedule_date``, ``latest_arrival_time``,
        ``start_address_point`` and ``end_address_point``.
    moves_driver_df : pd.DataFrame
        The driver's moves with a 0..n-1 index and the columns ``labor_id``,
        ``actual_end``, ``end_point``, ``schedule_date`` and ``start_point``.
    ALFRED_SPEED, VEHICLE_TRANSPORT_SPEED : float
        Speeds used for repositioning and for the service itself (distance / speed * 60 = minutes).
    TIEMPO_ALISTAR, TIEMPO_FINALIZACION, TIEMPO_GRACIA : float
        Preparation, finalization and grace times, in minutes.
    distance_fn : callable
        Called as ``distance_fn(a, b, method='haversine')``; returns ``(distance, _)``.
    EARLY_BUFFER : float
        Minutes before ``schedule_date`` at which the driver may arrive at the earliest.

    Returns
    -------
    feasible : bool
        Whether insertion is feasible for this driver.
    infeasible_log : str
        Explanation if infeasible.
    prev_labor_id : str or None
        labor_id of the labor before the potential insertion point.
    next_labor_id : str or None
        labor_id of the labor after the potential insertion point
        (None when the new labor is appended at the end).
    dist_to_new_service : float or None
        Distance (km) from previous endpoint to new labor start.
    dist_to_next_labor : float or None
        Distance (km) from new labor end to next scheduled labor start.
    """

    # --- Early exit: driver has no labors
    if moves_driver_df.empty:
        return False, "Driver has no scheduled labors.", None, None, None, None

    labor_iter = 0
    n_rows = len(moves_driver_df)

    # Iterate across each real labor (skipping _free and _move)
    while labor_iter < n_rows - 3:

        # 1. Get current + next labor context
        curr_end_time, curr_end_pos, next_start_time, next_start_pos = \
            get_driver_context(moves_driver_df, labor_iter)

        # 2. Skip if next labor starts before new labor's schedule
        if next_start_time <= new_labor['schedule_date']:
            labor_iter += 3
            continue

        # 3. Compute arrival time to new service
        would_arrive_at, dist_to_new_service, _ = compute_arrival_time(
            curr_end_time, curr_end_pos, new_labor['start_address_point'],
            ALFRED_SPEED, distance_fn
        )

        # 4. Check if can arrive within allowed window
        if would_arrive_at > new_labor['latest_arrival_time']:
            return (
                False, "Driver would not arrive on time to the new labor.",
                None, None, dist_to_new_service, None
            )

        # 5. Adjust if arriving too early (driver waits)
        real_arrival_time = adjust_for_early_arrival(
            would_arrive_at, new_labor['schedule_date'], EARLY_BUFFER
        )

        # 6. Compute when driver would finish the new labor
        finish_new_labor_time, finish_new_labor_pos, _, _ = \
            compute_service_end_time(
                real_arrival_time,
                new_labor['start_address_point'],
                new_labor['end_address_point'],
                VEHICLE_TRANSPORT_SPEED,
                TIEMPO_ALISTAR,
                TIEMPO_FINALIZACION,
                distance_fn
            )

        # 7. Check feasibility to reach next scheduled labor
        feasible_next, _, dist_to_next_labor, _ = \
            can_reach_next_labor(
                finish_new_labor_time, finish_new_labor_pos,
                next_start_time, next_start_pos,
                ALFRED_SPEED, TIEMPO_GRACIA, distance_fn
            )

        if not feasible_next:
            return (
                False,
                "Driver would not make it to the next scheduled labor in time "
                "if new labor is inserted.",
                None, None, dist_to_new_service, dist_to_next_labor
            )

        # Feasible insertion point found between rows labor_iter and labor_iter + 3
        return (
            True, '',
            moves_driver_df.loc[labor_iter, "labor_id"],
            moves_driver_df.loc[labor_iter + 3, "labor_id"],
            dist_to_new_service, dist_to_next_labor
        )

    # 8. Case: new labor occurs after all existing ones -> append at end.
    # No later labor can be disturbed, but the driver must still reach the new
    # labor before its latest arrival time, so that check is made here as well.
    # (Computed inline with distance_fn; same formula as the gap case.)
    last_idx = n_rows - 1
    last_end_time = moves_driver_df.loc[last_idx, "actual_end"]
    last_end_pos = moves_driver_df.loc[last_idx, "end_point"]

    dist_to_new_service, _ = distance_fn(
        last_end_pos, new_labor['start_address_point'], method='haversine'
    )
    would_arrive_at = last_end_time + timedelta(minutes=dist_to_new_service / ALFRED_SPEED * 60)

    if would_arrive_at > new_labor['latest_arrival_time']:
        return (
            False, "Driver would not arrive on time to the new labor.",
            None, None, dist_to_new_service, None
        )

    return True, '', moves_driver_df.loc[last_idx, "labor_id"], None, dist_to_new_service, None


In [None]:
import random
import pandas as pd

def get_best_insertion(candidate_insertions, selection_mode="min_total_distance", random_state=None):
    """
    Selects the best driver among feasible insertions based on a chosen criterion.

    Parameters
    ----------
    candidate_insertions : list of tuples
        Each tuple must be:
        (driver, prev_labor_id, next_labor_id, dist_to_new_labor, dist_to_next_labor)
        Missing distances (None/NaN) are counted as 0.
    selection_mode : str, optional
        Selection criterion:
        - "random": choose a random candidate
        - "min_total_distance": minimize (dist_to_new_labor + dist_to_next_labor)
        - "min_dist_to_new_labor": minimize distance to the new labor
    random_state : int, optional
        For reproducibility when using random selection. It is passed to
        ``DataFrame.sample``; the global ``random`` module state is left untouched.

    Returns
    -------
    selected_driver : str or None
        The chosen driver ID (None when there are no candidates).
    insertion_point : tuple or None
        ``(prev_labor_id, next_labor_id)`` of the chosen candidate
        (None when there are no candidates).
    selection_df : pd.DataFrame
        Table summarizing all candidate metrics (for analysis/debugging);
        empty when there are no candidates.

    Raises
    ------
    ValueError
        If ``selection_mode`` is not one of the supported values.
    """

    if len(candidate_insertions) == 0:
        return None, None, pd.DataFrame()

    # Convert to DataFrame for easier computation
    selection_df = pd.DataFrame(candidate_insertions, columns=[
        "driver", "prev_labor_id", 'next_labor_id', "dist_to_new_labor", "dist_to_next_labor"
    ])

    # Replace None/NaN manually using np.where (bypasses .fillna())
    for dist_col in ("dist_to_new_labor", "dist_to_next_labor"):
        selection_df[dist_col] = np.where(
            selection_df[dist_col].isna(),
            0,
            selection_df[dist_col]
        ).astype(float)

    selection_df["total_distance"] = (
        selection_df["dist_to_new_labor"] + selection_df["dist_to_next_labor"]
    )

    # --- Selection logic ---
    if selection_mode == "random":
        # sample() draws from its own generator seeded by random_state, so
        # re-seeding the global `random` module is neither needed nor desirable.
        chosen_row = selection_df.sample(1, random_state=random_state).iloc[0]

    elif selection_mode == "min_dist_to_new_labor":
        chosen_row = selection_df.loc[selection_df["dist_to_new_labor"].idxmin()]

    elif selection_mode == "min_total_distance":
        chosen_row = selection_df.loc[selection_df["total_distance"].idxmin()]

    else:
        raise ValueError(f"Unknown selection_mode '{selection_mode}'")

    selected_driver = chosen_row["driver"]
    insertion_point = (chosen_row["prev_labor_id"], chosen_row['next_labor_id'])

    return selected_driver, insertion_point, selection_df


In [22]:
# Working copies of the static plan that are meant to absorb the dynamic insertions.
# NOTE: the update step at the bottom of the loop is still commented out, so these
# copies are never modified and every new labor is evaluated against the original plan.
labors_algo_dynamic_df = labors_algo_df.copy()
moves_algo_dynamic_df = moves_algo_df.copy()

# (city, fecha) -> list of dynamic labors for which no driver had a feasible insertion.
unassigned_labors = {}

for city in valid_cities[::]:
    for fecha in fechas[::]:
        unassigned_labors[(city,fecha)] = []

        # Dynamic labors of this city/date, ordered by creation time.
        labors_dynamic_filtered_df = filter_dynamic_df(
            labors_dynamic_df=labors_dynamic_df,
            city=city,
            fecha=fecha
            )
        
        # Candidate drivers are taken from the static plan.
        drivers = get_drivers(
            labors_algo_df=labors_algo_df,
            city=city,
            fecha=fecha)
        
        for i, new_labor in labors_dynamic_filtered_df.iterrows():
            # Part of the plan that has not finished yet when the new labor is created.
            labors_active_df, moves_active_df = filter_dfs_for_new_labor(
                labors_algo_dynamic_df,
                moves_algo_dynamic_df,
                city=city,
                fecha=fecha,
                created_at=new_labor['created_at']
            )
            
            # Feasible (driver, prev_labor_id, next_labor_id, dist_new, dist_next) tuples.
            candidate_insertions = []

            for driver in drivers:          
                labors_driver_df, moves_driver_df = filter_dfs_for_insertion(
                    labors_algo_df=labors_active_df,
                    moves_algo_df=moves_active_df,
                    driver=driver
                    )

                # Only the moves table is needed for the feasibility check.
                feasible, inf_log, prev_labor_id, next_labor_id, new_dist, next_dist = evaluate_driver_feasibility(
                    new_labor=new_labor,
                    moves_driver_df=moves_driver_df,
                    ALFRED_SPEED=ALFRED_SPEED,
                    VEHICLE_TRANSPORT_SPEED=VEHICLE_TRANSPORT_SPEED,
                    TIEMPO_ALISTAR=TIEMPO_ALISTAR,
                    TIEMPO_FINALIZACION=TIEMPO_FINALIZACION, 
                    TIEMPO_GRACIA=TIEMPO_GRACIA,
                    distance_fn=distance
                    )

                if feasible: 
                    candidate_insertions.append((driver, prev_labor_id, next_labor_id, new_dist, next_dist))
            
            # No driver can take the labor: record it and move on.
            if len(candidate_insertions)==0:
                unassigned_labors[(city,fecha)].append(new_labor)
                continue
            
            # Pick the candidate with the smallest added distance. The result is not
            # applied to the plan yet (see the pending update step below).
            selected_driver, insertion_point, selection_df = get_best_insertion(
                candidate_insertions, 
                selection_mode="min_total_distance", 
                random_state=None)

            # TODO: apply the chosen insertion to the working copies.
            # labors_algo_dynamic_df, moves_algo_dynamic_df = update_labors_and_moves_under_insertion()



In [10]:
unassigned_labors

{('149', '2026-01-05'): [], ('149', '2026-01-06'): []}

In [None]:
# Exploratory walk-through of the insertion check for ONE new service and ONE driver.
# Depends on `labors_dynamic_filtered_df`, `labors_algo_filtered_df` and
# `moves_algo_filtered_df` having been created by the exploratory filter cells above.
new_service = labors_dynamic_filtered_df.iloc[0,:]
infeasible_log = ''

# Drop everything that had already finished when the new service was created.
labors_algo_filtered_df = labors_algo_filtered_df[labors_algo_filtered_df['actual_end'] > new_service['created_at']]
moves_algo_filtered_df = moves_algo_filtered_df[moves_algo_filtered_df['actual_end'] > new_service['created_at']]

# Arbitrary driver (third one found) used for the walk-through.
driver = labors_algo_filtered_df['assigned_driver'].unique().tolist()[2]

labors_driver_df = flexible_filter(labors_algo_filtered_df,
                                  assigned_driver=driver).reset_index(drop=True)
moves_driver_df = flexible_filter(moves_algo_filtered_df,
                                  assigned_driver=driver).reset_index(drop=True)

# Show the key scheduling columns first, then the remaining ones.
order = ['labor_id','schedule_date', 'actual_start', 'actual_end', 'start_point', 'end_point']
order += [j for j in moves_driver_df.columns.tolist() if j not in order]
display(labors_driver_df)
display(moves_driver_df[order])

labor_iter = 0
while True:

    # Stop when there is no later scheduled labor to compare against; indexing
    # labor_iter + 3 past the end of the schedule would raise.
    # TODO_1: handle the case where every scheduled labor ends before the new labor's
    # schedule_date. Then only the on-time arrival has to be checked, because the new
    # labor cannot interfere with any other service.
    if labor_iter + 3 >= len(moves_driver_df):
        break

    # Moment and position at which the driver becomes available
    available_pos = moves_driver_df['end_point'][labor_iter]
    available_time = moves_driver_df['actual_end'][labor_iter]

    # Moment and position of the next labor the driver already has scheduled
    next_labor_start_pos = moves_driver_df['start_point'][labor_iter+3]
    next_labor_start_time = moves_driver_df['schedule_date'][labor_iter+3]

    # If the next scheduled labor is before the new service, keep looking at the
    # driver's following labors.
    if next_labor_start_time <= new_service['schedule_date']:
        # Advance by 3 rows: in moves_df each labor is followed by its _free and _move
        # rows, so real transport labors are three rows apart.
        labor_iter += 3
        continue

    # The first time the condition above fails we have found the gap: the previous labor
    # is scheduled before the new service and the next one after it. The new labor can
    # only be placed between these two.

    # Arrival time if the driver heads to the new service right after the previous labor
    distance_to_new_service, _ = distance(available_pos,
                                   new_service['start_address_point'],
                                   method='haversine')
    travel_time = distance_to_new_service/ALFRED_SPEED*60
    would_arrive_at = available_time + timedelta(minutes=travel_time)

    # Can the driver reach the new service on time?
    if would_arrive_at > new_service['latest_arrival_time']:
        infeasible_log = 'Driver would not arrive on time to the new labor.'
        break

    # TODO_2: compute the real arrival time. The check above only verifies the time
    # window; if the driver would arrive before schedule_date - TIEMPO_PREVIO, the driver
    # has to wait before leaving for the new labor.
    real_would_arrive_at = would_arrive_at  # Placeholder until TODO_2 is implemented

    # The driver arrives on time: simulate the assignment to see whether the next
    # scheduled labor can still be reached.
    ## 1. Distance and duration of the new labor itself
    new_labor_travel_distance, _ = distance(new_service['start_address_point'],
                                       new_service['end_address_point'], 'haversine')
    new_labor_travel_time = new_labor_travel_distance/VEHICLE_TRANSPORT_SPEED*60

    ## 2. Total duration of the new service: preparation + transport + finalization
    total_new_labor_duration = TIEMPO_ALISTAR + new_labor_travel_time + TIEMPO_FINALIZACION
    
    ## 3. Time and position at which the new labor would finish
    finish_new_labor_time = real_would_arrive_at + timedelta(minutes=total_new_labor_duration)
    finish_new_labor_pos = new_service['end_address_point']

    ## 4. Distance and duration from the end of the new labor to the next labor already
    ## planned for the driver
    travel_distance_to_next_labor, _ = distance(finish_new_labor_pos, 
                                              next_labor_start_pos,
                                              'haversine')
    travel_time_to_next_labor = travel_distance_to_next_labor/ALFRED_SPEED*60

    ## 5. Arrival time at the next scheduled labor after finishing the new one.
    ## Uses the travel time in minutes (the distance in km was being added before).
    would_arrive_to_next_labor_at = finish_new_labor_time + timedelta(minutes=travel_time_to_next_labor)

    if would_arrive_to_next_labor_at > next_labor_start_time + timedelta(minutes=TIEMPO_GRACIA):
        infeasible_log = 'Driver would not make in time to scheduled labor if new labor is assigned to him.'
        break

    # Reaching this point means the insertion is feasible. labors_algo_df and
    # moves_algo_df would have to be updated here. Leave the loop explicitly:
    # labor_iter does not change on this path, so it would otherwise repeat forever.
    break


    

    


### Nuevo servicio llega a las 7:52 a.m. para las 10:30 a.m. En este caso es una sola labor de transporte.

In [8]:
# First dynamic labor (earliest created_at) of the filtered city/day, used as the worked example.
new_service = labors_dynamic_filtered_df.iloc[0,:]

In [9]:
new_service

service_id                                            254688
labor_id                                            350824.0
labor_type                                              12.0
labor_name                          Alfred Initial Transport
labor_category                        VEHICLE_TRANSPORTATION
labor_price                                          55663.0
labor_created_at            2025-06-13 07:52:33.735000-05:00
labor_start_date                   2026-01-08 10:10:00-05:00
labor_end_date                     2026-01-08 10:52:00-05:00
alfred                                                6229.0
shop                                                     NaN
created_at                  2026-01-08 07:52:33.675000-05:00
schedule_date                      2026-01-08 10:30:00-05:00
client_type                                              B2B
paying_customer                                      27317.0
state_service                                      COMPLETED
start_address_id        

### Filtrar todas las labores que finalizan antes de la fecha de creación

In [10]:
# Keep only labors/moves that end after the new service was created; anything that
# finished earlier cannot be affected by the insertion. Both names are overwritten in place.
labors_algo_filtered_df = labors_algo_filtered_df[labors_algo_filtered_df['actual_end'] > new_service['created_at']]
moves_algo_filtered_df = moves_algo_filtered_df[moves_algo_filtered_df['actual_end'] > new_service['created_at']]

### Tomar un conductor y revisar sus labores

In [11]:
# Arbitrary driver for inspection: the third distinct value of assigned_driver
# (raises IndexError if fewer than three drivers remain).
driver = labors_algo_filtered_df['assigned_driver'].unique().tolist()[2]

# This driver's labors and moves, re-indexed from 0.
labors_driver_df = flexible_filter(labors_algo_filtered_df,
                                  assigned_driver=driver).reset_index(drop=True)
moves_driver_df = flexible_filter(moves_algo_filtered_df,
                                  assigned_driver=driver).reset_index(drop=True)

# Show the key scheduling columns first, followed by the remaining columns.
order = ['labor_id','schedule_date', 'actual_start', 'actual_end', 'start_point', 'end_point']
order += [j for j in moves_driver_df.columns.tolist() if j not in order]
display(labors_driver_df)
display(moves_driver_df[order])

Unnamed: 0,service_id,labor_id,labor_type,labor_name,labor_category,labor_price,labor_created_at,labor_start_date,labor_end_date,alfred,...,address_point,address_name,map_start_point,map_end_point,assigned_driver,actual_start,actual_end,dist_km,date,n_drivers
0,253144,349139.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-06-10 14:03:17.031000-05:00,2026-01-08 07:54:00-05:00,2026-01-08 09:07:00-05:00,68168.0,...,POINT (-74.0293686 4.7648946),Apartamento,POINT (-74.065413 4.759951),POINT (-74.0416449 4.694423699999999),Alberto Mora,2026-01-08 07:30:00-05:00,2026-01-08 08:26:37.297647-05:00,7.747752,2026-01-08,19
1,254126,350222.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,52988.0,2025-06-12 07:12:01.967000-05:00,2026-01-08 08:58:00-05:00,2026-01-08 09:49:00-05:00,5366.0,...,POINT (-74.08878779999999 4.7315578),ALFRED,POINT (-74.0434496 4.703822299999999),POINT (-74.028923 4.704466),Alberto Mora,2026-01-08 08:30:00-05:00,2026-01-08 09:17:25.029013-05:00,1.611433,2026-01-08,19
2,254324,350443.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-06-12 13:13:51.258000-05:00,2026-01-08 09:54:00-05:00,2026-01-08 11:20:00-05:00,68168.0,...,POINT (-74.0293686 4.7648946),Apartamento,POINT (-74.0343054 4.692379399999999),POINT (-74.06596379999999 4.6401052),Alberto Mora,2026-01-08 09:30:00-05:00,2026-01-08 10:25:11.051557-05:00,6.789462,2026-01-08,19
3,254410,350534.0,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,52988.0,2025-06-12 15:57:31.154000-05:00,2026-01-08 14:12:00-05:00,2026-01-08 15:30:00-05:00,5366.0,...,POINT (-74.08878779999999 4.7315578),ALFRED,POINT (-74.10500789999999 4.6304576),POINT (-74.0284861 4.7153205),Alberto Mora,2026-01-08 13:30:00-05:00,2026-01-08 14:34:01.844207-05:00,12.687158,2026-01-08,19


Unnamed: 0,labor_id,schedule_date,actual_start,actual_end,start_point,end_point,service_id,labor_name,labor_category,assigned_driver,distance_km,duration_min,city,date
0,349139,2026-01-08 08:00:00-05:00,2026-01-08 07:30:00-05:00,2026-01-08 08:26:37.297647-05:00,POINT (-74.065413 4.759951),POINT (-74.0416449 4.694423699999999),253144,Alfred Initial Transport,VEHICLE_TRANSPORTATION,Alberto Mora,,56.6,149,2026-01-08
1,350222_free,2026-01-08 09:00:00-05:00,2026-01-08 08:26:37.297647-05:00,2026-01-08 08:27:52.314995-05:00,POINT (-74.0416449 4.694423699999999),POINT (-74.0416449 4.694423699999999),254126,FREE_TIME,FREE_TIME,Alberto Mora,0.0,1.3,149,2026-01-08
2,350222_move,2026-01-08 09:00:00-05:00,2026-01-08 08:27:52.314995-05:00,2026-01-08 08:30:00-05:00,POINT (-74.0416449 4.694423699999999),POINT (-74.0434496 4.703822299999999),254126,DRIVER_MOVE,DRIVER_MOVE,Alberto Mora,1.064042,2.1,149,2026-01-08
3,350222,2026-01-08 09:00:00-05:00,2026-01-08 08:30:00-05:00,2026-01-08 09:17:25.029013-05:00,POINT (-74.0434496 4.703822299999999),POINT (-74.028923 4.704466),254126,Alfred Initial Transport,VEHICLE_TRANSPORTATION,Alberto Mora,,47.4,149,2026-01-08
4,350443_free,2026-01-08 10:00:00-05:00,2026-01-08 09:17:25.029013-05:00,2026-01-08 09:27:03.553261-05:00,POINT (-74.028923 4.704466),POINT (-74.028923 4.704466),254324,FREE_TIME,FREE_TIME,Alberto Mora,0.0,9.6,149,2026-01-08
5,350443_move,2026-01-08 10:00:00-05:00,2026-01-08 09:27:03.553261-05:00,2026-01-08 09:30:00-05:00,POINT (-74.028923 4.704466),POINT (-74.0343054 4.692379399999999),254324,DRIVER_MOVE,DRIVER_MOVE,Alberto Mora,1.470389,2.9,149,2026-01-08
6,350443,2026-01-08 10:00:00-05:00,2026-01-08 09:30:00-05:00,2026-01-08 10:25:11.051557-05:00,POINT (-74.0343054 4.692379399999999),POINT (-74.06596379999999 4.6401052),254324,Alfred Initial Transport,VEHICLE_TRANSPORTATION,Alberto Mora,,55.2,149,2026-01-08
7,350534_free,2026-01-08 14:00:00-05:00,2026-01-08 10:25:11.051557-05:00,2026-01-08 13:21:05.004511-05:00,POINT (-74.06596379999999 4.6401052),POINT (-74.06596379999999 4.6401052),254410,FREE_TIME,FREE_TIME,Alberto Mora,0.0,175.9,149,2026-01-08
8,350534_move,2026-01-08 14:00:00-05:00,2026-01-08 13:21:05.004511-05:00,2026-01-08 13:30:00-05:00,POINT (-74.06596379999999 4.6401052),POINT (-74.10500789999999 4.6304576),254410,DRIVER_MOVE,DRIVER_MOVE,Alberto Mora,4.458296,8.9,149,2026-01-08
9,350534,2026-01-08 14:00:00-05:00,2026-01-08 13:30:00-05:00,2026-01-08 14:34:01.844207-05:00,POINT (-74.10500789999999 4.6304576),POINT (-74.0284861 4.7153205),254410,Alfred Initial Transport,VEHICLE_TRANSPORTATION,Alberto Mora,,64.0,149,2026-01-08


In [12]:
# Start the walk at the first row of the driver's moves table.
labor_iter = 0

# Where and when the driver becomes free: end point and actual end of that row.
available_from = moves_driver_df.loc[labor_iter, 'end_point']
available_at = moves_driver_df.loc[labor_iter, 'actual_end']

available_from, available_at

('POINT (-74.0416449 4.694423699999999)',
 Timestamp('2026-01-08 08:26:37.297647-0500', tz='America/Bogota'))

In [13]:
# In the moves table displayed above, each labor row is followed by a `_free`
# and a `_move` row, so the row three positions ahead is the next labor.
next_service_start_pos = moves_driver_df.loc[labor_iter + 3, 'start_point']
next_service_start_time = moves_driver_df.loc[labor_iter + 3, 'schedule_date']

next_service_start_pos, next_service_start_time

('POINT (-74.0434496 4.703822299999999)',
 Timestamp('2026-01-08 09:00:00-0500', tz='America/Bogota'))

In [14]:
# True when the driver's next service is scheduled no later than the new service.
new_service['schedule_date'] >= next_service_start_time

True

### Since the next service is scheduled no later than the new service, this gap is not the insertion point: move on to the driver's next labor.

In [15]:
# Jump three rows ahead in the moves table (the row used above as the next service).
labor_iter = labor_iter + 3

In [16]:
# Where and when the driver becomes free after the labor at the current row.
available_from = moves_driver_df.loc[labor_iter, 'end_point']
available_at = moves_driver_df.loc[labor_iter, 'actual_end']

available_from, available_at

('POINT (-74.028923 4.704466)',
 Timestamp('2026-01-08 09:17:25.029013-0500', tz='America/Bogota'))

In [17]:
# In the moves table displayed above, each labor row is followed by a `_free`
# and a `_move` row, so the row three positions ahead is the next labor.
next_service_start_pos = moves_driver_df.loc[labor_iter + 3, 'start_point']
next_service_start_time = moves_driver_df.loc[labor_iter + 3, 'schedule_date']

next_service_start_pos, next_service_start_time

('POINT (-74.0343054 4.692379399999999)',
 Timestamp('2026-01-08 10:00:00-0500', tz='America/Bogota'))

In [18]:
# True when the driver's next service is scheduled no later than the new service.
new_service['schedule_date'] >= next_service_start_time

True

### Since the next service is scheduled no later than the new service, this gap is not the insertion point: move on to the driver's next labor.

In [19]:
# Jump three rows ahead in the moves table (the row used above as the next service).
labor_iter = labor_iter + 3

In [20]:
# Where and when the driver becomes free after the labor at the current row.
available_from = moves_driver_df.loc[labor_iter, 'end_point']
available_at = moves_driver_df.loc[labor_iter, 'actual_end']

available_from, available_at

('POINT (-74.06596379999999 4.6401052)',
 Timestamp('2026-01-08 10:25:11.051557-0500', tz='America/Bogota'))

In [21]:
# In the moves table displayed above, each labor row is followed by a `_free`
# and a `_move` row, so the row three positions ahead is the next labor.
next_service_start_pos = moves_driver_df.loc[labor_iter + 3, 'start_point']
next_service_start_time = moves_driver_df.loc[labor_iter + 3, 'schedule_date']

next_service_start_pos, next_service_start_time

('POINT (-74.10500789999999 4.6304576)',
 Timestamp('2026-01-08 14:00:00-0500', tz='America/Bogota'))

In [22]:
# True when the driver's next service is scheduled no later than the new service.
new_service['schedule_date'] >= next_service_start_time

False

### This is the breaking point... this means that the next labor of the driver is after the scheduled time of the new labor. This is the point where we could potentially assign the new labor. 

### Next, check whether the driver can get from its current position to the new service in time

In [23]:
# Haversine distance from the driver's last known position to the pickup
# point of the new service (the second returned value is not needed here).
distance_to_new_service, _ = distance(
    available_from,
    new_service['start_address_point'],
    method='haversine',
)

# distance / speed * 60 -> minutes (presumably ALFRED_SPEED is in km/h; confirm in src.config).
travel_time = distance_to_new_service / ALFRED_SPEED * 60

# Earliest moment the driver could reach the new service.
would_arrive_at = available_at + timedelta(minutes=travel_time)

In [25]:
# True when the driver reaches the new service no later than its latest allowed arrival.
new_service['latest_arrival_time'] >= would_arrive_at

True

### The driver would be able to arrive at the service on time. Now simulate the assignment and make sure he would still arrive on time to his next service

In [27]:
# The driver ends the new service at its destination address.
finish_new_service_pos = new_service['end_address_point']

# Haversine distance covered while performing the new service itself.
new_service_travel_distance, _ = distance(
    new_service['start_address_point'],
    new_service['end_address_point'],
    'haversine',
)

# distance / speed * 60 -> minutes (presumably the speed constant is in km/h; confirm in src.config).
new_service_travel_time = new_service_travel_distance / VEHICLE_TRANSPORT_SPEED * 60

# Total duration: TIEMPO_ALISTAR + transport + TIEMPO_FINALIZACION
# (both constants are passed to timedelta(minutes=...) below, so they are treated as minutes).
total_new_service_duration = TIEMPO_ALISTAR + new_service_travel_time + TIEMPO_FINALIZACION

# Moment at which the driver would be done with the new service.
finish_new_service_time = would_arrive_at + timedelta(minutes=total_new_service_duration)

In [28]:
# Display the simulated completion time of the new service.
finish_new_service_time

Timestamp('2026-01-08 11:23:01.646585-0500', tz='America/Bogota')

#### Compute driving time to the next scheduled_service

In [33]:
# Haversine distance from where the new service ends to where the driver's
# next already-scheduled service starts.
travel_distance_to_next_service, _ = distance(finish_new_service_pos,
                                              next_service_start_pos,
                                              'haversine')

# distance / speed * 60 -> minutes (presumably ALFRED_SPEED is in km/h; confirm in src.config).
travel_time_to_next_service = travel_distance_to_next_service/ALFRED_SPEED*60

# Offset the finish time by the travel TIME in minutes. The previous version
# passed the raw distance to timedelta(minutes=...), mixing up units and
# leaving travel_time_to_next_service unused.
would_arrive_to_next_service_at = finish_new_service_time + timedelta(minutes=travel_time_to_next_service)

In [38]:
# True when the driver still reaches the next scheduled service within its
# grace window (scheduled time plus TIEMPO_GRACIA minutes).
next_service_start_time + timedelta(minutes=TIEMPO_GRACIA) >= would_arrive_to_next_service_at

True

In [None]:
### COMPLETE LOGIC

