In [1]:
import pandas as pd
import pickle

import ipywidgets as widgets
from ipywidgets import interact

from src.data.data_load import (
    load_tables, 
    load_online_instance, 
    load_distances, 
    upload_ONLINE_static_solution
)
from src.data.solution_load import load_solution_dfs
from src.utils.filtering import flexible_filter
from src.utils.plotting import plot_metrics_comparison_dynamic
from src.data.metrics import collect_results_to_df, compute_metrics_with_moves, get_day_plotting_df
from src.config.experimentation_config import *
from src.config.SD_experimentation_config import *
from src.config.config import *

# Folder passed to every loader in this notebook.
data_path = '../data'

# Distance settings (only referenced by the commented-out load_distances call below).
distance_type = 'osrm'         # Options: ['osrm', 'manhattan']
dist_method = 'haversine'      # Options: ['precalced', 'haversine']

optimization_obj = 'driver_distance'

# Base tables; labors_raw_df is later handed to load_online_instance.
(
    directorio_df,
    labors_raw_df,
    cities_df,
    duraciones_df,
    valid_cities,
) = load_tables(data_path, generate_labors=False)
# dist_dict = load_distances(data_path, distance_type, instance, dist_method)

# Metric names (not referenced by the cells visible in this notebook section).
metricas = [
    'service_count',
    'vt_count',
    'num_drivers',
    'driver_extra_time',
    'driver_move_distance',
]


# Upload results

In [2]:
def upload_simulated_instances():
    """
    Load every simulated instance and stack the tables per instance size.

    Iterates over the notebook-level ``n_services``, ``scenarios`` and ``seeds``
    and calls ``load_online_instance(data_path, instance, labors_raw_df)`` for
    each instance path ``N{n_serv}/{scenario}/seed_{seed}``. Every loaded table
    is tagged with ``n_serv``, ``scenario`` and ``seed`` columns so rows remain
    traceable after stacking.

    Returns
    -------
    dict
        ``n_serv -> (labors_real_dfs, labors_static_dfs, labors_dynamic_dfs)``.
        Each element is the concatenation of that table over all scenarios and
        seeds (original row indices are kept, so index values repeat across
        instances). An empty DataFrame is returned for a table when nothing
        was loaded for that size.
    """
    results = {}

    for n_serv in n_services:
        # One bucket of per-instance frames for each of the three tables. They
        # are concatenated once per size: concatenating inside the loop re-copies
        # the growing frame on every iteration and starts from an empty frame.
        real_frames, static_frames, dynamic_frames = [], [], []

        for scenario in scenarios:
            for seed in seeds:
                instance = f'N{n_serv}/{scenario}/seed_{seed}'
                labors_real_df, labors_static_df, labors_dynamic_df = load_online_instance(
                    data_path, instance, labors_raw_df
                )

                tags = {'n_serv': n_serv, 'scenario': scenario, 'seed': seed}
                # assign() returns tagged copies, so the frames handed back by
                # the loader are not mutated in place.
                real_frames.append(labors_real_df.assign(**tags))
                static_frames.append(labors_static_df.assign(**tags))
                dynamic_frames.append(labors_dynamic_df.assign(**tags))

        results[n_serv] = tuple(
            pd.concat(frames) if frames else pd.DataFrame()
            for frames in (real_frames, static_frames, dynamic_frames)
        )

    return results


results = upload_simulated_instances()


# Instance exploration

In [6]:
# Despite its name, `df` is the 3-tuple stored by upload_simulated_instances() for
# N=900: (labors_real_dfs, labors_static_dfs, labors_dynamic_dfs).
df = results[900]
# Element 0 is the stacked "real" labors table (all scenarios and seeds).
df[0]

Unnamed: 0,service_id,labor_id,labor_type,labor_name,labor_category,labor_price,labor_created_at,labor_start_date,labor_end_date,alfred,...,city,address_id,address_point,address_name,is_static,is_dynamic,date,n_serv,scenario,seed
0,236696,331500,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-04-30 15:33:33.978000-05:00,2026-11-11 06:50:00-05:00,2026-11-11 08:22:00-05:00,2306,...,1004,109102.0,POINT (-76.5710842 3.406073),Alfred,True,False,2026-11-11,900,easy,0
1,236703,331507,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-04-30 15:42:28.753000-05:00,2026-11-11 10:29:00-05:00,2026-11-11 11:23:00-05:00,2306,...,1004,109102.0,POINT (-76.5710842 3.406073),Alfred,True,False,2026-11-11,900,easy,0
2,238382,333295,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-05-06 11:52:43.164000-05:00,2026-11-11 07:47:00-05:00,2026-11-11 09:42:00-05:00,2036,...,1,55748.0,POINT (-75.55748439999999 6.252939599999999),Apartamento,True,False,2026-11-11,900,easy,0
3,241202,336337,6.0,General Mechanics,GENERAL_MECHANICS,261800.0,2025-05-13 15:44:09.749000-05:00,2026-09-30 16:17:00-05:00,2026-10-03 06:36:00-05:00,,...,1004,58798.0,POINT (-76.5421774 3.4259662),Automotriz On line SD,True,False,2026-11-11,900,easy,0
4,241202,337340,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,90004.0,2025-05-15 15:35:54.681000-05:00,2026-10-03 06:36:00-05:00,2026-10-03 11:06:00-05:00,20269,...,1004,26088.0,POINT (-76.493671 3.414003),Casa,True,False,2026-11-11,900,easy,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1142,261150,357715,2.0,Alfred Transport,VEHICLE_TRANSPORTATION,35000.0,2025-06-27 16:34:31.456000-05:00,2026-11-11 08:31:39-05:00,2026-11-11 08:54:56-05:00,20269,...,1004,26088.0,POINT (-76.493671 3.414003),Casa,True,False,2026-11-11,900,hard,14
1143,261166,357733,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-06-27 17:00:53.908000-05:00,2026-11-11 12:30:00-05:00,2026-11-11 13:53:00-05:00,10451,...,149,16688.0,POINT (-74.0202608 4.6664181),casa,True,False,2026-11-11,900,hard,14
1144,261409,357976,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,55663.0,2025-06-28 07:57:27.130000-05:00,2026-11-11 10:22:00-05:00,2026-11-11 12:25:00-05:00,2036,...,1,55748.0,POINT (-75.55748439999999 6.252939599999999),Apartamento,False,True,2026-11-11,900,hard,14
1145,261416,357983,12.0,Alfred Initial Transport,VEHICLE_TRANSPORTATION,90004.0,2025-06-28 08:26:02.044000-05:00,2026-11-11 09:31:00-05:00,2026-11-11 11:33:00-05:00,6412,...,844,12312.0,POINT (-73.1037471 7.0910492),Apartamento,False,True,2026-11-11,900,hard,14


## Global

I want a clear visualization of how my simulated instances look. This means not considering any algorithm results yet, only the topology of the instances themselves. I've uploaded the results of all the simulations into a dictionary called results, whose keys are the simulation sizes (e.g., 500) and whose values are tuples of three dataframes, labors_real_df, labors_static_df, and labors_dynamic_df, which correspond to all the labors, only the static labors, and only the dynamic labors, respectively, as we've worked on before.
I want to have a dropdown to be able to select the number of services and the city as well, the options are in a list called n_services. Just as a reminder, each service number has three scenarios which are in the list scenarios, and inside there are x number of seeds which can be located in the list seeds. Then, for the selected number of services I want to have the following visualizations:
1. Number of vehicle transportation labors. I want to have three plots, one for each scenario. Each one should have a histogram with the number of vehicle transportation labors on the x-axis and the occurrence on the y-axis. The histogram for each scenario should be based on the number of vehicle transportation labors of the seeds.
2. Distribution of static/dynamic services: I want to visualize the experimental proportion of static vs. dynamic services in the seeds. For this, I want a plot that has three boxplots, one per scenario. Each box plot should display the proportion of static services vs. dynamic services. Again, remember that there are multi-labor services; I don't want to double count those services just because they have multiple rows. I want to count each service only once.

Also, I want to be able to control with a parameter of the plotting function (not necessarily editable on the dropdown, default False) to save the plots in a given directory.

Is the logic and the reasoning of what I just explained clear? Would you like to clarify or for me to further explain anything?

In [None]:
# NOTE(review): N is not referenced by any other cell visible here, and 200 is not
# among the N options shown in the widget output (900-1200) — confirm it is still needed.
N = 200

### Number of vehicle transportation labors for a given N

A histogram with the number of VT labors on the x-axis and the occurrence on the y-axis.

In [11]:
# Cell: Instance topology plotting (plotly + ipywidgets)
import os
from typing import List, Tuple, Dict
import math

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display

# -------------------------
# Helper: main plotting fn
# -------------------------
def plot_instance_visualizations(
    results: Dict[int, Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]],
    n_services_list: List[int],
    scenarios: List[str],
    seeds: List[int],
    default_n: int = None,
    default_city: str = None,
    save_plots: bool = False,
    out_dir: str = "./plots",
    hist_bins: int = 20
):
    """
    Interactive visualization of the topology of the simulated instances.

    Displays a row of controls (instance size, city, scenarios, save options and
    a "Draw" button) and, below it, a figure with:

    * top row: one histogram per scenario (first three entries of ``scenarios``)
      of the per-seed number of ``VEHICLE_TRANSPORTATION`` labors;
    * bottom row: one box per selected scenario with the per-seed proportion of
      static services, counting each ``service_id`` once per seed.

    Parameters
    ----------
    results : dict
        mapping n_services -> (labors_real_df, labors_static_df, labors_dynamic_df).
        The frames must provide the columns ``city``, ``scenario``, ``seed``,
        ``labor_category`` and ``service_id``.
    n_services_list : list[int]
        available experiment sizes (keys of ``results``)
    scenarios : list[str]
        scenario names in the data, e.g. ['easy', 'normal', 'hard']
    seeds : list[int]
        currently unused; kept so existing calls keep working
    default_n : int
        size shown initially; defaults to the first entry of ``n_services_list``
    default_city : str
        optional city selected initially; it must equal one of the values of the
        ``city`` column, otherwise the first available city is used
    save_plots : bool
        initial state of the "Save plots" checkbox (default False)
    out_dir : str
        initial value of the output directory; it is only created when a plot is
        actually saved
    hist_bins : int
        number of bins for the histograms

    Raises
    ------
    KeyError
        if the ``scenario`` or ``seed`` column is missing from the real labors table
    """
    # Fixed schemas for the two aggregate tables, so that an empty selection still
    # yields frames that can be filtered on "scenario" instead of raising KeyError.
    hist_columns = ["scenario", "seed", "vt_count"]
    box_columns = [
        "scenario", "seed", "n_static_services", "n_dynamic_services",
        "prop_static", "total_services",
    ]
    # The top row of the figure always has three histogram panels.
    n_hist_panels = 3

    if default_n is None:
        default_n = n_services_list[0]

    def available_cities_for_n(n):
        """Return the sorted distinct cities of the real labors table for size ``n``."""
        df_real = results[n][0]
        if "city" in df_real.columns:
            return sorted(df_real["city"].dropna().unique().tolist())
        return []

    # Widgets
    n_dropdown = widgets.Dropdown(options=sorted(n_services_list), value=default_n, description="N services")
    city_dropdown = widgets.Dropdown(options=available_cities_for_n(default_n), description="City")
    if default_city and default_city in city_dropdown.options:
        city_dropdown.value = default_city
    scenario_select = widgets.SelectMultiple(options=scenarios, value=tuple(scenarios), description="Scenarios")
    save_toggle = widgets.Checkbox(value=save_plots, description="Save plots")
    outdir_text = widgets.Text(value=out_dir, description="Out dir")
    update_button = widgets.Button(description="Draw", button_style="primary")
    # Everything produced by the button callback is rendered inside this widget, so
    # each redraw replaces the previous figure and the output stays with the controls.
    plot_output = widgets.Output()

    controls = widgets.HBox([n_dropdown, city_dropdown, scenario_select, save_toggle, outdir_text, update_button])
    display(widgets.VBox([controls, plot_output]))

    def _prepare_aggregates(n_val, city_val, selected_scenarios):
        """Build the per-seed VT counts and static-service proportions for the selection."""
        df_real, df_static, df_dynamic = results[n_val]

        # Restrict to the chosen city; None or "" means "all cities".
        if city_val is not None and city_val != "":
            df_real_city = df_real[df_real["city"] == city_val]
            df_static_city = df_static[df_static["city"] == city_val]
            df_dynamic_city = df_dynamic[df_dynamic["city"] == city_val]
        else:
            df_real_city, df_static_city, df_dynamic_city = df_real, df_static, df_dynamic

        if "scenario" not in df_real_city.columns:
            raise KeyError("'scenario' column not found in labors_real_df")
        if "seed" not in df_real_city.columns:
            raise KeyError("'seed' column not found in labors_real_df")

        # Histogram input: number of vehicle transportation labors per (scenario, seed).
        hist_records = []
        for sc in selected_scenarios:
            df_sc = df_real_city[df_real_city["scenario"] == sc]
            is_vt = df_sc["labor_category"] == "VEHICLE_TRANSPORTATION"
            vt_per_seed = is_vt.groupby(df_sc["seed"], sort=True).sum()
            for seed_val, vt_count in vt_per_seed.items():
                hist_records.append({"scenario": sc, "seed": seed_val, "vt_count": int(vt_count)})
        hist_df = pd.DataFrame(hist_records, columns=hist_columns)

        # Boxplot input: static share per (scenario, seed). nunique() on service_id
        # counts a multi-labor service once even though it spans several rows.
        box_records = []
        for sc in selected_scenarios:
            stat_by_seed = (
                df_static_city[df_static_city["scenario"] == sc]
                .groupby("seed")["service_id"].nunique()
            )
            dyn_by_seed = (
                df_dynamic_city[df_dynamic_city["scenario"] == sc]
                .groupby("seed")["service_id"].nunique()
            )

            for seed_val in sorted(set(stat_by_seed.index) | set(dyn_by_seed.index)):
                n_static = int(stat_by_seed.get(seed_val, 0))
                n_dyn = int(dyn_by_seed.get(seed_val, 0))
                total = n_static + n_dyn
                box_records.append({
                    "scenario": sc,
                    "seed": seed_val,
                    "n_static_services": n_static,
                    "n_dynamic_services": n_dyn,
                    "prop_static": float(n_static / total) if total > 0 else np.nan,
                    "total_services": total
                })
        box_df = pd.DataFrame(box_records, columns=box_columns)

        return hist_df, box_df

    def _build_figure(n_val, city_val, selected_scenarios, hist_df, box_df):
        """Assemble the 2-row figure: three histograms on top, one boxplot panel below."""
        panel_scenarios = list(scenarios[:n_hist_panels])

        # A panel is titled only when its own scenario is selected; unselected or
        # missing panels keep an empty title so the layout stays stable.
        hist_titles = [
            f"VT counts: {sc}" if sc in selected_scenarios else ""
            for sc in panel_scenarios
        ]
        hist_titles += [""] * (n_hist_panels - len(hist_titles))

        fig = make_subplots(
            rows=2, cols=3,
            subplot_titles=hist_titles + ["Proportion of static services per scenario"],
            vertical_spacing=0.18,
            specs=[[{"type": "xy"}, {"type": "xy"}, {"type": "xy"}],
                   [{"colspan": 3, "type": "xy"}, None, None]]
        )

        # Top row: one histogram per scenario, in the order of the scenarios list.
        for col, sc in enumerate(panel_scenarios, start=1):
            counts = hist_df.loc[hist_df["scenario"] == sc, "vt_count"].dropna().astype(int).values
            if len(counts) == 0:
                # Empty placeholder trace for a scenario without data.
                fig.add_trace(go.Bar(x=[], y=[], showlegend=False), row=1, col=col)
            else:
                hist_vals, bin_edges = np.histogram(counts, bins=hist_bins)
                bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2.0
                fig.add_trace(
                    go.Bar(x=bin_centers, y=hist_vals, name=f"{sc}", showlegend=False),
                    row=1, col=col
                )
            fig.update_xaxes(title_text="VT count (per seed)", row=1, col=col)
            fig.update_yaxes(title_text="Seeds", row=1, col=col)

        # Bottom row: one box per selected scenario, in the order of the scenarios list.
        for sc in scenarios:
            if sc not in selected_scenarios:
                continue
            sc_box = box_df[(box_df["scenario"] == sc) & (box_df["total_services"] > 0)]
            y = sc_box["prop_static"].dropna().values
            fig.add_trace(go.Box(y=y, name=sc, boxmean="sd", marker=dict(opacity=0.7)), row=2, col=1)
        fig.update_xaxes(title_text="Scenario", row=2, col=1)
        fig.update_yaxes(title_text="Proportion of static services", row=2, col=1)

        fig.update_layout(
            height=700,
            width=1200,
            title_text=f"Instance topology — N={n_val}  city={city_val}",
            template="plotly_white"
        )
        return fig

    def _save_figure(fig, n_val, city_val, target_dir):
        """Write the figure as PNG and HTML into ``target_dir``; failures are reported, not raised."""
        os.makedirs(target_dir, exist_ok=True)
        safe_name = f"instance_N{n_val}_city_{city_val}"
        png_path = os.path.join(target_dir, f"{safe_name}.png")
        html_path = os.path.join(target_dir, f"{safe_name}.html")
        saved_any = False
        try:
            # requires kaleido for png
            fig.write_image(png_path, scale=2)
            saved_any = True
        except Exception as e:
            print("⚠️ Could not save PNG (kaleido required). Error:", e)
        try:
            fig.write_html(html_path)
            saved_any = True
        except Exception as e:
            print("⚠️ Could not save HTML. Error:", e)
        # Only report success when at least one file was actually written.
        if saved_any:
            print(f"✅ Saved plots to: {target_dir}")

    def _draw(_):
        """Read the current widget values, rebuild the figure and show (and optionally save) it."""
        n_val = n_dropdown.value
        city_val = city_dropdown.value
        selected_scenarios = list(scenario_select.value)
        save_flag = save_toggle.value
        od = outdir_text.value or out_dir

        hist_df, box_df = _prepare_aggregates(n_val, city_val, selected_scenarios)
        fig = _build_figure(n_val, city_val, selected_scenarios, hist_df, box_df)

        # wait=True keeps the old figure on screen until the new one is ready.
        plot_output.clear_output(wait=True)
        with plot_output:
            fig.show()
            if save_flag:
                _save_figure(fig, n_val, city_val, od)

    def _on_n_change(change):
        """Refresh the city options for the new size, keeping the current city when possible."""
        # Read the selection BEFORE replacing the options: assigning new options
        # resets the dropdown to its first entry.
        previous_city = city_dropdown.value
        cities = available_cities_for_n(change["new"])
        city_dropdown.options = cities
        if previous_city in cities:
            city_dropdown.value = previous_city

    n_dropdown.observe(_on_n_change, names="value")
    update_button.on_click(_draw)

    # initial draw
    _draw(None)

# -------------------------
# Usage
# -------------------------
# `results`, `n_services`, `scenarios` and `seeds` must already exist in the
# notebook namespace when this cell runs.
plot_instance_visualizations(
    results=results,
    n_services_list=n_services,
    scenarios=scenarios,
    seeds=seeds,
    default_n=n_services[0],
)


HBox(children=(Dropdown(description='N services', options=(900, 950, 1000, 1050, 1100, 1150, 1200), value=900)…

### Distribution static/dynamic in the three scenarios for a given N

Three box plots (one per scenario) showing the proportion of static services.

## Detail analysis of scenario/seed

### Thorough details for a given instance

Great, now I want to be able to recover the details of a particular instance. More specifically, I want to be able to select with dropdowns the n_services, the scenario and the seed. For that particular instance, I'd like to be able to visualize:
- Number of services
- Number of vehicle transportation labors

- Proportion of static/dynamic
- Number of static VT labors
- Number of dynamic VT labors

# Result visualization

## Global results

Now I want to move on to visualizing the actual results of the algorithms on an instance. For this I want a visualization very similar to the one I had previously for the artificial and real instances (these are simulated instances). The metrics that I want to visualize are pretty much the same. However, the visualization of the results will be a bit different. The main difference is that for the two previous kinds of instances I was running a full week, so the most natural way of visualizing was a weekly time series with a bar graph of aggregates to the side. Now there's only one day per run, and there are also scenarios. For that reason, I believe the best way to visualize the results is with box plots. I'm thinking of having all the results in a single plot. This would mean a box plot with three ticks on the x-axis, one per scenario (these would be groups of box plots). For each group (scenario), there is one box plot per algorithm. The boxes of each algorithm must therefore have the same color in all three groups, with a single legend that explains the colors of the box plots. Again, I want to be able to select the instance number and the city as well. Also, I want to be able to control with a parameter of the plotting function (not necessarily editable on the dropdown, default False) whether to save the plots in a given directory.

I will provide the plotting logic I'm currently using in the artificial instance results, for reference. Before that, is the logic and the reasoning of what I just explained clear? Would you like me to clarify or further explain anything?

For every metric included in the AD_results visualization, have a boxplot per scenario (showing the seeds within each scenario). This will be per N, selected in a dropdown.

## Results per scenario

For all the metrics, have a bar chart that shows the value for each seed.