In [1]:
import pandas as pd
import pickle

import ipywidgets as widgets
from ipywidgets import interact

from src.data.data_load import (
    load_tables, 
    load_online_instance, 
    load_distances, 
    upload_ONLINE_static_solution
)
from src.data.solution_load import load_solution_dfs, _include_all_city
from src.utils.filtering import flexible_filter
from src.utils.plotting import plot_metrics_comparison_dynamic
from src.data.metrics import collect_results_to_df, compute_metrics_with_moves, get_day_plotting_df
from src.config.experimentation_config import *
from src.config.SD_experimentation_config import *
from src.config.config import *

# Folder that every path built in this notebook is relative to.
data_path = "../data"

# Distance settings.
distance_type = "osrm"       # Options: ['osrm', 'manhattan']
dist_method = "haversine"    # Options: ['precalced', 'haversine']

optimization_obj = "driver_distance"

# Base tables, loaded with generate_labors=False.
(
    directorio_df,
    labors_raw_df,
    cities_df,
    duraciones_df,
    valid_cities,
) = load_tables(data_path, generate_labors=False)
# Distances are not preloaded here (`instance` is not defined at this point):
# dist_dict = load_distances(data_path, distance_type, instance, dist_method)

# Names of the metrics of interest.
metricas = [
    "service_count",
    "vt_count",
    "num_drivers",
    "driver_extra_time",
    "driver_move_distance",
]


# Upload results

In [7]:
import pickle

# Path of the pickled ONLINE_static result that this cell inspects.
inst_path = f"{data_path}/resultados/online_operation/instAD3/haversine"
upload_path = f"{inst_path}/res_algo_ONLINE_static.pkl"

# Empty placeholders; nothing in this cell fills them.
labors_algo_df, moves_algo_df = pd.DataFrame(), pd.DataFrame()

# NOTE: pickle.load can execute arbitrary code -- only open trusted result files.
with open(upload_path, "rb") as f:
    res = pickle.load(f)

# The stored object must unpack into exactly three items.
# NOTE(review): `results_df` is re-bound later by the "Load results" cell.
results_df, moves_df, postponed_labors = res

In [2]:
def upload_simulated_instances():
    """
    Load every simulated instance and stack the frames per number of services.

    Iterates over the module-level ``n_services``, ``scenarios`` and ``seeds``.
    For each combination it loads instance ``N{n_serv}/{scenario}/seed_{seed}``
    through ``load_online_instance(data_path, instance, labors_raw_df)``, tags
    the three returned frames with ``n_serv``, ``scenario`` and ``seed``
    columns, and passes each one through ``_include_all_city``.

    Returns
    -------
    dict
        ``{n_serv: (labors_real_dfs, labors_static_dfs, labors_dynamic_dfs)}``.
        Each element is the row-wise concatenation (original indexes kept) of
        the frames of all scenario/seed combinations for that ``n_serv``; an
        empty ``DataFrame`` when there was nothing to load.
    """

    def _stack(frames):
        # pd.concat([]) raises ValueError, so keep the empty-frame result the
        # incremental version produced when no instance was loaded.
        return pd.concat(frames) if frames else pd.DataFrame()

    results = {}

    for n_serv in n_services:
        # Collect the per-instance frames and concatenate once per n_serv:
        # growing a DataFrame with pd.concat inside the loop re-copies all
        # accumulated rows on every iteration.
        real_parts, static_parts, dynamic_parts = [], [], []

        for scenario in scenarios:
            for seed in seeds:
                instance = f'N{n_serv}/{scenario}/seed_{seed}'
                labors_real_df, labors_static_df, labors_dynamic_df = load_online_instance(
                    data_path, instance, labors_raw_df
                )

                # Tag each frame so rows stay traceable after stacking.
                for df in (labors_real_df, labors_static_df, labors_dynamic_df):
                    df['n_serv'] = n_serv
                    df['scenario'] = scenario
                    df['seed'] = seed

                real_parts.append(_include_all_city(labors_real_df))
                static_parts.append(_include_all_city(labors_static_df))
                dynamic_parts.append(_include_all_city(labors_dynamic_df))

        results[n_serv] = (
            _stack(real_parts),
            _stack(static_parts),
            _stack(dynamic_parts),
        )

    return results
        
# Load all simulated instances once; `results` maps each n_serv to its
# (real, static, dynamic) labor frames.
results = upload_simulated_instances()    


# Instance exploration

## Global

In [3]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from ipywidgets import interact, Dropdown


def plot_topology_reactive(results, n_services_list, scenarios, seeds):
    """
    Show an interactive topology overview driven by two dropdowns.

    The dropdowns select the number of services and the city. The city options
    follow the selected number of services. For the selected pair the figure
    shows:

    * top row: one histogram per scenario of the number of
      ``VEHICLE_TRANSPORTATION`` labors per seed, computed on the real frame;
    * bottom row: one box per scenario with the per-seed static share,
      ``n_static / (n_static + n_dynamic)``, where both counts are the number
      of unique ``service_id`` values in the static / dynamic frame.

    Parameters
    ----------
    results : dict
        ``{n_services: (df_real, df_static, df_dynamic)}``. The frames are
        read through the columns ``city``, ``scenario`` and ``seed``; the real
        frame also through ``labor_category`` and the static / dynamic frames
        through ``service_id``.
    n_services_list : sequence
        Values offered in the "N services" dropdown; the first one is the
        initial selection.
    scenarios : iterable
        Scenario labels; one histogram column and one box per label.
    seeds : iterable
        Currently unused -- the seeds present in the data are plotted. Kept so
        existing calls keep working.

    Raises
    ------
    ValueError
        If ``n_services_list`` is empty.
    """
    scenarios = list(scenarios)
    n_services_list = list(n_services_list)
    if not n_services_list:
        raise ValueError("n_services_list must contain at least one value")

    # ---- helper: available cities for a given n_services ----
    def available_cities(n):
        df_real = results[n][0]
        if "city" in df_real.columns:
            return sorted(df_real["city"].dropna().unique())
        return []

    # ---- helper: initial / fallback city selection ----
    def pick_city(cities, preferred="ALL"):
        # 'ALL' was the previous hard-coded default; fall back to the first
        # city (or no selection) instead of failing when it is not offered.
        if preferred in cities:
            return preferred
        return cities[0] if cities else None

    # ---- helper: rows of one city (empty frame when there is no city column) ----
    def filter_city(df, city):
        if "city" not in df.columns:
            return df.iloc[0:0].copy()
        return df[df["city"] == city].copy()

    # ---- reactive plotting function ----
    def draw(n_services, city):
        df_real, df_static, df_dynamic = results[n_services]

        # ---------------- FILTER BY CITY ----------------
        dfR = filter_city(df_real, city)
        dfS = filter_city(df_static, city)
        dfD = filter_city(df_dynamic, city)

        # ---------------- HISTOGRAM DATA ----------------
        hist_rows = []
        for sc in scenarios:
            sub = dfR[dfR["scenario"] == sc]
            for seed, g in sub.groupby("seed"):
                vt_count = int((g["labor_category"] == "VEHICLE_TRANSPORTATION").sum())
                hist_rows.append({"scenario": sc, "seed": seed, "vt": vt_count})

        # Explicit columns keep the scenario filter below valid when the
        # selection produced no rows at all.
        hist_df = pd.DataFrame(hist_rows, columns=["scenario", "seed", "vt"])

        # ---------------- BOX PLOT DATA ----------------
        box_rows = []
        for sc in scenarios:
            stat_by_seed = dfS[dfS["scenario"] == sc].groupby("seed")["service_id"].nunique()
            dyn_by_seed = dfD[dfD["scenario"] == sc].groupby("seed")["service_id"].nunique()

            # A seed may appear in only one of the two frames.
            for seed in sorted(set(stat_by_seed.index) | set(dyn_by_seed.index)):
                n_static = int(stat_by_seed.get(seed, 0))
                n_dynamic = int(dyn_by_seed.get(seed, 0))
                total = n_static + n_dynamic
                box_rows.append({
                    "scenario": sc,
                    "seed": seed,
                    "prop_static": (n_static / total) if total > 0 else np.nan
                })

        box_df = pd.DataFrame(box_rows, columns=["scenario", "seed", "prop_static"])

        # ---------------- PLOTTING ----------------
        # One top-row column per scenario (was fixed to 3); the bottom row
        # spans the full width.
        n_cols = max(len(scenarios), 1)
        fig = make_subplots(
            rows=2, cols=n_cols,
            subplot_titles=[f"VT counts: {sc}" for sc in scenarios] +
                           [""] * (n_cols - len(scenarios)) +
                           ["Static proportion per scenario"],
            specs=[[{"type": "xy"}] * n_cols,
                   [{"colspan": n_cols, "type": "xy"}] + [None] * (n_cols - 1)],
            vertical_spacing=0.15
        )

        # -- top row histograms --
        bins = 20
        for i, sc in enumerate(scenarios):
            h = hist_df[hist_df["scenario"] == sc]
            if not h.empty:
                vals, edges = np.histogram(h["vt"].values, bins=bins)
                centers = (edges[:-1] + edges[1:]) / 2
                fig.add_trace(
                    go.Bar(x=centers, y=vals, showlegend=False),
                    row=1, col=i+1
                )
            fig.update_xaxes(title_text="VT per seed", row=1, col=i+1)
            fig.update_yaxes(title_text="Frequency", row=1, col=i+1)

        # -- bottom row boxplot --
        for sc in scenarios:
            fig.add_trace(
                go.Box(
                    y=box_df[box_df["scenario"] == sc]["prop_static"],
                    name=sc,
                    boxmean="sd"
                ),
                row=2, col=1
            )

        fig.update_layout(
            height=700,
            width=1200,
            title=f"Instance topology  |  N={n_services}  |  City={city}",
            showlegend=False,
            template="plotly_white"
        )

        fig.show()

    # ---- WIDGETS ----
    default_n = n_services_list[0]
    initial_cities = available_cities(default_n)

    n_dropdown = Dropdown(options=n_services_list, value=default_n, description="N services")
    city_dropdown = Dropdown(
        options=initial_cities,
        value=pick_city(initial_cities),
        description="City"
    )

    def refresh_cities(change):
        # Keep the city list in sync with the selected N. Nothing is touched
        # when the list is unchanged, so the current city is not reset.
        cities = available_cities(change["new"])
        if list(city_dropdown.options) == cities:
            return
        previous = city_dropdown.value
        city_dropdown.options = cities
        if cities:
            city_dropdown.value = previous if previous in cities else pick_city(cities)

    # Registered before interact() so the city list is refreshed before the
    # redraw that the N change triggers.
    n_dropdown.observe(refresh_cities, names="value")

    # Use interact to automatically re-run draw() on change
    interact(draw, n_services=n_dropdown, city=city_dropdown)

# Render the interactive topology view for the simulated instances held in
# `results`, using the configured n_services, scenarios and seeds.
plot_topology_reactive(
    results=results,
    n_services_list=n_services,
    scenarios=scenarios,
    seeds=seeds
)


interactive(children=(Dropdown(description='N services', options=(900, 950, 1000, 1050, 1100, 1150, 1200), val…

## Detail analysis of scenario/seed

### Thorough details for a given instance

Great, now I want to be able to recover the details of a particular instance. More specifically, I want to be able to select with dropdowns the n_services, the scenario and the seed. For that particular instance, I'd like to be able to visualize:
- Number of services
- Number of vehicle transportation labors

- Proportion of static/dynamic
- Number of static VT labors
- Number of dynamic VT labors

# Result visualization

## Load results

In [None]:
import pickle
import pandas as pd
from pathlib import Path

def load_all_results_explicit(
    root_dir: str,
    n_services_list,
    scenarios_list,
    seeds_list,
    dist_methods_list,
    algorithms_list
) -> pd.DataFrame:
    """
    Load every result pickle found under
    ``root/N{n_services}/{scenario}/seed_{seed}/{dist_method}/res_algo_{algorithm}.pkl``.

    Each pickle must unpack into ``(labors_df, moves_df, metadata)``. Missing
    files and files that fail to load are reported with ``print`` and skipped,
    so one bad file does not abort the whole scan.

    Parameters
    ----------
    root_dir : str
        Directory that contains the ``N{n_services}`` folders.
    n_services_list, scenarios_list, seeds_list, dist_methods_list, algorithms_list
        Values to combine; every combination is looked up, with the first
        list varying slowest and the algorithms fastest.

    Returns
    -------
    pd.DataFrame
        One row per loaded file with columns
        ``[n_services, scenario, seed, dist_method, algorithm, labors_df,
        moves_df, metadata, path]``. The columns are present even when no
        file could be loaded.
    """
    from itertools import product  # local import keeps the cell self-contained

    columns = [
        "n_services", "scenario", "seed", "dist_method", "algorithm",
        "labors_df", "moves_df", "metadata", "path",
    ]
    root = Path(root_dir)
    rows = []

    combinations = product(
        n_services_list, scenarios_list, seeds_list, dist_methods_list, algorithms_list
    )
    for n_services, scenario, seed, dist_method, algorithm in combinations:
        pkl_path = (
            root
            / f"N{n_services}"
            / scenario
            / f"seed_{seed}"
            / dist_method
            / f"res_algo_{algorithm}.pkl"
        )

        if not pkl_path.exists():
            print(f"⚠ Missing file: {pkl_path}")
            continue

        # SECURITY: pickle.load can execute arbitrary code -- only point
        # root_dir at result files produced by trusted runs.
        try:
            with open(pkl_path, "rb") as f:
                labors_df, moves_df, metadata = pickle.load(f)
        except Exception as e:
            # Deliberately broad: corrupt files and unexpected payload shapes
            # are reported and skipped.
            print(f"❌ Error loading {pkl_path}: {e}")
            continue

        rows.append({
            "n_services": n_services,
            "scenario": scenario,
            "seed": seed,
            "dist_method": dist_method,
            "algorithm": algorithm,
            "labors_df": labors_df,
            "moves_df": moves_df,
            "metadata": metadata,
            "path": str(pkl_path)
        })

    # Explicit columns: an empty scan still yields a frame that downstream
    # code can filter by column.
    return pd.DataFrame(rows, columns=columns)

# Algorithms whose result pickles (res_algo_{algorithm}.pkl) are collected.
algorithms_list = [
    "OFFLINE",
    "ONLINE_static",
    "INSERT",
    "INSERT_BUFFER",
    "REACT",
    "REACT_BUFFER",
]

# NOTE(review): this re-binds `results_df`, which the earlier single-result
# upload cell also assigns.
results_df = load_all_results_explicit(
    f"{data_path}/resultados/",
    n_services,
    scenarios,
    seeds,
    ["haversine"],
    algorithms_list,
)


## Global results

Now I want to move on to visualizing the actual results of the algorithms on an instance. For this I want a visualization very similar to the one I had previously for the artificial and real instances (these are simulated instances). The metrics that I want to visualize are pretty much the same. However, the visualization of the results will be a bit different. The main difference is that for the two previous kinds of instances I was running a full week, so the most natural visualization was a weekly time series with a bar graph of aggregates to the side. Now there is only one day per run, and there are also scenarios. Because of that, I believe the best way to visualize the results is with box plots. I'm thinking of having all the results in a single plot: a box plot with three ticks on the x-axis, one per scenario (these would be groups of box plots), and, within each group (scenario), one box per algorithm. The boxes of a given algorithm must have the same color in all three groups, with a single legend that explains the colors. Again, I want to be able to select the instance number and the city as well. I also want a parameter of the plotting function (not necessarily editable from a dropdown, default False) that controls whether the plots are saved to a given directory.

I will provide the plotting logic I'm currently using for the artificial instance results, for reference. Before that, is the logic and reasoning of what I just explained clear? Would you like me to clarify or further explain anything?

For every metric included in the AD_results visualization, show one box plot per scenario (each box summarizing the seeds within that scenario). N is selected with a dropdown.

## Results per scenario

For every metric, show a bar chart with the value for each seed.