In [1]:
%matplotlib ipympl
import iteround
import lib.optimize as opt
import lib.simulation as sim
import lib.budget as budget
import lib.budget as budget
import lib.utils as utils
import lib.geo as geo
import lib.clustering as clustering
import lib.pipeline as pipeline
import lib.optimize.common as opt_common
import pandas as pd
import numpy as np
import geopandas as gpd
import seaborn
import functools
import itertools
import multiprocessing
import typing
import os
from multiprocessing import Pool, cpu_count
from datetime import datetime, timedelta, date
from dateutil.relativedelta import relativedelta
import json
import matplotlib.pyplot as plt
import matplotlib as mpl
import networkx as nx
from tqdm.auto import tqdm
import lib.traffic as trf

seaborn.set_theme(context='paper', style='white', font_scale=.85)

In [2]:
DATASET_NAME = "data"
COMPUTED_PERSISTENCE_DIR = "computed"
TEMPORARY_DIR = "tmp"

def fname(what: str, temporary: bool = False) -> str:
    """Return the path of the ``.pgz`` file that stores the artifact ``what``.

    Args:
        what: Short artifact label; it becomes part of the file name.
        temporary: When true the file lives under ``TEMPORARY_DIR``,
            otherwise under ``COMPUTED_PERSISTENCE_DIR``.

    Returns:
        ``"<directory>/<DATASET_NAME>-<what>.pgz"``.
    """
    if temporary:
        base_dir = TEMPORARY_DIR
    else:
        base_dir = COMPUTED_PERSISTENCE_DIR
    return f"{base_dir}/{DATASET_NAME}-{what}.pgz"

# Load Data

In [3]:
# Inputs are read through the project helper `utils.gzip_pickle_load`
# (presumably gzip-compressed pickles - only load files from a trusted source).
# Station table indexed by station id (see the index/columns cells below).
charging_stations = utils.gzip_pickle_load('charging_stations.gz')
# Square station-to-station distance matrix (shape and sample shown below).
station_distances_mtx = utils.gzip_pickle_load('station_distances_mtx.gz')
# One row per vehicle stay with arrival/departure timestamps and the closest station.
traffic_full = utils.gzip_pickle_load('traffic_full.gz')

In [10]:
traffic_full # DataFrame index must be unique, 'station' and 'station_distance' are currently set to the closest station location

Unnamed: 0,vehicle,station,arrival,departure,lon,lat,station_distance,x,y
111046,567730,29,2019-03-01 00:11:51,2019-03-01 01:56:34,redacted,redacted,263.130912,redacted,redacted
63331,559851,10,2019-03-01 00:21:06,2019-03-01 03:58:46,redacted,redacted,34.471432,redacted,redacted
150847,550695,21,2019-03-01 00:22:30,2019-03-01 00:38:39,redacted,redacted,129.692525,redacted,redacted
170192,575428,21,2019-03-01 00:35:50,2019-03-01 04:29:12,redacted,redacted,24.639301,redacted,redacted
205821,565057,22,2019-03-01 00:41:16,2019-03-01 06:40:13,redacted,redacted,121.851845,redacted,redacted
...,...,...,...,...,...,...,...,...,...
154893,577352,2,2019-09-13 22:51:28,2019-09-13 23:12:33,redacted,redacted,162.439551,redacted,redacted
97261,586151,10,2019-09-13 22:59:31,2019-09-13 23:25:01,redacted,redacted,19.542211,redacted,redacted
16428,578689,33,2019-09-13 23:28:40,2019-09-14 00:14:17,redacted,redacted,177.617087,redacted,redacted
111996,577352,2,2019-09-13 23:31:37,2019-09-14 01:19:11,redacted,redacted,168.444444,redacted,redacted


In [19]:
charging_stations.index # only selected station locations were considered in the tests

Int64Index([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 19, 20, 21, 22,
            23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38],
           dtype='int64', name='station_id')

In [17]:
charging_stations.columns # DataFrame index represents station IDs

Index(['lon', 'lat', 'x', 'y'], dtype='object')

In [18]:
station_distances_mtx.shape

(39, 39)

In [28]:
station_distances_mtx[1:5, 1:5] # index 0 is ignored

array([[         inf, 546.98754689, 633.29764185, 966.55138914],
       [546.98754689,          inf, 230.58249453, 480.54789505],
       [633.29764185, 230.58249453,          inf, 334.84569151],
       [966.55138914, 480.54789505, 334.84569151,          inf]])

# Training Datasets

### Middle Date

In [4]:
# Arrival timestamp of the positional middle row once the traffic is ordered by arrival
# (for an even number of rows this is the upper of the two middle rows).
tmp = traffic_full.sort_values('arrival')
middle_tstmp = tmp.iloc[len(tmp)//2]['arrival']
middle_tstmp

Timestamp('2019-05-30 16:22:56')

### Max Date

In [5]:
traffic_full['departure'].max().date() + timedelta(days=1)

datetime.date(2019, 9, 15)

### Min Date

In [6]:
traffic_full['arrival'].min().date()

datetime.date(2019, 3, 1)

### 'Most Median Day'

In [7]:
# Per station: count the distinct traffic rows (original index values) per arrival day,
# then take the median of those daily counts -> Series indexed by station.
median_station_counts = traffic_full.reset_index().groupby(['station', pd.Grouper(key='arrival', freq='D')])['index'].nunique().groupby('station').median()

In [8]:
# For every arrival day: per-station distinct-row counts minus the per-station medians
# (a station missing on either side is treated as 0), absolute value, then the largest
# deviation over all stations. `idxmin` selects the day whose worst station deviates least.
day_station_median_diff_minmax = traffic_full.reset_index().groupby([pd.Grouper(key='arrival', freq='D'), 'station'])['index'].nunique().groupby('arrival').apply(lambda x: x.reset_index(level=0, drop=True).subtract(median_station_counts, fill_value=0).abs().max()).idxmin()
day_station_median_diff_minmax

Timestamp('2019-08-16 00:00:00', freq='D')

In [9]:
# Same per-day deviation from the per-station medians as in the min-max variant, but the
# absolute deviations are summed over the stations; `idxmin` selects the day with the
# smallest total deviation.
day_station_median_diff_minsum = traffic_full.reset_index().groupby([pd.Grouper(key='arrival', freq='D'), 'station'])['index'].nunique().groupby('arrival').apply(lambda x: x.reset_index(level=0, drop=True).subtract(median_station_counts, fill_value=0).abs().sum()).idxmin()
day_station_median_diff_minsum

Timestamp('2019-08-07 00:00:00', freq='D')

### Largest Data Day

In [10]:
# Arrival day with the largest number of distinct traffic rows over all stations.
day_total_max = traffic_full.reset_index().groupby(pd.Grouper(key='arrival', freq='D'))['index'].nunique().idxmax()
day_total_max

Timestamp('2019-05-28 00:00:00', freq='D')

## Variants

In [11]:
# Training traffic specifications, assembled from three lists of keyword dicts.
# NOTE(review): presumably `utils.factory_subsets` creates one `trf.TrafficSpec` per
# combination of one dict from each list - confirm against its implementation.
training_traffic_specs = list(utils.factory_subsets(trf.TrafficSpec,
    [
        # time windows: three single days selected above, two single months, two multi-month ranges
        dict(name='1day-station-median-diff-minmax', start_timestamp=day_station_median_diff_minmax.date(), end_timestamp=day_station_median_diff_minmax.date() + timedelta(days=1)),
        dict(name='1day-station-median-diff-minsum', start_timestamp=day_station_median_diff_minsum.date(), end_timestamp=day_station_median_diff_minsum.date() + timedelta(days=1)),
        dict(name='1day-total-max', start_timestamp=day_total_max.date(), end_timestamp=day_total_max.date() + timedelta(days=1)),
        dict(name='mar', start_timestamp='2019-03-01', end_timestamp='2019-04-01'),
        dict(name='jul', start_timestamp='2019-07-01', end_timestamp='2019-08-01'),
        # ends one day after the last departure in the data
        dict(name='jun-to-oct', start_timestamp='2019-06-01', end_timestamp=traffic_full['departure'].max().date() + timedelta(days=1)),
        dict(name='mar-to-jun', start_timestamp=traffic_full['arrival'].min().date(), end_timestamp='2019-06-01'),
    ],
    [
        # specializations: number of station attempts and station-subset options
        dict(name='hassign', max_attempts=1, minimal_complete_station_subset=False, remove_redundant_station_attempts=False),
        dict(name='hassign-reduced', max_attempts=1, minimal_complete_station_subset=True, remove_redundant_station_attempts=False),
        dict(name='3att', max_attempts=3, minimal_complete_station_subset=False),
        dict(name='3att-reduced', max_attempts=3, minimal_complete_station_subset=True),
        dict(name='5att-reduced', max_attempts=5, minimal_complete_station_subset=True),
        dict(name='3att-origpos', max_attempts=3, minimal_complete_station_subset=False, subsequent_attempts_by_original_position=True),
        dict(name='Natt-origpos', max_attempts=None, minimal_complete_station_subset=False, subsequent_attempts_by_original_position=True),
    ],
    [
        # distance limits shared by all variants
        dict(max_station_distance=300, max_stations_pair_distance=300)
    ],
    # colliding `name` values are merged by joining the non-None parts with '__',
    # which yields names such as 'mar__3att' that are split again further below
    reductions=dict(name=lambda a, b: '__'.join(filter(lambda v: v is not None, [a, b],)))))

## Traffic Build

In [None]:
training_traffics = pipeline.build_all_traffics(training_traffic_specs, progress=True, traffic_full=traffic_full, charging_stations=charging_stations, station_distances_mtx=station_distances_mtx)

Dump built data:

Load existing data:

# Optimization

## Solvers

In [15]:
from pathlib import Path

In [16]:
# Exchange directories used by the solver request helpers: passed below as `out_dir`
# (CLUSTER_MODEL_SPEC) and `in_dir` (CLUSTER_MODEL_SOLUTION).
CLUSTER_MODEL_SPEC = 'tmp/spec'
CLUSTER_MODEL_SOLUTION = 'tmp/solution'

# Create both directories up front (no error if they already exist).
Path(CLUSTER_MODEL_SPEC).mkdir(parents=True, exist_ok=True)
Path(CLUSTER_MODEL_SOLUTION).mkdir(parents=True, exist_ok=True)

# ILP request without the first-come-first-served constraint (used later as the "upper bound" solver).
request_ilp_no_fcfs_solve = functools.partial(opt_common.dump_default_soft_assign_ILP_opt_solution_request, out_dir=CLUSTER_MODEL_SPEC, in_dir=CLUSTER_MODEL_SOLUTION, first_come_first_served=False, max_n_stations_brute_force=0)
# ILP request with the first-come-first-served constraint enabled.
request_ilp_fcfs_solve = functools.partial(opt_common.dump_default_soft_assign_ILP_opt_solution_request, out_dir=CLUSTER_MODEL_SPEC, in_dir=CLUSTER_MODEL_SOLUTION, first_come_first_served=True)
#request_heuristic_solve = functools.partial(opt_common.dump_heuristic_solution_request, out_dir=CLUSTER_MODEL_SPEC, in_dir=CLUSTER_MODEL_SOLUTION)
#request_heuristic_solve_ilp_station_subset = functools.partial(opt_common.dump_heuristic_solution_request, out_dir=CLUSTER_MODEL_SPEC, in_dir=CLUSTER_MODEL_SOLUTION, with_ilp_station_subset=True, verbose=True)
# Hard-assignment optimizer wrapped by `optimize_with_ilp_station_subset`
# (NOTE(review): presumably restricts the stations via an ILP first - confirm in lib.optimize.common).
opt_hard_assign_ilp_station_subset = functools.partial(opt_common.optimize_with_ilp_station_subset, opt.optimize_hard_assign)

In [17]:
# Solver registry. Each `traffic_filter(spec, tr)` decides whether the solver is applied
# to a built traffic `tr` that was created from the traffic spec `spec`.
solvers = [
    # skipped only for single-attempt specs with a minimal station subset; traffic must have < 100000 distinct index values
    pipeline.Solver(name="ILP solution (no FCFS)", callable=request_ilp_no_fcfs_solve, traffic_filter=lambda spec, tr: (not spec.minimal_complete_station_subset or spec.max_attempts != 1) and tr.index.nunique() < 100000),
    # multi-attempt specs only, same size limit
    pipeline.Solver(name="ILP solution", callable=request_ilp_fcfs_solve, traffic_filter=lambda spec, tr: spec.max_attempts != 1 and tr.index.nunique() < 100000),
    # single-attempt specs that do not already use a minimal station subset
    pipeline.Solver(name="Hard assignment (ILP subset)", callable=opt_hard_assign_ilp_station_subset, traffic_filter=lambda spec, tr: spec.max_attempts == 1 and not spec.minimal_complete_station_subset),
    #pipeline.Solver(name="Hard assignment (legacy)", callable=opt.hard_assign.optimize_hard_assign_legacy, traffic_filter=lambda spec, tr: spec.max_attempts == 1),
    # all single-attempt specs
    pipeline.Solver(name="Hard assignment", callable=opt.optimize_hard_assign, traffic_filter=lambda spec, tr: spec.max_attempts == 1),
    #pipeline.Solver(name="Heuristic simple", callable=opt.optimize_soft_assign_heuristic_single, traffic_filter=lambda spec, tr: spec.max_attempts != 1),
    #pipeline.Solver(name="Heuristic", callable=request_heuristic_solve, traffic_filter=lambda spec, tr: spec.max_attempts != 1),
    #pipeline.Solver(name="Heuristic (ILP subset)", callable=request_heuristic_solve_ilp_station_subset, traffic_filter=lambda spec, tr: spec.max_attempts != 1 and not spec.minimal_complete_station_subset),
]

In [18]:
solvables = pipeline.assign_solvers(training_traffics, solvers)

In [19]:
import random
random.shuffle(solvables)

## Solve

Try loading all existing solutions:

In [20]:
# Reuse previously persisted solutions when they exist. On a fresh run the file is
# missing, so start from an empty mapping instead of aborting "Restart & Run All"
# (same pattern as the loading of `crossval_results` further below).
try:
    solutions = utils.gzip_pickle_load(fname('solutions'))
except FileNotFoundError:
    solutions = dict()

In [21]:
# Fallback for a fresh kernel: if no `solutions` variable exists in the notebook
# namespace yet, start with an empty mapping so the following cells can fill it.
if 'solutions' not in locals():
    solutions = dict()

Either directly compute the solutions, or output files for execution on cluster:

How to run the generated files on the cluster:

* Gurobi models are executed on cluster using `model_run_iterative.py`

In [22]:
build_traffic = functools.partial(trf.build_traffic, traffic_full=traffic_full, charging_stations=charging_stations, station_distances_mtx=station_distances_mtx)
pipeline.update_solutions(solutions, solvables, training_traffics, build_traffic, progress=True, multi_cpu_kwargs=dict(progress=True), threads=1, verbose=True)

Callable:   0%|          | 0/23 [00:00<?, ?it/s]

2019-08-07-to-2019-08-08-5att-300-300-minst-stfilt-gurobi-model-fcfs failed: IncompleteSolutionException('(stations: 9-10, 12, 29-32): missing 95/103 solutions: 1-95')
2019-03-01-to-2019-04-01-5att-300-300-minst-stfilt-gurobi-model-fcfs failed: FileNotFoundError(2, 'No such file or directory')
2019-03-01-to-2019-04-01-3att-300-300-minst-stfilt-gurobi-model-fcfs failed: FileNotFoundError(2, 'No such file or directory')
2019-08-07-to-2019-08-08-300-300-origpos-stfilt-gurobi-model-fcfs failed: FileNotFoundError(2, 'No such file or directory')
2019-08-16-to-2019-08-17-5att-300-300-minst-stfilt-gurobi-model-fcfs failed: IncompleteSolutionException('(stations: 2-4, 6, 8, 33-37): missing 122/135 solutions: 1-122')
2019-08-07-to-2019-08-08-3att-300-300-origpos-stfilt-gurobi-model-fcfs failed: FileNotFoundError(2, 'No such file or directory')
2019-03-01-to-2019-04-01-300-300-origpos-stfilt-gurobi-model-fcfs failed: FileNotFoundError(2, 'No such file or directory')
2019-05-28-to-2019-05-29-300-3

Serial: 0it [00:00, ?it/s]

Parallel: 0it [00:00, ?it/s]

Unresolved solutions: 23


Dump solutions:

In [23]:
print("Unresolved:")
print(pd.Series([k.solver.name for k in solvables if k not in solutions or callable(solutions[k])]).value_counts())

Unresolved:
ILP solution    23
dtype: int64


# Visualize Comparison of Optimization Methods

In [24]:
from typing import Dict, Union, Callable

def solutions_to_dataframe(solutions: Dict[pipeline.Solvable, Union[opt.utils.Solutions, Callable]]):
    """Flatten resolved solver results into one long DataFrame (one row per solution).

    Args:
        solutions: Mapping from a solvable (traffic spec + solver) to its solutions.
            Entries whose value is still a callable are unresolved and are skipped.

    Returns:
        DataFrame with the columns ``dataset_name``, ``dataset_specialization`` and
        ``solver`` followed by every attribute of the individual solution objects,
        plus the derived columns ``dataset`` and ``upper_bound``. When there is
        nothing to tabulate an empty frame with the base and derived columns is
        returned instead of raising ``KeyError``.
    """
    base_columns = ['dataset_name', 'dataset_specialization', 'solver']
    rows = []

    for solvable, solutions_this in solutions.items():
        # a callable value marks a solution that has not been resolved yet
        if callable(solutions_this):
            continue

        # traffic spec names look like '<dataset>__<specialization>'
        name_splits = solvable.traffic_spec.name.split('__')
        shared = {
            'dataset_name': name_splits[0],
            'dataset_specialization': name_splits[1] if len(name_splits) >= 2 else None,
            'solver': solvable.solver.name,
        }

        rows.extend({**shared, **solution.__dict__} for solution in utils.iter_values(solutions_this))

    if not rows:
        # without rows pandas would create a column-less frame and the lookups below would fail
        return pd.DataFrame(columns=[*base_columns, 'dataset', 'upper_bound'])

    df = pd.DataFrame(rows)

    # drop the legacy solver data because we don't need it
    df = df.drop(index=df.index[df['solver'] == "Hard assignment (legacy)"])

    # solvers run on the 'hassign-reduced' specialization get a " (reduced)" suffix
    is_reduced = df['dataset_specialization'] == 'hassign-reduced'
    df.loc[is_reduced, 'solver'] = df.loc[is_reduced, 'solver'] + " (reduced)"

    # NaN for rows without a specialization (None cannot be concatenated)
    df['dataset'] = df['dataset_name'] + '__' + df['dataset_specialization']
    df['upper_bound'] = df['solver'] == "ILP solution (no FCFS)"

    return df

In [25]:
df = solutions_to_dataframe(solutions)

In [26]:
df.dataset_name.unique()

array(['1day-station-median-diff-minsum',
       '1day-station-median-diff-minmax', 'jun-to-oct', 'mar',
       'mar-to-jun', 'jul', '1day-total-max'], dtype=object)

In [27]:
df.solver.unique()

array(['Heuristic', 'Heuristic simple', 'ILP solution (no FCFS)',
       'Hard assignment (reduced)', 'ILP solution', 'Hard assignment',
       'Hard assignment (ILP subset)', 'Heuristic (ILP subset)'],
      dtype=object)

In [28]:
# Datasets shown in the comparison plots (base names, i.e. the part before '__').
DAY_DATASET = '1day-station-median-diff-minmax'
#DAY_DATASET = '1day-total-max'
MONTH_DATASET = 'mar'
THE_DATASETS = [DAY_DATASET, MONTH_DATASET, 'jul', 'mar-to-jun']

# Keep the three hard-assignment solver variants and the ILP solutions computed on the
# 'origpos' / 'hassign' specializations.
df_plot = pd.concat([
    df.query("solver == 'Hard assignment (ILP subset)'"),
    df.query("solver == 'Hard assignment (reduced)'"),
    df.query("solver == 'Hard assignment'"),
    df.query("solver.str.contains('ILP solution') and (dataset_specialization.str.contains('origpos') or dataset_specialization.str.contains('hassign'))"),
    #df.query("solver == 'Heuristic' and (dataset_specialization.str.contains('origpos'))")
])
df_plot = df_plot.query("dataset_name.isin(@THE_DATASETS)").copy()

# Human-readable solver labels.
df_plot['solver'] = df_plot['solver'].replace({
    'Hard assignment (ILP subset)': 'Two-stage (ILP subset)',
    'Hard assignment (reduced)': 'Two-stage (iterative subset)',
    'ILP solution (no FCFS)': 'ILP (no FCFS)'
})

# Short specialization labels (used as a suffix of the solver label below).
df_plot['dataset_specialization'] = df_plot['dataset_specialization'].replace({
    'hassign': 'hard',
    'Natt-origpos': 'Natt',
    '3att-origpos': '3att'
})

# Dataset labels; the single-day dataset is labelled with the ISO start date of the
# first training spec whose base name equals DAY_DATASET.
df_plot['dataset_name'] = df_plot['dataset_name'].replace({
    DAY_DATASET: next(iter((tr.start_timestamp.date().isoformat() for tr in training_traffic_specs if tr.name.split('__')[0] == DAY_DATASET))),
    MONTH_DATASET: 'March',
    'jul': 'July',
    'mar-to-jun': 'March-May'
})

# Append the specialization to the solver label: 'ILP (no FCFS)' + ' (3att)' becomes
# 'ILP (no FCFS) (3att)', which the replace turns into 'ILP (no FCFS, 3att)'.
msk = df_plot['solver'].isin(['ILP (no FCFS)', 'Heuristic']) & (df_plot['dataset_specialization'] != '')
df_plot.loc[msk, 'solver'] += ' (' + df_plot.loc[msk, 'dataset_specialization'] + ')'
df_plot.loc[msk, 'solver'] = df_plot.loc[msk, 'solver'].str.replace(') (', ', ', regex=False)

# Station-assignment category used as the line style in the plots; solver labels not
# listed here keep a missing value in this column.
df_plot.loc[df_plot.solver.str.startswith('Two-stage'), 'is_hard_assignment'] = 'Hard, subset'
df_plot.loc[df_plot.solver == 'Hard assignment', 'is_hard_assignment'] = 'Hard, no subset'
df_plot.loc[df_plot.solver == 'ILP (no FCFS, 3att)', 'is_hard_assignment'] = 'Soft'
df_plot.loc[df_plot.solver == 'ILP (no FCFS, Natt)', 'is_hard_assignment'] = 'Soft'
df_plot.loc[df_plot.solver == 'ILP (no FCFS, hard)', 'is_hard_assignment'] = 'Hard, no subset'

# Objective as a percentage of the best objective reached on the same dataset.
df_plot['objective_perc'] = 100 * df_plot['objective'] / df_plot.groupby('dataset_name')['objective'].transform('max')

In [29]:
colnames = {
    'solver': 'Solver',
    'upper_bound': "FCFS upper bound",
    'is_hard_assignment': 'Station assignment',
    'objective': "Satisfied demand",
    'objective_perc': "Satisfied demand [%]",
    'objective_diff': "Satisfied demand\n(difference from best)",
    'objective_diff_perc': "Satisfied demand [%]\n(difference from best)",
    'budget': "Budget",
    'dataset_name': 'Train'
}

In [30]:
df_plot.solver.unique()

array(['Two-stage (ILP subset)', 'Two-stage (iterative subset)',
       'Hard assignment', 'ILP (no FCFS, 3att)', 'ILP (no FCFS, Natt)',
       'ILP (no FCFS, hard)'], dtype=object)

In [31]:
solvers_order = ['Hard assignment', 'Two-stage (ILP subset)', 'Two-stage (iterative subset)', 'ILP (no FCFS, hard)', 'ILP (no FCFS, 3att)', 'ILP (no FCFS, Natt)']#, 'Heuristic (3att)', 'Heuristic (Natt)']
palette = seaborn.color_palette("Set1", len(solvers_order))
palette

In [32]:
df_plot['dataset_name'].drop_duplicates().sort_values()

54615    2019-08-16
58801          July
53388         March
59996     March-May
Name: dataset_name, dtype: object

In [33]:
# Facet order: ISO start dates of the training specs followed by 'March'; labels that do
# not occur in `df_plot` are dropped. The dates are sorted because the iteration order of
# a set of strings changes between interpreter sessions (hash randomization), which
# would make the column order of the plots non-reproducible.
dataset_order = [v for v in [*sorted({tr.start_timestamp.date().isoformat() for tr in training_traffic_specs}), 'March'] if v in df_plot['dataset_name'].unique()]

In [34]:
df_plot['solver'].unique()

array(['Two-stage (ILP subset)', 'Two-stage (iterative subset)',
       'Hard assignment', 'ILP (no FCFS, 3att)', 'ILP (no FCFS, Natt)',
       'ILP (no FCFS, hard)'], dtype=object)

In [35]:
plt.close('all')

In [36]:
SAVE = True

In [37]:
plt.ion()

# Satisfied demand [%] as a function of the budget: one facet per training dataset,
# one line per solver, line style by station-assignment category.
g = seaborn.relplot(kind='line', facet_kws=dict(sharey=True, sharex=False), palette=palette, height=2,
                    data=df_plot.rename(columns=colnames), x=colnames['budget'], y=colnames['objective_perc'],
                    hue=colnames['solver'], hue_order=solvers_order,
                    style=colnames['is_hard_assignment'],
                    col=colnames['dataset_name'], col_wrap=2, col_order=dataset_order,
                   )
g.tight_layout()
# reserve the right part of the figure for the legend and anchor the legend there
g.fig.subplots_adjust(right=0.65, left=0.12)
g._legend.set_bbox_to_anchor((0.67 + g._legend.get_window_extent().width / (g.fig.dpi * g.fig.get_size_inches()[0]), 0.5))
# remember the figure size; the following plot cells reuse it
winch, hinch = g.fig.get_size_inches()

if SAVE:
    # savefig does not create missing directories
    Path('plots/pdf').mkdir(parents=True, exist_ok=True)
    plt.savefig('plots/pdf/objective-values-training.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [38]:
THE_DATASETS=THE_DATASETS
diff_plot = df_plot.set_index(['dataset_name', 'budget', 'solver'])['objective'].unstack()
diff_plot_perc = df_plot.set_index(['dataset_name', 'budget', 'solver'])['objective_perc'].unstack()

In [39]:
a = diff_plot_perc.eval('`ILP (no FCFS, Natt)` - `Hard assignment`').groupby('dataset_name').agg(['min', 'max', 'mean'])
a

Unnamed: 0_level_0,min,max,mean
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-08-16,1.10538,24.686809,17.738331
July,4.844934,11.587847,7.987049
March,5.162376,8.572802,6.806934
March-May,,,


In [40]:
b = diff_plot_perc.eval('`ILP (no FCFS, hard)` - `Hard assignment`').groupby('dataset_name').agg(['min', 'max', 'mean'])
b

Unnamed: 0_level_0,min,max,mean
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-08-16,0.0,11.938099,6.131801
July,0.0,15.601319,5.895944
March,0.0,15.989211,5.927524
March-May,,,


In [41]:
a - b

Unnamed: 0_level_0,min,max,mean
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-08-16,1.10538,12.74871,11.60653
July,4.844934,-4.013472,2.091105
March,5.162376,-7.416409,0.87941
March-May,,,


In [42]:
diff_plot_perc.eval('`ILP (no FCFS, Natt)` - `ILP (no FCFS, 3att)`').groupby('dataset_name').agg(['min', 'max', 'mean'])

Unnamed: 0_level_0,min,max,mean
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-08-16,0.221076,0.884304,0.472736
July,0.017541,0.112265,0.061779
March,0.018695,0.066766,0.044785
March-May,,,


In [43]:
diff_plot_perc.eval('`ILP (no FCFS, Natt)` - `ILP (no FCFS, hard)`').groupby('dataset_name').agg(['min', 'max', 'mean'])

Unnamed: 0_level_0,min,max,mean
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-08-16,0.515844,12.822402,9.84438
July,3.687202,6.472776,5.168765
March,3.827048,5.707189,4.770611
March-May,,,


In [44]:
df_plot_this = df_plot.query("is_hard_assignment.str.startswith('Hard') and not solver.str.startswith('ILP')").copy()
df_plot_this['objective_diff'] = df_plot_this['objective'] - df_plot_this.groupby(['dataset_name', 'budget'])['objective'].transform('max')
df_plot_this['objective_diff_perc'] = 100 * df_plot_this['objective_diff'] / df_plot_this.groupby('dataset_name')['objective'].transform('max')

g = seaborn.relplot(kind='line', facet_kws=dict(sharey=True, sharex=False), palette=[c for c, s in zip(palette, solvers_order) if s in df_plot_this.solver.unique()], height=2,
                    data=df_plot_this.rename(columns=colnames), x=colnames['budget'], y=colnames['objective_diff_perc'],
                    hue=colnames['solver'], hue_order=[s for s in solvers_order if s in df_plot_this.solver.unique()],
                    style=colnames['is_hard_assignment'], #style_order=(False, True),
                    col=colnames['dataset_name'], col_order=sorted(df_plot['dataset_name'].unique()),
                    col_wrap=2
                   )
g.fig.set_size_inches(winch, g.fig.get_size_inches()[1])
g.tight_layout()
g.fig.subplots_adjust(right=0.65, left=0.12)
g._legend.set_bbox_to_anchor((0.67 + g._legend.get_window_extent().width / (g.fig.dpi * g.fig.get_size_inches()[0]), 0.5))

if SAVE:
    plt.savefig(f'plots/pdf/objective-values-training-diff-station-reduction.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [45]:
df_plot_this = df_plot.query("is_hard_assignment.str.startswith('Hard')").copy()
df_plot_this['objective_diff'] = df_plot_this['objective'] - df_plot_this.groupby(['dataset_name', 'budget'])['objective'].transform('max')
df_plot_this['objective_diff_perc'] = 100 * df_plot_this['objective_diff'] / df_plot_this.groupby('dataset_name')['objective'].transform('max')

g = seaborn.relplot(kind='line', facet_kws=dict(sharey=True, sharex=False), palette=[c for c, s in zip(palette, solvers_order) if s in df_plot_this.solver.unique()], height=2,
                    data=df_plot_this.rename(columns=colnames), x=colnames['budget'], y=colnames['objective_diff_perc'],
                    hue=colnames['solver'], hue_order=[s for s in solvers_order if s in df_plot_this.solver.unique()],
                    style=colnames['is_hard_assignment'],# style_order=(False, True),
                    col=colnames['dataset_name'], col_order=dataset_order
                   )
g.fig.set_size_inches(winch, hinch)
g.tight_layout()
g.fig.subplots_adjust(right=0.65, left=0.12)
g._legend.set_bbox_to_anchor((0.67 + g._legend.get_window_extent().width / (g.fig.dpi * g.fig.get_size_inches()[0]), 0.5))

if SAVE:
    plt.savefig(f'plots/pdf/objective-values-training-diff-hardassign.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [46]:
df_plot_this = df_plot.query("is_hard_assignment.str.startswith('Soft') or solver.str.contains('Two-stage') or solver == 'Hard assignment'")
df_plot_this = df_plot_this[df_plot_this.groupby(['dataset_name', 'budget'])['solver'].transform(lambda x: x.str.startswith('ILP').sum() > 0)].copy()
df_plot_this['objective_diff'] = df_plot_this['objective'] - df_plot_this.groupby(['dataset_name', 'budget'])['objective'].transform('max')
df_plot_this['objective_diff_perc'] = 100 * df_plot_this['objective_diff'] / df_plot_this.groupby('dataset_name')['objective'].transform('max')

g = seaborn.relplot(kind='line', facet_kws=dict(sharey=True, sharex=True), palette=[c for c, s in zip(palette, solvers_order) if s in df_plot_this.solver.unique()], height=2,
                    data=df_plot_this.rename(columns=colnames), x=colnames['budget'], y=colnames['objective_diff_perc'],
                    hue=colnames['solver'], hue_order=[s for s in solvers_order if s in df_plot_this.solver.unique()],
                    style=colnames['is_hard_assignment'],# style_order=(False, True),
                    col=colnames['dataset_name'], col_order=dataset_order
                   )
g.fig.set_size_inches(winch, hinch)
g.tight_layout()
g.fig.subplots_adjust(right=0.65, left=0.12)
g._legend.set_bbox_to_anchor((0.67 + g._legend.get_window_extent().width / (g.fig.dpi * g.fig.get_size_inches()[0]), 0.5))

if SAVE:
    plt.savefig(f'plots/pdf/objective-values-training-diff-softassign.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Crossvalidation

## Validation Datasets

In [47]:
def get_all_months(day_start, day_end):
    """Return the first day of every month touched by ``day_start`` .. ``day_end``.

    Both bounds are inclusive, i.e. the months containing ``day_start`` and
    ``day_end`` are part of the result.

    Args:
        day_start: First day of the range (``date``-like object with ``replace``).
        day_end: Last day of the range.

    Returns:
        List of month starts in ascending order; empty when ``day_end`` lies in a
        month before ``day_start``.
    """
    last_month = (day_end.year, day_end.month)

    months = []
    month_next = day_start.replace(day=1)
    # Compare (year, month) with `<=` rather than testing the objects with `!=`:
    # the loop then terminates for reversed bounds and for datetime inputs whose
    # time-of-day differs, both of which made the `!=` test loop forever.
    while (month_next.year, month_next.month) <= last_month:
        months.append(month_next)
        month_next += relativedelta(months=1)
    return months

In [48]:
# First and last arrival day present in the data.
day_start = traffic_full['arrival'].min().date()
day_end = traffic_full['arrival'].max().date()

# 12 days drawn (with replacement, the numpy default) from the covered range; seeded for
# reproducibility. In the visible part of the notebook `random_days` and `all_months`
# are only referenced by the commented-out spec lines below.
np.random.seed(1)
random_days = np.random.choice([day_start + timedelta(days=d) for d in range(0, (day_end - day_start).days + 1)], 12)
random_days

all_months = get_all_months(day_start, day_end)

# Validation traffic: the month / multi-month windows also used for training, each with
# a single attempt and a 300 station-distance limit.
validation_traffic_specs = list(utils.factory_subsets(trf.TrafficSpec,
    [
        #*[dict(name=f'1day-random{i}', start_timestamp=tstmp, end_timestamp=tstmp + timedelta(days=1)) for i, tstmp in enumerate(random_days, 1)],
        #*[dict(name=m.strftime("%b").lower(), start_timestamp=m, end_timestamp=m + relativedelta(months=1)) for m in all_months],
        dict(name='mar', start_timestamp='2019-03-01', end_timestamp='2019-04-01'),
        dict(name='jul', start_timestamp='2019-07-01', end_timestamp='2019-08-01'),
        dict(name='jun-to-oct', start_timestamp='2019-06-01', end_timestamp=traffic_full['departure'].max().date() + timedelta(days=1)),
        dict(name='mar-to-jun', start_timestamp=traffic_full['arrival'].min().date(), end_timestamp='2019-06-01'),
    ],
    [
        dict(max_attempts=1, minimal_complete_station_subset=False, remove_redundant_station_attempts=False, max_station_distance=300)
    ]))

display(validation_traffic_specs)

[TrafficSpec('mar', '2019-03-01-to-2019-04-01-1att-300-inf'),
 TrafficSpec('jul', '2019-07-01-to-2019-08-01-1att-300-inf'),
 TrafficSpec('jun-to-oct', '2019-06-01-to-2019-09-15-1att-300-inf'),
 TrafficSpec('mar-to-jun', '2019-03-01-to-2019-06-01-1att-300-inf')]

In [49]:
validation_traffics = pipeline.build_all_traffics(validation_traffic_specs, progress=True, traffic_full=traffic_full, charging_stations=charging_stations, station_distances_mtx=station_distances_mtx)

Specs:   0%|          | 0/4 [00:00<?, ?it/s]

## Validation Simulation Parameters

In [50]:
val_simulation_params = list(utils.factory_subsets(trf.SimulationSpec, [
    dict(max_attempts=None, subsequent_attempts_by_original_position=True),
    dict(max_attempts=3),
    dict(max_attempts=1)
], [
    dict(max_station_distance=300, max_stations_pair_distance=300)
]))

In [51]:
val_simulation_params

[SimulationSpec('300-300-origpos'),
 SimulationSpec('3att-300-300'),
 SimulationSpec('1att-300-300')]

## Crossvalidation Resolution

#### Requests for Crossvalidation

In [52]:
# Set of cross-validation request keys (validation traffic, solvable, simulation spec).
# Two sources are chained:
#   1. every validation traffic spec x every resolved solvable x every simulation spec;
#   2. for every resolved solvable, a validation spec derived from the first validation
#      spec but with the base name and time window of the solvable's own training
#      traffic, again combined with every simulation spec.
# Solvables whose stored value is still a callable (unresolved) are left out.
crossval_request_keys = {pipeline.CrossvalidationRequestKey(a, b, c, is_auto_attempt_simulation=True)
                         for (a, b, c) in itertools.chain(
    itertools.product(validation_traffic_specs, [k for k, v in solutions.items() if not callable(v)], val_simulation_params),
    list((a, b, c) for (a, b), c in itertools.product(
            [
                (validation_traffic_specs[0].copy(name=tr.name.split('__')[0], start_timestamp=tr.start_timestamp, end_timestamp=tr.end_timestamp), pipeline.Solvable(tr, slv))
                for (tr, slv), v in solutions.items() if not callable(v)
            ],
            val_simulation_params))
)}

len(crossval_request_keys)

2118

In [53]:
crossval_requests = {k: opt.utils.unwrap_distributions(solutions[k.solvable], copy=False) for k in crossval_request_keys}

#### Try to load existing results

In [54]:
try:
    crossval_results = utils.gzip_pickle_load(fname('crossval_results'))
except FileNotFoundError:
    crossval_results = dict()

In [55]:
crossval_results = {k: v for k, v in crossval_results.items() if k in crossval_request_keys}

#### Find missing results

In [59]:
missing_crossval_requests = set(crossval_requests.keys()).difference(crossval_results.keys())
missing_crossval_requests = {k: crossval_requests[k] for k in missing_crossval_requests}
len(missing_crossval_requests)

59

#### Create external requests for resolution on cluster

Use the `run_crossval.py` executable on the cluster.

In [61]:
if len(missing_crossval_requests) > 0:
    N_CHUNKS = 12
    print("Amount of chunks:", N_CHUNKS, "; chunk size:", len(next(utils.chunks(missing_crossval_requests, N_CHUNKS))))

Amount of chunks: 12 ; chunk size: 59


In [62]:
if len(missing_crossval_requests) > 0:
    crossvalidate_func = functools.partial(
        pipeline.crossvalidate_all_multithread, auto_attempts=True, post_func=sim.get_satisfied_charging_requests,
        charging_stations=charging_stations, station_distances_mtx=station_distances_mtx
    )

    Path("tmp/crossval/request").mkdir(parents=True, exist_ok=True)

    for i, v in utils.progressify(list(enumerate(utils.chunks(missing_crossval_requests, N_CHUNKS))), "Chunks"):
        utils.gzip_pickle_dump(functools.partial(crossvalidate_func, v), "tmp/crossval/request/crossval_request_%03d.pgz" % i)

Chunks:   0%|          | 0/12 [00:00<?, ?it/s]

#### Load new external results and add them to existing

In [57]:
if len(missing_crossval_requests) > 0:
    from glob import glob

    for f in sorted(glob("tmp/crossval/result/RESULT-*.pgz")):
        crossval_results.update(utils.gzip_pickle_load(f))

    utils.gzip_pickle_dump(crossval_results, fname('crossval_results'))

# Visualize Crossvalidation

In [60]:
from typing import Dict, Tuple

def crossval_results_to_dataframe(crossval_results: "Dict[pipeline.CrossvalidationRequestKey, Dict[int, float]]") -> pd.DataFrame:
    """Flatten cross-validation results into one row per (request key, budget).

    Parameters
    ----------
    crossval_results
        Mapping from a request key to a ``{budget: objective}`` mapping.
        The annotation is quoted so that defining the function does not
        require the project type to be resolvable.

    Returns
    -------
    pd.DataFrame
        Columns ``val_traffic``, ``val_simulation``, ``opt_traffic``,
        ``opt_traffic_name``, ``opt_traffic_spec``, ``opt_solver``,
        ``budget``, ``objective`` and ``is_true_simulation``. Rows of the
        "Hard assignment (legacy)" solver are dropped, and solvers run on the
        'hassign-reduced' specialisation get a " (reduced)" suffix.
        An empty input yields an empty frame that still has these columns.
    """
    columns = [
        'val_traffic', 'val_simulation', 'opt_traffic', 'opt_traffic_name',
        'opt_traffic_spec', 'opt_solver', 'budget', 'objective', 'is_true_simulation',
    ]
    records = []

    for key, values in crossval_results.items():
        opt_traffic = key.solvable.traffic_spec.name
        # The name has the form '<dataset>__<specialisation>'; a name without
        # the separator is treated as having an empty specialisation instead
        # of raising IndexError.
        opt_name_split = opt_traffic.split('__')
        opt_traffic_spec = opt_name_split[1] if len(opt_name_split) > 1 else ''
        for bdgt, objective in values.items():
            records.append({
                'val_traffic': key.val_traffic_spec.name,
                'val_simulation': key.simulation_spec.to_canonical_id(),
                'opt_traffic': opt_traffic,
                'opt_traffic_name': opt_name_split[0],
                'opt_traffic_spec': opt_traffic_spec,
                'opt_solver': key.solvable.solver.name,
                'budget': bdgt,
                'objective': objective,
                'is_true_simulation': key.is_auto_attempt_simulation
            })

    # Passing the columns explicitly keeps the schema stable for empty input.
    data = pd.DataFrame(records, columns=columns)
    if data.empty:
        return data

    # rename hard assignment solver to reduced
    is_reduced = data['opt_traffic_spec'] == 'hassign-reduced'
    data.loc[is_reduced, 'opt_solver'] = data.loc[is_reduced, 'opt_solver'] + " (reduced)"

    # drop the legacy solver data because we don't need it
    data = data.drop(index=data.index[data['opt_solver'] == "Hard assignment (legacy)"])

    return data

In [61]:
# Long-format table of all cross-validation results (one row per request key and budget).
df2 = crossval_results_to_dataframe(crossval_results)

In [74]:
# Restrict the results to the dataset specialisations, solvers and validation
# periods selected by the query below.
df2_plot = df2.query("""
opt_traffic_spec.isin(['Natt-origpos', '3att-origpos', 'hassign', 'hassign-reduced']) and
opt_solver.isin(['Hard assignment', 'Hard assignment (reduced)', 'Hard assignment (ILP subset)', 'ILP solution (no FCFS)']) and
val_traffic.isin(['mar-to-jun', 'jun-to-oct'])
""".replace('\n', ' ')).copy()

# Display names of the solvers.
df2_plot['opt_solver'] = df2_plot['opt_solver'].replace({
    'Hard assignment (ILP subset)': 'Two-stage (ILP subset)',
    'Hard assignment (reduced)': 'Two-stage (iterative subset)',
    'ILP solution (no FCFS)': 'ILP (no FCFS)'
})

# Evaluated after the renaming above, so the `contains` test only matches
# solvers that are still called 'Hard assignment'.
df2_plot['is_hard_assignment'] = df2_plot.eval("(opt_traffic_spec == 'hassign' and opt_solver.str.startswith('ILP')) or opt_solver.str.contains('Hard assignment')")

# Single-day training datasets must carry their ISO date *before* `is_crossval`
# is derived: the expression below recognises them via startswith('2019-08') /
# startswith('2019-05'), which cannot match while they still carry their
# '1day…' spec name.
day_training_names = {sp.name.split('__')[0]: sp.start_timestamp.date().isoformat() for sp in training_traffic_specs if sp.name.startswith('1day')}
df2_plot['opt_traffic_name'] = df2_plot['opt_traffic_name'].replace(day_training_names)

# A row is a cross-validation row when the training dataset lies outside the
# validation period (the month datasets still carry their raw names here).
df2_plot['is_crossval'] = df2_plot.eval("(val_traffic == 'mar-to-jun' and (opt_traffic_name == 'jul' or opt_traffic_name.str.startswith('2019-08'))) or (val_traffic == 'jun-to-oct' and (opt_traffic_name == 'mar' or opt_traffic_name.str.startswith('2019-05')))")

df2_plot['opt_traffic_spec'] = df2_plot['opt_traffic_spec'].replace({
    'hassign': 'hard',
    'Natt-origpos': 'Natt',
    '3att-origpos': '3att'
})

# Display names of the month / multi-month datasets.
dataset_names = {
    MONTH_DATASET: 'March',
    'jul': "July",
    'mar-to-jun': "Mar-Jun",
    'jun-to-oct': 'Jun-Sep'
}

df2_plot[['opt_traffic_name', 'val_traffic']] = df2_plot[['opt_traffic_name', 'val_traffic']].replace(dataset_names)

# Single-day validation datasets are shown by their ISO date as well.
df2_plot['val_traffic'] = df2_plot['val_traffic'].replace({sp.name: sp.start_timestamp.date().isoformat() for sp in validation_traffic_specs if sp.name.startswith('1day')})

# Short labels for the validation simulation ids.
df2_plot['val_simulation'] = df2_plot['val_simulation'].replace({
    '1att-300-300': '1',
    '3att-300-300': '3',
    '300-300-origpos': 'N'
})

df2_plot = df2_plot.sort_values(['val_traffic', 'opt_traffic_name'])

# One facet per (training dataset, validation dataset) pair. The facet column
# holds an integer id; `traffic_pairs` maps the id back to the facet title.
df2_plot['traffic'] = 'Train = ' + df2_plot['opt_traffic_name'] + '\nVal = ' + df2_plot['val_traffic']

traffic_pairs = {i: v for i, v in enumerate(df2_plot['traffic'].unique())}
df2_plot['traffic'] = df2_plot['traffic'].replace({v: i for i, v in traffic_pairs.items()})

# Append the dataset specialisation to the solver name, e.g.
# 'ILP (no FCFS)' + 'hard' -> 'ILP (no FCFS, hard)'.
msk = df2_plot['opt_solver'].isin(['ILP (no FCFS)', 'Heuristic']) & (df2_plot['opt_traffic_spec'] != '')
df2_plot.loc[msk, 'opt_solver'] += ' (' + df2_plot.loc[msk, 'opt_traffic_spec'] + ')'
df2_plot.loc[msk, 'opt_solver'] = df2_plot.loc[msk, 'opt_solver'].str.replace(') (', ', ', regex=False)

# Objective relative to the best value in the same facet / simulation / budget,
# and as a percentage of the best value per validation dataset and simulation.
best_per_panel = df2_plot.groupby(['traffic', 'val_simulation', 'budget'])['objective'].transform('max')
best_per_validation = df2_plot.groupby(['val_traffic', 'val_simulation'])['objective'].transform('max')
df2_plot['objective_diff'] = df2_plot['objective'] - best_per_panel
df2_plot['objective_perc'] = 100 * df2_plot['objective'] / best_per_validation
df2_plot['objective_diff_perc'] = 100 * df2_plot['objective_diff'] / best_per_validation

In [75]:
# Validation datasets present in the plot data.
df2_plot['val_traffic'].unique()

array(['Jun-Sep', 'Mar-Jun'], dtype=object)

In [76]:
# Validation simulation labels present in the plot data.
df2_plot['val_simulation'].unique()

array(['3', '1', 'N'], dtype=object)

In [77]:
# Training datasets present in the plot data.
df2_plot['opt_traffic_name'].unique()

array(['2019-05-28', '2019-08-07', '2019-08-16', 'July', 'Jun-Sep',
       'Mar-Jun', 'March'], dtype=object)

In [78]:
# Human-readable column labels; the plotting cells rename the data frame
# columns with this mapping and refer to the axes / legend entries through it.
colnames2 = {
    'val_traffic': 'Val',
    'objective': 'Satisfied demand',
    'objective_perc': 'Satisfied demand [%]',
    'objective_diff_perc': 'Satisfied demand [%]\n(difference from best)',
    'budget': 'Budget',
    'opt_solver': 'Budget distribution',
    'is_hard_assignment': 'Hard assignment',
    'opt_traffic_name': 'Training dataset',
    'val_simulation': 'Charging\nattempts',
}

In [79]:
# Solver display names present in the plot data.
df2_plot['opt_solver'].unique()

array(['ILP (no FCFS, hard)', 'Two-stage (iterative subset)',
       'Two-stage (ILP subset)', 'ILP (no FCFS, Natt)',
       'ILP (no FCFS, 3att)', 'Hard assignment'], dtype=object)

In [80]:
# Dataset specialisations present in `df` (a frame defined earlier in the notebook).
df['dataset_specialization'].unique()

array(['5att-reduced', 'Natt-origpos', '3att-origpos', '3att',
       'hassign-reduced', '3att-reduced', 'hassign'], dtype=object)

In [81]:
# Rows of the selected (validation dataset, training dataset) combinations.
df2_plot_this = df2_plot.query("(val_traffic.isin(['Mar-Jun']) and opt_traffic_name.isin(['July', '2019-08-07'])) or (val_traffic.isin(['Jun-Sep']) and opt_traffic_name.isin(['March', '2019-05-28']))")

# Baseline: best objective per simulation / validation dataset / budget among
# the rows whose training dataset equals the validation dataset.
baseline_objective = (
    df2_plot
    .query("val_traffic.isin(['Mar-Jun', 'Jun-Sep']) and opt_traffic_name == val_traffic")
    .groupby(['val_simulation', 'val_traffic', 'budget'])[['objective']]
    .max()
    .reset_index()
)
# Attach the baseline to every facet that shares its validation dataset.
selected_facets = df2_plot_this[['val_traffic', 'traffic', 'opt_traffic_name']].drop_duplicates()
df2_plot_this_best = pd.merge(baseline_objective, selected_facets, on=['val_traffic']).drop_duplicates()
df2_plot_this_best['opt_solver'] = 'Baseline'
df2_plot_this_best['is_hard_assignment'] = False
df2_plot_this_best['is_crossval'] = False

df2_plot_this = (
    pd.concat([df2_plot_this, df2_plot_this_best])
    .drop_duplicates()
    .sort_values(['val_traffic', 'opt_traffic_name'])
)

# Recompute the relative objectives now that the baseline rows are included.
best_in_facet = df2_plot_this.groupby(['traffic', 'val_simulation', 'budget'])['objective'].transform('max')
df2_plot_this['objective_diff'] = df2_plot_this['objective'] - best_in_facet
df2_plot_this['objective_perc'] = 100 * df2_plot_this['objective'] / df2_plot_this.groupby(['val_traffic', 'val_simulation'])['objective'].transform('max')
df2_plot_this['objective_diff_perc'] = 100 * df2_plot_this['objective_diff'] / df2_plot_this.groupby(['val_traffic', 'val_simulation'])['objective'].transform('max')

In [82]:
# Legend order of the solvers, with the baseline appended last.
solvers2_order = list(solvers_order) + ['Baseline']

In [83]:
# Existing palette extended by one dark grey entry (paired with 'Baseline' via `solvers2_order`).
palette2 = seaborn.color_palette([*palette, (0.3, 0.3, 0.3)])
palette2

In [84]:
# Close all open figures before the next plotting cells create new ones.
plt.close('all')

In [85]:
# One figure per validation simulation label: satisfied demand [%] against the
# budget, one facet per (training, validation) dataset pair.
for atts in ['1', '3', 'N']:
    plot_data = df2_plot_this.query('val_simulation == @atts').drop_duplicates().rename(columns=colnames2)
    g = seaborn.relplot(
        data=plot_data,
        kind='line',
        x=colnames2['budget'],
        y=colnames2['objective_perc'],
        hue=colnames2['opt_solver'],
        hue_order=solvers_order,
        palette=palette,
        style=colnames2['is_hard_assignment'],
        style_order=(False, True),
        col='traffic',
        col_order=df2_plot_this['traffic'].unique(),
        col_wrap=2,
        height=2,
        facet_kws=dict(sharey=True, sharex=True),
        ci='sd',
    )
    g.fig.set_size_inches(winch, g.fig.get_size_inches()[1])
    g.tight_layout()
    g.fig.subplots_adjust(right=0.65, left=0.12, top=.88, hspace=.35)
    # Anchor the legend using its own width expressed as a fraction of the figure width.
    legend_width_frac = g._legend.get_window_extent().width / (g.fig.dpi * g.fig.get_size_inches()[0])
    g._legend.set_bbox_to_anchor((0.67 + legend_width_frac, 0.5))

    # The facet column holds integer ids; show the readable dataset pair instead.
    for tr, ax in g.axes_dict.items():
        ax.set_title(traffic_pairs[tr])

    if SAVE:
        plt.savefig(f'plots/pdf/objective-values-crossval-{atts}att.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [86]:
# Wide table of `objective_perc`: one column per solver, indexed by validation
# dataset, training dataset, simulation label and budget.
diff_plot_perc2 = (
    df2_plot_this
    .set_index(['val_traffic', 'opt_traffic_name', 'val_simulation', 'budget', 'opt_solver'])['objective_perc']
    .unstack()
)

In [87]:
# Difference between the two solvers (in percentage points), summarised over the budgets.
(
    diff_plot_perc2
    .eval('`Two-stage (ILP subset)` - `Hard assignment`')
    .groupby(['val_simulation', 'opt_traffic_name', 'val_traffic'])
    .agg(['min', 'max', 'mean'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,min,max,mean
val_simulation,opt_traffic_name,val_traffic,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2019-05-28,Jun-Sep,-0.399885,7.338701,3.705633
1,2019-08-07,Mar-Jun,-0.61759,6.191557,3.564234
1,July,Mar-Jun,-0.070457,3.883858,2.430173
1,March,Jun-Sep,0.078005,4.149482,2.376094
3,2019-05-28,Jun-Sep,-1.408564,1.076821,-0.046893
3,2019-08-07,Mar-Jun,-1.62574,0.206153,-0.704024
3,July,Mar-Jun,-1.049903,0.142655,-0.507608
3,March,Jun-Sep,-0.698454,0.191873,-0.15618
N,2019-05-28,Jun-Sep,-2.199369,0.563964,-0.763191
N,2019-08-07,Mar-Jun,-1.873646,0.565399,-0.719107


In [88]:
# Gap between the baseline and the two-stage (ILP subset) solver, summarised over the budgets.
(
    diff_plot_perc2
    .eval('`Baseline` - `Two-stage (ILP subset)`')
    .groupby(['val_simulation', 'opt_traffic_name', 'val_traffic'])
    .agg(['min', 'max', 'mean'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,min,max,mean
val_simulation,opt_traffic_name,val_traffic,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2019-05-28,Jun-Sep,-0.033174,2.654843,1.08689
1,2019-08-07,Mar-Jun,-0.340109,1.949323,0.590397
1,July,Mar-Jun,0.020006,1.257796,0.763197
1,March,Jun-Sep,-0.045727,0.736112,0.361328
3,2019-05-28,Jun-Sep,0.004483,2.619876,1.333605
3,2019-08-07,Mar-Jun,0.0,2.529509,1.419329
3,July,Mar-Jun,0.020006,1.632699,0.819216
3,March,Jun-Sep,-0.031381,0.927088,0.383615
N,2019-05-28,Jun-Sep,0.179321,3.027831,1.72628
N,2019-08-07,Mar-Jun,0.0,2.24768,1.218169


In [89]:
# Absolute difference between training on 'July' and on '2019-08-07' for the
# two-stage (ILP subset) solver, summarised over the budgets.
(
    diff_plot_perc2['Two-stage (ILP subset)']
    .unstack('opt_traffic_name')
    .eval('July - `2019-08-07`')
    .abs()
    .dropna()
    .groupby(['val_traffic', 'val_simulation'])
    .agg(['min', 'max', 'mean'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,min,max,mean
val_traffic,val_simulation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mar-Jun,1,0.005219,1.198647,0.430519
Mar-Jun,3,0.003479,1.379574,0.718569
Mar-Jun,N,0.0,1.712725,0.850921


In [90]:
# Highest `objective_perc` reached per training dataset.
(
    df2_plot
    .groupby(['opt_traffic_name'])['objective_perc']
    .max()
)

opt_traffic_name
2019-05-28     99.892408
2019-08-07     98.738479
2019-08-16     98.331421
July           99.980275
Jun-Sep       100.000000
Mar-Jun       100.000000
March          99.999103
Name: objective_perc, dtype: float64

In [92]:
# One figure per validation simulation label: difference from the best value
# per budget, restricted to solvers whose name does not contain 'att'.
for atts in ['1', '3', 'N']:
    df2_plot_this2 = df2_plot_this.query("val_simulation == @atts and not opt_solver.str.contains('att')")
    df2_plot_this2_solvers = df2_plot_this2.opt_solver.unique()

    # Keep solver order and colours aligned with the solvers that are actually present.
    present_solvers = [s for s in solvers2_order if s in df2_plot_this2_solvers]
    present_colors = [c for s, c in zip(solvers2_order, palette2) if s in df2_plot_this2_solvers]

    g = seaborn.relplot(
        data=df2_plot_this2.drop_duplicates().rename(columns=colnames2),
        kind='line',
        x=colnames2['budget'],
        y=colnames2['objective_diff_perc'],
        hue=colnames2['opt_solver'],
        hue_order=present_solvers,
        palette=present_colors,
        style=colnames2['is_hard_assignment'],
        style_order=(False, True),
        col='traffic',
        col_order=df2_plot_this['traffic'].unique(),
        col_wrap=2,
        height=2,
        facet_kws=dict(sharey=True, sharex=True),
        ci='sd',
    )
    g.fig.set_size_inches(winch, g.fig.get_size_inches()[1])
    g.tight_layout()
    g.fig.subplots_adjust(right=0.65, left=0.12, top=.88, hspace=.35)
    # Anchor the legend using its own width expressed as a fraction of the figure width.
    legend_width_frac = g._legend.get_window_extent().width / (g.fig.dpi * g.fig.get_size_inches()[0])
    g._legend.set_bbox_to_anchor((0.67 + legend_width_frac, 0.5))

    # The facet column holds integer ids; show the readable dataset pair instead.
    for tr, ax in g.axes_dict.items():
        ax.set_title(traffic_pairs[tr])

    if SAVE:
        plt.savefig(f'plots/pdf/objective-values-crossval-diff-{atts}att.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [93]:
fig, ax = plt.subplots(figsize=(4, 2))
# Number of rows in `traffic_full` per calendar day of arrival.
tmp = traffic_full.groupby(pd.Grouper(key='arrival', freq='D'))['vehicle'].count()
# Workdays (Mon-Fri) and weekends are smoothed separately with a 5-sample rolling mean.
tmp[tmp.index.weekday <= 4].to_frame('Workdays').rolling(5).mean().plot(ax=ax, color='#1378C7')
tmp[tmp.index.weekday > 4].to_frame('Weekends').rolling(5).mean().plot(ax=ax, color='tab:orange')
plt.xlabel(None)
plt.ylabel("Charging demand size")

# Light vertical line at the first day of every month covered by the data.
for m in traffic_full.groupby(pd.Grouper(key='arrival', freq='M'))['vehicle'].count().index:
    m = date(year=m.year, month=m.month, day=1)
    plt.axvline(m, color='#ccddf3', zorder=-10)

# Single-day training datasets, in ascending date order. Sorting (instead of
# relying on set iteration order) keeps the pairing with the hand-tuned
# annotation offsets below stable across kernel restarts.
# NOTE(review): confirm that the offsets are meant for the days in date order.
days = sorted({tr.start_timestamp.date().isoformat() for tr in training_traffic_specs if tr.name.startswith('1day')})
plt.scatter(days, [tmp.loc[d] for d in days], c='r', marker='x')
# `zip` stops at the shorter sequence, so at most three days get a label.
for d, (x, y) in zip(days, [(-55, -5), (-22, -15), (-22, 10)]):
    plt.annotate(d, (d, tmp.loc[d]), xytext=(x, y), textcoords='offset points', bbox=dict(boxstyle="round", fc="1", alpha=0.7))
plt.legend()
fig.subplots_adjust(left=0.15, right=0.85)

if SAVE:
    plt.savefig("plots/pdf/demand_size_time.pdf")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [94]:
# Close all open figures before the next plotting cells create new ones.
plt.close('all')

In [95]:
plt.ion()

# Compare a month-long training dataset with a single-day one: for every
# training dataset, simulation label and budget only the best solver is kept.
for opt_traffic_names in [['July', '2019-08-07'], ['March', '2019-05-28']]:
    candidate_rows = df2_plot_this.query("opt_traffic_name.isin(@opt_traffic_names) and opt_solver != 'Baseline'")
    best_row_labels = candidate_rows.groupby(['opt_traffic_name', 'val_simulation', 'budget'])['objective'].idxmax()
    df2_plot_this2 = candidate_rows.loc[best_row_labels].copy()

    # Relative objectives, recomputed on the reduced selection.
    best_per_budget = df2_plot_this2.groupby(['val_simulation', 'budget'])['objective'].transform('max')
    df2_plot_this2['objective_diff'] = df2_plot_this2['objective'] - best_per_budget
    df2_plot_this2['objective_perc'] = 100 * df2_plot_this2['objective'] / df2_plot_this2.groupby(['val_traffic', 'val_simulation'])['objective'].transform('max')
    df2_plot_this2['objective_diff_perc'] = 100 * df2_plot_this2['objective_diff'] / df2_plot_this2.groupby(['val_traffic', 'val_simulation'])['objective'].transform('max')

    g = seaborn.relplot(
        data=df2_plot_this2.drop_duplicates().query("opt_solver != 'Baseline' or opt_traffic_name == @opt_traffic_names[0]").rename(columns=colnames2),
        kind='line',
        x=colnames2['budget'],
        y=colnames2['objective_diff_perc'],
        hue=colnames2['opt_traffic_name'],
        hue_order=opt_traffic_names,
        col=colnames2['val_simulation'],
        col_order=['1', '3'],
        height=2,
        facet_kws=dict(sharey=True, sharex=True),
        ci='sd',
    )
    g.fig.set_size_inches(winch, g.fig.get_size_inches()[1])
    g.tight_layout()
    g.fig.subplots_adjust(right=0.65, left=0.12, top=.8)
    # Anchor the legend using its own width expressed as a fraction of the figure width.
    legend_width_frac = g._legend.get_window_extent().width / (g.fig.dpi * g.fig.get_size_inches()[0])
    g._legend.set_bbox_to_anchor((0.67 + legend_width_frac, 0.5))

    # Spread (max - min) of `objective_perc` per simulation label and budget,
    # summarised over the budgets.
    display(df2_plot_this2.groupby(['val_simulation', 'budget'])['objective_perc'].agg(['min', 'max']).eval('max - min').groupby('val_simulation').agg(['min', 'max', 'median']))

    if SAVE:
        plt.savefig(f'plots/pdf/objective-values-crossval-dataset-{opt_traffic_names[0].lower()}.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0_level_0,min,max,median
val_simulation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.0,4.953053,0.264951
3,0.0,0.987684,0.11192
N,0.0,0.822063,0.202033


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0_level_0,min,max,median
val_simulation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.0,3.024289,0.545952
3,0.0,1.202356,0.464445
N,0.0,1.082366,0.325517


In [100]:
plt.ion()

# Cross-validation rows trained on March / July, budgets 250 to 400 (inclusive).
df2_plot_this2 = df2_plot_this.query("is_crossval and opt_traffic_name.isin(['March', 'July']) and budget >= 250 and budget <= 400")
df2_plot_this2_solvers = df2_plot_this2.opt_solver.unique()

# Keep solver order and colours aligned with the solvers that are actually present.
present_solvers = [s for s in solvers2_order if s in df2_plot_this2_solvers]
present_colors = [c for s, c in zip(solvers2_order, palette2) if s in df2_plot_this2_solvers]

g = seaborn.relplot(
    data=df2_plot_this2.drop_duplicates().rename(columns=colnames2),
    kind='line',
    x=colnames2['budget'],
    y=colnames2['objective_perc'],
    hue=colnames2['opt_solver'],
    hue_order=present_solvers,
    palette=present_colors,
    style=colnames2['val_simulation'],
    style_order=['1', '3', 'N'],
    col=colnames2['opt_traffic_name'],
    height=2,
    facet_kws=dict(sharey=True, sharex=True),
    ci='sd',
)
g.fig.set_size_inches(winch, g.fig.get_size_inches()[1])
g.tight_layout()
g.fig.subplots_adjust(right=0.65, left=0.12, top=.8)
# Anchor the legend using its own width expressed as a fraction of the figure width.
legend_width_frac = g._legend.get_window_extent().width / (g.fig.dpi * g.fig.get_size_inches()[0])
g._legend.set_bbox_to_anchor((0.67 + legend_width_frac, 0.5))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [171]:
# Difference in `objective_perc` between the '3' and '1' simulation labels,
# summarised over the budgets per training dataset and solver.
(
    df2_plot_this2
    .set_index(['opt_traffic_name', 'budget', 'opt_solver', 'val_simulation'])['objective_perc']
    .unstack()
    .eval("`3` - `1`")
    .groupby(['opt_traffic_name', 'opt_solver'])
    .agg(['min', 'max', 'mean'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,min,max,mean
opt_traffic_name,opt_solver,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
July,Two-stage (ILP subset),0.0,1.884084,1.030968
July,Two-stage (iterative subset),0.0,1.871037,0.962971
March,Two-stage (ILP subset),0.0,1.666786,0.855903
March,Two-stage (iterative subset),0.0,1.795897,0.871466
