In [1]:
# We use our own external modules, so the project folder has to be importable.
# We do not expect the reader to add it to the PYTHONPATH env variable,
# therefore each notebook temporarily appends it to sys.path itself.
import os
import sys  # required for the sys.path manipulation below

# Absolute path of the parent of the current working directory.
module_path = os.path.abspath(os.path.join('..'))
# Append only once so re-running this cell does not grow sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
import pandas as pd
from timeit import default_timer as timer
from utils.config import PATH_SCENARIOS, N_REDUCED_SCNEARIOS, N_SCENARIOS, PATH_SCENARIOS_REDUCED, PATH_SCENARIO_PROBABILITY
from sklearn_extra.cluster import KMedoids

In [3]:
# Load the pickled scenario data from the location configured in utils.config.
scenarios = pd.read_pickle(filepath_or_buffer=PATH_SCENARIOS)

In [4]:
# Move every listed index level into the columns; the index level(s) that remain
# identify the rows that are clustered below.
levels_to_columns = ['start_hex_ids', 'end_hex_ids', 'time', 'vehicle_types']
scenarios = scenarios.unstack(level=levels_to_columns)

In [5]:
# Plain array view of the wide frame: one row per entry of `scenarios.index`.
X = scenarios.to_numpy()

In [6]:
# Cluster the rows of X into N_REDUCED_SCNEARIOS groups with k-medoids.
# The seed is fixed so repeated runs pick the same medoids.
# An alternative configuration (method='pam', init="k-medoids++") is not used here.
kmedoids = KMedoids(n_clusters=N_REDUCED_SCNEARIOS, random_state=0)
kmedoids.fit(X)

In [7]:
# For every original scenario, record which retained (medoid) scenario represents it.
#
# `kmedoids.labels_` holds a cluster number per row of X and `kmedoids.medoid_indices_`
# holds the row *position* of each cluster's medoid. Positions are translated to index
# labels through `scenarios.index`, so the result is correct even when the index is not
# a plain 0..N-1 range.
scenario_reduction_assignment = pd.DataFrame(index=scenarios.index)
medoid_scenario_ids = scenarios.index[kmedoids.medoid_indices_]
label_to_scenario_id_map = dict(enumerate(medoid_scenario_ids))
# A single dictionary lookup per row; every label is a key of the map by construction.
scenario_reduction_assignment['cluster_label'] = (
    pd.Series(kmedoids.labels_, index=scenarios.index).map(label_to_scenario_id_map)
)

In [8]:
# `medoid_indices_` are row positions in X, not index labels. Translate them to the
# corresponding labels of `scenarios` explicitly; for a sorted 0..N-1 index this yields
# exactly the same values as the raw positions.
selected_scenario_ids = scenarios.index.to_numpy()[kmedoids.medoid_indices_]

In [9]:
# Keep only the rows of the medoid scenarios (label-based lookup, all columns).
selected_scenarios = scenarios.loc[selected_scenario_ids, :]

In [10]:
# Move the listed column levels back into the row index (long format).
levels_to_rows = ['start_hex_ids', 'end_hex_ids', 'time', 'vehicle_types']
selected_scenarios = selected_scenarios.stack(levels_to_rows)

In [11]:
# Distinct ids of the retained scenarios, in order of first appearance in the index.
retained_ids = selected_scenarios.index.get_level_values('scenarios').unique()
scenario_id_list = list(retained_ids)
# Map each retained id to its new consecutive number 0, 1, 2, ...
scenario_reset_map = {}
for new_id, old_id in enumerate(scenario_id_list):
    scenario_reset_map[old_id] = new_id

In [12]:
# Renumber the retained scenarios using scenario_reset_map.
# The rename is restricted to the 'scenarios' level: without `level`, a dict mapper is
# applied to the values of every level of a MultiIndex, so entries of the other levels
# that happen to equal a scenario id would be rewritten as well.
selected_scenarios = selected_scenarios.rename(index=scenario_reset_map, level='scenarios')
# Apply the same renumbering to the assignment table so both share one id space.
scenario_reduction_assignment['cluster_label'] = scenario_reduction_assignment['cluster_label'].replace(scenario_reset_map)

In [13]:
# Probability of each retained scenario = share of the original scenarios assigned to it.
scenario_probability = (
    scenario_reduction_assignment
    .groupby('cluster_label')
    .size()                      # number of original scenarios per retained scenario
    .to_frame('n_scenarios')
)
scenario_probability.index.names = ['scenarios']
# Normalise by the number of scenarios that were actually clustered rather than by a
# configured constant, so the probabilities always sum to 1 for the data at hand.
scenario_probability['probability'] = (
    scenario_probability['n_scenarios'] / scenario_probability['n_scenarios'].sum()
)

In [14]:
# Persist both results as pickles, creating each target folder first if it is missing.
outputs_to_write = (
    (scenario_probability, PATH_SCENARIO_PROBABILITY),
    (selected_scenarios, PATH_SCENARIOS_REDUCED),
)
for result_object, target_path in outputs_to_write:
    os.makedirs(os.path.dirname(target_path), exist_ok=True)
    result_object.to_pickle(target_path)