In [1]:
# The notebooks import project code from the repository's own packages, which live one
# directory above this notebook. Instead of asking the reader to extend PYTHONPATH,
# the parent directory is added to the import search path for this kernel session only.
import os, sys

module_path = os.path.abspath(os.pardir)
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)

In [2]:
from scipy.stats import poisson
import numpy as np
import pandas as pd
from timeit import default_timer as timer
from modules.config import (
    PATH_TRIPS_GROUPED,
    PERIOD_DURATION,
    N_SCENARIOS,
    N_REALIZATIONS,
    PATH_SCENARIOS,
    MODE_IS_WEEKEND,
)


In [3]:
# Load the grouped trip data from the path configured in modules.config.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# were produced by a trusted source.
trips = pd.read_pickle(PATH_TRIPS_GROUPED)

In [4]:
# Name the three index levels and turn them into regular columns.
# Not idempotent: re-running this cell on the already flattened frame will not
# give the same result, so reload `trips` first.
trips = (
    trips
    .rename_axis(['start_hex_id', 'end_hex_id', 'daytime'])
    .reset_index()
)

In [5]:
# pandas numbers weekdays Monday=0 ... Sunday=6, so 5 and 6 are the weekend.
trips['is_weekend'] = trips['daytime'].dt.dayofweek.ge(5)
# Time of day of the period start, without the date part.
trips['time'] = trips['daytime'].dt.time

We model the demand as Poisson distributed and estimate its rate $\lambda$ with the maximum likelihood estimate, which for the Poisson distribution is the sample mean.  
So for $n$ samples $k_i \in \mathbb{N}_0$ with $i = 1, \dots, n$, the maximum likelihood estimate is:  
$ \lambda_{MLE} = \frac{1}{n}\sum_{i=1}^{n}k_i$


In [6]:
# Poisson rate per (weekend flag, time of day, origin hexagon, destination hexagon):
# the mean of every remaining column over all observed days.
# NOTE(review): the mean is only an unbiased rate if `trips` contains explicit
# zero rows for periods without any trip - confirm against the grouping step.
mu = (
    trips
    .drop(columns='daytime')
    .groupby(['is_weekend', 'time', 'start_hex_id', 'end_hex_id'])
    .mean()
)

In [7]:
# start_hex_ids = trips['start_hex_id'].unique()
# end_hex_ids = trips['end_hex_id'].unique()
# time = trips['time'].unique()
# vehicle_types = mu.columns
# scenarios = [i for i in range(N_SCENARIOS)]
# index = pd.MultiIndex.from_product([start_hex_ids, end_hex_ids, time, vehicle_types, scenarios], 
#                      names=['start_hex_ids', 'end_hex_ids', 'time', 'vehicle_types', 'scenarios'])

In [8]:
# scenarios = pd.DataFrame(index=index)

In [9]:
# # add column & ensure correct dtype
# scenarios['demand'] = 0
# scenarios = scenarios.sort_index()

Let $r$ be the number of realizations of each random variable, $d$ the duration of each period (in hours), $n$ the number of scenarios and $t$ the start hour of a specific time interval.  
For example:  
$r = 2, d=4, n=32, t \in \{0, 4, 8, 12, 16, 20\}$  
Then the number of distinct scenario groups for a specific time interval is:  
$r^{\frac{t}{d}}$  
and the number of scenarios in each group is:  
$\frac{n}{r^{\frac{t}{d}}}$

In [10]:
# Distinct period start times present in the data.
pd.unique(trips['time'])

array([datetime.time(0, 0), datetime.time(12, 0)], dtype=object)

In [11]:
# Hour of day at which each distinct period starts.
hours_list = [period_start.hour for period_start in trips['time'].unique()]
# TODO rename these variables!
# For every period start hour:
#   n_batches  - number of distinct realizations drawn for that period
#   batch_size - number of consecutive scenarios that share one realization
# Both values use the same integer period index (hour // PERIOD_DURATION).
# Mixing a truncated exponent for n_batches with a float exponent for batch_size
# lets n_batches * batch_size drift from N_SCENARIOS whenever the hour is not an
# exact multiple of PERIOD_DURATION, and float powers can introduce rounding errors.
# The key order of the inner dicts matters: the sampling cell unpacks .values().
batch_map = {
    hour: {
        'n_batches': N_REALIZATIONS ** int(hour // PERIOD_DURATION),
        'batch_size': int(N_SCENARIOS // N_REALIZATIONS ** int(hour // PERIOD_DURATION)),
    }
    for hour in hours_list
}
batch_map

{0: {'n_batches': 1, 'batch_size': 2048},
 12: {'n_batches': 2048, 'batch_size': 1}}

In [12]:
start = timer()
scenario_dict = {}
# One seeded generator shared by all draws keeps the run reproducible while every
# poisson.rvs call consumes fresh random numbers. Passing the literal seed 42 to
# each call restarts the random stream every time: all draws for a given period
# are then produced from the same underlying random numbers (equal rates even
# yield identical demand vectors), so the sampled demands are not independent
# across hexagon pairs and vehicle types.
rng = np.random.RandomState(42)
for (time, start_hex_id, end_hex_id), (mu_kick_scooter, mu_car, mu_bicycle) in mu.loc[
    MODE_IS_WEEKEND
].iterrows():
    # NOTE(review): the unpacking above relies on the columns of `mu` being ordered
    # kick_scooter, car, bicycle - confirm against the grouped trips data.
    # Relies on the insertion order ('n_batches', 'batch_size') of batch_map's inner dicts.
    n_batches, batch_size = batch_map[time.hour].values()

    # Draw one realization per batch and repeat it for every scenario of that
    # batch, so all scenarios inside a batch see the same demand in this period.
    kick_scooter_scenarios = np.repeat(
        poisson.rvs(mu_kick_scooter, size=n_batches, random_state=rng), batch_size
    )
    car_scenarios = np.repeat(
        poisson.rvs(mu_car, size=n_batches, random_state=rng), batch_size
    )
    bicycle_scenarios = np.repeat(
        poisson.rvs(mu_bicycle, size=n_batches, random_state=rng), batch_size
    )

    # Key: (origin hexagon, destination hexagon, time of day, vehicle type);
    # value: one demand figure per scenario.
    scenario_dict[
        (start_hex_id, end_hex_id, time, "kick_scooter")
    ] = kick_scooter_scenarios
    scenario_dict[(start_hex_id, end_hex_id, time, "bicycle")] = bicycle_scenarios
    scenario_dict[(start_hex_id, end_hex_id, time, "car")] = car_scenarios

end = timer()
print(f"Succesfully generated scenarios in {(end - start):.2f} seconds")


Succesfully generated scenarios in 7.85 seconds


In [13]:
# One row per (start_hex_id, end_hex_id, time, vehicle type) key; the entries of
# each sampled demand array are spread across the columns.
scenarios = pd.DataFrame.from_dict(scenario_dict, orient="index")
# Drop the reference to the dict so that its memory can be reclaimed.
del scenario_dict

In [14]:
# Turn the four-element tuple keys into a four-level MultiIndex.
scenarios.index = pd.MultiIndex.from_tuples(scenarios.index)
# Move the per-scenario columns into an additional innermost index level,
# which leaves a frame with a single value column.
scenarios = scenarios.stack().to_frame()

In [15]:
# Label the five index levels; the innermost one is the scenario number.
scenarios.index.names = [
    'start_hex_ids',
    'end_hex_ids',
    'time',
    'vehicle_types',
    'scenarios',
]
# The unnamed value column created by to_frame() is called 0; give it a proper name.
scenarios = scenarios.rename(columns={0: 'demand'})

In [16]:
# Make sure the target folder exists before writing the scenarios to disk.
# os.path.dirname returns '' for a bare file name and os.makedirs('') raises
# FileNotFoundError, so the directory is only created when there is one.
scenarios_dir = os.path.dirname(PATH_SCENARIOS)
if scenarios_dir:
    os.makedirs(scenarios_dir, exist_ok=True)
scenarios.to_pickle(PATH_SCENARIOS)

In [17]:
# Sanity check: number of distinct scenario numbers in the result.
len(scenarios.index.unique(level='scenarios'))

2048

In [18]:
# NOTE: this expression is not the last statement of the cell, so its result
# is not rendered in the notebook output.
scenarios.head(3)
# free the memory
%reset -f
# %reset -f clears the interactive namespace, so gc has to be imported here
# before it can be used.
import gc
gc.collect()
# this still does not free all memory for some reason
# we recommend to close the notebook after execution or restart the kernel manually

0