In [1]:
# As we use our own external modules, we need the folder src to be in the PYTHONPATH env variable.
# However we do not expect the reader to add that folder to the env variable,
# therefore we manually load it temporarily in each notebook.
import os
import sys  # required for sys.path below; without it this cell fails on a fresh kernel

# Absolute path of the parent of the current working directory.
# NOTE: '..' is resolved against the kernel's working directory, so this
# assumes the notebook is started from the folder it lives in.
module_path = os.path.abspath('..')
# Guard against duplicates so that re-running the cell keeps sys.path unchanged.
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
from scipy.stats import poisson
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
from timeit import default_timer as timer
from utils.config import PATH_TRIPS_GROUPED, PERIOD_DURATION, N_SCENARIOS, N_REALIZATIONS, PATH_SCENARIOS

In [3]:
# Load the pickled object stored at PATH_TRIPS_GROUPED.
# NOTE(review): unpickling executes arbitrary code from the file, so only load
# a pickle that was produced by a trusted step of this project.
trips = pd.read_pickle(PATH_TRIPS_GROUPED)

In [4]:
# Name the three index levels and turn them into regular columns so that they
# can be used as ordinary data in the following cells.
index_level_names = ['start_hex_id', 'end_hex_id', 'daytime']
trips = trips.rename_axis(index_level_names).reset_index()

In [5]:
# Derive the two grouping keys from the timestamp column: a weekend flag
# (day-of-week numbers above 4, i.e. 5 and 6) and the time of day.
timestamps = trips['daytime']
trips['is_weekend'] = timestamps.dt.weekday >= 5
trips['time'] = timestamps.dt.time

We estimate the rate of the Poisson distribution by maximum likelihood; this estimate is the sample mean.  
So for $n$ samples $ k_i \in \mathbb{N} $ with $ i = 1, \dots, n $, the maximum likelihood estimate is:  
$ \lambda_{MLE} = \frac{1}{n}\sum_{i=1}^{n}k_i$


In [6]:
# Maximum likelihood estimate of the Poisson rate: the mean of every remaining
# column within each (is_weekend, time, start hex, end hex) group.
group_keys = ['is_weekend', 'time', 'start_hex_id', 'end_hex_id']
mu = trips.drop(columns='daytime').groupby(group_keys).mean()

In [7]:
# Distinct values along every axis of the scenario grid.
start_hex_ids = trips['start_hex_id'].unique()
end_hex_ids = trips['end_hex_id'].unique()
time = trips['time'].unique()
vehicle_types = mu.columns  # one column of `mu` per vehicle type
scenarios = list(range(N_SCENARIOS))

# Full cartesian product of all axes; the dict keeps each level's name next to
# its values (insertion order defines the level order).
levels = {
    'start_hex_ids': start_hex_ids,
    'end_hex_ids': end_hex_ids,
    'time': time,
    'vehicle_types': vehicle_types,
    'scenarios': scenarios,
}
index = pd.MultiIndex.from_product(list(levels.values()), names=list(levels.keys()))

In [8]:
# Empty frame with one row per entry of the scenario index; from here on the
# name `scenarios` refers to this DataFrame, not to the list of scenario ids.
scenarios = pd.DataFrame(index=index)

In [9]:
# Add the demand column, initialised with the integer 0 so that it starts out
# with an integer dtype, and sort the index
# (presumably so that the partial-key .loc lookups used later work on a sorted
# MultiIndex - TODO confirm).
scenarios = scenarios.assign(demand=0).sort_index()

Let $r$ be the number of realizations of each random variable, $d$ the duration of each period, $n$ the number of scenarios and $t$ a specific time interval.  
So for example:  
$r = 2, d=4, n=32, t \in \{0, 4, 8, 12, 16, 20\}$  
Then the number of distinct realizations (scenario groups) at a specific time interval $t$ is:  
$r^{\frac{t}{d}}$  
and the number of scenarios that share each of these realizations is:  
$\frac{n}{r^{\frac{t}{d}}}$

In [10]:
# Hours of day that occur on the 'time' level of the scenario index.
hours_list = list(scenarios.index.get_level_values('time').unique().map(lambda t: t.hour))


def _batch_layout(hour):
    """Return the number of distinct realizations and the group size for `hour`.

    Both values are derived from the same integer period index, so they stay
    consistent with each other even when `hour` is not an exact multiple of
    PERIOD_DURATION, and no float division is involved.
    """
    period_index = int(hour // PERIOD_DURATION)
    n_batches = N_REALIZATIONS ** period_index
    return {
        'n_batches': n_batches,
        'batch_size': int(N_SCENARIOS // n_batches),
    }


# TODO rename these variables!
# hour -> {'n_batches': distinct realizations at that hour,
#          'batch_size': scenarios sharing each realization}
batch_map = {hour: _batch_layout(hour) for hour in hours_list}
batch_map

{0: {'n_batches': 1, 'batch_size': 1024},
 4: {'n_batches': 4, 'batch_size': 256},
 8: {'n_batches': 16, 'batch_size': 64},
 12: {'n_batches': 64, 'batch_size': 16},
 16: {'n_batches': 256, 'batch_size': 4},
 20: {'n_batches': 1024, 'batch_size': 1}}

In [11]:
start = timer()

# One seeded generator for the whole run. The result is still reproducible, but
# every draw advances the stream, so vehicle types, hex pairs and time slots
# receive different random numbers. Re-seeding each call with the same integer
# would make all of them replay the identical random sequence.
rng = np.random.RandomState(42)

# mu.loc[False] selects the rates of the is_weekend == False group. Each row is
# keyed by (time, start_hex_id, end_hex_id) and holds one rate per vehicle type.
for (time_of_day, start_hex_id, end_hex_id), rates in mu.loc[False].iterrows():
    layout = batch_map[time_of_day.hour]
    n_batches = layout['n_batches']
    batch_size = layout['batch_size']

    # Iterate over the columns by name instead of unpacking them positionally,
    # so the written label always matches the column the rate came from.
    for vehicle_type, rate in rates.items():
        # One Poisson draw per batch, repeated for every scenario of the batch.
        demand = np.repeat(
            poisson.rvs(rate, size=n_batches, random_state=rng),
            batch_size)

        # Single .loc call with (row key, column): chained indexing such as
        # .loc[key]['demand'] = ... may write to a temporary copy instead.
        scenarios.loc[(start_hex_id, end_hex_id, time_of_day, vehicle_type), 'demand'] = demand

end = timer()
print(f"Succesfully generated scenarios in {(end - start):.2f} seconds")

Succesfully generated scenarios in 114.73 seconds


In [12]:
# Make sure the target folder exists before writing. A path without a
# directory component has an empty dirname, and os.makedirs('') raises, so the
# folder is only created when there is one to create.
output_dir = os.path.dirname(PATH_SCENARIOS)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
scenarios.to_pickle(PATH_SCENARIOS)