In [None]:
# As we use our own external modules, we need the folder src to be in the PYTHONPATH env variable.
# However we do not expect the reader to add that folder to the env variable,
# therefore we manually load it temporarily in each notebook.
import os, sys
# Absolute path of the parent directory (expected to hold the `modules` package imported below).
module_path = os.path.abspath(os.pardir)
# Register it only once so that re-running this cell does not add duplicate entries.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
import pandas as pd
import h3
from modules.config import (
    PATH_SCENARIOS_REDUCED,
    PATH_DISTANCES,
    PATH_SPEEDS,
    PATH_HEXAGON_RESOLUTION_MAP,
    PERIOD_DURATION,
    VEHICLE_PROFIT,
    VEHICLE_STACK_RELOCATIONS,
    RELOCATION_DRIVER_SALARY,
    VEHICLE_PARKING_COSTS,
)
from modules.helpers import calc_distance_haversine

# Distance Profit & Cost Calculation
In this notebook our goal is to determine the cost and profits for each region tuple for each vehicle type.  
In our paper this is denoted as $c_{ijm}$ and $p_{ijm}$.

## Distances

In order to calculate the profit of a trip and the cost of a relocation between two specific regions, we first have to know the distances between each region tuple.  
Note that our model also allows round trips. However, we do not know the exact route of a round trip. We therefore assume that a round trip is simply a trip that starts and ends in the same region without detouring to any other region.  
We can then calculate the average distance of such a round trip by the average distance between two random points in a hexagon.  
With the help of an experiment we conducted [here](https://editor.p5js.org/mgottsch/sketches/qQiMOBRD7), we found that the average distance between two random points in a hexagon with side length $l$ can be calculated as $l \cdot 0.8259763178117513$.

In [None]:
# Average distance between two random points in a hexagon with side length 1
# (determined empirically, see the experiment linked above). It is multiplied by the
# actual hexagon edge length to estimate the distance of a round trip within one region.
avg_distance_l1 = 0.8259763178117513

In [None]:
# Load the scenarios and the lookup from hexagon id to its H3 resolution.
scenarios = pd.read_pickle(PATH_SCENARIOS_REDUCED)
hexagon_resolution_map = pd.read_pickle(PATH_HEXAGON_RESOLUTION_MAP)['resolution'].to_dict()

# Every hexagon that appears as the start or the end of a trip in any scenario.
start_hexes = set(scenarios.index.get_level_values('start_hex_ids'))
end_hexes = set(scenarios.index.get_level_values('end_hex_ids'))

# Sort the union: the iteration order of a set of strings can differ between kernel
# runs (hash randomization), which would make the row order of every table derived
# from `hex_ids` - including the pickled result - irreproducible.
hex_ids = sorted(start_hexes | end_hexes)

In [None]:
def calc_distance(hexes):
    """Return the distance for a (start hexagon, end hexagon) pair.

    Args:
        hexes: Indexable pair of H3 hexagon ids; position 0 is the start
            region and position 1 the end region.

    Returns:
        For a round trip (start equals end) the average distance between two
        random points inside that hexagon, i.e. ``avg_distance_l1`` scaled by
        the hexagon's edge length in km at the resolution stored in
        ``hexagon_resolution_map``. Otherwise the haversine distance between
        the coordinates that ``h3.h3_to_geo`` returns for both hexagons
        (presumably also in km - confirm against ``calc_distance_haversine``).
    """
    origin, destination = hexes[0], hexes[1]

    if origin == destination:
        # Round trip: no real route is known, so fall back to the hexagon-internal average.
        resolution = hexagon_resolution_map[origin]
        return avg_distance_l1 * h3.edge_length(resolution, unit='km')

    origin_coords = h3.h3_to_geo(origin)
    destination_coords = h3.h3_to_geo(destination)
    return calc_distance_haversine(origin_coords, destination_coords)

We now create a dataframe whose entries are region tuples with their corresponding distance.

In [None]:
# One row per ordered (start, end) combination of hexagons, including start == end.
region_pairs = pd.MultiIndex.from_product(
    [hex_ids, hex_ids],
    names=['start_hex_id', 'end_hex_id'],
)
distances = pd.DataFrame(index=region_pairs)

# Each index entry is a (start_hex_id, end_hex_id) tuple, which is what calc_distance expects.
distances['distance'] = distances.index.map(calc_distance)

In [None]:
# Show the scenarios with their index levels turned into regular columns.
# The result is only displayed; `scenarios` itself is left unchanged.
scenarios.reset_index()

Unnamed: 0,scenarios,start_hex_ids,end_hex_ids,time,vehicle_types,demand
0,0,871fa199cffffff,871fa199cffffff,00:00:00,bicycle,5
1,0,871fa199cffffff,871fa199cffffff,00:00:00,car,4
2,0,871fa199cffffff,871fa199cffffff,00:00:00,kick_scooter,33
3,0,871fa199cffffff,871fa199cffffff,08:00:00,bicycle,16
4,0,871fa199cffffff,871fa199cffffff,08:00:00,car,5
...,...,...,...,...,...,...
30271,3,861fa18b7ffffff,861fa18b7ffffff,08:00:00,car,4
30272,3,861fa18b7ffffff,861fa18b7ffffff,08:00:00,kick_scooter,32
30273,3,861fa18b7ffffff,861fa18b7ffffff,16:00:00,bicycle,13
30274,3,861fa18b7ffffff,861fa18b7ffffff,16:00:00,car,0


## Profit
To define the profit per region tuple for every vehicle type we make use of the average speed we calculated in our data analysis. The exact price (profit) per minute depends on the provider and is configurable in the `config.py`.

In [None]:
# The pickled speeds are converted to a {column: {vehicle type: value}} mapping;
# only the average speed in km per minute is needed here.
speeds_by_column = pd.read_pickle(PATH_SPEEDS).to_dict()
vehicle_speed = speeds_by_column['speed (km/min)']
vehicle_speed

{'car': 0.1116506648480835,
 'bicycle': 0.04967387633867044,
 'kick_scooter': 0.22438853350603996}

In [None]:
# Convert the configured profit per minute into a profit per km:
# (profit / min) / (km / min) = profit / km.
vehicle_profit_km = dict(
    (vehicle_type, profit_per_min / vehicle_speed[vehicle_type])
    for vehicle_type, profit_per_min in VEHICLE_PROFIT.items()
)
vehicle_profit_km

{'kick_scooter': 0.846745584684191,
 'bicycle': 0.6710435301257894,
 'car': 2.5973871306058585}

In [None]:
# Profit of a trip between two regions = distance * profit per km of the vehicle type.
# The multiplication is applied to the whole column at once instead of calling a
# Python lambda per row, and a single loop replaces three copy-pasted assignments.
for vehicle_type in ('kick_scooter', 'bicycle', 'car'):
    distances['profit_' + vehicle_type] = (
        distances['distance'] * vehicle_profit_km[vehicle_type]
    )

## Costs
In the same manner we calculated the profits, we now calculate the costs.  
Note that we use the relocation driver's salary to calculate the costs. As the salary most likely also varies per provider, it is configurable in the `config.py`.  
We also expect that for some vehicle types (bicycles & kick scooters) multiple vehicles can be relocated at once. This ratio is also configurable.

In [None]:
# Driver salary broken down to one minute (dividing by 60 - the configured salary is
# presumably per hour), shared between all vehicles that are relocated at once.
salary_per_min = RELOCATION_DRIVER_SALARY / 60
vehicle_cost_min = {
    vehicle_type: salary_per_min / stack_size
    for vehicle_type, stack_size in VEHICLE_STACK_RELOCATIONS.items()
}
vehicle_cost_min

{'kick_scooter': 0.005833333333333334,
 'bicycle': 0.011666666666666667,
 'car': 0.23333333333333334}

In [None]:
# Convert the relocation cost per minute into a cost per km:
# (cost / min) / (km / min) = cost / km.
vehicle_cost_km = {
    vehicle_type: vehicle_cost_min[vehicle_type] / vehicle_speed[vehicle_type]
    for vehicle_type in vehicle_cost_min
}
vehicle_cost_km

{'kick_scooter': 0.02599657496837429,
 'bicycle': 0.2348652355440263,
 'car': 2.089851714280576}

In [None]:
# Cost of a relocation between two regions = distance * relocation cost per km of the
# vehicle type. The multiplication is applied to the whole column at once instead of
# calling a Python lambda per row, and a single loop replaces three copy-pasted assignments.
for vehicle_type in ('kick_scooter', 'bicycle', 'car'):
    distances['cost_' + vehicle_type] = (
        distances['distance'] * vehicle_cost_km[vehicle_type]
    )

## Parking costs
As our model also incorporates parking costs we also write those into the cost columns.  
The parking costs per hour can also be configured in the `config.py`.

In [None]:
# Work on a flat copy so start and end hexagon can be compared as ordinary columns.
distances_flat = distances.reset_index()
same_hexagon = distances_flat['start_hex_id'] == distances_flat['end_hex_id']

# For rows that stay within one hexagon, the relocation cost is replaced by the parking
# cost: configured parking cost multiplied by PERIOD_DURATION (units are assumed to
# match - confirm in config.py).
for vehicle_type in ('kick_scooter', 'car', 'bicycle'):
    distances_flat.loc[same_hexagon, 'cost_' + vehicle_type] = (
        VEHICLE_PARKING_COSTS[vehicle_type] * PERIOD_DURATION
    )

# Restore the (start, end) MultiIndex that the rest of the notebook relies on.
distances = distances_flat.set_index(['start_hex_id', 'end_hex_id'])

We now save the costs and profits in a pickle file, which can be used by our model.

In [None]:
# Make sure the target directory exists before writing the result.
output_dir = os.path.dirname(PATH_DISTANCES)
# dirname is '' when PATH_DISTANCES is a bare file name; os.makedirs('') would raise,
# and in that case there is no directory to create anyway.
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
distances.to_pickle(PATH_DISTANCES)