In [1]:
# As we use our own external modules, the folder src (reached here as the parent of the
# current working directory, '..') must be importable.
# We do not expect the reader to add that folder to the PYTHONPATH env variable,
# therefore we append it to sys.path for the lifetime of this kernel in each notebook.
import os
import sys  # was missing: `sys.path` below raised NameError on a fresh kernel

module_path = os.path.abspath(os.path.join('..'))
# Guard against duplicates so re-running the cell does not grow sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:

from timeit import default_timer as timer
import pandas as pd
import h3
from utils.config import PATH_TRIPS, H3_RESOLUTION, PERIOD_DURATION, PATH_TRIPS_GROUPED

In [3]:
# Read the pickled trips and, for testing purposes, keep a reproducible 20 % sample.
# NOTE(review): every later step, including the pickle written at the end of this
# notebook, is derived from this sample only - confirm the sampling should stay enabled.
# NOTE(review): unpickling executes arbitrary code; only load PATH_TRIPS from a trusted source.
trips = pd.read_pickle(PATH_TRIPS).sample(frac=0.2, random_state=42)

In [4]:
def extract_hexagon_cb(latitude_col, longitude_col):
    """Build a row callback that maps a coordinate pair to an H3 hexagon id.

    Args:
        latitude_col: key under which the latitude is stored in a row.
        longitude_col: key under which the longitude is stored in a row.

    Returns:
        A one-argument callable. Given a row it returns
        ``h3.geo_to_h3(row[latitude_col], row[longitude_col], H3_RESOLUTION)``.
    """
    def to_hexagon(row):
        lat, lng = row[latitude_col], row[longitude_col]
        return h3.geo_to_h3(lat, lng, H3_RESOLUTION)

    return to_hexagon

In [5]:
start = timer()

# Map the start and the end coordinates of every trip to an H3 hexagon id.
# The callback is applied row by row (axis=1); that is the work being timed here.
trips['start_hex_id'] = trips.apply(extract_hexagon_cb('latitude_start', 'longitude_start'), axis=1)
trips['end_hex_id'] = trips.apply(extract_hexagon_cb('latitude_end', 'longitude_end'), axis=1)

end = timer()
# The message previously claimed a pickle had been saved; this cell only computes hexagon ids.
print(f"Successfully computed hexagon ids in {(end - start):.2f} seconds")

Succesfully saved dataframe to pickle in 10.03 seconds


In [6]:
# Floor every trip start to the beginning of its PERIOD_DURATION-hour window.
# A Timedelta is passed instead of the former '<n>H' alias string because the
# uppercase 'H' frequency alias is deprecated in recent pandas releases.
# int() mirrors the truncation that the previous '%d' formatting applied.
trips['datetime_start_floored'] = trips['datetime_start'].dt.floor(
    pd.Timedelta(hours=int(PERIOD_DURATION))
)

In [7]:
# Count the trips per (vehicle type, start hexagon, end hexagon, period).
# The count becomes the single column 'demand' of a MultiIndex-ed frame.
group_keys = ['vehicleType', 'start_hex_id', 'end_hex_id', 'datetime_start_floored']
trips_grouped = (
    trips
    .groupby(group_keys)
    .size()
    .to_frame('demand')
)

In [8]:
# Remember one existing entry so we can verify that re-indexing leaves it untouched.
sanity_index = trips_grouped.index[0]
# Label-based lookup of the 'demand' value; the former `[0]` on the row relied on
# positional fallback for a label-indexed Series, which pandas has deprecated.
sanity_check_entry_1 = trips_grouped.loc[sanity_index, 'demand']


# Every combination of vehicle type, start hexagon, end hexagon and period.
# The levels are left unnamed on purpose: the following cell renames the
# resulting "level_0" column after resetting the first index level.
full_index = pd.MultiIndex.from_product([
    trips['vehicleType'].unique(),
    trips['start_hex_id'].unique(),
    trips['end_hex_id'].unique(),
    trips['datetime_start_floored'].unique(),
])

# Combinations that had no trips appear as NaN after reindexing; treat them as demand 0.
trips_grouped = trips_grouped.reindex(full_index)
trips_grouped = trips_grouped.fillna(0)

sanity_check_entry_2 = trips_grouped.loc[sanity_index, 'demand']
sanity_check = sanity_check_entry_1 == sanity_check_entry_2

# Parentheses are required: without them the conditional expression swallowed the
# "sanity check: " prefix and a failing check printed only the cross mark.
print("sanity check: " + ("️️✔️" if sanity_check else "❌"))

sanity check: ️️✔️


In [9]:
# Turn the first index level into a regular column. That level carries no name,
# so pandas labels the new column "level_0"; rename it to "vehicle_type".
trips_grouped = (
    trips_grouped
    .reset_index(level=0)
    .rename(columns={"level_0": "vehicle_type"})
)

In [10]:
# One 'demand' Series per vehicle type, each keeping the remaining index levels.
demand_dict = {
    vehicle_type: trips_grouped.loc[trips_grouped['vehicle_type'] == vehicle_type, 'demand']
    for vehicle_type in trips_grouped['vehicle_type'].unique()
}

In [11]:
# Side-by-side frame: one column per vehicle type, aligned on the shared index.
trips_seperated = pd.DataFrame(demand_dict)

# Re-locate the reference entry in the new layout: the first element of its
# original index is now the column, the remaining elements form the row key.
sanity_vehicle_type, *sanity_index_short = sanity_index

sanity_check_entry_3 = trips_seperated.loc[tuple(sanity_index_short), sanity_vehicle_type]
sanity_check = sanity_check_entry_2 == sanity_check_entry_3

# Parentheses are required: without them the conditional expression swallowed the
# "sanity check: " prefix and a failing check printed only the cross mark.
print("sanity check: " + ("️️✔️" if sanity_check else "❌"))

sanity check: ️️✔️


In [12]:
# Preview the first three rows of the per-vehicle-type demand table.
trips_seperated.head(3)

Unnamed: 0,Unnamed: 1,Unnamed: 2,kick_scooter,car,bicycle
861fa199fffffff,861fa199fffffff,2019-12-10 16:00:00,516.0,80.0,28.0
861fa199fffffff,861fa199fffffff,2019-11-27 08:00:00,462.0,40.0,19.0
861fa199fffffff,861fa199fffffff,2019-12-08 16:00:00,494.0,0.0,12.0


In [13]:
start = timer()

# Create the target folder if needed. os.makedirs('') raises FileNotFoundError,
# so skip it when PATH_TRIPS_GROUPED has no directory component.
output_dir = os.path.dirname(PATH_TRIPS_GROUPED)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
trips_seperated.to_pickle(PATH_TRIPS_GROUPED)

end = timer()
print(f"Succesfully saved dataframe to pickle in {(end - start):.2f} seconds")

Succesfully saved dataframe to pickle in 0.01 seconds
