In [None]:
import sys, os
sys.path.append(os.path.abspath('..'))
%load_ext autoreload
%autoreload 2

from modules.config import *
from modules import h3_visualization

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import h3

**Note**:
Before committing we removed all cell outputs. We understand that this is inconvenient, but the plotly maps are too large to be included in the repository.
We are sorry and hope that the execution of this notebook does not take too long. 

# Spatial And Temporal Analysis Of Trips
In this notebook we will analyze the total flow, net flow and idle time of trips
along multiple spatial and temporal resolutions.


In [None]:
# Load the grouped trips table from the parquet file at TRIPS_GROUPED_SPATIO_TEMPORAL_PATH.
# The helper functions below filter this frame on its "h3_res" and
# "time_interval_length" columns.
trips_grouped_all = pd.read_parquet(TRIPS_GROUPED_SPATIO_TEMPORAL_PATH)

In [None]:
def get_trips_grouped(h3_res: int, time_interval_length: int) -> pd.DataFrame:
    """Select the rows of ``trips_grouped_all`` for one resolution combination.

    Args:
        h3_res: Value to match in the ``h3_res`` column.
        time_interval_length: Value to match in the ``time_interval_length`` column.

    Returns:
        The rows of ``trips_grouped_all`` where both columns equal the given values.
    """
    matches_h3_res = trips_grouped_all["h3_res"] == h3_res
    matches_interval = trips_grouped_all["time_interval_length"] == time_interval_length
    return trips_grouped_all[matches_h3_res & matches_interval]

def get_trips_net(h3_res: int, time_interval_length: int) -> pd.DataFrame:
    """Compute the net flow (arrivals minus departures) per hexagon.

    Args:
        h3_res: H3 resolution used to select the grouped trips.
        time_interval_length: Time interval length used to select the grouped trips.

    Returns:
        One row per hexagon with a ``hex_id`` column and, for every numeric
        column of the grouped trips, the sum over rows ending in the hexagon
        minus the sum over rows starting in it.
    """
    trips_grouped = get_trips_grouped(h3_res, time_interval_length)
    # numeric_only=True restricts the sums to numeric columns; summing the
    # remaining hex-id / datetime columns is meaningless and raises or
    # concatenates on recent pandas versions.
    trips_by_start_hex = (
        trips_grouped.groupby("start_hex_id").sum(numeric_only=True).rename_axis("hex_id")
    )
    trips_by_end_hex = (
        trips_grouped.groupby("end_hex_id").sum(numeric_only=True).rename_axis("hex_id")
    )
    # A hexagon that only appears as a start (or only as an end) is missing from
    # the other frame. A plain `-` yields NaN for it, and a following fillna(0)
    # would report a balanced hexagon; fill_value=0 treats the missing side as
    # zero so the actual in- or outflow is kept.
    trips_net = trips_by_end_hex.sub(trips_by_start_hex, fill_value=0).fillna(0)
    return trips_net.reset_index()

def get_trips_total(h3_res: int, time_interval_length: int) -> pd.DataFrame:
    """Compute the total flow (arrivals plus departures) per hexagon.

    Args:
        h3_res: H3 resolution used to select the grouped trips.
        time_interval_length: Time interval length used to select the grouped trips.

    Returns:
        One row per hexagon with a ``hex_id`` column and, for every numeric
        column of the grouped trips, the sum over rows ending in the hexagon
        plus the sum over rows starting in it.
    """
    trips_grouped = get_trips_grouped(h3_res, time_interval_length)
    # numeric_only=True restricts the sums to numeric columns; summing the
    # remaining hex-id / datetime columns is meaningless and raises or
    # concatenates on recent pandas versions.
    trips_by_start_hex = (
        trips_grouped.groupby("start_hex_id").sum(numeric_only=True).rename_axis("hex_id")
    )
    trips_by_end_hex = (
        trips_grouped.groupby("end_hex_id").sum(numeric_only=True).rename_axis("hex_id")
    )
    # A hexagon that only appears as a start (or only as an end) is missing from
    # the other frame. A plain `+` yields NaN for it, and a following fillna(0)
    # would erase its flow entirely; fill_value=0 treats the missing side as zero.
    trips_total = trips_by_end_hex.add(trips_by_start_hex, fill_value=0).fillna(0)
    return trips_total.reset_index()


In [None]:
# Map center passed as `center=` to every choropleth plot in this notebook.
lat_lon_leipzig = {"lat": 51.33, "lon": 12.37}

## Total Flow Of Bicycles
First we will plot the total flow of bicycles per hexagon, that is the sum of incoming and outgoing trips.

In [None]:
def plot_total_trips(h3_res: int, time_interval_length):
    """Draw a choropleth of the total flow per hexagon.

    Args:
        h3_res: H3 resolution forwarded to ``get_trips_total``.
        time_interval_length: Time interval length forwarded to ``get_trips_total``.
    """
    trips_total = get_trips_total(h3_res, time_interval_length).reset_index()
    h3_visualization.plot_choropleth(
        trips_total,
        hex_col="hex_id",
        color_by_col="demand",
        center=lat_lon_leipzig,
        color_continuous_scale="magma",
        opacity=0.7,
        zoom=10,
        labels={"demand": "# starting & ending trips"},
    )

In [None]:
plot_total_trips(h3_res=9, time_interval_length=1)

As we can see, the highest flow is in the center of the city, where the train station as well as most shopping areas are located. In addition, we can see clusters of hexagons with high flow in the west and east, where residential areas are.
We also see that the hexagons around the borders of the city have very low flow. This shows that it is possible to satisfy a large portion of the demand by just focusing on the hotspots, which is an important consideration when entering the shared mobility market.

In [None]:
plot_total_trips(h3_res=8, time_interval_length=1)

After changing the resolution, we can draw the same conclusions.

## Net Flow Of Bicycles Per Hexagon
Next we will look at the net flow of bicycles per hexagon, which is the difference between incoming and outgoing trips.
This measure is highly relevant for vehicle sharing system operators as it shows possible imbalance in the system.
Imbalance in the system can lead to unmet demand and therefore wasted resources.
Seeing where these imbalances occur is essential when performing relocations.

In [None]:
def plot_net_trips(h3_res: int, time_interval_length, quantile=None):
    """Draw a choropleth of the net flow (inflow - outflow) per hexagon.

    The color range is symmetric around zero so that balanced hexagons sit at
    the midpoint of the diverging "RdBu" scale.

    Args:
        h3_res: H3 resolution forwarded to ``get_trips_net``.
        time_interval_length: Time interval length forwarded to ``get_trips_net``.
        quantile: If ``None``, the color range spans the largest absolute net
            flow. Otherwise the range is limited to plus/minus the given
            quantile of the net flow, so values beyond it share the extreme
            colors of the scale.
    """
    trips_net_reset = get_trips_net(h3_res, time_interval_length)
    demand = trips_net_reset["demand"]

    # `is None` is the identity check; `== None` would dispatch to `__eq__`.
    if quantile is None:
        color_bound = max(demand.max(), abs(demand.min()))
    else:
        color_bound = demand.quantile(quantile)

    h3_visualization.plot_choropleth(
        trips_net_reset,
        hex_col="hex_id",
        color_by_col="demand",
        center=lat_lon_leipzig,
        color_continuous_scale="RdBu",
        range_color=(-color_bound, color_bound),
        opacity=0.7,
        zoom=10,
        labels={'demand': 'inflow - outflow'}
    )


In [None]:
plot_net_trips(h3_res=9, time_interval_length=1)

With our current color scale, which is set to the maximum absolute value of
inflow and outflow, we can barely distinguish any hexagons. The hexagon next
to the main train station has an incredibly high negative net flow, which means
that far more people start trips at the station than end trips there. Our customer
should expect to observe the same behaviour when entering the market and
should therefore consider relocating bicycles to that hexagon in order to
avoid unmet demand.

To continue our analysis we will lower the color scale. This means that some hexagons will have higher/lower values than those that can be represented by our current color scale. However, this still is necessary to see other interesting imbalances.


In [None]:
plot_net_trips(h3_res=9, time_interval_length=1, quantile=0.9999)

With the adjusted color scale we can see multiple imbalanced hexagons. Most of them are in the center of the map. The hexagon with the highest inflow is at a small park called the "Fritz-von-Harck-Anlage". A very high inflow shows that more people end their trips in this hexagon than start them there. Maybe the park is used by students from the main university nearby to relax. 
A local guide on Google Maps backs this conjecture.  

"Ist sehr gut zum chillen"  
"Is very good for chilling out" (Translated by Google)  
\- [Franz Constapel](https://goo.gl/maps/gU6xMgQ1b7X7tcq5A) (a local guide)


### Decrease Spatial Resolution
Next we will decrease the spatial resolution to see if the portrayed imbalances are still present.

In [None]:
plot_net_trips(h3_res=8, time_interval_length=1, quantile=0.9999)

The lower resolution still shows some imbalances, however the hexagon with the high inflow we just analyzed is not present anymore.
This happens because if two neighboring hexagons have negative and positive net flow, then they cancel each other out when the resolution is decreased. 
Therefore we advise to be careful when lowering the h3 resolution.

However, we also see the imbalance of hexagons around the area of Plagwitz much more clearly now. Previously this imbalance was distributed across multiple hexagons that were all colored light blue, but now it is one hexagon with a much more saturated blue. Therefore, observing the same map at different spatial resolutions can help to identify imbalances.


In [None]:
def get_trips_net_monthly(h3_res: int, time_interval_length: int) -> pd.DataFrame:
    """Compute the net flow (arrivals minus departures) per hexagon and month.

    Both the start-side and the end-side sums are bucketed by the month of
    ``datetime_start_floored``.

    Args:
        h3_res: H3 resolution used to select the grouped trips.
        time_interval_length: Time interval length used to select the grouped trips.

    Returns:
        One row per (hexagon, month) pair with ``hex_id`` and ``datetime``
        columns plus the net value of every numeric column.
    """
    trips_grouped = get_trips_grouped(h3_res, time_interval_length)

    def _monthly_sum(hex_col: str) -> pd.DataFrame:
        # Sum the numeric columns per hexagon (taken from `hex_col`) and month.
        monthly = trips_grouped.groupby(
            [
                pd.Grouper(key=hex_col),
                pd.Grouper(key="datetime_start_floored", freq="M"),
            ]
        ).sum(numeric_only=True)
        # Give both sides identical index names so they align on subtraction.
        monthly.index.names = ["hex_id", "datetime"]
        return monthly

    trips_monthly_start = _monthly_sum("start_hex_id")
    trips_monthly_end = _monthly_sum("end_hex_id")

    # A (hexagon, month) pair that only has departures or only has arrivals is
    # missing from the other frame. A plain `-` followed by fillna(0) would
    # report such a pair as balanced; fill_value=0 keeps its real in-/outflow.
    trips_monthly_net = trips_monthly_end.sub(trips_monthly_start, fill_value=0)
    return trips_monthly_net.fillna(0).reset_index()

In [None]:
def plot_trips_net_monthly(h3_res: int, time_interval_length: int):
    """Draw an animated choropleth of the net flow per hexagon, one frame per month.

    Args:
        h3_res: H3 resolution forwarded to ``get_trips_net_monthly``.
        time_interval_length: Time interval length forwarded to
            ``get_trips_net_monthly``.
    """
    trips_monthly_net = get_trips_net_monthly(h3_res, time_interval_length)

    # Rows are ordered by month number so the animation frames appear in order.
    trips_by_month = trips_monthly_net.assign(
        month=trips_monthly_net["datetime"].dt.month
    ).sort_values(by="month")

    h3_visualization.plot_choropleth(
        trips_by_month,
        hex_col="hex_id",
        color_by_col="demand",
        center=lat_lon_leipzig,
        color_continuous_scale="RdBu",
        range_color=(-50, 50),
        animation_frame="month",
        opacity=0.7,
        zoom=10,
        labels={"demand": "inflow - outflow"},
        mapbox_style="open-street-map",
    )

In [None]:
# Only needed when running this notebook in VS Code; background:
# https://github.com/microsoft/vscode-jupyter/issues/4364#issuecomment-817352686
# Sets plotly's default renderer to 'notebook_connected' for all following figures.
import plotly.io as pio
pio.renderers.default = 'notebook_connected'

In [None]:
plot_trips_net_monthly(h3_res=9, time_interval_length=1)

In terms of seasonal patterns, we can see that the hexagon that covers the "Floßplatz" park has very high outflow during the colder months (September–December) and is relatively balanced for the other months. 
![](../figures/location_flossplatz_park.png)
While we don't have a conjecture for why there is so much outflow, we can reason why the overall balance is better during the summer, as it is very likely that more people will go to the park when it is warm.  
This was just one example of such a seasonal pattern. There are many more season-dependent imbalances, which are all useful for operational decisions, such as relocations.


## Idle Times Of Bicycles
Next we will analyze the idle times of bicycles. The idle time is the time between two consecutive trips of the same bicycle.  

The idle time is important for the bicycle sharing system operator as it might indicate an oversupply of bicycles relative to demand.  
Also bicycles with unusually high idle times could indicate that the bicycle is damaged or hidden so that users cannot find it.

In [None]:
# Load the trips table from the parquet file at TRIPS_PATH. The cells below
# read its b_number, trip_start, datetime_start, datetime_end, lat_start and
# lng_start columns.
trips = pd.read_parquet(TRIPS_PATH)

In [None]:
# Pair every trip with the adjacent trip of the same bicycle (b_number).
# NOTE(review): shift(1) moves values one row *down* within each group, so each
# row receives the values of the row before it. If `trips` is ordered
# chronologically per bicycle (TODO confirm), the `_next`-suffixed columns
# therefore hold the *preceding* trip, not the following one.
# Rows whose shifted `trip_start` is missing (e.g. the first row of each
# bicycle) are dropped.
trips_shifted = trips.groupby("b_number").shift(1).dropna(subset=["trip_start"])
# Join the original and shifted rows on the shared index; unsuffixed columns
# come from the trip itself, `_next` columns from the shifted row.
trips_with_next = trips.merge(
    trips_shifted, left_index=True, right_index=True, how="inner", suffixes=("", "_next")
)
# Assignment aligns on the index, so each row gets the b_number of its own trip.
trips_with_next['b_number'] = trips['b_number']

In [None]:
# `trips_with_next` is built with `groupby("b_number").shift(1)`, so the
# `_next` columns carry the same bicycle's preceding row (assuming `trips` is in
# chronological order per bicycle). The idle time is the gap between the end of
# that earlier trip and the start of the current one. The former expression,
# `datetime_end - datetime_start_next`, additionally included the duration of
# both trips.
trips_with_next['idle_time'] = (
    trips_with_next.datetime_start - trips_with_next.datetime_end_next
)

In [None]:
trips_with_next["idle_time"].describe()

In [None]:
# Day on which each trip starts (start timestamp floored to midnight).
trip_start_day = trips_with_next["datetime_start"].dt.floor("1D")
trips_with_next["timeinterval"] = trip_start_day

In [None]:
# Median idle time per day, converted from a timedelta to hours.
median_idle_hours = (
    trips_with_next.groupby("timeinterval")["idle_time"].median().dt.total_seconds()
    / 60
    / 60
)

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(median_idle_hours)
ax.set_xlabel("Time interval")
ax.set_ylabel("Median idle time (hours)")

plt.savefig("../figures/idle_time_daily.png")
plt.show()


We see that the idle time of bicycles seems to decrease until around July, after
which it keeps steady.
One possible explanation is that the operator was able to improve the relocation
strategy, which results in less idle time of bicycles.
Therefore, we advise analyzing the bicycle relocations to learn more about
NextBike's relocation strategy.

Next we will plot the idle time of bicycles per hexagon. This will allow operators to identify hexagons with unusually high idle times in order to mitigate inefficiencies.

In [None]:
# Map each trip's start coordinate to its H3 cell at H3_RESOLUTION.
# Iterating over the two coordinate columns avoids building a Series for every
# row (as DataFrame.apply(axis=1) does) and also works on an empty frame.
trips_with_next["hex_id"] = [
    h3.geo_to_h3(lat, lng, H3_RESOLUTION)
    for lat, lng in zip(trips_with_next["lat_start"], trips_with_next["lng_start"])
]


In [None]:
# Calendar month (1-12) in which each trip starts.
trips_with_next["month"] = trips_with_next["datetime_start"].dt.month

In [None]:
# Idle times grouped by start hexagon and month; the grouping is shared by both
# statistics.
idle_time_by_hex_month = trips_with_next.groupby(["hex_id", "month"])["idle_time"]

idle_by_hex_time_median = idle_time_by_hex_month.median().rename("idle_time_median")
idle_by_hex_time_mean = idle_time_by_hex_month.mean().rename("idle_time_mean")

# One row per (hex_id, month) with both statistics side by side.
idle_by_hex_time = pd.concat(
    [idle_by_hex_time_median, idle_by_hex_time_mean], axis=1
).reset_index()

In [None]:
# Express both timedelta statistics as fractional days for plotting.
idle_by_hex_time = idle_by_hex_time.assign(
    idle_time_median_days=lambda frame: (
        frame["idle_time_median"].dt.total_seconds() / 60 / 60 / 24
    ),
    idle_time_mean_days=lambda frame: (
        frame["idle_time_mean"].dt.total_seconds() / 60 / 60 / 24
    ),
)


In [None]:
# Collapse the monthly rows to one row per hexagon by averaging every column.
# Note: `idle_time_median_days` in `idle_by_hex` is therefore the mean of the
# monthly medians, not the median over all trips of the hexagon.
idle_by_hex = idle_by_hex_time.groupby("hex_id").mean().reset_index()

In [None]:
# Choropleth of the per-hexagon idle time, colored by `idle_time_median_days`
# (the average of the monthly medians computed in `idle_by_hex`).
h3_visualization.plot_choropleth(
	idle_by_hex,
	hex_col="hex_id",
	color_by_col="idle_time_median_days",
	center=lat_lon_leipzig,
	zoom=10,
	color_continuous_scale="OrRd",
	labels={'idle_time_median_days': 'median idle time (days)'},
	width=800,
	height=600,
)

Unexpectedly, we see that most of the hexagons with high idle time are at the border of the city.  
To additionally account for seasonal patterns, we will now plot the idle time of bicycles per hexagon for each month.

In [None]:
# Order the rows by month before they are passed to the animated plot.
idle_by_hex_time = idle_by_hex_time.sort_values(by="month")

In [None]:
# Animated choropleth of the median idle time per hexagon; `animation_frame`
# selects the "month" column, so each month is shown as a separate frame.
h3_visualization.plot_choropleth(
	idle_by_hex_time,
	hex_col="hex_id",
	color_by_col="idle_time_median_days",
	center=lat_lon_leipzig,
	zoom=10,
	animation_frame="month",
	color_continuous_scale="OrRd",
	labels={'idle_time_median_days': 'median idle time (days)'},
	width=800,
	height=600,
)


This interactive map now allows us to identify hexagons with high idle time for each month. We see that the most critical hexagons differ vastly between the months.