In [83]:
import pandas as pd
import numpy as np
import folium

In [84]:
# NOTE(review): pickle files can execute arbitrary code on load; only read
# artifacts produced by this project's own pipeline.
trips_df = pd.read_pickle("../00_data/trips.pkl")
# Index the station table by its id so stations can be looked up by label.
stations_df = pd.read_pickle("../00_data/stations.pkl").set_index("station_id")

In [85]:
# Sorted, de-duplicated ids of every station that appears as a trip start or
# a trip end.
station_ids = np.union1d(trips_df["start_station_id"], trips_df["end_station_id"])

In [86]:
# One timestamp (midnight) per calendar day of 2019, both endpoints included.
dates = pd.date_range("2019-01-01", "2019-12-31", freq="D")

In [87]:
# Cartesian product of stations and days: one (station_id, date) entry for
# every combination, including days on which a station had no trips.
full_index = pd.MultiIndex.from_product(
    iterables=[station_ids, dates],
    names=["station_id", "date"],
)

In [88]:
# Truncate each trip's start timestamp to midnight of its day so trips can be
# bucketed per calendar day.
trips_df["start_time_floored"] = trips_df["start_time"].dt.floor("1D")

# Count trips per (start station, day), then expand onto the full
# station x day grid; combinations without any trip get a count of 0.
trips_per_station_date = (
    trips_df.groupby(["start_station_id", "start_time_floored"])
    .size()
    .reindex(full_index)
    .fillna(0)
    .to_frame(name="n_trips")
)


In [89]:
# Sanity check: how many distinct stations have rows for December 2019?
# The second index level holds one label per day (midnight timestamps), so the
# selection must use a daily range. An hourly range would consist mostly of
# labels that do not exist in the index (only the midnight stamps can match),
# and the upper-case "H" alias is deprecated in recent pandas releases.
time_interval = pd.date_range(start="2019-12-01", end="2019-12-31", freq="D")

trips_per_station_date.loc[(slice(None), time_interval), :].index.get_level_values(0).nunique()

148

In [90]:
def get_station_utilization(time_interval):
    """Compute, per station, on how many days of a period it was used.

    Reads the module-level ``trips_per_station_date`` frame (indexed by
    station id and day, with an ``n_trips`` column) and the module-level
    ``station_ids`` array.

    Parameters
    ----------
    time_interval : sequence of datetime-like
        The timestamps that define the period. They are collapsed to distinct
        calendar days, so a sub-daily range (e.g. hourly) yields the same
        result as the corresponding daily range.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``station_ids`` with the columns ``days_used``
        (integer number of days in the period with at least one trip) and
        ``station_usage`` (``days_used`` divided by the number of days in the
        period).
    """
    # Collapse to distinct calendar days: the trip table has one label per
    # day, and the usage share must be divided by a number of days, not by
    # the number of raw timestamps passed in.
    days = pd.DatetimeIndex(time_interval).normalize().unique()

    # Select the period with a membership mask on the day level instead of a
    # label lookup, so days missing from the table do not raise.
    in_period = trips_per_station_date.index.get_level_values(1).isin(days)
    period_trips = trips_per_station_date.loc[in_period]

    days_used = (
        period_trips.loc[period_trips["n_trips"] != 0, "n_trips"]
        .groupby(level=0)
        .count()
        # Stations without a single active day are absent after the filter;
        # fill_value keeps the counts as integers instead of turning them
        # into floats via NaN.
        .reindex(station_ids, fill_value=0)
    )

    station_utilization = days_used.to_frame("days_used")
    station_utilization["station_usage"] = station_utilization["days_used"] / len(days)
    return station_utilization

In [91]:
def evaluate_station_utilization(station_utilization, threshold):
    """Print how many stations fall below a usage threshold.

    Parameters
    ----------
    station_utilization : pandas.DataFrame
        One row per station; must contain a ``station_usage`` column.
    threshold : float
        Usage share as a fraction (``0.2`` is printed as ``20%``). Stations
        with ``station_usage`` strictly below it are counted.

    Returns
    -------
    None
        The three summary lines are written to standard output.
    """
    usage = station_utilization["station_usage"]
    n_stations = len(station_utilization)
    station_utilization_kpi = int((usage < threshold).sum())
    n_unused_stations = int((usage == 0).sum())

    # An empty table would otherwise divide by zero; report 0% in that case.
    share_below = station_utilization_kpi / n_stations * 100 if n_stations else 0.0

    print(
        "Number of stations with utilization below "
        + f"{threshold*100:.0f}%: {station_utilization_kpi}"
    )
    print(f"That is {share_below:.0f}% of all stations")
    print(f"Number of stations that are unused: {n_unused_stations}")

In [92]:
# Utilization over the whole of 2019.
station_utilization = get_station_utilization(
    time_interval=pd.date_range("2019-01-01", "2019-12-31", freq="D")
)
evaluate_station_utilization(station_utilization=station_utilization, threshold=0.2)


Number of stations with utilization below 20%: 46
That is 31% of all stations
Number of stations that are unused: 0


In [93]:
# Utilization in December 2019.
station_utilization = get_station_utilization(
    time_interval=pd.date_range("2019-12-01", "2019-12-31", freq="D")
)
evaluate_station_utilization(station_utilization=station_utilization, threshold=0.2)

Number of stations with utilization below 20%: 23
That is 16% of all stations
Number of stations that are unused: 14


In [94]:
# Utilization in November 2019.
station_utilization = get_station_utilization(
    time_interval=pd.date_range("2019-11-01", "2019-11-30", freq="D")
)
evaluate_station_utilization(station_utilization=station_utilization, threshold=0.2)

Number of stations with utilization below 20%: 27
That is 18% of all stations
Number of stations that are unused: 19


In [95]:
# Utilization from 14 to 31 December 2019.
station_utilization = get_station_utilization(
    time_interval=pd.date_range("2019-12-14", "2019-12-31", freq="D")
)
evaluate_station_utilization(station_utilization=station_utilization, threshold=0.2)

Number of stations with utilization below 20%: 22
That is 15% of all stations
Number of stations that are unused: 15


In [136]:
def draw_underutilized_stations(station_utilization, threshold):
    """Build a folium map with one marker per station below a usage threshold.

    Reads station coordinates from the module-level ``stations_df`` (indexed
    by station id, with ``latitude`` and ``longitude`` columns).

    Parameters
    ----------
    station_utilization : pandas.DataFrame
        Indexed by station id; must contain ``station_usage`` and
        ``days_used`` columns.
    threshold : float
        Stations with ``station_usage`` strictly below this value are drawn.

    Returns
    -------
    folium.Map
        Map with a red marker for stations with zero days used and an orange
        marker for every other station below the threshold. Each marker's
        popup shows the station id and its number of days used.
    """
    below_threshold = station_utilization["station_usage"] < threshold
    low_usage = station_utilization[below_threshold]

    # Fetch the metadata rows of the selected stations, then attach their
    # utilization columns by index.
    station_info = stations_df.loc[low_usage.index]
    station_info = station_info.merge(low_usage, left_index=True, right_index=True)

    # Fixed map centre; the original note calls these "the orig mean values"
    # -- presumably mean coordinates of the data set, TODO confirm.
    map_center = (34.052235, -118.243683)
    la_map = folium.Map(
        location=map_center,
        zoom_start=11,
        control_scale=True,
        max_zoom=20,
    )

    for station_id, station in station_info.iterrows():
        n_days = int(station["days_used"])

        # Red marks stations that were never used in the period.
        if n_days == 0:
            marker_color = "red"
        else:
            marker_color = "orange"

        label = folium.Popup(
            html=f"Station ID: <b>{station_id}</b><br>Days used: <b>{n_days}</b>",
            max_width=100,
            min_width=100,
        )
        marker = folium.Marker(
            location=[station["latitude"], station["longitude"]],
            popup=label,
            icon=folium.Icon(color=marker_color),
        )
        marker.add_to(la_map)

    return la_map


In [137]:
# Show the stations below 20% usage for the most recently computed period.
draw_underutilized_stations(station_utilization=station_utilization, threshold=0.2)