In [1]:
import pandas as pd
import numpy as np
from joblib import load

In [2]:
# Load the per-station predictions (bike/dock demand and depletion status)
# for the three datetimes being planned
prediction_07152020, prediction_09102020, prediction_11042020 = (
    pd.read_csv('../Data/station_predictions_07152020.csv'),
    pd.read_csv('../Data/station_predictions_09102020.csv'),
    pd.read_csv('../Data/station_predictions_11042020.csv'),
)

# Lookup keyed by station id; each entry is read downstream as (station, distance) records.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
station_dict = load('../Data/station_dict.pkl')

In [3]:
def map_demand(df):
    """Encode the categorical bike/dock demand levels as integers, in place.

    'High', 'Medium' and 'Low' become 3, 2 and 1 in the `bike_demand` and
    `dock_demand` columns of `df`; any other label becomes NaN. A column that
    is already numeric is left untouched, so re-running the cell does not
    overwrite previously encoded values with NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with `bike_demand` and `dock_demand` columns. Modified in place;
        its row order is not changed.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` sorted by `bike_demand`, highest demand first.
    """
    demand_levels = {'High': 3, 'Medium': 2, 'Low': 1}
    for column in ('bike_demand', 'dock_demand'):
        # Mapping an already-encoded (numeric) column would turn every value into NaN
        if not pd.api.types.is_numeric_dtype(df[column]):
            df[column] = df[column].map(demand_levels)
    # Return the sorted frame; callers that ignore the return value still get
    # the in-place encoding.
    return df.sort_values('bike_demand', ascending = False)

In [4]:
# Encode bike and dock demand numerically in each of the three prediction
# dataframes (map_demand modifies the frame it is given)
for predictions_df in (prediction_07152020, prediction_09102020, prediction_11042020):
    map_demand(predictions_df)

In [5]:
def generate_pairs(rebalance_in, rebalance_out, original_df, paired_list):
    """Pair each station that needs bikes with the closest station that can give them.

    Stations in `rebalance_in` are visited in order. Each one is matched with
    the nearest station still left in `rebalance_out`, according to the
    distances stored in the module-level `station_dict`. A donor station is
    used at most once; pairing stops as soon as no donors remain.

    Parameters
    ----------
    rebalance_in : pandas.DataFrame
        Stations to bring bikes to (needs a `station_id` column).
    rebalance_out : pandas.DataFrame
        Stations to take bikes from (needs a `station_id` column).
    original_df : pandas.DataFrame
        Status/demand predictions. Modified in place: both stations of every
        pair get `depletion_status` set to "Healthy".
    paired_list : list
        Existing pairs (possibly empty). New `(to_station, from_station)`
        tuples are appended to it in place.

    Returns
    -------
    tuple
        `(original_df, paired_list)`, the same objects that were passed in.
    """
    for in_station in rebalance_in.station_id:
        donor_ids = list(rebalance_out.station_id)
        # No donors left: the remaining receiving stations stay unpaired
        if len(donor_ids) == 0:
            break

        # Distances from this station, restricted to the donors still available
        distances = pd.DataFrame(station_dict[in_station], columns = ['station', 'distance'])
        distances = distances.set_index('station').loc[donor_ids]
        nearest_station = int(distances.sort_values('distance').index[0])

        # Both ends of the pair now count as rebalanced
        for station in (nearest_station, in_station):
            original_df.loc[original_df['station_id'] == station, 'depletion_status'] = "Healthy"

        # Take the donor out of the pool so it cannot be matched again
        rebalance_out = rebalance_out[rebalance_out['station_id'] != nearest_station]
        paired_list.append((in_station, nearest_station))

    return original_df, paired_list

In [6]:
def pair_stations(predictions):
    """Build a rebalancing plan of (receiving station, donor station) pairs.

    Works on a copy of `predictions`, so the caller's frame is left untouched.
    The first pass pairs "Empty Risk" stations whose bike demand exceeds their
    dock demand with "Full Risk" stations whose dock demand exceeds their bike
    demand. If one side still has stations left afterwards, a second pass
    draws the missing partners from stations with the opposite risk status
    and medium (2) demand for both bikes and docks.

    Parameters
    ----------
    predictions : pandas.DataFrame
        Needs `station_id`, `depletion_status`, `bike_demand` and
        `dock_demand` columns, with demand already encoded numerically.

    Returns
    -------
    tuple
        `(pairs, updated_predictions)`: a DataFrame with one row per pair and
        the working copy of the predictions as returned by `generate_pairs`.
    """
    working_df = predictions.copy()

    def select_stations(status, wants_bikes):
        # Stations with the given risk status whose demand leans towards bikes
        # (wants_bikes) or towards docks, re-indexed from zero
        if wants_bikes:
            skewed = working_df['bike_demand'] > working_df['dock_demand']
        else:
            skewed = working_df['bike_demand'] < working_df['dock_demand']
        matches = working_df[(working_df['depletion_status'] == status) & skewed]
        return matches.reset_index().drop('index', axis = 1)

    # Pairs accumulate here across both passes
    station_pairs = []

    # First pass: stations that clearly need bikes vs. stations that clearly have too many
    needs_bikes = select_stations("Empty Risk", wants_bikes = True)
    has_surplus = select_stations("Full Risk", wants_bikes = False)
    working_df, station_pairs = generate_pairs(needs_bikes, has_surplus, working_df, station_pairs)

    # Re-select after the first pass; one side is left over whenever the two
    # groups did not start out the same size
    needs_bikes = select_stations("Empty Risk", wants_bikes = True)
    has_surplus = select_stations("Full Risk", wants_bikes = False)

    medium_demand = (working_df.bike_demand == 2) & (working_df.dock_demand == 2)
    leftover_in, leftover_out = len(needs_bikes), len(has_surplus)

    if leftover_in > leftover_out:
        # Stations still need bikes: take them from full/nearly full stations
        # with medium demand for both bikes and docks
        has_surplus = working_df[(working_df.depletion_status == "Full Risk") & medium_demand]
        working_df, station_pairs = generate_pairs(needs_bikes, has_surplus, working_df, station_pairs)
    elif leftover_in < leftover_out:
        # Stations still need bikes removed: bring them to empty/nearly empty
        # stations with medium demand for both bikes and docks
        needs_bikes = working_df[(working_df.depletion_status == "Empty Risk") & medium_demand]
        working_df, station_pairs = generate_pairs(needs_bikes, has_surplus, working_df, station_pairs)

    pairs_df = pd.DataFrame(
        station_pairs,
        columns = ["Rebalance Bikes To Station #", "Rebalance Bikes From Station #"],
    )
    return pairs_df, working_df

In [7]:
# Build one rebalancing plan per prediction datetime:
#   07-15-2020  9:00
#   09-10-2020 17:00
#   11-04-2020 13:00
# pair_stations also returns the updated predictions, which are not needed here
pair_list_07152020, _ = pair_stations(predictions = prediction_07152020)
pair_list_09102020, _ = pair_stations(predictions = prediction_09102020)
pair_list_11042020, _ = pair_stations(predictions = prediction_11042020)

In [8]:
# Write each rebalancing plan to its own CSV file, without the row index
for pairs_df, csv_path in (
    (pair_list_07152020, '../Data/matched_rebalancing_stations_07152020.csv'),
    (pair_list_09102020, '../Data/matched_rebalancing_stations_09102020.csv'),
    (pair_list_11042020, '../Data/matched_rebalancing_stations_11042020.csv'),
):
    pairs_df.to_csv(csv_path, index = False)

In [14]:
# Preview the first five rows of the 07-15-2020 predictions
prediction_07152020.head(5)

Unnamed: 0,station_id,bike_demand,dock_demand,depletion_status,station_lat,station_long
0,3382,2,2,Full Risk,40.680611,-73.994758
1,362,2,3,Empty Risk,40.751726,-73.987535
2,146,2,3,Empty Risk,40.71625,-74.009106
3,3834,2,1,Full Risk,40.69467,-73.90663
4,500,3,3,Full Risk,40.762288,-73.983362


In [9]:
# Display the station pairs generated for 07-15-2020
pair_list_07152020

Unnamed: 0,Rebalance Bikes To Station #,Rebalance Bikes From Station #
0,340,3786
1,3101,3899
2,3383,3539
3,312,3905
4,3452,3093
...,...,...
115,3354,3335
116,244,3303
117,3325,3908
118,3321,3382


In [15]:
# Display the 09-10-2020 predictions
prediction_09102020

Unnamed: 0,station_id,bike_demand,dock_demand,depletion_status,station_lat,station_long
0,3382,2,2,Empty Risk,40.680611,-73.994758
1,362,3,2,Empty Risk,40.751726,-73.987535
2,146,2,2,Healthy,40.716250,-74.009106
3,3834,1,2,Healthy,40.694670,-73.906630
4,500,3,2,Empty Risk,40.762288,-73.983362
...,...,...,...,...,...,...
890,3226,2,2,Empty Risk,40.782750,-73.971370
891,3917,2,1,Healthy,40.691780,-73.978770
892,3916,2,1,Healthy,40.708485,-74.002751
893,3918,2,1,Healthy,40.723870,-73.975767


In [10]:
# Display the station pairs generated for 09-10-2020
pair_list_09102020

Unnamed: 0,Rebalance Bikes To Station #,Rebalance Bikes From Station #
0,362,251
1,500,3407
2,469,3112
3,464,3016
4,3143,3092
5,311,276
6,533,248
7,520,239
8,3156,243
9,3443,324


In [11]:
# Display the station pairs generated for 11-04-2020
pair_list_11042020

Unnamed: 0,Rebalance Bikes To Station #,Rebalance Bikes From Station #
0,3518,336
1,533,161
2,393,128
3,3374,248
4,473,415
5,3314,324
6,143,390
7,3107,3112
8,3416,243
9,445,251
