### Station Data

In [None]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the hourly station snapshots (the saved index comes back as an ordinary column)
# and fold the separate `date` and `hour` columns into a single timestamp in `date`.
stations = (
    pd.read_csv('station_availability_df', index_col=0)
    .reset_index()
    .assign(
        date=lambda frame: pd.to_datetime(
            # "<date> <hour>:00:00", built column-wise instead of row by row
            frame['date'].astype(str) + ' ' + frame['hour'].astype(str) + ':00:00',
            format='%Y-%m-%d %H:%M:%S',
        )
    )
)

# Keep only rows flagged in service, and only the columns used further down.
station_availabilities = stations.loc[
    stations['in_service'].eq(1),
    ['dock_id', 'date', 'avail_bikes', 'avail_docks'],
]
station_availabilities.head(1)

### Bike Data

In [None]:
# Load the trip records; the first CSV column is read as the index and then
# restored as a regular column so the frame ends up with a fresh RangeIndex.
bike_trips = pd.read_csv('bike_trip_df', index_col=0)
bike_trips = bike_trips.reset_index()

In [None]:
import datetime
# Parse the trip start/stop times and truncate them to the top of the hour.
# `.dt.floor('h')` is vectorised, so it avoids building one datetime object per row
# in Python and passes missing timestamps (NaT) through instead of raising.
bike_trips['starttime'] = pd.to_datetime(
    bike_trips['starttime'], format='%Y-%m-%d %H:%M:%S'
).dt.floor('h')
bike_trips['stoptime'] = pd.to_datetime(
    bike_trips['stoptime'], format='%Y-%m-%d %H:%M:%S'
).dt.floor('h')

# Count trips per (hour, station): departures keyed by start station,
# arrivals keyed by end station. After reset_index() the count column is labelled 0.
outgoing_trips = bike_trips[['starttime', 'start station id']]
incoming_trips = bike_trips[['stoptime', 'end station id']]
incoming_bike_trips = incoming_trips.groupby(['stoptime', 'end station id']).size().reset_index()
outgoing_bike_trips = outgoing_trips.groupby(['starttime', 'start station id']).size().reset_index()

In [None]:
# Net rider flow per station-hour: arrivals minus departures.
#
# Both count tables are first given the same key names (`date`, `dock_id`) and an
# explicit count name; the count column produced by size().reset_index() is labelled 0.
incoming_counts = incoming_bike_trips.rename(
    columns={'stoptime': 'date', 'end station id': 'dock_id', 0: 'incoming_bikes'}
)
outgoing_counts = outgoing_bike_trips.rename(
    columns={'starttime': 'date', 'start station id': 'dock_id', 0: 'outgoing_bikes'}
)

# Outer join: a station-hour that saw only arrivals or only departures must be kept.
# An inner join would drop it, and its net flow would later be treated as 0.
all_trips = pd.merge(incoming_counts, outgoing_counts, on=['date', 'dock_id'], how='outer')

# A side missing from the join means "no trips in that direction", i.e. a count of 0.
count_columns = ['incoming_bikes', 'outgoing_bikes']
all_trips[count_columns] = all_trips[count_columns].fillna(0).astype(int)

all_trips['net_bikes'] = all_trips['incoming_bikes'] - all_trips['outgoing_bikes']
all_trips = all_trips.drop(columns=count_columns)

all_trips.head(1)

### Rebalancing Calculations

In [None]:
# Attach net rider flow to every in-service station snapshot. The left join keeps
# every snapshot row; the result is ordered by station and then by time.
df = pd.merge(
    station_availabilities, all_trips, on=['dock_id', 'date'], how='left'
).sort_values(by=['dock_id', 'date'], ascending=True)

# Snapshots with no matching trip row get a net flow of 0.
# Assign the result back explicitly: `df.net_bikes.fillna(0, inplace=True)` acts on a
# temporary Series and leaves `df` unchanged when pandas Copy-on-Write is active.
df['net_bikes'] = df['net_bikes'].fillna(0)
df.head(1)

In [None]:
# Expected end-of-hour inventory if only riders moved bikes:
# inventory at the snapshot plus net rider flow during that hour.
# The index is then renumbered 0..n-1 in the current (dock_id, date) order.
df = df.assign(
    theoretical_eoh_avail_bikes=df['avail_bikes'] + df['net_bikes']
).reset_index(drop=True)
df.head(5)

In [None]:
# Observed end-of-hour inventory: within each station, take `avail_bikes` from the
# following row, so it can be compared with the theoretical figure for the same row.
# The last row of every station has no following row and becomes NaN.
df['actual_eoh_avail_bikes'] = df['avail_bikes'].groupby(df['dock_id']).shift(-1)

In [None]:
# Where no following snapshot exists, fall back to the theoretical value so the
# difference computed from these two columns is zero for that row.
df['actual_eoh_avail_bikes'] = df['actual_eoh_avail_bikes'].fillna(
    df['theoretical_eoh_avail_bikes']
)
df.head(5)

In [None]:
# Observed minus theoretical end-of-hour inventory. Positive values mean more bikes
# were present than rider trips explain; negative values mean fewer.
df['bikes_added_by_citibike'] = df['actual_eoh_avail_bikes'].sub(
    df['theoretical_eoh_avail_bikes']
)
df.head(5)

### Labeling

In [None]:
# Independent working copy holding only the columns used in the labeling step,
# so edits to it never touch `df`.
no_rebalancing_df = df.loc[
    :, ['dock_id', 'date', 'avail_bikes', 'avail_docks', 'net_bikes', 'bikes_added_by_citibike']
].copy()

In [None]:
# Preview the first row of the working copy.
no_rebalancing_df.iloc[:1]

In [None]:
### this is to get indexes of full or empty stations

# Simulates dock 72's inventory with Citi Bike's bike movements removed, and records
# the row labels at which the station is empty (no bikes) or full (no docks).
sim_columns = ['dock_id', 'date', 'avail_bikes', 'avail_docks', 'net_bikes', 'bikes_added_by_citibike']
no_rebalancing_df = df.loc[df['dock_id'] == 72, sim_columns].copy()

full_stations = []
empty_stations = []
start_point = 0

# TODO: how do i get the for loop to go back one step
# Rows are read by position (`pos`) while `label` carries the matching index label.
# Using a label as an `.iloc` position is only correct when this station's labels
# happen to start at 0.
for pos, label in enumerate(no_rebalancing_df.index):
    row = no_rebalancing_df.iloc[pos]
    if row.avail_bikes <= 0:
        # station is out of bikes: record the row and restart from the untouched data
        empty_stations.append(label)
        start_point = label
        no_rebalancing_df = df.loc[df['dock_id'] == 72, sim_columns].copy()
    elif row.avail_docks <= 0:
        # station is out of docks: record the row and restart from the untouched data
        full_stations.append(label)
        start_point = label
        no_rebalancing_df = df.loc[df['dock_id'] == 72, sim_columns].copy()
    elif row.bikes_added_by_citibike != 0:
        # Undo this hour's rebalancing on every later row of the station:
        # that many fewer bikes and that many more free docks.
        later_rows = no_rebalancing_df.index[pos + 1:]
        no_rebalancing_df.loc[later_rows, 'avail_bikes'] -= row.bikes_added_by_citibike
        no_rebalancing_df.loc[later_rows, 'avail_docks'] += row.bikes_added_by_citibike
        
        
        
        

In [None]:
### this is to check the code

# Applies only the "remove rebalancing" adjustment to dock 72, with no empty/full
# bookkeeping, so the adjusted inventory can be inspected on its own.
no_rebalancing_df = df.loc[
    df['dock_id'] == 72,
    ['dock_id', 'date', 'avail_bikes', 'avail_docks', 'net_bikes', 'bikes_added_by_citibike'],
].copy()

# Rows are addressed by position and later rows by their labels. Using a label as an
# `.iloc` position is only correct when this station's labels happen to start at 0.
for pos in range(len(no_rebalancing_df)):
    rebalanced = no_rebalancing_df.iloc[pos].bikes_added_by_citibike
    if rebalanced != 0:
        # Undo this hour's rebalancing on every later row of the station:
        # that many fewer bikes and that many more free docks.
        later_rows = no_rebalancing_df.index[pos + 1:]
        no_rebalancing_df.loc[later_rows, 'avail_bikes'] -= rebalanced
        no_rebalancing_df.loc[later_rows, 'avail_docks'] += rebalanced
        
        
        
        

In [None]:
# Show the recorded full rows, the recorded empty rows, and the last restart point,
# one per line.
print(full_stations, empty_stations, start_point, sep='\n')

In [None]:
# Raise the display limit (this setting persists for later cells) and show every
# dock-72 row of the working copy.
pd.options.display.max_rows = 10000
no_rebalancing_df[no_rebalancing_df['dock_id'] == 72]

In [None]:
# Preview the first five rows of the working copy.
no_rebalancing_df.head(5)

In [None]:
# Simulates dock 72's inventory with Citi Bike's bike movements removed, and records
# the row labels at which the station is exactly empty or exactly full.
sim_columns = ['dock_id', 'date', 'avail_bikes', 'avail_docks', 'net_bikes', 'bikes_added_by_citibike']
no_rebalancing_df = df.loc[df['dock_id'] == 72, sim_columns].copy()

full_stations = []
empty_stations = []
i = 0  # position of the current row, not its index label

# TODO: how do i get the loop to go back one step
while i < len(no_rebalancing_df):
    row = no_rebalancing_df.iloc[i]
    if row.avail_bikes == 0:
        # station is out of bikes: record the row and restart from the untouched data
        empty_stations.append(row.name)
        no_rebalancing_df = df.loc[df['dock_id'] == 72, sim_columns].copy()
    elif row.avail_docks == 0:
        # station is out of docks: record the row and restart from the untouched data
        full_stations.append(row.name)
        no_rebalancing_df = df.loc[df['dock_id'] == 72, sim_columns].copy()
    elif row.bikes_added_by_citibike != 0:
        # Undo this hour's rebalancing on every later row of the station:
        # that many fewer bikes and that many more free docks.
        later_rows = no_rebalancing_df.index[i + 1:]
        no_rebalancing_df.loc[later_rows, 'avail_bikes'] -= row.bikes_added_by_citibike
        no_rebalancing_df.loc[later_rows, 'avail_docks'] += row.bikes_added_by_citibike
    # Always advance. Without this, a row that matches none of the branches above
    # would leave `i` unchanged and the loop would never end.
    i += 1

In [None]:
### this is to get indexes of full or empty stations

# Simulates dock 72's inventory with Citi Bike's bike movements removed. A row is
# recorded as empty/full only when the previous row had a non-zero rebalancing value.
# After recording, the working frame is reset to the untouched data and the same row
# is read once more from that fresh frame before moving on.
sim_columns = ['dock_id', 'date', 'avail_bikes', 'avail_docks', 'net_bikes', 'bikes_added_by_citibike']
no_rebalancing_df = df.loc[df['dock_id'] == 72, sim_columns].copy()

full_stations = []
empty_stations = []
i = 0               # position of the current row, not its index label
just_reset = False  # True while re-reading row i right after a reset

while i < len(no_rebalancing_df):
    row = no_rebalancing_df.iloc[i]
    # The first row has no previous hour; without the `i > 0` guard, iloc[-1] would
    # silently read the last row of the frame instead.
    prev_rebalanced = i > 0 and no_rebalancing_df.iloc[i - 1].bikes_added_by_citibike != 0
    # A row is recorded at most once per visit. Otherwise a row that is empty or full
    # in the untouched data would be recorded and reset forever.
    may_record = prev_rebalanced and not just_reset

    if may_record and row.avail_bikes <= 0:
        # out of bikes: record the row, restart from the untouched data, re-read row i
        empty_stations.append(row.name)
        no_rebalancing_df = df.loc[df['dock_id'] == 72, sim_columns].copy()
        just_reset = True
    elif may_record and row.avail_docks <= 0:
        # out of docks: record the row, restart from the untouched data, re-read row i
        full_stations.append(row.name)
        no_rebalancing_df = df.loc[df['dock_id'] == 72, sim_columns].copy()
        just_reset = True
    else:
        if row.bikes_added_by_citibike != 0:
            # Undo this hour's rebalancing on every later row of the station:
            # that many fewer bikes and that many more free docks.
            later_rows = no_rebalancing_df.index[i + 1:]
            no_rebalancing_df.loc[later_rows, 'avail_bikes'] -= row.bikes_added_by_citibike
            no_rebalancing_df.loc[later_rows, 'avail_docks'] += row.bikes_added_by_citibike
        i += 1
        just_reset = False

In [None]:
# Show the recorded full rows, the recorded empty rows, and `start_point`, one per line.
# NOTE(review): the two while-loop cells before this one never assign `start_point`,
# so the value printed here was set by an earlier cell. Confirm it is still wanted.
print(full_stations, empty_stations, start_point, sep='\n')