# Generate dataset for SARSA algorithm

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import datetime

In [2]:
## Load the cleaned trip records. Later cells read columns such as
## hack_license, pickup/dropoff datetimes and times, the weekday flags,
## first_pickup / last_dropoff, the taxi-zone ids and total_amount from it.
cleaned_trip_df = pd.read_csv('../data/trip_cleaned.csv')

In [3]:
## Show at most 10 rows when a DataFrame is displayed.
## The fully qualified option name is used on purpose: abbreviated patterns such
## as 'max_row' are resolved by regex search and raise an OptionError as soon as
## more than one registered option matches (newer pandas versions also register
## 'styler.render.max_rows').
pd.set_option('display.max_rows', 10)

## Convert time to datetime

In [5]:
## Work on a copy so the raw frame loaded from disk stays untouched.
episode_data = cleaned_trip_df.copy()

## Parse the timestamp columns BEFORE sorting so the ordering below is truly
## chronological and does not depend on how the CSV happened to format the
## timestamp strings.
for col in ('pickup_datetime', 'dropoff_datetime'):
    episode_data[col] = pd.to_datetime(episode_data[col])

## The *_time columns are reduced to plain datetime.time objects (time of day
## only), which is what get_shift compares against.
for col in ('pickup_time', 'dropoff_time'):
    episode_data[col] = pd.to_datetime(episode_data[col]).dt.time

## Order every driver's trips by pickup time; the per-driver forward/backward
## fills performed in later cells rely on this row order.
episode_data.sort_values(['hack_license', 'pickup_datetime'], inplace=True)

## Get selected shift
* start in the morning
* end in the afternoon

In [6]:
## Shift label to keep for the rest of the notebook. Labels ('A' or 'B') are
## produced by get_shift; only trips labelled with this value are retained.
selected_shift = 'A'

In [7]:
def get_shift(time, weekday=True, pickup=True):
    """Classify a time of day as belonging to shift 'A' or shift 'B'.

    Shift 'A' windows (lower bound exclusive, upper bound inclusive):

    * weekday pickup:  after 23:30 or up to 11:30 (the window wraps midnight)
    * weekday dropoff: 08:30 - 20:30
    * weekend pickup:  02:00 - 14:00
    * weekend dropoff: 10:00 - 22:00

    Any time outside the applicable window is shift 'B'.

    Parameters
    ----------
    time : datetime.time
        Time of day of the pickup or dropoff.
    weekday : bool, default True
        True to use the weekday windows, False for the weekend windows.
    pickup : bool, default True
        True if ``time`` is a pickup time, False if it is a dropoff time.

    Returns
    -------
    str
        'A' or 'B'.
    """
    if weekday:
        if pickup:
            # This window crosses midnight, hence "or" instead of a range check.
            in_shift_a = time > datetime.time(23, 30) or time <= datetime.time(11, 30)
        else:
            in_shift_a = datetime.time(8, 30) < time <= datetime.time(20, 30)
    elif pickup:
        in_shift_a = datetime.time(2, 0) < time <= datetime.time(14, 0)
    else:
        in_shift_a = datetime.time(10, 0) < time <= datetime.time(22, 0)
    return 'A' if in_shift_a else 'B'

### Label trips with either shift A or B

In [8]:
## Determine the shift at both ends of every working period:
##   _PU_shift - shift implied by the FIRST pickup of the period
##   _DO_shift - shift implied by the LAST dropoff of the period
## get_shift is evaluated only on those boundary rows; all other rows are never
## assigned here, so classifying them would be wasted work on a large frame.
## Weekdays (flag 1) are processed first, then weekends (flag 0).
for day_flag, is_weekday in ((1, True), (0, False)):
    first_pickups = ((episode_data['pickup_weekday'] == day_flag) &
                     (episode_data['first_pickup'] == 1))
    episode_data.loc[first_pickups, '_PU_shift'] = (
        episode_data.loc[first_pickups, 'pickup_time']
        .apply(lambda x: get_shift(x, weekday=is_weekday, pickup=True)))

    last_dropoffs = ((episode_data['dropoff_weekday'] == day_flag) &
                     (episode_data['last_dropoff'] == 1))
    episode_data.loc[last_dropoffs, '_DO_shift'] = (
        episode_data.loc[last_dropoffs, 'dropoff_time']
        .apply(lambda x: get_shift(x, weekday=is_weekday, pickup=False)))

## fill shifts for other rows: within each driver the start label is carried
## forward from the first pickup and the end label is carried backward from
## the last dropoff
episode_data['_PU_shift'] = episode_data.groupby('hack_license')['_PU_shift'].ffill()
episode_data['_DO_shift'] = episode_data.groupby('hack_license')['_DO_shift'].bfill()

In [9]:
## Report how many trips carry the same shift label at both ends, next to the
## size of the whole frame.
same_shift_rows = episode_data.loc[episode_data['_PU_shift'] == episode_data['_DO_shift']]
print('# consistent shift trips: ', same_shift_rows.shape)
print('total trips: ', episode_data.shape)

# consistent shift trips:  (11590795, 35)
total trips:  (13885739, 35)


### Keep only trips with consistent start and end time

In [10]:
## Retain the trips whose start and end labels agree AND equal the selected
## shift, then narrow the frame to the columns needed to build episodes.
labels_agree = episode_data['_PU_shift'] == episode_data['_DO_shift']
label_is_selected = episode_data['_PU_shift'] == selected_shift
episode_columns = ['hack_license', 'pickup_datetime', 'dropoff_datetime',
                   'pickup_taxizone_id', 'dropoff_taxizone_id',
                   'total_amount', 'first_pickup', 'last_dropoff']
episode_data = episode_data.loc[labels_agree & label_is_selected, episode_columns].copy()

## Keep only data points of full shift 
A complete shift must have both the first pickup and the last drop off

In [11]:
## Within each driver, keep only rows that sit at or after a first pickup and
## at or before a last dropoff; everything outside that span is discarded.
driver_key = episode_data['hack_license']

# 1 from a first pickup onward (per driver), NaN before any first pickup
after_start = pd.Series(np.where(episode_data['first_pickup'] == 1, 1, np.nan),
                        index=episode_data.index)
after_start = after_start.groupby(driver_key).ffill()

# 1 up to a last dropoff (per driver), NaN after the final last dropoff
before_end = pd.Series(np.where(episode_data['last_dropoff'] == 1, 1, np.nan),
                       index=episode_data.index)
before_end = before_end.groupby(driver_key).bfill()

episode_data = episode_data.loc[(after_start == 1) & (before_end == 1)].copy()

## Assign unique episode id
Each shift of a driver is assigned a unique id.

In [12]:
## The row label of each first pickup serves as the episode id; it is then
## carried forward over the same driver's following trips.
starts_episode = episode_data['first_pickup'] == 1
episode_data['episode'] = np.where(starts_episode, episode_data.index, np.nan)
episode_data['episode'] = episode_data.groupby('hack_license')['episode'].ffill()

## Convert all time columns to time index
The conversion depends on `delta_t`: timestamps are first rounded to the nearest `delta_t` minutes and then mapped to a time index using `interval_index_table`.

In [13]:
## Lookup table from a time of day ('interval') to integer time indices.
## The 'interval' column is reduced to datetime.time so it can be matched
## against the rounded trip times.
interval_index_table = (
    pd.read_csv('../data/interval_index_table_0.csv')
    .assign(interval=lambda table: pd.to_datetime(table['interval']).dt.time)
)

In [14]:
## Round both trip endpoints to the nearest `delta_t`-minute boundary and keep
## only the time of day.
delta_t = 15
round_by = '{}min'.format(delta_t)
rounded_pickup = episode_data['pickup_datetime'].dt.round(round_by)
rounded_dropoff = episode_data['dropoff_datetime'].dt.round(round_by)
episode_data['pickup_datetime_interval'] = rounded_pickup.dt.time
episode_data['dropoff_datetime_interval'] = rounded_dropoff.dt.time

## Translate every rounded time of day into its integer time index.
index_values = interval_index_table[f'time_index_{delta_t}m']
current_conversion = dict(zip(interval_index_table.interval, index_values))
to_time_index = current_conversion.__getitem__  # KeyError if a time is missing from the table
episode_data['pickup_datetime_index'] = list(map(to_time_index, episode_data['pickup_datetime_interval']))
episode_data['dropoff_datetime_index'] = list(map(to_time_index, episode_data['dropoff_datetime_interval']))

## Collapse Immediate trips into one 

Trips/transitions must collapse into one if the current action = 0 and the next transition starts immediately after the current state.

In [15]:
## Start from the columns that describe one trip as a transition.
trip_columns = ['pickup_taxizone_id', 'dropoff_taxizone_id', 'pickup_datetime_index',
                'dropoff_datetime_index', 'episode', 'total_amount']
trip_df = episode_data[trip_columns].copy()

## Attach, per episode, the pickup of the following trip (shift -1) and the
## dropoff of the preceding trip (shift +1) to every row.
neighbour_specs = (
    ('pickup_datetime_index_next', 'pickup_datetime_index', -1),
    ('pickup_taxizone_id_next', 'pickup_taxizone_id', -1),
    ('dropoff_datetime_index_prev', 'dropoff_datetime_index', 1),
    ('dropoff_taxizone_id_prev', 'dropoff_taxizone_id', 1),
)
for new_col, source_col, offset in neighbour_specs:
    trip_df[new_col] = trip_df.groupby('episode')[source_col].shift(offset)

## Move the row labels into an 'index' column; it is used afterwards as the
## per-trip identifier.
trip_df.reset_index(inplace=True)

* Forward- and backward-fill so that rows that should be collapsed together share the same values
* rewards = sum total amount of those rows
* start location = start location of the first row
* end location = end location of the last row

In [16]:
## mask_bfill == 1: this trip ends in the zone and time index where the next
## trip of the same episode starts, i.e. the next trip follows immediately.
trip_df['mask_bfill'] = np.where((trip_df['dropoff_taxizone_id']==trip_df['pickup_taxizone_id_next']) &
                                 (trip_df['dropoff_datetime_index']==trip_df['pickup_datetime_index_next']), 1, np.nan)
## mask_ffill == 1: this trip starts in the zone and time index where the
## previous trip of the same episode ended, i.e. it continues that trip.
trip_df['mask_ffill'] = np.where((trip_df['pickup_taxizone_id']==trip_df['dropoff_taxizone_id_prev']) &
                                 (trip_df['pickup_datetime_index']==trip_df['dropoff_datetime_index_prev']), 1, np.nan)

## A continuing trip inherits the start fields (and the 'episode' / 'index'
## identifiers) of the trip it continues: blank them, then forward-fill.
ffill_cols = ['pickup_taxizone_id', 'pickup_datetime_index', 'episode', 'index']
for col_names in ffill_cols:
    trip_df[col_names] = np.where(trip_df['mask_ffill'] == 1, np.nan, trip_df[col_names])
    trip_df[col_names] = trip_df[col_names].ffill()
## A trip that is immediately followed by another takes the end fields of the
## following row: blank them, then backward-fill.
bfill_cols = ['dropoff_taxizone_id', 'dropoff_datetime_index']
for col_names in bfill_cols:
    trip_df[col_names] = np.where(trip_df['mask_bfill'] == 1, np.nan, trip_df[col_names])
    trip_df[col_names] = trip_df[col_names].bfill()

* Group the data and sum within each group

In [17]:
## Rows that were collapsed together now share identical identifiers and
## endpoints, so grouping on those fields and summing leaves one row per
## collapsed run with the fares added up.
collapse_keys = ['episode', 'index', 'pickup_taxizone_id', 'dropoff_taxizone_id',
                 'pickup_datetime_index', 'dropoff_datetime_index']
trip_df = trip_df[['episode', 'pickup_taxizone_id', 'dropoff_taxizone_id', 'pickup_datetime_index',
                   'dropoff_datetime_index', 'total_amount', 'index']]
trip_df = trip_df.groupby(collapse_keys, as_index=False).sum()
trip_df = trip_df.sort_values(['episode', 'index'])

## Switch to transition naming: (loc, time) -> (loc_next, time_next) with a reward.
transition_names = {'pickup_taxizone_id': 'loc',
                    'pickup_datetime_index': 'time',
                    'dropoff_taxizone_id': 'loc_next',
                    'dropoff_datetime_index': 'time_next',
                    'total_amount': 'reward'}
trip_df = trip_df.rename(columns=transition_names)
trip_df['type'] = 'delivery'

In [18]:
trip_df.head(10)         

Unnamed: 0,episode,index,loc,loc_next,time,time_next,reward,type
0,2131.0,2131.0,42.0,166.0,36.0,36.0,9.5,delivery
1,2131.0,2132.0,238.0,186.0,37.0,38.0,17.65,delivery
2,2131.0,2133.0,186.0,211.0,39.0,40.0,13.5,delivery
3,2131.0,2134.0,186.0,234.0,40.0,40.0,8.45,delivery
4,2131.0,2135.0,90.0,161.0,40.0,41.0,7.5,delivery
5,2131.0,2136.0,142.0,186.0,41.0,43.0,27.1,delivery
6,2131.0,2138.0,246.0,211.0,44.0,45.0,15.5,delivery
7,2131.0,2139.0,114.0,233.0,46.0,48.0,21.0,delivery
8,2131.0,2141.0,162.0,162.0,48.0,49.0,12.5,delivery
9,2131.0,2142.0,237.0,236.0,49.0,51.0,14.0,delivery


## Create reposition dataset

Reposition from the current drop-off (DO) location to the next pick-up (PU) location.

In [19]:
## A reposition row runs from the dropoff of one trip to the pickup of the
## next trip in the same episode.
reposition_df = trip_df.copy()
upcoming_pickup_loc = reposition_df.groupby('episode')['loc'].shift(-1)    ## will be next location
upcoming_pickup_time = reposition_df.groupby('episode')['time'].shift(-1)  ## will be next time

## The trip's dropoff becomes the start of the reposition ...
reposition_df = (reposition_df
                 .drop(columns=['loc', 'time'])
                 .rename(columns={'loc_next': 'loc', 'time_next': 'time'}))
## ... and the following pickup becomes its end.
reposition_df['loc_next'] = upcoming_pickup_loc
reposition_df['time_next'] = upcoming_pickup_time

## Repositioning earns nothing; staying in the same zone counts as 'cruise',
## anything else as 'repo'.
reposition_df['reward'] = 0
stays_in_zone = reposition_df['loc'] == reposition_df['loc_next']
reposition_df['type'] = np.where(stays_in_zone, 'cruise', 'repo')
reposition_df.head(10)

Unnamed: 0,episode,index,loc,time,reward,type,loc_next,time_next
0,2131.0,2131.0,166.0,36.0,0,repo,238.0,37.0
1,2131.0,2132.0,186.0,38.0,0,cruise,186.0,39.0
2,2131.0,2133.0,211.0,40.0,0,repo,186.0,40.0
3,2131.0,2134.0,234.0,40.0,0,repo,90.0,40.0
4,2131.0,2135.0,161.0,41.0,0,repo,142.0,41.0
5,2131.0,2136.0,186.0,43.0,0,repo,246.0,44.0
6,2131.0,2138.0,211.0,45.0,0,repo,114.0,46.0
7,2131.0,2139.0,233.0,48.0,0,repo,162.0,48.0
8,2131.0,2141.0,162.0,49.0,0,repo,237.0,49.0
9,2131.0,2142.0,236.0,51.0,0,repo,43.0,51.0


## Create cruise dataset 

* with expanded rows if cruising more than 1 time interval

In [20]:
## A "long" cruise stays in the same zone for more than one time interval.
## These rows are taken out of reposition_df and duplicated once per interval
## covered, so that each copy can afterwards be turned into a one-interval step.
##
## This is done with a boolean mask and a positional repeat rather than a
## Python loop over row tuples: building a NumPy array from mixed str/float
## tuples converts every value to a string, and an empty cruise list would
## fail when the column names are assigned.
cruise_span = reposition_df['time_next'] - reposition_df['time']
is_long_cruise = (reposition_df['type'] == 'cruise') & (cruise_span > 1)  # NaN spans compare False

long_cruises = reposition_df.loc[is_long_cruise]
repeat_size = cruise_span[is_long_cruise].astype(int).values  # number of copies per long cruise

## Everything that is not a long cruise stays in reposition_df unchanged.
reposition_df = reposition_df.loc[~is_long_cruise].copy()

## Repeat row i of long_cruises repeat_size[i] times, keeping rows contiguous.
repeated_positions = np.repeat(np.arange(len(long_cruises)), repeat_size)
cruise_df = long_cruises.iloc[repeated_positions].reset_index(drop=True)

## Identifier, zone and time columns become nullable integers so the time
## bounds can be used to build integer ranges when the cruises are expanded.
cruise_df = cruise_df.astype({'episode': 'Int64',
                              'index': 'Int64',
                              'loc': 'Int64',
                              'time': 'Int64',
                              'loc_next': 'Int64',
                              'time_next': 'Int64'})

In [21]:
cruise_df

Unnamed: 0,episode,index,loc,time,reward,type,loc_next,time_next
0,3232,3257,238,77,0,cruise,238,79
1,3232,3257,238,77,0,cruise,238,79
2,3343,3356,138,62,0,cruise,138,64
3,3343,3356,138,62,0,cruise,138,64
4,3391,3392,138,37,0,cruise,138,39
...,...,...,...,...,...,...,...,...
326900,13883347,13883358,234,52,0,cruise,234,54
326901,13884027,13884034,138,44,0,cruise,138,46
326902,13884027,13884034,138,44,0,cruise,138,46
326903,13884115,13884117,138,35,0,cruise,138,37


In [22]:
## turn the duplicated rows of one long cruise into consecutive one-interval steps
def expand_cruise(selected_df):
    """Return a copy of ``selected_df`` whose rows are consecutive one-step cruises.

    The span runs from the smallest 'time' to the largest 'time_next' in the
    frame. Row k of the result gets ``time = start + k`` and
    ``time_next = start + k + 1``. The frame must therefore contain exactly
    ``end - start`` rows, otherwise the column assignment fails with a length
    mismatch. The input frame is left unmodified.
    """
    first_step = min(selected_df['time'])
    last_step = max(selected_df['time_next'])
    return selected_df.assign(
        time=range(first_step, last_step),
        time_next=range(first_step + 1, last_step + 1),
    )

### Expand and combine to reposition df

In [23]:
## Each long cruise is one group (its duplicated rows share all of these key
## columns); expand_cruise rewrites 'time' / 'time_next' inside the group into
## consecutive one-interval steps.
cruise_df_2 = cruise_df.groupby(['episode', 'index', 'loc', 'reward', 'type', 'loc_next']).apply(expand_cruise)
## Put the expanded cruises back next to the remaining reposition rows and
## restore chronological order within each episode / trip index.
## NOTE(review): the apply result presumably carries the group keys in its
## index, so the combined frame has a mixed index - confirm before relying on it.
reposition_df = pd.concat([reposition_df, cruise_df_2]).sort_values(['episode', 'index', 'time', 'time_next'])

In [24]:
reposition_df

Unnamed: 0,episode,index,loc,time,reward,type,loc_next,time_next
0,2131,2131,166,36.0,0,repo,238,37.0
1,2131,2132,186,38.0,0,cruise,186,39.0
2,2131,2133,211,40.0,0,repo,186,40.0
3,2131,2134,234,40.0,0,repo,90,40.0
4,2131,2135,161,41.0,0,repo,142,41.0
...,...,...,...,...,...,...,...,...
3948930,1.38841e+07,1.38841e+07,237,50.0,0,cruise,237,51.0
3948931,1.38841e+07,1.38841e+07,239,51.0,0,repo,151,67.0
3948932,1.38841e+07,1.38841e+07,239,67.0,0,repo,238,67.0
3948933,1.38841e+07,1.38841e+07,24,68.0,0,repo,151,68.0


In [25]:
trip_df.head(10)

Unnamed: 0,episode,index,loc,loc_next,time,time_next,reward,type
0,2131.0,2131.0,42.0,166.0,36.0,36.0,9.5,delivery
1,2131.0,2132.0,238.0,186.0,37.0,38.0,17.65,delivery
2,2131.0,2133.0,186.0,211.0,39.0,40.0,13.5,delivery
3,2131.0,2134.0,186.0,234.0,40.0,40.0,8.45,delivery
4,2131.0,2135.0,90.0,161.0,40.0,41.0,7.5,delivery
5,2131.0,2136.0,142.0,186.0,41.0,43.0,27.1,delivery
6,2131.0,2138.0,246.0,211.0,44.0,45.0,15.5,delivery
7,2131.0,2139.0,114.0,233.0,46.0,48.0,21.0,delivery
8,2131.0,2141.0,162.0,162.0,48.0,49.0,12.5,delivery
9,2131.0,2142.0,237.0,236.0,49.0,51.0,14.0,delivery


## Combine reposition to trip dataset

In [26]:
## Interleave deliveries and repositions: rows are ordered by episode, trip
## index and time, then the old row labels are moved into columns.
## NOTE(review): because an 'index' column already exists, reset_index()
## appears to store the old labels under 'level_0' - the next line relies on
## that column name.
transition_df = pd.concat([trip_df, reposition_df]).sort_values(['episode', 'index', 'time', 'time_next']).reset_index()
## Discard both the old labels and the per-trip 'index' identifier ...
transition_df.drop(columns=['level_0', 'index'], inplace=True)
## ... and create a fresh 'index' column from the new row labels of the sorted frame.
transition_df.reset_index(inplace=True)
## collapse consecutive repo-delivery: every repo/cruise row is marked with its
## own row number, which is then carried forward (within the episode) onto the
## rows that follow it; rows before the first repo/cruise of an episode keep NaN
transition_df['mask'] = np.where((transition_df['type'] == 'repo') | (transition_df['type'] == 'cruise'), 
                                 transition_df['index'], np.nan)
transition_df['mask'] = transition_df.groupby('episode')['mask'].ffill()
transition_df.head(20)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,index,episode,loc,loc_next,reward,time,time_next,type,mask
0,0,2131,42,166,9.5,36.0,36.0,delivery,
1,1,2131,166,238,0,36.0,37.0,repo,1.0
2,2,2131,238,186,17.65,37.0,38.0,delivery,1.0
3,3,2131,186,186,0,38.0,39.0,cruise,3.0
4,4,2131,186,211,13.5,39.0,40.0,delivery,3.0
...,...,...,...,...,...,...,...,...,...
15,15,2131,233,162,0,48.0,48.0,repo,15.0
16,16,2131,162,162,12.5,48.0,49.0,delivery,15.0
17,17,2131,162,237,0,49.0,49.0,repo,17.0
18,18,2131,237,236,14,49.0,51.0,delivery,17.0


## Convert into SARSA format

In [27]:
## Build (state, action, reward, next state, next action) rows from the
## interleaved transition table.
sarsa_df = transition_df.copy()
## Rows with no mask are the ones that precede the first repo/cruise of their
## episode; they are removed.
sarsa_df.dropna(subset=['mask'], inplace=True)
## Type of the row that follows within the same episode (NaN on the last row).
sarsa_df['next_transition'] = sarsa_df.groupby('episode')['type'].shift(-1)

## A delivery row takes over the start fields of the row before it: blank
## them on delivery rows, then forward-fill.
ffill_cols = ['index', 'episode', 'loc', 'time']
for col_names in ffill_cols:
    sarsa_df[col_names] = np.where(sarsa_df['type'] == 'delivery', np.nan, sarsa_df[col_names])
    sarsa_df[col_names] = sarsa_df[col_names].ffill()
## A row that is followed by a delivery takes over the end fields of that
## delivery: blank them, then backward-fill.
bfill_cols = ['loc_next', 'time_next']
for col_names in bfill_cols:
    sarsa_df[col_names] = np.where(sarsa_df['next_transition'] == 'delivery', np.nan, sarsa_df[col_names])
    sarsa_df[col_names] = sarsa_df[col_names].bfill()

## The last row of every episode has no following transition and is dropped.
sarsa_df.dropna(subset=['next_transition'], inplace=True)
sarsa_df.drop(columns=['index', 'type', 'mask', 'next_transition'], inplace=True)
## Make every remaining column numeric before summing.
sarsa_df = sarsa_df.astype('float64')
## Rows that now share the same start and end fields are merged into one,
## with their rewards added together.
sarsa_df = sarsa_df.groupby(['episode', 'loc', 'loc_next', 
                             'time', 'time_next'], as_index=False).agg('sum')
sarsa_df.sort_values(['episode','time', 'time_next'], inplace=True)
## Adds an 'index' column holding the previous row labels; it is not part of
## the final column selection below.
sarsa_df.reset_index(inplace=True)
# sarsa_df.drop(columns=['index'], inplace=True)
sarsa_df = sarsa_df.astype({'episode': 'Int64',
                              'loc': 'Int64',
                              'time': 'Int64',
                              'loc_next': 'Int64',
                              'time_next': 'Int64'})
## The action is the zone the transition ends in; the next action is the end
## zone of the following row in the same episode (missing on the last row).
sarsa_df['action'] = sarsa_df['loc_next']#.groupby('episode')['loc_next'].shift(-1)
sarsa_df['action_next'] = sarsa_df.groupby('episode')['loc_next'].shift(-1)
## States are (zone, time index) tuples.
sarsa_df['state'] = [(loc, time) for loc, time in zip(sarsa_df['loc'], sarsa_df['time'])]
sarsa_df['state_next'] = [(loc, time) for loc, time in zip(sarsa_df['loc_next'], sarsa_df['time_next'])]
sarsa_df = sarsa_df[['episode', 'state', 'action', 'reward', 'state_next', 'action_next']]

In [28]:
import pickle

## Binary copy: the DataFrame is pickled as-is, so the tuple-valued state
## columns are stored as Python objects.
with open('../data/SARSA_eps.pickle', 'wb') as pickle_file:
    pickle.dump(sarsa_df, pickle_file)

## Human-readable copy. CSV is lossy here: values such as the state tuples are
## written out as plain text.
sarsa_df.to_csv('../data/SARSA_eps.csv', index=False)

In [29]:
## load pickle
# with open('../data/historical_for_SARSA.pickle', 'rb') as handle:
#     episode_data = pickle.load(handle)