## Generate dataset for SARSA algorithm

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [15]:
# Load the cleaned trip records that the rest of the notebook builds on.
trip_csv_path = '../data/trip_cleaned.csv'
cleaned_trip_df = pd.read_csv(trip_csv_path)

In [16]:
# Working frame for episode construction: only the columns used below, taken
# as an explicit copy so the in-place edits that follow never modify
# `cleaned_trip_df`.
episode_data = cleaned_trip_df[['hack_license', 'pickup_datetime', 'dropoff_datetime',
                                'pickup_taxizone_id', 'dropoff_taxizone_id',
                                'total_amount', 'first_pickup', 'last_dropoff']].copy()

# Parse the timestamps BEFORE sorting. Sorting the raw CSV strings orders them
# lexicographically, which is only chronological for some text formats.
episode_data['pickup_datetime'] = pd.to_datetime(episode_data['pickup_datetime'])
episode_data['dropoff_datetime'] = pd.to_datetime(episode_data['dropoff_datetime'])

# The per-driver ffill/bfill/shift steps later in the notebook depend on trips
# being in time order within each driver.
episode_data.sort_values(['hack_license', 'pickup_datetime'], inplace=True)

## Keep only data points of full shift 
A complete shift must contain both its first pickup and its last drop-off.

In [17]:
# Keep only rows that, within the same driver, have a `first_pickup == 1` row
# at or before them AND a `last_dropoff == 1` row at or after them.
driver_key = episode_data['hack_license']
flag = pd.Series(1.0, index=episode_data.index)

# Start marker is propagated forward, end marker backward, per driver.
after_start = flag.where(episode_data['first_pickup'] == 1).groupby(driver_key).ffill()
before_end = flag.where(episode_data['last_dropoff'] == 1).groupby(driver_key).bfill()

# A row survives only if both markers reached it.
inside_shift = after_start.notna() & before_end.notna()
episode_data = episode_data[inside_shift].copy()

## Assign unique episode id
Each shift of a driver is assigned a unique id.

In [18]:
# The episode id of a shift is the index label of its first-pickup row.
# Every other row starts as NaN and then inherits the id of the most recent
# first-pickup row of the same driver.
row_labels = episode_data.index.to_series()
episode_data['episode'] = row_labels.where(episode_data['first_pickup'] == 1)
episode_data['episode'] = episode_data.groupby('hack_license')['episode'].ffill()

## Convert all time columns to time index
The index depends on `delta_t` (interval width in minutes): timestamps are first rounded to the nearest interval, then converted using `interval_index_table`.

In [19]:
# Lookup table used below to turn a time-of-day interval into a time index.
# The `interval` column is parsed and reduced to its time-of-day component so
# it can be matched against rounded trip times.
interval_index_table = pd.read_csv('../data/interval_index_table_0.csv').assign(
    interval=lambda table: pd.to_datetime(table['interval']).dt.time
)

In [20]:
# Width of one time bucket, in minutes; also selects the lookup-table column.
delta_t = 15
round_by = f'{delta_t}min'

# Round each timestamp to the nearest bucket and keep only the time of day.
for endpoint in ('pickup', 'dropoff'):
    rounded = episode_data[f'{endpoint}_datetime'].dt.round(round_by)
    episode_data[f'{endpoint}_datetime_interval'] = rounded.dt.time

## convert DO interval to time index
index_column = f'time_index_{delta_t}m'
current_conversion = dict(zip(interval_index_table['interval'],
                              interval_index_table[index_column]))

# Plain dict lookup on purpose: an interval missing from the table raises
# KeyError instead of silently producing a missing value.
for endpoint in ('pickup', 'dropoff'):
    intervals = episode_data[f'{endpoint}_datetime_interval']
    episode_data[f'{endpoint}_datetime_index'] = [current_conversion[t] for t in intervals]

In [21]:
# episode_data = episode_data[['pickup_taxizone_id', 'dropoff_taxizone_id', 
#                              'pickup_datetime_index', 'dropoff_datetime_index',
#                              'total_amount', 'episode']]

## Collapse Immediate trips 

Trips/transitions must collapse if the current action = 0 and the next trip starts immediately, i.e. in the same time interval as the current state.

In [22]:
## Action taken after the current row: the zone of the driver's next pickup,
## or 0 when the next pickup is in the zone where the current trip ended.
# NOTE(review): the shift is grouped by driver, not by episode, so the last row
# of a shift may pick up the first trip of the driver's next shift -- confirm
# this is intended.
next_pickup_zone = episode_data.groupby('hack_license')['pickup_taxizone_id'].shift(-1)
episode_data['pickup_taxizone_id_next'] = next_pickup_zone
stays_in_zone = episode_data['dropoff_taxizone_id'] == episode_data['pickup_taxizone_id_next']
episode_data['action'] = episode_data['pickup_taxizone_id_next'].mask(stays_in_zone, 0)

## same_time_interval = 1 when the next pickup falls in the same time index
## as the current drop-off.
next_pickup_index = episode_data.groupby('hack_license')['pickup_datetime_index'].shift(-1)
episode_data['pickup_datetime_index_next'] = next_pickup_index
next_is_same_slot = episode_data['pickup_datetime_index_next'] == episode_data['dropoff_datetime_index']
episode_data['same_time_interval'] = np.where(next_is_same_slot, 1, 0)

## finished = NaN marks a row followed immediately (same zone, same time
## index) by the next trip; every other row gets 1.
immediate_followup = (episode_data['same_time_interval'] == 1) & (episode_data['action'] == 0)
episode_data['finished'] = np.where(~immediate_followup, 1, np.nan)

In [23]:
# Discard the rows whose `finished` flag is NaN.
episode_data.dropna(subset=['finished'], inplace=True)

# The state is where and when the trip ended: drop-off zone and drop-off
# time index.
state_sources = {
    'state_loc': 'dropoff_taxizone_id',
    'state_time': 'dropoff_datetime_index',
}
for state_column, source_column in state_sources.items():
    episode_data[state_column] = episode_data[source_column]

## Keep only relevant columns
* state_loc = current location
* state_time = current time index
* action = next action to take 
* total_amount = current reward (the previous reward before the R in SARSA)

In [24]:
# Final schema of the historical dataset: state, action, reward, episode id.
sarsa_columns = ['state_loc', 'state_time', 'action', 'total_amount', 'episode']
episode_data = episode_data[sarsa_columns]

In [27]:
import pickle

# Binary dump of the DataFrame object; this is the file the load cell reads.
with open('../data/historical_for_SARSA.pickle', 'wb') as handle:
    pickle.dump(episode_data, handle)

## For interpretable read. Suffering data loss.
# The CSV must go to its own path: writing it to the .pickle path would
# overwrite the binary dump above with text and make it unloadable.
episode_data.to_csv('../data/historical_for_SARSA.csv', index=False)

In [26]:
## load pickle
# with open('../data/historical_for_SARSA.pickle', 'rb') as handle:
#     episode_data = pickle.load(handle)