In [82]:
import sqlite3
import pandas as pd
import numpy as np
import matplotlib

## Connecting to the database

In [83]:
# Location of the SQLite log file, relative to this notebook's working directory.
db_path = "../db_logging/simlog.db"

# Open the database; `conn` is the handle later passed to pandas for querying.
conn = sqlite3.connect(database=db_path)

# A cursor on the same connection; it is closed in the final cell of the notebook.
c = conn.cursor()

## Getting Ride lifecycle

In [84]:
def _aliased(table, column, alias):
    """Return a SELECT-list entry of the form '<table>.<column> as <alias>'."""
    return "{}.{} as {}".format(table, column, alias)


# Ride identity and endpoints are read from the Requests table.
sim_id_col = _aliased("Requests", "sim_id", "Sim_ID")
ride_id_col = _aliased("Requests", "ride_id", "Ride_ID")
origin_node_col = _aliased("Requests", "origin_node", "Origin")
dest_node_col = _aliased("Requests", "dest_node", "Destination")

# The query reads execute_ts from each of the four tables; alias each by stage
# so the resulting columns can be told apart.
request_ts_col = _aliased("Requests", "execute_ts", "Request_TS")
schedule_ts_col = _aliased("Schedules", "execute_ts", "Schedule_TS")
pickup_ts_col = _aliased("Pickups", "execute_ts", "Pickup_TS")
dropoff_ts_col = _aliased("Dropoffs", "execute_ts", "Dropoff_TS")

# Requests is inner-joined to each later-stage table on ride_id only; each
# clause ends with the ")" that closes one of the three leading parentheses.
# NOTE(review): the displayed head() output repeats one ride with several
# Pickup_TS values, so a ride_id apparently matches more than one Pickups row —
# confirm whether one row per ride is expected before relying on row counts.
join_clauses = [
    "INNER JOIN {0} ON Requests.ride_id = {0}.ride_id)".format(stage_table)
    for stage_table in ("Schedules", "Pickups", "Dropoffs")
]
inner_join_str = "(((Requests " + "".join(join_clauses)

# Final statement: comma-separated select list over the nested join.
selected_cols = [
    sim_id_col, ride_id_col, origin_node_col, dest_node_col,
    request_ts_col, schedule_ts_col, pickup_ts_col, dropoff_ts_col,
]
query_str = "SELECT {} FROM {};".format(",".join(selected_cols), inner_join_str)


# Run the join on the open connection and load the result set into a DataFrame.
ride_life_cycle = pd.read_sql_query(sql=query_str, con=conn)

# Preview the first five rows.
ride_life_cycle.head(5)





Unnamed: 0,Sim_ID,Ride_ID,Origin,Destination,Request_TS,Schedule_TS,Pickup_TS,Dropoff_TS
0,1,bfbd9a19-3614-47db-a4e2-7ccbc438e747,192,135,6.002083,6.002083,6.002083,12.008175
1,1,bfbd9a19-3614-47db-a4e2-7ccbc438e747,192,135,6.002083,6.002083,6.029963,12.008175
2,1,bfbd9a19-3614-47db-a4e2-7ccbc438e747,192,135,6.002083,6.002083,7.14528,12.008175
3,1,bfbd9a19-3614-47db-a4e2-7ccbc438e747,192,135,6.002083,6.002083,7.14528,12.008175
4,1,bfbd9a19-3614-47db-a4e2-7ccbc438e747,192,135,6.002083,6.002083,7.145508,12.008175


## Checking order of execution of events

In [85]:
# Column labels of the joined ride life-cycle frame (the aliases chosen in the query).
ride_life_cycle.columns

Index(['Sim_ID', 'Ride_ID', 'Origin', 'Destination', 'Request_TS',
       'Schedule_TS', 'Pickup_TS', 'Dropoff_TS'],
      dtype='object')

Are all scheduled time stamps at or after requests?

In [86]:
# True only when Schedule_TS - Request_TS is >= 0 on every row.
ride_life_cycle['Schedule_TS'].sub(ride_life_cycle['Request_TS']).ge(0).all()

True

Are all pickup time stamps at or after schedules?

In [87]:
# True only when Pickup_TS - Schedule_TS is >= 0 on every row.
ride_life_cycle['Pickup_TS'].sub(ride_life_cycle['Schedule_TS']).ge(0).all()

True

Are all dropoff time stamps after pickup time stamps?

In [88]:
# True only when Dropoff_TS - Pickup_TS is strictly > 0 on every row
# (unlike the two earlier checks, equality counts as a failure here).
ride_life_cycle['Dropoff_TS'].sub(ride_life_cycle['Pickup_TS']).gt(0).all()

False

In [89]:
# Keep the rows whose dropoff timestamp is earlier than the pickup timestamp.
# NOTE(review): this selects strictly negative gaps only, while the preceding
# check requires a strictly positive gap; rows with a gap of exactly 0 fail
# that check but are not selected here — confirm which boundary is intended.
problematic_rows = ride_life_cycle.loc[
    ride_life_cycle['Dropoff_TS'].sub(ride_life_cycle['Pickup_TS']).lt(0)
]

In [90]:
# Share of joined rows in which the dropoff precedes the pickup.
problematic_rows.shape[0] / ride_life_cycle.shape[0]

0.043738898756660746

In [91]:
# Add a column holding the pickup-to-dropoff gap (negative when the dropoff
# timestamp precedes the pickup timestamp).
ride_life_cycle["Pickup_TO_Dropoff"] = ride_life_cycle['Dropoff_TS'].sub(ride_life_cycle['Pickup_TS'])

In [92]:
# Order rows from the most negative gap to the largest one; the frame itself
# is not modified because the sorted copy is only displayed.
ride_life_cycle.sort_values('Pickup_TO_Dropoff', ascending=True)

Unnamed: 0,Sim_ID,Ride_ID,Origin,Destination,Request_TS,Schedule_TS,Pickup_TS,Dropoff_TS,Pickup_TO_Dropoff
4342,7,638e3736-659e-4b6f-ab26-3b309564e374,7,105,6.238773,6.238773,8.212029,6.269135,-1.942894
4252,7,31cb1047-add4-4c4b-b694-6868b9f0dbf6,105,7,6.063583,6.063583,7.729884,6.095328,-1.634556
3826,5,9b3123f4-e1c8-4c9c-b865-953950a0ee2f,186,190,6.063147,6.063147,7.700812,6.139801,-1.561011
3828,5,43ce6a0f-0695-4a91-9f64-c8c918d66ec0,198,137,6.067800,6.067800,7.899658,6.628247,-1.271411
3825,5,48066614-f5f9-4294-99f5-7da9f6d6e69b,186,190,6.063147,6.063147,7.387424,6.139801,-1.247623
...,...,...,...,...,...,...,...,...,...
1048,1,14761256-c07d-4be0-8cb3-b89d00f65232,129,223,6.304359,6.304359,6.307738,13.029666,6.721927
1047,1,14761256-c07d-4be0-8cb3-b89d00f65232,129,223,6.304359,6.304359,6.305989,13.029666,6.723677
1046,1,14761256-c07d-4be0-8cb3-b89d00f65232,129,223,6.304359,6.304359,6.305753,13.029666,6.723913
1045,1,14761256-c07d-4be0-8cb3-b89d00f65232,129,223,6.304359,6.304359,6.304359,13.029666,6.725306


In [93]:
# Total number of rows returned by the join.
ride_life_cycle.shape[0]

4504

In [95]:
# Show the dtype of every column, including the derived Pickup_TO_Dropoff column.
ride_life_cycle.dtypes

Sim_ID                 int64
Ride_ID               object
Origin                 int64
Destination            int64
Request_TS           float64
Schedule_TS          float64
Pickup_TS            float64
Dropoff_TS           float64
Pickup_TO_Dropoff    float64
dtype: object

## Closing the database.

In [94]:
# Release the cursor first, then the connection it belongs to.
for handle in (c, conn):
    handle.close()