In [1]:
# imports
import pandas as pd
import numpy as np
from ipyleaflet import (Map, GeoData, basemaps, WidgetControl, GeoJSON,
 LayersControl, Icon, Marker,basemap_to_tiles, Choropleth,
 MarkerCluster, Heatmap,SearchControl, 
 FullScreenControl)
from ipywidgets import Text, HTML
from branca.colormap import linear
import geopandas
import json

In [2]:
# paths to the three input CSV exports, relative to the notebook's working directory
file1, file2, file3 = (
    'data/driver_rides.csv',
    'data/driver_sessions.csv',
    'data/passenger_rides.csv',
)

In [3]:
# load each CSV export into its own raw DataFrame (read in file1, file2, file3 order)
driver_rides_df, driver_sessions_df, passenger_rides_df = (
    pd.read_csv(csv_path) for csv_path in (file1, file2, file3)
)

### Create DataFrame of When and Where Rides are Accepted

In [4]:
# preview the first five driver ride records to see which columns are available
driver_rides_df.head(n=5)

Unnamed: 0,status,accepted_timestamp,accepted_lat,accepted_lng,pickup_timestamp,pickup_lat,pickup_lng,dropped_off_timestamp,dropoff_lat,dropoff_lng
0,finished,2021-07-15 22:29:22 UTC,29.4693,-98.64454,2021-07-15 22:37:28 UTC,,,2021-07-15 22:51:25 UTC,,
1,finished,2021-07-15 23:30:53 UTC,29.42473,-98.48886,2021-07-15 23:35:51 UTC,,,2021-07-15 23:46:21 UTC,,
2,finished,2021-07-15 19:31:09 UTC,29.4482,-98.66975,2021-07-15 19:37:27 UTC,,,2021-07-15 19:49:47 UTC,,
3,finished,2021-07-10 23:25:59 UTC,29.52192,-98.55595,2021-07-10 23:29:56 UTC,,,2021-07-10 23:59:56 UTC,,
4,finished,2021-07-07 21:32:25 UTC,29.453194,-98.637306,2021-07-07 21:38:39 UTC,,,2021-07-07 21:56:16 UTC,,


In [5]:
# drop the unnecessary columns: ride status and the pickup/dropoff coordinates,
# leaving the accepted location plus the accepted/pickup/drop-off timestamps
driver_timestamps_df = driver_rides_df.drop(
    columns=['status', 'pickup_lat', 'pickup_lng', 'dropoff_lat', 'dropoff_lng']
)
driver_timestamps_df.head(n=5)

Unnamed: 0,accepted_timestamp,accepted_lat,accepted_lng,pickup_timestamp,dropped_off_timestamp
0,2021-07-15 22:29:22 UTC,29.4693,-98.64454,2021-07-15 22:37:28 UTC,2021-07-15 22:51:25 UTC
1,2021-07-15 23:30:53 UTC,29.42473,-98.48886,2021-07-15 23:35:51 UTC,2021-07-15 23:46:21 UTC
2,2021-07-15 19:31:09 UTC,29.4482,-98.66975,2021-07-15 19:37:27 UTC,2021-07-15 19:49:47 UTC
3,2021-07-10 23:25:59 UTC,29.52192,-98.55595,2021-07-10 23:29:56 UTC,2021-07-10 23:59:56 UTC
4,2021-07-07 21:32:25 UTC,29.453194,-98.637306,2021-07-07 21:38:39 UTC,2021-07-07 21:56:16 UTC


### Create DataFrame of Driver Session Start and End Locations

In [6]:
# display the raw driver sessions data (start time, duration, start/end coordinates)
driver_sessions_df

Unnamed: 0,start_time,duration,start_lat,start_lng,end_lat,end_lng
0,2021-07-24 20:20:47 UTC,0.491111,29.456915,-98.674567,29.432770,-98.482840
1,2021-07-24 19:27:11 UTC,0.792778,29.435907,-98.691461,29.460005,-98.670725
2,2021-07-24 05:57:11 UTC,0.386111,29.314874,-98.384354,29.444857,-98.665654
3,2021-07-24 02:10:10 UTC,3.780278,29.450487,-98.682075,29.314866,-98.384366
4,2021-07-11 18:19:07 UTC,0.256667,29.489100,-98.622475,29.470984,-98.621434
...,...,...,...,...,...,...
80,2021-07-12 16:51:14 UTC,3.344444,29.435785,-98.691378,29.523626,-98.453220
81,2021-07-12 01:01:55 UTC,0.192222,29.519016,-98.494876,29.444012,-98.664440
82,2021-07-21 18:44:35 UTC,3.401667,29.435843,-98.691386,29.520713,-98.506468
83,2021-07-28 21:41:55 UTC,1.304722,29.470447,-98.621214,29.435581,-98.708463


In [7]:
# keep only the session start/end coordinates by removing the time-related columns
driver_start_end_df = driver_sessions_df.drop(columns=['start_time', 'duration'])
driver_start_end_df

Unnamed: 0,start_lat,start_lng,end_lat,end_lng
0,29.456915,-98.674567,29.432770,-98.482840
1,29.435907,-98.691461,29.460005,-98.670725
2,29.314874,-98.384354,29.444857,-98.665654
3,29.450487,-98.682075,29.314866,-98.384366
4,29.489100,-98.622475,29.470984,-98.621434
...,...,...,...,...
80,29.435785,-98.691378,29.523626,-98.453220
81,29.519016,-98.494876,29.444012,-98.664440
82,29.435843,-98.691386,29.520713,-98.506468
83,29.470447,-98.621214,29.435581,-98.708463


### Create DataFrame of Passenger Requests

In [8]:
# preview the first five passenger rides to decide which columns are needed
passenger_rides_df.head(n=5)

Unnamed: 0,status,requested_timestamp,requested_lat,requested_lng,pickup_timestamp,pickup_address,pickup_lat,pickup_lng,destination_address,dropoff_timestamp,dropoff_lat,dropoff_lng
0,finished,2021-06-22 17:27:30 UTC,29.45157,-98.6823,2021-06-22 17:32:24 UTC,"10131 Military Dr W, San Antonio, TX 78251, Un...",29.45138,-98.68242,"9427 Culebra Rd, San Antonio, TX, United States",2021-06-22 17:42:14 UTC,29.487,-98.66855
1,finished,2021-06-17 12:30:15 UTC,29.45053,-98.68126,2021-06-17 12:37:10 UTC,"9914 W Military Dr, San Antonio, TX, United St...",29.45144,-98.68237,"9427 Culebra Rd, San Antonio, TX, United States",2021-06-17 12:46:57 UTC,29.48693,-98.668
2,finished,2021-06-28 16:23:06 UTC,29.45143,-98.68234,2021-06-28 16:29:24 UTC,"10131 Military Dr W, San Antonio, TX 78251, Un...",29.45137,-98.68241,"8770 Broadway, Lyft Flexdrive",2021-06-28 16:50:33 UTC,29.5189,-98.46186
3,finished,2021-06-18 17:19:58 UTC,29.45049,-98.68197,2021-06-18 17:29:28 UTC,"9914 W Military Dr, San Antonio, TX, United St...",29.45144,-98.68239,"9427 Culebra Rd, San Antonio, TX, United States",2021-06-18 17:38:02 UTC,29.48694,-98.66891
4,finished,2021-06-02 13:12:03 UTC,29.45157,-98.68229,2021-06-02 13:18:12 UTC,"10131 Military Dr W, San Antonio, TX 78251, Un...",29.45139,-98.68239,"Terminal A, American",2021-06-02 13:36:28 UTC,29.52729,-98.47281


In [9]:
# drop the unnecessary status/address columns, then discard every row that
# still contains a NaN in any remaining column
passenger_requests_df = (
    passenger_rides_df
    .drop(columns=['status', 'pickup_address', 'destination_address'])
    .dropna()
)
passenger_requests_df

Unnamed: 0,requested_timestamp,requested_lat,requested_lng,pickup_timestamp,pickup_lat,pickup_lng,dropoff_timestamp,dropoff_lat,dropoff_lng
0,2021-06-22 17:27:30 UTC,29.45157,-98.6823,2021-06-22 17:32:24 UTC,29.45138,-98.68242,2021-06-22 17:42:14 UTC,29.487,-98.66855
1,2021-06-17 12:30:15 UTC,29.45053,-98.68126,2021-06-17 12:37:10 UTC,29.45144,-98.68237,2021-06-17 12:46:57 UTC,29.48693,-98.668
2,2021-06-28 16:23:06 UTC,29.45143,-98.68234,2021-06-28 16:29:24 UTC,29.45137,-98.68241,2021-06-28 16:50:33 UTC,29.5189,-98.46186
3,2021-06-18 17:19:58 UTC,29.45049,-98.68197,2021-06-18 17:29:28 UTC,29.45144,-98.68239,2021-06-18 17:38:02 UTC,29.48694,-98.66891
4,2021-06-02 13:12:03 UTC,29.45157,-98.68229,2021-06-02 13:18:12 UTC,29.45139,-98.68239,2021-06-02 13:36:28 UTC,29.52729,-98.47281
5,2021-06-24 16:52:33 UTC,29.59258,-98.59653,2021-06-24 16:54:46 UTC,29.5926,-98.5964,2021-06-24 17:13:02 UTC,29.45144,-98.68234
7,2021-06-17 16:27:56 UTC,29.48698,-98.66845,2021-06-17 16:36:16 UTC,29.48703,-98.66846,2021-06-17 16:46:50 UTC,29.45049,-98.68197
8,2021-06-01 19:13:27 UTC,29.49035,-98.62206,2021-06-01 19:15:11 UTC,29.49024,-98.62192,2021-06-01 19:26:48 UTC,29.45124,-98.68265
9,2021-06-19 03:38:49 UTC,29.48698,-98.66845,2021-06-19 03:47:11 UTC,29.48703,-98.66847,2021-06-19 03:56:40 UTC,29.45048,-98.68197
10,2021-06-24 15:55:49 UTC,29.45049,-98.68197,2021-06-24 15:59:36 UTC,29.45147,-98.68239,2021-06-24 16:16:29 UTC,29.59258,-98.59653


### GeoMap Passenger Requests with GeoPandas

In [12]:
# read the shapefile stored under data/ into a GeoDataFrame
# (presumably Texas place boundaries, going by the variable name — confirm against the data source)
texas_shape = geopandas.read_file('data/cb_2018_48_place_500k.shp')
# preview the first rows to inspect the geometry column
texas_shape.head()

Unnamed: 0,geometry
0,"MULTIPOLYGON (((-98.21577 29.66027, -98.20412 ..."
1,"MULTIPOLYGON (((-94.93500 29.71517, -94.93372 ..."
2,"MULTIPOLYGON (((-101.87444 33.46171, -101.8731..."
3,"POLYGON ((-100.39771 35.90979, -100.38955 35.9..."
4,"POLYGON ((-97.31433 33.09949, -97.30795 33.099..."


In [15]:
# show the text form of the first row's geometry to inspect its coordinates
str(texas_shape['geometry'].iloc[0])

'MULTIPOLYGON (((-98.215767 29.660273, -98.20412399999999 29.667006, -98.20403899999999 29.66689, -98.21568099999999 29.660158, -98.215767 29.660273)), ((-98.30183799999999 29.616674, -98.289464 29.62108, -98.281576 29.623909, -98.26981499999999 29.62808, -98.26920799999999 29.627152, -98.26553299999999 29.629503, -98.26601099999999 29.630176, -98.262562 29.632237, -98.26073699999999 29.629328, -98.25098199999999 29.634919, -98.25026299999999 29.635261, -98.24907399999999 29.63576, -98.227857 29.642757, -98.229388 29.644743, -98.227142 29.646112, -98.225933 29.644396, -98.216909 29.648429, -98.21550599999999 29.650364, -98.211748 29.654307, -98.209199 29.655754, -98.20339299999999 29.657117, -98.20007799999999 29.659278, -98.202564 29.657355, -98.201393 29.657897, -98.20071999999999 29.656801, -98.19725699999999 29.649845, -98.195441 29.649976, -98.195016 29.649461, -98.196023 29.648707, -98.19434199999999 29.645384, -98.19177499999999 29.640523, -98.18977 29.641367, -98.18852699999999