In [7]:
### Import Python Packages ###
import pandas as pd
import numpy as np

In [8]:
### Import Data ###
# Location of the raw rides CSV, relative to the notebook's working directory.
RAW_RIDES_CSV = 'Data/uber-rides-dataset.csv'

# Read the CSV into the working DataFrame used by the cells below.
df = pd.read_csv(RAW_RIDES_CSV)

In [9]:
# List the column names of the loaded dataset.
df.columns

Index(['trip_completed_at', 'trip_status', 'ride_hailing_app', 'trip_uid',
       'driver_uid', 'rider_uid', 'customer', 'trip_start_time',
       'trip_end_time', 'trip_time', 'total_time', 'wait_time', 'trip_type',
       'surge_multiplier', 'vehicle_make_model', 'vehicle_license_plate',
       'driver_name_en', 'vehicle_make', 'vehicle_model', 'driver_gender',
       'driver_photo_url', 'driver_phone_number', 'pickup_lat', 'pickup_long',
       'dropoff_lat', 'dropoff_long', 'trip_map_image_url',
       'trip_path_image_url', 'city', 'country', 'trip_start_address',
       'trip_end_address', 'rub_usd_exchange_rate', 'price_rub', 'price_usd',
       'distance_kms', 'temperature_time', 'temperature_value', 'feels_like',
       'humidity', 'wind_speed', 'cloudness', 'weather_main', 'weather_desc',
       'precipitation'],
      dtype='object')

In [10]:
### Preprocess Data ###
# The clean-up is written as a single chain; each step feeds the next:
#   1. keep only the rows whose ride_hailing_app is 'Uber'
#   2. drop columns unrelated to the analysis (driver/vehicle details,
#      anonymized ids, image URLs, etc.)
#   3. convert the time-related columns to minutes (timedelta seconds / 60)
#   4. remove the unclear Uber trip types
df = (
    df.loc[df['ride_hailing_app'].eq('Uber')]
    .drop(columns=[
        'ride_hailing_app', 'trip_uid', 'driver_uid', 'rider_uid', 'customer',
        'surge_multiplier', 'vehicle_make_model', 'vehicle_license_plate',
        'driver_name_en', 'vehicle_make', 'vehicle_model', 'driver_gender',
        'driver_photo_url', 'driver_phone_number', 'trip_map_image_url',
        'trip_path_image_url',
    ])
    .assign(
        # Assigning to an existing column name overwrites it in place,
        # so the column order is unchanged.
        trip_time=lambda rides: pd.to_timedelta(rides['trip_time']).dt.total_seconds() / 60,
        total_time=lambda rides: pd.to_timedelta(rides['total_time']).dt.total_seconds() / 60,
        wait_time=lambda rides: pd.to_timedelta(rides['wait_time']).dt.total_seconds() / 60,
    )
    .loc[lambda rides: ~rides['trip_type'].isin(['uberELKA', 'uberBEAUTY', 'uberSELECT'])]
)

In [11]:
# Display the preprocessed DataFrame (the rendered output is truncated to the first and last rows).
df

Unnamed: 0,trip_completed_at,trip_status,trip_start_time,trip_end_time,trip_time,total_time,wait_time,trip_type,pickup_lat,pickup_long,...,distance_kms,temperature_time,temperature_value,feels_like,humidity,wind_speed,cloudness,weather_main,weather_desc,precipitation
0,"May 11, 2015 at 6:55PM",Completed,2015-05-11 17:26:00,2015-05-11 17:55:00,21.550000,29.0,7.450000,uberX,60.031438,30.329826,...,9.29,2015-05-11T17:26:00,12,12,0.69,4.81,0.78,partly-cloudy-day,Mostly Cloudy,none
1,"May 11, 2015 at 8:12PM",Completed,2015-05-11 18:46:00,2015-05-11 19:12:00,19.450000,26.0,6.550000,uberX,59.963014,30.307313,...,9.93,2015-05-11T18:46:00,10,10,0.70,6.53,0.66,partly-cloudy-day,Mostly Cloudy,none
2,"May 13, 2015 at 11:38AM",Completed,2015-05-13 09:15:00,2015-05-13 10:38:00,66.883333,83.0,16.116667,uberX,60.031529,30.329416,...,18.01,2015-05-13T09:15:00,14,14,0.61,5.25,0.86,partly-cloudy-day,Mostly Cloudy,none
3,"May 16, 2015 at 1:44AM",Completed,2015-05-16 00:24:00,2015-05-16 00:44:00,13.616667,20.0,6.383333,uberBLACK,59.959883,30.311159,...,5.10,2015-05-16T00:24:00,3,3,0.84,0.87,0.56,partly-cloudy-night,Partly Cloudy,none
4,"May 16, 2015 at 3:18AM",Completed,2015-05-16 01:29:00,2015-05-16 02:18:00,38.900000,49.0,10.100000,uberX,59.934813,30.308553,...,21.92,2015-05-16T01:29:00,3,1,0.90,1.61,0.31,partly-cloudy-night,Partly Cloudy,none
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
673,"April 23, 2018 at 12:11PM",Completed,2018-04-23 11:59:00,2018-04-23 12:11:00,10.616667,12.0,1.383333,uberX,59.939274,30.416977,...,5.79,2018-04-23T11:59:00,6,4,0.39,2.57,0.18,clear-day,Clear,none
674,"April 24, 2018 at 02:58PM",Completed,2018-04-24 14:21:00,2018-04-24 14:58:00,23.683333,37.0,13.316667,uberX,59.941415,30.366456,...,11.46,2018-04-24T14:21:00,7,3,0.62,5.37,0.48,partly-cloudy-day,Partly Cloudy,none
675,"April 26, 2018 at 03:57PM",Completed,2018-04-26 15:22:00,2018-04-26 15:57:00,16.333333,35.0,18.666667,uberX,59.941415,30.366456,...,6.68,2018-04-26T15:22:00,8,6,0.76,4.02,0.9,partly-cloudy-day,Mostly Cloudy,none
676,"April 26, 2018 at 05:00PM",Completed,2018-04-26 16:37:00,2018-04-26 17:00:00,19.316667,23.0,3.683333,uberX,59.916862,30.451020,...,7.48,2018-04-26T16:37:00,9,7,0.68,4.28,0.79,partly-cloudy-day,Mostly Cloudy,none


In [13]:
### Save preprocessed data ###
# NOTE(review): the export below is currently disabled (commented out), so running this
# cell writes nothing. If it is re-enabled as written, to_csv also writes the DataFrame
# index as an unnamed first column; pass index=False if that column is not wanted.
#df.to_csv('Data/processed_personal_rides.csv')